aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/fscrypt.rst10
-rw-r--r--MAINTAINERS8
-rw-r--r--arch/arm/crypto/Kconfig7
-rw-r--r--arch/arm/crypto/Makefile2
-rw-r--r--arch/arm/crypto/chacha20-neon-core.S277
-rw-r--r--arch/arm/crypto/crc32-ce-glue.c2
-rw-r--r--arch/arm/crypto/ghash-ce-core.S108
-rw-r--r--arch/arm/crypto/ghash-ce-glue.c38
-rw-r--r--arch/arm/crypto/speck-neon-core.S434
-rw-r--r--arch/arm/crypto/speck-neon-glue.c288
-rw-r--r--arch/arm64/configs/defconfig2
-rw-r--r--arch/arm64/crypto/Kconfig11
-rw-r--r--arch/arm64/crypto/Makefile6
-rw-r--r--arch/arm64/crypto/aes-ce.S5
-rw-r--r--arch/arm64/crypto/aes-glue.c217
-rw-r--r--arch/arm64/crypto/aes-modes.S416
-rw-r--r--arch/arm64/crypto/aes-neon.S6
-rw-r--r--arch/arm64/crypto/crc32-ce-core.S287
-rw-r--r--arch/arm64/crypto/crc32-ce-glue.c244
-rw-r--r--arch/arm64/crypto/crct10dif-ce-core.S314
-rw-r--r--arch/arm64/crypto/crct10dif-ce-glue.c14
-rw-r--r--arch/arm64/crypto/speck-neon-core.S352
-rw-r--r--arch/arm64/crypto/speck-neon-glue.c282
-rw-r--r--arch/m68k/configs/amiga_defconfig2
-rw-r--r--arch/m68k/configs/apollo_defconfig2
-rw-r--r--arch/m68k/configs/atari_defconfig2
-rw-r--r--arch/m68k/configs/bvme6000_defconfig2
-rw-r--r--arch/m68k/configs/hp300_defconfig2
-rw-r--r--arch/m68k/configs/mac_defconfig2
-rw-r--r--arch/m68k/configs/multi_defconfig2
-rw-r--r--arch/m68k/configs/mvme147_defconfig2
-rw-r--r--arch/m68k/configs/mvme16x_defconfig2
-rw-r--r--arch/m68k/configs/q40_defconfig2
-rw-r--r--arch/m68k/configs/sun3_defconfig2
-rw-r--r--arch/m68k/configs/sun3x_defconfig2
-rw-r--r--arch/s390/configs/debug_defconfig1
-rw-r--r--arch/s390/configs/performance_defconfig1
-rw-r--r--arch/s390/crypto/aes_s390.c48
-rw-r--r--arch/s390/defconfig1
-rw-r--r--arch/x86/crypto/Makefile5
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c47
-rw-r--r--arch/x86/crypto/fpu.c207
-rw-r--r--arch/x86/crypto/sha1-mb/Makefile14
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb.c1011
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_ctx.h134
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr.h110
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S287
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S304
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c64
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S209
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_x8_avx2.S492
-rw-r--r--arch/x86/crypto/sha256-mb/Makefile14
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb.c1013
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_ctx.h134
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr.h108
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S304
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S307
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c65
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S214
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_x8_avx2.S598
-rw-r--r--arch/x86/crypto/sha512-mb/Makefile12
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb.c1047
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_ctx.h128
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr.h104
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S281
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S297
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c69
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S224
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_x4_avx2.S531
-rw-r--r--crypto/Kconfig101
-rw-r--r--crypto/Makefile4
-rw-r--r--crypto/aegis.h20
-rw-r--r--crypto/ahash.c25
-rw-r--r--crypto/algapi.c17
-rw-r--r--crypto/algboss.c2
-rw-r--r--crypto/algif_aead.c12
-rw-r--r--crypto/algif_hash.c2
-rw-r--r--crypto/authenc.c8
-rw-r--r--crypto/authencesn.c8
-rw-r--r--crypto/ccm.c9
-rw-r--r--crypto/chacha20_generic.c7
-rw-r--r--crypto/cryptd.c32
-rw-r--r--crypto/crypto_null.c11
-rw-r--r--crypto/crypto_user_base.c (renamed from crypto/crypto_user.c)9
-rw-r--r--crypto/crypto_user_stat.c463
-rw-r--r--crypto/echainiv.c4
-rw-r--r--crypto/gcm.c8
-rw-r--r--crypto/internal.h8
-rw-r--r--crypto/lrw.c339
-rw-r--r--crypto/mcryptd.c675
-rw-r--r--crypto/morus1280.c7
-rw-r--r--crypto/morus640.c16
-rw-r--r--crypto/ofb.c225
-rw-r--r--crypto/rng.c1
-rw-r--r--crypto/rsa-pkcs1pad.c9
-rw-r--r--crypto/seqiv.c4
-rw-r--r--crypto/shash.c33
-rw-r--r--crypto/skcipher.c24
-rw-r--r--crypto/speck.c307
-rw-r--r--crypto/tcrypt.c27
-rw-r--r--crypto/tcrypt.h1
-rw-r--r--crypto/testmgr.c42
-rw-r--r--crypto/testmgr.h863
-rw-r--r--crypto/xcbc.c8
-rw-r--r--crypto/xts.c269
-rw-r--r--drivers/block/cryptoloop.c22
-rw-r--r--drivers/bus/fsl-mc/fsl-mc-bus.c5
-rw-r--r--drivers/char/hw_random/core.c4
-rw-r--r--drivers/char/random.c24
-rw-r--r--drivers/crypto/Makefile2
-rw-r--r--drivers/crypto/atmel-aes.c5
-rw-r--r--drivers/crypto/atmel-authenc.h13
-rw-r--r--drivers/crypto/atmel-ecc.c11
-rw-r--r--drivers/crypto/atmel-ecc.h14
-rw-r--r--drivers/crypto/atmel-sha.c5
-rw-r--r--drivers/crypto/atmel-tdes.c5
-rw-r--r--drivers/crypto/axis/artpec6_crypto.c20
-rw-r--r--drivers/crypto/caam/Kconfig57
-rw-r--r--drivers/crypto/caam/Makefile10
-rw-r--r--drivers/crypto/caam/caamalg.c728
-rw-r--r--drivers/crypto/caam/caamalg_desc.c143
-rw-r--r--drivers/crypto/caam/caamalg_desc.h28
-rw-r--r--drivers/crypto/caam/caamalg_qi.c627
-rw-r--r--drivers/crypto/caam/caamalg_qi2.c5165
-rw-r--r--drivers/crypto/caam/caamalg_qi2.h223
-rw-r--r--drivers/crypto/caam/caamhash.c80
-rw-r--r--drivers/crypto/caam/caamhash_desc.c80
-rw-r--r--drivers/crypto/caam/caamhash_desc.h21
-rw-r--r--drivers/crypto/caam/caampkc.c1
-rw-r--r--drivers/crypto/caam/caamrng.c1
-rw-r--r--drivers/crypto/caam/compat.h2
-rw-r--r--drivers/crypto/caam/ctrl.c1
-rw-r--r--drivers/crypto/caam/dpseci.c426
-rw-r--r--drivers/crypto/caam/dpseci.h333
-rw-r--r--drivers/crypto/caam/dpseci_cmd.h149
-rw-r--r--drivers/crypto/caam/error.c79
-rw-r--r--drivers/crypto/caam/error.h6
-rw-r--r--drivers/crypto/caam/jr.c1
-rw-r--r--drivers/crypto/caam/qi.c43
-rw-r--r--drivers/crypto/caam/qi.h3
-rw-r--r--drivers/crypto/caam/regs.h30
-rw-r--r--drivers/crypto/caam/sg_sw_qm.h29
-rw-r--r--drivers/crypto/caam/sg_sw_qm2.h30
-rw-r--r--drivers/crypto/cavium/cpt/cptvf_reqmanager.c20
-rw-r--r--drivers/crypto/cavium/nitrox/Makefile3
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_common.h19
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_csr.h111
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_debugfs.c115
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_dev.h162
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_hal.c71
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_hal.h23
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_isr.c337
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_isr.h10
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_lib.c98
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_main.c203
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_reqmgr.c49
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_sriov.c151
-rw-r--r--drivers/crypto/ccp/ccp-crypto-aes-xts.c13
-rw-r--r--drivers/crypto/ccp/ccp-crypto.h2
-rw-r--r--drivers/crypto/ccp/psp-dev.c47
-rw-r--r--drivers/crypto/ccp/sp-platform.c53
-rw-r--r--drivers/crypto/ccree/cc_hw_queue_defs.h6
-rw-r--r--drivers/crypto/chelsio/chcr_algo.c30
-rw-r--r--drivers/crypto/chelsio/chcr_core.c2
-rw-r--r--drivers/crypto/chelsio/chcr_crypto.h2
-rw-r--r--drivers/crypto/chelsio/chtls/chtls_cm.c7
-rw-r--r--drivers/crypto/chelsio/chtls/chtls_main.c3
-rw-r--r--drivers/crypto/mxs-dcp.c142
-rw-r--r--drivers/crypto/omap-aes.c17
-rw-r--r--drivers/crypto/omap-aes.h2
-rw-r--r--drivers/crypto/picoxcell_crypto.c21
-rw-r--r--drivers/crypto/qat/qat_common/qat_algs.c60
-rw-r--r--drivers/crypto/qce/ablkcipher.c13
-rw-r--r--drivers/crypto/qce/cipher.h2
-rw-r--r--drivers/crypto/s5p-sss.c113
-rw-r--r--drivers/crypto/sahara.c31
-rw-r--r--drivers/crypto/vmx/aes_cbc.c22
-rw-r--r--drivers/crypto/vmx/aes_ctr.c18
-rw-r--r--drivers/crypto/vmx/aes_xts.c18
-rw-r--r--drivers/md/dm-integrity.c23
-rw-r--r--drivers/md/dm-verity-fec.c5
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c20
-rw-r--r--drivers/net/ppp/ppp_mppe.c27
-rw-r--r--drivers/soc/fsl/dpio/dpio-service.c58
-rw-r--r--drivers/staging/rtl8192e/rtllib_crypt_tkip.c34
-rw-r--r--drivers/staging/rtl8192e/rtllib_crypt_wep.c28
-rw-r--r--drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c34
-rw-r--r--drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_wep.c26
-rw-r--r--drivers/usb/wusbcore/crypto.c16
-rw-r--r--fs/crypto/fscrypt_private.h4
-rw-r--r--fs/crypto/keyinfo.c10
-rw-r--r--include/crypto/acompress.h38
-rw-r--r--include/crypto/aead.h51
-rw-r--r--include/crypto/akcipher.h76
-rw-r--r--include/crypto/algapi.h14
-rw-r--r--include/crypto/cbc.h2
-rw-r--r--include/crypto/chacha20.h3
-rw-r--r--include/crypto/hash.h38
-rw-r--r--include/crypto/internal/cryptouser.h8
-rw-r--r--include/crypto/internal/geniv.h2
-rw-r--r--include/crypto/kpp.h51
-rw-r--r--include/crypto/mcryptd.h114
-rw-r--r--include/crypto/morus1280_glue.h2
-rw-r--r--include/crypto/morus640_glue.h2
-rw-r--r--include/crypto/null.h2
-rw-r--r--include/crypto/rng.h29
-rw-r--r--include/crypto/skcipher.h118
-rw-r--r--include/crypto/speck.h62
-rw-r--r--include/linux/compiler_types.h1
-rw-r--r--include/linux/cpufeature.h2
-rw-r--r--include/linux/crc-t10dif.h1
-rw-r--r--include/linux/crypto.h110
-rw-r--r--include/linux/fsl/mc.h6
-rw-r--r--include/linux/hw_random.h3
-rw-r--r--include/linux/sunrpc/gss_krb5.h30
-rw-r--r--include/soc/fsl/dpaa2-fd.h242
-rw-r--r--include/soc/fsl/dpaa2-global.h15
-rw-r--r--include/soc/fsl/dpaa2-io.h4
-rw-r--r--include/uapi/linux/cryptouser.h52
-rw-r--r--include/uapi/linux/fs.h4
-rw-r--r--lib/chacha20.c6
-rw-r--r--lib/crc-t10dif.c57
-rw-r--r--net/ceph/crypto.c12
-rw-r--r--net/ceph/crypto.h2
-rw-r--r--net/mac802154/llsec.c16
-rw-r--r--net/mac802154/llsec.h2
-rw-r--r--net/rxrpc/ar-internal.h2
-rw-r--r--net/rxrpc/rxkad.c44
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c87
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_keys.c9
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c53
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c18
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c20
-rw-r--r--tools/crypto/getstat.c294
234 files changed, 11921 insertions, 15829 deletions
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 48b424de85bb..cfbc18f0d9c9 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -191,21 +191,11 @@ Currently, the following pairs of encryption modes are supported:
- AES-256-XTS for contents and AES-256-CTS-CBC for filenames
- AES-128-CBC for contents and AES-128-CTS-CBC for filenames
-- Speck128/256-XTS for contents and Speck128/256-CTS-CBC for filenames
It is strongly recommended to use AES-256-XTS for contents encryption.
AES-128-CBC was added only for low-powered embedded devices with
crypto accelerators such as CAAM or CESA that do not support XTS.
-Similarly, Speck128/256 support was only added for older or low-end
-CPUs which cannot do AES fast enough -- especially ARM CPUs which have
-NEON instructions but not the Cryptography Extensions -- and for which
-it would not otherwise be feasible to use encryption at all. It is
-not recommended to use Speck on CPUs that have AES instructions.
-Speck support is only available if it has been enabled in the crypto
-API via CONFIG_CRYPTO_SPECK. Also, on ARM platforms, to get
-acceptable performance CONFIG_CRYPTO_SPECK_NEON must be enabled.
-
New encryption modes can be added relatively easily, without changes
to individual filesystems. However, authenticated encryption (AE)
modes are not currently supported because of the difficulty of dealing
diff --git a/MAINTAINERS b/MAINTAINERS
index 99540a5a4e19..8f22f6af3782 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7578,14 +7578,6 @@ S: Supported
F: drivers/infiniband/hw/i40iw/
F: include/uapi/rdma/i40iw-abi.h
-INTEL SHA MULTIBUFFER DRIVER
-M: Megha Dey <megha.dey@linux.intel.com>
-R: Tim Chen <tim.c.chen@linux.intel.com>
-L: linux-crypto@vger.kernel.org
-S: Supported
-F: arch/x86/crypto/sha*-mb/
-F: crypto/mcryptd.c
-
INTEL TELEMETRY DRIVER
M: Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
L: platform-driver-x86@vger.kernel.org
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 925d1364727a..ef0c7feea6e2 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -99,6 +99,7 @@ config CRYPTO_GHASH_ARM_CE
depends on KERNEL_MODE_NEON
select CRYPTO_HASH
select CRYPTO_CRYPTD
+ select CRYPTO_GF128MUL
help
Use an implementation of GHASH (used by the GCM AEAD chaining mode)
that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
@@ -121,10 +122,4 @@ config CRYPTO_CHACHA20_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_CHACHA20
-config CRYPTO_SPECK_NEON
- tristate "NEON accelerated Speck cipher algorithms"
- depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
- select CRYPTO_SPECK
-
endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 8de542c48ade..bd5bceef0605 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -10,7 +10,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
-obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -54,7 +53,6 @@ ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
-speck-neon-y := speck-neon-core.o speck-neon-glue.o
ifdef REGENERATE_ARM_CRYPTO
quiet_cmd_perl = PERL $@
diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S
index 451a849ad518..50e7b9896818 100644
--- a/arch/arm/crypto/chacha20-neon-core.S
+++ b/arch/arm/crypto/chacha20-neon-core.S
@@ -18,6 +18,34 @@
* (at your option) any later version.
*/
+ /*
+ * NEON doesn't have a rotate instruction. The alternatives are, more or less:
+ *
+ * (a) vshl.u32 + vsri.u32 (needs temporary register)
+ * (b) vshl.u32 + vshr.u32 + vorr (needs temporary register)
+ * (c) vrev32.16 (16-bit rotations only)
+ * (d) vtbl.8 + vtbl.8 (multiple of 8 bits rotations only,
+ * needs index vector)
+ *
+ * ChaCha20 has 16, 12, 8, and 7-bit rotations. For the 12 and 7-bit
+ * rotations, the only choices are (a) and (b). We use (a) since it takes
+ * two-thirds the cycles of (b) on both Cortex-A7 and Cortex-A53.
+ *
+ * For the 16-bit rotation, we use vrev32.16 since it's consistently fastest
+ * and doesn't need a temporary register.
+ *
+ * For the 8-bit rotation, we use vtbl.8 + vtbl.8. On Cortex-A7, this sequence
+ * is twice as fast as (a), even when doing (a) on multiple registers
+ * simultaneously to eliminate the stall between vshl and vsri. Also, it
+ * parallelizes better when temporary registers are scarce.
+ *
+ * A disadvantage is that on Cortex-A53, the vtbl sequence is the same speed as
+ * (a), so the need to load the rotation table actually makes the vtbl method
+ * slightly slower overall on that CPU (~1.3% slower ChaCha20). Still, it
+ * seems to be a good compromise to get a more significant speed boost on some
+ * CPUs, e.g. ~4.8% faster ChaCha20 on Cortex-A7.
+ */
+
#include <linux/linkage.h>
.text
@@ -46,7 +74,9 @@ ENTRY(chacha20_block_xor_neon)
vmov q10, q2
vmov q11, q3
+ adr ip, .Lrol8_table
mov r3, #10
+ vld1.8 {d10}, [ip, :64]
.Ldoubleround:
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
@@ -62,9 +92,9 @@ ENTRY(chacha20_block_xor_neon)
// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
vadd.i32 q0, q0, q1
- veor q4, q3, q0
- vshl.u32 q3, q4, #8
- vsri.u32 q3, q4, #24
+ veor q3, q3, q0
+ vtbl.8 d6, {d6}, d10
+ vtbl.8 d7, {d7}, d10
// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
vadd.i32 q2, q2, q3
@@ -92,9 +122,9 @@ ENTRY(chacha20_block_xor_neon)
// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
vadd.i32 q0, q0, q1
- veor q4, q3, q0
- vshl.u32 q3, q4, #8
- vsri.u32 q3, q4, #24
+ veor q3, q3, q0
+ vtbl.8 d6, {d6}, d10
+ vtbl.8 d7, {d7}, d10
// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
vadd.i32 q2, q2, q3
@@ -139,13 +169,17 @@ ENTRY(chacha20_block_xor_neon)
bx lr
ENDPROC(chacha20_block_xor_neon)
+ .align 4
+.Lctrinc: .word 0, 1, 2, 3
+.Lrol8_table: .byte 3, 0, 1, 2, 7, 4, 5, 6
+
.align 5
ENTRY(chacha20_4block_xor_neon)
- push {r4-r6, lr}
- mov ip, sp // preserve the stack pointer
- sub r3, sp, #0x20 // allocate a 32 byte buffer
- bic r3, r3, #0x1f // aligned to 32 bytes
- mov sp, r3
+ push {r4-r5}
+ mov r4, sp // preserve the stack pointer
+ sub ip, sp, #0x20 // allocate a 32 byte buffer
+ bic ip, ip, #0x1f // aligned to 32 bytes
+ mov sp, ip
// r0: Input state matrix, s
// r1: 4 data blocks output, o
@@ -155,25 +189,24 @@ ENTRY(chacha20_4block_xor_neon)
// This function encrypts four consecutive ChaCha20 blocks by loading
// the state matrix in NEON registers four times. The algorithm performs
// each operation on the corresponding word of each state matrix, hence
- // requires no word shuffling. For final XORing step we transpose the
- // matrix by interleaving 32- and then 64-bit words, which allows us to
- // do XOR in NEON registers.
+ // requires no word shuffling. The words are re-interleaved before the
+ // final addition of the original state and the XORing step.
//
- // x0..15[0-3] = s0..3[0..3]
- add r3, r0, #0x20
+ // x0..15[0-3] = s0..15[0-3]
+ add ip, r0, #0x20
vld1.32 {q0-q1}, [r0]
- vld1.32 {q2-q3}, [r3]
+ vld1.32 {q2-q3}, [ip]
- adr r3, CTRINC
+ adr r5, .Lctrinc
vdup.32 q15, d7[1]
vdup.32 q14, d7[0]
- vld1.32 {q11}, [r3, :128]
+ vld1.32 {q4}, [r5, :128]
vdup.32 q13, d6[1]
vdup.32 q12, d6[0]
- vadd.i32 q12, q12, q11 // x12 += counter values 0-3
vdup.32 q11, d5[1]
vdup.32 q10, d5[0]
+ vadd.u32 q12, q12, q4 // x12 += counter values 0-3
vdup.32 q9, d4[1]
vdup.32 q8, d4[0]
vdup.32 q7, d3[1]
@@ -185,9 +218,13 @@ ENTRY(chacha20_4block_xor_neon)
vdup.32 q1, d0[1]
vdup.32 q0, d0[0]
+ adr ip, .Lrol8_table
mov r3, #10
+ b 1f
.Ldoubleround4:
+ vld1.32 {q8-q9}, [sp, :256]
+1:
// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
@@ -236,24 +273,25 @@ ENTRY(chacha20_4block_xor_neon)
// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+ vld1.8 {d16}, [ip, :64]
vadd.i32 q0, q0, q4
vadd.i32 q1, q1, q5
vadd.i32 q2, q2, q6
vadd.i32 q3, q3, q7
- veor q8, q12, q0
- veor q9, q13, q1
- vshl.u32 q12, q8, #8
- vshl.u32 q13, q9, #8
- vsri.u32 q12, q8, #24
- vsri.u32 q13, q9, #24
+ veor q12, q12, q0
+ veor q13, q13, q1
+ veor q14, q14, q2
+ veor q15, q15, q3
- veor q8, q14, q2
- veor q9, q15, q3
- vshl.u32 q14, q8, #8
- vshl.u32 q15, q9, #8
- vsri.u32 q14, q8, #24
- vsri.u32 q15, q9, #24
+ vtbl.8 d24, {d24}, d16
+ vtbl.8 d25, {d25}, d16
+ vtbl.8 d26, {d26}, d16
+ vtbl.8 d27, {d27}, d16
+ vtbl.8 d28, {d28}, d16
+ vtbl.8 d29, {d29}, d16
+ vtbl.8 d30, {d30}, d16
+ vtbl.8 d31, {d31}, d16
vld1.32 {q8-q9}, [sp, :256]
@@ -332,24 +370,25 @@ ENTRY(chacha20_4block_xor_neon)
// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+ vld1.8 {d16}, [ip, :64]
vadd.i32 q0, q0, q5
vadd.i32 q1, q1, q6
vadd.i32 q2, q2, q7
vadd.i32 q3, q3, q4
- veor q8, q15, q0
- veor q9, q12, q1
- vshl.u32 q15, q8, #8
- vshl.u32 q12, q9, #8
- vsri.u32 q15, q8, #24
- vsri.u32 q12, q9, #24
+ veor q15, q15, q0
+ veor q12, q12, q1
+ veor q13, q13, q2
+ veor q14, q14, q3
- veor q8, q13, q2
- veor q9, q14, q3
- vshl.u32 q13, q8, #8
- vshl.u32 q14, q9, #8
- vsri.u32 q13, q8, #24
- vsri.u32 q14, q9, #24
+ vtbl.8 d30, {d30}, d16
+ vtbl.8 d31, {d31}, d16
+ vtbl.8 d24, {d24}, d16
+ vtbl.8 d25, {d25}, d16
+ vtbl.8 d26, {d26}, d16
+ vtbl.8 d27, {d27}, d16
+ vtbl.8 d28, {d28}, d16
+ vtbl.8 d29, {d29}, d16
vld1.32 {q8-q9}, [sp, :256]
@@ -379,104 +418,76 @@ ENTRY(chacha20_4block_xor_neon)
vsri.u32 q6, q9, #25
subs r3, r3, #1
- beq 0f
-
- vld1.32 {q8-q9}, [sp, :256]
- b .Ldoubleround4
-
- // x0[0-3] += s0[0]
- // x1[0-3] += s0[1]
- // x2[0-3] += s0[2]
- // x3[0-3] += s0[3]
-0: ldmia r0!, {r3-r6}
- vdup.32 q8, r3
- vdup.32 q9, r4
- vadd.i32 q0, q0, q8
- vadd.i32 q1, q1, q9
- vdup.32 q8, r5
- vdup.32 q9, r6
- vadd.i32 q2, q2, q8
- vadd.i32 q3, q3, q9
-
- // x4[0-3] += s1[0]
- // x5[0-3] += s1[1]
- // x6[0-3] += s1[2]
- // x7[0-3] += s1[3]
- ldmia r0!, {r3-r6}
- vdup.32 q8, r3
- vdup.32 q9, r4
- vadd.i32 q4, q4, q8
- vadd.i32 q5, q5, q9
- vdup.32 q8, r5
- vdup.32 q9, r6
- vadd.i32 q6, q6, q8
- vadd.i32 q7, q7, q9
-
- // interleave 32-bit words in state n, n+1
- vzip.32 q0, q1
- vzip.32 q2, q3
- vzip.32 q4, q5
- vzip.32 q6, q7
-
- // interleave 64-bit words in state n, n+2
+ bne .Ldoubleround4
+
+ // x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15.
+ // x8..9[0-3] are on the stack.
+
+ // Re-interleave the words in the first two rows of each block (x0..7).
+ // Also add the counter values 0-3 to x12[0-3].
+ vld1.32 {q8}, [r5, :128] // load counter values 0-3
+ vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1)
+ vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3)
+ vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5)
+ vzip.32 q6, q7 // => (6 7 6 7) (6 7 6 7)
+ vadd.u32 q12, q8 // x12 += counter values 0-3
vswp d1, d4
vswp d3, d6
+ vld1.32 {q8-q9}, [r0]! // load s0..7
vswp d9, d12
vswp d11, d14
- // xor with corresponding input, write to output
+ // Swap q1 and q4 so that we'll free up consecutive registers (q0-q1)
+ // after XORing the first 32 bytes.
+ vswp q1, q4
+
+ // First two rows of each block are (q0 q1) (q2 q6) (q4 q5) (q3 q7)
+
+ // x0..3[0-3] += s0..3[0-3] (add orig state to 1st row of each block)
+ vadd.u32 q0, q0, q8
+ vadd.u32 q2, q2, q8
+ vadd.u32 q4, q4, q8
+ vadd.u32 q3, q3, q8
+
+ // x4..7[0-3] += s4..7[0-3] (add orig state to 2nd row of each block)
+ vadd.u32 q1, q1, q9
+ vadd.u32 q6, q6, q9
+ vadd.u32 q5, q5, q9
+ vadd.u32 q7, q7, q9
+
+ // XOR first 32 bytes using keystream from first two rows of first block
vld1.8 {q8-q9}, [r2]!
veor q8, q8, q0
- veor q9, q9, q4
+ veor q9, q9, q1
vst1.8 {q8-q9}, [r1]!
+ // Re-interleave the words in the last two rows of each block (x8..15).
vld1.32 {q8-q9}, [sp, :256]
-
- // x8[0-3] += s2[0]
- // x9[0-3] += s2[1]
- // x10[0-3] += s2[2]
- // x11[0-3] += s2[3]
- ldmia r0!, {r3-r6}
- vdup.32 q0, r3
- vdup.32 q4, r4
- vadd.i32 q8, q8, q0
- vadd.i32 q9, q9, q4
- vdup.32 q0, r5
- vdup.32 q4, r6
- vadd.i32 q10, q10, q0
- vadd.i32 q11, q11, q4
-
- // x12[0-3] += s3[0]
- // x13[0-3] += s3[1]
- // x14[0-3] += s3[2]
- // x15[0-3] += s3[3]
- ldmia r0!, {r3-r6}
- vdup.32 q0, r3
- vdup.32 q4, r4
- adr r3, CTRINC
- vadd.i32 q12, q12, q0
- vld1.32 {q0}, [r3, :128]
- vadd.i32 q13, q13, q4
- vadd.i32 q12, q12, q0 // x12 += counter values 0-3
-
- vdup.32 q0, r5
- vdup.32 q4, r6
- vadd.i32 q14, q14, q0
- vadd.i32 q15, q15, q4
-
- // interleave 32-bit words in state n, n+1
- vzip.32 q8, q9
- vzip.32 q10, q11
- vzip.32 q12, q13
- vzip.32 q14, q15
-
- // interleave 64-bit words in state n, n+2
- vswp d17, d20
- vswp d19, d22
+ vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13)
+ vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15)
+ vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9)
+ vzip.32 q10, q11 // => (10 11 10 11) (10 11 10 11)
+ vld1.32 {q0-q1}, [r0] // load s8..15
vswp d25, d28
vswp d27, d30
+ vswp d17, d20
+ vswp d19, d22
+
+ // Last two rows of each block are (q8 q12) (q10 q14) (q9 q13) (q11 q15)
+
+ // x8..11[0-3] += s8..11[0-3] (add orig state to 3rd row of each block)
+ vadd.u32 q8, q8, q0
+ vadd.u32 q10, q10, q0
+ vadd.u32 q9, q9, q0
+ vadd.u32 q11, q11, q0
+
+ // x12..15[0-3] += s12..15[0-3] (add orig state to 4th row of each block)
+ vadd.u32 q12, q12, q1
+ vadd.u32 q14, q14, q1
+ vadd.u32 q13, q13, q1
+ vadd.u32 q15, q15, q1
- vmov q4, q1
+ // XOR the rest of the data with the keystream
vld1.8 {q0-q1}, [r2]!
veor q0, q0, q8
@@ -509,13 +520,11 @@ ENTRY(chacha20_4block_xor_neon)
vst1.8 {q0-q1}, [r1]!
vld1.8 {q0-q1}, [r2]
+ mov sp, r4 // restore original stack pointer
veor q0, q0, q11
veor q1, q1, q15
vst1.8 {q0-q1}, [r1]
- mov sp, ip
- pop {r4-r6, pc}
+ pop {r4-r5}
+ bx lr
ENDPROC(chacha20_4block_xor_neon)
-
- .align 4
-CTRINC: .word 0, 1, 2, 3
diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c
index 96e62ec105d0..cd9e93b46c2d 100644
--- a/arch/arm/crypto/crc32-ce-glue.c
+++ b/arch/arm/crypto/crc32-ce-glue.c
@@ -236,7 +236,7 @@ static void __exit crc32_pmull_mod_exit(void)
ARRAY_SIZE(crc32_pmull_algs));
}
-static const struct cpu_feature crc32_cpu_feature[] = {
+static const struct cpu_feature __maybe_unused crc32_cpu_feature[] = {
{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
};
MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
index 2f78c10b1881..406009afa9cf 100644
--- a/arch/arm/crypto/ghash-ce-core.S
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -63,6 +63,33 @@
k48 .req d31
SHASH2_p64 .req d31
+ HH .req q10
+ HH3 .req q11
+ HH4 .req q12
+ HH34 .req q13
+
+ HH_L .req d20
+ HH_H .req d21
+ HH3_L .req d22
+ HH3_H .req d23
+ HH4_L .req d24
+ HH4_H .req d25
+ HH34_L .req d26
+ HH34_H .req d27
+ SHASH2_H .req d29
+
+ XL2 .req q5
+ XM2 .req q6
+ XH2 .req q7
+ T3 .req q8
+
+ XL2_L .req d10
+ XL2_H .req d11
+ XM2_L .req d12
+ XM2_H .req d13
+ T3_L .req d16
+ T3_H .req d17
+
.text
.fpu crypto-neon-fp-armv8
@@ -175,12 +202,77 @@
beq 0f
vld1.64 {T1}, [ip]
teq r0, #0
- b 1f
+ b 3f
+
+0: .ifc \pn, p64
+ tst r0, #3 // skip until #blocks is a
+ bne 2f // round multiple of 4
+
+ vld1.8 {XL2-XM2}, [r2]!
+1: vld1.8 {T3-T2}, [r2]!
+ vrev64.8 XL2, XL2
+ vrev64.8 XM2, XM2
+
+ subs r0, r0, #4
+
+ vext.8 T1, XL2, XL2, #8
+ veor XL2_H, XL2_H, XL_L
+ veor XL, XL, T1
+
+ vrev64.8 T3, T3
+ vrev64.8 T1, T2
+
+ vmull.p64 XH, HH4_H, XL_H // a1 * b1
+ veor XL2_H, XL2_H, XL_H
+ vmull.p64 XL, HH4_L, XL_L // a0 * b0
+ vmull.p64 XM, HH34_H, XL2_H // (a1 + a0)(b1 + b0)
+
+ vmull.p64 XH2, HH3_H, XM2_L // a1 * b1
+ veor XM2_L, XM2_L, XM2_H
+ vmull.p64 XL2, HH3_L, XM2_H // a0 * b0
+ vmull.p64 XM2, HH34_L, XM2_L // (a1 + a0)(b1 + b0)
+
+ veor XH, XH, XH2
+ veor XL, XL, XL2
+ veor XM, XM, XM2
+
+ vmull.p64 XH2, HH_H, T3_L // a1 * b1
+ veor T3_L, T3_L, T3_H
+ vmull.p64 XL2, HH_L, T3_H // a0 * b0
+ vmull.p64 XM2, SHASH2_H, T3_L // (a1 + a0)(b1 + b0)
+
+ veor XH, XH, XH2
+ veor XL, XL, XL2
+ veor XM, XM, XM2
+
+ vmull.p64 XH2, SHASH_H, T1_L // a1 * b1
+ veor T1_L, T1_L, T1_H
+ vmull.p64 XL2, SHASH_L, T1_H // a0 * b0
+ vmull.p64 XM2, SHASH2_p64, T1_L // (a1 + a0)(b1 + b0)
+
+ veor XH, XH, XH2
+ veor XL, XL, XL2
+ veor XM, XM, XM2
-0: vld1.64 {T1}, [r2]!
+ beq 4f
+
+ vld1.8 {XL2-XM2}, [r2]!
+
+ veor T1, XL, XH
+ veor XM, XM, T1
+
+ __pmull_reduce_p64
+
+ veor T1, T1, XH
+ veor XL, XL, T1
+
+ b 1b
+ .endif
+
+2: vld1.64 {T1}, [r2]!
subs r0, r0, #1
-1: /* multiply XL by SHASH in GF(2^128) */
+3: /* multiply XL by SHASH in GF(2^128) */
#ifndef CONFIG_CPU_BIG_ENDIAN
vrev64.8 T1, T1
#endif
@@ -193,7 +285,7 @@
__pmull_\pn XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l @ a0 * b0
__pmull_\pn XM, T1_L, SHASH2_\pn @ (a1+a0)(b1+b0)
- veor T1, XL, XH
+4: veor T1, XL, XH
veor XM, XM, T1
__pmull_reduce_\pn
@@ -212,8 +304,14 @@
* struct ghash_key const *k, const char *head)
*/
ENTRY(pmull_ghash_update_p64)
- vld1.64 {SHASH}, [r3]
+ vld1.64 {SHASH}, [r3]!
+ vld1.64 {HH}, [r3]!
+ vld1.64 {HH3-HH4}, [r3]
+
veor SHASH2_p64, SHASH_L, SHASH_H
+ veor SHASH2_H, HH_L, HH_H
+ veor HH34_L, HH3_L, HH3_H
+ veor HH34_H, HH4_L, HH4_H
vmov.i8 MASK, #0xe1
vshl.u64 MASK, MASK, #57
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index 8930fc4e7c22..b7d30b6cf49c 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -1,7 +1,7 @@
/*
* Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
*
- * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
@@ -28,8 +28,10 @@ MODULE_ALIAS_CRYPTO("ghash");
#define GHASH_DIGEST_SIZE 16
struct ghash_key {
- u64 a;
- u64 b;
+ u64 h[2];
+ u64 h2[2];
+ u64 h3[2];
+ u64 h4[2];
};
struct ghash_desc_ctx {
@@ -117,26 +119,40 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
return 0;
}
+static void ghash_reflect(u64 h[], const be128 *k)
+{
+ u64 carry = be64_to_cpu(k->a) >> 63;
+
+ h[0] = (be64_to_cpu(k->b) << 1) | carry;
+ h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);
+
+ if (carry)
+ h[1] ^= 0xc200000000000000UL;
+}
+
static int ghash_setkey(struct crypto_shash *tfm,
const u8 *inkey, unsigned int keylen)
{
struct ghash_key *key = crypto_shash_ctx(tfm);
- u64 a, b;
+ be128 h, k;
if (keylen != GHASH_BLOCK_SIZE) {
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
- /* perform multiplication by 'x' in GF(2^128) */
- b = get_unaligned_be64(inkey);
- a = get_unaligned_be64(inkey + 8);
+ memcpy(&k, inkey, GHASH_BLOCK_SIZE);
+ ghash_reflect(key->h, &k);
+
+ h = k;
+ gf128mul_lle(&h, &k);
+ ghash_reflect(key->h2, &h);
- key->a = (a << 1) | (b >> 63);
- key->b = (b << 1) | (a >> 63);
+ gf128mul_lle(&h, &k);
+ ghash_reflect(key->h3, &h);
- if (b >> 63)
- key->b ^= 0xc200000000000000UL;
+ gf128mul_lle(&h, &k);
+ ghash_reflect(key->h4, &h);
return 0;
}
diff --git a/arch/arm/crypto/speck-neon-core.S b/arch/arm/crypto/speck-neon-core.S
deleted file mode 100644
index 57caa742016e..000000000000
--- a/arch/arm/crypto/speck-neon-core.S
+++ /dev/null
@@ -1,434 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Author: Eric Biggers <ebiggers@google.com>
- */
-
-#include <linux/linkage.h>
-
- .text
- .fpu neon
-
- // arguments
- ROUND_KEYS .req r0 // const {u64,u32} *round_keys
- NROUNDS .req r1 // int nrounds
- DST .req r2 // void *dst
- SRC .req r3 // const void *src
- NBYTES .req r4 // unsigned int nbytes
- TWEAK .req r5 // void *tweak
-
- // registers which hold the data being encrypted/decrypted
- X0 .req q0
- X0_L .req d0
- X0_H .req d1
- Y0 .req q1
- Y0_H .req d3
- X1 .req q2
- X1_L .req d4
- X1_H .req d5
- Y1 .req q3
- Y1_H .req d7
- X2 .req q4
- X2_L .req d8
- X2_H .req d9
- Y2 .req q5
- Y2_H .req d11
- X3 .req q6
- X3_L .req d12
- X3_H .req d13
- Y3 .req q7
- Y3_H .req d15
-
- // the round key, duplicated in all lanes
- ROUND_KEY .req q8
- ROUND_KEY_L .req d16
- ROUND_KEY_H .req d17
-
- // index vector for vtbl-based 8-bit rotates
- ROTATE_TABLE .req d18
-
- // multiplication table for updating XTS tweaks
- GF128MUL_TABLE .req d19
- GF64MUL_TABLE .req d19
-
- // current XTS tweak value(s)
- TWEAKV .req q10
- TWEAKV_L .req d20
- TWEAKV_H .req d21
-
- TMP0 .req q12
- TMP0_L .req d24
- TMP0_H .req d25
- TMP1 .req q13
- TMP2 .req q14
- TMP3 .req q15
-
- .align 4
-.Lror64_8_table:
- .byte 1, 2, 3, 4, 5, 6, 7, 0
-.Lror32_8_table:
- .byte 1, 2, 3, 0, 5, 6, 7, 4
-.Lrol64_8_table:
- .byte 7, 0, 1, 2, 3, 4, 5, 6
-.Lrol32_8_table:
- .byte 3, 0, 1, 2, 7, 4, 5, 6
-.Lgf128mul_table:
- .byte 0, 0x87
- .fill 14
-.Lgf64mul_table:
- .byte 0, 0x1b, (0x1b << 1), (0x1b << 1) ^ 0x1b
- .fill 12
-
-/*
- * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
- *
- * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
- * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
- * of ROUND_KEY. 'n' is the lane size: 64 for Speck128, or 32 for Speck64.
- *
- * The 8-bit rotates are implemented using vtbl instead of vshr + vsli because
- * the vtbl approach is faster on some processors and the same speed on others.
- */
-.macro _speck_round_128bytes n
-
- // x = ror(x, 8)
- vtbl.8 X0_L, {X0_L}, ROTATE_TABLE
- vtbl.8 X0_H, {X0_H}, ROTATE_TABLE
- vtbl.8 X1_L, {X1_L}, ROTATE_TABLE
- vtbl.8 X1_H, {X1_H}, ROTATE_TABLE
- vtbl.8 X2_L, {X2_L}, ROTATE_TABLE
- vtbl.8 X2_H, {X2_H}, ROTATE_TABLE
- vtbl.8 X3_L, {X3_L}, ROTATE_TABLE
- vtbl.8 X3_H, {X3_H}, ROTATE_TABLE
-
- // x += y
- vadd.u\n X0, Y0
- vadd.u\n X1, Y1
- vadd.u\n X2, Y2
- vadd.u\n X3, Y3
-
- // x ^= k
- veor X0, ROUND_KEY
- veor X1, ROUND_KEY
- veor X2, ROUND_KEY
- veor X3, ROUND_KEY
-
- // y = rol(y, 3)
- vshl.u\n TMP0, Y0, #3
- vshl.u\n TMP1, Y1, #3
- vshl.u\n TMP2, Y2, #3
- vshl.u\n TMP3, Y3, #3
- vsri.u\n TMP0, Y0, #(\n - 3)
- vsri.u\n TMP1, Y1, #(\n - 3)
- vsri.u\n TMP2, Y2, #(\n - 3)
- vsri.u\n TMP3, Y3, #(\n - 3)
-
- // y ^= x
- veor Y0, TMP0, X0
- veor Y1, TMP1, X1
- veor Y2, TMP2, X2
- veor Y3, TMP3, X3
-.endm
-
-/*
- * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
- *
- * This is the inverse of _speck_round_128bytes().
- */
-.macro _speck_unround_128bytes n
-
- // y ^= x
- veor TMP0, Y0, X0
- veor TMP1, Y1, X1
- veor TMP2, Y2, X2
- veor TMP3, Y3, X3
-
- // y = ror(y, 3)
- vshr.u\n Y0, TMP0, #3
- vshr.u\n Y1, TMP1, #3
- vshr.u\n Y2, TMP2, #3
- vshr.u\n Y3, TMP3, #3
- vsli.u\n Y0, TMP0, #(\n - 3)
- vsli.u\n Y1, TMP1, #(\n - 3)
- vsli.u\n Y2, TMP2, #(\n - 3)
- vsli.u\n Y3, TMP3, #(\n - 3)
-
- // x ^= k
- veor X0, ROUND_KEY
- veor X1, ROUND_KEY
- veor X2, ROUND_KEY
- veor X3, ROUND_KEY
-
- // x -= y
- vsub.u\n X0, Y0
- vsub.u\n X1, Y1
- vsub.u\n X2, Y2
- vsub.u\n X3, Y3
-
- // x = rol(x, 8);
- vtbl.8 X0_L, {X0_L}, ROTATE_TABLE
- vtbl.8 X0_H, {X0_H}, ROTATE_TABLE
- vtbl.8 X1_L, {X1_L}, ROTATE_TABLE
- vtbl.8 X1_H, {X1_H}, ROTATE_TABLE
- vtbl.8 X2_L, {X2_L}, ROTATE_TABLE
- vtbl.8 X2_H, {X2_H}, ROTATE_TABLE
- vtbl.8 X3_L, {X3_L}, ROTATE_TABLE
- vtbl.8 X3_H, {X3_H}, ROTATE_TABLE
-.endm
-
-.macro _xts128_precrypt_one dst_reg, tweak_buf, tmp
-
- // Load the next source block
- vld1.8 {\dst_reg}, [SRC]!
-
- // Save the current tweak in the tweak buffer
- vst1.8 {TWEAKV}, [\tweak_buf:128]!
-
- // XOR the next source block with the current tweak
- veor \dst_reg, TWEAKV
-
- /*
- * Calculate the next tweak by multiplying the current one by x,
- * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
- */
- vshr.u64 \tmp, TWEAKV, #63
- vshl.u64 TWEAKV, #1
- veor TWEAKV_H, \tmp\()_L
- vtbl.8 \tmp\()_H, {GF128MUL_TABLE}, \tmp\()_H
- veor TWEAKV_L, \tmp\()_H
-.endm
-
-.macro _xts64_precrypt_two dst_reg, tweak_buf, tmp
-
- // Load the next two source blocks
- vld1.8 {\dst_reg}, [SRC]!
-
- // Save the current two tweaks in the tweak buffer
- vst1.8 {TWEAKV}, [\tweak_buf:128]!
-
- // XOR the next two source blocks with the current two tweaks
- veor \dst_reg, TWEAKV
-
- /*
- * Calculate the next two tweaks by multiplying the current ones by x^2,
- * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
- */
- vshr.u64 \tmp, TWEAKV, #62
- vshl.u64 TWEAKV, #2
- vtbl.8 \tmp\()_L, {GF64MUL_TABLE}, \tmp\()_L
- vtbl.8 \tmp\()_H, {GF64MUL_TABLE}, \tmp\()_H
- veor TWEAKV, \tmp
-.endm
-
-/*
- * _speck_xts_crypt() - Speck-XTS encryption/decryption
- *
- * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
- * using Speck-XTS, specifically the variant with a block size of '2n' and round
- * count given by NROUNDS. The expanded round keys are given in ROUND_KEYS, and
- * the current XTS tweak value is given in TWEAK. It's assumed that NBYTES is a
- * nonzero multiple of 128.
- */
-.macro _speck_xts_crypt n, decrypting
- push {r4-r7}
- mov r7, sp
-
- /*
- * The first four parameters were passed in registers r0-r3. Load the
- * additional parameters, which were passed on the stack.
- */
- ldr NBYTES, [sp, #16]
- ldr TWEAK, [sp, #20]
-
- /*
- * If decrypting, modify the ROUND_KEYS parameter to point to the last
- * round key rather than the first, since for decryption the round keys
- * are used in reverse order.
- */
-.if \decrypting
-.if \n == 64
- add ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #3
- sub ROUND_KEYS, #8
-.else
- add ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #2
- sub ROUND_KEYS, #4
-.endif
-.endif
-
- // Load the index vector for vtbl-based 8-bit rotates
-.if \decrypting
- ldr r12, =.Lrol\n\()_8_table
-.else
- ldr r12, =.Lror\n\()_8_table
-.endif
- vld1.8 {ROTATE_TABLE}, [r12:64]
-
- // One-time XTS preparation
-
- /*
- * Allocate stack space to store 128 bytes worth of tweaks. For
- * performance, this space is aligned to a 16-byte boundary so that we
- * can use the load/store instructions that declare 16-byte alignment.
- * For Thumb2 compatibility, don't do the 'bic' directly on 'sp'.
- */
- sub r12, sp, #128
- bic r12, #0xf
- mov sp, r12
-
-.if \n == 64
- // Load first tweak
- vld1.8 {TWEAKV}, [TWEAK]
-
- // Load GF(2^128) multiplication table
- ldr r12, =.Lgf128mul_table
- vld1.8 {GF128MUL_TABLE}, [r12:64]
-.else
- // Load first tweak
- vld1.8 {TWEAKV_L}, [TWEAK]
-
- // Load GF(2^64) multiplication table
- ldr r12, =.Lgf64mul_table
- vld1.8 {GF64MUL_TABLE}, [r12:64]
-
- // Calculate second tweak, packing it together with the first
- vshr.u64 TMP0_L, TWEAKV_L, #63
- vtbl.u8 TMP0_L, {GF64MUL_TABLE}, TMP0_L
- vshl.u64 TWEAKV_H, TWEAKV_L, #1
- veor TWEAKV_H, TMP0_L
-.endif
-
-.Lnext_128bytes_\@:
-
- /*
- * Load the source blocks into {X,Y}[0-3], XOR them with their XTS tweak
- * values, and save the tweaks on the stack for later. Then
- * de-interleave the 'x' and 'y' elements of each block, i.e. make it so
- * that the X[0-3] registers contain only the second halves of blocks,
- * and the Y[0-3] registers contain only the first halves of blocks.
- * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
- */
- mov r12, sp
-.if \n == 64
- _xts128_precrypt_one X0, r12, TMP0
- _xts128_precrypt_one Y0, r12, TMP0
- _xts128_precrypt_one X1, r12, TMP0
- _xts128_precrypt_one Y1, r12, TMP0
- _xts128_precrypt_one X2, r12, TMP0
- _xts128_precrypt_one Y2, r12, TMP0
- _xts128_precrypt_one X3, r12, TMP0
- _xts128_precrypt_one Y3, r12, TMP0
- vswp X0_L, Y0_H
- vswp X1_L, Y1_H
- vswp X2_L, Y2_H
- vswp X3_L, Y3_H
-.else
- _xts64_precrypt_two X0, r12, TMP0
- _xts64_precrypt_two Y0, r12, TMP0
- _xts64_precrypt_two X1, r12, TMP0
- _xts64_precrypt_two Y1, r12, TMP0
- _xts64_precrypt_two X2, r12, TMP0
- _xts64_precrypt_two Y2, r12, TMP0
- _xts64_precrypt_two X3, r12, TMP0
- _xts64_precrypt_two Y3, r12, TMP0
- vuzp.32 Y0, X0
- vuzp.32 Y1, X1
- vuzp.32 Y2, X2
- vuzp.32 Y3, X3
-.endif
-
- // Do the cipher rounds
-
- mov r12, ROUND_KEYS
- mov r6, NROUNDS
-
-.Lnext_round_\@:
-.if \decrypting
-.if \n == 64
- vld1.64 ROUND_KEY_L, [r12]
- sub r12, #8
- vmov ROUND_KEY_H, ROUND_KEY_L
-.else
- vld1.32 {ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]
- sub r12, #4
-.endif
- _speck_unround_128bytes \n
-.else
-.if \n == 64
- vld1.64 ROUND_KEY_L, [r12]!
- vmov ROUND_KEY_H, ROUND_KEY_L
-.else
- vld1.32 {ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]!
-.endif
- _speck_round_128bytes \n
-.endif
- subs r6, r6, #1
- bne .Lnext_round_\@
-
- // Re-interleave the 'x' and 'y' elements of each block
-.if \n == 64
- vswp X0_L, Y0_H
- vswp X1_L, Y1_H
- vswp X2_L, Y2_H
- vswp X3_L, Y3_H
-.else
- vzip.32 Y0, X0
- vzip.32 Y1, X1
- vzip.32 Y2, X2
- vzip.32 Y3, X3
-.endif
-
- // XOR the encrypted/decrypted blocks with the tweaks we saved earlier
- mov r12, sp
- vld1.8 {TMP0, TMP1}, [r12:128]!
- vld1.8 {TMP2, TMP3}, [r12:128]!
- veor X0, TMP0
- veor Y0, TMP1
- veor X1, TMP2
- veor Y1, TMP3
- vld1.8 {TMP0, TMP1}, [r12:128]!
- vld1.8 {TMP2, TMP3}, [r12:128]!
- veor X2, TMP0
- veor Y2, TMP1
- veor X3, TMP2
- veor Y3, TMP3
-
- // Store the ciphertext in the destination buffer
- vst1.8 {X0, Y0}, [DST]!
- vst1.8 {X1, Y1}, [DST]!
- vst1.8 {X2, Y2}, [DST]!
- vst1.8 {X3, Y3}, [DST]!
-
- // Continue if there are more 128-byte chunks remaining, else return
- subs NBYTES, #128
- bne .Lnext_128bytes_\@
-
- // Store the next tweak
-.if \n == 64
- vst1.8 {TWEAKV}, [TWEAK]
-.else
- vst1.8 {TWEAKV_L}, [TWEAK]
-.endif
-
- mov sp, r7
- pop {r4-r7}
- bx lr
-.endm
-
-ENTRY(speck128_xts_encrypt_neon)
- _speck_xts_crypt n=64, decrypting=0
-ENDPROC(speck128_xts_encrypt_neon)
-
-ENTRY(speck128_xts_decrypt_neon)
- _speck_xts_crypt n=64, decrypting=1
-ENDPROC(speck128_xts_decrypt_neon)
-
-ENTRY(speck64_xts_encrypt_neon)
- _speck_xts_crypt n=32, decrypting=0
-ENDPROC(speck64_xts_encrypt_neon)
-
-ENTRY(speck64_xts_decrypt_neon)
- _speck_xts_crypt n=32, decrypting=1
-ENDPROC(speck64_xts_decrypt_neon)
diff --git a/arch/arm/crypto/speck-neon-glue.c b/arch/arm/crypto/speck-neon-glue.c
deleted file mode 100644
index f012c3ea998f..000000000000
--- a/arch/arm/crypto/speck-neon-glue.c
+++ /dev/null
@@ -1,288 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Note: the NIST recommendation for XTS only specifies a 128-bit block size,
- * but a 64-bit version (needed for Speck64) is fairly straightforward; the math
- * is just done in GF(2^64) instead of GF(2^128), with the reducing polynomial
- * x^64 + x^4 + x^3 + x + 1 from the original XEX paper (Rogaway, 2004:
- * "Efficient Instantiations of Tweakable Blockciphers and Refinements to Modes
- * OCB and PMAC"), represented as 0x1B.
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/algapi.h>
-#include <crypto/gf128mul.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/speck.h>
-#include <crypto/xts.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-/* The assembly functions only handle multiples of 128 bytes */
-#define SPECK_NEON_CHUNK_SIZE 128
-
-/* Speck128 */
-
-struct speck128_xts_tfm_ctx {
- struct speck128_tfm_ctx main_key;
- struct speck128_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
- u8 *, const u8 *);
-typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
- const void *, unsigned int, void *);
-
-static __always_inline int
-__speck128_xts_crypt(struct skcipher_request *req,
- speck128_crypt_one_t crypt_one,
- speck128_xts_crypt_many_t crypt_many)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- le128 tweak;
- int err;
-
- err = skcipher_walk_virt(&walk, req, true);
-
- crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
- u8 *dst = walk.dst.virt.addr;
- const u8 *src = walk.src.virt.addr;
-
- if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
- unsigned int count;
-
- count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
- kernel_neon_begin();
- (*crypt_many)(ctx->main_key.round_keys,
- ctx->main_key.nrounds,
- dst, src, count, &tweak);
- kernel_neon_end();
- dst += count;
- src += count;
- nbytes -= count;
- }
-
- /* Handle any remainder with generic code */
- while (nbytes >= sizeof(tweak)) {
- le128_xor((le128 *)dst, (const le128 *)src, &tweak);
- (*crypt_one)(&ctx->main_key, dst, dst);
- le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
- gf128mul_x_ble(&tweak, &tweak);
-
- dst += sizeof(tweak);
- src += sizeof(tweak);
- nbytes -= sizeof(tweak);
- }
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- return err;
-}
-
-static int speck128_xts_encrypt(struct skcipher_request *req)
-{
- return __speck128_xts_crypt(req, crypto_speck128_encrypt,
- speck128_xts_encrypt_neon);
-}
-
-static int speck128_xts_decrypt(struct skcipher_request *req)
-{
- return __speck128_xts_crypt(req, crypto_speck128_decrypt,
- speck128_xts_decrypt_neon);
-}
-
-static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- int err;
-
- err = xts_verify_key(tfm, key, keylen);
- if (err)
- return err;
-
- keylen /= 2;
-
- err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
- if (err)
- return err;
-
- return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-/* Speck64 */
-
-struct speck64_xts_tfm_ctx {
- struct speck64_tfm_ctx main_key;
- struct speck64_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
- u8 *, const u8 *);
-typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
- const void *, unsigned int, void *);
-
-static __always_inline int
-__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
- speck64_xts_crypt_many_t crypt_many)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- __le64 tweak;
- int err;
-
- err = skcipher_walk_virt(&walk, req, true);
-
- crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
- u8 *dst = walk.dst.virt.addr;
- const u8 *src = walk.src.virt.addr;
-
- if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
- unsigned int count;
-
- count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
- kernel_neon_begin();
- (*crypt_many)(ctx->main_key.round_keys,
- ctx->main_key.nrounds,
- dst, src, count, &tweak);
- kernel_neon_end();
- dst += count;
- src += count;
- nbytes -= count;
- }
-
- /* Handle any remainder with generic code */
- while (nbytes >= sizeof(tweak)) {
- *(__le64 *)dst = *(__le64 *)src ^ tweak;
- (*crypt_one)(&ctx->main_key, dst, dst);
- *(__le64 *)dst ^= tweak;
- tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
- ((tweak & cpu_to_le64(1ULL << 63)) ?
- 0x1B : 0));
- dst += sizeof(tweak);
- src += sizeof(tweak);
- nbytes -= sizeof(tweak);
- }
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- return err;
-}
-
-static int speck64_xts_encrypt(struct skcipher_request *req)
-{
- return __speck64_xts_crypt(req, crypto_speck64_encrypt,
- speck64_xts_encrypt_neon);
-}
-
-static int speck64_xts_decrypt(struct skcipher_request *req)
-{
- return __speck64_xts_crypt(req, crypto_speck64_decrypt,
- speck64_xts_decrypt_neon);
-}
-
-static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- int err;
-
- err = xts_verify_key(tfm, key, keylen);
- if (err)
- return err;
-
- keylen /= 2;
-
- err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
- if (err)
- return err;
-
- return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-static struct skcipher_alg speck_algs[] = {
- {
- .base.cra_name = "xts(speck128)",
- .base.cra_driver_name = "xts-speck128-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = SPECK128_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct speck128_xts_tfm_ctx),
- .base.cra_alignmask = 7,
- .base.cra_module = THIS_MODULE,
- .min_keysize = 2 * SPECK128_128_KEY_SIZE,
- .max_keysize = 2 * SPECK128_256_KEY_SIZE,
- .ivsize = SPECK128_BLOCK_SIZE,
- .walksize = SPECK_NEON_CHUNK_SIZE,
- .setkey = speck128_xts_setkey,
- .encrypt = speck128_xts_encrypt,
- .decrypt = speck128_xts_decrypt,
- }, {
- .base.cra_name = "xts(speck64)",
- .base.cra_driver_name = "xts-speck64-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = SPECK64_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct speck64_xts_tfm_ctx),
- .base.cra_alignmask = 7,
- .base.cra_module = THIS_MODULE,
- .min_keysize = 2 * SPECK64_96_KEY_SIZE,
- .max_keysize = 2 * SPECK64_128_KEY_SIZE,
- .ivsize = SPECK64_BLOCK_SIZE,
- .walksize = SPECK_NEON_CHUNK_SIZE,
- .setkey = speck64_xts_setkey,
- .encrypt = speck64_xts_encrypt,
- .decrypt = speck64_xts_decrypt,
- }
-};
-
-static int __init speck_neon_module_init(void)
-{
- if (!(elf_hwcap & HWCAP_NEON))
- return -ENODEV;
- return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-static void __exit speck_neon_module_exit(void)
-{
- crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-module_init(speck_neon_module_init);
-module_exit(speck_neon_module_exit);
-
-MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("xts(speck128)");
-MODULE_ALIAS_CRYPTO("xts-speck128-neon");
-MODULE_ALIAS_CRYPTO("xts(speck64)");
-MODULE_ALIAS_CRYPTO("xts-speck64-neon");
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index db8d364f8476..3d165b4cdd2a 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -698,6 +698,7 @@ CONFIG_MEMTEST=y
CONFIG_SECURITY=y
CONFIG_CRYPTO_ECHAINIV=y
CONFIG_CRYPTO_ANSI_CPRNG=y
+CONFIG_CRYPTO_DEV_FSL_DPAA2_CAAM=y
CONFIG_ARM64_CRYPTO=y
CONFIG_CRYPTO_SHA1_ARM64_CE=y
CONFIG_CRYPTO_SHA2_ARM64_CE=y
@@ -706,7 +707,6 @@ CONFIG_CRYPTO_SHA3_ARM64=m
CONFIG_CRYPTO_SM3_ARM64_CE=m
CONFIG_CRYPTO_GHASH_ARM64_CE=y
CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m
-CONFIG_CRYPTO_CRC32_ARM64_CE=m
CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
CONFIG_CRYPTO_CHACHA20_NEON=m
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index e3fdb0fd6f70..a5606823ed4d 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -66,11 +66,6 @@ config CRYPTO_CRCT10DIF_ARM64_CE
depends on KERNEL_MODE_NEON && CRC_T10DIF
select CRYPTO_HASH
-config CRYPTO_CRC32_ARM64_CE
- tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions"
- depends on CRC32
- select CRYPTO_HASH
-
config CRYPTO_AES_ARM64
tristate "AES core cipher using scalar instructions"
select CRYPTO_AES
@@ -119,10 +114,4 @@ config CRYPTO_AES_ARM64_BS
select CRYPTO_AES_ARM64
select CRYPTO_SIMD
-config CRYPTO_SPECK_NEON
- tristate "NEON accelerated Speck cipher algorithms"
- depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
- select CRYPTO_SPECK
-
endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index bcafd016618e..f476fede09ba 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -32,9 +32,6 @@ ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o
crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
-obj-$(CONFIG_CRYPTO_CRC32_ARM64_CE) += crc32-ce.o
-crc32-ce-y:= crc32-ce-core.o crc32-ce-glue.o
-
obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o
@@ -56,9 +53,6 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
-obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
-speck-neon-y := speck-neon-core.o speck-neon-glue.o
-
obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 623e74ed1c67..143070510809 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -17,6 +17,11 @@
.arch armv8-a+crypto
+ xtsmask .req v16
+
+ .macro xts_reload_mask, tmp
+ .endm
+
/* preload all round keys */
.macro load_round_keys, rounds, rk
cmp \rounds, #12
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index adcb83eb683c..1e676625ef33 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -15,6 +15,7 @@
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
#include <crypto/xts.h>
@@ -31,6 +32,8 @@
#define aes_ecb_decrypt ce_aes_ecb_decrypt
#define aes_cbc_encrypt ce_aes_cbc_encrypt
#define aes_cbc_decrypt ce_aes_cbc_decrypt
+#define aes_cbc_cts_encrypt ce_aes_cbc_cts_encrypt
+#define aes_cbc_cts_decrypt ce_aes_cbc_cts_decrypt
#define aes_ctr_encrypt ce_aes_ctr_encrypt
#define aes_xts_encrypt ce_aes_xts_encrypt
#define aes_xts_decrypt ce_aes_xts_decrypt
@@ -45,6 +48,8 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#define aes_ecb_decrypt neon_aes_ecb_decrypt
#define aes_cbc_encrypt neon_aes_cbc_encrypt
#define aes_cbc_decrypt neon_aes_cbc_decrypt
+#define aes_cbc_cts_encrypt neon_aes_cbc_cts_encrypt
+#define aes_cbc_cts_decrypt neon_aes_cbc_cts_decrypt
#define aes_ctr_encrypt neon_aes_ctr_encrypt
#define aes_xts_encrypt neon_aes_xts_encrypt
#define aes_xts_decrypt neon_aes_xts_decrypt
@@ -63,30 +68,41 @@ MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
/* defined in aes-modes.S */
-asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks);
-asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks);
-asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 iv[]);
-asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 iv[]);
-asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+ int rounds, int bytes, u8 const iv[]);
+asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+ int rounds, int bytes, u8 const iv[]);
+
+asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 ctr[]);
-asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
- int rounds, int blocks, u8 const rk2[], u8 iv[],
+asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
+ int rounds, int blocks, u32 const rk2[], u8 iv[],
int first);
-asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
- int rounds, int blocks, u8 const rk2[], u8 iv[],
+asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
+ int rounds, int blocks, u32 const rk2[], u8 iv[],
int first);
asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
int blocks, u8 dg[], int enc_before,
int enc_after);
+struct cts_cbc_req_ctx {
+ struct scatterlist sg_src[2];
+ struct scatterlist sg_dst[2];
+ struct skcipher_request subreq;
+};
+
struct crypto_aes_xts_ctx {
struct crypto_aes_ctx key1;
struct crypto_aes_ctx __aligned(8) key2;
@@ -142,7 +158,7 @@ static int ecb_encrypt(struct skcipher_request *req)
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key_enc, rounds, blocks);
+ ctx->key_enc, rounds, blocks);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
@@ -162,7 +178,7 @@ static int ecb_decrypt(struct skcipher_request *req)
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key_dec, rounds, blocks);
+ ctx->key_dec, rounds, blocks);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
@@ -182,7 +198,7 @@ static int cbc_encrypt(struct skcipher_request *req)
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key_enc, rounds, blocks, walk.iv);
+ ctx->key_enc, rounds, blocks, walk.iv);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
@@ -202,13 +218,149 @@ static int cbc_decrypt(struct skcipher_request *req)
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key_dec, rounds, blocks, walk.iv);
+ ctx->key_dec, rounds, blocks, walk.iv);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
}
+static int cts_cbc_init_tfm(struct crypto_skcipher *tfm)
+{
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct cts_cbc_req_ctx));
+ return 0;
+}
+
+static int cts_cbc_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
+ int err, rounds = 6 + ctx->key_length / 4;
+ int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+ struct scatterlist *src = req->src, *dst = req->dst;
+ struct skcipher_walk walk;
+
+ skcipher_request_set_tfm(&rctx->subreq, tfm);
+
+ if (req->cryptlen <= AES_BLOCK_SIZE) {
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+ cbc_blocks = 1;
+ }
+
+ if (cbc_blocks > 0) {
+ unsigned int blocks;
+
+ skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+ cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+
+ while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ kernel_neon_begin();
+ aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, rounds, blocks, walk.iv);
+ kernel_neon_end();
+ err = skcipher_walk_done(&walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ if (err)
+ return err;
+
+ if (req->cryptlen == AES_BLOCK_SIZE)
+ return 0;
+
+ dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
+ rctx->subreq.cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
+ rctx->subreq.cryptlen);
+ }
+
+ /* handle ciphertext stealing */
+ skcipher_request_set_crypt(&rctx->subreq, src, dst,
+ req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+ if (err)
+ return err;
+
+ kernel_neon_begin();
+ aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, rounds, walk.nbytes, walk.iv);
+ kernel_neon_end();
+
+ return skcipher_walk_done(&walk, 0);
+}
+
+static int cts_cbc_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
+ int err, rounds = 6 + ctx->key_length / 4;
+ int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+ struct scatterlist *src = req->src, *dst = req->dst;
+ struct skcipher_walk walk;
+
+ skcipher_request_set_tfm(&rctx->subreq, tfm);
+
+ if (req->cryptlen <= AES_BLOCK_SIZE) {
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+ cbc_blocks = 1;
+ }
+
+ if (cbc_blocks > 0) {
+ unsigned int blocks;
+
+ skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+ cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+
+ while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ kernel_neon_begin();
+ aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, rounds, blocks, walk.iv);
+ kernel_neon_end();
+ err = skcipher_walk_done(&walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ if (err)
+ return err;
+
+ if (req->cryptlen == AES_BLOCK_SIZE)
+ return 0;
+
+ dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
+ rctx->subreq.cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
+ rctx->subreq.cryptlen);
+ }
+
+ /* handle ciphertext stealing */
+ skcipher_request_set_crypt(&rctx->subreq, src, dst,
+ req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+ if (err)
+ return err;
+
+ kernel_neon_begin();
+ aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, rounds, walk.nbytes, walk.iv);
+ kernel_neon_end();
+
+ return skcipher_walk_done(&walk, 0);
+}
+
static int ctr_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -222,7 +374,7 @@ static int ctr_encrypt(struct skcipher_request *req)
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key_enc, rounds, blocks, walk.iv);
+ ctx->key_enc, rounds, blocks, walk.iv);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
@@ -238,7 +390,7 @@ static int ctr_encrypt(struct skcipher_request *req)
blocks = -1;
kernel_neon_begin();
- aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
+ aes_ctr_encrypt(tail, NULL, ctx->key_enc, rounds,
blocks, walk.iv);
kernel_neon_end();
crypto_xor_cpy(tdst, tsrc, tail, nbytes);
@@ -272,8 +424,8 @@ static int xts_encrypt(struct skcipher_request *req)
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
kernel_neon_begin();
aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key1.key_enc, rounds, blocks,
- (u8 *)ctx->key2.key_enc, walk.iv, first);
+ ctx->key1.key_enc, rounds, blocks,
+ ctx->key2.key_enc, walk.iv, first);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
@@ -294,8 +446,8 @@ static int xts_decrypt(struct skcipher_request *req)
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
kernel_neon_begin();
aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key1.key_dec, rounds, blocks,
- (u8 *)ctx->key2.key_enc, walk.iv, first);
+ ctx->key1.key_dec, rounds, blocks,
+ ctx->key2.key_enc, walk.iv, first);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
@@ -336,6 +488,24 @@ static struct skcipher_alg aes_algs[] = { {
.decrypt = cbc_decrypt,
}, {
.base = {
+ .cra_name = "__cts(cbc(aes))",
+ .cra_driver_name = "__cts-cbc-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .walksize = 2 * AES_BLOCK_SIZE,
+ .setkey = skcipher_aes_setkey,
+ .encrypt = cts_cbc_encrypt,
+ .decrypt = cts_cbc_decrypt,
+ .init = cts_cbc_init_tfm,
+}, {
+ .base = {
.cra_name = "__ctr(aes)",
.cra_driver_name = "__ctr-aes-" MODE,
.cra_priority = PRIO,
@@ -412,7 +582,6 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
{
struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
be128 *consts = (be128 *)ctx->consts;
- u8 *rk = (u8 *)ctx->key.key_enc;
int rounds = 6 + key_len / 4;
int err;
@@ -422,7 +591,8 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
/* encrypt the zero vector */
kernel_neon_begin();
- aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1);
+ aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, ctx->key.key_enc,
+ rounds, 1);
kernel_neon_end();
cmac_gf128_mul_by_x(consts, consts);
@@ -441,7 +611,6 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
};
struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
- u8 *rk = (u8 *)ctx->key.key_enc;
int rounds = 6 + key_len / 4;
u8 key[AES_BLOCK_SIZE];
int err;
@@ -451,8 +620,8 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
return err;
kernel_neon_begin();
- aes_ecb_encrypt(key, ks[0], rk, rounds, 1);
- aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2);
+ aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
+ aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
kernel_neon_end();
return cbcmac_setkey(tfm, key, sizeof(key));
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 483a7130cf0e..67700045a0e0 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -14,12 +14,12 @@
.align 4
aes_encrypt_block4x:
- encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
+ encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
ret
ENDPROC(aes_encrypt_block4x)
aes_decrypt_block4x:
- decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
+ decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
ret
ENDPROC(aes_decrypt_block4x)
@@ -31,71 +31,57 @@ ENDPROC(aes_decrypt_block4x)
*/
AES_ENTRY(aes_ecb_encrypt)
- frame_push 5
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
-
-.Lecbencrestart:
- enc_prepare w22, x21, x5
+ enc_prepare w3, x2, x5
.LecbencloopNx:
- subs w23, w23, #4
+ subs w4, w4, #4
bmi .Lecbenc1x
- ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
bl aes_encrypt_block4x
- st1 {v0.16b-v3.16b}, [x19], #64
- cond_yield_neon .Lecbencrestart
+ st1 {v0.16b-v3.16b}, [x0], #64
b .LecbencloopNx
.Lecbenc1x:
- adds w23, w23, #4
+ adds w4, w4, #4
beq .Lecbencout
.Lecbencloop:
- ld1 {v0.16b}, [x20], #16 /* get next pt block */
- encrypt_block v0, w22, x21, x5, w6
- st1 {v0.16b}, [x19], #16
- subs w23, w23, #1
+ ld1 {v0.16b}, [x1], #16 /* get next pt block */
+ encrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
bne .Lecbencloop
.Lecbencout:
- frame_pop
+ ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_ecb_encrypt)
AES_ENTRY(aes_ecb_decrypt)
- frame_push 5
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
-
-.Lecbdecrestart:
- dec_prepare w22, x21, x5
+ dec_prepare w3, x2, x5
.LecbdecloopNx:
- subs w23, w23, #4
+ subs w4, w4, #4
bmi .Lecbdec1x
- ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
bl aes_decrypt_block4x
- st1 {v0.16b-v3.16b}, [x19], #64
- cond_yield_neon .Lecbdecrestart
+ st1 {v0.16b-v3.16b}, [x0], #64
b .LecbdecloopNx
.Lecbdec1x:
- adds w23, w23, #4
+ adds w4, w4, #4
beq .Lecbdecout
.Lecbdecloop:
- ld1 {v0.16b}, [x20], #16 /* get next ct block */
- decrypt_block v0, w22, x21, x5, w6
- st1 {v0.16b}, [x19], #16
- subs w23, w23, #1
+ ld1 {v0.16b}, [x1], #16 /* get next ct block */
+ decrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
bne .Lecbdecloop
.Lecbdecout:
- frame_pop
+ ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_ecb_decrypt)
@@ -108,162 +94,211 @@ AES_ENDPROC(aes_ecb_decrypt)
*/
AES_ENTRY(aes_cbc_encrypt)
- frame_push 6
-
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
- mov x24, x5
-
-.Lcbcencrestart:
- ld1 {v4.16b}, [x24] /* get iv */
- enc_prepare w22, x21, x6
+ ld1 {v4.16b}, [x5] /* get iv */
+ enc_prepare w3, x2, x6
.Lcbcencloop4x:
- subs w23, w23, #4
+ subs w4, w4, #4
bmi .Lcbcenc1x
- ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
- encrypt_block v0, w22, x21, x6, w7
+ encrypt_block v0, w3, x2, x6, w7
eor v1.16b, v1.16b, v0.16b
- encrypt_block v1, w22, x21, x6, w7
+ encrypt_block v1, w3, x2, x6, w7
eor v2.16b, v2.16b, v1.16b
- encrypt_block v2, w22, x21, x6, w7
+ encrypt_block v2, w3, x2, x6, w7
eor v3.16b, v3.16b, v2.16b
- encrypt_block v3, w22, x21, x6, w7
- st1 {v0.16b-v3.16b}, [x19], #64
+ encrypt_block v3, w3, x2, x6, w7
+ st1 {v0.16b-v3.16b}, [x0], #64
mov v4.16b, v3.16b
- st1 {v4.16b}, [x24] /* return iv */
- cond_yield_neon .Lcbcencrestart
b .Lcbcencloop4x
.Lcbcenc1x:
- adds w23, w23, #4
+ adds w4, w4, #4
beq .Lcbcencout
.Lcbcencloop:
- ld1 {v0.16b}, [x20], #16 /* get next pt block */
+ ld1 {v0.16b}, [x1], #16 /* get next pt block */
eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
- encrypt_block v4, w22, x21, x6, w7
- st1 {v4.16b}, [x19], #16
- subs w23, w23, #1
+ encrypt_block v4, w3, x2, x6, w7
+ st1 {v4.16b}, [x0], #16
+ subs w4, w4, #1
bne .Lcbcencloop
.Lcbcencout:
- st1 {v4.16b}, [x24] /* return iv */
- frame_pop
+ st1 {v4.16b}, [x5] /* return iv */
ret
AES_ENDPROC(aes_cbc_encrypt)
AES_ENTRY(aes_cbc_decrypt)
- frame_push 6
-
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
- mov x24, x5
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
-.Lcbcdecrestart:
- ld1 {v7.16b}, [x24] /* get iv */
- dec_prepare w22, x21, x6
+ ld1 {v7.16b}, [x5] /* get iv */
+ dec_prepare w3, x2, x6
.LcbcdecloopNx:
- subs w23, w23, #4
+ subs w4, w4, #4
bmi .Lcbcdec1x
- ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
mov v4.16b, v0.16b
mov v5.16b, v1.16b
mov v6.16b, v2.16b
bl aes_decrypt_block4x
- sub x20, x20, #16
+ sub x1, x1, #16
eor v0.16b, v0.16b, v7.16b
eor v1.16b, v1.16b, v4.16b
- ld1 {v7.16b}, [x20], #16 /* reload 1 ct block */
+ ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
eor v2.16b, v2.16b, v5.16b
eor v3.16b, v3.16b, v6.16b
- st1 {v0.16b-v3.16b}, [x19], #64
- st1 {v7.16b}, [x24] /* return iv */
- cond_yield_neon .Lcbcdecrestart
+ st1 {v0.16b-v3.16b}, [x0], #64
b .LcbcdecloopNx
.Lcbcdec1x:
- adds w23, w23, #4
+ adds w4, w4, #4
beq .Lcbcdecout
.Lcbcdecloop:
- ld1 {v1.16b}, [x20], #16 /* get next ct block */
+ ld1 {v1.16b}, [x1], #16 /* get next ct block */
mov v0.16b, v1.16b /* ...and copy to v0 */
- decrypt_block v0, w22, x21, x6, w7
+ decrypt_block v0, w3, x2, x6, w7
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
mov v7.16b, v1.16b /* ct is next iv */
- st1 {v0.16b}, [x19], #16
- subs w23, w23, #1
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
bne .Lcbcdecloop
.Lcbcdecout:
- st1 {v7.16b}, [x24] /* return iv */
- frame_pop
+ st1 {v7.16b}, [x5] /* return iv */
+ ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_cbc_decrypt)
/*
+ * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+ * int rounds, int bytes, u8 const iv[])
+ * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+ * int rounds, int bytes, u8 const iv[])
+ */
+
+AES_ENTRY(aes_cbc_cts_encrypt)
+ adr_l x8, .Lcts_permute_table
+ sub x4, x4, #16
+ add x9, x8, #32
+ add x8, x8, x4
+ sub x9, x9, x4
+ ld1 {v3.16b}, [x8]
+ ld1 {v4.16b}, [x9]
+
+ ld1 {v0.16b}, [x1], x4 /* overlapping loads */
+ ld1 {v1.16b}, [x1]
+
+ ld1 {v5.16b}, [x5] /* get iv */
+ enc_prepare w3, x2, x6
+
+ eor v0.16b, v0.16b, v5.16b /* xor with iv */
+ tbl v1.16b, {v1.16b}, v4.16b
+ encrypt_block v0, w3, x2, x6, w7
+
+ eor v1.16b, v1.16b, v0.16b
+ tbl v0.16b, {v0.16b}, v3.16b
+ encrypt_block v1, w3, x2, x6, w7
+
+ add x4, x0, x4
+ st1 {v0.16b}, [x4] /* overlapping stores */
+ st1 {v1.16b}, [x0]
+ ret
+AES_ENDPROC(aes_cbc_cts_encrypt)
+
+AES_ENTRY(aes_cbc_cts_decrypt)
+ adr_l x8, .Lcts_permute_table
+ sub x4, x4, #16
+ add x9, x8, #32
+ add x8, x8, x4
+ sub x9, x9, x4
+ ld1 {v3.16b}, [x8]
+ ld1 {v4.16b}, [x9]
+
+ ld1 {v0.16b}, [x1], x4 /* overlapping loads */
+ ld1 {v1.16b}, [x1]
+
+ ld1 {v5.16b}, [x5] /* get iv */
+ dec_prepare w3, x2, x6
+
+ tbl v2.16b, {v1.16b}, v4.16b
+ decrypt_block v0, w3, x2, x6, w7
+ eor v2.16b, v2.16b, v0.16b
+
+ tbx v0.16b, {v1.16b}, v4.16b
+ tbl v2.16b, {v2.16b}, v3.16b
+ decrypt_block v0, w3, x2, x6, w7
+ eor v0.16b, v0.16b, v5.16b /* xor with iv */
+
+ add x4, x0, x4
+ st1 {v2.16b}, [x4] /* overlapping stores */
+ st1 {v0.16b}, [x0]
+ ret
+AES_ENDPROC(aes_cbc_cts_decrypt)
+
+ .section ".rodata", "a"
+ .align 6
+.Lcts_permute_table:
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .previous
+
+
+ /*
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 ctr[])
*/
AES_ENTRY(aes_ctr_encrypt)
- frame_push 6
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
- mov x24, x5
-
-.Lctrrestart:
- enc_prepare w22, x21, x6
- ld1 {v4.16b}, [x24]
+ enc_prepare w3, x2, x6
+ ld1 {v4.16b}, [x5]
umov x6, v4.d[1] /* keep swabbed ctr in reg */
rev x6, x6
+ cmn w6, w4 /* 32 bit overflow? */
+ bcs .Lctrloop
.LctrloopNx:
- subs w23, w23, #4
+ subs w4, w4, #4
bmi .Lctr1x
- cmn w6, #4 /* 32 bit overflow? */
- bcs .Lctr1x
- ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
- dup v7.4s, w6
+ add w7, w6, #1
mov v0.16b, v4.16b
- add v7.4s, v7.4s, v8.4s
+ add w8, w6, #2
mov v1.16b, v4.16b
- rev32 v8.16b, v7.16b
+ add w9, w6, #3
mov v2.16b, v4.16b
+ rev w7, w7
mov v3.16b, v4.16b
- mov v1.s[3], v8.s[0]
- mov v2.s[3], v8.s[1]
- mov v3.s[3], v8.s[2]
- ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */
+ rev w8, w8
+ mov v1.s[3], w7
+ rev w9, w9
+ mov v2.s[3], w8
+ mov v3.s[3], w9
+ ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
bl aes_encrypt_block4x
eor v0.16b, v5.16b, v0.16b
- ld1 {v5.16b}, [x20], #16 /* get 1 input block */
+ ld1 {v5.16b}, [x1], #16 /* get 1 input block */
eor v1.16b, v6.16b, v1.16b
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
- st1 {v0.16b-v3.16b}, [x19], #64
+ st1 {v0.16b-v3.16b}, [x0], #64
add x6, x6, #4
rev x7, x6
ins v4.d[1], x7
- cbz w23, .Lctrout
- st1 {v4.16b}, [x24] /* return next CTR value */
- cond_yield_neon .Lctrrestart
+ cbz w4, .Lctrout
b .LctrloopNx
.Lctr1x:
- adds w23, w23, #4
+ adds w4, w4, #4
beq .Lctrout
.Lctrloop:
mov v0.16b, v4.16b
- encrypt_block v0, w22, x21, x8, w7
+ encrypt_block v0, w3, x2, x8, w7
adds x6, x6, #1 /* increment BE ctr */
rev x7, x6
@@ -271,22 +306,22 @@ AES_ENTRY(aes_ctr_encrypt)
bcs .Lctrcarry /* overflow? */
.Lctrcarrydone:
- subs w23, w23, #1
+ subs w4, w4, #1
bmi .Lctrtailblock /* blocks <0 means tail block */
- ld1 {v3.16b}, [x20], #16
+ ld1 {v3.16b}, [x1], #16
eor v3.16b, v0.16b, v3.16b
- st1 {v3.16b}, [x19], #16
+ st1 {v3.16b}, [x0], #16
bne .Lctrloop
.Lctrout:
- st1 {v4.16b}, [x24] /* return next CTR value */
-.Lctrret:
- frame_pop
+ st1 {v4.16b}, [x5] /* return next CTR value */
+ ldp x29, x30, [sp], #16
ret
.Lctrtailblock:
- st1 {v0.16b}, [x19]
- b .Lctrret
+ st1 {v0.16b}, [x0]
+ ldp x29, x30, [sp], #16
+ ret
.Lctrcarry:
umov x7, v4.d[0] /* load upper word of ctr */
@@ -296,7 +331,6 @@ AES_ENTRY(aes_ctr_encrypt)
ins v4.d[0], x7
b .Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
- .ltorg
/*
@@ -306,150 +340,132 @@ AES_ENDPROC(aes_ctr_encrypt)
* int blocks, u8 const rk2[], u8 iv[], int first)
*/
- .macro next_tweak, out, in, const, tmp
+ .macro next_tweak, out, in, tmp
sshr \tmp\().2d, \in\().2d, #63
- and \tmp\().16b, \tmp\().16b, \const\().16b
+ and \tmp\().16b, \tmp\().16b, xtsmask.16b
add \out\().2d, \in\().2d, \in\().2d
ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
eor \out\().16b, \out\().16b, \tmp\().16b
.endm
-.Lxts_mul_x:
-CPU_LE( .quad 1, 0x87 )
-CPU_BE( .quad 0x87, 1 )
+ .macro xts_load_mask, tmp
+ movi xtsmask.2s, #0x1
+ movi \tmp\().2s, #0x87
+ uzp1 xtsmask.4s, xtsmask.4s, \tmp\().4s
+ .endm
AES_ENTRY(aes_xts_encrypt)
- frame_push 6
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
- mov x24, x6
-
- ld1 {v4.16b}, [x24]
+ ld1 {v4.16b}, [x6]
+ xts_load_mask v8
cbz w7, .Lxtsencnotfirst
enc_prepare w3, x5, x8
encrypt_block v4, w3, x5, x8, w7 /* first tweak */
enc_switch_key w3, x2, x8
- ldr q7, .Lxts_mul_x
b .LxtsencNx
-.Lxtsencrestart:
- ld1 {v4.16b}, [x24]
.Lxtsencnotfirst:
- enc_prepare w22, x21, x8
+ enc_prepare w3, x2, x8
.LxtsencloopNx:
- ldr q7, .Lxts_mul_x
- next_tweak v4, v4, v7, v8
+ next_tweak v4, v4, v8
.LxtsencNx:
- subs w23, w23, #4
+ subs w4, w4, #4
bmi .Lxtsenc1x
- ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
- next_tweak v5, v4, v7, v8
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ next_tweak v5, v4, v8
eor v0.16b, v0.16b, v4.16b
- next_tweak v6, v5, v7, v8
+ next_tweak v6, v5, v8
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
- next_tweak v7, v6, v7, v8
+ next_tweak v7, v6, v8
eor v3.16b, v3.16b, v7.16b
bl aes_encrypt_block4x
eor v3.16b, v3.16b, v7.16b
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
- st1 {v0.16b-v3.16b}, [x19], #64
+ st1 {v0.16b-v3.16b}, [x0], #64
mov v4.16b, v7.16b
- cbz w23, .Lxtsencout
- st1 {v4.16b}, [x24]
- cond_yield_neon .Lxtsencrestart
+ cbz w4, .Lxtsencout
+ xts_reload_mask v8
b .LxtsencloopNx
.Lxtsenc1x:
- adds w23, w23, #4
+ adds w4, w4, #4
beq .Lxtsencout
.Lxtsencloop:
- ld1 {v1.16b}, [x20], #16
+ ld1 {v1.16b}, [x1], #16
eor v0.16b, v1.16b, v4.16b
- encrypt_block v0, w22, x21, x8, w7
+ encrypt_block v0, w3, x2, x8, w7
eor v0.16b, v0.16b, v4.16b
- st1 {v0.16b}, [x19], #16
- subs w23, w23, #1
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
beq .Lxtsencout
- next_tweak v4, v4, v7, v8
+ next_tweak v4, v4, v8
b .Lxtsencloop
.Lxtsencout:
- st1 {v4.16b}, [x24]
- frame_pop
+ st1 {v4.16b}, [x6]
+ ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_xts_encrypt)
AES_ENTRY(aes_xts_decrypt)
- frame_push 6
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
- mov x24, x6
-
- ld1 {v4.16b}, [x24]
+ ld1 {v4.16b}, [x6]
+ xts_load_mask v8
cbz w7, .Lxtsdecnotfirst
enc_prepare w3, x5, x8
encrypt_block v4, w3, x5, x8, w7 /* first tweak */
dec_prepare w3, x2, x8
- ldr q7, .Lxts_mul_x
b .LxtsdecNx
-.Lxtsdecrestart:
- ld1 {v4.16b}, [x24]
.Lxtsdecnotfirst:
- dec_prepare w22, x21, x8
+ dec_prepare w3, x2, x8
.LxtsdecloopNx:
- ldr q7, .Lxts_mul_x
- next_tweak v4, v4, v7, v8
+ next_tweak v4, v4, v8
.LxtsdecNx:
- subs w23, w23, #4
+ subs w4, w4, #4
bmi .Lxtsdec1x
- ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
- next_tweak v5, v4, v7, v8
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ next_tweak v5, v4, v8
eor v0.16b, v0.16b, v4.16b
- next_tweak v6, v5, v7, v8
+ next_tweak v6, v5, v8
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
- next_tweak v7, v6, v7, v8
+ next_tweak v7, v6, v8
eor v3.16b, v3.16b, v7.16b
bl aes_decrypt_block4x
eor v3.16b, v3.16b, v7.16b
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
- st1 {v0.16b-v3.16b}, [x19], #64
+ st1 {v0.16b-v3.16b}, [x0], #64
mov v4.16b, v7.16b
- cbz w23, .Lxtsdecout
- st1 {v4.16b}, [x24]
- cond_yield_neon .Lxtsdecrestart
+ cbz w4, .Lxtsdecout
+ xts_reload_mask v8
b .LxtsdecloopNx
.Lxtsdec1x:
- adds w23, w23, #4
+ adds w4, w4, #4
beq .Lxtsdecout
.Lxtsdecloop:
- ld1 {v1.16b}, [x20], #16
+ ld1 {v1.16b}, [x1], #16
eor v0.16b, v1.16b, v4.16b
- decrypt_block v0, w22, x21, x8, w7
+ decrypt_block v0, w3, x2, x8, w7
eor v0.16b, v0.16b, v4.16b
- st1 {v0.16b}, [x19], #16
- subs w23, w23, #1
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
beq .Lxtsdecout
- next_tweak v4, v4, v7, v8
+ next_tweak v4, v4, v8
b .Lxtsdecloop
.Lxtsdecout:
- st1 {v4.16b}, [x24]
- frame_pop
+ st1 {v4.16b}, [x6]
+ ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_xts_decrypt)
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index 1c7b45b7268e..29100f692e8a 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -14,6 +14,12 @@
#define AES_ENTRY(func) ENTRY(neon_ ## func)
#define AES_ENDPROC(func) ENDPROC(neon_ ## func)
+ xtsmask .req v7
+
+ .macro xts_reload_mask, tmp
+ xts_load_mask \tmp
+ .endm
+
/* multiply by polynomial 'x' in GF(2^8) */
.macro mul_by_x, out, in, temp, const
sshr \temp, \in, #7
diff --git a/arch/arm64/crypto/crc32-ce-core.S b/arch/arm64/crypto/crc32-ce-core.S
deleted file mode 100644
index 8061bf0f9c66..000000000000
--- a/arch/arm64/crypto/crc32-ce-core.S
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * Accelerated CRC32(C) using arm64 CRC, NEON and Crypto Extensions instructions
- *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
- * calculation.
- * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
- * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
- * at:
- * http://www.intel.com/products/processor/manuals/
- * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
- * Volume 2B: Instruction Set Reference, N-Z
- *
- * Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
- * Alexander Boyko <Alexander_Boyko@xyratex.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
- .section ".rodata", "a"
- .align 6
- .cpu generic+crypto+crc
-
-.Lcrc32_constants:
- /*
- * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
- * #define CONSTANT_R1 0x154442bd4LL
- *
- * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
- * #define CONSTANT_R2 0x1c6e41596LL
- */
- .octa 0x00000001c6e415960000000154442bd4
-
- /*
- * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
- * #define CONSTANT_R3 0x1751997d0LL
- *
- * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
- * #define CONSTANT_R4 0x0ccaa009eLL
- */
- .octa 0x00000000ccaa009e00000001751997d0
-
- /*
- * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
- * #define CONSTANT_R5 0x163cd6124LL
- */
- .quad 0x0000000163cd6124
- .quad 0x00000000FFFFFFFF
-
- /*
- * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
- *
- * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))`
- * = 0x1F7011641LL
- * #define CONSTANT_RU 0x1F7011641LL
- */
- .octa 0x00000001F701164100000001DB710641
-
-.Lcrc32c_constants:
- .octa 0x000000009e4addf800000000740eef02
- .octa 0x000000014cd00bd600000000f20c0dfe
- .quad 0x00000000dd45aab8
- .quad 0x00000000FFFFFFFF
- .octa 0x00000000dea713f10000000105ec76f0
-
- vCONSTANT .req v0
- dCONSTANT .req d0
- qCONSTANT .req q0
-
- BUF .req x19
- LEN .req x20
- CRC .req x21
- CONST .req x22
-
- vzr .req v9
-
- /**
- * Calculate crc32
- * BUF - buffer
- * LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63
- * CRC - initial crc32
- * return %eax crc32
- * uint crc32_pmull_le(unsigned char const *buffer,
- * size_t len, uint crc32)
- */
- .text
-ENTRY(crc32_pmull_le)
- adr_l x3, .Lcrc32_constants
- b 0f
-
-ENTRY(crc32c_pmull_le)
- adr_l x3, .Lcrc32c_constants
-
-0: frame_push 4, 64
-
- mov BUF, x0
- mov LEN, x1
- mov CRC, x2
- mov CONST, x3
-
- bic LEN, LEN, #15
- ld1 {v1.16b-v4.16b}, [BUF], #0x40
- movi vzr.16b, #0
- fmov dCONSTANT, CRC
- eor v1.16b, v1.16b, vCONSTANT.16b
- sub LEN, LEN, #0x40
- cmp LEN, #0x40
- b.lt less_64
-
- ldr qCONSTANT, [CONST]
-
-loop_64: /* 64 bytes Full cache line folding */
- sub LEN, LEN, #0x40
-
- pmull2 v5.1q, v1.2d, vCONSTANT.2d
- pmull2 v6.1q, v2.2d, vCONSTANT.2d
- pmull2 v7.1q, v3.2d, vCONSTANT.2d
- pmull2 v8.1q, v4.2d, vCONSTANT.2d
-
- pmull v1.1q, v1.1d, vCONSTANT.1d
- pmull v2.1q, v2.1d, vCONSTANT.1d
- pmull v3.1q, v3.1d, vCONSTANT.1d
- pmull v4.1q, v4.1d, vCONSTANT.1d
-
- eor v1.16b, v1.16b, v5.16b
- ld1 {v5.16b}, [BUF], #0x10
- eor v2.16b, v2.16b, v6.16b
- ld1 {v6.16b}, [BUF], #0x10
- eor v3.16b, v3.16b, v7.16b
- ld1 {v7.16b}, [BUF], #0x10
- eor v4.16b, v4.16b, v8.16b
- ld1 {v8.16b}, [BUF], #0x10
-
- eor v1.16b, v1.16b, v5.16b
- eor v2.16b, v2.16b, v6.16b
- eor v3.16b, v3.16b, v7.16b
- eor v4.16b, v4.16b, v8.16b
-
- cmp LEN, #0x40
- b.lt less_64
-
- if_will_cond_yield_neon
- stp q1, q2, [sp, #.Lframe_local_offset]
- stp q3, q4, [sp, #.Lframe_local_offset + 32]
- do_cond_yield_neon
- ldp q1, q2, [sp, #.Lframe_local_offset]
- ldp q3, q4, [sp, #.Lframe_local_offset + 32]
- ldr qCONSTANT, [CONST]
- movi vzr.16b, #0
- endif_yield_neon
- b loop_64
-
-less_64: /* Folding cache line into 128bit */
- ldr qCONSTANT, [CONST, #16]
-
- pmull2 v5.1q, v1.2d, vCONSTANT.2d
- pmull v1.1q, v1.1d, vCONSTANT.1d
- eor v1.16b, v1.16b, v5.16b
- eor v1.16b, v1.16b, v2.16b
-
- pmull2 v5.1q, v1.2d, vCONSTANT.2d
- pmull v1.1q, v1.1d, vCONSTANT.1d
- eor v1.16b, v1.16b, v5.16b
- eor v1.16b, v1.16b, v3.16b
-
- pmull2 v5.1q, v1.2d, vCONSTANT.2d
- pmull v1.1q, v1.1d, vCONSTANT.1d
- eor v1.16b, v1.16b, v5.16b
- eor v1.16b, v1.16b, v4.16b
-
- cbz LEN, fold_64
-
-loop_16: /* Folding rest buffer into 128bit */
- subs LEN, LEN, #0x10
-
- ld1 {v2.16b}, [BUF], #0x10
- pmull2 v5.1q, v1.2d, vCONSTANT.2d
- pmull v1.1q, v1.1d, vCONSTANT.1d
- eor v1.16b, v1.16b, v5.16b
- eor v1.16b, v1.16b, v2.16b
-
- b.ne loop_16
-
-fold_64:
- /* perform the last 64 bit fold, also adds 32 zeroes
- * to the input stream */
- ext v2.16b, v1.16b, v1.16b, #8
- pmull2 v2.1q, v2.2d, vCONSTANT.2d
- ext v1.16b, v1.16b, vzr.16b, #8
- eor v1.16b, v1.16b, v2.16b
-
- /* final 32-bit fold */
- ldr dCONSTANT, [CONST, #32]
- ldr d3, [CONST, #40]
-
- ext v2.16b, v1.16b, vzr.16b, #4
- and v1.16b, v1.16b, v3.16b
- pmull v1.1q, v1.1d, vCONSTANT.1d
- eor v1.16b, v1.16b, v2.16b
-
- /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
- ldr qCONSTANT, [CONST, #48]
-
- and v2.16b, v1.16b, v3.16b
- ext v2.16b, vzr.16b, v2.16b, #8
- pmull2 v2.1q, v2.2d, vCONSTANT.2d
- and v2.16b, v2.16b, v3.16b
- pmull v2.1q, v2.1d, vCONSTANT.1d
- eor v1.16b, v1.16b, v2.16b
- mov w0, v1.s[1]
-
- frame_pop
- ret
-ENDPROC(crc32_pmull_le)
-ENDPROC(crc32c_pmull_le)
-
- .macro __crc32, c
-0: subs x2, x2, #16
- b.mi 8f
- ldp x3, x4, [x1], #16
-CPU_BE( rev x3, x3 )
-CPU_BE( rev x4, x4 )
- crc32\c\()x w0, w0, x3
- crc32\c\()x w0, w0, x4
- b.ne 0b
- ret
-
-8: tbz x2, #3, 4f
- ldr x3, [x1], #8
-CPU_BE( rev x3, x3 )
- crc32\c\()x w0, w0, x3
-4: tbz x2, #2, 2f
- ldr w3, [x1], #4
-CPU_BE( rev w3, w3 )
- crc32\c\()w w0, w0, w3
-2: tbz x2, #1, 1f
- ldrh w3, [x1], #2
-CPU_BE( rev16 w3, w3 )
- crc32\c\()h w0, w0, w3
-1: tbz x2, #0, 0f
- ldrb w3, [x1]
- crc32\c\()b w0, w0, w3
-0: ret
- .endm
-
- .align 5
-ENTRY(crc32_armv8_le)
- __crc32
-ENDPROC(crc32_armv8_le)
-
- .align 5
-ENTRY(crc32c_armv8_le)
- __crc32 c
-ENDPROC(crc32c_armv8_le)
diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c
deleted file mode 100644
index 34b4e3d46aab..000000000000
--- a/arch/arm64/crypto/crc32-ce-glue.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
- *
- * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/cpufeature.h>
-#include <linux/crc32.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#include <crypto/internal/hash.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <asm/unaligned.h>
-
-#define PMULL_MIN_LEN 64L /* minimum size of buffer
- * for crc32_pmull_le_16 */
-#define SCALE_F 16L /* size of NEON register */
-
-asmlinkage u32 crc32_pmull_le(const u8 buf[], u64 len, u32 init_crc);
-asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], size_t len);
-
-asmlinkage u32 crc32c_pmull_le(const u8 buf[], u64 len, u32 init_crc);
-asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], size_t len);
-
-static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], size_t len);
-static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], size_t len);
-
-static int crc32_pmull_cra_init(struct crypto_tfm *tfm)
-{
- u32 *key = crypto_tfm_ctx(tfm);
-
- *key = 0;
- return 0;
-}
-
-static int crc32c_pmull_cra_init(struct crypto_tfm *tfm)
-{
- u32 *key = crypto_tfm_ctx(tfm);
-
- *key = ~0;
- return 0;
-}
-
-static int crc32_pmull_setkey(struct crypto_shash *hash, const u8 *key,
- unsigned int keylen)
-{
- u32 *mctx = crypto_shash_ctx(hash);
-
- if (keylen != sizeof(u32)) {
- crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
- return -EINVAL;
- }
- *mctx = le32_to_cpup((__le32 *)key);
- return 0;
-}
-
-static int crc32_pmull_init(struct shash_desc *desc)
-{
- u32 *mctx = crypto_shash_ctx(desc->tfm);
- u32 *crc = shash_desc_ctx(desc);
-
- *crc = *mctx;
- return 0;
-}
-
-static int crc32_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- u32 *crc = shash_desc_ctx(desc);
-
- *crc = crc32_armv8_le(*crc, data, length);
- return 0;
-}
-
-static int crc32c_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- u32 *crc = shash_desc_ctx(desc);
-
- *crc = crc32c_armv8_le(*crc, data, length);
- return 0;
-}
-
-static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- u32 *crc = shash_desc_ctx(desc);
- unsigned int l;
-
- if ((u64)data % SCALE_F) {
- l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
-
- *crc = fallback_crc32(*crc, data, l);
-
- data += l;
- length -= l;
- }
-
- if (length >= PMULL_MIN_LEN && may_use_simd()) {
- l = round_down(length, SCALE_F);
-
- kernel_neon_begin();
- *crc = crc32_pmull_le(data, l, *crc);
- kernel_neon_end();
-
- data += l;
- length -= l;
- }
-
- if (length > 0)
- *crc = fallback_crc32(*crc, data, length);
-
- return 0;
-}
-
-static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- u32 *crc = shash_desc_ctx(desc);
- unsigned int l;
-
- if ((u64)data % SCALE_F) {
- l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
-
- *crc = fallback_crc32c(*crc, data, l);
-
- data += l;
- length -= l;
- }
-
- if (length >= PMULL_MIN_LEN && may_use_simd()) {
- l = round_down(length, SCALE_F);
-
- kernel_neon_begin();
- *crc = crc32c_pmull_le(data, l, *crc);
- kernel_neon_end();
-
- data += l;
- length -= l;
- }
-
- if (length > 0) {
- *crc = fallback_crc32c(*crc, data, length);
- }
-
- return 0;
-}
-
-static int crc32_pmull_final(struct shash_desc *desc, u8 *out)
-{
- u32 *crc = shash_desc_ctx(desc);
-
- put_unaligned_le32(*crc, out);
- return 0;
-}
-
-static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
-{
- u32 *crc = shash_desc_ctx(desc);
-
- put_unaligned_le32(~*crc, out);
- return 0;
-}
-
-static struct shash_alg crc32_pmull_algs[] = { {
- .setkey = crc32_pmull_setkey,
- .init = crc32_pmull_init,
- .update = crc32_update,
- .final = crc32_pmull_final,
- .descsize = sizeof(u32),
- .digestsize = sizeof(u32),
-
- .base.cra_ctxsize = sizeof(u32),
- .base.cra_init = crc32_pmull_cra_init,
- .base.cra_name = "crc32",
- .base.cra_driver_name = "crc32-arm64-ce",
- .base.cra_priority = 200,
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .base.cra_blocksize = 1,
- .base.cra_module = THIS_MODULE,
-}, {
- .setkey = crc32_pmull_setkey,
- .init = crc32_pmull_init,
- .update = crc32c_update,
- .final = crc32c_pmull_final,
- .descsize = sizeof(u32),
- .digestsize = sizeof(u32),
-
- .base.cra_ctxsize = sizeof(u32),
- .base.cra_init = crc32c_pmull_cra_init,
- .base.cra_name = "crc32c",
- .base.cra_driver_name = "crc32c-arm64-ce",
- .base.cra_priority = 200,
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .base.cra_blocksize = 1,
- .base.cra_module = THIS_MODULE,
-} };
-
-static int __init crc32_pmull_mod_init(void)
-{
- if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
- crc32_pmull_algs[0].update = crc32_pmull_update;
- crc32_pmull_algs[1].update = crc32c_pmull_update;
-
- if (elf_hwcap & HWCAP_CRC32) {
- fallback_crc32 = crc32_armv8_le;
- fallback_crc32c = crc32c_armv8_le;
- } else {
- fallback_crc32 = crc32_le;
- fallback_crc32c = __crc32c_le;
- }
- } else if (!(elf_hwcap & HWCAP_CRC32)) {
- return -ENODEV;
- }
- return crypto_register_shashes(crc32_pmull_algs,
- ARRAY_SIZE(crc32_pmull_algs));
-}
-
-static void __exit crc32_pmull_mod_exit(void)
-{
- crypto_unregister_shashes(crc32_pmull_algs,
- ARRAY_SIZE(crc32_pmull_algs));
-}
-
-static const struct cpu_feature crc32_cpu_feature[] = {
- { cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
-};
-MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
-
-module_init(crc32_pmull_mod_init);
-module_exit(crc32_pmull_mod_exit);
-
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S
index 663ea71cdb38..9e82e8e8ed05 100644
--- a/arch/arm64/crypto/crct10dif-ce-core.S
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -80,7 +80,186 @@
vzr .req v13
-ENTRY(crc_t10dif_pmull)
+ ad .req v14
+ bd .req v10
+
+ k00_16 .req v15
+ k32_48 .req v16
+
+ t3 .req v17
+ t4 .req v18
+ t5 .req v19
+ t6 .req v20
+ t7 .req v21
+ t8 .req v22
+ t9 .req v23
+
+ perm1 .req v24
+ perm2 .req v25
+ perm3 .req v26
+ perm4 .req v27
+
+ bd1 .req v28
+ bd2 .req v29
+ bd3 .req v30
+ bd4 .req v31
+
+ .macro __pmull_init_p64
+ .endm
+
+ .macro __pmull_pre_p64, bd
+ .endm
+
+ .macro __pmull_init_p8
+ // k00_16 := 0x0000000000000000_000000000000ffff
+ // k32_48 := 0x00000000ffffffff_0000ffffffffffff
+ movi k32_48.2d, #0xffffffff
+ mov k32_48.h[2], k32_48.h[0]
+ ushr k00_16.2d, k32_48.2d, #32
+
+ // prepare the permutation vectors
+ mov_q x5, 0x080f0e0d0c0b0a09
+ movi perm4.8b, #8
+ dup perm1.2d, x5
+ eor perm1.16b, perm1.16b, perm4.16b
+ ushr perm2.2d, perm1.2d, #8
+ ushr perm3.2d, perm1.2d, #16
+ ushr perm4.2d, perm1.2d, #24
+ sli perm2.2d, perm1.2d, #56
+ sli perm3.2d, perm1.2d, #48
+ sli perm4.2d, perm1.2d, #40
+ .endm
+
+ .macro __pmull_pre_p8, bd
+ tbl bd1.16b, {\bd\().16b}, perm1.16b
+ tbl bd2.16b, {\bd\().16b}, perm2.16b
+ tbl bd3.16b, {\bd\().16b}, perm3.16b
+ tbl bd4.16b, {\bd\().16b}, perm4.16b
+ .endm
+
+__pmull_p8_core:
+.L__pmull_p8_core:
+ ext t4.8b, ad.8b, ad.8b, #1 // A1
+ ext t5.8b, ad.8b, ad.8b, #2 // A2
+ ext t6.8b, ad.8b, ad.8b, #3 // A3
+
+ pmull t4.8h, t4.8b, bd.8b // F = A1*B
+ pmull t8.8h, ad.8b, bd1.8b // E = A*B1
+ pmull t5.8h, t5.8b, bd.8b // H = A2*B
+ pmull t7.8h, ad.8b, bd2.8b // G = A*B2
+ pmull t6.8h, t6.8b, bd.8b // J = A3*B
+ pmull t9.8h, ad.8b, bd3.8b // I = A*B3
+ pmull t3.8h, ad.8b, bd4.8b // K = A*B4
+ b 0f
+
+.L__pmull_p8_core2:
+ tbl t4.16b, {ad.16b}, perm1.16b // A1
+ tbl t5.16b, {ad.16b}, perm2.16b // A2
+ tbl t6.16b, {ad.16b}, perm3.16b // A3
+
+ pmull2 t4.8h, t4.16b, bd.16b // F = A1*B
+ pmull2 t8.8h, ad.16b, bd1.16b // E = A*B1
+ pmull2 t5.8h, t5.16b, bd.16b // H = A2*B
+ pmull2 t7.8h, ad.16b, bd2.16b // G = A*B2
+ pmull2 t6.8h, t6.16b, bd.16b // J = A3*B
+ pmull2 t9.8h, ad.16b, bd3.16b // I = A*B3
+ pmull2 t3.8h, ad.16b, bd4.16b // K = A*B4
+
+0: eor t4.16b, t4.16b, t8.16b // L = E + F
+ eor t5.16b, t5.16b, t7.16b // M = G + H
+ eor t6.16b, t6.16b, t9.16b // N = I + J
+
+ uzp1 t8.2d, t4.2d, t5.2d
+ uzp2 t4.2d, t4.2d, t5.2d
+ uzp1 t7.2d, t6.2d, t3.2d
+ uzp2 t6.2d, t6.2d, t3.2d
+
+ // t4 = (L) (P0 + P1) << 8
+ // t5 = (M) (P2 + P3) << 16
+ eor t8.16b, t8.16b, t4.16b
+ and t4.16b, t4.16b, k32_48.16b
+
+ // t6 = (N) (P4 + P5) << 24
+ // t7 = (K) (P6 + P7) << 32
+ eor t7.16b, t7.16b, t6.16b
+ and t6.16b, t6.16b, k00_16.16b
+
+ eor t8.16b, t8.16b, t4.16b
+ eor t7.16b, t7.16b, t6.16b
+
+ zip2 t5.2d, t8.2d, t4.2d
+ zip1 t4.2d, t8.2d, t4.2d
+ zip2 t3.2d, t7.2d, t6.2d
+ zip1 t6.2d, t7.2d, t6.2d
+
+ ext t4.16b, t4.16b, t4.16b, #15
+ ext t5.16b, t5.16b, t5.16b, #14
+ ext t6.16b, t6.16b, t6.16b, #13
+ ext t3.16b, t3.16b, t3.16b, #12
+
+ eor t4.16b, t4.16b, t5.16b
+ eor t6.16b, t6.16b, t3.16b
+ ret
+ENDPROC(__pmull_p8_core)
+
+ .macro __pmull_p8, rq, ad, bd, i
+ .ifnc \bd, v10
+ .err
+ .endif
+ mov ad.16b, \ad\().16b
+ .ifb \i
+ pmull \rq\().8h, \ad\().8b, bd.8b // D = A*B
+ .else
+ pmull2 \rq\().8h, \ad\().16b, bd.16b // D = A*B
+ .endif
+
+ bl .L__pmull_p8_core\i
+
+ eor \rq\().16b, \rq\().16b, t4.16b
+ eor \rq\().16b, \rq\().16b, t6.16b
+ .endm
+
+ .macro fold64, p, reg1, reg2
+ ldp q11, q12, [arg2], #0x20
+
+ __pmull_\p v8, \reg1, v10, 2
+ __pmull_\p \reg1, \reg1, v10
+
+CPU_LE( rev64 v11.16b, v11.16b )
+CPU_LE( rev64 v12.16b, v12.16b )
+
+ __pmull_\p v9, \reg2, v10, 2
+ __pmull_\p \reg2, \reg2, v10
+
+CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 )
+CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
+
+ eor \reg1\().16b, \reg1\().16b, v8.16b
+ eor \reg2\().16b, \reg2\().16b, v9.16b
+ eor \reg1\().16b, \reg1\().16b, v11.16b
+ eor \reg2\().16b, \reg2\().16b, v12.16b
+ .endm
+
+ .macro fold16, p, reg, rk
+ __pmull_\p v8, \reg, v10
+ __pmull_\p \reg, \reg, v10, 2
+ .ifnb \rk
+ ldr_l q10, \rk, x8
+ __pmull_pre_\p v10
+ .endif
+ eor v7.16b, v7.16b, v8.16b
+ eor v7.16b, v7.16b, \reg\().16b
+ .endm
+
+ .macro __pmull_p64, rd, rn, rm, n
+ .ifb \n
+ pmull \rd\().1q, \rn\().1d, \rm\().1d
+ .else
+ pmull2 \rd\().1q, \rn\().2d, \rm\().2d
+ .endif
+ .endm
+
+ .macro crc_t10dif_pmull, p
frame_push 3, 128
mov arg1_low32, w0
@@ -89,6 +268,8 @@ ENTRY(crc_t10dif_pmull)
movi vzr.16b, #0 // init zero register
+ __pmull_init_\p
+
// adjust the 16-bit initial_crc value, scale it to 32 bits
lsl arg1_low32, arg1_low32, #16
@@ -96,7 +277,7 @@ ENTRY(crc_t10dif_pmull)
cmp arg3, #256
// for sizes less than 128, we can't fold 64B at a time...
- b.lt _less_than_128
+ b.lt .L_less_than_128_\@
// load the initial crc value
// crc value does not need to be byte-reflected, but it needs
@@ -137,6 +318,7 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
ldr_l q10, rk3, x8 // xmm10 has rk3 and rk4
// type of pmull instruction
// will determine which constant to use
+ __pmull_pre_\p v10
//
// we subtract 256 instead of 128 to save one instruction from the loop
@@ -147,41 +329,19 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
// buffer. The _fold_64_B_loop will fold 64B at a time
// until we have 64+y Bytes of buffer
-
// fold 64B at a time. This section of the code folds 4 vector
// registers in parallel
-_fold_64_B_loop:
+.L_fold_64_B_loop_\@:
- .macro fold64, reg1, reg2
- ldp q11, q12, [arg2], #0x20
-
- pmull2 v8.1q, \reg1\().2d, v10.2d
- pmull \reg1\().1q, \reg1\().1d, v10.1d
-
-CPU_LE( rev64 v11.16b, v11.16b )
-CPU_LE( rev64 v12.16b, v12.16b )
-
- pmull2 v9.1q, \reg2\().2d, v10.2d
- pmull \reg2\().1q, \reg2\().1d, v10.1d
-
-CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 )
-CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
-
- eor \reg1\().16b, \reg1\().16b, v8.16b
- eor \reg2\().16b, \reg2\().16b, v9.16b
- eor \reg1\().16b, \reg1\().16b, v11.16b
- eor \reg2\().16b, \reg2\().16b, v12.16b
- .endm
-
- fold64 v0, v1
- fold64 v2, v3
- fold64 v4, v5
- fold64 v6, v7
+ fold64 \p, v0, v1
+ fold64 \p, v2, v3
+ fold64 \p, v4, v5
+ fold64 \p, v6, v7
subs arg3, arg3, #128
// check if there is another 64B in the buffer to be able to fold
- b.lt _fold_64_B_end
+ b.lt .L_fold_64_B_end_\@
if_will_cond_yield_neon
stp q0, q1, [sp, #.Lframe_local_offset]
@@ -195,11 +355,13 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
ldp q6, q7, [sp, #.Lframe_local_offset + 96]
ldr_l q10, rk3, x8
movi vzr.16b, #0 // init zero register
+ __pmull_init_\p
+ __pmull_pre_\p v10
endif_yield_neon
- b _fold_64_B_loop
+ b .L_fold_64_B_loop_\@
-_fold_64_B_end:
+.L_fold_64_B_end_\@:
// at this point, the buffer pointer is pointing at the last y Bytes
// of the buffer the 64B of folded data is in 4 of the vector
// registers: v0, v1, v2, v3
@@ -208,38 +370,29 @@ _fold_64_B_end:
// constants
ldr_l q10, rk9, x8
+ __pmull_pre_\p v10
- .macro fold16, reg, rk
- pmull v8.1q, \reg\().1d, v10.1d
- pmull2 \reg\().1q, \reg\().2d, v10.2d
- .ifnb \rk
- ldr_l q10, \rk, x8
- .endif
- eor v7.16b, v7.16b, v8.16b
- eor v7.16b, v7.16b, \reg\().16b
- .endm
-
- fold16 v0, rk11
- fold16 v1, rk13
- fold16 v2, rk15
- fold16 v3, rk17
- fold16 v4, rk19
- fold16 v5, rk1
- fold16 v6
+ fold16 \p, v0, rk11
+ fold16 \p, v1, rk13
+ fold16 \p, v2, rk15
+ fold16 \p, v3, rk17
+ fold16 \p, v4, rk19
+ fold16 \p, v5, rk1
+ fold16 \p, v6
// instead of 64, we add 48 to the loop counter to save 1 instruction
// from the loop instead of a cmp instruction, we use the negative
// flag with the jl instruction
adds arg3, arg3, #(128-16)
- b.lt _final_reduction_for_128
+ b.lt .L_final_reduction_for_128_\@
// now we have 16+y bytes left to reduce. 16 Bytes is in register v7
// and the rest is in memory. We can fold 16 bytes at a time if y>=16
// continue folding 16B at a time
-_16B_reduction_loop:
- pmull v8.1q, v7.1d, v10.1d
- pmull2 v7.1q, v7.2d, v10.2d
+.L_16B_reduction_loop_\@:
+ __pmull_\p v8, v7, v10
+ __pmull_\p v7, v7, v10, 2
eor v7.16b, v7.16b, v8.16b
ldr q0, [arg2], #16
@@ -251,22 +404,22 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
// instead of a cmp instruction, we utilize the flags with the
// jge instruction equivalent of: cmp arg3, 16-16
// check if there is any more 16B in the buffer to be able to fold
- b.ge _16B_reduction_loop
+ b.ge .L_16B_reduction_loop_\@
// now we have 16+z bytes left to reduce, where 0<= z < 16.
// first, we reduce the data in the xmm7 register
-_final_reduction_for_128:
+.L_final_reduction_for_128_\@:
// check if any more data to fold. If not, compute the CRC of
// the final 128 bits
adds arg3, arg3, #16
- b.eq _128_done
+ b.eq .L_128_done_\@
// here we are getting data that is less than 16 bytes.
// since we know that there was data before the pointer, we can
// offset the input pointer before the actual point, to receive
// exactly 16 bytes. after that the registers need to be adjusted.
-_get_last_two_regs:
+.L_get_last_two_regs_\@:
add arg2, arg2, arg3
ldr q1, [arg2, #-16]
CPU_LE( rev64 v1.16b, v1.16b )
@@ -291,47 +444,48 @@ CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 )
bsl v0.16b, v2.16b, v1.16b
// fold 16 Bytes
- pmull v8.1q, v7.1d, v10.1d
- pmull2 v7.1q, v7.2d, v10.2d
+ __pmull_\p v8, v7, v10
+ __pmull_\p v7, v7, v10, 2
eor v7.16b, v7.16b, v8.16b
eor v7.16b, v7.16b, v0.16b
-_128_done:
+.L_128_done_\@:
// compute crc of a 128-bit value
ldr_l q10, rk5, x8 // rk5 and rk6 in xmm10
+ __pmull_pre_\p v10
// 64b fold
ext v0.16b, vzr.16b, v7.16b, #8
mov v7.d[0], v7.d[1]
- pmull v7.1q, v7.1d, v10.1d
+ __pmull_\p v7, v7, v10
eor v7.16b, v7.16b, v0.16b
// 32b fold
ext v0.16b, v7.16b, vzr.16b, #4
mov v7.s[3], vzr.s[0]
- pmull2 v0.1q, v0.2d, v10.2d
+ __pmull_\p v0, v0, v10, 2
eor v7.16b, v7.16b, v0.16b
// barrett reduction
-_barrett:
ldr_l q10, rk7, x8
+ __pmull_pre_\p v10
mov v0.d[0], v7.d[1]
- pmull v0.1q, v0.1d, v10.1d
+ __pmull_\p v0, v0, v10
ext v0.16b, vzr.16b, v0.16b, #12
- pmull2 v0.1q, v0.2d, v10.2d
+ __pmull_\p v0, v0, v10, 2
ext v0.16b, vzr.16b, v0.16b, #12
eor v7.16b, v7.16b, v0.16b
mov w0, v7.s[1]
-_cleanup:
+.L_cleanup_\@:
// scale the result back to 16 bits
lsr x0, x0, #16
frame_pop
ret
-_less_than_128:
- cbz arg3, _cleanup
+.L_less_than_128_\@:
+ cbz arg3, .L_cleanup_\@
movi v0.16b, #0
mov v0.s[3], arg1_low32 // get the initial crc value
@@ -342,20 +496,21 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
eor v7.16b, v7.16b, v0.16b // xor the initial crc value
cmp arg3, #16
- b.eq _128_done // exactly 16 left
- b.lt _less_than_16_left
+ b.eq .L_128_done_\@ // exactly 16 left
+ b.lt .L_less_than_16_left_\@
ldr_l q10, rk1, x8 // rk1 and rk2 in xmm10
+ __pmull_pre_\p v10
// update the counter. subtract 32 instead of 16 to save one
// instruction from the loop
subs arg3, arg3, #32
- b.ge _16B_reduction_loop
+ b.ge .L_16B_reduction_loop_\@
add arg3, arg3, #16
- b _get_last_two_regs
+ b .L_get_last_two_regs_\@
-_less_than_16_left:
+.L_less_than_16_left_\@:
// shl r9, 4
adr_l x0, tbl_shf_table + 16
sub x0, x0, arg3
@@ -363,8 +518,17 @@ _less_than_16_left:
movi v9.16b, #0x80
eor v0.16b, v0.16b, v9.16b
tbl v7.16b, {v7.16b}, v0.16b
- b _128_done
-ENDPROC(crc_t10dif_pmull)
+ b .L_128_done_\@
+ .endm
+
+ENTRY(crc_t10dif_pmull_p8)
+ crc_t10dif_pmull p8
+ENDPROC(crc_t10dif_pmull_p8)
+
+ .align 5
+ENTRY(crc_t10dif_pmull_p64)
+ crc_t10dif_pmull p64
+ENDPROC(crc_t10dif_pmull_p64)
// precomputed constants
// these constants are precomputed from the poly:
diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c
index 96f0cae4a022..b461d62023f2 100644
--- a/arch/arm64/crypto/crct10dif-ce-glue.c
+++ b/arch/arm64/crypto/crct10dif-ce-glue.c
@@ -22,7 +22,10 @@
#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U
-asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u64 len);
+asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 buf[], u64 len);
+asmlinkage u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 buf[], u64 len);
+
+static u16 (*crc_t10dif_pmull)(u16 init_crc, const u8 buf[], u64 len);
static int crct10dif_init(struct shash_desc *desc)
{
@@ -85,6 +88,11 @@ static struct shash_alg crc_t10dif_alg = {
static int __init crc_t10dif_mod_init(void)
{
+ if (elf_hwcap & HWCAP_PMULL)
+ crc_t10dif_pmull = crc_t10dif_pmull_p64;
+ else
+ crc_t10dif_pmull = crc_t10dif_pmull_p8;
+
return crypto_register_shash(&crc_t10dif_alg);
}
@@ -93,8 +101,10 @@ static void __exit crc_t10dif_mod_exit(void)
crypto_unregister_shash(&crc_t10dif_alg);
}
-module_cpu_feature_match(PMULL, crc_t10dif_mod_init);
+module_cpu_feature_match(ASIMD, crc_t10dif_mod_init);
module_exit(crc_t10dif_mod_exit);
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("crct10dif");
+MODULE_ALIAS_CRYPTO("crct10dif-arm64-ce");
diff --git a/arch/arm64/crypto/speck-neon-core.S b/arch/arm64/crypto/speck-neon-core.S
deleted file mode 100644
index b14463438b09..000000000000
--- a/arch/arm64/crypto/speck-neon-core.S
+++ /dev/null
@@ -1,352 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM64 NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Author: Eric Biggers <ebiggers@google.com>
- */
-
-#include <linux/linkage.h>
-
- .text
-
- // arguments
- ROUND_KEYS .req x0 // const {u64,u32} *round_keys
- NROUNDS .req w1 // int nrounds
- NROUNDS_X .req x1
- DST .req x2 // void *dst
- SRC .req x3 // const void *src
- NBYTES .req w4 // unsigned int nbytes
- TWEAK .req x5 // void *tweak
-
- // registers which hold the data being encrypted/decrypted
- // (underscores avoid a naming collision with ARM64 registers x0-x3)
- X_0 .req v0
- Y_0 .req v1
- X_1 .req v2
- Y_1 .req v3
- X_2 .req v4
- Y_2 .req v5
- X_3 .req v6
- Y_3 .req v7
-
- // the round key, duplicated in all lanes
- ROUND_KEY .req v8
-
- // index vector for tbl-based 8-bit rotates
- ROTATE_TABLE .req v9
- ROTATE_TABLE_Q .req q9
-
- // temporary registers
- TMP0 .req v10
- TMP1 .req v11
- TMP2 .req v12
- TMP3 .req v13
-
- // multiplication table for updating XTS tweaks
- GFMUL_TABLE .req v14
- GFMUL_TABLE_Q .req q14
-
- // next XTS tweak value(s)
- TWEAKV_NEXT .req v15
-
- // XTS tweaks for the blocks currently being encrypted/decrypted
- TWEAKV0 .req v16
- TWEAKV1 .req v17
- TWEAKV2 .req v18
- TWEAKV3 .req v19
- TWEAKV4 .req v20
- TWEAKV5 .req v21
- TWEAKV6 .req v22
- TWEAKV7 .req v23
-
- .align 4
-.Lror64_8_table:
- .octa 0x080f0e0d0c0b0a090007060504030201
-.Lror32_8_table:
- .octa 0x0c0f0e0d080b0a090407060500030201
-.Lrol64_8_table:
- .octa 0x0e0d0c0b0a09080f0605040302010007
-.Lrol32_8_table:
- .octa 0x0e0d0c0f0a09080b0605040702010003
-.Lgf128mul_table:
- .octa 0x00000000000000870000000000000001
-.Lgf64mul_table:
- .octa 0x0000000000000000000000002d361b00
-
-/*
- * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
- *
- * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
- * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
- * of ROUND_KEY. 'n' is the lane size: 64 for Speck128, or 32 for Speck64.
- * 'lanes' is the lane specifier: "2d" for Speck128 or "4s" for Speck64.
- */
-.macro _speck_round_128bytes n, lanes
-
- // x = ror(x, 8)
- tbl X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
- tbl X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
- tbl X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
- tbl X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
-
- // x += y
- add X_0.\lanes, X_0.\lanes, Y_0.\lanes
- add X_1.\lanes, X_1.\lanes, Y_1.\lanes
- add X_2.\lanes, X_2.\lanes, Y_2.\lanes
- add X_3.\lanes, X_3.\lanes, Y_3.\lanes
-
- // x ^= k
- eor X_0.16b, X_0.16b, ROUND_KEY.16b
- eor X_1.16b, X_1.16b, ROUND_KEY.16b
- eor X_2.16b, X_2.16b, ROUND_KEY.16b
- eor X_3.16b, X_3.16b, ROUND_KEY.16b
-
- // y = rol(y, 3)
- shl TMP0.\lanes, Y_0.\lanes, #3
- shl TMP1.\lanes, Y_1.\lanes, #3
- shl TMP2.\lanes, Y_2.\lanes, #3
- shl TMP3.\lanes, Y_3.\lanes, #3
- sri TMP0.\lanes, Y_0.\lanes, #(\n - 3)
- sri TMP1.\lanes, Y_1.\lanes, #(\n - 3)
- sri TMP2.\lanes, Y_2.\lanes, #(\n - 3)
- sri TMP3.\lanes, Y_3.\lanes, #(\n - 3)
-
- // y ^= x
- eor Y_0.16b, TMP0.16b, X_0.16b
- eor Y_1.16b, TMP1.16b, X_1.16b
- eor Y_2.16b, TMP2.16b, X_2.16b
- eor Y_3.16b, TMP3.16b, X_3.16b
-.endm
-
-/*
- * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
- *
- * This is the inverse of _speck_round_128bytes().
- */
-.macro _speck_unround_128bytes n, lanes
-
- // y ^= x
- eor TMP0.16b, Y_0.16b, X_0.16b
- eor TMP1.16b, Y_1.16b, X_1.16b
- eor TMP2.16b, Y_2.16b, X_2.16b
- eor TMP3.16b, Y_3.16b, X_3.16b
-
- // y = ror(y, 3)
- ushr Y_0.\lanes, TMP0.\lanes, #3
- ushr Y_1.\lanes, TMP1.\lanes, #3
- ushr Y_2.\lanes, TMP2.\lanes, #3
- ushr Y_3.\lanes, TMP3.\lanes, #3
- sli Y_0.\lanes, TMP0.\lanes, #(\n - 3)
- sli Y_1.\lanes, TMP1.\lanes, #(\n - 3)
- sli Y_2.\lanes, TMP2.\lanes, #(\n - 3)
- sli Y_3.\lanes, TMP3.\lanes, #(\n - 3)
-
- // x ^= k
- eor X_0.16b, X_0.16b, ROUND_KEY.16b
- eor X_1.16b, X_1.16b, ROUND_KEY.16b
- eor X_2.16b, X_2.16b, ROUND_KEY.16b
- eor X_3.16b, X_3.16b, ROUND_KEY.16b
-
- // x -= y
- sub X_0.\lanes, X_0.\lanes, Y_0.\lanes
- sub X_1.\lanes, X_1.\lanes, Y_1.\lanes
- sub X_2.\lanes, X_2.\lanes, Y_2.\lanes
- sub X_3.\lanes, X_3.\lanes, Y_3.\lanes
-
- // x = rol(x, 8)
- tbl X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
- tbl X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
- tbl X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
- tbl X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
-.endm
-
-.macro _next_xts_tweak next, cur, tmp, n
-.if \n == 64
- /*
- * Calculate the next tweak by multiplying the current one by x,
- * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
- */
- sshr \tmp\().2d, \cur\().2d, #63
- and \tmp\().16b, \tmp\().16b, GFMUL_TABLE.16b
- shl \next\().2d, \cur\().2d, #1
- ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
- eor \next\().16b, \next\().16b, \tmp\().16b
-.else
- /*
- * Calculate the next two tweaks by multiplying the current ones by x^2,
- * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
- */
- ushr \tmp\().2d, \cur\().2d, #62
- shl \next\().2d, \cur\().2d, #2
- tbl \tmp\().16b, {GFMUL_TABLE.16b}, \tmp\().16b
- eor \next\().16b, \next\().16b, \tmp\().16b
-.endif
-.endm
-
-/*
- * _speck_xts_crypt() - Speck-XTS encryption/decryption
- *
- * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
- * using Speck-XTS, specifically the variant with a block size of '2n' and round
- * count given by NROUNDS. The expanded round keys are given in ROUND_KEYS, and
- * the current XTS tweak value is given in TWEAK. It's assumed that NBYTES is a
- * nonzero multiple of 128.
- */
-.macro _speck_xts_crypt n, lanes, decrypting
-
- /*
- * If decrypting, modify the ROUND_KEYS parameter to point to the last
- * round key rather than the first, since for decryption the round keys
- * are used in reverse order.
- */
-.if \decrypting
- mov NROUNDS, NROUNDS /* zero the high 32 bits */
-.if \n == 64
- add ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #3
- sub ROUND_KEYS, ROUND_KEYS, #8
-.else
- add ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #2
- sub ROUND_KEYS, ROUND_KEYS, #4
-.endif
-.endif
-
- // Load the index vector for tbl-based 8-bit rotates
-.if \decrypting
- ldr ROTATE_TABLE_Q, .Lrol\n\()_8_table
-.else
- ldr ROTATE_TABLE_Q, .Lror\n\()_8_table
-.endif
-
- // One-time XTS preparation
-.if \n == 64
- // Load first tweak
- ld1 {TWEAKV0.16b}, [TWEAK]
-
- // Load GF(2^128) multiplication table
- ldr GFMUL_TABLE_Q, .Lgf128mul_table
-.else
- // Load first tweak
- ld1 {TWEAKV0.8b}, [TWEAK]
-
- // Load GF(2^64) multiplication table
- ldr GFMUL_TABLE_Q, .Lgf64mul_table
-
- // Calculate second tweak, packing it together with the first
- ushr TMP0.2d, TWEAKV0.2d, #63
- shl TMP1.2d, TWEAKV0.2d, #1
- tbl TMP0.8b, {GFMUL_TABLE.16b}, TMP0.8b
- eor TMP0.8b, TMP0.8b, TMP1.8b
- mov TWEAKV0.d[1], TMP0.d[0]
-.endif
-
-.Lnext_128bytes_\@:
-
- // Calculate XTS tweaks for next 128 bytes
- _next_xts_tweak TWEAKV1, TWEAKV0, TMP0, \n
- _next_xts_tweak TWEAKV2, TWEAKV1, TMP0, \n
- _next_xts_tweak TWEAKV3, TWEAKV2, TMP0, \n
- _next_xts_tweak TWEAKV4, TWEAKV3, TMP0, \n
- _next_xts_tweak TWEAKV5, TWEAKV4, TMP0, \n
- _next_xts_tweak TWEAKV6, TWEAKV5, TMP0, \n
- _next_xts_tweak TWEAKV7, TWEAKV6, TMP0, \n
- _next_xts_tweak TWEAKV_NEXT, TWEAKV7, TMP0, \n
-
- // Load the next source blocks into {X,Y}[0-3]
- ld1 {X_0.16b-Y_1.16b}, [SRC], #64
- ld1 {X_2.16b-Y_3.16b}, [SRC], #64
-
- // XOR the source blocks with their XTS tweaks
- eor TMP0.16b, X_0.16b, TWEAKV0.16b
- eor Y_0.16b, Y_0.16b, TWEAKV1.16b
- eor TMP1.16b, X_1.16b, TWEAKV2.16b
- eor Y_1.16b, Y_1.16b, TWEAKV3.16b
- eor TMP2.16b, X_2.16b, TWEAKV4.16b
- eor Y_2.16b, Y_2.16b, TWEAKV5.16b
- eor TMP3.16b, X_3.16b, TWEAKV6.16b
- eor Y_3.16b, Y_3.16b, TWEAKV7.16b
-
- /*
- * De-interleave the 'x' and 'y' elements of each block, i.e. make it so
- * that the X[0-3] registers contain only the second halves of blocks,
- * and the Y[0-3] registers contain only the first halves of blocks.
- * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
- */
- uzp2 X_0.\lanes, TMP0.\lanes, Y_0.\lanes
- uzp1 Y_0.\lanes, TMP0.\lanes, Y_0.\lanes
- uzp2 X_1.\lanes, TMP1.\lanes, Y_1.\lanes
- uzp1 Y_1.\lanes, TMP1.\lanes, Y_1.\lanes
- uzp2 X_2.\lanes, TMP2.\lanes, Y_2.\lanes
- uzp1 Y_2.\lanes, TMP2.\lanes, Y_2.\lanes
- uzp2 X_3.\lanes, TMP3.\lanes, Y_3.\lanes
- uzp1 Y_3.\lanes, TMP3.\lanes, Y_3.\lanes
-
- // Do the cipher rounds
- mov x6, ROUND_KEYS
- mov w7, NROUNDS
-.Lnext_round_\@:
-.if \decrypting
- ld1r {ROUND_KEY.\lanes}, [x6]
- sub x6, x6, #( \n / 8 )
- _speck_unround_128bytes \n, \lanes
-.else
- ld1r {ROUND_KEY.\lanes}, [x6], #( \n / 8 )
- _speck_round_128bytes \n, \lanes
-.endif
- subs w7, w7, #1
- bne .Lnext_round_\@
-
- // Re-interleave the 'x' and 'y' elements of each block
- zip1 TMP0.\lanes, Y_0.\lanes, X_0.\lanes
- zip2 Y_0.\lanes, Y_0.\lanes, X_0.\lanes
- zip1 TMP1.\lanes, Y_1.\lanes, X_1.\lanes
- zip2 Y_1.\lanes, Y_1.\lanes, X_1.\lanes
- zip1 TMP2.\lanes, Y_2.\lanes, X_2.\lanes
- zip2 Y_2.\lanes, Y_2.\lanes, X_2.\lanes
- zip1 TMP3.\lanes, Y_3.\lanes, X_3.\lanes
- zip2 Y_3.\lanes, Y_3.\lanes, X_3.\lanes
-
- // XOR the encrypted/decrypted blocks with the tweaks calculated earlier
- eor X_0.16b, TMP0.16b, TWEAKV0.16b
- eor Y_0.16b, Y_0.16b, TWEAKV1.16b
- eor X_1.16b, TMP1.16b, TWEAKV2.16b
- eor Y_1.16b, Y_1.16b, TWEAKV3.16b
- eor X_2.16b, TMP2.16b, TWEAKV4.16b
- eor Y_2.16b, Y_2.16b, TWEAKV5.16b
- eor X_3.16b, TMP3.16b, TWEAKV6.16b
- eor Y_3.16b, Y_3.16b, TWEAKV7.16b
- mov TWEAKV0.16b, TWEAKV_NEXT.16b
-
- // Store the ciphertext in the destination buffer
- st1 {X_0.16b-Y_1.16b}, [DST], #64
- st1 {X_2.16b-Y_3.16b}, [DST], #64
-
- // Continue if there are more 128-byte chunks remaining
- subs NBYTES, NBYTES, #128
- bne .Lnext_128bytes_\@
-
- // Store the next tweak and return
-.if \n == 64
- st1 {TWEAKV_NEXT.16b}, [TWEAK]
-.else
- st1 {TWEAKV_NEXT.8b}, [TWEAK]
-.endif
- ret
-.endm
-
-ENTRY(speck128_xts_encrypt_neon)
- _speck_xts_crypt n=64, lanes=2d, decrypting=0
-ENDPROC(speck128_xts_encrypt_neon)
-
-ENTRY(speck128_xts_decrypt_neon)
- _speck_xts_crypt n=64, lanes=2d, decrypting=1
-ENDPROC(speck128_xts_decrypt_neon)
-
-ENTRY(speck64_xts_encrypt_neon)
- _speck_xts_crypt n=32, lanes=4s, decrypting=0
-ENDPROC(speck64_xts_encrypt_neon)
-
-ENTRY(speck64_xts_decrypt_neon)
- _speck_xts_crypt n=32, lanes=4s, decrypting=1
-ENDPROC(speck64_xts_decrypt_neon)
diff --git a/arch/arm64/crypto/speck-neon-glue.c b/arch/arm64/crypto/speck-neon-glue.c
deleted file mode 100644
index 6e233aeb4ff4..000000000000
--- a/arch/arm64/crypto/speck-neon-glue.c
+++ /dev/null
@@ -1,282 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- * (64-bit version; based on the 32-bit version)
- *
- * Copyright (c) 2018 Google, Inc
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/algapi.h>
-#include <crypto/gf128mul.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/speck.h>
-#include <crypto/xts.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-/* The assembly functions only handle multiples of 128 bytes */
-#define SPECK_NEON_CHUNK_SIZE 128
-
-/* Speck128 */
-
-struct speck128_xts_tfm_ctx {
- struct speck128_tfm_ctx main_key;
- struct speck128_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
- u8 *, const u8 *);
-typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
- const void *, unsigned int, void *);
-
-static __always_inline int
-__speck128_xts_crypt(struct skcipher_request *req,
- speck128_crypt_one_t crypt_one,
- speck128_xts_crypt_many_t crypt_many)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- le128 tweak;
- int err;
-
- err = skcipher_walk_virt(&walk, req, true);
-
- crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
- u8 *dst = walk.dst.virt.addr;
- const u8 *src = walk.src.virt.addr;
-
- if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
- unsigned int count;
-
- count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
- kernel_neon_begin();
- (*crypt_many)(ctx->main_key.round_keys,
- ctx->main_key.nrounds,
- dst, src, count, &tweak);
- kernel_neon_end();
- dst += count;
- src += count;
- nbytes -= count;
- }
-
- /* Handle any remainder with generic code */
- while (nbytes >= sizeof(tweak)) {
- le128_xor((le128 *)dst, (const le128 *)src, &tweak);
- (*crypt_one)(&ctx->main_key, dst, dst);
- le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
- gf128mul_x_ble(&tweak, &tweak);
-
- dst += sizeof(tweak);
- src += sizeof(tweak);
- nbytes -= sizeof(tweak);
- }
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- return err;
-}
-
-static int speck128_xts_encrypt(struct skcipher_request *req)
-{
- return __speck128_xts_crypt(req, crypto_speck128_encrypt,
- speck128_xts_encrypt_neon);
-}
-
-static int speck128_xts_decrypt(struct skcipher_request *req)
-{
- return __speck128_xts_crypt(req, crypto_speck128_decrypt,
- speck128_xts_decrypt_neon);
-}
-
-static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- int err;
-
- err = xts_verify_key(tfm, key, keylen);
- if (err)
- return err;
-
- keylen /= 2;
-
- err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
- if (err)
- return err;
-
- return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-/* Speck64 */
-
-struct speck64_xts_tfm_ctx {
- struct speck64_tfm_ctx main_key;
- struct speck64_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
- u8 *, const u8 *);
-typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
- const void *, unsigned int, void *);
-
-static __always_inline int
-__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
- speck64_xts_crypt_many_t crypt_many)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- __le64 tweak;
- int err;
-
- err = skcipher_walk_virt(&walk, req, true);
-
- crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
- u8 *dst = walk.dst.virt.addr;
- const u8 *src = walk.src.virt.addr;
-
- if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
- unsigned int count;
-
- count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
- kernel_neon_begin();
- (*crypt_many)(ctx->main_key.round_keys,
- ctx->main_key.nrounds,
- dst, src, count, &tweak);
- kernel_neon_end();
- dst += count;
- src += count;
- nbytes -= count;
- }
-
- /* Handle any remainder with generic code */
- while (nbytes >= sizeof(tweak)) {
- *(__le64 *)dst = *(__le64 *)src ^ tweak;
- (*crypt_one)(&ctx->main_key, dst, dst);
- *(__le64 *)dst ^= tweak;
- tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
- ((tweak & cpu_to_le64(1ULL << 63)) ?
- 0x1B : 0));
- dst += sizeof(tweak);
- src += sizeof(tweak);
- nbytes -= sizeof(tweak);
- }
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- return err;
-}
-
-static int speck64_xts_encrypt(struct skcipher_request *req)
-{
- return __speck64_xts_crypt(req, crypto_speck64_encrypt,
- speck64_xts_encrypt_neon);
-}
-
-static int speck64_xts_decrypt(struct skcipher_request *req)
-{
- return __speck64_xts_crypt(req, crypto_speck64_decrypt,
- speck64_xts_decrypt_neon);
-}
-
-static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- int err;
-
- err = xts_verify_key(tfm, key, keylen);
- if (err)
- return err;
-
- keylen /= 2;
-
- err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
- if (err)
- return err;
-
- return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-static struct skcipher_alg speck_algs[] = {
- {
- .base.cra_name = "xts(speck128)",
- .base.cra_driver_name = "xts-speck128-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = SPECK128_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct speck128_xts_tfm_ctx),
- .base.cra_alignmask = 7,
- .base.cra_module = THIS_MODULE,
- .min_keysize = 2 * SPECK128_128_KEY_SIZE,
- .max_keysize = 2 * SPECK128_256_KEY_SIZE,
- .ivsize = SPECK128_BLOCK_SIZE,
- .walksize = SPECK_NEON_CHUNK_SIZE,
- .setkey = speck128_xts_setkey,
- .encrypt = speck128_xts_encrypt,
- .decrypt = speck128_xts_decrypt,
- }, {
- .base.cra_name = "xts(speck64)",
- .base.cra_driver_name = "xts-speck64-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = SPECK64_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct speck64_xts_tfm_ctx),
- .base.cra_alignmask = 7,
- .base.cra_module = THIS_MODULE,
- .min_keysize = 2 * SPECK64_96_KEY_SIZE,
- .max_keysize = 2 * SPECK64_128_KEY_SIZE,
- .ivsize = SPECK64_BLOCK_SIZE,
- .walksize = SPECK_NEON_CHUNK_SIZE,
- .setkey = speck64_xts_setkey,
- .encrypt = speck64_xts_encrypt,
- .decrypt = speck64_xts_decrypt,
- }
-};
-
-static int __init speck_neon_module_init(void)
-{
- if (!(elf_hwcap & HWCAP_ASIMD))
- return -ENODEV;
- return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-static void __exit speck_neon_module_exit(void)
-{
- crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-module_init(speck_neon_module_init);
-module_exit(speck_neon_module_exit);
-
-MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("xts(speck128)");
-MODULE_ALIAS_CRYPTO("xts-speck128-neon");
-MODULE_ALIAS_CRYPTO("xts(speck64)");
-MODULE_ALIAS_CRYPTO("xts-speck64-neon");
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 1d5483f6e457..85904b73e261 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -621,7 +621,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -657,7 +656,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 52a0af127951..9b3818bbb68b 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -578,7 +578,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -614,7 +613,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index b3103e51268a..769677809945 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -599,7 +599,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -635,7 +634,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index fb7d651a4cab..7dd264ddf2ea 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 6b37f5537c39..515f7439c755 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -580,7 +580,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -616,7 +615,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index c717bf879449..8e1038ceb407 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -602,7 +602,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -638,7 +637,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 226c994ce794..62c8aaa15cc7 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -684,7 +684,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -720,7 +719,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index b383327fd77a..733973f91297 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 9783d3deb9e9..fee30cc9ac16 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index a35d10ee10cb..eebf9c9088e7 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -593,7 +593,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -629,7 +628,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 573bf922d448..dabc54318c09 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -571,7 +571,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -607,7 +606,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index efb27a7fcc55..0d9a5c2a311a 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -572,7 +572,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -608,7 +607,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 941d8cc6c9f5..259d1698ac50 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -668,7 +668,6 @@ CONFIG_CRYPTO_USER=m
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index eb6f75f24208..37fd60c20e22 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -610,7 +610,6 @@ CONFIG_CRYPTO_USER=m
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index c54cb26eb7f5..812d9498d97b 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -44,7 +44,7 @@ struct s390_aes_ctx {
int key_len;
unsigned long fc;
union {
- struct crypto_skcipher *blk;
+ struct crypto_sync_skcipher *blk;
struct crypto_cipher *cip;
} fallback;
};
@@ -54,7 +54,7 @@ struct s390_xts_ctx {
u8 pcc_key[32];
int key_len;
unsigned long fc;
- struct crypto_skcipher *fallback;
+ struct crypto_sync_skcipher *fallback;
};
struct gcm_sg_walk {
@@ -184,14 +184,15 @@ static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key,
struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
unsigned int ret;
- crypto_skcipher_clear_flags(sctx->fallback.blk, CRYPTO_TFM_REQ_MASK);
- crypto_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
+ crypto_sync_skcipher_clear_flags(sctx->fallback.blk,
+ CRYPTO_TFM_REQ_MASK);
+ crypto_sync_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
CRYPTO_TFM_REQ_MASK);
- ret = crypto_skcipher_setkey(sctx->fallback.blk, key, len);
+ ret = crypto_sync_skcipher_setkey(sctx->fallback.blk, key, len);
tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
- tfm->crt_flags |= crypto_skcipher_get_flags(sctx->fallback.blk) &
+ tfm->crt_flags |= crypto_sync_skcipher_get_flags(sctx->fallback.blk) &
CRYPTO_TFM_RES_MASK;
return ret;
@@ -204,9 +205,9 @@ static int fallback_blk_dec(struct blkcipher_desc *desc,
unsigned int ret;
struct crypto_blkcipher *tfm = desc->tfm;
struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
- SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
- skcipher_request_set_tfm(req, sctx->fallback.blk);
+ skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
skcipher_request_set_callback(req, desc->flags, NULL, NULL);
skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
@@ -223,9 +224,9 @@ static int fallback_blk_enc(struct blkcipher_desc *desc,
unsigned int ret;
struct crypto_blkcipher *tfm = desc->tfm;
struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
- SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
- skcipher_request_set_tfm(req, sctx->fallback.blk);
+ skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
skcipher_request_set_callback(req, desc->flags, NULL, NULL);
skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
@@ -306,8 +307,7 @@ static int fallback_init_blk(struct crypto_tfm *tfm)
const char *name = tfm->__crt_alg->cra_name;
struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
- sctx->fallback.blk = crypto_alloc_skcipher(name, 0,
- CRYPTO_ALG_ASYNC |
+ sctx->fallback.blk = crypto_alloc_sync_skcipher(name, 0,
CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(sctx->fallback.blk)) {
@@ -323,7 +323,7 @@ static void fallback_exit_blk(struct crypto_tfm *tfm)
{
struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
- crypto_free_skcipher(sctx->fallback.blk);
+ crypto_free_sync_skcipher(sctx->fallback.blk);
}
static struct crypto_alg ecb_aes_alg = {
@@ -453,14 +453,15 @@ static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
unsigned int ret;
- crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK);
- crypto_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
+ crypto_sync_skcipher_clear_flags(xts_ctx->fallback,
+ CRYPTO_TFM_REQ_MASK);
+ crypto_sync_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
CRYPTO_TFM_REQ_MASK);
- ret = crypto_skcipher_setkey(xts_ctx->fallback, key, len);
+ ret = crypto_sync_skcipher_setkey(xts_ctx->fallback, key, len);
tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
- tfm->crt_flags |= crypto_skcipher_get_flags(xts_ctx->fallback) &
+ tfm->crt_flags |= crypto_sync_skcipher_get_flags(xts_ctx->fallback) &
CRYPTO_TFM_RES_MASK;
return ret;
@@ -472,10 +473,10 @@ static int xts_fallback_decrypt(struct blkcipher_desc *desc,
{
struct crypto_blkcipher *tfm = desc->tfm;
struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
- SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
unsigned int ret;
- skcipher_request_set_tfm(req, xts_ctx->fallback);
+ skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
skcipher_request_set_callback(req, desc->flags, NULL, NULL);
skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
@@ -491,10 +492,10 @@ static int xts_fallback_encrypt(struct blkcipher_desc *desc,
{
struct crypto_blkcipher *tfm = desc->tfm;
struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
- SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
unsigned int ret;
- skcipher_request_set_tfm(req, xts_ctx->fallback);
+ skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
skcipher_request_set_callback(req, desc->flags, NULL, NULL);
skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
@@ -611,8 +612,7 @@ static int xts_fallback_init(struct crypto_tfm *tfm)
const char *name = tfm->__crt_alg->cra_name;
struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
- xts_ctx->fallback = crypto_alloc_skcipher(name, 0,
- CRYPTO_ALG_ASYNC |
+ xts_ctx->fallback = crypto_alloc_sync_skcipher(name, 0,
CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(xts_ctx->fallback)) {
@@ -627,7 +627,7 @@ static void xts_fallback_exit(struct crypto_tfm *tfm)
{
struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
- crypto_free_skcipher(xts_ctx->fallback);
+ crypto_free_sync_skcipher(xts_ctx->fallback);
}
static struct crypto_alg xts_aes_alg = {
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 20add000dd6d..7cb6a52f727d 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -221,7 +221,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_DEFLATE=m
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index a450ad573dcb..a4b0007a54e1 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -60,9 +60,6 @@ endif
ifeq ($(avx2_supported),yes)
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
- obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/
- obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/
- obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/
obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
endif
@@ -106,7 +103,7 @@ ifeq ($(avx2_supported),yes)
morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
endif
-aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index acbe7e8336d8..661f7daf43da 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -102,9 +102,6 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
-int crypto_fpu_init(void);
-void crypto_fpu_exit(void);
-
#define AVX_GEN2_OPTSIZE 640
#define AVX_GEN4_OPTSIZE 4096
@@ -817,7 +814,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
/* Linearize assoc, if not already linear */
if (req->src->length >= assoclen && req->src->length &&
(!PageHighMem(sg_page(req->src)) ||
- req->src->offset + req->src->length < PAGE_SIZE)) {
+ req->src->offset + req->src->length <= PAGE_SIZE)) {
scatterwalk_start(&assoc_sg_walk, req->src);
assoc = scatterwalk_map(&assoc_sg_walk);
} else {
@@ -1253,22 +1250,6 @@ static struct skcipher_alg aesni_skciphers[] = {
static
struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
-static struct {
- const char *algname;
- const char *drvname;
- const char *basename;
- struct simd_skcipher_alg *simd;
-} aesni_simd_skciphers2[] = {
-#if (defined(MODULE) && IS_ENABLED(CONFIG_CRYPTO_PCBC)) || \
- IS_BUILTIN(CONFIG_CRYPTO_PCBC)
- {
- .algname = "pcbc(aes)",
- .drvname = "pcbc-aes-aesni",
- .basename = "fpu(pcbc(__aes-aesni))",
- },
-#endif
-};
-
#ifdef CONFIG_X86_64
static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
unsigned int key_len)
@@ -1422,10 +1403,6 @@ static void aesni_free_simds(void)
for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) &&
aesni_simd_skciphers[i]; i++)
simd_skcipher_free(aesni_simd_skciphers[i]);
-
- for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++)
- if (aesni_simd_skciphers2[i].simd)
- simd_skcipher_free(aesni_simd_skciphers2[i].simd);
}
static int __init aesni_init(void)
@@ -1469,13 +1446,9 @@ static int __init aesni_init(void)
#endif
#endif
- err = crypto_fpu_init();
- if (err)
- return err;
-
err = crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
if (err)
- goto fpu_exit;
+ return err;
err = crypto_register_skciphers(aesni_skciphers,
ARRAY_SIZE(aesni_skciphers));
@@ -1499,18 +1472,6 @@ static int __init aesni_init(void)
aesni_simd_skciphers[i] = simd;
}
- for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) {
- algname = aesni_simd_skciphers2[i].algname;
- drvname = aesni_simd_skciphers2[i].drvname;
- basename = aesni_simd_skciphers2[i].basename;
- simd = simd_skcipher_create_compat(algname, drvname, basename);
- err = PTR_ERR(simd);
- if (IS_ERR(simd))
- continue;
-
- aesni_simd_skciphers2[i].simd = simd;
- }
-
return 0;
unregister_simds:
@@ -1521,8 +1482,6 @@ unregister_skciphers:
ARRAY_SIZE(aesni_skciphers));
unregister_algs:
crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
-fpu_exit:
- crypto_fpu_exit();
return err;
}
@@ -1533,8 +1492,6 @@ static void __exit aesni_exit(void)
crypto_unregister_skciphers(aesni_skciphers,
ARRAY_SIZE(aesni_skciphers));
crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
-
- crypto_fpu_exit();
}
late_initcall(aesni_init);
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
deleted file mode 100644
index 406680476c52..000000000000
--- a/arch/x86/crypto/fpu.c
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * FPU: Wrapper for blkcipher touching fpu
- *
- * Copyright (c) Intel Corp.
- * Author: Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/internal/skcipher.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <asm/fpu/api.h>
-
-struct crypto_fpu_ctx {
- struct crypto_skcipher *child;
-};
-
-static int crypto_fpu_setkey(struct crypto_skcipher *parent, const u8 *key,
- unsigned int keylen)
-{
- struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(parent);
- struct crypto_skcipher *child = ctx->child;
- int err;
-
- crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
- crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
- CRYPTO_TFM_REQ_MASK);
- err = crypto_skcipher_setkey(child, key, keylen);
- crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
- CRYPTO_TFM_RES_MASK);
- return err;
-}
-
-static int crypto_fpu_encrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_skcipher *child = ctx->child;
- SKCIPHER_REQUEST_ON_STACK(subreq, child);
- int err;
-
- skcipher_request_set_tfm(subreq, child);
- skcipher_request_set_callback(subreq, 0, NULL, NULL);
- skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
- req->iv);
-
- kernel_fpu_begin();
- err = crypto_skcipher_encrypt(subreq);
- kernel_fpu_end();
-
- skcipher_request_zero(subreq);
- return err;
-}
-
-static int crypto_fpu_decrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_skcipher *child = ctx->child;
- SKCIPHER_REQUEST_ON_STACK(subreq, child);
- int err;
-
- skcipher_request_set_tfm(subreq, child);
- skcipher_request_set_callback(subreq, 0, NULL, NULL);
- skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
- req->iv);
-
- kernel_fpu_begin();
- err = crypto_skcipher_decrypt(subreq);
- kernel_fpu_end();
-
- skcipher_request_zero(subreq);
- return err;
-}
-
-static int crypto_fpu_init_tfm(struct crypto_skcipher *tfm)
-{
- struct skcipher_instance *inst = skcipher_alg_instance(tfm);
- struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_skcipher_spawn *spawn;
- struct crypto_skcipher *cipher;
-
- spawn = skcipher_instance_ctx(inst);
- cipher = crypto_spawn_skcipher(spawn);
- if (IS_ERR(cipher))
- return PTR_ERR(cipher);
-
- ctx->child = cipher;
-
- return 0;
-}
-
-static void crypto_fpu_exit_tfm(struct crypto_skcipher *tfm)
-{
- struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- crypto_free_skcipher(ctx->child);
-}
-
-static void crypto_fpu_free(struct skcipher_instance *inst)
-{
- crypto_drop_skcipher(skcipher_instance_ctx(inst));
- kfree(inst);
-}
-
-static int crypto_fpu_create(struct crypto_template *tmpl, struct rtattr **tb)
-{
- struct crypto_skcipher_spawn *spawn;
- struct skcipher_instance *inst;
- struct crypto_attr_type *algt;
- struct skcipher_alg *alg;
- const char *cipher_name;
- int err;
-
- algt = crypto_get_attr_type(tb);
- if (IS_ERR(algt))
- return PTR_ERR(algt);
-
- if ((algt->type ^ (CRYPTO_ALG_INTERNAL | CRYPTO_ALG_TYPE_SKCIPHER)) &
- algt->mask)
- return -EINVAL;
-
- if (!(algt->mask & CRYPTO_ALG_INTERNAL))
- return -EINVAL;
-
- cipher_name = crypto_attr_alg_name(tb[1]);
- if (IS_ERR(cipher_name))
- return PTR_ERR(cipher_name);
-
- inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
- if (!inst)
- return -ENOMEM;
-
- spawn = skcipher_instance_ctx(inst);
-
- crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
- err = crypto_grab_skcipher(spawn, cipher_name, CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC);
- if (err)
- goto out_free_inst;
-
- alg = crypto_skcipher_spawn_alg(spawn);
-
- err = crypto_inst_setname(skcipher_crypto_instance(inst), "fpu",
- &alg->base);
- if (err)
- goto out_drop_skcipher;
-
- inst->alg.base.cra_flags = CRYPTO_ALG_INTERNAL;
- inst->alg.base.cra_priority = alg->base.cra_priority;
- inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
- inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
-
- inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg);
- inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
- inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg);
-
- inst->alg.base.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
-
- inst->alg.init = crypto_fpu_init_tfm;
- inst->alg.exit = crypto_fpu_exit_tfm;
-
- inst->alg.setkey = crypto_fpu_setkey;
- inst->alg.encrypt = crypto_fpu_encrypt;
- inst->alg.decrypt = crypto_fpu_decrypt;
-
- inst->free = crypto_fpu_free;
-
- err = skcipher_register_instance(tmpl, inst);
- if (err)
- goto out_drop_skcipher;
-
-out:
- return err;
-
-out_drop_skcipher:
- crypto_drop_skcipher(spawn);
-out_free_inst:
- kfree(inst);
- goto out;
-}
-
-static struct crypto_template crypto_fpu_tmpl = {
- .name = "fpu",
- .create = crypto_fpu_create,
- .module = THIS_MODULE,
-};
-
-int __init crypto_fpu_init(void)
-{
- return crypto_register_template(&crypto_fpu_tmpl);
-}
-
-void crypto_fpu_exit(void)
-{
- crypto_unregister_template(&crypto_fpu_tmpl);
-}
-
-MODULE_ALIAS_CRYPTO("fpu");
diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile
deleted file mode 100644
index 815ded3ba90e..000000000000
--- a/arch/x86/crypto/sha1-mb/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Arch-specific CryptoAPI modules.
-#
-
-OBJECT_FILES_NON_STANDARD := y
-
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
- $(comma)4)$(comma)%ymm2,yes,no)
-ifeq ($(avx2_supported),yes)
- obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o
- sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \
- sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o
-endif
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c
deleted file mode 100644
index b93805664c1d..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb.c
+++ /dev/null
@@ -1,1011 +0,0 @@
-/*
- * Multi buffer SHA1 algorithm Glue Code
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <asm/byteorder.h>
-#include <linux/hardirq.h>
-#include <asm/fpu/api.h>
-#include "sha1_mb_ctx.h"
-
-#define FLUSH_INTERVAL 1000 /* in usec */
-
-static struct mcryptd_alg_state sha1_mb_alg_state;
-
-struct sha1_mb_ctx {
- struct mcryptd_ahash *mcryptd_tfm;
-};
-
-static inline struct mcryptd_hash_request_ctx
- *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx)
-{
- struct ahash_request *areq;
-
- areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
- return container_of(areq, struct mcryptd_hash_request_ctx, areq);
-}
-
-static inline struct ahash_request
- *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
-{
- return container_of((void *) ctx, struct ahash_request, __ctx);
-}
-
-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
- struct ahash_request *areq)
-{
- rctx->flag = HASH_UPDATE;
-}
-
-static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_submit)
- (struct sha1_mb_mgr *state, struct job_sha1 *job);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)
- (struct sha1_mb_mgr *state);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)
- (struct sha1_mb_mgr *state);
-
-static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
- uint64_t total_len)
-{
- uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1);
-
- memset(&padblock[i], 0, SHA1_BLOCK_SIZE);
- padblock[i] = 0x80;
-
- i += ((SHA1_BLOCK_SIZE - 1) &
- (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1)))
- + 1 + SHA1_PADLENGTHFIELD_SIZE;
-
-#if SHA1_PADLENGTHFIELD_SIZE == 16
- *((uint64_t *) &padblock[i - 16]) = 0;
-#endif
-
- *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
-
- /* Number of extra blocks to hash */
- return i >> SHA1_LOG2_BLOCK_SIZE;
-}
-
-static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr,
- struct sha1_hash_ctx *ctx)
-{
- while (ctx) {
- if (ctx->status & HASH_CTX_STS_COMPLETE) {
- /* Clear PROCESSING bit */
- ctx->status = HASH_CTX_STS_COMPLETE;
- return ctx;
- }
-
- /*
- * If the extra blocks are empty, begin hashing what remains
- * in the user's buffer.
- */
- if (ctx->partial_block_buffer_length == 0 &&
- ctx->incoming_buffer_length) {
-
- const void *buffer = ctx->incoming_buffer;
- uint32_t len = ctx->incoming_buffer_length;
- uint32_t copy_len;
-
- /*
- * Only entire blocks can be hashed.
- * Copy remainder to extra blocks buffer.
- */
- copy_len = len & (SHA1_BLOCK_SIZE-1);
-
- if (copy_len) {
- len -= copy_len;
- memcpy(ctx->partial_block_buffer,
- ((const char *) buffer + len),
- copy_len);
- ctx->partial_block_buffer_length = copy_len;
- }
-
- ctx->incoming_buffer_length = 0;
-
- /* len should be a multiple of the block size now */
- assert((len % SHA1_BLOCK_SIZE) == 0);
-
- /* Set len to the number of blocks to be hashed */
- len >>= SHA1_LOG2_BLOCK_SIZE;
-
- if (len) {
-
- ctx->job.buffer = (uint8_t *) buffer;
- ctx->job.len = len;
- ctx = (struct sha1_hash_ctx *)sha1_job_mgr_submit(&mgr->mgr,
- &ctx->job);
- continue;
- }
- }
-
- /*
- * If the extra blocks are not empty, then we are
- * either on the last block(s) or we need more
- * user input before continuing.
- */
- if (ctx->status & HASH_CTX_STS_LAST) {
-
- uint8_t *buf = ctx->partial_block_buffer;
- uint32_t n_extra_blocks =
- sha1_pad(buf, ctx->total_length);
-
- ctx->status = (HASH_CTX_STS_PROCESSING |
- HASH_CTX_STS_COMPLETE);
- ctx->job.buffer = buf;
- ctx->job.len = (uint32_t) n_extra_blocks;
- ctx = (struct sha1_hash_ctx *)
- sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
- continue;
- }
-
- ctx->status = HASH_CTX_STS_IDLE;
- return ctx;
- }
-
- return NULL;
-}
-
-static struct sha1_hash_ctx
- *sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr)
-{
- /*
- * If get_comp_job returns NULL, there are no jobs complete.
- * If get_comp_job returns a job, verify that it is safe to return to
- * the user.
- * If it is not ready, resubmit the job to finish processing.
- * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned.
- * Otherwise, all jobs currently being managed by the hash_ctx_mgr
- * still need processing.
- */
- struct sha1_hash_ctx *ctx;
-
- ctx = (struct sha1_hash_ctx *) sha1_job_mgr_get_comp_job(&mgr->mgr);
- return sha1_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static void sha1_ctx_mgr_init(struct sha1_ctx_mgr *mgr)
-{
- sha1_job_mgr_init(&mgr->mgr);
-}
-
-static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
- struct sha1_hash_ctx *ctx,
- const void *buffer,
- uint32_t len,
- int flags)
-{
- if (flags & ~(HASH_UPDATE | HASH_LAST)) {
- /* User should not pass anything other than UPDATE or LAST */
- ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
- return ctx;
- }
-
- if (ctx->status & HASH_CTX_STS_PROCESSING) {
- /* Cannot submit to a currently processing job. */
- ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
- return ctx;
- }
-
- if (ctx->status & HASH_CTX_STS_COMPLETE) {
- /* Cannot update a finished job. */
- ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
- return ctx;
- }
-
- /*
- * If we made it here, there were no errors during this call to
- * submit
- */
- ctx->error = HASH_CTX_ERROR_NONE;
-
- /* Store buffer ptr info from user */
- ctx->incoming_buffer = buffer;
- ctx->incoming_buffer_length = len;
-
- /*
- * Store the user's request flags and mark this ctx as currently
- * being processed.
- */
- ctx->status = (flags & HASH_LAST) ?
- (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
- HASH_CTX_STS_PROCESSING;
-
- /* Advance byte counter */
- ctx->total_length += len;
-
- /*
- * If there is anything currently buffered in the extra blocks,
- * append to it until it contains a whole block.
- * Or if the user's buffer contains less than a whole block,
- * append as much as possible to the extra block.
- */
- if (ctx->partial_block_buffer_length || len < SHA1_BLOCK_SIZE) {
- /*
- * Compute how many bytes to copy from user buffer into
- * extra block
- */
- uint32_t copy_len = SHA1_BLOCK_SIZE -
- ctx->partial_block_buffer_length;
- if (len < copy_len)
- copy_len = len;
-
- if (copy_len) {
- /* Copy and update relevant pointers and counters */
- memcpy(&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
- buffer, copy_len);
-
- ctx->partial_block_buffer_length += copy_len;
- ctx->incoming_buffer = (const void *)
- ((const char *)buffer + copy_len);
- ctx->incoming_buffer_length = len - copy_len;
- }
-
- /*
- * The extra block should never contain more than 1 block
- * here
- */
- assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE);
-
- /*
- * If the extra block buffer contains exactly 1 block, it can
- * be hashed.
- */
- if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) {
- ctx->partial_block_buffer_length = 0;
-
- ctx->job.buffer = ctx->partial_block_buffer;
- ctx->job.len = 1;
- ctx = (struct sha1_hash_ctx *)
- sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
- }
- }
-
- return sha1_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr)
-{
- struct sha1_hash_ctx *ctx;
-
- while (1) {
- ctx = (struct sha1_hash_ctx *) sha1_job_mgr_flush(&mgr->mgr);
-
- /* If flush returned 0, there are no more jobs in flight. */
- if (!ctx)
- return NULL;
-
- /*
- * If flush returned a job, resubmit the job to finish
- * processing.
- */
- ctx = sha1_ctx_mgr_resubmit(mgr, ctx);
-
- /*
- * If sha1_ctx_mgr_resubmit returned a job, it is ready to be
- * returned. Otherwise, all jobs currently being managed by the
- * sha1_ctx_mgr still need processing. Loop.
- */
- if (ctx)
- return ctx;
- }
-}
-
-static int sha1_mb_init(struct ahash_request *areq)
-{
- struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
-
- hash_ctx_init(sctx);
- sctx->job.result_digest[0] = SHA1_H0;
- sctx->job.result_digest[1] = SHA1_H1;
- sctx->job.result_digest[2] = SHA1_H2;
- sctx->job.result_digest[3] = SHA1_H3;
- sctx->job.result_digest[4] = SHA1_H4;
- sctx->total_length = 0;
- sctx->partial_block_buffer_length = 0;
- sctx->status = HASH_CTX_STS_IDLE;
-
- return 0;
-}
-
-static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
-{
- int i;
- struct sha1_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
- __be32 *dst = (__be32 *) rctx->out;
-
- for (i = 0; i < 5; ++i)
- dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
-
- return 0;
-}
-
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
- struct mcryptd_alg_cstate *cstate, bool flush)
-{
- int flag = HASH_UPDATE;
- int nbytes, err = 0;
- struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
- struct sha1_hash_ctx *sha_ctx;
-
- /* more work ? */
- while (!(rctx->flag & HASH_DONE)) {
- nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
- if (nbytes < 0) {
- err = nbytes;
- goto out;
- }
- /* check if the walk is done */
- if (crypto_ahash_walk_last(&rctx->walk)) {
- rctx->flag |= HASH_DONE;
- if (rctx->flag & HASH_FINAL)
- flag |= HASH_LAST;
-
- }
- sha_ctx = (struct sha1_hash_ctx *)
- ahash_request_ctx(&rctx->areq);
- kernel_fpu_begin();
- sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx,
- rctx->walk.data, nbytes, flag);
- if (!sha_ctx) {
- if (flush)
- sha_ctx = sha1_ctx_mgr_flush(cstate->mgr);
- }
- kernel_fpu_end();
- if (sha_ctx)
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- else {
- rctx = NULL;
- goto out;
- }
- }
-
- /* copy the results */
- if (rctx->flag & HASH_FINAL)
- sha1_mb_set_results(rctx);
-
-out:
- *ret_rctx = rctx;
- return err;
-}
-
-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
- struct mcryptd_alg_cstate *cstate,
- int err)
-{
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha1_hash_ctx *sha_ctx;
- struct mcryptd_hash_request_ctx *req_ctx;
- int ret;
-
- /* remove from work list */
- spin_lock(&cstate->work_lock);
- list_del(&rctx->waiter);
- spin_unlock(&cstate->work_lock);
-
- if (irqs_disabled())
- rctx->complete(&req->base, err);
- else {
- local_bh_disable();
- rctx->complete(&req->base, err);
- local_bh_enable();
- }
-
- /* check to see if there are other jobs that are done */
- sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
- while (sha_ctx) {
- req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&req_ctx, cstate, false);
- if (req_ctx) {
- spin_lock(&cstate->work_lock);
- list_del(&req_ctx->waiter);
- spin_unlock(&cstate->work_lock);
-
- req = cast_mcryptd_ctx_to_req(req_ctx);
- if (irqs_disabled())
- req_ctx->complete(&req->base, ret);
- else {
- local_bh_disable();
- req_ctx->complete(&req->base, ret);
- local_bh_enable();
- }
- }
- sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
- }
-
- return 0;
-}
-
-static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
- struct mcryptd_alg_cstate *cstate)
-{
- unsigned long next_flush;
- unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
-
- /* initialize tag */
- rctx->tag.arrival = jiffies; /* tag the arrival time */
- rctx->tag.seq_num = cstate->next_seq_num++;
- next_flush = rctx->tag.arrival + delay;
- rctx->tag.expire = next_flush;
-
- spin_lock(&cstate->work_lock);
- list_add_tail(&rctx->waiter, &cstate->work_list);
- spin_unlock(&cstate->work_lock);
-
- mcryptd_arm_flusher(cstate, delay);
-}
-
-static int sha1_mb_update(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx, areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
-
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha1_hash_ctx *sha_ctx;
- int ret = 0, nbytes;
-
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
- if (nbytes < 0) {
- ret = nbytes;
- goto done;
- }
-
- if (crypto_ahash_walk_last(&rctx->walk))
- rctx->flag |= HASH_DONE;
-
- /* submit */
- sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
- sha1_mb_add_list(rctx, cstate);
- kernel_fpu_begin();
- sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
- nbytes, HASH_UPDATE);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
-
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha1_mb_finup(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx, areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
-
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha1_hash_ctx *sha_ctx;
- int ret = 0, flag = HASH_UPDATE, nbytes;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
- if (nbytes < 0) {
- ret = nbytes;
- goto done;
- }
-
- if (crypto_ahash_walk_last(&rctx->walk)) {
- rctx->flag |= HASH_DONE;
- flag = HASH_LAST;
- }
-
- /* submit */
- rctx->flag |= HASH_FINAL;
- sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
- sha1_mb_add_list(rctx, cstate);
-
- kernel_fpu_begin();
- sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
- nbytes, flag);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha1_mb_final(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx, areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
-
- struct sha1_hash_ctx *sha_ctx;
- int ret = 0;
- u8 data;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- rctx->flag |= HASH_DONE | HASH_FINAL;
-
- sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
- /* flag HASH_FINAL and 0 data size */
- sha1_mb_add_list(rctx, cstate);
- kernel_fpu_begin();
- sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
- HASH_LAST);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha1_mb_export(struct ahash_request *areq, void *out)
-{
- struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
-
- memcpy(out, sctx, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha1_mb_import(struct ahash_request *areq, const void *in)
-{
- struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
-
- memcpy(sctx, in, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
- struct mcryptd_ahash *mcryptd_tfm;
- struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
- struct mcryptd_hash_ctx *mctx;
-
- mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
- CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(mcryptd_tfm))
- return PTR_ERR(mcryptd_tfm);
- mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
- mctx->alg_state = &sha1_mb_alg_state;
- ctx->mcryptd_tfm = mcryptd_tfm;
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- crypto_ahash_reqsize(&mcryptd_tfm->base));
-
- return 0;
-}
-
-static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
- struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
- mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static int sha1_mb_areq_init_tfm(struct crypto_tfm *tfm)
-{
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- sizeof(struct sha1_hash_ctx));
-
- return 0;
-}
-
-static void sha1_mb_areq_exit_tfm(struct crypto_tfm *tfm)
-{
- struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
- mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static struct ahash_alg sha1_mb_areq_alg = {
- .init = sha1_mb_init,
- .update = sha1_mb_update,
- .final = sha1_mb_final,
- .finup = sha1_mb_finup,
- .export = sha1_mb_export,
- .import = sha1_mb_import,
- .halg = {
- .digestsize = SHA1_DIGEST_SIZE,
- .statesize = sizeof(struct sha1_hash_ctx),
- .base = {
- .cra_name = "__sha1-mb",
- .cra_driver_name = "__intel_sha1-mb",
- .cra_priority = 100,
- /*
- * use ASYNC flag as some buffers in multi-buffer
- * algo may not have completed before hashing thread
- * sleep
- */
- .cra_flags = CRYPTO_ALG_ASYNC |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SHA1_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT
- (sha1_mb_areq_alg.halg.base.cra_list),
- .cra_init = sha1_mb_areq_init_tfm,
- .cra_exit = sha1_mb_areq_exit_tfm,
- .cra_ctxsize = sizeof(struct sha1_hash_ctx),
- }
- }
-};
-
-static int sha1_mb_async_init(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_init(mcryptd_req);
-}
-
-static int sha1_mb_async_update(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_update(mcryptd_req);
-}
-
-static int sha1_mb_async_finup(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_finup(mcryptd_req);
-}
-
-static int sha1_mb_async_final(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_final(mcryptd_req);
-}
-
-static int sha1_mb_async_digest(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_digest(mcryptd_req);
-}
-
-static int sha1_mb_async_export(struct ahash_request *req, void *out)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_export(mcryptd_req, out);
-}
-
-static int sha1_mb_async_import(struct ahash_request *req, const void *in)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
- struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
- struct mcryptd_hash_request_ctx *rctx;
- struct ahash_request *areq;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- rctx = ahash_request_ctx(mcryptd_req);
- areq = &rctx->areq;
-
- ahash_request_set_tfm(areq, child);
- ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
- rctx->complete, req);
-
- return crypto_ahash_import(mcryptd_req, in);
-}
-
-static struct ahash_alg sha1_mb_async_alg = {
- .init = sha1_mb_async_init,
- .update = sha1_mb_async_update,
- .final = sha1_mb_async_final,
- .finup = sha1_mb_async_finup,
- .digest = sha1_mb_async_digest,
- .export = sha1_mb_async_export,
- .import = sha1_mb_async_import,
- .halg = {
- .digestsize = SHA1_DIGEST_SIZE,
- .statesize = sizeof(struct sha1_hash_ctx),
- .base = {
- .cra_name = "sha1",
- .cra_driver_name = "sha1_mb",
- /*
- * Low priority, since with few concurrent hash requests
- * this is extremely slow due to the flush delay. Users
- * whose workloads would benefit from this can request
- * it explicitly by driver name, or can increase its
- * priority at runtime using NETLINK_CRYPTO.
- */
- .cra_priority = 50,
- .cra_flags = CRYPTO_ALG_ASYNC,
- .cra_blocksize = SHA1_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(sha1_mb_async_alg.halg.base.cra_list),
- .cra_init = sha1_mb_async_init_tfm,
- .cra_exit = sha1_mb_async_exit_tfm,
- .cra_ctxsize = sizeof(struct sha1_mb_ctx),
- .cra_alignmask = 0,
- },
- },
-};
-
-static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate)
-{
- struct mcryptd_hash_request_ctx *rctx;
- unsigned long cur_time;
- unsigned long next_flush = 0;
- struct sha1_hash_ctx *sha_ctx;
-
-
- cur_time = jiffies;
-
- while (!list_empty(&cstate->work_list)) {
- rctx = list_entry(cstate->work_list.next,
- struct mcryptd_hash_request_ctx, waiter);
- if (time_before(cur_time, rctx->tag.expire))
- break;
- kernel_fpu_begin();
- sha_ctx = (struct sha1_hash_ctx *)
- sha1_ctx_mgr_flush(cstate->mgr);
- kernel_fpu_end();
- if (!sha_ctx) {
- pr_err("sha1_mb error: nothing got flushed for non-empty list\n");
- break;
- }
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- sha_finish_walk(&rctx, cstate, true);
- sha_complete_job(rctx, cstate, 0);
- }
-
- if (!list_empty(&cstate->work_list)) {
- rctx = list_entry(cstate->work_list.next,
- struct mcryptd_hash_request_ctx, waiter);
- /* get the hash context and then flush time */
- next_flush = rctx->tag.expire;
- mcryptd_arm_flusher(cstate, get_delay(next_flush));
- }
- return next_flush;
-}
-
-static int __init sha1_mb_mod_init(void)
-{
-
- int cpu;
- int err;
- struct mcryptd_alg_cstate *cpu_state;
-
- /* check for dependent cpu features */
- if (!boot_cpu_has(X86_FEATURE_AVX2) ||
- !boot_cpu_has(X86_FEATURE_BMI2))
- return -ENODEV;
-
- /* initialize multibuffer structures */
- sha1_mb_alg_state.alg_cstate = alloc_percpu(struct mcryptd_alg_cstate);
-
- sha1_job_mgr_init = sha1_mb_mgr_init_avx2;
- sha1_job_mgr_submit = sha1_mb_mgr_submit_avx2;
- sha1_job_mgr_flush = sha1_mb_mgr_flush_avx2;
- sha1_job_mgr_get_comp_job = sha1_mb_mgr_get_comp_job_avx2;
-
- if (!sha1_mb_alg_state.alg_cstate)
- return -ENOMEM;
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
- cpu_state->next_flush = 0;
- cpu_state->next_seq_num = 0;
- cpu_state->flusher_engaged = false;
- INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
- cpu_state->cpu = cpu;
- cpu_state->alg_state = &sha1_mb_alg_state;
- cpu_state->mgr = kzalloc(sizeof(struct sha1_ctx_mgr),
- GFP_KERNEL);
- if (!cpu_state->mgr)
- goto err2;
- sha1_ctx_mgr_init(cpu_state->mgr);
- INIT_LIST_HEAD(&cpu_state->work_list);
- spin_lock_init(&cpu_state->work_lock);
- }
- sha1_mb_alg_state.flusher = &sha1_mb_flusher;
-
- err = crypto_register_ahash(&sha1_mb_areq_alg);
- if (err)
- goto err2;
- err = crypto_register_ahash(&sha1_mb_async_alg);
- if (err)
- goto err1;
-
-
- return 0;
-err1:
- crypto_unregister_ahash(&sha1_mb_areq_alg);
-err2:
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
- kfree(cpu_state->mgr);
- }
- free_percpu(sha1_mb_alg_state.alg_cstate);
- return -ENODEV;
-}
-
-static void __exit sha1_mb_mod_fini(void)
-{
- int cpu;
- struct mcryptd_alg_cstate *cpu_state;
-
- crypto_unregister_ahash(&sha1_mb_async_alg);
- crypto_unregister_ahash(&sha1_mb_areq_alg);
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
- kfree(cpu_state->mgr);
- }
- free_percpu(sha1_mb_alg_state.alg_cstate);
-}
-
-module_init(sha1_mb_mod_init);
-module_exit(sha1_mb_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer accelerated");
-
-MODULE_ALIAS_CRYPTO("sha1");
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
deleted file mode 100644
index 9454bd16f9f8..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Header file for multi buffer SHA context
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SHA_MB_CTX_INTERNAL_H
-#define _SHA_MB_CTX_INTERNAL_H
-
-#include "sha1_mb_mgr.h"
-
-#define HASH_UPDATE 0x00
-#define HASH_LAST 0x01
-#define HASH_DONE 0x02
-#define HASH_FINAL 0x04
-
-#define HASH_CTX_STS_IDLE 0x00
-#define HASH_CTX_STS_PROCESSING 0x01
-#define HASH_CTX_STS_LAST 0x02
-#define HASH_CTX_STS_COMPLETE 0x04
-
-enum hash_ctx_error {
- HASH_CTX_ERROR_NONE = 0,
- HASH_CTX_ERROR_INVALID_FLAGS = -1,
- HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
- HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
-
-#ifdef HASH_CTX_DEBUG
- HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
-#endif
-};
-
-
-#define hash_ctx_user_data(ctx) ((ctx)->user_data)
-#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
-#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
-#define hash_ctx_status(ctx) ((ctx)->status)
-#define hash_ctx_error(ctx) ((ctx)->error)
-#define hash_ctx_init(ctx) \
- do { \
- (ctx)->error = HASH_CTX_ERROR_NONE; \
- (ctx)->status = HASH_CTX_STS_COMPLETE; \
- } while (0)
-
-
-/* Hash Constants and Typedefs */
-#define SHA1_DIGEST_LENGTH 5
-#define SHA1_LOG2_BLOCK_SIZE 6
-
-#define SHA1_PADLENGTHFIELD_SIZE 8
-
-#ifdef SHA_MB_DEBUG
-#define assert(expr) \
-do { \
- if (unlikely(!(expr))) { \
- printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
- #expr, __FILE__, __func__, __LINE__); \
- } \
-} while (0)
-#else
-#define assert(expr) do {} while (0)
-#endif
-
-struct sha1_ctx_mgr {
- struct sha1_mb_mgr mgr;
-};
-
-/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */
-
-struct sha1_hash_ctx {
- /* Must be at struct offset 0 */
- struct job_sha1 job;
- /* status flag */
- int status;
- /* error flag */
- int error;
-
- uint64_t total_length;
- const void *incoming_buffer;
- uint32_t incoming_buffer_length;
- uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2];
- uint32_t partial_block_buffer_length;
- void *user_data;
-};
-
-#endif
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h b/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h
deleted file mode 100644
index 08ad1a9acfd7..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Header file for multi buffer SHA1 algorithm manager
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * James Guilford <james.guilford@intel.com>
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef __SHA_MB_MGR_H
-#define __SHA_MB_MGR_H
-
-
-#include <linux/types.h>
-
-#define NUM_SHA1_DIGEST_WORDS 5
-
-enum job_sts { STS_UNKNOWN = 0,
- STS_BEING_PROCESSED = 1,
- STS_COMPLETED = 2,
- STS_INTERNAL_ERROR = 3,
- STS_ERROR = 4
-};
-
-struct job_sha1 {
- u8 *buffer;
- u32 len;
- u32 result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32);
- enum job_sts status;
- void *user_data;
-};
-
-/* SHA1 out-of-order scheduler */
-
-/* typedef uint32_t sha1_digest_array[5][8]; */
-
-struct sha1_args_x8 {
- uint32_t digest[5][8];
- uint8_t *data_ptr[8];
-};
-
-struct sha1_lane_data {
- struct job_sha1 *job_in_lane;
-};
-
-struct sha1_mb_mgr {
- struct sha1_args_x8 args;
-
- uint32_t lens[8];
-
- /* each byte is index (0...7) of unused lanes */
- uint64_t unused_lanes;
- /* byte 4 is set to FF as a flag */
- struct sha1_lane_data ldata[8];
-};
-
-
-#define SHA1_MB_MGR_NUM_LANES_AVX2 8
-
-void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state);
-struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state,
- struct job_sha1 *job);
-struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state);
-struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state);
-
-#endif
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S
deleted file mode 100644
index 86688c6e7a25..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * Header file for multi buffer SHA1 algorithm data structure
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * James Guilford <james.guilford@intel.com>
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# Macros for defining data structures
-
-# Usage example
-
-#START_FIELDS # JOB_AES
-### name size align
-#FIELD _plaintext, 8, 8 # pointer to plaintext
-#FIELD _ciphertext, 8, 8 # pointer to ciphertext
-#FIELD _IV, 16, 8 # IV
-#FIELD _keys, 8, 8 # pointer to keys
-#FIELD _len, 4, 4 # length in bytes
-#FIELD _status, 4, 4 # status enumeration
-#FIELD _user_data, 8, 8 # pointer to user data
-#UNION _union, size1, align1, \
-# size2, align2, \
-# size3, align3, \
-# ...
-#END_FIELDS
-#%assign _JOB_AES_size _FIELD_OFFSET
-#%assign _JOB_AES_align _STRUCT_ALIGN
-
-#########################################################################
-
-# Alternate "struc-like" syntax:
-# STRUCT job_aes2
-# RES_Q .plaintext, 1
-# RES_Q .ciphertext, 1
-# RES_DQ .IV, 1
-# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
-# RES_U .union, size1, align1, \
-# size2, align2, \
-# ...
-# ENDSTRUCT
-# # Following only needed if nesting
-# %assign job_aes2_size _FIELD_OFFSET
-# %assign job_aes2_align _STRUCT_ALIGN
-#
-# RES_* macros take a name, a count and an optional alignment.
-# The count in in terms of the base size of the macro, and the
-# default alignment is the base size.
-# The macros are:
-# Macro Base size
-# RES_B 1
-# RES_W 2
-# RES_D 4
-# RES_Q 8
-# RES_DQ 16
-# RES_Y 32
-# RES_Z 64
-#
-# RES_U defines a union. It's arguments are a name and two or more
-# pairs of "size, alignment"
-#
-# The two assigns are only needed if this structure is being nested
-# within another. Even if the assigns are not done, one can still use
-# STRUCT_NAME_size as the size of the structure.
-#
-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
-#
-# The differences between this and using "struc" directly are that each
-# type is implicitly aligned to its natural length (although this can be
-# over-ridden with an explicit third parameter), and that the structure
-# is padded at the end to its overall alignment.
-#
-
-#########################################################################
-
-#ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_
-#define _SHA1_MB_MGR_DATASTRUCT_ASM_
-
-## START_FIELDS
-.macro START_FIELDS
- _FIELD_OFFSET = 0
- _STRUCT_ALIGN = 0
-.endm
-
-## FIELD name size align
-.macro FIELD name size align
- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
- \name = _FIELD_OFFSET
- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
-.if (\align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = \align
-.endif
-.endm
-
-## END_FIELDS
-.macro END_FIELDS
- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
-.endm
-
-########################################################################
-
-.macro STRUCT p1
-START_FIELDS
-.struc \p1
-.endm
-
-.macro ENDSTRUCT
- tmp = _FIELD_OFFSET
- END_FIELDS
- tmp = (_FIELD_OFFSET - %%tmp)
-.if (tmp > 0)
- .lcomm tmp
-.endif
-.endstruc
-.endm
-
-## RES_int name size align
-.macro RES_int p1 p2 p3
- name = \p1
- size = \p2
- align = .\p3
-
- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
-.align align
-.lcomm name size
- _FIELD_OFFSET = _FIELD_OFFSET + (size)
-.if (align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = align
-.endif
-.endm
-
-
-
-# macro RES_B name, size [, align]
-.macro RES_B _name, _size, _align=1
-RES_int _name _size _align
-.endm
-
-# macro RES_W name, size [, align]
-.macro RES_W _name, _size, _align=2
-RES_int _name 2*(_size) _align
-.endm
-
-# macro RES_D name, size [, align]
-.macro RES_D _name, _size, _align=4
-RES_int _name 4*(_size) _align
-.endm
-
-# macro RES_Q name, size [, align]
-.macro RES_Q _name, _size, _align=8
-RES_int _name 8*(_size) _align
-.endm
-
-# macro RES_DQ name, size [, align]
-.macro RES_DQ _name, _size, _align=16
-RES_int _name 16*(_size) _align
-.endm
-
-# macro RES_Y name, size [, align]
-.macro RES_Y _name, _size, _align=32
-RES_int _name 32*(_size) _align
-.endm
-
-# macro RES_Z name, size [, align]
-.macro RES_Z _name, _size, _align=64
-RES_int _name 64*(_size) _align
-.endm
-
-
-#endif
-
-########################################################################
-#### Define constants
-########################################################################
-
-########################################################################
-#### Define SHA1 Out Of Order Data Structures
-########################################################################
-
-START_FIELDS # LANE_DATA
-### name size align
-FIELD _job_in_lane, 8, 8 # pointer to job object
-END_FIELDS
-
-_LANE_DATA_size = _FIELD_OFFSET
-_LANE_DATA_align = _STRUCT_ALIGN
-
-########################################################################
-
-START_FIELDS # SHA1_ARGS_X8
-### name size align
-FIELD _digest, 4*5*8, 16 # transposed digest
-FIELD _data_ptr, 8*8, 8 # array of pointers to data
-END_FIELDS
-
-_SHA1_ARGS_X4_size = _FIELD_OFFSET
-_SHA1_ARGS_X4_align = _STRUCT_ALIGN
-_SHA1_ARGS_X8_size = _FIELD_OFFSET
-_SHA1_ARGS_X8_align = _STRUCT_ALIGN
-
-########################################################################
-
-START_FIELDS # MB_MGR
-### name size align
-FIELD _args, _SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align
-FIELD _lens, 4*8, 8
-FIELD _unused_lanes, 8, 8
-FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align
-END_FIELDS
-
-_MB_MGR_size = _FIELD_OFFSET
-_MB_MGR_align = _STRUCT_ALIGN
-
-_args_digest = _args + _digest
-_args_data_ptr = _args + _data_ptr
-
-
-########################################################################
-#### Define constants
-########################################################################
-
-#define STS_UNKNOWN 0
-#define STS_BEING_PROCESSED 1
-#define STS_COMPLETED 2
-
-########################################################################
-#### Define JOB_SHA1 structure
-########################################################################
-
-START_FIELDS # JOB_SHA1
-
-### name size align
-FIELD _buffer, 8, 8 # pointer to buffer
-FIELD _len, 4, 4 # length in bytes
-FIELD _result_digest, 5*4, 32 # Digest (output)
-FIELD _status, 4, 4
-FIELD _user_data, 8, 8
-END_FIELDS
-
-_JOB_SHA1_size = _FIELD_OFFSET
-_JOB_SHA1_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
deleted file mode 100644
index 7cfba738f104..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Flush routine for SHA1 multibuffer
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * James Guilford <james.guilford@intel.com>
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha1_mb_mgr_datastruct.S"
-
-
-.extern sha1_x8_avx2
-
-# LINUX register definitions
-#define arg1 %rdi
-#define arg2 %rsi
-
-# Common definitions
-#define state arg1
-#define job arg2
-#define len2 arg2
-
-# idx must be a register not clobbered by sha1_x8_avx2
-#define idx %r8
-#define DWORD_idx %r8d
-
-#define unused_lanes %rbx
-#define lane_data %rbx
-#define tmp2 %rbx
-#define tmp2_w %ebx
-
-#define job_rax %rax
-#define tmp1 %rax
-#define size_offset %rax
-#define tmp %rax
-#define start_offset %rax
-
-#define tmp3 %arg1
-
-#define extra_blocks %arg2
-#define p %arg2
-
-.macro LABEL prefix n
-\prefix\n\():
-.endm
-
-.macro JNE_SKIP i
-jne skip_\i
-.endm
-
-.altmacro
-.macro SET_OFFSET _offset
-offset = \_offset
-.endm
-.noaltmacro
-
-# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state)
-# arg 1 : rcx : state
-ENTRY(sha1_mb_mgr_flush_avx2)
- FRAME_BEGIN
- push %rbx
-
- # If bit (32+3) is set, then all lanes are empty
- mov _unused_lanes(state), unused_lanes
- bt $32+3, unused_lanes
- jc return_null
-
- # find a lane with a non-null job
- xor idx, idx
- offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne one(%rip), idx
- offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne two(%rip), idx
- offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne three(%rip), idx
- offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne four(%rip), idx
- offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne five(%rip), idx
- offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne six(%rip), idx
- offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne seven(%rip), idx
-
- # copy idx to empty lanes
-copy_lane_data:
- offset = (_args + _data_ptr)
- mov offset(state,idx,8), tmp
-
- I = 0
-.rep 8
- offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
-.altmacro
- JNE_SKIP %I
- offset = (_args + _data_ptr + 8*I)
- mov tmp, offset(state)
- offset = (_lens + 4*I)
- movl $0xFFFFFFFF, offset(state)
-LABEL skip_ %I
- I = (I+1)
-.noaltmacro
-.endr
-
- # Find min length
- vmovdqu _lens+0*16(state), %xmm0
- vmovdqu _lens+1*16(state), %xmm1
-
- vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
- vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
- vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
-
- vmovd %xmm2, DWORD_idx
- mov idx, len2
- and $0xF, idx
- shr $4, len2
- jz len_is_0
-
- vpand clear_low_nibble(%rip), %xmm2, %xmm2
- vpshufd $0, %xmm2, %xmm2
-
- vpsubd %xmm2, %xmm0, %xmm0
- vpsubd %xmm2, %xmm1, %xmm1
-
- vmovdqu %xmm0, _lens+0*16(state)
- vmovdqu %xmm1, _lens+1*16(state)
-
- # "state" and "args" are the same address, arg1
- # len is arg2
- call sha1_x8_avx2
- # state and idx are intact
-
-
-len_is_0:
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- mov _unused_lanes(state), unused_lanes
- shl $4, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens(state, idx, 4)
-
- vmovd _args_digest(state , idx, 4) , %xmm0
- vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
- movl _args_digest+4*32(state, idx, 4), tmp2_w
-
- vmovdqu %xmm0, _result_digest(job_rax)
- offset = (_result_digest + 1*16)
- mov tmp2_w, offset(job_rax)
-
-return:
- pop %rbx
- FRAME_END
- ret
-
-return_null:
- xor job_rax, job_rax
- jmp return
-ENDPROC(sha1_mb_mgr_flush_avx2)
-
-
-#################################################################
-
-.align 16
-ENTRY(sha1_mb_mgr_get_comp_job_avx2)
- push %rbx
-
- ## if bit 32+3 is set, then all lanes are empty
- mov _unused_lanes(state), unused_lanes
- bt $(32+3), unused_lanes
- jc .return_null
-
- # Find min length
- vmovdqu _lens(state), %xmm0
- vmovdqu _lens+1*16(state), %xmm1
-
- vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
- vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
- vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
-
- vmovd %xmm2, DWORD_idx
- test $~0xF, idx
- jnz .return_null
-
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- mov _unused_lanes(state), unused_lanes
- shl $4, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens(state, idx, 4)
-
- vmovd _args_digest(state, idx, 4), %xmm0
- vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
- movl _args_digest+4*32(state, idx, 4), tmp2_w
-
- vmovdqu %xmm0, _result_digest(job_rax)
- movl tmp2_w, _result_digest+1*16(job_rax)
-
- pop %rbx
-
- ret
-
-.return_null:
- xor job_rax, job_rax
- pop %rbx
- ret
-ENDPROC(sha1_mb_mgr_get_comp_job_avx2)
-
-.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
-.octa 0x000000000000000000000000FFFFFFF0
-
-.section .rodata.cst8, "aM", @progbits, 8
-.align 8
-one:
-.quad 1
-two:
-.quad 2
-three:
-.quad 3
-four:
-.quad 4
-five:
-.quad 5
-six:
-.quad 6
-seven:
-.quad 7
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c
deleted file mode 100644
index d2add0d35f43..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Initialization code for multi buffer SHA1 algorithm for AVX2
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sha1_mb_mgr.h"
-
-void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
-{
- unsigned int j;
- state->unused_lanes = 0xF76543210ULL;
- for (j = 0; j < 8; j++) {
- state->lens[j] = 0xFFFFFFFF;
- state->ldata[j].job_in_lane = NULL;
- }
-}
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
deleted file mode 100644
index 7a93b1c0d69a..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Buffer submit code for multi buffer SHA1 algorithm
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * James Guilford <james.guilford@intel.com>
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha1_mb_mgr_datastruct.S"
-
-
-.extern sha1_x8_avx
-
-# LINUX register definitions
-arg1 = %rdi
-arg2 = %rsi
-size_offset = %rcx
-tmp2 = %rcx
-extra_blocks = %rdx
-
-# Common definitions
-#define state arg1
-#define job %rsi
-#define len2 arg2
-#define p2 arg2
-
-# idx must be a register not clobberred by sha1_x8_avx2
-idx = %r8
-DWORD_idx = %r8d
-last_len = %r8
-
-p = %r11
-start_offset = %r11
-
-unused_lanes = %rbx
-BYTE_unused_lanes = %bl
-
-job_rax = %rax
-len = %rax
-DWORD_len = %eax
-
-lane = %r12
-tmp3 = %r12
-
-tmp = %r9
-DWORD_tmp = %r9d
-
-lane_data = %r10
-
-# JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
-# arg 1 : rcx : state
-# arg 2 : rdx : job
-ENTRY(sha1_mb_mgr_submit_avx2)
- FRAME_BEGIN
- push %rbx
- push %r12
-
- mov _unused_lanes(state), unused_lanes
- mov unused_lanes, lane
- and $0xF, lane
- shr $4, unused_lanes
- imul $_LANE_DATA_size, lane, lane_data
- movl $STS_BEING_PROCESSED, _status(job)
- lea _ldata(state, lane_data), lane_data
- mov unused_lanes, _unused_lanes(state)
- movl _len(job), DWORD_len
-
- mov job, _job_in_lane(lane_data)
- shl $4, len
- or lane, len
-
- movl DWORD_len, _lens(state , lane, 4)
-
- # Load digest words from result_digest
- vmovdqu _result_digest(job), %xmm0
- mov _result_digest+1*16(job), DWORD_tmp
- vmovd %xmm0, _args_digest(state, lane, 4)
- vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4)
- vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4)
- vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4)
- movl DWORD_tmp, _args_digest+4*32(state , lane, 4)
-
- mov _buffer(job), p
- mov p, _args_data_ptr(state, lane, 8)
-
- cmp $0xF, unused_lanes
- jne return_null
-
-start_loop:
- # Find min length
- vmovdqa _lens(state), %xmm0
- vmovdqa _lens+1*16(state), %xmm1
-
- vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
- vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
- vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
-
- vmovd %xmm2, DWORD_idx
- mov idx, len2
- and $0xF, idx
- shr $4, len2
- jz len_is_0
-
- vpand clear_low_nibble(%rip), %xmm2, %xmm2
- vpshufd $0, %xmm2, %xmm2
-
- vpsubd %xmm2, %xmm0, %xmm0
- vpsubd %xmm2, %xmm1, %xmm1
-
- vmovdqa %xmm0, _lens + 0*16(state)
- vmovdqa %xmm1, _lens + 1*16(state)
-
-
- # "state" and "args" are the same address, arg1
- # len is arg2
- call sha1_x8_avx2
-
- # state and idx are intact
-
-len_is_0:
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- mov _unused_lanes(state), unused_lanes
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- shl $4, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens(state, idx, 4)
-
- vmovd _args_digest(state, idx, 4), %xmm0
- vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
- vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
- vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
- movl _args_digest+4*32(state, idx, 4), DWORD_tmp
-
- vmovdqu %xmm0, _result_digest(job_rax)
- movl DWORD_tmp, _result_digest+1*16(job_rax)
-
-return:
- pop %r12
- pop %rbx
- FRAME_END
- ret
-
-return_null:
- xor job_rax, job_rax
- jmp return
-
-ENDPROC(sha1_mb_mgr_submit_avx2)
-
-.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
- .octa 0x000000000000000000000000FFFFFFF0
diff --git a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S
deleted file mode 100644
index 20f77aa633de..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S
+++ /dev/null
@@ -1,492 +0,0 @@
-/*
- * Multi-buffer SHA1 algorithm hash compute routine
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * James Guilford <james.guilford@intel.com>
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include "sha1_mb_mgr_datastruct.S"
-
-## code to compute oct SHA1 using SSE-256
-## outer calling routine takes care of save and restore of XMM registers
-
-## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15# ymm0-15
-##
-## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15
-## Linux preserves: rdi rbp r8
-##
-## clobbers ymm0-15
-
-
-# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
-# "transpose" data in {r0...r7} using temps {t0...t1}
-# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
-# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
-# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
-# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
-# r4 = {e7 e6 e5 e4 e3 e2 e1 e0}
-# r5 = {f7 f6 f5 f4 f3 f2 f1 f0}
-# r6 = {g7 g6 g5 g4 g3 g2 g1 g0}
-# r7 = {h7 h6 h5 h4 h3 h2 h1 h0}
-#
-# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {h0 g0 f0 e0 d0 c0 b0 a0}
-# r1 = {h1 g1 f1 e1 d1 c1 b1 a1}
-# r2 = {h2 g2 f2 e2 d2 c2 b2 a2}
-# r3 = {h3 g3 f3 e3 d3 c3 b3 a3}
-# r4 = {h4 g4 f4 e4 d4 c4 b4 a4}
-# r5 = {h5 g5 f5 e5 d5 c5 b5 a5}
-# r6 = {h6 g6 f6 e6 d6 c6 b6 a6}
-# r7 = {h7 g7 f7 e7 d7 c7 b7 a7}
-#
-
-.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
- # process top half (r0..r3) {a...d}
- vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
- vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
- vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
- vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
- vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1}
- vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2}
- vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3}
- vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0}
-
- # use r2 in place of t0
- # process bottom half (r4..r7) {e...h}
- vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0}
- vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2}
- vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0}
- vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2}
- vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1}
- vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2}
- vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3}
- vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0}
-
- vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6
- vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2
- vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5
- vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1
- vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7
- vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3
- vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4
- vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0
-
-.endm
-##
-## Magic functions defined in FIPS 180-1
-##
-# macro MAGIC_F0 F,B,C,D,T ## F = (D ^ (B & (C ^ D)))
-.macro MAGIC_F0 regF regB regC regD regT
- vpxor \regD, \regC, \regF
- vpand \regB, \regF, \regF
- vpxor \regD, \regF, \regF
-.endm
-
-# macro MAGIC_F1 F,B,C,D,T ## F = (B ^ C ^ D)
-.macro MAGIC_F1 regF regB regC regD regT
- vpxor \regC, \regD, \regF
- vpxor \regB, \regF, \regF
-.endm
-
-# macro MAGIC_F2 F,B,C,D,T ## F = ((B & C) | (B & D) | (C & D))
-.macro MAGIC_F2 regF regB regC regD regT
- vpor \regC, \regB, \regF
- vpand \regC, \regB, \regT
- vpand \regD, \regF, \regF
- vpor \regT, \regF, \regF
-.endm
-
-# macro MAGIC_F3 F,B,C,D,T ## F = (B ^ C ^ D)
-.macro MAGIC_F3 regF regB regC regD regT
- MAGIC_F1 \regF,\regB,\regC,\regD,\regT
-.endm
-
-# PROLD reg, imm, tmp
-.macro PROLD reg imm tmp
- vpsrld $(32-\imm), \reg, \tmp
- vpslld $\imm, \reg, \reg
- vpor \tmp, \reg, \reg
-.endm
-
-.macro PROLD_nd reg imm tmp src
- vpsrld $(32-\imm), \src, \tmp
- vpslld $\imm, \src, \reg
- vpor \tmp, \reg, \reg
-.endm
-
-.macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC
- vpaddd \immCNT, \regE, \regE
- vpaddd \memW*32(%rsp), \regE, \regE
- PROLD_nd \regT, 5, \regF, \regA
- vpaddd \regT, \regE, \regE
- \MAGIC \regF, \regB, \regC, \regD, \regT
- PROLD \regB, 30, \regT
- vpaddd \regF, \regE, \regE
-.endm
-
-.macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC
- vpaddd \immCNT, \regE, \regE
- offset = ((\memW - 14) & 15) * 32
- vmovdqu offset(%rsp), W14
- vpxor W14, W16, W16
- offset = ((\memW - 8) & 15) * 32
- vpxor offset(%rsp), W16, W16
- offset = ((\memW - 3) & 15) * 32
- vpxor offset(%rsp), W16, W16
- vpsrld $(32-1), W16, \regF
- vpslld $1, W16, W16
- vpor W16, \regF, \regF
-
- ROTATE_W
-
- offset = ((\memW - 0) & 15) * 32
- vmovdqu \regF, offset(%rsp)
- vpaddd \regF, \regE, \regE
- PROLD_nd \regT, 5, \regF, \regA
- vpaddd \regT, \regE, \regE
- \MAGIC \regF,\regB,\regC,\regD,\regT ## FUN = MAGIC_Fi(B,C,D)
- PROLD \regB,30, \regT
- vpaddd \regF, \regE, \regE
-.endm
-
-########################################################################
-########################################################################
-########################################################################
-
-## FRAMESZ plus pushes must be an odd multiple of 8
-YMM_SAVE = (15-15)*32
-FRAMESZ = 32*16 + YMM_SAVE
-_YMM = FRAMESZ - YMM_SAVE
-
-#define VMOVPS vmovups
-
-IDX = %rax
-inp0 = %r9
-inp1 = %r10
-inp2 = %r11
-inp3 = %r12
-inp4 = %r13
-inp5 = %r14
-inp6 = %r15
-inp7 = %rcx
-arg1 = %rdi
-arg2 = %rsi
-RSP_SAVE = %rdx
-
-# ymm0 A
-# ymm1 B
-# ymm2 C
-# ymm3 D
-# ymm4 E
-# ymm5 F AA
-# ymm6 T0 BB
-# ymm7 T1 CC
-# ymm8 T2 DD
-# ymm9 T3 EE
-# ymm10 T4 TMP
-# ymm11 T5 FUN
-# ymm12 T6 K
-# ymm13 T7 W14
-# ymm14 T8 W15
-# ymm15 T9 W16
-
-
-A = %ymm0
-B = %ymm1
-C = %ymm2
-D = %ymm3
-E = %ymm4
-F = %ymm5
-T0 = %ymm6
-T1 = %ymm7
-T2 = %ymm8
-T3 = %ymm9
-T4 = %ymm10
-T5 = %ymm11
-T6 = %ymm12
-T7 = %ymm13
-T8 = %ymm14
-T9 = %ymm15
-
-AA = %ymm5
-BB = %ymm6
-CC = %ymm7
-DD = %ymm8
-EE = %ymm9
-TMP = %ymm10
-FUN = %ymm11
-K = %ymm12
-W14 = %ymm13
-W15 = %ymm14
-W16 = %ymm15
-
-.macro ROTATE_ARGS
- TMP_ = E
- E = D
- D = C
- C = B
- B = A
- A = TMP_
-.endm
-
-.macro ROTATE_W
-TMP_ = W16
-W16 = W15
-W15 = W14
-W14 = TMP_
-.endm
-
-# 8 streams x 5 32bit words per digest x 4 bytes per word
-#define DIGEST_SIZE (8*5*4)
-
-.align 32
-
-# void sha1_x8_avx2(void **input_data, UINT128 *digest, UINT32 size)
-# arg 1 : pointer to array[4] of pointer to input data
-# arg 2 : size (in blocks) ;; assumed to be >= 1
-#
-ENTRY(sha1_x8_avx2)
-
- # save callee-saved clobbered registers to comply with C function ABI
- push %r12
- push %r13
- push %r14
- push %r15
-
- #save rsp
- mov %rsp, RSP_SAVE
- sub $FRAMESZ, %rsp
-
- #align rsp to 32 Bytes
- and $~0x1F, %rsp
-
- ## Initialize digests
- vmovdqu 0*32(arg1), A
- vmovdqu 1*32(arg1), B
- vmovdqu 2*32(arg1), C
- vmovdqu 3*32(arg1), D
- vmovdqu 4*32(arg1), E
-
- ## transpose input onto stack
- mov _data_ptr+0*8(arg1),inp0
- mov _data_ptr+1*8(arg1),inp1
- mov _data_ptr+2*8(arg1),inp2
- mov _data_ptr+3*8(arg1),inp3
- mov _data_ptr+4*8(arg1),inp4
- mov _data_ptr+5*8(arg1),inp5
- mov _data_ptr+6*8(arg1),inp6
- mov _data_ptr+7*8(arg1),inp7
-
- xor IDX, IDX
-lloop:
- vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), F
- I=0
-.rep 2
- VMOVPS (inp0, IDX), T0
- VMOVPS (inp1, IDX), T1
- VMOVPS (inp2, IDX), T2
- VMOVPS (inp3, IDX), T3
- VMOVPS (inp4, IDX), T4
- VMOVPS (inp5, IDX), T5
- VMOVPS (inp6, IDX), T6
- VMOVPS (inp7, IDX), T7
-
- TRANSPOSE8 T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
- vpshufb F, T0, T0
- vmovdqu T0, (I*8)*32(%rsp)
- vpshufb F, T1, T1
- vmovdqu T1, (I*8+1)*32(%rsp)
- vpshufb F, T2, T2
- vmovdqu T2, (I*8+2)*32(%rsp)
- vpshufb F, T3, T3
- vmovdqu T3, (I*8+3)*32(%rsp)
- vpshufb F, T4, T4
- vmovdqu T4, (I*8+4)*32(%rsp)
- vpshufb F, T5, T5
- vmovdqu T5, (I*8+5)*32(%rsp)
- vpshufb F, T6, T6
- vmovdqu T6, (I*8+6)*32(%rsp)
- vpshufb F, T7, T7
- vmovdqu T7, (I*8+7)*32(%rsp)
- add $32, IDX
- I = (I+1)
-.endr
- # save old digests
- vmovdqu A,AA
- vmovdqu B,BB
- vmovdqu C,CC
- vmovdqu D,DD
- vmovdqu E,EE
-
-##
-## perform 0-79 steps
-##
- vmovdqu K00_19(%rip), K
-## do rounds 0...15
- I = 0
-.rep 16
- SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
- ROTATE_ARGS
- I = (I+1)
-.endr
-
-## do rounds 16...19
- vmovdqu ((16 - 16) & 15) * 32 (%rsp), W16
- vmovdqu ((16 - 15) & 15) * 32 (%rsp), W15
-.rep 4
- SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
- ROTATE_ARGS
- I = (I+1)
-.endr
-
-## do rounds 20...39
- vmovdqu K20_39(%rip), K
-.rep 20
- SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1
- ROTATE_ARGS
- I = (I+1)
-.endr
-
-## do rounds 40...59
- vmovdqu K40_59(%rip), K
-.rep 20
- SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2
- ROTATE_ARGS
- I = (I+1)
-.endr
-
-## do rounds 60...79
- vmovdqu K60_79(%rip), K
-.rep 20
- SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3
- ROTATE_ARGS
- I = (I+1)
-.endr
-
- vpaddd AA,A,A
- vpaddd BB,B,B
- vpaddd CC,C,C
- vpaddd DD,D,D
- vpaddd EE,E,E
-
- sub $1, arg2
- jne lloop
-
- # write out digests
- vmovdqu A, 0*32(arg1)
- vmovdqu B, 1*32(arg1)
- vmovdqu C, 2*32(arg1)
- vmovdqu D, 3*32(arg1)
- vmovdqu E, 4*32(arg1)
-
- # update input pointers
- add IDX, inp0
- add IDX, inp1
- add IDX, inp2
- add IDX, inp3
- add IDX, inp4
- add IDX, inp5
- add IDX, inp6
- add IDX, inp7
- mov inp0, _data_ptr (arg1)
- mov inp1, _data_ptr + 1*8(arg1)
- mov inp2, _data_ptr + 2*8(arg1)
- mov inp3, _data_ptr + 3*8(arg1)
- mov inp4, _data_ptr + 4*8(arg1)
- mov inp5, _data_ptr + 5*8(arg1)
- mov inp6, _data_ptr + 6*8(arg1)
- mov inp7, _data_ptr + 7*8(arg1)
-
- ################
- ## Postamble
-
- mov RSP_SAVE, %rsp
-
- # restore callee-saved clobbered registers
- pop %r15
- pop %r14
- pop %r13
- pop %r12
-
- ret
-ENDPROC(sha1_x8_avx2)
-
-
-.section .rodata.cst32.K00_19, "aM", @progbits, 32
-.align 32
-K00_19:
-.octa 0x5A8279995A8279995A8279995A827999
-.octa 0x5A8279995A8279995A8279995A827999
-
-.section .rodata.cst32.K20_39, "aM", @progbits, 32
-.align 32
-K20_39:
-.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
-.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
-
-.section .rodata.cst32.K40_59, "aM", @progbits, 32
-.align 32
-K40_59:
-.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
-.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
-
-.section .rodata.cst32.K60_79, "aM", @progbits, 32
-.align 32
-K60_79:
-.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
-.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
-
-.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
-.align 32
-PSHUFFLE_BYTE_FLIP_MASK:
-.octa 0x0c0d0e0f08090a0b0405060700010203
-.octa 0x0c0d0e0f08090a0b0405060700010203
diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile
deleted file mode 100644
index 53ad6e7db747..000000000000
--- a/arch/x86/crypto/sha256-mb/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Arch-specific CryptoAPI modules.
-#
-
-OBJECT_FILES_NON_STANDARD := y
-
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
- $(comma)4)$(comma)%ymm2,yes,no)
-ifeq ($(avx2_supported),yes)
- obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o
- sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \
- sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o
-endif
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c
deleted file mode 100644
index 97c5fc43e115..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb.c
+++ /dev/null
@@ -1,1013 +0,0 @@
-/*
- * Multi buffer SHA256 algorithm Glue Code
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <asm/byteorder.h>
-#include <linux/hardirq.h>
-#include <asm/fpu/api.h>
-#include "sha256_mb_ctx.h"
-
-#define FLUSH_INTERVAL 1000 /* in usec */
-
-static struct mcryptd_alg_state sha256_mb_alg_state;
-
-struct sha256_mb_ctx {
- struct mcryptd_ahash *mcryptd_tfm;
-};
-
-static inline struct mcryptd_hash_request_ctx
- *cast_hash_to_mcryptd_ctx(struct sha256_hash_ctx *hash_ctx)
-{
- struct ahash_request *areq;
-
- areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
- return container_of(areq, struct mcryptd_hash_request_ctx, areq);
-}
-
-static inline struct ahash_request
- *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
-{
- return container_of((void *) ctx, struct ahash_request, __ctx);
-}
-
-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
- struct ahash_request *areq)
-{
- rctx->flag = HASH_UPDATE;
-}
-
-static asmlinkage void (*sha256_job_mgr_init)(struct sha256_mb_mgr *state);
-static asmlinkage struct job_sha256* (*sha256_job_mgr_submit)
- (struct sha256_mb_mgr *state, struct job_sha256 *job);
-static asmlinkage struct job_sha256* (*sha256_job_mgr_flush)
- (struct sha256_mb_mgr *state);
-static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job)
- (struct sha256_mb_mgr *state);
-
-inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
- uint64_t total_len)
-{
- uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1);
-
- memset(&padblock[i], 0, SHA256_BLOCK_SIZE);
- padblock[i] = 0x80;
-
- i += ((SHA256_BLOCK_SIZE - 1) &
- (0 - (total_len + SHA256_PADLENGTHFIELD_SIZE + 1)))
- + 1 + SHA256_PADLENGTHFIELD_SIZE;
-
-#if SHA256_PADLENGTHFIELD_SIZE == 16
- *((uint64_t *) &padblock[i - 16]) = 0;
-#endif
-
- *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
-
- /* Number of extra blocks to hash */
- return i >> SHA256_LOG2_BLOCK_SIZE;
-}
-
-static struct sha256_hash_ctx
- *sha256_ctx_mgr_resubmit(struct sha256_ctx_mgr *mgr,
- struct sha256_hash_ctx *ctx)
-{
- while (ctx) {
- if (ctx->status & HASH_CTX_STS_COMPLETE) {
- /* Clear PROCESSING bit */
- ctx->status = HASH_CTX_STS_COMPLETE;
- return ctx;
- }
-
- /*
- * If the extra blocks are empty, begin hashing what remains
- * in the user's buffer.
- */
- if (ctx->partial_block_buffer_length == 0 &&
- ctx->incoming_buffer_length) {
-
- const void *buffer = ctx->incoming_buffer;
- uint32_t len = ctx->incoming_buffer_length;
- uint32_t copy_len;
-
- /*
- * Only entire blocks can be hashed.
- * Copy remainder to extra blocks buffer.
- */
- copy_len = len & (SHA256_BLOCK_SIZE-1);
-
- if (copy_len) {
- len -= copy_len;
- memcpy(ctx->partial_block_buffer,
- ((const char *) buffer + len),
- copy_len);
- ctx->partial_block_buffer_length = copy_len;
- }
-
- ctx->incoming_buffer_length = 0;
-
- /* len should be a multiple of the block size now */
- assert((len % SHA256_BLOCK_SIZE) == 0);
-
- /* Set len to the number of blocks to be hashed */
- len >>= SHA256_LOG2_BLOCK_SIZE;
-
- if (len) {
-
- ctx->job.buffer = (uint8_t *) buffer;
- ctx->job.len = len;
- ctx = (struct sha256_hash_ctx *)
- sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
- continue;
- }
- }
-
- /*
- * If the extra blocks are not empty, then we are
- * either on the last block(s) or we need more
- * user input before continuing.
- */
- if (ctx->status & HASH_CTX_STS_LAST) {
-
- uint8_t *buf = ctx->partial_block_buffer;
- uint32_t n_extra_blocks =
- sha256_pad(buf, ctx->total_length);
-
- ctx->status = (HASH_CTX_STS_PROCESSING |
- HASH_CTX_STS_COMPLETE);
- ctx->job.buffer = buf;
- ctx->job.len = (uint32_t) n_extra_blocks;
- ctx = (struct sha256_hash_ctx *)
- sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
- continue;
- }
-
- ctx->status = HASH_CTX_STS_IDLE;
- return ctx;
- }
-
- return NULL;
-}
-
-static struct sha256_hash_ctx
- *sha256_ctx_mgr_get_comp_ctx(struct sha256_ctx_mgr *mgr)
-{
- /*
- * If get_comp_job returns NULL, there are no jobs complete.
- * If get_comp_job returns a job, verify that it is safe to return to
- * the user. If it is not ready, resubmit the job to finish processing.
- * If sha256_ctx_mgr_resubmit returned a job, it is ready to be
- * returned. Otherwise, all jobs currently being managed by the
- * hash_ctx_mgr still need processing.
- */
- struct sha256_hash_ctx *ctx;
-
- ctx = (struct sha256_hash_ctx *) sha256_job_mgr_get_comp_job(&mgr->mgr);
- return sha256_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static void sha256_ctx_mgr_init(struct sha256_ctx_mgr *mgr)
-{
- sha256_job_mgr_init(&mgr->mgr);
-}
-
-static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
- struct sha256_hash_ctx *ctx,
- const void *buffer,
- uint32_t len,
- int flags)
-{
- if (flags & ~(HASH_UPDATE | HASH_LAST)) {
- /* User should not pass anything other than UPDATE or LAST */
- ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
- return ctx;
- }
-
- if (ctx->status & HASH_CTX_STS_PROCESSING) {
- /* Cannot submit to a currently processing job. */
- ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
- return ctx;
- }
-
- if (ctx->status & HASH_CTX_STS_COMPLETE) {
- /* Cannot update a finished job. */
- ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
- return ctx;
- }
-
- /* If we made it here, there was no error during this call to submit */
- ctx->error = HASH_CTX_ERROR_NONE;
-
- /* Store buffer ptr info from user */
- ctx->incoming_buffer = buffer;
- ctx->incoming_buffer_length = len;
-
- /*
- * Store the user's request flags and mark this ctx as currently
- * being processed.
- */
- ctx->status = (flags & HASH_LAST) ?
- (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
- HASH_CTX_STS_PROCESSING;
-
- /* Advance byte counter */
- ctx->total_length += len;
-
- /*
- * If there is anything currently buffered in the extra blocks,
- * append to it until it contains a whole block.
- * Or if the user's buffer contains less than a whole block,
- * append as much as possible to the extra block.
- */
- if (ctx->partial_block_buffer_length || len < SHA256_BLOCK_SIZE) {
- /*
- * Compute how many bytes to copy from user buffer into
- * extra block
- */
- uint32_t copy_len = SHA256_BLOCK_SIZE -
- ctx->partial_block_buffer_length;
- if (len < copy_len)
- copy_len = len;
-
- if (copy_len) {
- /* Copy and update relevant pointers and counters */
- memcpy(
- &ctx->partial_block_buffer[ctx->partial_block_buffer_length],
- buffer, copy_len);
-
- ctx->partial_block_buffer_length += copy_len;
- ctx->incoming_buffer = (const void *)
- ((const char *)buffer + copy_len);
- ctx->incoming_buffer_length = len - copy_len;
- }
-
- /* The extra block should never contain more than 1 block */
- assert(ctx->partial_block_buffer_length <= SHA256_BLOCK_SIZE);
-
- /*
- * If the extra block buffer contains exactly 1 block,
- * it can be hashed.
- */
- if (ctx->partial_block_buffer_length >= SHA256_BLOCK_SIZE) {
- ctx->partial_block_buffer_length = 0;
-
- ctx->job.buffer = ctx->partial_block_buffer;
- ctx->job.len = 1;
- ctx = (struct sha256_hash_ctx *)
- sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
- }
- }
-
- return sha256_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static struct sha256_hash_ctx *sha256_ctx_mgr_flush(struct sha256_ctx_mgr *mgr)
-{
- struct sha256_hash_ctx *ctx;
-
- while (1) {
- ctx = (struct sha256_hash_ctx *)
- sha256_job_mgr_flush(&mgr->mgr);
-
- /* If flush returned 0, there are no more jobs in flight. */
- if (!ctx)
- return NULL;
-
- /*
- * If flush returned a job, resubmit the job to finish
- * processing.
- */
- ctx = sha256_ctx_mgr_resubmit(mgr, ctx);
-
- /*
- * If sha256_ctx_mgr_resubmit returned a job, it is ready to
- * be returned. Otherwise, all jobs currently being managed by
- * the sha256_ctx_mgr still need processing. Loop.
- */
- if (ctx)
- return ctx;
- }
-}
-
-static int sha256_mb_init(struct ahash_request *areq)
-{
- struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
-
- hash_ctx_init(sctx);
- sctx->job.result_digest[0] = SHA256_H0;
- sctx->job.result_digest[1] = SHA256_H1;
- sctx->job.result_digest[2] = SHA256_H2;
- sctx->job.result_digest[3] = SHA256_H3;
- sctx->job.result_digest[4] = SHA256_H4;
- sctx->job.result_digest[5] = SHA256_H5;
- sctx->job.result_digest[6] = SHA256_H6;
- sctx->job.result_digest[7] = SHA256_H7;
- sctx->total_length = 0;
- sctx->partial_block_buffer_length = 0;
- sctx->status = HASH_CTX_STS_IDLE;
-
- return 0;
-}
-
-static int sha256_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
-{
- int i;
- struct sha256_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
- __be32 *dst = (__be32 *) rctx->out;
-
- for (i = 0; i < 8; ++i)
- dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
-
- return 0;
-}
-
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
- struct mcryptd_alg_cstate *cstate, bool flush)
-{
- int flag = HASH_UPDATE;
- int nbytes, err = 0;
- struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
- struct sha256_hash_ctx *sha_ctx;
-
- /* more work ? */
- while (!(rctx->flag & HASH_DONE)) {
- nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
- if (nbytes < 0) {
- err = nbytes;
- goto out;
- }
- /* check if the walk is done */
- if (crypto_ahash_walk_last(&rctx->walk)) {
- rctx->flag |= HASH_DONE;
- if (rctx->flag & HASH_FINAL)
- flag |= HASH_LAST;
-
- }
- sha_ctx = (struct sha256_hash_ctx *)
- ahash_request_ctx(&rctx->areq);
- kernel_fpu_begin();
- sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx,
- rctx->walk.data, nbytes, flag);
- if (!sha_ctx) {
- if (flush)
- sha_ctx = sha256_ctx_mgr_flush(cstate->mgr);
- }
- kernel_fpu_end();
- if (sha_ctx)
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- else {
- rctx = NULL;
- goto out;
- }
- }
-
- /* copy the results */
- if (rctx->flag & HASH_FINAL)
- sha256_mb_set_results(rctx);
-
-out:
- *ret_rctx = rctx;
- return err;
-}
-
-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
- struct mcryptd_alg_cstate *cstate,
- int err)
-{
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha256_hash_ctx *sha_ctx;
- struct mcryptd_hash_request_ctx *req_ctx;
- int ret;
-
- /* remove from work list */
- spin_lock(&cstate->work_lock);
- list_del(&rctx->waiter);
- spin_unlock(&cstate->work_lock);
-
- if (irqs_disabled())
- rctx->complete(&req->base, err);
- else {
- local_bh_disable();
- rctx->complete(&req->base, err);
- local_bh_enable();
- }
-
- /* check to see if there are other jobs that are done */
- sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
- while (sha_ctx) {
- req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&req_ctx, cstate, false);
- if (req_ctx) {
- spin_lock(&cstate->work_lock);
- list_del(&req_ctx->waiter);
- spin_unlock(&cstate->work_lock);
-
- req = cast_mcryptd_ctx_to_req(req_ctx);
- if (irqs_disabled())
- req_ctx->complete(&req->base, ret);
- else {
- local_bh_disable();
- req_ctx->complete(&req->base, ret);
- local_bh_enable();
- }
- }
- sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
- }
-
- return 0;
-}
-
-static void sha256_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
- struct mcryptd_alg_cstate *cstate)
-{
- unsigned long next_flush;
- unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
-
- /* initialize tag */
- rctx->tag.arrival = jiffies; /* tag the arrival time */
- rctx->tag.seq_num = cstate->next_seq_num++;
- next_flush = rctx->tag.arrival + delay;
- rctx->tag.expire = next_flush;
-
- spin_lock(&cstate->work_lock);
- list_add_tail(&rctx->waiter, &cstate->work_list);
- spin_unlock(&cstate->work_lock);
-
- mcryptd_arm_flusher(cstate, delay);
-}
-
-static int sha256_mb_update(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx, areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
-
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha256_hash_ctx *sha_ctx;
- int ret = 0, nbytes;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
- if (nbytes < 0) {
- ret = nbytes;
- goto done;
- }
-
- if (crypto_ahash_walk_last(&rctx->walk))
- rctx->flag |= HASH_DONE;
-
- /* submit */
- sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
- sha256_mb_add_list(rctx, cstate);
- kernel_fpu_begin();
- sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
- nbytes, HASH_UPDATE);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
-
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha256_mb_finup(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx, areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
-
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha256_hash_ctx *sha_ctx;
- int ret = 0, flag = HASH_UPDATE, nbytes;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
- if (nbytes < 0) {
- ret = nbytes;
- goto done;
- }
-
- if (crypto_ahash_walk_last(&rctx->walk)) {
- rctx->flag |= HASH_DONE;
- flag = HASH_LAST;
- }
-
- /* submit */
- rctx->flag |= HASH_FINAL;
- sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
- sha256_mb_add_list(rctx, cstate);
-
- kernel_fpu_begin();
- sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
- nbytes, flag);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha256_mb_final(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx,
- areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
-
- struct sha256_hash_ctx *sha_ctx;
- int ret = 0;
- u8 data;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- rctx->flag |= HASH_DONE | HASH_FINAL;
-
- sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
- /* flag HASH_FINAL and 0 data size */
- sha256_mb_add_list(rctx, cstate);
- kernel_fpu_begin();
- sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
- HASH_LAST);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha256_mb_export(struct ahash_request *areq, void *out)
-{
- struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
-
- memcpy(out, sctx, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha256_mb_import(struct ahash_request *areq, const void *in)
-{
- struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
-
- memcpy(sctx, in, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha256_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
- struct mcryptd_ahash *mcryptd_tfm;
- struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
- struct mcryptd_hash_ctx *mctx;
-
- mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha256-mb",
- CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(mcryptd_tfm))
- return PTR_ERR(mcryptd_tfm);
- mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
- mctx->alg_state = &sha256_mb_alg_state;
- ctx->mcryptd_tfm = mcryptd_tfm;
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- crypto_ahash_reqsize(&mcryptd_tfm->base));
-
- return 0;
-}
-
-static void sha256_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
- struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
- mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static int sha256_mb_areq_init_tfm(struct crypto_tfm *tfm)
-{
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- sizeof(struct sha256_hash_ctx));
-
- return 0;
-}
-
-static void sha256_mb_areq_exit_tfm(struct crypto_tfm *tfm)
-{
- struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
- mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static struct ahash_alg sha256_mb_areq_alg = {
- .init = sha256_mb_init,
- .update = sha256_mb_update,
- .final = sha256_mb_final,
- .finup = sha256_mb_finup,
- .export = sha256_mb_export,
- .import = sha256_mb_import,
- .halg = {
- .digestsize = SHA256_DIGEST_SIZE,
- .statesize = sizeof(struct sha256_hash_ctx),
- .base = {
- .cra_name = "__sha256-mb",
- .cra_driver_name = "__intel_sha256-mb",
- .cra_priority = 100,
- /*
- * use ASYNC flag as some buffers in multi-buffer
- * algo may not have completed before hashing thread
- * sleep
- */
- .cra_flags = CRYPTO_ALG_ASYNC |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT
- (sha256_mb_areq_alg.halg.base.cra_list),
- .cra_init = sha256_mb_areq_init_tfm,
- .cra_exit = sha256_mb_areq_exit_tfm,
- .cra_ctxsize = sizeof(struct sha256_hash_ctx),
- }
- }
-};
-
-static int sha256_mb_async_init(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_init(mcryptd_req);
-}
-
-static int sha256_mb_async_update(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_update(mcryptd_req);
-}
-
-static int sha256_mb_async_finup(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_finup(mcryptd_req);
-}
-
-static int sha256_mb_async_final(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_final(mcryptd_req);
-}
-
-static int sha256_mb_async_digest(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_digest(mcryptd_req);
-}
-
-static int sha256_mb_async_export(struct ahash_request *req, void *out)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_export(mcryptd_req, out);
-}
-
-static int sha256_mb_async_import(struct ahash_request *req, const void *in)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
- struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
- struct mcryptd_hash_request_ctx *rctx;
- struct ahash_request *areq;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- rctx = ahash_request_ctx(mcryptd_req);
- areq = &rctx->areq;
-
- ahash_request_set_tfm(areq, child);
- ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
- rctx->complete, req);
-
- return crypto_ahash_import(mcryptd_req, in);
-}
-
-static struct ahash_alg sha256_mb_async_alg = {
- .init = sha256_mb_async_init,
- .update = sha256_mb_async_update,
- .final = sha256_mb_async_final,
- .finup = sha256_mb_async_finup,
- .export = sha256_mb_async_export,
- .import = sha256_mb_async_import,
- .digest = sha256_mb_async_digest,
- .halg = {
- .digestsize = SHA256_DIGEST_SIZE,
- .statesize = sizeof(struct sha256_hash_ctx),
- .base = {
- .cra_name = "sha256",
- .cra_driver_name = "sha256_mb",
- /*
- * Low priority, since with few concurrent hash requests
- * this is extremely slow due to the flush delay. Users
- * whose workloads would benefit from this can request
- * it explicitly by driver name, or can increase its
- * priority at runtime using NETLINK_CRYPTO.
- */
- .cra_priority = 50,
- .cra_flags = CRYPTO_ALG_ASYNC,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT
- (sha256_mb_async_alg.halg.base.cra_list),
- .cra_init = sha256_mb_async_init_tfm,
- .cra_exit = sha256_mb_async_exit_tfm,
- .cra_ctxsize = sizeof(struct sha256_mb_ctx),
- .cra_alignmask = 0,
- },
- },
-};
-
-static unsigned long sha256_mb_flusher(struct mcryptd_alg_cstate *cstate)
-{
- struct mcryptd_hash_request_ctx *rctx;
- unsigned long cur_time;
- unsigned long next_flush = 0;
- struct sha256_hash_ctx *sha_ctx;
-
-
- cur_time = jiffies;
-
- while (!list_empty(&cstate->work_list)) {
- rctx = list_entry(cstate->work_list.next,
- struct mcryptd_hash_request_ctx, waiter);
- if (time_before(cur_time, rctx->tag.expire))
- break;
- kernel_fpu_begin();
- sha_ctx = (struct sha256_hash_ctx *)
- sha256_ctx_mgr_flush(cstate->mgr);
- kernel_fpu_end();
- if (!sha_ctx) {
- pr_err("sha256_mb error: nothing got"
- " flushed for non-empty list\n");
- break;
- }
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- sha_finish_walk(&rctx, cstate, true);
- sha_complete_job(rctx, cstate, 0);
- }
-
- if (!list_empty(&cstate->work_list)) {
- rctx = list_entry(cstate->work_list.next,
- struct mcryptd_hash_request_ctx, waiter);
- /* get the hash context and then flush time */
- next_flush = rctx->tag.expire;
- mcryptd_arm_flusher(cstate, get_delay(next_flush));
- }
- return next_flush;
-}
-
-static int __init sha256_mb_mod_init(void)
-{
-
- int cpu;
- int err;
- struct mcryptd_alg_cstate *cpu_state;
-
- /* check for dependent cpu features */
- if (!boot_cpu_has(X86_FEATURE_AVX2) ||
- !boot_cpu_has(X86_FEATURE_BMI2))
- return -ENODEV;
-
- /* initialize multibuffer structures */
- sha256_mb_alg_state.alg_cstate = alloc_percpu
- (struct mcryptd_alg_cstate);
-
- sha256_job_mgr_init = sha256_mb_mgr_init_avx2;
- sha256_job_mgr_submit = sha256_mb_mgr_submit_avx2;
- sha256_job_mgr_flush = sha256_mb_mgr_flush_avx2;
- sha256_job_mgr_get_comp_job = sha256_mb_mgr_get_comp_job_avx2;
-
- if (!sha256_mb_alg_state.alg_cstate)
- return -ENOMEM;
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
- cpu_state->next_flush = 0;
- cpu_state->next_seq_num = 0;
- cpu_state->flusher_engaged = false;
- INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
- cpu_state->cpu = cpu;
- cpu_state->alg_state = &sha256_mb_alg_state;
- cpu_state->mgr = kzalloc(sizeof(struct sha256_ctx_mgr),
- GFP_KERNEL);
- if (!cpu_state->mgr)
- goto err2;
- sha256_ctx_mgr_init(cpu_state->mgr);
- INIT_LIST_HEAD(&cpu_state->work_list);
- spin_lock_init(&cpu_state->work_lock);
- }
- sha256_mb_alg_state.flusher = &sha256_mb_flusher;
-
- err = crypto_register_ahash(&sha256_mb_areq_alg);
- if (err)
- goto err2;
- err = crypto_register_ahash(&sha256_mb_async_alg);
- if (err)
- goto err1;
-
-
- return 0;
-err1:
- crypto_unregister_ahash(&sha256_mb_areq_alg);
-err2:
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
- kfree(cpu_state->mgr);
- }
- free_percpu(sha256_mb_alg_state.alg_cstate);
- return -ENODEV;
-}
-
-static void __exit sha256_mb_mod_fini(void)
-{
- int cpu;
- struct mcryptd_alg_cstate *cpu_state;
-
- crypto_unregister_ahash(&sha256_mb_async_alg);
- crypto_unregister_ahash(&sha256_mb_areq_alg);
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
- kfree(cpu_state->mgr);
- }
- free_percpu(sha256_mb_alg_state.alg_cstate);
-}
-
-module_init(sha256_mb_mod_init);
-module_exit(sha256_mb_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, multi buffer accelerated");
-
-MODULE_ALIAS_CRYPTO("sha256");
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
deleted file mode 100644
index 7c432543dc7f..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Header file for multi buffer SHA256 context
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SHA_MB_CTX_INTERNAL_H
-#define _SHA_MB_CTX_INTERNAL_H
-
-#include "sha256_mb_mgr.h"
-
-#define HASH_UPDATE 0x00
-#define HASH_LAST 0x01
-#define HASH_DONE 0x02
-#define HASH_FINAL 0x04
-
-#define HASH_CTX_STS_IDLE 0x00
-#define HASH_CTX_STS_PROCESSING 0x01
-#define HASH_CTX_STS_LAST 0x02
-#define HASH_CTX_STS_COMPLETE 0x04
-
-enum hash_ctx_error {
- HASH_CTX_ERROR_NONE = 0,
- HASH_CTX_ERROR_INVALID_FLAGS = -1,
- HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
- HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
-
-#ifdef HASH_CTX_DEBUG
- HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
-#endif
-};
-
-
-#define hash_ctx_user_data(ctx) ((ctx)->user_data)
-#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
-#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
-#define hash_ctx_status(ctx) ((ctx)->status)
-#define hash_ctx_error(ctx) ((ctx)->error)
-#define hash_ctx_init(ctx) \
- do { \
- (ctx)->error = HASH_CTX_ERROR_NONE; \
- (ctx)->status = HASH_CTX_STS_COMPLETE; \
- } while (0)
-
-
-/* Hash Constants and Typedefs */
-#define SHA256_DIGEST_LENGTH 8
-#define SHA256_LOG2_BLOCK_SIZE 6
-
-#define SHA256_PADLENGTHFIELD_SIZE 8
-
-#ifdef SHA_MB_DEBUG
-#define assert(expr) \
-do { \
- if (unlikely(!(expr))) { \
- printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
- #expr, __FILE__, __func__, __LINE__); \
- } \
-} while (0)
-#else
-#define assert(expr) do {} while (0)
-#endif
-
-struct sha256_ctx_mgr {
- struct sha256_mb_mgr mgr;
-};
-
-/* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */
-
-struct sha256_hash_ctx {
- /* Must be at struct offset 0 */
- struct job_sha256 job;
- /* status flag */
- int status;
- /* error flag */
- int error;
-
- uint64_t total_length;
- const void *incoming_buffer;
- uint32_t incoming_buffer_length;
- uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2];
- uint32_t partial_block_buffer_length;
- void *user_data;
-};
-
-#endif
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h
deleted file mode 100644
index b01ae408c56d..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Header file for multi buffer SHA256 algorithm manager
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef __SHA_MB_MGR_H
-#define __SHA_MB_MGR_H
-
-#include <linux/types.h>
-
-#define NUM_SHA256_DIGEST_WORDS 8
-
-enum job_sts { STS_UNKNOWN = 0,
- STS_BEING_PROCESSED = 1,
- STS_COMPLETED = 2,
- STS_INTERNAL_ERROR = 3,
- STS_ERROR = 4
-};
-
-struct job_sha256 {
- u8 *buffer;
- u32 len;
- u32 result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32);
- enum job_sts status;
- void *user_data;
-};
-
-/* SHA256 out-of-order scheduler */
-
-/* typedef uint32_t sha8_digest_array[8][8]; */
-
-struct sha256_args_x8 {
- uint32_t digest[8][8];
- uint8_t *data_ptr[8];
-};
-
-struct sha256_lane_data {
- struct job_sha256 *job_in_lane;
-};
-
-struct sha256_mb_mgr {
- struct sha256_args_x8 args;
-
- uint32_t lens[8];
-
- /* each byte is index (0...7) of unused lanes */
- uint64_t unused_lanes;
- /* byte 4 is set to FF as a flag */
- struct sha256_lane_data ldata[8];
-};
-
-
-#define SHA256_MB_MGR_NUM_LANES_AVX2 8
-
-void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state);
-struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state,
- struct job_sha256 *job);
-struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state);
-struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state);
-
-#endif
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S
deleted file mode 100644
index 5c377bac21d0..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Header file for multi buffer SHA256 algorithm data structure
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# Macros for defining data structures
-
-# Usage example
-
-#START_FIELDS # JOB_AES
-### name size align
-#FIELD _plaintext, 8, 8 # pointer to plaintext
-#FIELD _ciphertext, 8, 8 # pointer to ciphertext
-#FIELD _IV, 16, 8 # IV
-#FIELD _keys, 8, 8 # pointer to keys
-#FIELD _len, 4, 4 # length in bytes
-#FIELD _status, 4, 4 # status enumeration
-#FIELD _user_data, 8, 8 # pointer to user data
-#UNION _union, size1, align1, \
-# size2, align2, \
-# size3, align3, \
-# ...
-#END_FIELDS
-#%assign _JOB_AES_size _FIELD_OFFSET
-#%assign _JOB_AES_align _STRUCT_ALIGN
-
-#########################################################################
-
-# Alternate "struc-like" syntax:
-# STRUCT job_aes2
-# RES_Q .plaintext, 1
-# RES_Q .ciphertext, 1
-# RES_DQ .IV, 1
-# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
-# RES_U .union, size1, align1, \
-# size2, align2, \
-# ...
-# ENDSTRUCT
-# # Following only needed if nesting
-# %assign job_aes2_size _FIELD_OFFSET
-# %assign job_aes2_align _STRUCT_ALIGN
-#
-# RES_* macros take a name, a count and an optional alignment.
-# The count in in terms of the base size of the macro, and the
-# default alignment is the base size.
-# The macros are:
-# Macro Base size
-# RES_B 1
-# RES_W 2
-# RES_D 4
-# RES_Q 8
-# RES_DQ 16
-# RES_Y 32
-# RES_Z 64
-#
-# RES_U defines a union. It's arguments are a name and two or more
-# pairs of "size, alignment"
-#
-# The two assigns are only needed if this structure is being nested
-# within another. Even if the assigns are not done, one can still use
-# STRUCT_NAME_size as the size of the structure.
-#
-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
-#
-# The differences between this and using "struc" directly are that each
-# type is implicitly aligned to its natural length (although this can be
-# over-ridden with an explicit third parameter), and that the structure
-# is padded at the end to its overall alignment.
-#
-
-#########################################################################
-
-#ifndef _DATASTRUCT_ASM_
-#define _DATASTRUCT_ASM_
-
-#define SZ8 8*SHA256_DIGEST_WORD_SIZE
-#define ROUNDS 64*SZ8
-#define PTR_SZ 8
-#define SHA256_DIGEST_WORD_SIZE 4
-#define MAX_SHA256_LANES 8
-#define SHA256_DIGEST_WORDS 8
-#define SHA256_DIGEST_ROW_SIZE (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE)
-#define SHA256_DIGEST_SIZE (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS)
-#define SHA256_BLK_SZ 64
-
-# START_FIELDS
-.macro START_FIELDS
- _FIELD_OFFSET = 0
- _STRUCT_ALIGN = 0
-.endm
-
-# FIELD name size align
-.macro FIELD name size align
- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
- \name = _FIELD_OFFSET
- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
-.if (\align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = \align
-.endif
-.endm
-
-# END_FIELDS
-.macro END_FIELDS
- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
-.endm
-
-########################################################################
-
-.macro STRUCT p1
-START_FIELDS
-.struc \p1
-.endm
-
-.macro ENDSTRUCT
- tmp = _FIELD_OFFSET
- END_FIELDS
- tmp = (_FIELD_OFFSET - %%tmp)
-.if (tmp > 0)
- .lcomm tmp
-.endif
-.endstruc
-.endm
-
-## RES_int name size align
-.macro RES_int p1 p2 p3
- name = \p1
- size = \p2
- align = .\p3
-
- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
-.align align
-.lcomm name size
- _FIELD_OFFSET = _FIELD_OFFSET + (size)
-.if (align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = align
-.endif
-.endm
-
-# macro RES_B name, size [, align]
-.macro RES_B _name, _size, _align=1
-RES_int _name _size _align
-.endm
-
-# macro RES_W name, size [, align]
-.macro RES_W _name, _size, _align=2
-RES_int _name 2*(_size) _align
-.endm
-
-# macro RES_D name, size [, align]
-.macro RES_D _name, _size, _align=4
-RES_int _name 4*(_size) _align
-.endm
-
-# macro RES_Q name, size [, align]
-.macro RES_Q _name, _size, _align=8
-RES_int _name 8*(_size) _align
-.endm
-
-# macro RES_DQ name, size [, align]
-.macro RES_DQ _name, _size, _align=16
-RES_int _name 16*(_size) _align
-.endm
-
-# macro RES_Y name, size [, align]
-.macro RES_Y _name, _size, _align=32
-RES_int _name 32*(_size) _align
-.endm
-
-# macro RES_Z name, size [, align]
-.macro RES_Z _name, _size, _align=64
-RES_int _name 64*(_size) _align
-.endm
-
-#endif
-
-
-########################################################################
-#### Define SHA256 Out Of Order Data Structures
-########################################################################
-
-START_FIELDS # LANE_DATA
-### name size align
-FIELD _job_in_lane, 8, 8 # pointer to job object
-END_FIELDS
-
- _LANE_DATA_size = _FIELD_OFFSET
- _LANE_DATA_align = _STRUCT_ALIGN
-
-########################################################################
-
-START_FIELDS # SHA256_ARGS_X4
-### name size align
-FIELD _digest, 4*8*8, 4 # transposed digest
-FIELD _data_ptr, 8*8, 8 # array of pointers to data
-END_FIELDS
-
- _SHA256_ARGS_X4_size = _FIELD_OFFSET
- _SHA256_ARGS_X4_align = _STRUCT_ALIGN
- _SHA256_ARGS_X8_size = _FIELD_OFFSET
- _SHA256_ARGS_X8_align = _STRUCT_ALIGN
-
-#######################################################################
-
-START_FIELDS # MB_MGR
-### name size align
-FIELD _args, _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align
-FIELD _lens, 4*8, 8
-FIELD _unused_lanes, 8, 8
-FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align
-END_FIELDS
-
- _MB_MGR_size = _FIELD_OFFSET
- _MB_MGR_align = _STRUCT_ALIGN
-
-_args_digest = _args + _digest
-_args_data_ptr = _args + _data_ptr
-
-#######################################################################
-
-START_FIELDS #STACK_FRAME
-### name size align
-FIELD _data, 16*SZ8, 1 # transposed digest
-FIELD _digest, 8*SZ8, 1 # array of pointers to data
-FIELD _ytmp, 4*SZ8, 1
-FIELD _rsp, 8, 1
-END_FIELDS
-
- _STACK_FRAME_size = _FIELD_OFFSET
- _STACK_FRAME_align = _STRUCT_ALIGN
-
-#######################################################################
-
-########################################################################
-#### Define constants
-########################################################################
-
-#define STS_UNKNOWN 0
-#define STS_BEING_PROCESSED 1
-#define STS_COMPLETED 2
-
-########################################################################
-#### Define JOB_SHA256 structure
-########################################################################
-
-START_FIELDS # JOB_SHA256
-
-### name size align
-FIELD _buffer, 8, 8 # pointer to buffer
-FIELD _len, 8, 8 # length in bytes
-FIELD _result_digest, 8*4, 32 # Digest (output)
-FIELD _status, 4, 4
-FIELD _user_data, 8, 8
-END_FIELDS
-
- _JOB_SHA256_size = _FIELD_OFFSET
- _JOB_SHA256_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
deleted file mode 100644
index d2364c55bbde..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Flush routine for SHA256 multibuffer
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha256_mb_mgr_datastruct.S"
-
-.extern sha256_x8_avx2
-
-#LINUX register definitions
-#define arg1 %rdi
-#define arg2 %rsi
-
-# Common register definitions
-#define state arg1
-#define job arg2
-#define len2 arg2
-
-# idx must be a register not clobberred by sha1_mult
-#define idx %r8
-#define DWORD_idx %r8d
-
-#define unused_lanes %rbx
-#define lane_data %rbx
-#define tmp2 %rbx
-#define tmp2_w %ebx
-
-#define job_rax %rax
-#define tmp1 %rax
-#define size_offset %rax
-#define tmp %rax
-#define start_offset %rax
-
-#define tmp3 %arg1
-
-#define extra_blocks %arg2
-#define p %arg2
-
-.macro LABEL prefix n
-\prefix\n\():
-.endm
-
-.macro JNE_SKIP i
-jne skip_\i
-.endm
-
-.altmacro
-.macro SET_OFFSET _offset
-offset = \_offset
-.endm
-.noaltmacro
-
-# JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state)
-# arg 1 : rcx : state
-ENTRY(sha256_mb_mgr_flush_avx2)
- FRAME_BEGIN
- push %rbx
-
- # If bit (32+3) is set, then all lanes are empty
- mov _unused_lanes(state), unused_lanes
- bt $32+3, unused_lanes
- jc return_null
-
- # find a lane with a non-null job
- xor idx, idx
- offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne one(%rip), idx
- offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne two(%rip), idx
- offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne three(%rip), idx
- offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne four(%rip), idx
- offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne five(%rip), idx
- offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne six(%rip), idx
- offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne seven(%rip), idx
-
- # copy idx to empty lanes
-copy_lane_data:
- offset = (_args + _data_ptr)
- mov offset(state,idx,8), tmp
-
- I = 0
-.rep 8
- offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
-.altmacro
- JNE_SKIP %I
- offset = (_args + _data_ptr + 8*I)
- mov tmp, offset(state)
- offset = (_lens + 4*I)
- movl $0xFFFFFFFF, offset(state)
-LABEL skip_ %I
- I = (I+1)
-.noaltmacro
-.endr
-
- # Find min length
- vmovdqu _lens+0*16(state), %xmm0
- vmovdqu _lens+1*16(state), %xmm1
-
- vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
- vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
- vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
-
- vmovd %xmm2, DWORD_idx
- mov idx, len2
- and $0xF, idx
- shr $4, len2
- jz len_is_0
-
- vpand clear_low_nibble(%rip), %xmm2, %xmm2
- vpshufd $0, %xmm2, %xmm2
-
- vpsubd %xmm2, %xmm0, %xmm0
- vpsubd %xmm2, %xmm1, %xmm1
-
- vmovdqu %xmm0, _lens+0*16(state)
- vmovdqu %xmm1, _lens+1*16(state)
-
- # "state" and "args" are the same address, arg1
- # len is arg2
- call sha256_x8_avx2
- # state and idx are intact
-
-len_is_0:
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- mov _unused_lanes(state), unused_lanes
- shl $4, unused_lanes
- or idx, unused_lanes
-
- mov unused_lanes, _unused_lanes(state)
- movl $0xFFFFFFFF, _lens(state,idx,4)
-
- vmovd _args_digest(state , idx, 4) , %xmm0
- vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
- vmovd _args_digest+4*32(state, idx, 4), %xmm1
- vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
- vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
- vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
-
- vmovdqu %xmm0, _result_digest(job_rax)
- offset = (_result_digest + 1*16)
- vmovdqu %xmm1, offset(job_rax)
-
-return:
- pop %rbx
- FRAME_END
- ret
-
-return_null:
- xor job_rax, job_rax
- jmp return
-ENDPROC(sha256_mb_mgr_flush_avx2)
-
-##############################################################################
-
-.align 16
-ENTRY(sha256_mb_mgr_get_comp_job_avx2)
- push %rbx
-
- ## if bit 32+3 is set, then all lanes are empty
- mov _unused_lanes(state), unused_lanes
- bt $(32+3), unused_lanes
- jc .return_null
-
- # Find min length
- vmovdqu _lens(state), %xmm0
- vmovdqu _lens+1*16(state), %xmm1
-
- vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
- vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
- vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
-
- vmovd %xmm2, DWORD_idx
- test $~0xF, idx
- jnz .return_null
-
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- mov _unused_lanes(state), unused_lanes
- shl $4, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens(state, idx, 4)
-
- vmovd _args_digest(state, idx, 4), %xmm0
- vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
- vmovd _args_digest+4*32(state, idx, 4), %xmm1
- vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
- vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
- vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
-
- vmovdqu %xmm0, _result_digest(job_rax)
- offset = (_result_digest + 1*16)
- vmovdqu %xmm1, offset(job_rax)
-
- pop %rbx
-
- ret
-
-.return_null:
- xor job_rax, job_rax
- pop %rbx
- ret
-ENDPROC(sha256_mb_mgr_get_comp_job_avx2)
-
-.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
-.octa 0x000000000000000000000000FFFFFFF0
-
-.section .rodata.cst8, "aM", @progbits, 8
-.align 8
-one:
-.quad 1
-two:
-.quad 2
-three:
-.quad 3
-four:
-.quad 4
-five:
-.quad 5
-six:
-.quad 6
-seven:
-.quad 7
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c
deleted file mode 100644
index b0c498371e67..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Initialization code for multi buffer SHA256 algorithm for AVX2
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sha256_mb_mgr.h"
-
-void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state)
-{
- unsigned int j;
-
- state->unused_lanes = 0xF76543210ULL;
- for (j = 0; j < 8; j++) {
- state->lens[j] = 0xFFFFFFFF;
- state->ldata[j].job_in_lane = NULL;
- }
-}
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S
deleted file mode 100644
index b36ae7454084..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Buffer submit code for multi buffer SHA256 algorithm
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha256_mb_mgr_datastruct.S"
-
-.extern sha256_x8_avx2
-
-# LINUX register definitions
-arg1 = %rdi
-arg2 = %rsi
-size_offset = %rcx
-tmp2 = %rcx
-extra_blocks = %rdx
-
-# Common definitions
-#define state arg1
-#define job %rsi
-#define len2 arg2
-#define p2 arg2
-
-# idx must be a register not clobberred by sha1_x8_avx2
-idx = %r8
-DWORD_idx = %r8d
-last_len = %r8
-
-p = %r11
-start_offset = %r11
-
-unused_lanes = %rbx
-BYTE_unused_lanes = %bl
-
-job_rax = %rax
-len = %rax
-DWORD_len = %eax
-
-lane = %r12
-tmp3 = %r12
-
-tmp = %r9
-DWORD_tmp = %r9d
-
-lane_data = %r10
-
-# JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job)
-# arg 1 : rcx : state
-# arg 2 : rdx : job
-ENTRY(sha256_mb_mgr_submit_avx2)
- FRAME_BEGIN
- push %rbx
- push %r12
-
- mov _unused_lanes(state), unused_lanes
- mov unused_lanes, lane
- and $0xF, lane
- shr $4, unused_lanes
- imul $_LANE_DATA_size, lane, lane_data
- movl $STS_BEING_PROCESSED, _status(job)
- lea _ldata(state, lane_data), lane_data
- mov unused_lanes, _unused_lanes(state)
- movl _len(job), DWORD_len
-
- mov job, _job_in_lane(lane_data)
- shl $4, len
- or lane, len
-
- movl DWORD_len, _lens(state , lane, 4)
-
- # Load digest words from result_digest
- vmovdqu _result_digest(job), %xmm0
- vmovdqu _result_digest+1*16(job), %xmm1
- vmovd %xmm0, _args_digest(state, lane, 4)
- vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4)
- vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4)
- vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4)
- vmovd %xmm1, _args_digest+4*32(state , lane, 4)
-
- vpextrd $1, %xmm1, _args_digest+5*32(state , lane, 4)
- vpextrd $2, %xmm1, _args_digest+6*32(state , lane, 4)
- vpextrd $3, %xmm1, _args_digest+7*32(state , lane, 4)
-
- mov _buffer(job), p
- mov p, _args_data_ptr(state, lane, 8)
-
- cmp $0xF, unused_lanes
- jne return_null
-
-start_loop:
- # Find min length
- vmovdqa _lens(state), %xmm0
- vmovdqa _lens+1*16(state), %xmm1
-
- vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
- vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
- vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
-
- vmovd %xmm2, DWORD_idx
- mov idx, len2
- and $0xF, idx
- shr $4, len2
- jz len_is_0
-
- vpand clear_low_nibble(%rip), %xmm2, %xmm2
- vpshufd $0, %xmm2, %xmm2
-
- vpsubd %xmm2, %xmm0, %xmm0
- vpsubd %xmm2, %xmm1, %xmm1
-
- vmovdqa %xmm0, _lens + 0*16(state)
- vmovdqa %xmm1, _lens + 1*16(state)
-
- # "state" and "args" are the same address, arg1
- # len is arg2
- call sha256_x8_avx2
-
- # state and idx are intact
-
-len_is_0:
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- mov _unused_lanes(state), unused_lanes
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- shl $4, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens(state,idx,4)
-
- vmovd _args_digest(state, idx, 4), %xmm0
- vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
- vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
- vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
- vmovd _args_digest+4*32(state, idx, 4), %xmm1
-
- vpinsrd $1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1
- vpinsrd $2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1
- vpinsrd $3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1
-
- vmovdqu %xmm0, _result_digest(job_rax)
- vmovdqu %xmm1, _result_digest+1*16(job_rax)
-
-return:
- pop %r12
- pop %rbx
- FRAME_END
- ret
-
-return_null:
- xor job_rax, job_rax
- jmp return
-
-ENDPROC(sha256_mb_mgr_submit_avx2)
-
-.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
- .octa 0x000000000000000000000000FFFFFFF0
diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S
deleted file mode 100644
index 1687c80c5995..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S
+++ /dev/null
@@ -1,598 +0,0 @@
-/*
- * Multi-buffer SHA256 algorithm hash compute routine
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include "sha256_mb_mgr_datastruct.S"
-
-## code to compute oct SHA256 using SSE-256
-## outer calling routine takes care of save and restore of XMM registers
-## Logic designed/laid out by JDG
-
-## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; %ymm0-15
-## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15
-## Linux preserves: rdi rbp r8
-##
-## clobbers %ymm0-15
-
-arg1 = %rdi
-arg2 = %rsi
-reg3 = %rcx
-reg4 = %rdx
-
-# Common definitions
-STATE = arg1
-INP_SIZE = arg2
-
-IDX = %rax
-ROUND = %rbx
-TBL = reg3
-
-inp0 = %r9
-inp1 = %r10
-inp2 = %r11
-inp3 = %r12
-inp4 = %r13
-inp5 = %r14
-inp6 = %r15
-inp7 = reg4
-
-a = %ymm0
-b = %ymm1
-c = %ymm2
-d = %ymm3
-e = %ymm4
-f = %ymm5
-g = %ymm6
-h = %ymm7
-
-T1 = %ymm8
-
-a0 = %ymm12
-a1 = %ymm13
-a2 = %ymm14
-TMP = %ymm15
-TMP0 = %ymm6
-TMP1 = %ymm7
-
-TT0 = %ymm8
-TT1 = %ymm9
-TT2 = %ymm10
-TT3 = %ymm11
-TT4 = %ymm12
-TT5 = %ymm13
-TT6 = %ymm14
-TT7 = %ymm15
-
-# Define stack usage
-
-# Assume stack aligned to 32 bytes before call
-# Therefore FRAMESZ mod 32 must be 32-8 = 24
-
-#define FRAMESZ 0x388
-
-#define VMOVPS vmovups
-
-# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
-# "transpose" data in {r0...r7} using temps {t0...t1}
-# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
-# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
-# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
-# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
-# r4 = {e7 e6 e5 e4 e3 e2 e1 e0}
-# r5 = {f7 f6 f5 f4 f3 f2 f1 f0}
-# r6 = {g7 g6 g5 g4 g3 g2 g1 g0}
-# r7 = {h7 h6 h5 h4 h3 h2 h1 h0}
-#
-# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {h0 g0 f0 e0 d0 c0 b0 a0}
-# r1 = {h1 g1 f1 e1 d1 c1 b1 a1}
-# r2 = {h2 g2 f2 e2 d2 c2 b2 a2}
-# r3 = {h3 g3 f3 e3 d3 c3 b3 a3}
-# r4 = {h4 g4 f4 e4 d4 c4 b4 a4}
-# r5 = {h5 g5 f5 e5 d5 c5 b5 a5}
-# r6 = {h6 g6 f6 e6 d6 c6 b6 a6}
-# r7 = {h7 g7 f7 e7 d7 c7 b7 a7}
-#
-
-.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
- # process top half (r0..r3) {a...d}
- vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
- vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
- vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
- vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
- vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1}
- vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2}
- vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3}
- vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0}
-
- # use r2 in place of t0
- # process bottom half (r4..r7) {e...h}
- vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0}
- vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2}
- vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0}
- vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2}
- vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1}
- vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2}
- vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3}
- vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0}
-
- vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6
- vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2
- vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5
- vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1
- vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7
- vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3
- vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4
- vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0
-
-.endm
-
-.macro ROTATE_ARGS
-TMP_ = h
-h = g
-g = f
-f = e
-e = d
-d = c
-c = b
-b = a
-a = TMP_
-.endm
-
-.macro _PRORD reg imm tmp
- vpslld $(32-\imm),\reg,\tmp
- vpsrld $\imm,\reg, \reg
- vpor \tmp,\reg, \reg
-.endm
-
-# PRORD_nd reg, imm, tmp, src
-.macro _PRORD_nd reg imm tmp src
- vpslld $(32-\imm), \src, \tmp
- vpsrld $\imm, \src, \reg
- vpor \tmp, \reg, \reg
-.endm
-
-# PRORD dst/src, amt
-.macro PRORD reg imm
- _PRORD \reg,\imm,TMP
-.endm
-
-# PRORD_nd dst, src, amt
-.macro PRORD_nd reg tmp imm
- _PRORD_nd \reg, \imm, TMP, \tmp
-.endm
-
-# arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_00_15 _T1 i
- PRORD_nd a0,e,5 # sig1: a0 = (e >> 5)
-
- vpxor g, f, a2 # ch: a2 = f^g
- vpand e,a2, a2 # ch: a2 = (f^g)&e
- vpxor g, a2, a2 # a2 = ch
-
- PRORD_nd a1,e,25 # sig1: a1 = (e >> 25)
-
- vmovdqu \_T1,(SZ8*(\i & 0xf))(%rsp)
- vpaddd (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K
- vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5)
- PRORD a0, 6 # sig1: a0 = (e >> 6) ^ (e >> 11)
- vpaddd a2, h, h # h = h + ch
- PRORD_nd a2,a,11 # sig0: a2 = (a >> 11)
- vpaddd \_T1,h, h # h = h + ch + W + K
- vpxor a1, a0, a0 # a0 = sigma1
- PRORD_nd a1,a,22 # sig0: a1 = (a >> 22)
- vpxor c, a, \_T1 # maj: T1 = a^c
- add $SZ8, ROUND # ROUND++
- vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b
- vpaddd a0, h, h
- vpaddd h, d, d
- vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11)
- PRORD a2,2 # sig0: a2 = (a >> 2) ^ (a >> 13)
- vpxor a1, a2, a2 # a2 = sig0
- vpand c, a, a1 # maj: a1 = a&c
- vpor \_T1, a1, a1 # a1 = maj
- vpaddd a1, h, h # h = h + ch + W + K + maj
- vpaddd a2, h, h # h = h + ch + W + K + maj + sigma0
- ROTATE_ARGS
-.endm
-
-# arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_16_XX _T1 i
- vmovdqu (SZ8*((\i-15)&0xf))(%rsp), \_T1
- vmovdqu (SZ8*((\i-2)&0xf))(%rsp), a1
- vmovdqu \_T1, a0
- PRORD \_T1,11
- vmovdqu a1, a2
- PRORD a1,2
- vpxor a0, \_T1, \_T1
- PRORD \_T1, 7
- vpxor a2, a1, a1
- PRORD a1, 17
- vpsrld $3, a0, a0
- vpxor a0, \_T1, \_T1
- vpsrld $10, a2, a2
- vpxor a2, a1, a1
- vpaddd (SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1
- vpaddd (SZ8*((\i-7)&0xf))(%rsp), a1, a1
- vpaddd a1, \_T1, \_T1
-
- ROUND_00_15 \_T1,\i
-.endm
-
-# SHA256_ARGS:
-# UINT128 digest[8]; // transposed digests
-# UINT8 *data_ptr[4];
-
-# void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes);
-# arg 1 : STATE : pointer to array of pointers to input data
-# arg 2 : INP_SIZE : size of input in blocks
- # general registers preserved in outer calling routine
- # outer calling routine saves all the XMM registers
- # save rsp, allocate 32-byte aligned for local variables
-ENTRY(sha256_x8_avx2)
-
- # save callee-saved clobbered registers to comply with C function ABI
- push %r12
- push %r13
- push %r14
- push %r15
-
- mov %rsp, IDX
- sub $FRAMESZ, %rsp
- and $~0x1F, %rsp
- mov IDX, _rsp(%rsp)
-
- # Load the pre-transposed incoming digest.
- vmovdqu 0*SHA256_DIGEST_ROW_SIZE(STATE),a
- vmovdqu 1*SHA256_DIGEST_ROW_SIZE(STATE),b
- vmovdqu 2*SHA256_DIGEST_ROW_SIZE(STATE),c
- vmovdqu 3*SHA256_DIGEST_ROW_SIZE(STATE),d
- vmovdqu 4*SHA256_DIGEST_ROW_SIZE(STATE),e
- vmovdqu 5*SHA256_DIGEST_ROW_SIZE(STATE),f
- vmovdqu 6*SHA256_DIGEST_ROW_SIZE(STATE),g
- vmovdqu 7*SHA256_DIGEST_ROW_SIZE(STATE),h
-
- lea K256_8(%rip),TBL
-
- # load the address of each of the 4 message lanes
- # getting ready to transpose input onto stack
- mov _args_data_ptr+0*PTR_SZ(STATE),inp0
- mov _args_data_ptr+1*PTR_SZ(STATE),inp1
- mov _args_data_ptr+2*PTR_SZ(STATE),inp2
- mov _args_data_ptr+3*PTR_SZ(STATE),inp3
- mov _args_data_ptr+4*PTR_SZ(STATE),inp4
- mov _args_data_ptr+5*PTR_SZ(STATE),inp5
- mov _args_data_ptr+6*PTR_SZ(STATE),inp6
- mov _args_data_ptr+7*PTR_SZ(STATE),inp7
-
- xor IDX, IDX
-lloop:
- xor ROUND, ROUND
-
- # save old digest
- vmovdqu a, _digest(%rsp)
- vmovdqu b, _digest+1*SZ8(%rsp)
- vmovdqu c, _digest+2*SZ8(%rsp)
- vmovdqu d, _digest+3*SZ8(%rsp)
- vmovdqu e, _digest+4*SZ8(%rsp)
- vmovdqu f, _digest+5*SZ8(%rsp)
- vmovdqu g, _digest+6*SZ8(%rsp)
- vmovdqu h, _digest+7*SZ8(%rsp)
- i = 0
-.rep 2
- VMOVPS i*32(inp0, IDX), TT0
- VMOVPS i*32(inp1, IDX), TT1
- VMOVPS i*32(inp2, IDX), TT2
- VMOVPS i*32(inp3, IDX), TT3
- VMOVPS i*32(inp4, IDX), TT4
- VMOVPS i*32(inp5, IDX), TT5
- VMOVPS i*32(inp6, IDX), TT6
- VMOVPS i*32(inp7, IDX), TT7
- vmovdqu g, _ytmp(%rsp)
- vmovdqu h, _ytmp+1*SZ8(%rsp)
- TRANSPOSE8 TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7, TMP0, TMP1
- vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1
- vmovdqu _ytmp(%rsp), g
- vpshufb TMP1, TT0, TT0
- vpshufb TMP1, TT1, TT1
- vpshufb TMP1, TT2, TT2
- vpshufb TMP1, TT3, TT3
- vpshufb TMP1, TT4, TT4
- vpshufb TMP1, TT5, TT5
- vpshufb TMP1, TT6, TT6
- vpshufb TMP1, TT7, TT7
- vmovdqu _ytmp+1*SZ8(%rsp), h
- vmovdqu TT4, _ytmp(%rsp)
- vmovdqu TT5, _ytmp+1*SZ8(%rsp)
- vmovdqu TT6, _ytmp+2*SZ8(%rsp)
- vmovdqu TT7, _ytmp+3*SZ8(%rsp)
- ROUND_00_15 TT0,(i*8+0)
- vmovdqu _ytmp(%rsp), TT0
- ROUND_00_15 TT1,(i*8+1)
- vmovdqu _ytmp+1*SZ8(%rsp), TT1
- ROUND_00_15 TT2,(i*8+2)
- vmovdqu _ytmp+2*SZ8(%rsp), TT2
- ROUND_00_15 TT3,(i*8+3)
- vmovdqu _ytmp+3*SZ8(%rsp), TT3
- ROUND_00_15 TT0,(i*8+4)
- ROUND_00_15 TT1,(i*8+5)
- ROUND_00_15 TT2,(i*8+6)
- ROUND_00_15 TT3,(i*8+7)
- i = (i+1)
-.endr
- add $64, IDX
- i = (i*8)
-
- jmp Lrounds_16_xx
-.align 16
-Lrounds_16_xx:
-.rep 16
- ROUND_16_XX T1, i
- i = (i+1)
-.endr
-
- cmp $ROUNDS,ROUND
- jb Lrounds_16_xx
-
- # add old digest
- vpaddd _digest+0*SZ8(%rsp), a, a
- vpaddd _digest+1*SZ8(%rsp), b, b
- vpaddd _digest+2*SZ8(%rsp), c, c
- vpaddd _digest+3*SZ8(%rsp), d, d
- vpaddd _digest+4*SZ8(%rsp), e, e
- vpaddd _digest+5*SZ8(%rsp), f, f
- vpaddd _digest+6*SZ8(%rsp), g, g
- vpaddd _digest+7*SZ8(%rsp), h, h
-
- sub $1, INP_SIZE # unit is blocks
- jne lloop
-
- # write back to memory (state object) the transposed digest
- vmovdqu a, 0*SHA256_DIGEST_ROW_SIZE(STATE)
- vmovdqu b, 1*SHA256_DIGEST_ROW_SIZE(STATE)
- vmovdqu c, 2*SHA256_DIGEST_ROW_SIZE(STATE)
- vmovdqu d, 3*SHA256_DIGEST_ROW_SIZE(STATE)
- vmovdqu e, 4*SHA256_DIGEST_ROW_SIZE(STATE)
- vmovdqu f, 5*SHA256_DIGEST_ROW_SIZE(STATE)
- vmovdqu g, 6*SHA256_DIGEST_ROW_SIZE(STATE)
- vmovdqu h, 7*SHA256_DIGEST_ROW_SIZE(STATE)
-
- # update input pointers
- add IDX, inp0
- mov inp0, _args_data_ptr+0*8(STATE)
- add IDX, inp1
- mov inp1, _args_data_ptr+1*8(STATE)
- add IDX, inp2
- mov inp2, _args_data_ptr+2*8(STATE)
- add IDX, inp3
- mov inp3, _args_data_ptr+3*8(STATE)
- add IDX, inp4
- mov inp4, _args_data_ptr+4*8(STATE)
- add IDX, inp5
- mov inp5, _args_data_ptr+5*8(STATE)
- add IDX, inp6
- mov inp6, _args_data_ptr+6*8(STATE)
- add IDX, inp7
- mov inp7, _args_data_ptr+7*8(STATE)
-
- # Postamble
- mov _rsp(%rsp), %rsp
-
- # restore callee-saved clobbered registers
- pop %r15
- pop %r14
- pop %r13
- pop %r12
-
- ret
-ENDPROC(sha256_x8_avx2)
-
-.section .rodata.K256_8, "a", @progbits
-.align 64
-K256_8:
- .octa 0x428a2f98428a2f98428a2f98428a2f98
- .octa 0x428a2f98428a2f98428a2f98428a2f98
- .octa 0x71374491713744917137449171374491
- .octa 0x71374491713744917137449171374491
- .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
- .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
- .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
- .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
- .octa 0x3956c25b3956c25b3956c25b3956c25b
- .octa 0x3956c25b3956c25b3956c25b3956c25b
- .octa 0x59f111f159f111f159f111f159f111f1
- .octa 0x59f111f159f111f159f111f159f111f1
- .octa 0x923f82a4923f82a4923f82a4923f82a4
- .octa 0x923f82a4923f82a4923f82a4923f82a4
- .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
- .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
- .octa 0xd807aa98d807aa98d807aa98d807aa98
- .octa 0xd807aa98d807aa98d807aa98d807aa98
- .octa 0x12835b0112835b0112835b0112835b01
- .octa 0x12835b0112835b0112835b0112835b01
- .octa 0x243185be243185be243185be243185be
- .octa 0x243185be243185be243185be243185be
- .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3
- .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3
- .octa 0x72be5d7472be5d7472be5d7472be5d74
- .octa 0x72be5d7472be5d7472be5d7472be5d74
- .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe
- .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe
- .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7
- .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7
- .octa 0xc19bf174c19bf174c19bf174c19bf174
- .octa 0xc19bf174c19bf174c19bf174c19bf174
- .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1
- .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1
- .octa 0xefbe4786efbe4786efbe4786efbe4786
- .octa 0xefbe4786efbe4786efbe4786efbe4786
- .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6
- .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6
- .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc
- .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc
- .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f
- .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f
- .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa
- .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa
- .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
- .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
- .octa 0x76f988da76f988da76f988da76f988da
- .octa 0x76f988da76f988da76f988da76f988da
- .octa 0x983e5152983e5152983e5152983e5152
- .octa 0x983e5152983e5152983e5152983e5152
- .octa 0xa831c66da831c66da831c66da831c66d
- .octa 0xa831c66da831c66da831c66da831c66d
- .octa 0xb00327c8b00327c8b00327c8b00327c8
- .octa 0xb00327c8b00327c8b00327c8b00327c8
- .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7
- .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7
- .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
- .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
- .octa 0xd5a79147d5a79147d5a79147d5a79147
- .octa 0xd5a79147d5a79147d5a79147d5a79147
- .octa 0x06ca635106ca635106ca635106ca6351
- .octa 0x06ca635106ca635106ca635106ca6351
- .octa 0x14292967142929671429296714292967
- .octa 0x14292967142929671429296714292967
- .octa 0x27b70a8527b70a8527b70a8527b70a85
- .octa 0x27b70a8527b70a8527b70a8527b70a85
- .octa 0x2e1b21382e1b21382e1b21382e1b2138
- .octa 0x2e1b21382e1b21382e1b21382e1b2138
- .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
- .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
- .octa 0x53380d1353380d1353380d1353380d13
- .octa 0x53380d1353380d1353380d1353380d13
- .octa 0x650a7354650a7354650a7354650a7354
- .octa 0x650a7354650a7354650a7354650a7354
- .octa 0x766a0abb766a0abb766a0abb766a0abb
- .octa 0x766a0abb766a0abb766a0abb766a0abb
- .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e
- .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e
- .octa 0x92722c8592722c8592722c8592722c85
- .octa 0x92722c8592722c8592722c8592722c85
- .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
- .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
- .octa 0xa81a664ba81a664ba81a664ba81a664b
- .octa 0xa81a664ba81a664ba81a664ba81a664b
- .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70
- .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70
- .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3
- .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3
- .octa 0xd192e819d192e819d192e819d192e819
- .octa 0xd192e819d192e819d192e819d192e819
- .octa 0xd6990624d6990624d6990624d6990624
- .octa 0xd6990624d6990624d6990624d6990624
- .octa 0xf40e3585f40e3585f40e3585f40e3585
- .octa 0xf40e3585f40e3585f40e3585f40e3585
- .octa 0x106aa070106aa070106aa070106aa070
- .octa 0x106aa070106aa070106aa070106aa070
- .octa 0x19a4c11619a4c11619a4c11619a4c116
- .octa 0x19a4c11619a4c11619a4c11619a4c116
- .octa 0x1e376c081e376c081e376c081e376c08
- .octa 0x1e376c081e376c081e376c081e376c08
- .octa 0x2748774c2748774c2748774c2748774c
- .octa 0x2748774c2748774c2748774c2748774c
- .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5
- .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5
- .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3
- .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3
- .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
- .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
- .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
- .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
- .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3
- .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3
- .octa 0x748f82ee748f82ee748f82ee748f82ee
- .octa 0x748f82ee748f82ee748f82ee748f82ee
- .octa 0x78a5636f78a5636f78a5636f78a5636f
- .octa 0x78a5636f78a5636f78a5636f78a5636f
- .octa 0x84c8781484c8781484c8781484c87814
- .octa 0x84c8781484c8781484c8781484c87814
- .octa 0x8cc702088cc702088cc702088cc70208
- .octa 0x8cc702088cc702088cc702088cc70208
- .octa 0x90befffa90befffa90befffa90befffa
- .octa 0x90befffa90befffa90befffa90befffa
- .octa 0xa4506ceba4506ceba4506ceba4506ceb
- .octa 0xa4506ceba4506ceba4506ceba4506ceb
- .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
- .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
- .octa 0xc67178f2c67178f2c67178f2c67178f2
- .octa 0xc67178f2c67178f2c67178f2c67178f2
-
-.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
-.align 32
-PSHUFFLE_BYTE_FLIP_MASK:
-.octa 0x0c0d0e0f08090a0b0405060700010203
-.octa 0x0c0d0e0f08090a0b0405060700010203
-
-.section .rodata.cst256.K256, "aM", @progbits, 256
-.align 64
-.global K256
-K256:
- .int 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
- .int 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
- .int 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
- .int 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
- .int 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
- .int 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
- .int 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
- .int 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
- .int 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
- .int 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
- .int 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
- .int 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
- .int 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
- .int 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
- .int 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
- .int 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
diff --git a/arch/x86/crypto/sha512-mb/Makefile b/arch/x86/crypto/sha512-mb/Makefile
deleted file mode 100644
index 90f1ef69152e..000000000000
--- a/arch/x86/crypto/sha512-mb/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Arch-specific CryptoAPI modules.
-#
-
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
- $(comma)4)$(comma)%ymm2,yes,no)
-ifeq ($(avx2_supported),yes)
- obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o
- sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \
- sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o
-endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c
deleted file mode 100644
index 26b85678012d..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb.c
+++ /dev/null
@@ -1,1047 +0,0 @@
-/*
- * Multi buffer SHA512 algorithm Glue Code
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <asm/byteorder.h>
-#include <linux/hardirq.h>
-#include <asm/fpu/api.h>
-#include "sha512_mb_ctx.h"
-
-#define FLUSH_INTERVAL 1000 /* in usec */
-
-static struct mcryptd_alg_state sha512_mb_alg_state;
-
-struct sha512_mb_ctx {
- struct mcryptd_ahash *mcryptd_tfm;
-};
-
-static inline struct mcryptd_hash_request_ctx
- *cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx)
-{
- struct ahash_request *areq;
-
- areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
- return container_of(areq, struct mcryptd_hash_request_ctx, areq);
-}
-
-static inline struct ahash_request
- *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
-{
- return container_of((void *) ctx, struct ahash_request, __ctx);
-}
-
-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
- struct ahash_request *areq)
-{
- rctx->flag = HASH_UPDATE;
-}
-
-static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_submit)
- (struct sha512_mb_mgr *state,
- struct job_sha512 *job);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_flush)
- (struct sha512_mb_mgr *state);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
- (struct sha512_mb_mgr *state);
-
-inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
- uint64_t total_len)
-{
- uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1);
-
- memset(&padblock[i], 0, SHA512_BLOCK_SIZE);
- padblock[i] = 0x80;
-
- i += ((SHA512_BLOCK_SIZE - 1) &
- (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1)))
- + 1 + SHA512_PADLENGTHFIELD_SIZE;
-
-#if SHA512_PADLENGTHFIELD_SIZE == 16
- *((uint64_t *) &padblock[i - 16]) = 0;
-#endif
-
- *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
-
- /* Number of extra blocks to hash */
- return i >> SHA512_LOG2_BLOCK_SIZE;
-}
-
-static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit
- (struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx)
-{
- while (ctx) {
- if (ctx->status & HASH_CTX_STS_COMPLETE) {
- /* Clear PROCESSING bit */
- ctx->status = HASH_CTX_STS_COMPLETE;
- return ctx;
- }
-
- /*
- * If the extra blocks are empty, begin hashing what remains
- * in the user's buffer.
- */
- if (ctx->partial_block_buffer_length == 0 &&
- ctx->incoming_buffer_length) {
-
- const void *buffer = ctx->incoming_buffer;
- uint32_t len = ctx->incoming_buffer_length;
- uint32_t copy_len;
-
- /*
- * Only entire blocks can be hashed.
- * Copy remainder to extra blocks buffer.
- */
- copy_len = len & (SHA512_BLOCK_SIZE-1);
-
- if (copy_len) {
- len -= copy_len;
- memcpy(ctx->partial_block_buffer,
- ((const char *) buffer + len),
- copy_len);
- ctx->partial_block_buffer_length = copy_len;
- }
-
- ctx->incoming_buffer_length = 0;
-
- /* len should be a multiple of the block size now */
- assert((len % SHA512_BLOCK_SIZE) == 0);
-
- /* Set len to the number of blocks to be hashed */
- len >>= SHA512_LOG2_BLOCK_SIZE;
-
- if (len) {
-
- ctx->job.buffer = (uint8_t *) buffer;
- ctx->job.len = len;
- ctx = (struct sha512_hash_ctx *)
- sha512_job_mgr_submit(&mgr->mgr,
- &ctx->job);
- continue;
- }
- }
-
- /*
- * If the extra blocks are not empty, then we are
- * either on the last block(s) or we need more
- * user input before continuing.
- */
- if (ctx->status & HASH_CTX_STS_LAST) {
-
- uint8_t *buf = ctx->partial_block_buffer;
- uint32_t n_extra_blocks =
- sha512_pad(buf, ctx->total_length);
-
- ctx->status = (HASH_CTX_STS_PROCESSING |
- HASH_CTX_STS_COMPLETE);
- ctx->job.buffer = buf;
- ctx->job.len = (uint32_t) n_extra_blocks;
- ctx = (struct sha512_hash_ctx *)
- sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
- continue;
- }
-
- if (ctx)
- ctx->status = HASH_CTX_STS_IDLE;
- return ctx;
- }
-
- return NULL;
-}
-
-static struct sha512_hash_ctx
- *sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate)
-{
- /*
- * If get_comp_job returns NULL, there are no jobs complete.
- * If get_comp_job returns a job, verify that it is safe to return to
- * the user.
- * If it is not ready, resubmit the job to finish processing.
- * If sha512_ctx_mgr_resubmit returned a job, it is ready to be
- * returned.
- * Otherwise, all jobs currently being managed by the hash_ctx_mgr
- * still need processing.
- */
- struct sha512_ctx_mgr *mgr;
- struct sha512_hash_ctx *ctx;
- unsigned long flags;
-
- mgr = cstate->mgr;
- spin_lock_irqsave(&cstate->work_lock, flags);
- ctx = (struct sha512_hash_ctx *)
- sha512_job_mgr_get_comp_job(&mgr->mgr);
- ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
- spin_unlock_irqrestore(&cstate->work_lock, flags);
- return ctx;
-}
-
-static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
-{
- sha512_job_mgr_init(&mgr->mgr);
-}
-
-static struct sha512_hash_ctx
- *sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate,
- struct sha512_hash_ctx *ctx,
- const void *buffer,
- uint32_t len,
- int flags)
-{
- struct sha512_ctx_mgr *mgr;
- unsigned long irqflags;
-
- mgr = cstate->mgr;
- spin_lock_irqsave(&cstate->work_lock, irqflags);
- if (flags & ~(HASH_UPDATE | HASH_LAST)) {
- /* User should not pass anything other than UPDATE or LAST */
- ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
- goto unlock;
- }
-
- if (ctx->status & HASH_CTX_STS_PROCESSING) {
- /* Cannot submit to a currently processing job. */
- ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
- goto unlock;
- }
-
- if (ctx->status & HASH_CTX_STS_COMPLETE) {
- /* Cannot update a finished job. */
- ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
- goto unlock;
- }
-
- /*
- * If we made it here, there were no errors during this call to
- * submit
- */
- ctx->error = HASH_CTX_ERROR_NONE;
-
- /* Store buffer ptr info from user */
- ctx->incoming_buffer = buffer;
- ctx->incoming_buffer_length = len;
-
- /*
- * Store the user's request flags and mark this ctx as currently being
- * processed.
- */
- ctx->status = (flags & HASH_LAST) ?
- (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
- HASH_CTX_STS_PROCESSING;
-
- /* Advance byte counter */
- ctx->total_length += len;
-
- /*
- * If there is anything currently buffered in the extra blocks,
- * append to it until it contains a whole block.
- * Or if the user's buffer contains less than a whole block,
- * append as much as possible to the extra block.
- */
- if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) {
- /* Compute how many bytes to copy from user buffer into extra
- * block
- */
- uint32_t copy_len = SHA512_BLOCK_SIZE -
- ctx->partial_block_buffer_length;
- if (len < copy_len)
- copy_len = len;
-
- if (copy_len) {
- /* Copy and update relevant pointers and counters */
- memcpy
- (&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
- buffer, copy_len);
-
- ctx->partial_block_buffer_length += copy_len;
- ctx->incoming_buffer = (const void *)
- ((const char *)buffer + copy_len);
- ctx->incoming_buffer_length = len - copy_len;
- }
-
- /* The extra block should never contain more than 1 block
- * here
- */
- assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE);
-
- /* If the extra block buffer contains exactly 1 block, it can
- * be hashed.
- */
- if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) {
- ctx->partial_block_buffer_length = 0;
-
- ctx->job.buffer = ctx->partial_block_buffer;
- ctx->job.len = 1;
- ctx = (struct sha512_hash_ctx *)
- sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
- }
- }
-
- ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
-unlock:
- spin_unlock_irqrestore(&cstate->work_lock, irqflags);
- return ctx;
-}
-
-static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate)
-{
- struct sha512_ctx_mgr *mgr;
- struct sha512_hash_ctx *ctx;
- unsigned long flags;
-
- mgr = cstate->mgr;
- spin_lock_irqsave(&cstate->work_lock, flags);
- while (1) {
- ctx = (struct sha512_hash_ctx *)
- sha512_job_mgr_flush(&mgr->mgr);
-
- /* If flush returned 0, there are no more jobs in flight. */
- if (!ctx)
- break;
-
- /*
- * If flush returned a job, resubmit the job to finish
- * processing.
- */
- ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
-
- /*
- * If sha512_ctx_mgr_resubmit returned a job, it is ready to
- * be returned. Otherwise, all jobs currently being managed by
- * the sha512_ctx_mgr still need processing. Loop.
- */
- if (ctx)
- break;
- }
- spin_unlock_irqrestore(&cstate->work_lock, flags);
- return ctx;
-}
-
-static int sha512_mb_init(struct ahash_request *areq)
-{
- struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
- hash_ctx_init(sctx);
- sctx->job.result_digest[0] = SHA512_H0;
- sctx->job.result_digest[1] = SHA512_H1;
- sctx->job.result_digest[2] = SHA512_H2;
- sctx->job.result_digest[3] = SHA512_H3;
- sctx->job.result_digest[4] = SHA512_H4;
- sctx->job.result_digest[5] = SHA512_H5;
- sctx->job.result_digest[6] = SHA512_H6;
- sctx->job.result_digest[7] = SHA512_H7;
- sctx->total_length = 0;
- sctx->partial_block_buffer_length = 0;
- sctx->status = HASH_CTX_STS_IDLE;
-
- return 0;
-}
-
-static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
-{
- int i;
- struct sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
- __be64 *dst = (__be64 *) rctx->out;
-
- for (i = 0; i < 8; ++i)
- dst[i] = cpu_to_be64(sctx->job.result_digest[i]);
-
- return 0;
-}
-
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
- struct mcryptd_alg_cstate *cstate, bool flush)
-{
- int flag = HASH_UPDATE;
- int nbytes, err = 0;
- struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
- struct sha512_hash_ctx *sha_ctx;
-
- /* more work ? */
- while (!(rctx->flag & HASH_DONE)) {
- nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
- if (nbytes < 0) {
- err = nbytes;
- goto out;
- }
- /* check if the walk is done */
- if (crypto_ahash_walk_last(&rctx->walk)) {
- rctx->flag |= HASH_DONE;
- if (rctx->flag & HASH_FINAL)
- flag |= HASH_LAST;
-
- }
- sha_ctx = (struct sha512_hash_ctx *)
- ahash_request_ctx(&rctx->areq);
- kernel_fpu_begin();
- sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx,
- rctx->walk.data, nbytes, flag);
- if (!sha_ctx) {
- if (flush)
- sha_ctx = sha512_ctx_mgr_flush(cstate);
- }
- kernel_fpu_end();
- if (sha_ctx)
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- else {
- rctx = NULL;
- goto out;
- }
- }
-
- /* copy the results */
- if (rctx->flag & HASH_FINAL)
- sha512_mb_set_results(rctx);
-
-out:
- *ret_rctx = rctx;
- return err;
-}
-
-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
- struct mcryptd_alg_cstate *cstate,
- int err)
-{
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha512_hash_ctx *sha_ctx;
- struct mcryptd_hash_request_ctx *req_ctx;
- int ret;
- unsigned long flags;
-
- /* remove from work list */
- spin_lock_irqsave(&cstate->work_lock, flags);
- list_del(&rctx->waiter);
- spin_unlock_irqrestore(&cstate->work_lock, flags);
-
- if (irqs_disabled())
- rctx->complete(&req->base, err);
- else {
- local_bh_disable();
- rctx->complete(&req->base, err);
- local_bh_enable();
- }
-
- /* check to see if there are other jobs that are done */
- sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
- while (sha_ctx) {
- req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&req_ctx, cstate, false);
- if (req_ctx) {
- spin_lock_irqsave(&cstate->work_lock, flags);
- list_del(&req_ctx->waiter);
- spin_unlock_irqrestore(&cstate->work_lock, flags);
-
- req = cast_mcryptd_ctx_to_req(req_ctx);
- if (irqs_disabled())
- req_ctx->complete(&req->base, ret);
- else {
- local_bh_disable();
- req_ctx->complete(&req->base, ret);
- local_bh_enable();
- }
- }
- sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
- }
-
- return 0;
-}
-
-static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
- struct mcryptd_alg_cstate *cstate)
-{
- unsigned long next_flush;
- unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
- unsigned long flags;
-
- /* initialize tag */
- rctx->tag.arrival = jiffies; /* tag the arrival time */
- rctx->tag.seq_num = cstate->next_seq_num++;
- next_flush = rctx->tag.arrival + delay;
- rctx->tag.expire = next_flush;
-
- spin_lock_irqsave(&cstate->work_lock, flags);
- list_add_tail(&rctx->waiter, &cstate->work_list);
- spin_unlock_irqrestore(&cstate->work_lock, flags);
-
- mcryptd_arm_flusher(cstate, delay);
-}
-
-static int sha512_mb_update(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx,
- areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha512_hash_ctx *sha_ctx;
- int ret = 0, nbytes;
-
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
- if (nbytes < 0) {
- ret = nbytes;
- goto done;
- }
-
- if (crypto_ahash_walk_last(&rctx->walk))
- rctx->flag |= HASH_DONE;
-
- /* submit */
- sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
- sha512_mb_add_list(rctx, cstate);
- kernel_fpu_begin();
- sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
- nbytes, HASH_UPDATE);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
-
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha512_mb_finup(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx,
- areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha512_hash_ctx *sha_ctx;
- int ret = 0, flag = HASH_UPDATE, nbytes;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
- if (nbytes < 0) {
- ret = nbytes;
- goto done;
- }
-
- if (crypto_ahash_walk_last(&rctx->walk)) {
- rctx->flag |= HASH_DONE;
- flag = HASH_LAST;
- }
-
- /* submit */
- rctx->flag |= HASH_FINAL;
- sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
- sha512_mb_add_list(rctx, cstate);
-
- kernel_fpu_begin();
- sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
- nbytes, flag);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha512_mb_final(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx,
- areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
- struct sha512_hash_ctx *sha_ctx;
- int ret = 0;
- u8 data;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- rctx->flag |= HASH_DONE | HASH_FINAL;
-
- sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
- /* flag HASH_FINAL and 0 data size */
- sha512_mb_add_list(rctx, cstate);
- kernel_fpu_begin();
- sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha512_mb_export(struct ahash_request *areq, void *out)
-{
- struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
- memcpy(out, sctx, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha512_mb_import(struct ahash_request *areq, const void *in)
-{
- struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
- memcpy(sctx, in, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
- struct mcryptd_ahash *mcryptd_tfm;
- struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
- struct mcryptd_hash_ctx *mctx;
-
- mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb",
- CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(mcryptd_tfm))
- return PTR_ERR(mcryptd_tfm);
- mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
- mctx->alg_state = &sha512_mb_alg_state;
- ctx->mcryptd_tfm = mcryptd_tfm;
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- crypto_ahash_reqsize(&mcryptd_tfm->base));
-
- return 0;
-}
-
-static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
- struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
- mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm)
-{
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- sizeof(struct sha512_hash_ctx));
-
- return 0;
-}
-
-static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm)
-{
- struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
- mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static struct ahash_alg sha512_mb_areq_alg = {
- .init = sha512_mb_init,
- .update = sha512_mb_update,
- .final = sha512_mb_final,
- .finup = sha512_mb_finup,
- .export = sha512_mb_export,
- .import = sha512_mb_import,
- .halg = {
- .digestsize = SHA512_DIGEST_SIZE,
- .statesize = sizeof(struct sha512_hash_ctx),
- .base = {
- .cra_name = "__sha512-mb",
- .cra_driver_name = "__intel_sha512-mb",
- .cra_priority = 100,
- /*
- * use ASYNC flag as some buffers in multi-buffer
- * algo may not have completed before hashing thread
- * sleep
- */
- .cra_flags = CRYPTO_ALG_ASYNC |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SHA512_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT
- (sha512_mb_areq_alg.halg.base.cra_list),
- .cra_init = sha512_mb_areq_init_tfm,
- .cra_exit = sha512_mb_areq_exit_tfm,
- .cra_ctxsize = sizeof(struct sha512_hash_ctx),
- }
- }
-};
-
-static int sha512_mb_async_init(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_init(mcryptd_req);
-}
-
-static int sha512_mb_async_update(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_update(mcryptd_req);
-}
-
-static int sha512_mb_async_finup(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_finup(mcryptd_req);
-}
-
-static int sha512_mb_async_final(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_final(mcryptd_req);
-}
-
-static int sha512_mb_async_digest(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_digest(mcryptd_req);
-}
-
-static int sha512_mb_async_export(struct ahash_request *req, void *out)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_export(mcryptd_req, out);
-}
-
-static int sha512_mb_async_import(struct ahash_request *req, const void *in)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
- struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
- struct mcryptd_hash_request_ctx *rctx;
- struct ahash_request *areq;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- rctx = ahash_request_ctx(mcryptd_req);
-
- areq = &rctx->areq;
-
- ahash_request_set_tfm(areq, child);
- ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
- rctx->complete, req);
-
- return crypto_ahash_import(mcryptd_req, in);
-}
-
-static struct ahash_alg sha512_mb_async_alg = {
- .init = sha512_mb_async_init,
- .update = sha512_mb_async_update,
- .final = sha512_mb_async_final,
- .finup = sha512_mb_async_finup,
- .digest = sha512_mb_async_digest,
- .export = sha512_mb_async_export,
- .import = sha512_mb_async_import,
- .halg = {
- .digestsize = SHA512_DIGEST_SIZE,
- .statesize = sizeof(struct sha512_hash_ctx),
- .base = {
- .cra_name = "sha512",
- .cra_driver_name = "sha512_mb",
- /*
- * Low priority, since with few concurrent hash requests
- * this is extremely slow due to the flush delay. Users
- * whose workloads would benefit from this can request
- * it explicitly by driver name, or can increase its
- * priority at runtime using NETLINK_CRYPTO.
- */
- .cra_priority = 50,
- .cra_flags = CRYPTO_ALG_ASYNC,
- .cra_blocksize = SHA512_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT
- (sha512_mb_async_alg.halg.base.cra_list),
- .cra_init = sha512_mb_async_init_tfm,
- .cra_exit = sha512_mb_async_exit_tfm,
- .cra_ctxsize = sizeof(struct sha512_mb_ctx),
- .cra_alignmask = 0,
- },
- },
-};
-
-static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate)
-{
- struct mcryptd_hash_request_ctx *rctx;
- unsigned long cur_time;
- unsigned long next_flush = 0;
- struct sha512_hash_ctx *sha_ctx;
-
-
- cur_time = jiffies;
-
- while (!list_empty(&cstate->work_list)) {
- rctx = list_entry(cstate->work_list.next,
- struct mcryptd_hash_request_ctx, waiter);
- if time_before(cur_time, rctx->tag.expire)
- break;
- kernel_fpu_begin();
- sha_ctx = (struct sha512_hash_ctx *)
- sha512_ctx_mgr_flush(cstate);
- kernel_fpu_end();
- if (!sha_ctx) {
- pr_err("sha512_mb error: nothing got flushed for"
- " non-empty list\n");
- break;
- }
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- sha_finish_walk(&rctx, cstate, true);
- sha_complete_job(rctx, cstate, 0);
- }
-
- if (!list_empty(&cstate->work_list)) {
- rctx = list_entry(cstate->work_list.next,
- struct mcryptd_hash_request_ctx, waiter);
- /* get the hash context and then flush time */
- next_flush = rctx->tag.expire;
- mcryptd_arm_flusher(cstate, get_delay(next_flush));
- }
- return next_flush;
-}
-
-static int __init sha512_mb_mod_init(void)
-{
-
- int cpu;
- int err;
- struct mcryptd_alg_cstate *cpu_state;
-
- /* check for dependent cpu features */
- if (!boot_cpu_has(X86_FEATURE_AVX2) ||
- !boot_cpu_has(X86_FEATURE_BMI2))
- return -ENODEV;
-
- /* initialize multibuffer structures */
- sha512_mb_alg_state.alg_cstate =
- alloc_percpu(struct mcryptd_alg_cstate);
-
- sha512_job_mgr_init = sha512_mb_mgr_init_avx2;
- sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2;
- sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2;
- sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2;
-
- if (!sha512_mb_alg_state.alg_cstate)
- return -ENOMEM;
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
- cpu_state->next_flush = 0;
- cpu_state->next_seq_num = 0;
- cpu_state->flusher_engaged = false;
- INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
- cpu_state->cpu = cpu;
- cpu_state->alg_state = &sha512_mb_alg_state;
- cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr),
- GFP_KERNEL);
- if (!cpu_state->mgr)
- goto err2;
- sha512_ctx_mgr_init(cpu_state->mgr);
- INIT_LIST_HEAD(&cpu_state->work_list);
- spin_lock_init(&cpu_state->work_lock);
- }
- sha512_mb_alg_state.flusher = &sha512_mb_flusher;
-
- err = crypto_register_ahash(&sha512_mb_areq_alg);
- if (err)
- goto err2;
- err = crypto_register_ahash(&sha512_mb_async_alg);
- if (err)
- goto err1;
-
-
- return 0;
-err1:
- crypto_unregister_ahash(&sha512_mb_areq_alg);
-err2:
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
- kfree(cpu_state->mgr);
- }
- free_percpu(sha512_mb_alg_state.alg_cstate);
- return -ENODEV;
-}
-
-static void __exit sha512_mb_mod_fini(void)
-{
- int cpu;
- struct mcryptd_alg_cstate *cpu_state;
-
- crypto_unregister_ahash(&sha512_mb_async_alg);
- crypto_unregister_ahash(&sha512_mb_areq_alg);
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
- kfree(cpu_state->mgr);
- }
- free_percpu(sha512_mb_alg_state.alg_cstate);
-}
-
-module_init(sha512_mb_mod_init);
-module_exit(sha512_mb_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated");
-
-MODULE_ALIAS("sha512");
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
deleted file mode 100644
index e5c465bd821e..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Header file for multi buffer SHA512 context
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SHA_MB_CTX_INTERNAL_H
-#define _SHA_MB_CTX_INTERNAL_H
-
-#include "sha512_mb_mgr.h"
-
-#define HASH_UPDATE 0x00
-#define HASH_LAST 0x01
-#define HASH_DONE 0x02
-#define HASH_FINAL 0x04
-
-#define HASH_CTX_STS_IDLE 0x00
-#define HASH_CTX_STS_PROCESSING 0x01
-#define HASH_CTX_STS_LAST 0x02
-#define HASH_CTX_STS_COMPLETE 0x04
-
-enum hash_ctx_error {
- HASH_CTX_ERROR_NONE = 0,
- HASH_CTX_ERROR_INVALID_FLAGS = -1,
- HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
- HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
-};
-
-#define hash_ctx_user_data(ctx) ((ctx)->user_data)
-#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
-#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
-#define hash_ctx_status(ctx) ((ctx)->status)
-#define hash_ctx_error(ctx) ((ctx)->error)
-#define hash_ctx_init(ctx) \
- do { \
- (ctx)->error = HASH_CTX_ERROR_NONE; \
- (ctx)->status = HASH_CTX_STS_COMPLETE; \
- } while (0)
-
-/* Hash Constants and Typedefs */
-#define SHA512_DIGEST_LENGTH 8
-#define SHA512_LOG2_BLOCK_SIZE 7
-
-#define SHA512_PADLENGTHFIELD_SIZE 16
-
-#ifdef SHA_MB_DEBUG
-#define assert(expr) \
-do { \
- if (unlikely(!(expr))) { \
- printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
- #expr, __FILE__, __func__, __LINE__); \
- } \
-} while (0)
-#else
-#define assert(expr) do {} while (0)
-#endif
-
-struct sha512_ctx_mgr {
- struct sha512_mb_mgr mgr;
-};
-
-/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */
-
-struct sha512_hash_ctx {
- /* Must be at struct offset 0 */
- struct job_sha512 job;
- /* status flag */
- int status;
- /* error flag */
- int error;
-
- uint64_t total_length;
- const void *incoming_buffer;
- uint32_t incoming_buffer_length;
- uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2];
- uint32_t partial_block_buffer_length;
- void *user_data;
-};
-
-#endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
deleted file mode 100644
index 178f17eef382..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Header file for multi buffer SHA512 algorithm manager
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __SHA_MB_MGR_H
-#define __SHA_MB_MGR_H
-
-#include <linux/types.h>
-
-#define NUM_SHA512_DIGEST_WORDS 8
-
-enum job_sts {STS_UNKNOWN = 0,
- STS_BEING_PROCESSED = 1,
- STS_COMPLETED = 2,
- STS_INTERNAL_ERROR = 3,
- STS_ERROR = 4
-};
-
-struct job_sha512 {
- u8 *buffer;
- u64 len;
- u64 result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32);
- enum job_sts status;
- void *user_data;
-};
-
-struct sha512_args_x4 {
- uint64_t digest[8][4];
- uint8_t *data_ptr[4];
-};
-
-struct sha512_lane_data {
- struct job_sha512 *job_in_lane;
-};
-
-struct sha512_mb_mgr {
- struct sha512_args_x4 args;
-
- uint64_t lens[4];
-
- /* each byte is index (0...7) of unused lanes */
- uint64_t unused_lanes;
- /* byte 4 is set to FF as a flag */
- struct sha512_lane_data ldata[4];
-};
-
-#define SHA512_MB_MGR_NUM_LANES_AVX2 4
-
-void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state);
-struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state,
- struct job_sha512 *job);
-struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state);
-struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state);
-
-#endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
deleted file mode 100644
index cf2636d4c9ba..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Header file for multi buffer SHA256 algorithm data structure
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# Macros for defining data structures
-
-# Usage example
-
-#START_FIELDS # JOB_AES
-### name size align
-#FIELD _plaintext, 8, 8 # pointer to plaintext
-#FIELD _ciphertext, 8, 8 # pointer to ciphertext
-#FIELD _IV, 16, 8 # IV
-#FIELD _keys, 8, 8 # pointer to keys
-#FIELD _len, 4, 4 # length in bytes
-#FIELD _status, 4, 4 # status enumeration
-#FIELD _user_data, 8, 8 # pointer to user data
-#UNION _union, size1, align1, \
-# size2, align2, \
-# size3, align3, \
-# ...
-#END_FIELDS
-#%assign _JOB_AES_size _FIELD_OFFSET
-#%assign _JOB_AES_align _STRUCT_ALIGN
-
-#########################################################################
-
-# Alternate "struc-like" syntax:
-# STRUCT job_aes2
-# RES_Q .plaintext, 1
-# RES_Q .ciphertext, 1
-# RES_DQ .IV, 1
-# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
-# RES_U .union, size1, align1, \
-# size2, align2, \
-# ...
-# ENDSTRUCT
-# # Following only needed if nesting
-# %assign job_aes2_size _FIELD_OFFSET
-# %assign job_aes2_align _STRUCT_ALIGN
-#
-# RES_* macros take a name, a count and an optional alignment.
-# The count in in terms of the base size of the macro, and the
-# default alignment is the base size.
-# The macros are:
-# Macro Base size
-# RES_B 1
-# RES_W 2
-# RES_D 4
-# RES_Q 8
-# RES_DQ 16
-# RES_Y 32
-# RES_Z 64
-#
-# RES_U defines a union. It's arguments are a name and two or more
-# pairs of "size, alignment"
-#
-# The two assigns are only needed if this structure is being nested
-# within another. Even if the assigns are not done, one can still use
-# STRUCT_NAME_size as the size of the structure.
-#
-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
-#
-# The differences between this and using "struc" directly are that each
-# type is implicitly aligned to its natural length (although this can be
-# over-ridden with an explicit third parameter), and that the structure
-# is padded at the end to its overall alignment.
-#
-
-#########################################################################
-
-#ifndef _DATASTRUCT_ASM_
-#define _DATASTRUCT_ASM_
-
-#define PTR_SZ 8
-#define SHA512_DIGEST_WORD_SIZE 8
-#define SHA512_MB_MGR_NUM_LANES_AVX2 4
-#define NUM_SHA512_DIGEST_WORDS 8
-#define SZ4 4*SHA512_DIGEST_WORD_SIZE
-#define ROUNDS 80*SZ4
-#define SHA512_DIGEST_ROW_SIZE (SHA512_MB_MGR_NUM_LANES_AVX2 * 8)
-
-# START_FIELDS
-.macro START_FIELDS
- _FIELD_OFFSET = 0
- _STRUCT_ALIGN = 0
-.endm
-
-# FIELD name size align
-.macro FIELD name size align
- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
- \name = _FIELD_OFFSET
- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
-.if (\align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = \align
-.endif
-.endm
-
-# END_FIELDS
-.macro END_FIELDS
- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
-.endm
-
-.macro STRUCT p1
-START_FIELDS
-.struc \p1
-.endm
-
-.macro ENDSTRUCT
- tmp = _FIELD_OFFSET
- END_FIELDS
- tmp = (_FIELD_OFFSET - ##tmp)
-.if (tmp > 0)
- .lcomm tmp
-.endm
-
-## RES_int name size align
-.macro RES_int p1 p2 p3
- name = \p1
- size = \p2
- align = .\p3
-
- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
-.align align
-.lcomm name size
- _FIELD_OFFSET = _FIELD_OFFSET + (size)
-.if (align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = align
-.endif
-.endm
-
-# macro RES_B name, size [, align]
-.macro RES_B _name, _size, _align=1
-RES_int _name _size _align
-.endm
-
-# macro RES_W name, size [, align]
-.macro RES_W _name, _size, _align=2
-RES_int _name 2*(_size) _align
-.endm
-
-# macro RES_D name, size [, align]
-.macro RES_D _name, _size, _align=4
-RES_int _name 4*(_size) _align
-.endm
-
-# macro RES_Q name, size [, align]
-.macro RES_Q _name, _size, _align=8
-RES_int _name 8*(_size) _align
-.endm
-
-# macro RES_DQ name, size [, align]
-.macro RES_DQ _name, _size, _align=16
-RES_int _name 16*(_size) _align
-.endm
-
-# macro RES_Y name, size [, align]
-.macro RES_Y _name, _size, _align=32
-RES_int _name 32*(_size) _align
-.endm
-
-# macro RES_Z name, size [, align]
-.macro RES_Z _name, _size, _align=64
-RES_int _name 64*(_size) _align
-.endm
-
-#endif
-
-###################################################################
-### Define SHA512 Out Of Order Data Structures
-###################################################################
-
-START_FIELDS # LANE_DATA
-### name size align
-FIELD _job_in_lane, 8, 8 # pointer to job object
-END_FIELDS
-
- _LANE_DATA_size = _FIELD_OFFSET
- _LANE_DATA_align = _STRUCT_ALIGN
-
-####################################################################
-
-START_FIELDS # SHA512_ARGS_X4
-### name size align
-FIELD _digest, 8*8*4, 4 # transposed digest
-FIELD _data_ptr, 8*4, 8 # array of pointers to data
-END_FIELDS
-
- _SHA512_ARGS_X4_size = _FIELD_OFFSET
- _SHA512_ARGS_X4_align = _STRUCT_ALIGN
-
-#####################################################################
-
-START_FIELDS # MB_MGR
-### name size align
-FIELD _args, _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align
-FIELD _lens, 8*4, 8
-FIELD _unused_lanes, 8, 8
-FIELD _ldata, _LANE_DATA_size*4, _LANE_DATA_align
-END_FIELDS
-
- _MB_MGR_size = _FIELD_OFFSET
- _MB_MGR_align = _STRUCT_ALIGN
-
-_args_digest = _args + _digest
-_args_data_ptr = _args + _data_ptr
-
-#######################################################################
-
-#######################################################################
-#### Define constants
-#######################################################################
-
-#define STS_UNKNOWN 0
-#define STS_BEING_PROCESSED 1
-#define STS_COMPLETED 2
-
-#######################################################################
-#### Define JOB_SHA512 structure
-#######################################################################
-
-START_FIELDS # JOB_SHA512
-### name size align
-FIELD _buffer, 8, 8 # pointer to buffer
-FIELD _len, 8, 8 # length in bytes
-FIELD _result_digest, 8*8, 32 # Digest (output)
-FIELD _status, 4, 4
-FIELD _user_data, 8, 8
-END_FIELDS
-
- _JOB_SHA512_size = _FIELD_OFFSET
- _JOB_SHA512_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
deleted file mode 100644
index 7c629caebc05..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * Flush routine for SHA512 multibuffer
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-.extern sha512_x4_avx2
-
-# LINUX register definitions
-#define arg1 %rdi
-#define arg2 %rsi
-
-# idx needs to be other than arg1, arg2, rbx, r12
-#define idx %rdx
-
-# Common definitions
-#define state arg1
-#define job arg2
-#define len2 arg2
-
-#define unused_lanes %rbx
-#define lane_data %rbx
-#define tmp2 %rbx
-
-#define job_rax %rax
-#define tmp1 %rax
-#define size_offset %rax
-#define tmp %rax
-#define start_offset %rax
-
-#define tmp3 arg1
-
-#define extra_blocks arg2
-#define p arg2
-
-#define tmp4 %r8
-#define lens0 %r8
-
-#define lens1 %r9
-#define lens2 %r10
-#define lens3 %r11
-
-.macro LABEL prefix n
-\prefix\n\():
-.endm
-
-.macro JNE_SKIP i
-jne skip_\i
-.endm
-
-.altmacro
-.macro SET_OFFSET _offset
-offset = \_offset
-.endm
-.noaltmacro
-
-# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state)
-# arg 1 : rcx : state
-ENTRY(sha512_mb_mgr_flush_avx2)
- FRAME_BEGIN
- push %rbx
-
- # If bit (32+3) is set, then all lanes are empty
- mov _unused_lanes(state), unused_lanes
- bt $32+7, unused_lanes
- jc return_null
-
- # find a lane with a non-null job
- xor idx, idx
- offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne one(%rip), idx
- offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne two(%rip), idx
- offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne three(%rip), idx
-
- # copy idx to empty lanes
-copy_lane_data:
- offset = (_args + _data_ptr)
- mov offset(state,idx,8), tmp
-
- I = 0
-.rep 4
- offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
-.altmacro
- JNE_SKIP %I
- offset = (_args + _data_ptr + 8*I)
- mov tmp, offset(state)
- offset = (_lens + 8*I +4)
- movl $0xFFFFFFFF, offset(state)
-LABEL skip_ %I
- I = (I+1)
-.noaltmacro
-.endr
-
- # Find min length
- mov _lens + 0*8(state),lens0
- mov lens0,idx
- mov _lens + 1*8(state),lens1
- cmp idx,lens1
- cmovb lens1,idx
- mov _lens + 2*8(state),lens2
- cmp idx,lens2
- cmovb lens2,idx
- mov _lens + 3*8(state),lens3
- cmp idx,lens3
- cmovb lens3,idx
- mov idx,len2
- and $0xF,idx
- and $~0xFF,len2
- jz len_is_0
-
- sub len2, lens0
- sub len2, lens1
- sub len2, lens2
- sub len2, lens3
- shr $32,len2
- mov lens0, _lens + 0*8(state)
- mov lens1, _lens + 1*8(state)
- mov lens2, _lens + 2*8(state)
- mov lens3, _lens + 3*8(state)
-
- # "state" and "args" are the same address, arg1
- # len is arg2
- call sha512_x4_avx2
- # state and idx are intact
-
-len_is_0:
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- mov _unused_lanes(state), unused_lanes
- shl $8, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens+4(state, idx, 8)
-
- vmovq _args_digest+0*32(state, idx, 8), %xmm0
- vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
- vmovq _args_digest+2*32(state, idx, 8), %xmm1
- vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
- vmovq _args_digest+4*32(state, idx, 8), %xmm2
- vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
- vmovq _args_digest+6*32(state, idx, 8), %xmm3
- vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
-
- vmovdqu %xmm0, _result_digest(job_rax)
- vmovdqu %xmm1, _result_digest+1*16(job_rax)
- vmovdqu %xmm2, _result_digest+2*16(job_rax)
- vmovdqu %xmm3, _result_digest+3*16(job_rax)
-
-return:
- pop %rbx
- FRAME_END
- ret
-
-return_null:
- xor job_rax, job_rax
- jmp return
-ENDPROC(sha512_mb_mgr_flush_avx2)
-.align 16
-
-ENTRY(sha512_mb_mgr_get_comp_job_avx2)
- push %rbx
-
- mov _unused_lanes(state), unused_lanes
- bt $(32+7), unused_lanes
- jc .return_null
-
- # Find min length
- mov _lens(state),lens0
- mov lens0,idx
- mov _lens+1*8(state),lens1
- cmp idx,lens1
- cmovb lens1,idx
- mov _lens+2*8(state),lens2
- cmp idx,lens2
- cmovb lens2,idx
- mov _lens+3*8(state),lens3
- cmp idx,lens3
- cmovb lens3,idx
- test $~0xF,idx
- jnz .return_null
- and $0xF,idx
-
- #process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- mov _unused_lanes(state), unused_lanes
- shl $8, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens+4(state, idx, 8)
-
- vmovq _args_digest(state, idx, 8), %xmm0
- vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
- vmovq _args_digest+2*32(state, idx, 8), %xmm1
- vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
- vmovq _args_digest+4*32(state, idx, 8), %xmm2
- vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
- vmovq _args_digest+6*32(state, idx, 8), %xmm3
- vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
-
- vmovdqu %xmm0, _result_digest+0*16(job_rax)
- vmovdqu %xmm1, _result_digest+1*16(job_rax)
- vmovdqu %xmm2, _result_digest+2*16(job_rax)
- vmovdqu %xmm3, _result_digest+3*16(job_rax)
-
- pop %rbx
-
- ret
-
-.return_null:
- xor job_rax, job_rax
- pop %rbx
- ret
-ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
-
-.section .rodata.cst8.one, "aM", @progbits, 8
-.align 8
-one:
-.quad 1
-
-.section .rodata.cst8.two, "aM", @progbits, 8
-.align 8
-two:
-.quad 2
-
-.section .rodata.cst8.three, "aM", @progbits, 8
-.align 8
-three:
-.quad 3
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
deleted file mode 100644
index d08805032f01..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Initialization code for multi buffer SHA256 algorithm for AVX2
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sha512_mb_mgr.h"
-
-void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
-{
- unsigned int j;
-
- /* initially all lanes are unused */
- state->lens[0] = 0xFFFFFFFF00000000;
- state->lens[1] = 0xFFFFFFFF00000001;
- state->lens[2] = 0xFFFFFFFF00000002;
- state->lens[3] = 0xFFFFFFFF00000003;
-
- state->unused_lanes = 0xFF03020100;
- for (j = 0; j < 4; j++)
- state->ldata[j].job_in_lane = NULL;
-}
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
deleted file mode 100644
index 4ba709ba78e5..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Buffer submit code for multi buffer SHA512 algorithm
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-.extern sha512_x4_avx2
-
-#define arg1 %rdi
-#define arg2 %rsi
-
-#define idx %rdx
-#define last_len %rdx
-
-#define size_offset %rcx
-#define tmp2 %rcx
-
-# Common definitions
-#define state arg1
-#define job arg2
-#define len2 arg2
-#define p2 arg2
-
-#define p %r11
-#define start_offset %r11
-
-#define unused_lanes %rbx
-
-#define job_rax %rax
-#define len %rax
-
-#define lane %r12
-#define tmp3 %r12
-#define lens3 %r12
-
-#define extra_blocks %r8
-#define lens0 %r8
-
-#define tmp %r9
-#define lens1 %r9
-
-#define lane_data %r10
-#define lens2 %r10
-
-#define DWORD_len %eax
-
-# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job)
-# arg 1 : rcx : state
-# arg 2 : rdx : job
-ENTRY(sha512_mb_mgr_submit_avx2)
- FRAME_BEGIN
- push %rbx
- push %r12
-
- mov _unused_lanes(state), unused_lanes
- movzb %bl,lane
- shr $8, unused_lanes
- imul $_LANE_DATA_size, lane,lane_data
- movl $STS_BEING_PROCESSED, _status(job)
- lea _ldata(state, lane_data), lane_data
- mov unused_lanes, _unused_lanes(state)
- movl _len(job), DWORD_len
-
- mov job, _job_in_lane(lane_data)
- movl DWORD_len,_lens+4(state , lane, 8)
-
- # Load digest words from result_digest
- vmovdqu _result_digest+0*16(job), %xmm0
- vmovdqu _result_digest+1*16(job), %xmm1
- vmovdqu _result_digest+2*16(job), %xmm2
- vmovdqu _result_digest+3*16(job), %xmm3
-
- vmovq %xmm0, _args_digest(state, lane, 8)
- vpextrq $1, %xmm0, _args_digest+1*32(state , lane, 8)
- vmovq %xmm1, _args_digest+2*32(state , lane, 8)
- vpextrq $1, %xmm1, _args_digest+3*32(state , lane, 8)
- vmovq %xmm2, _args_digest+4*32(state , lane, 8)
- vpextrq $1, %xmm2, _args_digest+5*32(state , lane, 8)
- vmovq %xmm3, _args_digest+6*32(state , lane, 8)
- vpextrq $1, %xmm3, _args_digest+7*32(state , lane, 8)
-
- mov _buffer(job), p
- mov p, _args_data_ptr(state, lane, 8)
-
- cmp $0xFF, unused_lanes
- jne return_null
-
-start_loop:
-
- # Find min length
- mov _lens+0*8(state),lens0
- mov lens0,idx
- mov _lens+1*8(state),lens1
- cmp idx,lens1
- cmovb lens1, idx
- mov _lens+2*8(state),lens2
- cmp idx,lens2
- cmovb lens2,idx
- mov _lens+3*8(state),lens3
- cmp idx,lens3
- cmovb lens3,idx
- mov idx,len2
- and $0xF,idx
- and $~0xFF,len2
- jz len_is_0
-
- sub len2,lens0
- sub len2,lens1
- sub len2,lens2
- sub len2,lens3
- shr $32,len2
- mov lens0, _lens + 0*8(state)
- mov lens1, _lens + 1*8(state)
- mov lens2, _lens + 2*8(state)
- mov lens3, _lens + 3*8(state)
-
- # "state" and "args" are the same address, arg1
- # len is arg2
- call sha512_x4_avx2
- # state and idx are intact
-
-len_is_0:
-
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- mov _unused_lanes(state), unused_lanes
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- shl $8, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF,_lens+4(state,idx,8)
- vmovq _args_digest+0*32(state , idx, 8), %xmm0
- vpinsrq $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0
- vmovq _args_digest+2*32(state , idx, 8), %xmm1
- vpinsrq $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1
- vmovq _args_digest+4*32(state , idx, 8), %xmm2
- vpinsrq $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2
- vmovq _args_digest+6*32(state , idx, 8), %xmm3
- vpinsrq $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3
-
- vmovdqu %xmm0, _result_digest + 0*16(job_rax)
- vmovdqu %xmm1, _result_digest + 1*16(job_rax)
- vmovdqu %xmm2, _result_digest + 2*16(job_rax)
- vmovdqu %xmm3, _result_digest + 3*16(job_rax)
-
-return:
- pop %r12
- pop %rbx
- FRAME_END
- ret
-
-return_null:
- xor job_rax, job_rax
- jmp return
-ENDPROC(sha512_mb_mgr_submit_avx2)
-
-/* UNUSED?
-.section .rodata.cst16, "aM", @progbits, 16
-.align 16
-H0: .int 0x6a09e667
-H1: .int 0xbb67ae85
-H2: .int 0x3c6ef372
-H3: .int 0xa54ff53a
-H4: .int 0x510e527f
-H5: .int 0x9b05688c
-H6: .int 0x1f83d9ab
-H7: .int 0x5be0cd19
-*/
diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
deleted file mode 100644
index e22e907643a6..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
+++ /dev/null
@@ -1,531 +0,0 @@
-/*
- * Multi-buffer SHA512 algorithm hash compute routine
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# code to compute quad SHA512 using AVX2
-# use YMMs to tackle the larger digest size
-# outer calling routine takes care of save and restore of XMM registers
-# Logic designed/laid out by JDG
-
-# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15
-# Stack must be aligned to 32 bytes before call
-# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12
-# Linux preserves: rcx rdx rdi rbp r13 r14 r15
-# clobbers ymm0-15
-
-#include <linux/linkage.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-arg1 = %rdi
-arg2 = %rsi
-
-# Common definitions
-STATE = arg1
-INP_SIZE = arg2
-
-IDX = %rax
-ROUND = %rbx
-TBL = %r8
-
-inp0 = %r9
-inp1 = %r10
-inp2 = %r11
-inp3 = %r12
-
-a = %ymm0
-b = %ymm1
-c = %ymm2
-d = %ymm3
-e = %ymm4
-f = %ymm5
-g = %ymm6
-h = %ymm7
-
-a0 = %ymm8
-a1 = %ymm9
-a2 = %ymm10
-
-TT0 = %ymm14
-TT1 = %ymm13
-TT2 = %ymm12
-TT3 = %ymm11
-TT4 = %ymm10
-TT5 = %ymm9
-
-T1 = %ymm14
-TMP = %ymm15
-
-# Define stack usage
-STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24
-
-#define VMOVPD vmovupd
-_digest = SZ4*16
-
-# transpose r0, r1, r2, r3, t0, t1
-# "transpose" data in {r0..r3} using temps {t0..t3}
-# Input looks like: {r0 r1 r2 r3}
-# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
-# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
-# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
-# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
-#
-# output looks like: {t0 r1 r0 r3}
-# t0 = {d1 d0 c1 c0 b1 b0 a1 a0}
-# r1 = {d3 d2 c3 c2 b3 b2 a3 a2}
-# r0 = {d5 d4 c5 c4 b5 b4 a5 a4}
-# r3 = {d7 d6 c7 c6 b7 b6 a7 a6}
-
-.macro TRANSPOSE r0 r1 r2 r3 t0 t1
- vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
- vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
- vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
- vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
-
- vperm2f128 $0x20, \r2, \r0, \r1 # h6...a6
- vperm2f128 $0x31, \r2, \r0, \r3 # h2...a2
- vperm2f128 $0x31, \t1, \t0, \r0 # h5...a5
- vperm2f128 $0x20, \t1, \t0, \t0 # h1...a1
-.endm
-
-.macro ROTATE_ARGS
-TMP_ = h
-h = g
-g = f
-f = e
-e = d
-d = c
-c = b
-b = a
-a = TMP_
-.endm
-
-# PRORQ reg, imm, tmp
-# packed-rotate-right-double
-# does a rotate by doing two shifts and an or
-.macro _PRORQ reg imm tmp
- vpsllq $(64-\imm),\reg,\tmp
- vpsrlq $\imm,\reg, \reg
- vpor \tmp,\reg, \reg
-.endm
-
-# non-destructive
-# PRORQ_nd reg, imm, tmp, src
-.macro _PRORQ_nd reg imm tmp src
- vpsllq $(64-\imm), \src, \tmp
- vpsrlq $\imm, \src, \reg
- vpor \tmp, \reg, \reg
-.endm
-
-# PRORQ dst/src, amt
-.macro PRORQ reg imm
- _PRORQ \reg, \imm, TMP
-.endm
-
-# PRORQ_nd dst, src, amt
-.macro PRORQ_nd reg tmp imm
- _PRORQ_nd \reg, \imm, TMP, \tmp
-.endm
-
-#; arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_00_15 _T1 i
- PRORQ_nd a0, e, (18-14) # sig1: a0 = (e >> 4)
-
- vpxor g, f, a2 # ch: a2 = f^g
- vpand e,a2, a2 # ch: a2 = (f^g)&e
- vpxor g, a2, a2 # a2 = ch
-
- PRORQ_nd a1,e,41 # sig1: a1 = (e >> 25)
-
- offset = SZ4*(\i & 0xf)
- vmovdqu \_T1,offset(%rsp)
- vpaddq (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K
- vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5)
- PRORQ a0, 14 # sig1: a0 = (e >> 6) ^ (e >> 11)
- vpaddq a2, h, h # h = h + ch
- PRORQ_nd a2,a,6 # sig0: a2 = (a >> 11)
- vpaddq \_T1,h, h # h = h + ch + W + K
- vpxor a1, a0, a0 # a0 = sigma1
- vmovdqu a,\_T1
- PRORQ_nd a1,a,39 # sig0: a1 = (a >> 22)
- vpxor c, \_T1, \_T1 # maj: T1 = a^c
- add $SZ4, ROUND # ROUND++
- vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b
- vpaddq a0, h, h
- vpaddq h, d, d
- vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11)
- PRORQ a2,28 # sig0: a2 = (a >> 2) ^ (a >> 13)
- vpxor a1, a2, a2 # a2 = sig0
- vpand c, a, a1 # maj: a1 = a&c
- vpor \_T1, a1, a1 # a1 = maj
- vpaddq a1, h, h # h = h + ch + W + K + maj
- vpaddq a2, h, h # h = h + ch + W + K + maj + sigma0
- ROTATE_ARGS
-.endm
-
-
-#; arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_16_XX _T1 i
- vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1
- vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1
- vmovdqu \_T1, a0
- PRORQ \_T1,7
- vmovdqu a1, a2
- PRORQ a1,42
- vpxor a0, \_T1, \_T1
- PRORQ \_T1, 1
- vpxor a2, a1, a1
- PRORQ a1, 19
- vpsrlq $7, a0, a0
- vpxor a0, \_T1, \_T1
- vpsrlq $6, a2, a2
- vpxor a2, a1, a1
- vpaddq SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1
- vpaddq SZ4*((\i-7)&0xf)(%rsp), a1, a1
- vpaddq a1, \_T1, \_T1
-
- ROUND_00_15 \_T1,\i
-.endm
-
-
-# void sha512_x4_avx2(void *STATE, const int INP_SIZE)
-# arg 1 : STATE : pointer to input data
-# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1)
-ENTRY(sha512_x4_avx2)
- # general registers preserved in outer calling routine
- # outer calling routine saves all the XMM registers
- # save callee-saved clobbered registers to comply with C function ABI
- push %r12
- push %r13
- push %r14
- push %r15
-
- sub $STACK_SPACE1, %rsp
-
- # Load the pre-transposed incoming digest.
- vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a
- vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b
- vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c
- vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d
- vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e
- vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f
- vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g
- vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h
-
- lea K512_4(%rip),TBL
-
- # load the address of each of the 4 message lanes
- # getting ready to transpose input onto stack
- mov _data_ptr+0*PTR_SZ(STATE),inp0
- mov _data_ptr+1*PTR_SZ(STATE),inp1
- mov _data_ptr+2*PTR_SZ(STATE),inp2
- mov _data_ptr+3*PTR_SZ(STATE),inp3
-
- xor IDX, IDX
-lloop:
- xor ROUND, ROUND
-
- # save old digest
- vmovdqu a, _digest(%rsp)
- vmovdqu b, _digest+1*SZ4(%rsp)
- vmovdqu c, _digest+2*SZ4(%rsp)
- vmovdqu d, _digest+3*SZ4(%rsp)
- vmovdqu e, _digest+4*SZ4(%rsp)
- vmovdqu f, _digest+5*SZ4(%rsp)
- vmovdqu g, _digest+6*SZ4(%rsp)
- vmovdqu h, _digest+7*SZ4(%rsp)
- i = 0
-.rep 4
- vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP
- VMOVPD i*32(inp0, IDX), TT2
- VMOVPD i*32(inp1, IDX), TT1
- VMOVPD i*32(inp2, IDX), TT4
- VMOVPD i*32(inp3, IDX), TT3
- TRANSPOSE TT2, TT1, TT4, TT3, TT0, TT5
- vpshufb TMP, TT0, TT0
- vpshufb TMP, TT1, TT1
- vpshufb TMP, TT2, TT2
- vpshufb TMP, TT3, TT3
- ROUND_00_15 TT0,(i*4+0)
- ROUND_00_15 TT1,(i*4+1)
- ROUND_00_15 TT2,(i*4+2)
- ROUND_00_15 TT3,(i*4+3)
- i = (i+1)
-.endr
- add $128, IDX
-
- i = (i*4)
-
- jmp Lrounds_16_xx
-.align 16
-Lrounds_16_xx:
-.rep 16
- ROUND_16_XX T1, i
- i = (i+1)
-.endr
- cmp $0xa00,ROUND
- jb Lrounds_16_xx
-
- # add old digest
- vpaddq _digest(%rsp), a, a
- vpaddq _digest+1*SZ4(%rsp), b, b
- vpaddq _digest+2*SZ4(%rsp), c, c
- vpaddq _digest+3*SZ4(%rsp), d, d
- vpaddq _digest+4*SZ4(%rsp), e, e
- vpaddq _digest+5*SZ4(%rsp), f, f
- vpaddq _digest+6*SZ4(%rsp), g, g
- vpaddq _digest+7*SZ4(%rsp), h, h
-
- sub $1, INP_SIZE # unit is blocks
- jne lloop
-
- # write back to memory (state object) the transposed digest
- vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE)
-
- # update input data pointers
- add IDX, inp0
- mov inp0, _data_ptr+0*PTR_SZ(STATE)
- add IDX, inp1
- mov inp1, _data_ptr+1*PTR_SZ(STATE)
- add IDX, inp2
- mov inp2, _data_ptr+2*PTR_SZ(STATE)
- add IDX, inp3
- mov inp3, _data_ptr+3*PTR_SZ(STATE)
-
- #;;;;;;;;;;;;;;;
- #; Postamble
- add $STACK_SPACE1, %rsp
- # restore callee-saved clobbered registers
-
- pop %r15
- pop %r14
- pop %r13
- pop %r12
-
- # outer calling routine restores XMM and other GP registers
- ret
-ENDPROC(sha512_x4_avx2)
-
-.section .rodata.K512_4, "a", @progbits
-.align 64
-K512_4:
- .octa 0x428a2f98d728ae22428a2f98d728ae22,\
- 0x428a2f98d728ae22428a2f98d728ae22
- .octa 0x7137449123ef65cd7137449123ef65cd,\
- 0x7137449123ef65cd7137449123ef65cd
- .octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\
- 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f
- .octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\
- 0xe9b5dba58189dbbce9b5dba58189dbbc
- .octa 0x3956c25bf348b5383956c25bf348b538,\
- 0x3956c25bf348b5383956c25bf348b538
- .octa 0x59f111f1b605d01959f111f1b605d019,\
- 0x59f111f1b605d01959f111f1b605d019
- .octa 0x923f82a4af194f9b923f82a4af194f9b,\
- 0x923f82a4af194f9b923f82a4af194f9b
- .octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\
- 0xab1c5ed5da6d8118ab1c5ed5da6d8118
- .octa 0xd807aa98a3030242d807aa98a3030242,\
- 0xd807aa98a3030242d807aa98a3030242
- .octa 0x12835b0145706fbe12835b0145706fbe,\
- 0x12835b0145706fbe12835b0145706fbe
- .octa 0x243185be4ee4b28c243185be4ee4b28c,\
- 0x243185be4ee4b28c243185be4ee4b28c
- .octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\
- 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2
- .octa 0x72be5d74f27b896f72be5d74f27b896f,\
- 0x72be5d74f27b896f72be5d74f27b896f
- .octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\
- 0x80deb1fe3b1696b180deb1fe3b1696b1
- .octa 0x9bdc06a725c712359bdc06a725c71235,\
- 0x9bdc06a725c712359bdc06a725c71235
- .octa 0xc19bf174cf692694c19bf174cf692694,\
- 0xc19bf174cf692694c19bf174cf692694
- .octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\
- 0xe49b69c19ef14ad2e49b69c19ef14ad2
- .octa 0xefbe4786384f25e3efbe4786384f25e3,\
- 0xefbe4786384f25e3efbe4786384f25e3
- .octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\
- 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5
- .octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\
- 0x240ca1cc77ac9c65240ca1cc77ac9c65
- .octa 0x2de92c6f592b02752de92c6f592b0275,\
- 0x2de92c6f592b02752de92c6f592b0275
- .octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\
- 0x4a7484aa6ea6e4834a7484aa6ea6e483
- .octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\
- 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4
- .octa 0x76f988da831153b576f988da831153b5,\
- 0x76f988da831153b576f988da831153b5
- .octa 0x983e5152ee66dfab983e5152ee66dfab,\
- 0x983e5152ee66dfab983e5152ee66dfab
- .octa 0xa831c66d2db43210a831c66d2db43210,\
- 0xa831c66d2db43210a831c66d2db43210
- .octa 0xb00327c898fb213fb00327c898fb213f,\
- 0xb00327c898fb213fb00327c898fb213f
- .octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\
- 0xbf597fc7beef0ee4bf597fc7beef0ee4
- .octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\
- 0xc6e00bf33da88fc2c6e00bf33da88fc2
- .octa 0xd5a79147930aa725d5a79147930aa725,\
- 0xd5a79147930aa725d5a79147930aa725
- .octa 0x06ca6351e003826f06ca6351e003826f,\
- 0x06ca6351e003826f06ca6351e003826f
- .octa 0x142929670a0e6e70142929670a0e6e70,\
- 0x142929670a0e6e70142929670a0e6e70
- .octa 0x27b70a8546d22ffc27b70a8546d22ffc,\
- 0x27b70a8546d22ffc27b70a8546d22ffc
- .octa 0x2e1b21385c26c9262e1b21385c26c926,\
- 0x2e1b21385c26c9262e1b21385c26c926
- .octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\
- 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed
- .octa 0x53380d139d95b3df53380d139d95b3df,\
- 0x53380d139d95b3df53380d139d95b3df
- .octa 0x650a73548baf63de650a73548baf63de,\
- 0x650a73548baf63de650a73548baf63de
- .octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\
- 0x766a0abb3c77b2a8766a0abb3c77b2a8
- .octa 0x81c2c92e47edaee681c2c92e47edaee6,\
- 0x81c2c92e47edaee681c2c92e47edaee6
- .octa 0x92722c851482353b92722c851482353b,\
- 0x92722c851482353b92722c851482353b
- .octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\
- 0xa2bfe8a14cf10364a2bfe8a14cf10364
- .octa 0xa81a664bbc423001a81a664bbc423001,\
- 0xa81a664bbc423001a81a664bbc423001
- .octa 0xc24b8b70d0f89791c24b8b70d0f89791,\
- 0xc24b8b70d0f89791c24b8b70d0f89791
- .octa 0xc76c51a30654be30c76c51a30654be30,\
- 0xc76c51a30654be30c76c51a30654be30
- .octa 0xd192e819d6ef5218d192e819d6ef5218,\
- 0xd192e819d6ef5218d192e819d6ef5218
- .octa 0xd69906245565a910d69906245565a910,\
- 0xd69906245565a910d69906245565a910
- .octa 0xf40e35855771202af40e35855771202a,\
- 0xf40e35855771202af40e35855771202a
- .octa 0x106aa07032bbd1b8106aa07032bbd1b8,\
- 0x106aa07032bbd1b8106aa07032bbd1b8
- .octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\
- 0x19a4c116b8d2d0c819a4c116b8d2d0c8
- .octa 0x1e376c085141ab531e376c085141ab53,\
- 0x1e376c085141ab531e376c085141ab53
- .octa 0x2748774cdf8eeb992748774cdf8eeb99,\
- 0x2748774cdf8eeb992748774cdf8eeb99
- .octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\
- 0x34b0bcb5e19b48a834b0bcb5e19b48a8
- .octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\
- 0x391c0cb3c5c95a63391c0cb3c5c95a63
- .octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\
- 0x4ed8aa4ae3418acb4ed8aa4ae3418acb
- .octa 0x5b9cca4f7763e3735b9cca4f7763e373,\
- 0x5b9cca4f7763e3735b9cca4f7763e373
- .octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\
- 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3
- .octa 0x748f82ee5defb2fc748f82ee5defb2fc,\
- 0x748f82ee5defb2fc748f82ee5defb2fc
- .octa 0x78a5636f43172f6078a5636f43172f60,\
- 0x78a5636f43172f6078a5636f43172f60
- .octa 0x84c87814a1f0ab7284c87814a1f0ab72,\
- 0x84c87814a1f0ab7284c87814a1f0ab72
- .octa 0x8cc702081a6439ec8cc702081a6439ec,\
- 0x8cc702081a6439ec8cc702081a6439ec
- .octa 0x90befffa23631e2890befffa23631e28,\
- 0x90befffa23631e2890befffa23631e28
- .octa 0xa4506cebde82bde9a4506cebde82bde9,\
- 0xa4506cebde82bde9a4506cebde82bde9
- .octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\
- 0xbef9a3f7b2c67915bef9a3f7b2c67915
- .octa 0xc67178f2e372532bc67178f2e372532b,\
- 0xc67178f2e372532bc67178f2e372532b
- .octa 0xca273eceea26619cca273eceea26619c,\
- 0xca273eceea26619cca273eceea26619c
- .octa 0xd186b8c721c0c207d186b8c721c0c207,\
- 0xd186b8c721c0c207d186b8c721c0c207
- .octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\
- 0xeada7dd6cde0eb1eeada7dd6cde0eb1e
- .octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\
- 0xf57d4f7fee6ed178f57d4f7fee6ed178
- .octa 0x06f067aa72176fba06f067aa72176fba,\
- 0x06f067aa72176fba06f067aa72176fba
- .octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\
- 0x0a637dc5a2c898a60a637dc5a2c898a6
- .octa 0x113f9804bef90dae113f9804bef90dae,\
- 0x113f9804bef90dae113f9804bef90dae
- .octa 0x1b710b35131c471b1b710b35131c471b,\
- 0x1b710b35131c471b1b710b35131c471b
- .octa 0x28db77f523047d8428db77f523047d84,\
- 0x28db77f523047d8428db77f523047d84
- .octa 0x32caab7b40c7249332caab7b40c72493,\
- 0x32caab7b40c7249332caab7b40c72493
- .octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\
- 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc
- .octa 0x431d67c49c100d4c431d67c49c100d4c,\
- 0x431d67c49c100d4c431d67c49c100d4c
- .octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\
- 0x4cc5d4becb3e42b64cc5d4becb3e42b6
- .octa 0x597f299cfc657e2a597f299cfc657e2a,\
- 0x597f299cfc657e2a597f299cfc657e2a
- .octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\
- 0x5fcb6fab3ad6faec5fcb6fab3ad6faec
- .octa 0x6c44198c4a4758176c44198c4a475817,\
- 0x6c44198c4a4758176c44198c4a475817
-
-.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
-.align 32
-PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
- .octa 0x18191a1b1c1d1e1f1011121314151617
diff --git a/crypto/Kconfig b/crypto/Kconfig
index f3e40ac56d93..f7a235db56aa 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -213,20 +213,6 @@ config CRYPTO_CRYPTD
converts an arbitrary synchronous software crypto algorithm
into an asynchronous algorithm that executes in a kernel thread.
-config CRYPTO_MCRYPTD
- tristate "Software async multi-buffer crypto daemon"
- select CRYPTO_BLKCIPHER
- select CRYPTO_HASH
- select CRYPTO_MANAGER
- select CRYPTO_WORKQUEUE
- help
- This is a generic software asynchronous crypto daemon that
- provides the kernel thread to assist multi-buffer crypto
- algorithms for submitting jobs and flushing jobs in multi-buffer
- crypto algorithms. Multi-buffer crypto algorithms are executed
- in the context of this kernel thread and drivers can post
- their crypto request asynchronously to be processed by this daemon.
-
config CRYPTO_AUTHENC
tristate "Authenc support"
select CRYPTO_AEAD
@@ -470,6 +456,18 @@ config CRYPTO_LRW
The first 128, 192 or 256 bits in the key are used for AES and the
rest is used to tie each cipher block to its logical position.
+config CRYPTO_OFB
+ tristate "OFB support"
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_MANAGER
+ help
+ OFB: the Output Feedback mode makes a block cipher into a synchronous
+ stream cipher. It generates keystream blocks, which are then XORed
+ with the plaintext blocks to get the ciphertext. Flipping a bit in the
+ ciphertext produces a flipped bit in the plaintext at the same
+ location. This property allows many error correcting codes to function
+ normally even when applied before encryption.
+
config CRYPTO_PCBC
tristate "PCBC support"
select CRYPTO_BLKCIPHER
@@ -848,54 +846,6 @@ config CRYPTO_SHA1_PPC_SPE
SHA-1 secure hash standard (DFIPS 180-4) implemented
using powerpc SPE SIMD instruction set.
-config CRYPTO_SHA1_MB
- tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)"
- depends on X86 && 64BIT
- select CRYPTO_SHA1
- select CRYPTO_HASH
- select CRYPTO_MCRYPTD
- help
- SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
- using multi-buffer technique. This algorithm computes on
- multiple data lanes concurrently with SIMD instructions for
- better throughput. It should not be enabled by default but
- used when there is significant amount of work to keep the keep
- the data lanes filled to get performance benefit. If the data
- lanes remain unfilled, a flush operation will be initiated to
- process the crypto jobs, adding a slight latency.
-
-config CRYPTO_SHA256_MB
- tristate "SHA256 digest algorithm (x86_64 Multi-Buffer, Experimental)"
- depends on X86 && 64BIT
- select CRYPTO_SHA256
- select CRYPTO_HASH
- select CRYPTO_MCRYPTD
- help
- SHA-256 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
- using multi-buffer technique. This algorithm computes on
- multiple data lanes concurrently with SIMD instructions for
- better throughput. It should not be enabled by default but
- used when there is significant amount of work to keep the keep
- the data lanes filled to get performance benefit. If the data
- lanes remain unfilled, a flush operation will be initiated to
- process the crypto jobs, adding a slight latency.
-
-config CRYPTO_SHA512_MB
- tristate "SHA512 digest algorithm (x86_64 Multi-Buffer, Experimental)"
- depends on X86 && 64BIT
- select CRYPTO_SHA512
- select CRYPTO_HASH
- select CRYPTO_MCRYPTD
- help
- SHA-512 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
- using multi-buffer technique. This algorithm computes on
- multiple data lanes concurrently with SIMD instructions for
- better throughput. It should not be enabled by default but
- used when there is significant amount of work to keep the keep
- the data lanes filled to get performance benefit. If the data
- lanes remain unfilled, a flush operation will be initiated to
- process the crypto jobs, adding a slight latency.
-
config CRYPTO_SHA256
tristate "SHA224 and SHA256 digest algorithm"
select CRYPTO_HASH
@@ -1133,7 +1083,7 @@ config CRYPTO_AES_NI_INTEL
In addition to AES cipher algorithm support, the acceleration
for some popular block cipher mode is supported too, including
- ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional
+ ECB, CBC, LRW, XTS. The 64 bit version has additional
acceleration for CTR.
config CRYPTO_AES_SPARC64
@@ -1590,20 +1540,6 @@ config CRYPTO_SM4
If unsure, say N.
-config CRYPTO_SPECK
- tristate "Speck cipher algorithm"
- select CRYPTO_ALGAPI
- help
- Speck is a lightweight block cipher that is tuned for optimal
- performance in software (rather than hardware).
-
- Speck may not be as secure as AES, and should only be used on systems
- where AES is not fast enough.
-
- See also: <https://eprint.iacr.org/2013/404.pdf>
-
- If unsure, say N.
-
config CRYPTO_TEA
tristate "TEA, XTEA and XETA cipher algorithms"
select CRYPTO_ALGAPI
@@ -1875,6 +1811,17 @@ config CRYPTO_USER_API_AEAD
This option enables the user-spaces interface for AEAD
cipher algorithms.
+config CRYPTO_STATS
+ bool "Crypto usage statistics for User-space"
+ help
+ This option enables the gathering of crypto stats.
+ This will collect:
+ - encrypt/decrypt size and numbers of symmeric operations
+ - compress/decompress size and numbers of compress operations
+ - size and numbers of hash operations
+ - encrypt/decrypt/sign/verify numbers for asymmetric operations
+ - generate/seed numbers for rng operations
+
config CRYPTO_HASH_INFO
bool
diff --git a/crypto/Makefile b/crypto/Makefile
index 6d1d40eeb964..5c207c76abf7 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -54,6 +54,7 @@ cryptomgr-y := algboss.o testmgr.o
obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o
obj-$(CONFIG_CRYPTO_USER) += crypto_user.o
+crypto_user-y := crypto_user_base.o crypto_user_stat.o
obj-$(CONFIG_CRYPTO_CMAC) += cmac.o
obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
obj-$(CONFIG_CRYPTO_VMAC) += vmac.o
@@ -93,7 +94,6 @@ obj-$(CONFIG_CRYPTO_MORUS640) += morus640.o
obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o
obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
-obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o
obj-$(CONFIG_CRYPTO_DES) += des_generic.o
obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o
obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o
@@ -115,7 +115,6 @@ obj-$(CONFIG_CRYPTO_TEA) += tea.o
obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
obj-$(CONFIG_CRYPTO_SEED) += seed.o
-obj-$(CONFIG_CRYPTO_SPECK) += speck.o
obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
@@ -143,6 +142,7 @@ obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
+obj-$(CONFIG_CRYPTO_OFB) += ofb.o
ecdh_generic-y := ecc.o
ecdh_generic-y += ecdh.o
diff --git a/crypto/aegis.h b/crypto/aegis.h
index f1c6900ddb80..405e025fc906 100644
--- a/crypto/aegis.h
+++ b/crypto/aegis.h
@@ -21,7 +21,7 @@
union aegis_block {
__le64 words64[AEGIS_BLOCK_SIZE / sizeof(__le64)];
- u32 words32[AEGIS_BLOCK_SIZE / sizeof(u32)];
+ __le32 words32[AEGIS_BLOCK_SIZE / sizeof(__le32)];
u8 bytes[AEGIS_BLOCK_SIZE];
};
@@ -57,24 +57,22 @@ static void crypto_aegis_aesenc(union aegis_block *dst,
const union aegis_block *src,
const union aegis_block *key)
{
- u32 *d = dst->words32;
const u8 *s = src->bytes;
- const u32 *k = key->words32;
const u32 *t0 = crypto_ft_tab[0];
const u32 *t1 = crypto_ft_tab[1];
const u32 *t2 = crypto_ft_tab[2];
const u32 *t3 = crypto_ft_tab[3];
u32 d0, d1, d2, d3;
- d0 = t0[s[ 0]] ^ t1[s[ 5]] ^ t2[s[10]] ^ t3[s[15]] ^ k[0];
- d1 = t0[s[ 4]] ^ t1[s[ 9]] ^ t2[s[14]] ^ t3[s[ 3]] ^ k[1];
- d2 = t0[s[ 8]] ^ t1[s[13]] ^ t2[s[ 2]] ^ t3[s[ 7]] ^ k[2];
- d3 = t0[s[12]] ^ t1[s[ 1]] ^ t2[s[ 6]] ^ t3[s[11]] ^ k[3];
+ d0 = t0[s[ 0]] ^ t1[s[ 5]] ^ t2[s[10]] ^ t3[s[15]];
+ d1 = t0[s[ 4]] ^ t1[s[ 9]] ^ t2[s[14]] ^ t3[s[ 3]];
+ d2 = t0[s[ 8]] ^ t1[s[13]] ^ t2[s[ 2]] ^ t3[s[ 7]];
+ d3 = t0[s[12]] ^ t1[s[ 1]] ^ t2[s[ 6]] ^ t3[s[11]];
- d[0] = d0;
- d[1] = d1;
- d[2] = d2;
- d[3] = d3;
+ dst->words32[0] = cpu_to_le32(d0) ^ key->words32[0];
+ dst->words32[1] = cpu_to_le32(d1) ^ key->words32[1];
+ dst->words32[2] = cpu_to_le32(d2) ^ key->words32[2];
+ dst->words32[3] = cpu_to_le32(d3) ^ key->words32[3];
}
#endif /* _CRYPTO_AEGIS_H */
diff --git a/crypto/ahash.c b/crypto/ahash.c
index a64c143165b1..e21667b4e10a 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -364,24 +364,35 @@ static int crypto_ahash_op(struct ahash_request *req,
int crypto_ahash_final(struct ahash_request *req)
{
- return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final);
+ int ret;
+
+ ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final);
+ crypto_stat_ahash_final(req, ret);
+ return ret;
}
EXPORT_SYMBOL_GPL(crypto_ahash_final);
int crypto_ahash_finup(struct ahash_request *req)
{
- return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup);
+ int ret;
+
+ ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup);
+ crypto_stat_ahash_final(req, ret);
+ return ret;
}
EXPORT_SYMBOL_GPL(crypto_ahash_finup);
int crypto_ahash_digest(struct ahash_request *req)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ int ret;
if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
- return -ENOKEY;
-
- return crypto_ahash_op(req, tfm->digest);
+ ret = -ENOKEY;
+ else
+ ret = crypto_ahash_op(req, tfm->digest);
+ crypto_stat_ahash_final(req, ret);
+ return ret;
}
EXPORT_SYMBOL_GPL(crypto_ahash_digest);
@@ -550,8 +561,8 @@ static int ahash_prepare_alg(struct ahash_alg *alg)
{
struct crypto_alg *base = &alg->halg.base;
- if (alg->halg.digestsize > PAGE_SIZE / 8 ||
- alg->halg.statesize > PAGE_SIZE / 8 ||
+ if (alg->halg.digestsize > HASH_MAX_DIGESTSIZE ||
+ alg->halg.statesize > HASH_MAX_STATESIZE ||
alg->halg.statesize == 0)
return -EINVAL;
diff --git a/crypto/algapi.c b/crypto/algapi.c
index c0755cf4f53f..2545c5f89c4c 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -57,9 +57,14 @@ static int crypto_check_alg(struct crypto_alg *alg)
if (alg->cra_alignmask & (alg->cra_alignmask + 1))
return -EINVAL;
- if (alg->cra_blocksize > PAGE_SIZE / 8)
+ /* General maximums for all algs. */
+ if (alg->cra_alignmask > MAX_ALGAPI_ALIGNMASK)
return -EINVAL;
+ if (alg->cra_blocksize > MAX_ALGAPI_BLOCKSIZE)
+ return -EINVAL;
+
+ /* Lower maximums for specific alg types. */
if (!alg->cra_type && (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
CRYPTO_ALG_TYPE_CIPHER) {
if (alg->cra_alignmask > MAX_CIPHER_ALIGNMASK)
@@ -253,6 +258,14 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
list_add(&alg->cra_list, &crypto_alg_list);
list_add(&larval->alg.cra_list, &crypto_alg_list);
+ atomic_set(&alg->encrypt_cnt, 0);
+ atomic_set(&alg->decrypt_cnt, 0);
+ atomic64_set(&alg->encrypt_tlen, 0);
+ atomic64_set(&alg->decrypt_tlen, 0);
+ atomic_set(&alg->verify_cnt, 0);
+ atomic_set(&alg->cipher_err_cnt, 0);
+ atomic_set(&alg->sign_cnt, 0);
+
out:
return larval;
@@ -367,6 +380,8 @@ static void crypto_wait_for_test(struct crypto_larval *larval)
err = wait_for_completion_killable(&larval->completion);
WARN_ON(err);
+ if (!err)
+ crypto_probing_notify(CRYPTO_MSG_ALG_LOADED, larval);
out:
crypto_larval_kill(&larval->alg);
diff --git a/crypto/algboss.c b/crypto/algboss.c
index 5e6df2a087fa..527b44d0af21 100644
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -274,6 +274,8 @@ static int cryptomgr_notify(struct notifier_block *this, unsigned long msg,
return cryptomgr_schedule_probe(data);
case CRYPTO_MSG_ALG_REGISTER:
return cryptomgr_schedule_test(data);
+ case CRYPTO_MSG_ALG_LOADED:
+ break;
}
return NOTIFY_DONE;
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index c40a8c7ee8ae..eb100a04ce9f 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -42,7 +42,7 @@
struct aead_tfm {
struct crypto_aead *aead;
- struct crypto_skcipher *null_tfm;
+ struct crypto_sync_skcipher *null_tfm;
};
static inline bool aead_sufficient_data(struct sock *sk)
@@ -75,13 +75,13 @@ static int aead_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
return af_alg_sendmsg(sock, msg, size, ivsize);
}
-static int crypto_aead_copy_sgl(struct crypto_skcipher *null_tfm,
+static int crypto_aead_copy_sgl(struct crypto_sync_skcipher *null_tfm,
struct scatterlist *src,
struct scatterlist *dst, unsigned int len)
{
- SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm);
- skcipher_request_set_tfm(skreq, null_tfm);
+ skcipher_request_set_sync_tfm(skreq, null_tfm);
skcipher_request_set_callback(skreq, CRYPTO_TFM_REQ_MAY_BACKLOG,
NULL, NULL);
skcipher_request_set_crypt(skreq, src, dst, len, NULL);
@@ -99,7 +99,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
struct af_alg_ctx *ctx = ask->private;
struct aead_tfm *aeadc = pask->private;
struct crypto_aead *tfm = aeadc->aead;
- struct crypto_skcipher *null_tfm = aeadc->null_tfm;
+ struct crypto_sync_skcipher *null_tfm = aeadc->null_tfm;
unsigned int i, as = crypto_aead_authsize(tfm);
struct af_alg_async_req *areq;
struct af_alg_tsgl *tsgl, *tmp;
@@ -478,7 +478,7 @@ static void *aead_bind(const char *name, u32 type, u32 mask)
{
struct aead_tfm *tfm;
struct crypto_aead *aead;
- struct crypto_skcipher *null_tfm;
+ struct crypto_sync_skcipher *null_tfm;
tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
if (!tfm)
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index bfcf595fd8f9..d0cde541beb6 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -239,7 +239,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags,
struct alg_sock *ask = alg_sk(sk);
struct hash_ctx *ctx = ask->private;
struct ahash_request *req = &ctx->req;
- char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req)) ? : 1];
+ char state[HASH_MAX_STATESIZE];
struct sock *sk2;
struct alg_sock *ask2;
struct hash_ctx *ctx2;
diff --git a/crypto/authenc.c b/crypto/authenc.c
index 4fa8d40d947b..37f54d1b2f66 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -33,7 +33,7 @@ struct authenc_instance_ctx {
struct crypto_authenc_ctx {
struct crypto_ahash *auth;
struct crypto_skcipher *enc;
- struct crypto_skcipher *null;
+ struct crypto_sync_skcipher *null;
};
struct authenc_request_ctx {
@@ -185,9 +185,9 @@ static int crypto_authenc_copy_assoc(struct aead_request *req)
{
struct crypto_aead *authenc = crypto_aead_reqtfm(req);
struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc);
- SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
- skcipher_request_set_tfm(skreq, ctx->null);
+ skcipher_request_set_sync_tfm(skreq, ctx->null);
skcipher_request_set_callback(skreq, aead_request_flags(req),
NULL, NULL);
skcipher_request_set_crypt(skreq, req->src, req->dst, req->assoclen,
@@ -318,7 +318,7 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm)
struct crypto_authenc_ctx *ctx = crypto_aead_ctx(tfm);
struct crypto_ahash *auth;
struct crypto_skcipher *enc;
- struct crypto_skcipher *null;
+ struct crypto_sync_skcipher *null;
int err;
auth = crypto_spawn_ahash(&ictx->auth);
diff --git a/crypto/authencesn.c b/crypto/authencesn.c
index 50b804747e20..80a25cc04aec 100644
--- a/crypto/authencesn.c
+++ b/crypto/authencesn.c
@@ -36,7 +36,7 @@ struct crypto_authenc_esn_ctx {
unsigned int reqoff;
struct crypto_ahash *auth;
struct crypto_skcipher *enc;
- struct crypto_skcipher *null;
+ struct crypto_sync_skcipher *null;
};
struct authenc_esn_request_ctx {
@@ -183,9 +183,9 @@ static int crypto_authenc_esn_copy(struct aead_request *req, unsigned int len)
{
struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req);
struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn);
- SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
- skcipher_request_set_tfm(skreq, ctx->null);
+ skcipher_request_set_sync_tfm(skreq, ctx->null);
skcipher_request_set_callback(skreq, aead_request_flags(req),
NULL, NULL);
skcipher_request_set_crypt(skreq, req->src, req->dst, len, NULL);
@@ -341,7 +341,7 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm)
struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(tfm);
struct crypto_ahash *auth;
struct crypto_skcipher *enc;
- struct crypto_skcipher *null;
+ struct crypto_sync_skcipher *null;
int err;
auth = crypto_spawn_ahash(&ictx->auth);
diff --git a/crypto/ccm.c b/crypto/ccm.c
index 0a083342ec8c..b242fd0d3262 100644
--- a/crypto/ccm.c
+++ b/crypto/ccm.c
@@ -50,7 +50,10 @@ struct crypto_ccm_req_priv_ctx {
u32 flags;
struct scatterlist src[3];
struct scatterlist dst[3];
- struct skcipher_request skreq;
+ union {
+ struct ahash_request ahreq;
+ struct skcipher_request skreq;
+ };
};
struct cbcmac_tfm_ctx {
@@ -181,7 +184,7 @@ static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
- AHASH_REQUEST_ON_STACK(ahreq, ctx->mac);
+ struct ahash_request *ahreq = &pctx->ahreq;
unsigned int assoclen = req->assoclen;
struct scatterlist sg[3];
u8 *odata = pctx->odata;
@@ -427,7 +430,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
crypto_aead_set_reqsize(
tfm,
align + sizeof(struct crypto_ccm_req_priv_ctx) +
- crypto_skcipher_reqsize(ctr));
+ max(crypto_ahash_reqsize(mac), crypto_skcipher_reqsize(ctr)));
return 0;
diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
index e451c3cb6a56..3ae96587caf9 100644
--- a/crypto/chacha20_generic.c
+++ b/crypto/chacha20_generic.c
@@ -18,20 +18,21 @@
static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
unsigned int bytes)
{
- u32 stream[CHACHA20_BLOCK_WORDS];
+ /* aligned to potentially speed up crypto_xor() */
+ u8 stream[CHACHA20_BLOCK_SIZE] __aligned(sizeof(long));
if (dst != src)
memcpy(dst, src, bytes);
while (bytes >= CHACHA20_BLOCK_SIZE) {
chacha20_block(state, stream);
- crypto_xor(dst, (const u8 *)stream, CHACHA20_BLOCK_SIZE);
+ crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE);
bytes -= CHACHA20_BLOCK_SIZE;
dst += CHACHA20_BLOCK_SIZE;
}
if (bytes) {
chacha20_block(state, stream);
- crypto_xor(dst, (const u8 *)stream, bytes);
+ crypto_xor(dst, stream, bytes);
}
}
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index addca7bae33f..7118fb5efbaa 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -76,7 +76,7 @@ struct cryptd_blkcipher_request_ctx {
struct cryptd_skcipher_ctx {
atomic_t refcnt;
- struct crypto_skcipher *child;
+ struct crypto_sync_skcipher *child;
};
struct cryptd_skcipher_request_ctx {
@@ -449,14 +449,16 @@ static int cryptd_skcipher_setkey(struct crypto_skcipher *parent,
const u8 *key, unsigned int keylen)
{
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent);
- struct crypto_skcipher *child = ctx->child;
+ struct crypto_sync_skcipher *child = ctx->child;
int err;
- crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
- crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+ crypto_sync_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+ crypto_sync_skcipher_set_flags(child,
+ crypto_skcipher_get_flags(parent) &
CRYPTO_TFM_REQ_MASK);
- err = crypto_skcipher_setkey(child, key, keylen);
- crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+ err = crypto_sync_skcipher_setkey(child, key, keylen);
+ crypto_skcipher_set_flags(parent,
+ crypto_sync_skcipher_get_flags(child) &
CRYPTO_TFM_RES_MASK);
return err;
}
@@ -483,13 +485,13 @@ static void cryptd_skcipher_encrypt(struct crypto_async_request *base,
struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_skcipher *child = ctx->child;
- SKCIPHER_REQUEST_ON_STACK(subreq, child);
+ struct crypto_sync_skcipher *child = ctx->child;
+ SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
if (unlikely(err == -EINPROGRESS))
goto out;
- skcipher_request_set_tfm(subreq, child);
+ skcipher_request_set_sync_tfm(subreq, child);
skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
NULL, NULL);
skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
@@ -511,13 +513,13 @@ static void cryptd_skcipher_decrypt(struct crypto_async_request *base,
struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_skcipher *child = ctx->child;
- SKCIPHER_REQUEST_ON_STACK(subreq, child);
+ struct crypto_sync_skcipher *child = ctx->child;
+ SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
if (unlikely(err == -EINPROGRESS))
goto out;
- skcipher_request_set_tfm(subreq, child);
+ skcipher_request_set_sync_tfm(subreq, child);
skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
NULL, NULL);
skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
@@ -568,7 +570,7 @@ static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm)
if (IS_ERR(cipher))
return PTR_ERR(cipher);
- ctx->child = cipher;
+ ctx->child = (struct crypto_sync_skcipher *)cipher;
crypto_skcipher_set_reqsize(
tfm, sizeof(struct cryptd_skcipher_request_ctx));
return 0;
@@ -578,7 +580,7 @@ static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm)
{
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- crypto_free_skcipher(ctx->child);
+ crypto_free_sync_skcipher(ctx->child);
}
static void cryptd_skcipher_free(struct skcipher_instance *inst)
@@ -1243,7 +1245,7 @@ struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm)
{
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
- return ctx->child;
+ return &ctx->child->base;
}
EXPORT_SYMBOL_GPL(cryptd_skcipher_child);
diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c
index 0959b268966c..0bae59922a80 100644
--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
@@ -26,7 +26,7 @@
#include <linux/string.h>
static DEFINE_MUTEX(crypto_default_null_skcipher_lock);
-static struct crypto_skcipher *crypto_default_null_skcipher;
+static struct crypto_sync_skcipher *crypto_default_null_skcipher;
static int crypto_default_null_skcipher_refcnt;
static int null_compress(struct crypto_tfm *tfm, const u8 *src,
@@ -152,16 +152,15 @@ MODULE_ALIAS_CRYPTO("compress_null");
MODULE_ALIAS_CRYPTO("digest_null");
MODULE_ALIAS_CRYPTO("cipher_null");
-struct crypto_skcipher *crypto_get_default_null_skcipher(void)
+struct crypto_sync_skcipher *crypto_get_default_null_skcipher(void)
{
- struct crypto_skcipher *tfm;
+ struct crypto_sync_skcipher *tfm;
mutex_lock(&crypto_default_null_skcipher_lock);
tfm = crypto_default_null_skcipher;
if (!tfm) {
- tfm = crypto_alloc_skcipher("ecb(cipher_null)",
- 0, CRYPTO_ALG_ASYNC);
+ tfm = crypto_alloc_sync_skcipher("ecb(cipher_null)", 0, 0);
if (IS_ERR(tfm))
goto unlock;
@@ -181,7 +180,7 @@ void crypto_put_default_null_skcipher(void)
{
mutex_lock(&crypto_default_null_skcipher_lock);
if (!--crypto_default_null_skcipher_refcnt) {
- crypto_free_skcipher(crypto_default_null_skcipher);
+ crypto_free_sync_skcipher(crypto_default_null_skcipher);
crypto_default_null_skcipher = NULL;
}
mutex_unlock(&crypto_default_null_skcipher_lock);
diff --git a/crypto/crypto_user.c b/crypto/crypto_user_base.c
index 0e89b5457cab..e41f6cc33fff 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user_base.c
@@ -29,6 +29,7 @@
#include <crypto/internal/rng.h>
#include <crypto/akcipher.h>
#include <crypto/kpp.h>
+#include <crypto/internal/cryptouser.h>
#include "internal.h"
@@ -37,7 +38,7 @@
static DEFINE_MUTEX(crypto_cfg_mutex);
/* The crypto netlink socket */
-static struct sock *crypto_nlsk;
+struct sock *crypto_nlsk;
struct crypto_dump_info {
struct sk_buff *in_skb;
@@ -46,7 +47,7 @@ struct crypto_dump_info {
u16 nlmsg_flags;
};
-static struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
+struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
{
struct crypto_alg *q, *alg = NULL;
@@ -461,6 +462,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
[CRYPTO_MSG_UPDATEALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
[CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
[CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = 0,
+ [CRYPTO_MSG_GETSTAT - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
};
static const struct nla_policy crypto_policy[CRYPTOCFGA_MAX+1] = {
@@ -481,6 +483,9 @@ static const struct crypto_link {
.dump = crypto_dump_report,
.done = crypto_dump_report_done},
[CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = { .doit = crypto_del_rng },
+ [CRYPTO_MSG_GETSTAT - CRYPTO_MSG_BASE] = { .doit = crypto_reportstat,
+ .dump = crypto_dump_reportstat,
+ .done = crypto_dump_reportstat_done},
};
static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c
new file mode 100644
index 000000000000..021ad06bbb62
--- /dev/null
+++ b/crypto/crypto_user_stat.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Crypto user configuration API.
+ *
+ * Copyright (C) 2017-2018 Corentin Labbe <clabbe@baylibre.com>
+ *
+ */
+
+#include <linux/crypto.h>
+#include <linux/cryptouser.h>
+#include <linux/sched.h>
+#include <net/netlink.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/internal/rng.h>
+#include <crypto/akcipher.h>
+#include <crypto/kpp.h>
+#include <crypto/internal/cryptouser.h>
+
+#include "internal.h"
+
+#define null_terminated(x) (strnlen(x, sizeof(x)) < sizeof(x))
+
+static DEFINE_MUTEX(crypto_cfg_mutex);
+
+extern struct sock *crypto_nlsk;
+
+struct crypto_dump_info {
+ struct sk_buff *in_skb;
+ struct sk_buff *out_skb;
+ u32 nlmsg_seq;
+ u16 nlmsg_flags;
+};
+
+static int crypto_report_aead(struct sk_buff *skb, struct crypto_alg *alg)
+{
+ struct crypto_stat raead;
+ u64 v64;
+ u32 v32;
+
+ strncpy(raead.type, "aead", sizeof(raead.type));
+
+ v32 = atomic_read(&alg->encrypt_cnt);
+ raead.stat_encrypt_cnt = v32;
+ v64 = atomic64_read(&alg->encrypt_tlen);
+ raead.stat_encrypt_tlen = v64;
+ v32 = atomic_read(&alg->decrypt_cnt);
+ raead.stat_decrypt_cnt = v32;
+ v64 = atomic64_read(&alg->decrypt_tlen);
+ raead.stat_decrypt_tlen = v64;
+ v32 = atomic_read(&alg->aead_err_cnt);
+ raead.stat_aead_err_cnt = v32;
+
+ if (nla_put(skb, CRYPTOCFGA_STAT_AEAD,
+ sizeof(struct crypto_stat), &raead))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg)
+{
+ struct crypto_stat rcipher;
+ u64 v64;
+ u32 v32;
+
+ strlcpy(rcipher.type, "cipher", sizeof(rcipher.type));
+
+ v32 = atomic_read(&alg->encrypt_cnt);
+ rcipher.stat_encrypt_cnt = v32;
+ v64 = atomic64_read(&alg->encrypt_tlen);
+ rcipher.stat_encrypt_tlen = v64;
+ v32 = atomic_read(&alg->decrypt_cnt);
+ rcipher.stat_decrypt_cnt = v32;
+ v64 = atomic64_read(&alg->decrypt_tlen);
+ rcipher.stat_decrypt_tlen = v64;
+ v32 = atomic_read(&alg->cipher_err_cnt);
+ rcipher.stat_cipher_err_cnt = v32;
+
+ if (nla_put(skb, CRYPTOCFGA_STAT_CIPHER,
+ sizeof(struct crypto_stat), &rcipher))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+ struct crypto_stat rcomp;
+ u64 v64;
+ u32 v32;
+
+ strlcpy(rcomp.type, "compression", sizeof(rcomp.type));
+ v32 = atomic_read(&alg->compress_cnt);
+ rcomp.stat_compress_cnt = v32;
+ v64 = atomic64_read(&alg->compress_tlen);
+ rcomp.stat_compress_tlen = v64;
+ v32 = atomic_read(&alg->decompress_cnt);
+ rcomp.stat_decompress_cnt = v32;
+ v64 = atomic64_read(&alg->decompress_tlen);
+ rcomp.stat_decompress_tlen = v64;
+ v32 = atomic_read(&alg->cipher_err_cnt);
+ rcomp.stat_compress_err_cnt = v32;
+
+ if (nla_put(skb, CRYPTOCFGA_STAT_COMPRESS,
+ sizeof(struct crypto_stat), &rcomp))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+ struct crypto_stat racomp;
+ u64 v64;
+ u32 v32;
+
+ strlcpy(racomp.type, "acomp", sizeof(racomp.type));
+ v32 = atomic_read(&alg->compress_cnt);
+ racomp.stat_compress_cnt = v32;
+ v64 = atomic64_read(&alg->compress_tlen);
+ racomp.stat_compress_tlen = v64;
+ v32 = atomic_read(&alg->decompress_cnt);
+ racomp.stat_decompress_cnt = v32;
+ v64 = atomic64_read(&alg->decompress_tlen);
+ racomp.stat_decompress_tlen = v64;
+ v32 = atomic_read(&alg->cipher_err_cnt);
+ racomp.stat_compress_err_cnt = v32;
+
+ if (nla_put(skb, CRYPTOCFGA_STAT_ACOMP,
+ sizeof(struct crypto_stat), &racomp))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
+{
+ struct crypto_stat rakcipher;
+ u64 v64;
+ u32 v32;
+
+ strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
+ v32 = atomic_read(&alg->encrypt_cnt);
+ rakcipher.stat_encrypt_cnt = v32;
+ v64 = atomic64_read(&alg->encrypt_tlen);
+ rakcipher.stat_encrypt_tlen = v64;
+ v32 = atomic_read(&alg->decrypt_cnt);
+ rakcipher.stat_decrypt_cnt = v32;
+ v64 = atomic64_read(&alg->decrypt_tlen);
+ rakcipher.stat_decrypt_tlen = v64;
+ v32 = atomic_read(&alg->sign_cnt);
+ rakcipher.stat_sign_cnt = v32;
+ v32 = atomic_read(&alg->verify_cnt);
+ rakcipher.stat_verify_cnt = v32;
+ v32 = atomic_read(&alg->akcipher_err_cnt);
+ rakcipher.stat_akcipher_err_cnt = v32;
+
+ if (nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER,
+ sizeof(struct crypto_stat), &rakcipher))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+ struct crypto_stat rkpp;
+ u32 v;
+
+ strlcpy(rkpp.type, "kpp", sizeof(rkpp.type));
+
+ v = atomic_read(&alg->setsecret_cnt);
+ rkpp.stat_setsecret_cnt = v;
+ v = atomic_read(&alg->generate_public_key_cnt);
+ rkpp.stat_generate_public_key_cnt = v;
+ v = atomic_read(&alg->compute_shared_secret_cnt);
+ rkpp.stat_compute_shared_secret_cnt = v;
+ v = atomic_read(&alg->kpp_err_cnt);
+ rkpp.stat_kpp_err_cnt = v;
+
+ if (nla_put(skb, CRYPTOCFGA_STAT_KPP,
+ sizeof(struct crypto_stat), &rkpp))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int crypto_report_ahash(struct sk_buff *skb, struct crypto_alg *alg)
+{
+ struct crypto_stat rhash;
+ u64 v64;
+ u32 v32;
+
+ strncpy(rhash.type, "ahash", sizeof(rhash.type));
+
+ v32 = atomic_read(&alg->hash_cnt);
+ rhash.stat_hash_cnt = v32;
+ v64 = atomic64_read(&alg->hash_tlen);
+ rhash.stat_hash_tlen = v64;
+ v32 = atomic_read(&alg->hash_err_cnt);
+ rhash.stat_hash_err_cnt = v32;
+
+ if (nla_put(skb, CRYPTOCFGA_STAT_HASH,
+ sizeof(struct crypto_stat), &rhash))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int crypto_report_shash(struct sk_buff *skb, struct crypto_alg *alg)
+{
+ struct crypto_stat rhash;
+ u64 v64;
+ u32 v32;
+
+ strncpy(rhash.type, "shash", sizeof(rhash.type));
+
+ v32 = atomic_read(&alg->hash_cnt);
+ rhash.stat_hash_cnt = v32;
+ v64 = atomic64_read(&alg->hash_tlen);
+ rhash.stat_hash_tlen = v64;
+ v32 = atomic_read(&alg->hash_err_cnt);
+ rhash.stat_hash_err_cnt = v32;
+
+ if (nla_put(skb, CRYPTOCFGA_STAT_HASH,
+ sizeof(struct crypto_stat), &rhash))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int crypto_report_rng(struct sk_buff *skb, struct crypto_alg *alg)
+{
+ struct crypto_stat rrng;
+ u64 v64;
+ u32 v32;
+
+ strncpy(rrng.type, "rng", sizeof(rrng.type));
+
+ v32 = atomic_read(&alg->generate_cnt);
+ rrng.stat_generate_cnt = v32;
+ v64 = atomic64_read(&alg->generate_tlen);
+ rrng.stat_generate_tlen = v64;
+ v32 = atomic_read(&alg->seed_cnt);
+ rrng.stat_seed_cnt = v32;
+ v32 = atomic_read(&alg->hash_err_cnt);
+ rrng.stat_rng_err_cnt = v32;
+
+ if (nla_put(skb, CRYPTOCFGA_STAT_RNG,
+ sizeof(struct crypto_stat), &rrng))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int crypto_reportstat_one(struct crypto_alg *alg,
+ struct crypto_user_alg *ualg,
+ struct sk_buff *skb)
+{
+ strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
+ strlcpy(ualg->cru_driver_name, alg->cra_driver_name,
+ sizeof(ualg->cru_driver_name));
+ strlcpy(ualg->cru_module_name, module_name(alg->cra_module),
+ sizeof(ualg->cru_module_name));
+
+ ualg->cru_type = 0;
+ ualg->cru_mask = 0;
+ ualg->cru_flags = alg->cra_flags;
+ ualg->cru_refcnt = refcount_read(&alg->cra_refcnt);
+
+ if (nla_put_u32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority))
+ goto nla_put_failure;
+ if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
+ struct crypto_stat rl;
+
+ strlcpy(rl.type, "larval", sizeof(rl.type));
+ if (nla_put(skb, CRYPTOCFGA_STAT_LARVAL,
+ sizeof(struct crypto_stat), &rl))
+ goto nla_put_failure;
+ goto out;
+ }
+
+ switch (alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL)) {
+ case CRYPTO_ALG_TYPE_AEAD:
+ if (crypto_report_aead(skb, alg))
+ goto nla_put_failure;
+ break;
+ case CRYPTO_ALG_TYPE_SKCIPHER:
+ if (crypto_report_cipher(skb, alg))
+ goto nla_put_failure;
+ break;
+ case CRYPTO_ALG_TYPE_BLKCIPHER:
+ if (crypto_report_cipher(skb, alg))
+ goto nla_put_failure;
+ break;
+ case CRYPTO_ALG_TYPE_CIPHER:
+ if (crypto_report_cipher(skb, alg))
+ goto nla_put_failure;
+ break;
+ case CRYPTO_ALG_TYPE_COMPRESS:
+ if (crypto_report_comp(skb, alg))
+ goto nla_put_failure;
+ break;
+ case CRYPTO_ALG_TYPE_ACOMPRESS:
+ if (crypto_report_acomp(skb, alg))
+ goto nla_put_failure;
+ break;
+ case CRYPTO_ALG_TYPE_SCOMPRESS:
+ if (crypto_report_acomp(skb, alg))
+ goto nla_put_failure;
+ break;
+ case CRYPTO_ALG_TYPE_AKCIPHER:
+ if (crypto_report_akcipher(skb, alg))
+ goto nla_put_failure;
+ break;
+ case CRYPTO_ALG_TYPE_KPP:
+ if (crypto_report_kpp(skb, alg))
+ goto nla_put_failure;
+ break;
+ case CRYPTO_ALG_TYPE_AHASH:
+ if (crypto_report_ahash(skb, alg))
+ goto nla_put_failure;
+ break;
+ case CRYPTO_ALG_TYPE_HASH:
+ if (crypto_report_shash(skb, alg))
+ goto nla_put_failure;
+ break;
+ case CRYPTO_ALG_TYPE_RNG:
+ if (crypto_report_rng(skb, alg))
+ goto nla_put_failure;
+ break;
+ default:
+ pr_err("ERROR: Unhandled alg %d in %s\n",
+ alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL),
+ __func__);
+ }
+
+out:
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int crypto_reportstat_alg(struct crypto_alg *alg,
+ struct crypto_dump_info *info)
+{
+ struct sk_buff *in_skb = info->in_skb;
+ struct sk_buff *skb = info->out_skb;
+ struct nlmsghdr *nlh;
+ struct crypto_user_alg *ualg;
+ int err = 0;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, info->nlmsg_seq,
+ CRYPTO_MSG_GETSTAT, sizeof(*ualg), info->nlmsg_flags);
+ if (!nlh) {
+ err = -EMSGSIZE;
+ goto out;
+ }
+
+ ualg = nlmsg_data(nlh);
+
+ err = crypto_reportstat_one(alg, ualg, skb);
+ if (err) {
+ nlmsg_cancel(skb, nlh);
+ goto out;
+ }
+
+ nlmsg_end(skb, nlh);
+
+out:
+ return err;
+}
+
+int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
+ struct nlattr **attrs)
+{
+ struct crypto_user_alg *p = nlmsg_data(in_nlh);
+ struct crypto_alg *alg;
+ struct sk_buff *skb;
+ struct crypto_dump_info info;
+ int err;
+
+ if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+ return -EINVAL;
+
+ alg = crypto_alg_match(p, 0);
+ if (!alg)
+ return -ENOENT;
+
+ err = -ENOMEM;
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!skb)
+ goto drop_alg;
+
+ info.in_skb = in_skb;
+ info.out_skb = skb;
+ info.nlmsg_seq = in_nlh->nlmsg_seq;
+ info.nlmsg_flags = 0;
+
+ err = crypto_reportstat_alg(alg, &info);
+
+drop_alg:
+ crypto_mod_put(alg);
+
+ if (err)
+ return err;
+
+ return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
+}
+
+int crypto_dump_reportstat(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct crypto_alg *alg;
+ struct crypto_dump_info info;
+ int err;
+
+ if (cb->args[0])
+ goto out;
+
+ cb->args[0] = 1;
+
+ info.in_skb = cb->skb;
+ info.out_skb = skb;
+ info.nlmsg_seq = cb->nlh->nlmsg_seq;
+ info.nlmsg_flags = NLM_F_MULTI;
+
+ list_for_each_entry(alg, &crypto_alg_list, cra_list) {
+ err = crypto_reportstat_alg(alg, &info);
+ if (err)
+ goto out_err;
+ }
+
+out:
+ return skb->len;
+out_err:
+ return err;
+}
+
+int crypto_dump_reportstat_done(struct netlink_callback *cb)
+{
+ return 0;
+}
+
+MODULE_LICENSE("GPL");
diff --git a/crypto/echainiv.c b/crypto/echainiv.c
index 45819e6015bf..77e607fdbfb7 100644
--- a/crypto/echainiv.c
+++ b/crypto/echainiv.c
@@ -47,9 +47,9 @@ static int echainiv_encrypt(struct aead_request *req)
info = req->iv;
if (req->src != req->dst) {
- SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
- skcipher_request_set_tfm(nreq, ctx->sknull);
+ skcipher_request_set_sync_tfm(nreq, ctx->sknull);
skcipher_request_set_callback(nreq, req->base.flags,
NULL, NULL);
skcipher_request_set_crypt(nreq, req->src, req->dst,
diff --git a/crypto/gcm.c b/crypto/gcm.c
index 0ad879e1f9b2..e438492db2ca 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -50,7 +50,7 @@ struct crypto_rfc4543_instance_ctx {
struct crypto_rfc4543_ctx {
struct crypto_aead *child;
- struct crypto_skcipher *null;
+ struct crypto_sync_skcipher *null;
u8 nonce[4];
};
@@ -1067,9 +1067,9 @@ static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc)
unsigned int authsize = crypto_aead_authsize(aead);
unsigned int nbytes = req->assoclen + req->cryptlen -
(enc ? 0 : authsize);
- SKCIPHER_REQUEST_ON_STACK(nreq, ctx->null);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->null);
- skcipher_request_set_tfm(nreq, ctx->null);
+ skcipher_request_set_sync_tfm(nreq, ctx->null);
skcipher_request_set_callback(nreq, req->base.flags, NULL, NULL);
skcipher_request_set_crypt(nreq, req->src, req->dst, nbytes, NULL);
@@ -1093,7 +1093,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm)
struct crypto_aead_spawn *spawn = &ictx->aead;
struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm);
struct crypto_aead *aead;
- struct crypto_skcipher *null;
+ struct crypto_sync_skcipher *null;
unsigned long align;
int err = 0;
diff --git a/crypto/internal.h b/crypto/internal.h
index 9a3f39939fba..ef769b5e8ad3 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -26,12 +26,6 @@
#include <linux/rwsem.h>
#include <linux/slab.h>
-/* Crypto notification events. */
-enum {
- CRYPTO_MSG_ALG_REQUEST,
- CRYPTO_MSG_ALG_REGISTER,
-};
-
struct crypto_instance;
struct crypto_template;
@@ -90,8 +84,6 @@ struct crypto_alg *crypto_find_alg(const char *alg_name,
void *crypto_alloc_tfm(const char *alg_name,
const struct crypto_type *frontend, u32 type, u32 mask);
-int crypto_register_notifier(struct notifier_block *nb);
-int crypto_unregister_notifier(struct notifier_block *nb);
int crypto_probing_notify(unsigned long val, void *v);
unsigned int crypto_alg_extsize(struct crypto_alg *alg);
diff --git a/crypto/lrw.c b/crypto/lrw.c
index 393a782679c7..0430ccd08728 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -29,8 +29,6 @@
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>
-#define LRW_BUFFER_SIZE 128u
-
#define LRW_BLOCK_SIZE 16
struct priv {
@@ -56,19 +54,7 @@ struct priv {
};
struct rctx {
- be128 buf[LRW_BUFFER_SIZE / sizeof(be128)];
-
be128 t;
-
- be128 *ext;
-
- struct scatterlist srcbuf[2];
- struct scatterlist dstbuf[2];
- struct scatterlist *src;
- struct scatterlist *dst;
-
- unsigned int left;
-
struct skcipher_request subreq;
};
@@ -120,112 +106,68 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
return 0;
}
-static inline void inc(be128 *iv)
-{
- be64_add_cpu(&iv->b, 1);
- if (!iv->b)
- be64_add_cpu(&iv->a, 1);
-}
-
-/* this returns the number of consequative 1 bits starting
- * from the right, get_index128(00 00 00 00 00 00 ... 00 00 10 FB) = 2 */
-static inline int get_index128(be128 *block)
+/*
+ * Returns the number of trailing '1' bits in the words of the counter, which is
+ * represented by 4 32-bit words, arranged from least to most significant.
+ * At the same time, increments the counter by one.
+ *
+ * For example:
+ *
+ * u32 counter[4] = { 0xFFFFFFFF, 0x1, 0x0, 0x0 };
+ * int i = next_index(&counter);
+ * // i == 33, counter == { 0x0, 0x2, 0x0, 0x0 }
+ */
+static int next_index(u32 *counter)
{
- int x;
- __be32 *p = (__be32 *) block;
+ int i, res = 0;
- for (p += 3, x = 0; x < 128; p--, x += 32) {
- u32 val = be32_to_cpup(p);
+ for (i = 0; i < 4; i++) {
+ if (counter[i] + 1 != 0)
+ return res + ffz(counter[i]++);
- if (!~val)
- continue;
-
- return x + ffz(val);
+ counter[i] = 0;
+ res += 32;
}
- return x;
+ /*
+ * If we get here, then x == 128 and we are incrementing the counter
+ * from all ones to all zeros. This means we must return index 127, i.e.
+ * the one corresponding to key2*{ 1,...,1 }.
+ */
+ return 127;
}
-static int post_crypt(struct skcipher_request *req)
+/*
+ * We compute the tweak masks twice (both before and after the ECB encryption or
+ * decryption) to avoid having to allocate a temporary buffer and/or make
+ * mutliple calls to the 'ecb(..)' instance, which usually would be slower than
+ * just doing the next_index() calls again.
+ */
+static int xor_tweak(struct skcipher_request *req, bool second_pass)
{
- struct rctx *rctx = skcipher_request_ctx(req);
- be128 *buf = rctx->ext ?: rctx->buf;
- struct skcipher_request *subreq;
const int bs = LRW_BLOCK_SIZE;
- struct skcipher_walk w;
- struct scatterlist *sg;
- unsigned offset;
- int err;
-
- subreq = &rctx->subreq;
- err = skcipher_walk_virt(&w, subreq, false);
-
- while (w.nbytes) {
- unsigned int avail = w.nbytes;
- be128 *wdst;
-
- wdst = w.dst.virt.addr;
-
- do {
- be128_xor(wdst, buf++, wdst);
- wdst++;
- } while ((avail -= bs) >= bs);
-
- err = skcipher_walk_done(&w, avail);
- }
-
- rctx->left -= subreq->cryptlen;
-
- if (err || !rctx->left)
- goto out;
-
- rctx->dst = rctx->dstbuf;
-
- scatterwalk_done(&w.out, 0, 1);
- sg = w.out.sg;
- offset = w.out.offset;
-
- if (rctx->dst != sg) {
- rctx->dst[0] = *sg;
- sg_unmark_end(rctx->dst);
- scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 2);
- }
- rctx->dst[0].length -= offset - sg->offset;
- rctx->dst[0].offset = offset;
-
-out:
- return err;
-}
-
-static int pre_crypt(struct skcipher_request *req)
-{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rctx *rctx = skcipher_request_ctx(req);
struct priv *ctx = crypto_skcipher_ctx(tfm);
- be128 *buf = rctx->ext ?: rctx->buf;
- struct skcipher_request *subreq;
- const int bs = LRW_BLOCK_SIZE;
+ struct rctx *rctx = skcipher_request_ctx(req);
+ be128 t = rctx->t;
struct skcipher_walk w;
- struct scatterlist *sg;
- unsigned cryptlen;
- unsigned offset;
- be128 *iv;
- bool more;
+ __be32 *iv;
+ u32 counter[4];
int err;
- subreq = &rctx->subreq;
- skcipher_request_set_tfm(subreq, tfm);
-
- cryptlen = subreq->cryptlen;
- more = rctx->left > cryptlen;
- if (!more)
- cryptlen = rctx->left;
+ if (second_pass) {
+ req = &rctx->subreq;
+ /* set to our TFM to enforce correct alignment: */
+ skcipher_request_set_tfm(req, tfm);
+ }
- skcipher_request_set_crypt(subreq, rctx->src, rctx->dst,
- cryptlen, req->iv);
+ err = skcipher_walk_virt(&w, req, false);
+ iv = (__be32 *)w.iv;
- err = skcipher_walk_virt(&w, subreq, false);
- iv = w.iv;
+ counter[0] = be32_to_cpu(iv[3]);
+ counter[1] = be32_to_cpu(iv[2]);
+ counter[2] = be32_to_cpu(iv[1]);
+ counter[3] = be32_to_cpu(iv[0]);
while (w.nbytes) {
unsigned int avail = w.nbytes;
@@ -236,188 +178,85 @@ static int pre_crypt(struct skcipher_request *req)
wdst = w.dst.virt.addr;
do {
- *buf++ = rctx->t;
- be128_xor(wdst++, &rctx->t, wsrc++);
+ be128_xor(wdst++, &t, wsrc++);
/* T <- I*Key2, using the optimization
* discussed in the specification */
- be128_xor(&rctx->t, &rctx->t,
- &ctx->mulinc[get_index128(iv)]);
- inc(iv);
+ be128_xor(&t, &t, &ctx->mulinc[next_index(counter)]);
} while ((avail -= bs) >= bs);
- err = skcipher_walk_done(&w, avail);
- }
-
- skcipher_request_set_tfm(subreq, ctx->child);
- skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst,
- cryptlen, NULL);
-
- if (err || !more)
- goto out;
-
- rctx->src = rctx->srcbuf;
-
- scatterwalk_done(&w.in, 0, 1);
- sg = w.in.sg;
- offset = w.in.offset;
+ if (second_pass && w.nbytes == w.total) {
+ iv[0] = cpu_to_be32(counter[3]);
+ iv[1] = cpu_to_be32(counter[2]);
+ iv[2] = cpu_to_be32(counter[1]);
+ iv[3] = cpu_to_be32(counter[0]);
+ }
- if (rctx->src != sg) {
- rctx->src[0] = *sg;
- sg_unmark_end(rctx->src);
- scatterwalk_crypto_chain(rctx->src, sg_next(sg), 2);
+ err = skcipher_walk_done(&w, avail);
}
- rctx->src[0].length -= offset - sg->offset;
- rctx->src[0].offset = offset;
-out:
return err;
}
-static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
+static int xor_tweak_pre(struct skcipher_request *req)
{
- struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
- struct rctx *rctx = skcipher_request_ctx(req);
- struct skcipher_request *subreq;
- gfp_t gfp;
-
- subreq = &rctx->subreq;
- skcipher_request_set_callback(subreq, req->base.flags, done, req);
-
- gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
- GFP_ATOMIC;
- rctx->ext = NULL;
-
- subreq->cryptlen = LRW_BUFFER_SIZE;
- if (req->cryptlen > LRW_BUFFER_SIZE) {
- unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE);
-
- rctx->ext = kmalloc(n, gfp);
- if (rctx->ext)
- subreq->cryptlen = n;
- }
-
- rctx->src = req->src;
- rctx->dst = req->dst;
- rctx->left = req->cryptlen;
-
- /* calculate first value of T */
- memcpy(&rctx->t, req->iv, sizeof(rctx->t));
-
- /* T <- I*Key2 */
- gf128mul_64k_bbe(&rctx->t, ctx->table);
-
- return 0;
+ return xor_tweak(req, false);
}
-static void exit_crypt(struct skcipher_request *req)
+static int xor_tweak_post(struct skcipher_request *req)
{
- struct rctx *rctx = skcipher_request_ctx(req);
-
- rctx->left = 0;
-
- if (rctx->ext)
- kzfree(rctx->ext);
+ return xor_tweak(req, true);
}
-static int do_encrypt(struct skcipher_request *req, int err)
-{
- struct rctx *rctx = skcipher_request_ctx(req);
- struct skcipher_request *subreq;
-
- subreq = &rctx->subreq;
-
- while (!err && rctx->left) {
- err = pre_crypt(req) ?:
- crypto_skcipher_encrypt(subreq) ?:
- post_crypt(req);
-
- if (err == -EINPROGRESS || err == -EBUSY)
- return err;
- }
-
- exit_crypt(req);
- return err;
-}
-
-static void encrypt_done(struct crypto_async_request *areq, int err)
+static void crypt_done(struct crypto_async_request *areq, int err)
{
struct skcipher_request *req = areq->data;
- struct skcipher_request *subreq;
- struct rctx *rctx;
- rctx = skcipher_request_ctx(req);
+ if (!err)
+ err = xor_tweak_post(req);
- if (err == -EINPROGRESS) {
- if (rctx->left != req->cryptlen)
- return;
- goto out;
- }
-
- subreq = &rctx->subreq;
- subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
-
- err = do_encrypt(req, err ?: post_crypt(req));
- if (rctx->left)
- return;
-
-out:
skcipher_request_complete(req, err);
}
-static int encrypt(struct skcipher_request *req)
-{
- return do_encrypt(req, init_crypt(req, encrypt_done));
-}
-
-static int do_decrypt(struct skcipher_request *req, int err)
+static void init_crypt(struct skcipher_request *req)
{
+ struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
struct rctx *rctx = skcipher_request_ctx(req);
- struct skcipher_request *subreq;
-
- subreq = &rctx->subreq;
+ struct skcipher_request *subreq = &rctx->subreq;
- while (!err && rctx->left) {
- err = pre_crypt(req) ?:
- crypto_skcipher_decrypt(subreq) ?:
- post_crypt(req);
+ skcipher_request_set_tfm(subreq, ctx->child);
+ skcipher_request_set_callback(subreq, req->base.flags, crypt_done, req);
+ /* pass req->iv as IV (will be used by xor_tweak, ECB will ignore it) */
+ skcipher_request_set_crypt(subreq, req->dst, req->dst,
+ req->cryptlen, req->iv);
- if (err == -EINPROGRESS || err == -EBUSY)
- return err;
- }
+ /* calculate first value of T */
+ memcpy(&rctx->t, req->iv, sizeof(rctx->t));
- exit_crypt(req);
- return err;
+ /* T <- I*Key2 */
+ gf128mul_64k_bbe(&rctx->t, ctx->table);
}
-static void decrypt_done(struct crypto_async_request *areq, int err)
+static int encrypt(struct skcipher_request *req)
{
- struct skcipher_request *req = areq->data;
- struct skcipher_request *subreq;
- struct rctx *rctx;
-
- rctx = skcipher_request_ctx(req);
-
- if (err == -EINPROGRESS) {
- if (rctx->left != req->cryptlen)
- return;
- goto out;
- }
-
- subreq = &rctx->subreq;
- subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
-
- err = do_decrypt(req, err ?: post_crypt(req));
- if (rctx->left)
- return;
+ struct rctx *rctx = skcipher_request_ctx(req);
+ struct skcipher_request *subreq = &rctx->subreq;
-out:
- skcipher_request_complete(req, err);
+ init_crypt(req);
+ return xor_tweak_pre(req) ?:
+ crypto_skcipher_encrypt(subreq) ?:
+ xor_tweak_post(req);
}
static int decrypt(struct skcipher_request *req)
{
- return do_decrypt(req, init_crypt(req, decrypt_done));
+ struct rctx *rctx = skcipher_request_ctx(req);
+ struct skcipher_request *subreq = &rctx->subreq;
+
+ init_crypt(req);
+ return xor_tweak_pre(req) ?:
+ crypto_skcipher_decrypt(subreq) ?:
+ xor_tweak_post(req);
}
static int init_tfm(struct crypto_skcipher *tfm)
@@ -543,7 +382,7 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.base.cra_priority = alg->base.cra_priority;
inst->alg.base.cra_blocksize = LRW_BLOCK_SIZE;
inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
- (__alignof__(u64) - 1);
+ (__alignof__(__be32) - 1);
inst->alg.ivsize = LRW_BLOCK_SIZE;
inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) +
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
deleted file mode 100644
index f14152147ce8..000000000000
--- a/crypto/mcryptd.c
+++ /dev/null
@@ -1,675 +0,0 @@
-/*
- * Software multibuffer async crypto daemon.
- *
- * Copyright (c) 2014 Tim Chen <tim.c.chen@linux.intel.com>
- *
- * Adapted from crypto daemon.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/aead.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-#include <linux/sched.h>
-#include <linux/sched/stat.h>
-#include <linux/slab.h>
-
-#define MCRYPTD_MAX_CPU_QLEN 100
-#define MCRYPTD_BATCH 9
-
-static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
- unsigned int tail);
-
-struct mcryptd_flush_list {
- struct list_head list;
- struct mutex lock;
-};
-
-static struct mcryptd_flush_list __percpu *mcryptd_flist;
-
-struct hashd_instance_ctx {
- struct crypto_ahash_spawn spawn;
- struct mcryptd_queue *queue;
-};
-
-static void mcryptd_queue_worker(struct work_struct *work);
-
-void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay)
-{
- struct mcryptd_flush_list *flist;
-
- if (!cstate->flusher_engaged) {
- /* put the flusher on the flush list */
- flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
- mutex_lock(&flist->lock);
- list_add_tail(&cstate->flush_list, &flist->list);
- cstate->flusher_engaged = true;
- cstate->next_flush = jiffies + delay;
- queue_delayed_work_on(smp_processor_id(), kcrypto_wq,
- &cstate->flush, delay);
- mutex_unlock(&flist->lock);
- }
-}
-EXPORT_SYMBOL(mcryptd_arm_flusher);
-
-static int mcryptd_init_queue(struct mcryptd_queue *queue,
- unsigned int max_cpu_qlen)
-{
- int cpu;
- struct mcryptd_cpu_queue *cpu_queue;
-
- queue->cpu_queue = alloc_percpu(struct mcryptd_cpu_queue);
- pr_debug("mqueue:%p mcryptd_cpu_queue %p\n", queue, queue->cpu_queue);
- if (!queue->cpu_queue)
- return -ENOMEM;
- for_each_possible_cpu(cpu) {
- cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
- pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
- crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
- INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
- spin_lock_init(&cpu_queue->q_lock);
- }
- return 0;
-}
-
-static void mcryptd_fini_queue(struct mcryptd_queue *queue)
-{
- int cpu;
- struct mcryptd_cpu_queue *cpu_queue;
-
- for_each_possible_cpu(cpu) {
- cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
- BUG_ON(cpu_queue->queue.qlen);
- }
- free_percpu(queue->cpu_queue);
-}
-
-static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
- struct crypto_async_request *request,
- struct mcryptd_hash_request_ctx *rctx)
-{
- int cpu, err;
- struct mcryptd_cpu_queue *cpu_queue;
-
- cpu_queue = raw_cpu_ptr(queue->cpu_queue);
- spin_lock(&cpu_queue->q_lock);
- cpu = smp_processor_id();
- rctx->tag.cpu = smp_processor_id();
-
- err = crypto_enqueue_request(&cpu_queue->queue, request);
- pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
- cpu, cpu_queue, request);
- spin_unlock(&cpu_queue->q_lock);
- queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
-
- return err;
-}
-
-/*
- * Try to opportunisticlly flush the partially completed jobs if
- * crypto daemon is the only task running.
- */
-static void mcryptd_opportunistic_flush(void)
-{
- struct mcryptd_flush_list *flist;
- struct mcryptd_alg_cstate *cstate;
-
- flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
- while (single_task_running()) {
- mutex_lock(&flist->lock);
- cstate = list_first_entry_or_null(&flist->list,
- struct mcryptd_alg_cstate, flush_list);
- if (!cstate || !cstate->flusher_engaged) {
- mutex_unlock(&flist->lock);
- return;
- }
- list_del(&cstate->flush_list);
- cstate->flusher_engaged = false;
- mutex_unlock(&flist->lock);
- cstate->alg_state->flusher(cstate);
- }
-}
-
-/*
- * Called in workqueue context, do one real cryption work (via
- * req->complete) and reschedule itself if there are more work to
- * do.
- */
-static void mcryptd_queue_worker(struct work_struct *work)
-{
- struct mcryptd_cpu_queue *cpu_queue;
- struct crypto_async_request *req, *backlog;
- int i;
-
- /*
- * Need to loop through more than once for multi-buffer to
- * be effective.
- */
-
- cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
- i = 0;
- while (i < MCRYPTD_BATCH || single_task_running()) {
-
- spin_lock_bh(&cpu_queue->q_lock);
- backlog = crypto_get_backlog(&cpu_queue->queue);
- req = crypto_dequeue_request(&cpu_queue->queue);
- spin_unlock_bh(&cpu_queue->q_lock);
-
- if (!req) {
- mcryptd_opportunistic_flush();
- return;
- }
-
- if (backlog)
- backlog->complete(backlog, -EINPROGRESS);
- req->complete(req, 0);
- if (!cpu_queue->queue.qlen)
- return;
- ++i;
- }
- if (cpu_queue->queue.qlen)
- queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
-}
-
-void mcryptd_flusher(struct work_struct *__work)
-{
- struct mcryptd_alg_cstate *alg_cpu_state;
- struct mcryptd_alg_state *alg_state;
- struct mcryptd_flush_list *flist;
- int cpu;
-
- cpu = smp_processor_id();
- alg_cpu_state = container_of(to_delayed_work(__work),
- struct mcryptd_alg_cstate, flush);
- alg_state = alg_cpu_state->alg_state;
- if (alg_cpu_state->cpu != cpu)
- pr_debug("mcryptd error: work on cpu %d, should be cpu %d\n",
- cpu, alg_cpu_state->cpu);
-
- if (alg_cpu_state->flusher_engaged) {
- flist = per_cpu_ptr(mcryptd_flist, cpu);
- mutex_lock(&flist->lock);
- list_del(&alg_cpu_state->flush_list);
- alg_cpu_state->flusher_engaged = false;
- mutex_unlock(&flist->lock);
- alg_state->flusher(alg_cpu_state);
- }
-}
-EXPORT_SYMBOL_GPL(mcryptd_flusher);
-
-static inline struct mcryptd_queue *mcryptd_get_queue(struct crypto_tfm *tfm)
-{
- struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
- struct mcryptd_instance_ctx *ictx = crypto_instance_ctx(inst);
-
- return ictx->queue;
-}
-
-static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
- unsigned int tail)
-{
- char *p;
- struct crypto_instance *inst;
- int err;
-
- p = kzalloc(head + sizeof(*inst) + tail, GFP_KERNEL);
- if (!p)
- return ERR_PTR(-ENOMEM);
-
- inst = (void *)(p + head);
-
- err = -ENAMETOOLONG;
- if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
- "mcryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
- goto out_free_inst;
-
- memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
-
- inst->alg.cra_priority = alg->cra_priority + 50;
- inst->alg.cra_blocksize = alg->cra_blocksize;
- inst->alg.cra_alignmask = alg->cra_alignmask;
-
-out:
- return p;
-
-out_free_inst:
- kfree(p);
- p = ERR_PTR(err);
- goto out;
-}
-
-static inline bool mcryptd_check_internal(struct rtattr **tb, u32 *type,
- u32 *mask)
-{
- struct crypto_attr_type *algt;
-
- algt = crypto_get_attr_type(tb);
- if (IS_ERR(algt))
- return false;
-
- *type |= algt->type & CRYPTO_ALG_INTERNAL;
- *mask |= algt->mask & CRYPTO_ALG_INTERNAL;
-
- if (*type & *mask & CRYPTO_ALG_INTERNAL)
- return true;
- else
- return false;
-}
-
-static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm)
-{
- struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
- struct hashd_instance_ctx *ictx = crypto_instance_ctx(inst);
- struct crypto_ahash_spawn *spawn = &ictx->spawn;
- struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
- struct crypto_ahash *hash;
-
- hash = crypto_spawn_ahash(spawn);
- if (IS_ERR(hash))
- return PTR_ERR(hash);
-
- ctx->child = hash;
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct mcryptd_hash_request_ctx) +
- crypto_ahash_reqsize(hash));
- return 0;
-}
-
-static void mcryptd_hash_exit_tfm(struct crypto_tfm *tfm)
-{
- struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
-
- crypto_free_ahash(ctx->child);
-}
-
-static int mcryptd_hash_setkey(struct crypto_ahash *parent,
- const u8 *key, unsigned int keylen)
-{
- struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(parent);
- struct crypto_ahash *child = ctx->child;
- int err;
-
- crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
- crypto_ahash_set_flags(child, crypto_ahash_get_flags(parent) &
- CRYPTO_TFM_REQ_MASK);
- err = crypto_ahash_setkey(child, key, keylen);
- crypto_ahash_set_flags(parent, crypto_ahash_get_flags(child) &
- CRYPTO_TFM_RES_MASK);
- return err;
-}
-
-static int mcryptd_hash_enqueue(struct ahash_request *req,
- crypto_completion_t complete)
-{
- int ret;
-
- struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct mcryptd_queue *queue =
- mcryptd_get_queue(crypto_ahash_tfm(tfm));
-
- rctx->complete = req->base.complete;
- req->base.complete = complete;
-
- ret = mcryptd_enqueue_request(queue, &req->base, rctx);
-
- return ret;
-}
-
-static void mcryptd_hash_init(struct crypto_async_request *req_async, int err)
-{
- struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
- struct crypto_ahash *child = ctx->child;
- struct ahash_request *req = ahash_request_cast(req_async);
- struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
- struct ahash_request *desc = &rctx->areq;
-
- if (unlikely(err == -EINPROGRESS))
- goto out;
-
- ahash_request_set_tfm(desc, child);
- ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP,
- rctx->complete, req_async);
-
- rctx->out = req->result;
- err = crypto_ahash_init(desc);
-
-out:
- local_bh_disable();
- rctx->complete(&req->base, err);
- local_bh_enable();
-}
-
-static int mcryptd_hash_init_enqueue(struct ahash_request *req)
-{
- return mcryptd_hash_enqueue(req, mcryptd_hash_init);
-}
-
-static void mcryptd_hash_update(struct crypto_async_request *req_async, int err)
-{
- struct ahash_request *req = ahash_request_cast(req_async);
- struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-
- if (unlikely(err == -EINPROGRESS))
- goto out;
-
- rctx->out = req->result;
- err = crypto_ahash_update(&rctx->areq);
- if (err) {
- req->base.complete = rctx->complete;
- goto out;
- }
-
- return;
-out:
- local_bh_disable();
- rctx->complete(&req->base, err);
- local_bh_enable();
-}
-
-static int mcryptd_hash_update_enqueue(struct ahash_request *req)
-{
- return mcryptd_hash_enqueue(req, mcryptd_hash_update);
-}
-
-static void mcryptd_hash_final(struct crypto_async_request *req_async, int err)
-{
- struct ahash_request *req = ahash_request_cast(req_async);
- struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-
- if (unlikely(err == -EINPROGRESS))
- goto out;
-
- rctx->out = req->result;
- err = crypto_ahash_final(&rctx->areq);
- if (err) {
- req->base.complete = rctx->complete;
- goto out;
- }
-
- return;
-out:
- local_bh_disable();
- rctx->complete(&req->base, err);
- local_bh_enable();
-}
-
-static int mcryptd_hash_final_enqueue(struct ahash_request *req)
-{
- return mcryptd_hash_enqueue(req, mcryptd_hash_final);
-}
-
-static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err)
-{
- struct ahash_request *req = ahash_request_cast(req_async);
- struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-
- if (unlikely(err == -EINPROGRESS))
- goto out;
- rctx->out = req->result;
- err = crypto_ahash_finup(&rctx->areq);
-
- if (err) {
- req->base.complete = rctx->complete;
- goto out;
- }
-
- return;
-out:
- local_bh_disable();
- rctx->complete(&req->base, err);
- local_bh_enable();
-}
-
-static int mcryptd_hash_finup_enqueue(struct ahash_request *req)
-{
- return mcryptd_hash_enqueue(req, mcryptd_hash_finup);
-}
-
-static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err)
-{
- struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
- struct crypto_ahash *child = ctx->child;
- struct ahash_request *req = ahash_request_cast(req_async);
- struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
- struct ahash_request *desc = &rctx->areq;
-
- if (unlikely(err == -EINPROGRESS))
- goto out;
-
- ahash_request_set_tfm(desc, child);
- ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP,
- rctx->complete, req_async);
-
- rctx->out = req->result;
- err = crypto_ahash_init(desc) ?: crypto_ahash_finup(desc);
-
-out:
- local_bh_disable();
- rctx->complete(&req->base, err);
- local_bh_enable();
-}
-
-static int mcryptd_hash_digest_enqueue(struct ahash_request *req)
-{
- return mcryptd_hash_enqueue(req, mcryptd_hash_digest);
-}
-
-static int mcryptd_hash_export(struct ahash_request *req, void *out)
-{
- struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-
- return crypto_ahash_export(&rctx->areq, out);
-}
-
-static int mcryptd_hash_import(struct ahash_request *req, const void *in)
-{
- struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-
- return crypto_ahash_import(&rctx->areq, in);
-}
-
-static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
- struct mcryptd_queue *queue)
-{
- struct hashd_instance_ctx *ctx;
- struct ahash_instance *inst;
- struct hash_alg_common *halg;
- struct crypto_alg *alg;
- u32 type = 0;
- u32 mask = 0;
- int err;
-
- if (!mcryptd_check_internal(tb, &type, &mask))
- return -EINVAL;
-
- halg = ahash_attr_alg(tb[1], type, mask);
- if (IS_ERR(halg))
- return PTR_ERR(halg);
-
- alg = &halg->base;
- pr_debug("crypto: mcryptd hash alg: %s\n", alg->cra_name);
- inst = mcryptd_alloc_instance(alg, ahash_instance_headroom(),
- sizeof(*ctx));
- err = PTR_ERR(inst);
- if (IS_ERR(inst))
- goto out_put_alg;
-
- ctx = ahash_instance_ctx(inst);
- ctx->queue = queue;
-
- err = crypto_init_ahash_spawn(&ctx->spawn, halg,
- ahash_crypto_instance(inst));
- if (err)
- goto out_free_inst;
-
- inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC |
- (alg->cra_flags & (CRYPTO_ALG_INTERNAL |
- CRYPTO_ALG_OPTIONAL_KEY));
-
- inst->alg.halg.digestsize = halg->digestsize;
- inst->alg.halg.statesize = halg->statesize;
- inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx);
-
- inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm;
- inst->alg.halg.base.cra_exit = mcryptd_hash_exit_tfm;
-
- inst->alg.init = mcryptd_hash_init_enqueue;
- inst->alg.update = mcryptd_hash_update_enqueue;
- inst->alg.final = mcryptd_hash_final_enqueue;
- inst->alg.finup = mcryptd_hash_finup_enqueue;
- inst->alg.export = mcryptd_hash_export;
- inst->alg.import = mcryptd_hash_import;
- if (crypto_hash_alg_has_setkey(halg))
- inst->alg.setkey = mcryptd_hash_setkey;
- inst->alg.digest = mcryptd_hash_digest_enqueue;
-
- err = ahash_register_instance(tmpl, inst);
- if (err) {
- crypto_drop_ahash(&ctx->spawn);
-out_free_inst:
- kfree(inst);
- }
-
-out_put_alg:
- crypto_mod_put(alg);
- return err;
-}
-
-static struct mcryptd_queue mqueue;
-
-static int mcryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
-{
- struct crypto_attr_type *algt;
-
- algt = crypto_get_attr_type(tb);
- if (IS_ERR(algt))
- return PTR_ERR(algt);
-
- switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
- case CRYPTO_ALG_TYPE_DIGEST:
- return mcryptd_create_hash(tmpl, tb, &mqueue);
- break;
- }
-
- return -EINVAL;
-}
-
-static void mcryptd_free(struct crypto_instance *inst)
-{
- struct mcryptd_instance_ctx *ctx = crypto_instance_ctx(inst);
- struct hashd_instance_ctx *hctx = crypto_instance_ctx(inst);
-
- switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) {
- case CRYPTO_ALG_TYPE_AHASH:
- crypto_drop_ahash(&hctx->spawn);
- kfree(ahash_instance(inst));
- return;
- default:
- crypto_drop_spawn(&ctx->spawn);
- kfree(inst);
- }
-}
-
-static struct crypto_template mcryptd_tmpl = {
- .name = "mcryptd",
- .create = mcryptd_create,
- .free = mcryptd_free,
- .module = THIS_MODULE,
-};
-
-struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name,
- u32 type, u32 mask)
-{
- char mcryptd_alg_name[CRYPTO_MAX_ALG_NAME];
- struct crypto_ahash *tfm;
-
- if (snprintf(mcryptd_alg_name, CRYPTO_MAX_ALG_NAME,
- "mcryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
- return ERR_PTR(-EINVAL);
- tfm = crypto_alloc_ahash(mcryptd_alg_name, type, mask);
- if (IS_ERR(tfm))
- return ERR_CAST(tfm);
- if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
- crypto_free_ahash(tfm);
- return ERR_PTR(-EINVAL);
- }
-
- return __mcryptd_ahash_cast(tfm);
-}
-EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash);
-
-struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm)
-{
- struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);
-
- return ctx->child;
-}
-EXPORT_SYMBOL_GPL(mcryptd_ahash_child);
-
-struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req)
-{
- struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
- return &rctx->areq;
-}
-EXPORT_SYMBOL_GPL(mcryptd_ahash_desc);
-
-void mcryptd_free_ahash(struct mcryptd_ahash *tfm)
-{
- crypto_free_ahash(&tfm->base);
-}
-EXPORT_SYMBOL_GPL(mcryptd_free_ahash);
-
-static int __init mcryptd_init(void)
-{
- int err, cpu;
- struct mcryptd_flush_list *flist;
-
- mcryptd_flist = alloc_percpu(struct mcryptd_flush_list);
- for_each_possible_cpu(cpu) {
- flist = per_cpu_ptr(mcryptd_flist, cpu);
- INIT_LIST_HEAD(&flist->list);
- mutex_init(&flist->lock);
- }
-
- err = mcryptd_init_queue(&mqueue, MCRYPTD_MAX_CPU_QLEN);
- if (err) {
- free_percpu(mcryptd_flist);
- return err;
- }
-
- err = crypto_register_template(&mcryptd_tmpl);
- if (err) {
- mcryptd_fini_queue(&mqueue);
- free_percpu(mcryptd_flist);
- }
-
- return err;
-}
-
-static void __exit mcryptd_exit(void)
-{
- mcryptd_fini_queue(&mqueue);
- crypto_unregister_template(&mcryptd_tmpl);
- free_percpu(mcryptd_flist);
-}
-
-subsys_initcall(mcryptd_init);
-module_exit(mcryptd_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Software async multibuffer crypto daemon");
-MODULE_ALIAS_CRYPTO("mcryptd");
diff --git a/crypto/morus1280.c b/crypto/morus1280.c
index d057cf5ac4a8..3889c188f266 100644
--- a/crypto/morus1280.c
+++ b/crypto/morus1280.c
@@ -385,14 +385,11 @@ static void crypto_morus1280_final(struct morus1280_state *state,
struct morus1280_block *tag_xor,
u64 assoclen, u64 cryptlen)
{
- u64 assocbits = assoclen * 8;
- u64 cryptbits = cryptlen * 8;
-
struct morus1280_block tmp;
unsigned int i;
- tmp.words[0] = cpu_to_le64(assocbits);
- tmp.words[1] = cpu_to_le64(cryptbits);
+ tmp.words[0] = assoclen * 8;
+ tmp.words[1] = cryptlen * 8;
tmp.words[2] = 0;
tmp.words[3] = 0;
diff --git a/crypto/morus640.c b/crypto/morus640.c
index 1ca76e54281b..da06ec2f6a80 100644
--- a/crypto/morus640.c
+++ b/crypto/morus640.c
@@ -384,21 +384,13 @@ static void crypto_morus640_final(struct morus640_state *state,
struct morus640_block *tag_xor,
u64 assoclen, u64 cryptlen)
{
- u64 assocbits = assoclen * 8;
- u64 cryptbits = cryptlen * 8;
-
- u32 assocbits_lo = (u32)assocbits;
- u32 assocbits_hi = (u32)(assocbits >> 32);
- u32 cryptbits_lo = (u32)cryptbits;
- u32 cryptbits_hi = (u32)(cryptbits >> 32);
-
struct morus640_block tmp;
unsigned int i;
- tmp.words[0] = cpu_to_le32(assocbits_lo);
- tmp.words[1] = cpu_to_le32(assocbits_hi);
- tmp.words[2] = cpu_to_le32(cryptbits_lo);
- tmp.words[3] = cpu_to_le32(cryptbits_hi);
+ tmp.words[0] = lower_32_bits(assoclen * 8);
+ tmp.words[1] = upper_32_bits(assoclen * 8);
+ tmp.words[2] = lower_32_bits(cryptlen * 8);
+ tmp.words[3] = upper_32_bits(cryptlen * 8);
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
state->s[4].words[i] ^= state->s[0].words[i];
diff --git a/crypto/ofb.c b/crypto/ofb.c
new file mode 100644
index 000000000000..886631708c5e
--- /dev/null
+++ b/crypto/ofb.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * OFB: Output FeedBack mode
+ *
+ * Copyright (C) 2018 ARM Limited or its affiliates.
+ * All rights reserved.
+ *
+ * Based loosely on public domain code gleaned from libtomcrypt
+ * (https://github.com/libtom/libtomcrypt).
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+struct crypto_ofb_ctx {
+ struct crypto_cipher *child;
+ int cnt;
+};
+
+
+static int crypto_ofb_setkey(struct crypto_skcipher *parent, const u8 *key,
+ unsigned int keylen)
+{
+ struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(parent);
+ struct crypto_cipher *child = ctx->child;
+ int err;
+
+ crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+ crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+ CRYPTO_TFM_REQ_MASK);
+ err = crypto_cipher_setkey(child, key, keylen);
+ crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) &
+ CRYPTO_TFM_RES_MASK);
+ return err;
+}
+
+static int crypto_ofb_encrypt_segment(struct crypto_ofb_ctx *ctx,
+ struct skcipher_walk *walk,
+ struct crypto_cipher *tfm)
+{
+ int bsize = crypto_cipher_blocksize(tfm);
+ int nbytes = walk->nbytes;
+
+ u8 *src = walk->src.virt.addr;
+ u8 *dst = walk->dst.virt.addr;
+ u8 *iv = walk->iv;
+
+ do {
+ if (ctx->cnt == bsize) {
+ if (nbytes < bsize)
+ break;
+ crypto_cipher_encrypt_one(tfm, iv, iv);
+ ctx->cnt = 0;
+ }
+ *dst = *src ^ iv[ctx->cnt];
+ src++;
+ dst++;
+ ctx->cnt++;
+ } while (--nbytes);
+ return nbytes;
+}
+
+static int crypto_ofb_encrypt(struct skcipher_request *req)
+{
+ struct skcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ unsigned int bsize;
+ struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_cipher *child = ctx->child;
+ int ret = 0;
+
+ bsize = crypto_cipher_blocksize(child);
+ ctx->cnt = bsize;
+
+ ret = skcipher_walk_virt(&walk, req, false);
+
+ while (walk.nbytes) {
+ ret = crypto_ofb_encrypt_segment(ctx, &walk, child);
+ ret = skcipher_walk_done(&walk, ret);
+ }
+
+ return ret;
+}
+
+/* OFB encrypt and decrypt are identical */
+static int crypto_ofb_decrypt(struct skcipher_request *req)
+{
+ return crypto_ofb_encrypt(req);
+}
+
+static int crypto_ofb_init_tfm(struct crypto_skcipher *tfm)
+{
+ struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+ struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
+ struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_cipher *cipher;
+
+ cipher = crypto_spawn_cipher(spawn);
+ if (IS_ERR(cipher))
+ return PTR_ERR(cipher);
+
+ ctx->child = cipher;
+ return 0;
+}
+
+static void crypto_ofb_exit_tfm(struct crypto_skcipher *tfm)
+{
+ struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ crypto_free_cipher(ctx->child);
+}
+
+static void crypto_ofb_free(struct skcipher_instance *inst)
+{
+ crypto_drop_skcipher(skcipher_instance_ctx(inst));
+ kfree(inst);
+}
+
+static int crypto_ofb_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+ struct skcipher_instance *inst;
+ struct crypto_attr_type *algt;
+ struct crypto_spawn *spawn;
+ struct crypto_alg *alg;
+ u32 mask;
+ int err;
+
+ err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER);
+ if (err)
+ return err;
+
+ inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+ if (!inst)
+ return -ENOMEM;
+
+ algt = crypto_get_attr_type(tb);
+ err = PTR_ERR(algt);
+ if (IS_ERR(algt))
+ goto err_free_inst;
+
+ mask = CRYPTO_ALG_TYPE_MASK |
+ crypto_requires_off(algt->type, algt->mask,
+ CRYPTO_ALG_NEED_FALLBACK);
+
+ alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, mask);
+ err = PTR_ERR(alg);
+ if (IS_ERR(alg))
+ goto err_free_inst;
+
+ spawn = skcipher_instance_ctx(inst);
+ err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
+ CRYPTO_ALG_TYPE_MASK);
+ crypto_mod_put(alg);
+ if (err)
+ goto err_free_inst;
+
+ err = crypto_inst_setname(skcipher_crypto_instance(inst), "ofb", alg);
+ if (err)
+ goto err_drop_spawn;
+
+ inst->alg.base.cra_priority = alg->cra_priority;
+ inst->alg.base.cra_blocksize = alg->cra_blocksize;
+ inst->alg.base.cra_alignmask = alg->cra_alignmask;
+
+ /* We access the data as u32s when xoring. */
+ inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
+
+ inst->alg.ivsize = alg->cra_blocksize;
+ inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
+ inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
+
+ inst->alg.base.cra_ctxsize = sizeof(struct crypto_ofb_ctx);
+
+ inst->alg.init = crypto_ofb_init_tfm;
+ inst->alg.exit = crypto_ofb_exit_tfm;
+
+ inst->alg.setkey = crypto_ofb_setkey;
+ inst->alg.encrypt = crypto_ofb_encrypt;
+ inst->alg.decrypt = crypto_ofb_decrypt;
+
+ inst->free = crypto_ofb_free;
+
+ err = skcipher_register_instance(tmpl, inst);
+ if (err)
+ goto err_drop_spawn;
+
+out:
+ return err;
+
+err_drop_spawn:
+ crypto_drop_spawn(spawn);
+err_free_inst:
+ kfree(inst);
+ goto out;
+}
+
+static struct crypto_template crypto_ofb_tmpl = {
+ .name = "ofb",
+ .create = crypto_ofb_create,
+ .module = THIS_MODULE,
+};
+
+static int __init crypto_ofb_module_init(void)
+{
+ return crypto_register_template(&crypto_ofb_tmpl);
+}
+
+static void __exit crypto_ofb_module_exit(void)
+{
+ crypto_unregister_template(&crypto_ofb_tmpl);
+}
+
+module_init(crypto_ofb_module_init);
+module_exit(crypto_ofb_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OFB block cipher algorithm");
+MODULE_ALIAS_CRYPTO("ofb");
diff --git a/crypto/rng.c b/crypto/rng.c
index b4a618668161..547f16ecbfb0 100644
--- a/crypto/rng.c
+++ b/crypto/rng.c
@@ -50,6 +50,7 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
}
err = crypto_rng_alg(tfm)->seed(tfm, seed, slen);
+ crypto_stat_rng_seed(tfm, err);
out:
kzfree(buf);
return err;
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 9893dbfc1af4..812476e46821 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -261,15 +261,6 @@ static int pkcs1pad_encrypt(struct akcipher_request *req)
pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf,
ctx->key_size - 1 - req->src_len, req->src);
- req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL);
- if (!req_ctx->out_buf) {
- kfree(req_ctx->in_buf);
- return -ENOMEM;
- }
-
- pkcs1pad_sg_set_buf(req_ctx->out_sg, req_ctx->out_buf,
- ctx->key_size, NULL);
-
akcipher_request_set_tfm(&req_ctx->child_req, ctx->child);
akcipher_request_set_callback(&req_ctx->child_req, req->base.flags,
pkcs1pad_encrypt_sign_complete_cb, req);
diff --git a/crypto/seqiv.c b/crypto/seqiv.c
index 39dbf2f7e5f5..64a412be255e 100644
--- a/crypto/seqiv.c
+++ b/crypto/seqiv.c
@@ -73,9 +73,9 @@ static int seqiv_aead_encrypt(struct aead_request *req)
info = req->iv;
if (req->src != req->dst) {
- SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
- skcipher_request_set_tfm(nreq, ctx->sknull);
+ skcipher_request_set_sync_tfm(nreq, ctx->sknull);
skcipher_request_set_callback(nreq, req->base.flags,
NULL, NULL);
skcipher_request_set_crypt(nreq, req->src, req->dst,
diff --git a/crypto/shash.c b/crypto/shash.c
index 5d732c6bb4b2..d21f04d70dce 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -73,13 +73,6 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
}
EXPORT_SYMBOL_GPL(crypto_shash_setkey);
-static inline unsigned int shash_align_buffer_size(unsigned len,
- unsigned long mask)
-{
- typedef u8 __aligned_largest u8_aligned;
- return len + (mask & ~(__alignof__(u8_aligned) - 1));
-}
-
static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
@@ -88,11 +81,17 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
unsigned long alignmask = crypto_shash_alignmask(tfm);
unsigned int unaligned_len = alignmask + 1 -
((unsigned long)data & alignmask);
- u8 ubuf[shash_align_buffer_size(unaligned_len, alignmask)]
- __aligned_largest;
+ /*
+ * We cannot count on __aligned() working for large values:
+ * https://patchwork.kernel.org/patch/9507697/
+ */
+ u8 ubuf[MAX_ALGAPI_ALIGNMASK * 2];
u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
int err;
+ if (WARN_ON(buf + unaligned_len > ubuf + sizeof(ubuf)))
+ return -EINVAL;
+
if (unaligned_len > len)
unaligned_len = len;
@@ -124,11 +123,17 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out)
unsigned long alignmask = crypto_shash_alignmask(tfm);
struct shash_alg *shash = crypto_shash_alg(tfm);
unsigned int ds = crypto_shash_digestsize(tfm);
- u8 ubuf[shash_align_buffer_size(ds, alignmask)]
- __aligned_largest;
+ /*
+ * We cannot count on __aligned() working for large values:
+ * https://patchwork.kernel.org/patch/9507697/
+ */
+ u8 ubuf[MAX_ALGAPI_ALIGNMASK + HASH_MAX_DIGESTSIZE];
u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
int err;
+ if (WARN_ON(buf + ds > ubuf + sizeof(ubuf)))
+ return -EINVAL;
+
err = shash->final(desc, buf);
if (err)
goto out;
@@ -458,9 +463,9 @@ static int shash_prepare_alg(struct shash_alg *alg)
{
struct crypto_alg *base = &alg->base;
- if (alg->digestsize > PAGE_SIZE / 8 ||
- alg->descsize > PAGE_SIZE / 8 ||
- alg->statesize > PAGE_SIZE / 8)
+ if (alg->digestsize > HASH_MAX_DIGESTSIZE ||
+ alg->descsize > HASH_MAX_DESCSIZE ||
+ alg->statesize > HASH_MAX_STATESIZE)
return -EINVAL;
base->cra_type = &crypto_shash_type;
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 0bd8c6caa498..4caab81d2d02 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -949,6 +949,30 @@ struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
}
EXPORT_SYMBOL_GPL(crypto_alloc_skcipher);
+struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(
+ const char *alg_name, u32 type, u32 mask)
+{
+ struct crypto_skcipher *tfm;
+
+ /* Only sync algorithms allowed. */
+ mask |= CRYPTO_ALG_ASYNC;
+
+ tfm = crypto_alloc_tfm(alg_name, &crypto_skcipher_type2, type, mask);
+
+ /*
+ * Make sure we do not allocate something that might get used with
+ * an on-stack request: check the request size.
+ */
+ if (!IS_ERR(tfm) && WARN_ON(crypto_skcipher_reqsize(tfm) >
+ MAX_SYNC_SKCIPHER_REQSIZE)) {
+ crypto_free_skcipher(tfm);
+ return ERR_PTR(-EINVAL);
+ }
+
+ return (struct crypto_sync_skcipher *)tfm;
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_sync_skcipher);
+
int crypto_has_skcipher2(const char *alg_name, u32 type, u32 mask)
{
return crypto_type_has_alg(alg_name, &crypto_skcipher_type2,
diff --git a/crypto/speck.c b/crypto/speck.c
deleted file mode 100644
index 58aa9f7f91f7..000000000000
--- a/crypto/speck.c
+++ /dev/null
@@ -1,307 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Speck: a lightweight block cipher
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Speck has 10 variants, including 5 block sizes. For now we only implement
- * the variants Speck128/128, Speck128/192, Speck128/256, Speck64/96, and
- * Speck64/128. Speck${B}/${K} denotes the variant with a block size of B bits
- * and a key size of K bits. The Speck128 variants are believed to be the most
- * secure variants, and they use the same block size and key sizes as AES. The
- * Speck64 variants are less secure, but on 32-bit processors are usually
- * faster. The remaining variants (Speck32, Speck48, and Speck96) are even less
- * secure and/or not as well suited for implementation on either 32-bit or
- * 64-bit processors, so are omitted.
- *
- * Reference: "The Simon and Speck Families of Lightweight Block Ciphers"
- * https://eprint.iacr.org/2013/404.pdf
- *
- * In a correspondence, the Speck designers have also clarified that the words
- * should be interpreted in little-endian format, and the words should be
- * ordered such that the first word of each block is 'y' rather than 'x', and
- * the first key word (rather than the last) becomes the first round key.
- */
-
-#include <asm/unaligned.h>
-#include <crypto/speck.h>
-#include <linux/bitops.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/module.h>
-
-/* Speck128 */
-
-static __always_inline void speck128_round(u64 *x, u64 *y, u64 k)
-{
- *x = ror64(*x, 8);
- *x += *y;
- *x ^= k;
- *y = rol64(*y, 3);
- *y ^= *x;
-}
-
-static __always_inline void speck128_unround(u64 *x, u64 *y, u64 k)
-{
- *y ^= *x;
- *y = ror64(*y, 3);
- *x ^= k;
- *x -= *y;
- *x = rol64(*x, 8);
-}
-
-void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx,
- u8 *out, const u8 *in)
-{
- u64 y = get_unaligned_le64(in);
- u64 x = get_unaligned_le64(in + 8);
- int i;
-
- for (i = 0; i < ctx->nrounds; i++)
- speck128_round(&x, &y, ctx->round_keys[i]);
-
- put_unaligned_le64(y, out);
- put_unaligned_le64(x, out + 8);
-}
-EXPORT_SYMBOL_GPL(crypto_speck128_encrypt);
-
-static void speck128_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
- crypto_speck128_encrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx,
- u8 *out, const u8 *in)
-{
- u64 y = get_unaligned_le64(in);
- u64 x = get_unaligned_le64(in + 8);
- int i;
-
- for (i = ctx->nrounds - 1; i >= 0; i--)
- speck128_unround(&x, &y, ctx->round_keys[i]);
-
- put_unaligned_le64(y, out);
- put_unaligned_le64(x, out + 8);
-}
-EXPORT_SYMBOL_GPL(crypto_speck128_decrypt);
-
-static void speck128_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
- crypto_speck128_decrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key,
- unsigned int keylen)
-{
- u64 l[3];
- u64 k;
- int i;
-
- switch (keylen) {
- case SPECK128_128_KEY_SIZE:
- k = get_unaligned_le64(key);
- l[0] = get_unaligned_le64(key + 8);
- ctx->nrounds = SPECK128_128_NROUNDS;
- for (i = 0; i < ctx->nrounds; i++) {
- ctx->round_keys[i] = k;
- speck128_round(&l[0], &k, i);
- }
- break;
- case SPECK128_192_KEY_SIZE:
- k = get_unaligned_le64(key);
- l[0] = get_unaligned_le64(key + 8);
- l[1] = get_unaligned_le64(key + 16);
- ctx->nrounds = SPECK128_192_NROUNDS;
- for (i = 0; i < ctx->nrounds; i++) {
- ctx->round_keys[i] = k;
- speck128_round(&l[i % 2], &k, i);
- }
- break;
- case SPECK128_256_KEY_SIZE:
- k = get_unaligned_le64(key);
- l[0] = get_unaligned_le64(key + 8);
- l[1] = get_unaligned_le64(key + 16);
- l[2] = get_unaligned_le64(key + 24);
- ctx->nrounds = SPECK128_256_NROUNDS;
- for (i = 0; i < ctx->nrounds; i++) {
- ctx->round_keys[i] = k;
- speck128_round(&l[i % 3], &k, i);
- }
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_speck128_setkey);
-
-static int speck128_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- return crypto_speck128_setkey(crypto_tfm_ctx(tfm), key, keylen);
-}
-
-/* Speck64 */
-
-static __always_inline void speck64_round(u32 *x, u32 *y, u32 k)
-{
- *x = ror32(*x, 8);
- *x += *y;
- *x ^= k;
- *y = rol32(*y, 3);
- *y ^= *x;
-}
-
-static __always_inline void speck64_unround(u32 *x, u32 *y, u32 k)
-{
- *y ^= *x;
- *y = ror32(*y, 3);
- *x ^= k;
- *x -= *y;
- *x = rol32(*x, 8);
-}
-
-void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx,
- u8 *out, const u8 *in)
-{
- u32 y = get_unaligned_le32(in);
- u32 x = get_unaligned_le32(in + 4);
- int i;
-
- for (i = 0; i < ctx->nrounds; i++)
- speck64_round(&x, &y, ctx->round_keys[i]);
-
- put_unaligned_le32(y, out);
- put_unaligned_le32(x, out + 4);
-}
-EXPORT_SYMBOL_GPL(crypto_speck64_encrypt);
-
-static void speck64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
- crypto_speck64_encrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx,
- u8 *out, const u8 *in)
-{
- u32 y = get_unaligned_le32(in);
- u32 x = get_unaligned_le32(in + 4);
- int i;
-
- for (i = ctx->nrounds - 1; i >= 0; i--)
- speck64_unround(&x, &y, ctx->round_keys[i]);
-
- put_unaligned_le32(y, out);
- put_unaligned_le32(x, out + 4);
-}
-EXPORT_SYMBOL_GPL(crypto_speck64_decrypt);
-
-static void speck64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
- crypto_speck64_decrypt(crypto_tfm_ctx(tfm), out, in);
-}
-
-int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key,
- unsigned int keylen)
-{
- u32 l[3];
- u32 k;
- int i;
-
- switch (keylen) {
- case SPECK64_96_KEY_SIZE:
- k = get_unaligned_le32(key);
- l[0] = get_unaligned_le32(key + 4);
- l[1] = get_unaligned_le32(key + 8);
- ctx->nrounds = SPECK64_96_NROUNDS;
- for (i = 0; i < ctx->nrounds; i++) {
- ctx->round_keys[i] = k;
- speck64_round(&l[i % 2], &k, i);
- }
- break;
- case SPECK64_128_KEY_SIZE:
- k = get_unaligned_le32(key);
- l[0] = get_unaligned_le32(key + 4);
- l[1] = get_unaligned_le32(key + 8);
- l[2] = get_unaligned_le32(key + 12);
- ctx->nrounds = SPECK64_128_NROUNDS;
- for (i = 0; i < ctx->nrounds; i++) {
- ctx->round_keys[i] = k;
- speck64_round(&l[i % 3], &k, i);
- }
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_speck64_setkey);
-
-static int speck64_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- return crypto_speck64_setkey(crypto_tfm_ctx(tfm), key, keylen);
-}
-
-/* Algorithm definitions */
-
-static struct crypto_alg speck_algs[] = {
- {
- .cra_name = "speck128",
- .cra_driver_name = "speck128-generic",
- .cra_priority = 100,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = SPECK128_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct speck128_tfm_ctx),
- .cra_module = THIS_MODULE,
- .cra_u = {
- .cipher = {
- .cia_min_keysize = SPECK128_128_KEY_SIZE,
- .cia_max_keysize = SPECK128_256_KEY_SIZE,
- .cia_setkey = speck128_setkey,
- .cia_encrypt = speck128_encrypt,
- .cia_decrypt = speck128_decrypt
- }
- }
- }, {
- .cra_name = "speck64",
- .cra_driver_name = "speck64-generic",
- .cra_priority = 100,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = SPECK64_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct speck64_tfm_ctx),
- .cra_module = THIS_MODULE,
- .cra_u = {
- .cipher = {
- .cia_min_keysize = SPECK64_96_KEY_SIZE,
- .cia_max_keysize = SPECK64_128_KEY_SIZE,
- .cia_setkey = speck64_setkey,
- .cia_encrypt = speck64_encrypt,
- .cia_decrypt = speck64_decrypt
- }
- }
- }
-};
-
-static int __init speck_module_init(void)
-{
- return crypto_register_algs(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-static void __exit speck_module_exit(void)
-{
- crypto_unregister_algs(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-module_init(speck_module_init);
-module_exit(speck_module_exit);
-
-MODULE_DESCRIPTION("Speck block cipher (generic)");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("speck128");
-MODULE_ALIAS_CRYPTO("speck128-generic");
-MODULE_ALIAS_CRYPTO("speck64");
-MODULE_ALIAS_CRYPTO("speck64-generic");
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index bdde95e8d369..c20c9f5c18f2 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -76,8 +76,7 @@ static char *check[] = {
"cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea",
"khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt",
"camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320",
- "lzo", "cts", "zlib", "sha3-224", "sha3-256", "sha3-384", "sha3-512",
- NULL
+ "lzo", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512", NULL
};
static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 };
@@ -1103,6 +1102,9 @@ static void test_ahash_speed_common(const char *algo, unsigned int secs,
break;
}
+ if (speed[i].klen)
+ crypto_ahash_setkey(tfm, tvmem[0], speed[i].klen);
+
pr_info("test%3u "
"(%5u byte blocks,%5u bytes per update,%4u updates): ",
i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen);
@@ -1733,6 +1735,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
ret += tcrypt_test("xts(aes)");
ret += tcrypt_test("ctr(aes)");
ret += tcrypt_test("rfc3686(ctr(aes))");
+ ret += tcrypt_test("ofb(aes)");
break;
case 11:
@@ -1878,10 +1881,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
ret += tcrypt_test("ecb(seed)");
break;
- case 44:
- ret += tcrypt_test("zlib");
- break;
-
case 45:
ret += tcrypt_test("rfc4309(ccm(aes))");
break;
@@ -2033,6 +2032,8 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
break;
case 191:
ret += tcrypt_test("ecb(sm4)");
+ ret += tcrypt_test("cbc(sm4)");
+ ret += tcrypt_test("ctr(sm4)");
break;
case 200:
test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
@@ -2282,6 +2283,20 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
num_mb);
break;
+ case 218:
+ test_cipher_speed("ecb(sm4)", ENCRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_cipher_speed("ecb(sm4)", DECRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_cipher_speed("cbc(sm4)", ENCRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_cipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
+ speed_template_16);
+ break;
case 300:
if (alg) {
test_hash_speed(alg, sec, generic_hash_speed_template);
diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h
index f0bfee1bb293..d09ea8b10b4f 100644
--- a/crypto/tcrypt.h
+++ b/crypto/tcrypt.h
@@ -51,6 +51,7 @@ static struct cipher_speed_template des3_speed_template[] = {
* Cipher speed tests
*/
static u8 speed_template_8[] = {8, 0};
+static u8 speed_template_16[] = {16, 0};
static u8 speed_template_24[] = {24, 0};
static u8 speed_template_8_16[] = {8, 16, 0};
static u8 speed_template_8_32[] = {8, 32, 0};
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index a1d42245082a..b1f79c6bf409 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1400,8 +1400,8 @@ static int test_comp(struct crypto_comp *tfm,
int ilen;
unsigned int dlen = COMP_BUF_SIZE;
- memset(output, 0, sizeof(COMP_BUF_SIZE));
- memset(decomp_output, 0, sizeof(COMP_BUF_SIZE));
+ memset(output, 0, COMP_BUF_SIZE);
+ memset(decomp_output, 0, COMP_BUF_SIZE);
ilen = ctemplate[i].inlen;
ret = crypto_comp_compress(tfm, ctemplate[i].input,
@@ -1445,7 +1445,7 @@ static int test_comp(struct crypto_comp *tfm,
int ilen;
unsigned int dlen = COMP_BUF_SIZE;
- memset(decomp_output, 0, sizeof(COMP_BUF_SIZE));
+ memset(decomp_output, 0, COMP_BUF_SIZE);
ilen = dtemplate[i].inlen;
ret = crypto_comp_decompress(tfm, dtemplate[i].input,
@@ -2662,6 +2662,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.cipher = __VECS(serpent_cbc_tv_template)
},
}, {
+ .alg = "cbc(sm4)",
+ .test = alg_test_skcipher,
+ .suite = {
+ .cipher = __VECS(sm4_cbc_tv_template)
+ }
+ }, {
.alg = "cbc(twofish)",
.test = alg_test_skcipher,
.suite = {
@@ -2785,6 +2791,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.cipher = __VECS(serpent_ctr_tv_template)
}
}, {
+ .alg = "ctr(sm4)",
+ .test = alg_test_skcipher,
+ .suite = {
+ .cipher = __VECS(sm4_ctr_tv_template)
+ }
+ }, {
.alg = "ctr(twofish)",
.test = alg_test_skcipher,
.suite = {
@@ -3038,18 +3050,6 @@ static const struct alg_test_desc alg_test_descs[] = {
.cipher = __VECS(sm4_tv_template)
}
}, {
- .alg = "ecb(speck128)",
- .test = alg_test_skcipher,
- .suite = {
- .cipher = __VECS(speck128_tv_template)
- }
- }, {
- .alg = "ecb(speck64)",
- .test = alg_test_skcipher,
- .suite = {
- .cipher = __VECS(speck64_tv_template)
- }
- }, {
.alg = "ecb(tea)",
.test = alg_test_skcipher,
.suite = {
@@ -3577,18 +3577,6 @@ static const struct alg_test_desc alg_test_descs[] = {
.cipher = __VECS(serpent_xts_tv_template)
}
}, {
- .alg = "xts(speck128)",
- .test = alg_test_skcipher,
- .suite = {
- .cipher = __VECS(speck128_xts_tv_template)
- }
- }, {
- .alg = "xts(speck64)",
- .test = alg_test_skcipher,
- .suite = {
- .cipher = __VECS(speck64_xts_tv_template)
- }
- }, {
.alg = "xts(twofish)",
.test = alg_test_skcipher,
.suite = {
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 173111c70746..1fe7b97ba03f 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -24,8 +24,6 @@
#ifndef _CRYPTO_TESTMGR_H
#define _CRYPTO_TESTMGR_H
-#include <linux/netlink.h>
-
#define MAX_DIGEST_SIZE 64
#define MAX_TAP 8
@@ -10133,12 +10131,13 @@ static const struct cipher_testvec serpent_xts_tv_template[] = {
};
/*
- * SM4 test vector taken from the draft RFC
- * https://tools.ietf.org/html/draft-crypto-sm4-00#ref-GBT.32907-2016
+ * SM4 test vectors taken from the "The SM4 Blockcipher Algorithm And Its
+ * Modes Of Operations" draft RFC
+ * https://datatracker.ietf.org/doc/draft-ribose-cfrg-sm4
*/
static const struct cipher_testvec sm4_tv_template[] = {
- { /* SM4 Appendix A: Example Calculations. Example 1. */
+ { /* GB/T 32907-2016 Example 1. */
.key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
"\xFE\xDC\xBA\x98\x76\x54\x32\x10",
.klen = 16,
@@ -10147,10 +10146,7 @@ static const struct cipher_testvec sm4_tv_template[] = {
.ctext = "\x68\x1E\xDF\x34\xD2\x06\x96\x5E"
"\x86\xB3\xE9\x4F\x53\x6E\x42\x46",
.len = 16,
- }, { /*
- * SM4 Appendix A: Example Calculations.
- * Last 10 iterations of Example 2.
- */
+ }, { /* Last 10 iterations of GB/T 32907-2016 Example 2. */
.key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
"\xFE\xDC\xBA\x98\x76\x54\x32\x10",
.klen = 16,
@@ -10195,744 +10191,116 @@ static const struct cipher_testvec sm4_tv_template[] = {
"\x59\x52\x98\xc7\xc6\xfd\x27\x1f"
"\x4\x2\xf8\x4\xc3\x3d\x3f\x66",
.len = 160
+ }, { /* A.2.1.1 SM4-ECB Example 1 */
+ .key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+ "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+ .klen = 16,
+ .ptext = "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb"
+ "\xcc\xcc\xcc\xcc\xdd\xdd\xdd\xdd"
+ "\xee\xee\xee\xee\xff\xff\xff\xff"
+ "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb",
+ .ctext = "\x5e\xc8\x14\x3d\xe5\x09\xcf\xf7"
+ "\xb5\x17\x9f\x8f\x47\x4b\x86\x19"
+ "\x2f\x1d\x30\x5a\x7f\xb1\x7d\xf9"
+ "\x85\xf8\x1c\x84\x82\x19\x23\x04",
+ .len = 32,
+ }, { /* A.2.1.2 SM4-ECB Example 2 */
+ .key = "\xFE\xDC\xBA\x98\x76\x54\x32\x10"
+ "\x01\x23\x45\x67\x89\xAB\xCD\xEF",
+ .klen = 16,
+ .ptext = "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb"
+ "\xcc\xcc\xcc\xcc\xdd\xdd\xdd\xdd"
+ "\xee\xee\xee\xee\xff\xff\xff\xff"
+ "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb",
+ .ctext = "\xC5\x87\x68\x97\xE4\xA5\x9B\xBB"
+ "\xA7\x2A\x10\xC8\x38\x72\x24\x5B"
+ "\x12\xDD\x90\xBC\x2D\x20\x06\x92"
+ "\xB5\x29\xA4\x15\x5A\xC9\xE6\x00",
+ .len = 32,
}
};
-/*
- * Speck test vectors taken from the original paper:
- * "The Simon and Speck Families of Lightweight Block Ciphers"
- * https://eprint.iacr.org/2013/404.pdf
- *
- * Note that the paper does not make byte and word order clear. But it was
- * confirmed with the authors that the intended orders are little endian byte
- * order and (y, x) word order. Equivalently, the printed test vectors, when
- * looking at only the bytes (ignoring the whitespace that divides them into
- * words), are backwards: the left-most byte is actually the one with the
- * highest memory address, while the right-most byte is actually the one with
- * the lowest memory address.
- */
-
-static const struct cipher_testvec speck128_tv_template[] = {
- { /* Speck128/128 */
- .key = "\x00\x01\x02\x03\x04\x05\x06\x07"
- "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+static const struct cipher_testvec sm4_cbc_tv_template[] = {
+ { /* A.2.2.1 SM4-CBC Example 1 */
+ .key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+ "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
.klen = 16,
- .ptext = "\x20\x6d\x61\x64\x65\x20\x69\x74"
- "\x20\x65\x71\x75\x69\x76\x61\x6c",
- .ctext = "\x18\x0d\x57\x5c\xdf\xfe\x60\x78"
- "\x65\x32\x78\x79\x51\x98\x5d\xa6",
- .len = 16,
- }, { /* Speck128/192 */
- .key = "\x00\x01\x02\x03\x04\x05\x06\x07"
- "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
- "\x10\x11\x12\x13\x14\x15\x16\x17",
- .klen = 24,
- .ptext = "\x65\x6e\x74\x20\x74\x6f\x20\x43"
- "\x68\x69\x65\x66\x20\x48\x61\x72",
- .ctext = "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9"
- "\x66\x55\x13\x13\x3a\xcf\xe4\x1b",
- .len = 16,
- }, { /* Speck128/256 */
- .key = "\x00\x01\x02\x03\x04\x05\x06\x07"
- "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
- "\x10\x11\x12\x13\x14\x15\x16\x17"
- "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
- .klen = 32,
- .ptext = "\x70\x6f\x6f\x6e\x65\x72\x2e\x20"
- "\x49\x6e\x20\x74\x68\x6f\x73\x65",
- .ctext = "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e"
- "\x3e\xf5\xc0\x05\x04\x01\x09\x41",
- .len = 16,
- },
-};
-
-/*
- * Speck128-XTS test vectors, taken from the AES-XTS test vectors with the
- * ciphertext recomputed with Speck128 as the cipher
- */
-static const struct cipher_testvec speck128_xts_tv_template[] = {
- {
- .key = "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .klen = 32,
- .iv = "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .ctext = "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62"
- "\x3b\x99\x4a\x64\x74\x77\xac\xed"
- "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42"
- "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54",
- .len = 32,
- }, {
- .key = "\x11\x11\x11\x11\x11\x11\x11\x11"
- "\x11\x11\x11\x11\x11\x11\x11\x11"
- "\x22\x22\x22\x22\x22\x22\x22\x22"
- "\x22\x22\x22\x22\x22\x22\x22\x22",
- .klen = 32,
- .iv = "\x33\x33\x33\x33\x33\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .ptext = "\x44\x44\x44\x44\x44\x44\x44\x44"
- "\x44\x44\x44\x44\x44\x44\x44\x44"
- "\x44\x44\x44\x44\x44\x44\x44\x44"
- "\x44\x44\x44\x44\x44\x44\x44\x44",
- .ctext = "\xfb\x53\x81\x75\x6f\x9f\x34\xad"
- "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a"
- "\xd4\x84\xa4\x53\xd5\x88\x73\x1b"
- "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6",
+ .ptext = "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb"
+ "\xcc\xcc\xcc\xcc\xdd\xdd\xdd\xdd"
+ "\xee\xee\xee\xee\xff\xff\xff\xff"
+ "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb",
+ .iv = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
+ .ctext = "\x78\xEB\xB1\x1C\xC4\x0B\x0A\x48"
+ "\x31\x2A\xAE\xB2\x04\x02\x44\xCB"
+ "\x4C\xB7\x01\x69\x51\x90\x92\x26"
+ "\x97\x9B\x0D\x15\xDC\x6A\x8F\x6D",
.len = 32,
- }, {
- .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
- "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
- "\x22\x22\x22\x22\x22\x22\x22\x22"
- "\x22\x22\x22\x22\x22\x22\x22\x22",
- .klen = 32,
- .iv = "\x33\x33\x33\x33\x33\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .ptext = "\x44\x44\x44\x44\x44\x44\x44\x44"
- "\x44\x44\x44\x44\x44\x44\x44\x44"
- "\x44\x44\x44\x44\x44\x44\x44\x44"
- "\x44\x44\x44\x44\x44\x44\x44\x44",
- .ctext = "\x21\x52\x84\x15\xd1\xf7\x21\x55"
- "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d"
- "\xda\x63\xb2\xf1\x82\xb0\x89\x59"
- "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92",
+ }, { /* A.2.2.2 SM4-CBC Example 2 */
+ .key = "\xFE\xDC\xBA\x98\x76\x54\x32\x10"
+ "\x01\x23\x45\x67\x89\xAB\xCD\xEF",
+ .klen = 16,
+ .ptext = "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb"
+ "\xcc\xcc\xcc\xcc\xdd\xdd\xdd\xdd"
+ "\xee\xee\xee\xee\xff\xff\xff\xff"
+ "\xaa\xaa\xaa\xaa\xbb\xbb\xbb\xbb",
+ .iv = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
+ .ctext = "\x0d\x3a\x6d\xdc\x2d\x21\xc6\x98"
+ "\x85\x72\x15\x58\x7b\x7b\xb5\x9a"
+ "\x91\xf2\xc1\x47\x91\x1a\x41\x44"
+ "\x66\x5e\x1f\xa1\xd4\x0b\xae\x38",
.len = 32,
- }, {
- .key = "\x27\x18\x28\x18\x28\x45\x90\x45"
- "\x23\x53\x60\x28\x74\x71\x35\x26"
- "\x31\x41\x59\x26\x53\x58\x97\x93"
- "\x23\x84\x62\x64\x33\x83\x27\x95",
- .klen = 32,
- .iv = "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07"
- "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
- "\x10\x11\x12\x13\x14\x15\x16\x17"
- "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
- "\x20\x21\x22\x23\x24\x25\x26\x27"
- "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
- "\x30\x31\x32\x33\x34\x35\x36\x37"
- "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
- "\x40\x41\x42\x43\x44\x45\x46\x47"
- "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
- "\x50\x51\x52\x53\x54\x55\x56\x57"
- "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
- "\x60\x61\x62\x63\x64\x65\x66\x67"
- "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
- "\x70\x71\x72\x73\x74\x75\x76\x77"
- "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
- "\x80\x81\x82\x83\x84\x85\x86\x87"
- "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
- "\x90\x91\x92\x93\x94\x95\x96\x97"
- "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
- "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
- "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
- "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
- "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
- "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
- "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
- "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
- "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
- "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
- "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
- "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
- "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
- "\x00\x01\x02\x03\x04\x05\x06\x07"
- "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
- "\x10\x11\x12\x13\x14\x15\x16\x17"
- "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
- "\x20\x21\x22\x23\x24\x25\x26\x27"
- "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
- "\x30\x31\x32\x33\x34\x35\x36\x37"
- "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
- "\x40\x41\x42\x43\x44\x45\x46\x47"
- "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
- "\x50\x51\x52\x53\x54\x55\x56\x57"
- "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
- "\x60\x61\x62\x63\x64\x65\x66\x67"
- "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
- "\x70\x71\x72\x73\x74\x75\x76\x77"
- "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
- "\x80\x81\x82\x83\x84\x85\x86\x87"
- "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
- "\x90\x91\x92\x93\x94\x95\x96\x97"
- "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
- "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
- "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
- "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
- "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
- "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
- "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
- "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
- "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
- "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
- "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
- "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
- "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
- .ctext = "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82"
- "\x53\xd0\xed\x2d\x30\xc1\x20\xef"
- "\x70\x67\x5e\xff\x09\x70\xbb\xc1"
- "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48"
- "\xbe\xce\xb1\xc7\xb2\x67\xc4\xa7"
- "\x76\xf8\x36\x30\xb7\xb4\x9a\xd9"
- "\xf5\x9d\xd0\x7b\xc1\x06\x96\x44"
- "\x19\xc5\x58\x84\x63\xb9\x12\x68"
- "\x68\xc7\xaa\x18\x98\xf2\x1f\x5c"
- "\x39\xa6\xd8\x32\x2b\xc3\x51\xfd"
- "\x74\x79\x2e\xb4\x44\xd7\x69\xc4"
- "\xfc\x29\xe6\xed\x26\x1e\xa6\x9d"
- "\x1c\xbe\x00\x0e\x7f\x3a\xca\xfb"
- "\x6d\x13\x65\xa0\xf9\x31\x12\xe2"
- "\x26\xd1\xec\x2b\x0a\x8b\x59\x99"
- "\xa7\x49\xa0\x0e\x09\x33\x85\x50"
- "\xc3\x23\xca\x7a\xdd\x13\x45\x5f"
- "\xde\x4c\xa7\xcb\x00\x8a\x66\x6f"
- "\xa2\xb6\xb1\x2e\xe1\xa0\x18\xf6"
- "\xad\xf3\xbd\xeb\xc7\xef\x55\x4f"
- "\x79\x91\x8d\x36\x13\x7b\xd0\x4a"
- "\x6c\x39\xfb\x53\xb8\x6f\x02\x51"
- "\xa5\x20\xac\x24\x1c\x73\x59\x73"
- "\x58\x61\x3a\x87\x58\xb3\x20\x56"
- "\x39\x06\x2b\x4d\xd3\x20\x2b\x89"
- "\x3f\xa2\xf0\x96\xeb\x7f\xa4\xcd"
- "\x11\xae\xbd\xcb\x3a\xb4\xd9\x91"
- "\x09\x35\x71\x50\x65\xac\x92\xe3"
- "\x7b\x32\xc0\x7a\xdd\xd4\xc3\x92"
- "\x6f\xeb\x79\xde\x6f\xd3\x25\xc9"
- "\xcd\x63\xf5\x1e\x7a\x3b\x26\x9d"
- "\x77\x04\x80\xa9\xbf\x38\xb5\xbd"
- "\xb8\x05\x07\xbd\xfd\xab\x7b\xf8"
- "\x2a\x26\xcc\x49\x14\x6d\x55\x01"
- "\x06\x94\xd8\xb2\x2d\x53\x83\x1b"
- "\x8f\xd4\xdd\x57\x12\x7e\x18\xba"
- "\x8e\xe2\x4d\x80\xef\x7e\x6b\x9d"
- "\x24\xa9\x60\xa4\x97\x85\x86\x2a"
- "\x01\x00\x09\xf1\xcb\x4a\x24\x1c"
- "\xd8\xf6\xe6\x5b\xe7\x5d\xf2\xc4"
- "\x97\x1c\x10\xc6\x4d\x66\x4f\x98"
- "\x87\x30\xac\xd5\xea\x73\x49\x10"
- "\x80\xea\xe5\x5f\x4d\x5f\x03\x33"
- "\x66\x02\x35\x3d\x60\x06\x36\x4f"
- "\x14\x1c\xd8\x07\x1f\x78\xd0\xf8"
- "\x4f\x6c\x62\x7c\x15\xa5\x7c\x28"
- "\x7c\xcc\xeb\x1f\xd1\x07\x90\x93"
- "\x7e\xc2\xa8\x3a\x80\xc0\xf5\x30"
- "\xcc\x75\xcf\x16\x26\xa9\x26\x3b"
- "\xe7\x68\x2f\x15\x21\x5b\xe4\x00"
- "\xbd\x48\x50\xcd\x75\x70\xc4\x62"
- "\xbb\x41\xfb\x89\x4a\x88\x3b\x3b"
- "\x51\x66\x02\x69\x04\x97\x36\xd4"
- "\x75\xae\x0b\xa3\x42\xf8\xca\x79"
- "\x8f\x93\xe9\xcc\x38\xbd\xd6\xd2"
- "\xf9\x70\x4e\xc3\x6a\x8e\x25\xbd"
- "\xea\x15\x5a\xa0\x85\x7e\x81\x0d"
- "\x03\xe7\x05\x39\xf5\x05\x26\xee"
- "\xec\xaa\x1f\x3d\xc9\x98\x76\x01"
- "\x2c\xf4\xfc\xa3\x88\x77\x38\xc4"
- "\x50\x65\x50\x6d\x04\x1f\xdf\x5a"
- "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca"
- "\x47\x26\xef\x39\xb8\xb4\xf2\xd1"
- "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf",
- .len = 512,
- }, {
- .key = "\x27\x18\x28\x18\x28\x45\x90\x45"
- "\x23\x53\x60\x28\x74\x71\x35\x26"
- "\x62\x49\x77\x57\x24\x70\x93\x69"
- "\x99\x59\x57\x49\x66\x96\x76\x27"
- "\x31\x41\x59\x26\x53\x58\x97\x93"
- "\x23\x84\x62\x64\x33\x83\x27\x95"
- "\x02\x88\x41\x97\x16\x93\x99\x37"
- "\x51\x05\x82\x09\x74\x94\x45\x92",
- .klen = 64,
- .iv = "\xff\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07"
- "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
- "\x10\x11\x12\x13\x14\x15\x16\x17"
- "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
- "\x20\x21\x22\x23\x24\x25\x26\x27"
- "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
- "\x30\x31\x32\x33\x34\x35\x36\x37"
- "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
- "\x40\x41\x42\x43\x44\x45\x46\x47"
- "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
- "\x50\x51\x52\x53\x54\x55\x56\x57"
- "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
- "\x60\x61\x62\x63\x64\x65\x66\x67"
- "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
- "\x70\x71\x72\x73\x74\x75\x76\x77"
- "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
- "\x80\x81\x82\x83\x84\x85\x86\x87"
- "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
- "\x90\x91\x92\x93\x94\x95\x96\x97"
- "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
- "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
- "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
- "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
- "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
- "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
- "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
- "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
- "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
- "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
- "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
- "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
- "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
- "\x00\x01\x02\x03\x04\x05\x06\x07"
- "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
- "\x10\x11\x12\x13\x14\x15\x16\x17"
- "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
- "\x20\x21\x22\x23\x24\x25\x26\x27"
- "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
- "\x30\x31\x32\x33\x34\x35\x36\x37"
- "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
- "\x40\x41\x42\x43\x44\x45\x46\x47"
- "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
- "\x50\x51\x52\x53\x54\x55\x56\x57"
- "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
- "\x60\x61\x62\x63\x64\x65\x66\x67"
- "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
- "\x70\x71\x72\x73\x74\x75\x76\x77"
- "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
- "\x80\x81\x82\x83\x84\x85\x86\x87"
- "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
- "\x90\x91\x92\x93\x94\x95\x96\x97"
- "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
- "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
- "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
- "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
- "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
- "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
- "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
- "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
- "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
- "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
- "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
- "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
- "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
- .ctext = "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1"
- "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb"
- "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73"
- "\x92\x99\xde\xd3\x76\xed\xcd\x63"
- "\x64\x3a\x22\x57\xc1\x43\x49\xd4"
- "\x79\x36\x31\x19\x62\xae\x10\x7e"
- "\x7d\xcf\x7a\xe2\x6b\xce\x27\xfa"
- "\xdc\x3d\xd9\x83\xd3\x42\x4c\xe0"
- "\x1b\xd6\x1d\x1a\x6f\xd2\x03\x00"
- "\xfc\x81\x99\x8a\x14\x62\xf5\x7e"
- "\x0d\xe7\x12\xe8\x17\x9d\x0b\xec"
- "\xe2\xf7\xc9\xa7\x63\xd1\x79\xb6"
- "\x62\x62\x37\xfe\x0a\x4c\x4a\x37"
- "\x70\xc7\x5e\x96\x5f\xbc\x8e\x9e"
- "\x85\x3c\x4f\x26\x64\x85\xbc\x68"
- "\xb0\xe0\x86\x5e\x26\x41\xce\x11"
- "\x50\xda\x97\x14\xe9\x9e\xc7\x6d"
- "\x3b\xdc\x43\xde\x2b\x27\x69\x7d"
- "\xfc\xb0\x28\xbd\x8f\xb1\xc6\x31"
- "\x14\x4d\xf0\x74\x37\xfd\x07\x25"
- "\x96\x55\xe5\xfc\x9e\x27\x2a\x74"
- "\x1b\x83\x4d\x15\x83\xac\x57\xa0"
- "\xac\xa5\xd0\x38\xef\x19\x56\x53"
- "\x25\x4b\xfc\xce\x04\x23\xe5\x6b"
- "\xf6\xc6\x6c\x32\x0b\xb3\x12\xc5"
- "\xed\x22\x34\x1c\x5d\xed\x17\x06"
- "\x36\xa3\xe6\x77\xb9\x97\x46\xb8"
- "\xe9\x3f\x7e\xc7\xbc\x13\x5c\xdc"
- "\x6e\x3f\x04\x5e\xd1\x59\xa5\x82"
- "\x35\x91\x3d\x1b\xe4\x97\x9f\x92"
- "\x1c\x5e\x5f\x6f\x41\xd4\x62\xa1"
- "\x8d\x39\xfc\x42\xfb\x38\x80\xb9"
- "\x0a\xe3\xcc\x6a\x93\xd9\x7a\xb1"
- "\xe9\x69\xaf\x0a\x6b\x75\x38\xa7"
- "\xa1\xbf\xf7\xda\x95\x93\x4b\x78"
- "\x19\xf5\x94\xf9\xd2\x00\x33\x37"
- "\xcf\xf5\x9e\x9c\xf3\xcc\xa6\xee"
- "\x42\xb2\x9e\x2c\x5f\x48\x23\x26"
- "\x15\x25\x17\x03\x3d\xfe\x2c\xfc"
- "\xeb\xba\xda\xe0\x00\x05\xb6\xa6"
- "\x07\xb3\xe8\x36\x5b\xec\x5b\xbf"
- "\xd6\x5b\x00\x74\xc6\x97\xf1\x6a"
- "\x49\xa1\xc3\xfa\x10\x52\xb9\x14"
- "\xad\xb7\x73\xf8\x78\x12\xc8\x59"
- "\x17\x80\x4c\x57\x39\xf1\x6d\x80"
- "\x25\x77\x0f\x5e\x7d\xf0\xaf\x21"
- "\xec\xce\xb7\xc8\x02\x8a\xed\x53"
- "\x2c\x25\x68\x2e\x1f\x85\x5e\x67"
- "\xd1\x07\x7a\x3a\x89\x08\xe0\x34"
- "\xdc\xdb\x26\xb4\x6b\x77\xfc\x40"
- "\x31\x15\x72\xa0\xf0\x73\xd9\x3b"
- "\xd5\xdb\xfe\xfc\x8f\xa9\x44\xa2"
- "\x09\x9f\xc6\x33\xe5\xe2\x88\xe8"
- "\xf3\xf0\x1a\xf4\xce\x12\x0f\xd6"
- "\xf7\x36\xe6\xa4\xf4\x7a\x10\x58"
- "\xcc\x1f\x48\x49\x65\x47\x75\xe9"
- "\x28\xe1\x65\x7b\xf2\xc4\xb5\x07"
- "\xf2\xec\x76\xd8\x8f\x09\xf3\x16"
- "\xa1\x51\x89\x3b\xeb\x96\x42\xac"
- "\x65\xe0\x67\x63\x29\xdc\xb4\x7d"
- "\xf2\x41\x51\x6a\xcb\xde\x3c\xfb"
- "\x66\x8d\x13\xca\xe0\x59\x2a\x00"
- "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5"
- "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c",
- .len = 512,
- .also_non_np = 1,
- .np = 3,
- .tap = { 512 - 20, 4, 16 },
}
};
-static const struct cipher_testvec speck64_tv_template[] = {
- { /* Speck64/96 */
- .key = "\x00\x01\x02\x03\x08\x09\x0a\x0b"
- "\x10\x11\x12\x13",
- .klen = 12,
- .ptext = "\x65\x61\x6e\x73\x20\x46\x61\x74",
- .ctext = "\x6c\x94\x75\x41\xec\x52\x79\x9f",
- .len = 8,
- }, { /* Speck64/128 */
- .key = "\x00\x01\x02\x03\x08\x09\x0a\x0b"
- "\x10\x11\x12\x13\x18\x19\x1a\x1b",
+static const struct cipher_testvec sm4_ctr_tv_template[] = {
+ { /* A.2.5.1 SM4-CTR Example 1 */
+ .key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+ "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
.klen = 16,
- .ptext = "\x2d\x43\x75\x74\x74\x65\x72\x3b",
- .ctext = "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c",
- .len = 8,
- },
-};
-
-/*
- * Speck64-XTS test vectors, taken from the AES-XTS test vectors with the
- * ciphertext recomputed with Speck64 as the cipher, and key lengths adjusted
- */
-static const struct cipher_testvec speck64_xts_tv_template[] = {
- {
- .key = "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .klen = 24,
- .iv = "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .ctext = "\x84\xaf\x54\x07\x19\xd4\x7c\xa6"
- "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2"
- "\x80\xf5\x72\xe7\xcd\xf0\x99\x22"
- "\x35\xa7\x2f\x06\xef\xdc\x51\xaa",
- .len = 32,
- }, {
- .key = "\x11\x11\x11\x11\x11\x11\x11\x11"
- "\x11\x11\x11\x11\x11\x11\x11\x11"
- "\x22\x22\x22\x22\x22\x22\x22\x22",
- .klen = 24,
- .iv = "\x33\x33\x33\x33\x33\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .ptext = "\x44\x44\x44\x44\x44\x44\x44\x44"
- "\x44\x44\x44\x44\x44\x44\x44\x44"
- "\x44\x44\x44\x44\x44\x44\x44\x44"
- "\x44\x44\x44\x44\x44\x44\x44\x44",
- .ctext = "\x12\x56\x73\xcd\x15\x87\xa8\x59"
- "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f"
- "\xb3\x12\x69\x7e\x36\xeb\x52\xff"
- "\x62\xdd\xba\x90\xb3\xe1\xee\x99",
- .len = 32,
- }, {
- .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
- "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
- "\x22\x22\x22\x22\x22\x22\x22\x22",
- .klen = 24,
- .iv = "\x33\x33\x33\x33\x33\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .ptext = "\x44\x44\x44\x44\x44\x44\x44\x44"
- "\x44\x44\x44\x44\x44\x44\x44\x44"
- "\x44\x44\x44\x44\x44\x44\x44\x44"
- "\x44\x44\x44\x44\x44\x44\x44\x44",
- .ctext = "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c"
- "\x27\x36\xc0\xbf\x5d\xea\x36\x37"
- "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b"
- "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34",
- .len = 32,
- }, {
- .key = "\x27\x18\x28\x18\x28\x45\x90\x45"
- "\x23\x53\x60\x28\x74\x71\x35\x26"
- "\x31\x41\x59\x26\x53\x58\x97\x93",
- .klen = 24,
- .iv = "\x00\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07"
- "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
- "\x10\x11\x12\x13\x14\x15\x16\x17"
- "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
- "\x20\x21\x22\x23\x24\x25\x26\x27"
- "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
- "\x30\x31\x32\x33\x34\x35\x36\x37"
- "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
- "\x40\x41\x42\x43\x44\x45\x46\x47"
- "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
- "\x50\x51\x52\x53\x54\x55\x56\x57"
- "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
- "\x60\x61\x62\x63\x64\x65\x66\x67"
- "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
- "\x70\x71\x72\x73\x74\x75\x76\x77"
- "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
- "\x80\x81\x82\x83\x84\x85\x86\x87"
- "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
- "\x90\x91\x92\x93\x94\x95\x96\x97"
- "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
- "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
- "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
- "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
- "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
- "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
- "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
- "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
- "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
- "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
- "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
- "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
- "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
- "\x00\x01\x02\x03\x04\x05\x06\x07"
- "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
- "\x10\x11\x12\x13\x14\x15\x16\x17"
- "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
- "\x20\x21\x22\x23\x24\x25\x26\x27"
- "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
- "\x30\x31\x32\x33\x34\x35\x36\x37"
- "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
- "\x40\x41\x42\x43\x44\x45\x46\x47"
- "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
- "\x50\x51\x52\x53\x54\x55\x56\x57"
- "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
- "\x60\x61\x62\x63\x64\x65\x66\x67"
- "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
- "\x70\x71\x72\x73\x74\x75\x76\x77"
- "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
- "\x80\x81\x82\x83\x84\x85\x86\x87"
- "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
- "\x90\x91\x92\x93\x94\x95\x96\x97"
- "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
- "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
- "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
- "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
- "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
- "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
- "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
- "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
- "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
- "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
- "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
- "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
- "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
- .ctext = "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e"
- "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09"
- "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3"
- "\x11\xc7\x39\x96\xd0\x95\xf4\x56"
- "\xf4\xdd\x03\x38\x01\x44\x2c\xcf"
- "\x88\xae\x8e\x3c\xcd\xe7\xaa\x66"
- "\xfe\x3d\xc6\xfb\x01\x23\x51\x43"
- "\xd5\xd2\x13\x86\x94\x34\xe9\x62"
- "\xf9\x89\xe3\xd1\x7b\xbe\xf8\xef"
- "\x76\x35\x04\x3f\xdb\x23\x9d\x0b"
- "\x85\x42\xb9\x02\xd6\xcc\xdb\x96"
- "\xa7\x6b\x27\xb6\xd4\x45\x8f\x7d"
- "\xae\xd2\x04\xd5\xda\xc1\x7e\x24"
- "\x8c\x73\xbe\x48\x7e\xcf\x65\x28"
- "\x29\xe5\xbe\x54\x30\xcb\x46\x95"
- "\x4f\x2e\x8a\x36\xc8\x27\xc5\xbe"
- "\xd0\x1a\xaf\xab\x26\xcd\x9e\x69"
- "\xa1\x09\x95\x71\x26\xe9\xc4\xdf"
- "\xe6\x31\xc3\x46\xda\xaf\x0b\x41"
- "\x1f\xab\xb1\x8e\xd6\xfc\x0b\xb3"
- "\x82\xc0\x37\x27\xfc\x91\xa7\x05"
- "\xfb\xc5\xdc\x2b\x74\x96\x48\x43"
- "\x5d\x9c\x19\x0f\x60\x63\x3a\x1f"
- "\x6f\xf0\x03\xbe\x4d\xfd\xc8\x4a"
- "\xc6\xa4\x81\x6d\xc3\x12\x2a\x5c"
- "\x07\xff\xf3\x72\x74\x48\xb5\x40"
- "\x50\xb5\xdd\x90\x43\x31\x18\x15"
- "\x7b\xf2\xa6\xdb\x83\xc8\x4b\x4a"
- "\x29\x93\x90\x8b\xda\x07\xf0\x35"
- "\x6d\x90\x88\x09\x4e\x83\xf5\x5b"
- "\x94\x12\xbb\x33\x27\x1d\x3f\x23"
- "\x51\xa8\x7c\x07\xa2\xae\x77\xa6"
- "\x50\xfd\xcc\xc0\x4f\x80\x7a\x9f"
- "\x66\xdd\xcd\x75\x24\x8b\x33\xf7"
- "\x20\xdb\x83\x9b\x4f\x11\x63\x6e"
- "\xcf\x37\xef\xc9\x11\x01\x5c\x45"
- "\x32\x99\x7c\x3c\x9e\x42\x89\xe3"
- "\x70\x6d\x15\x9f\xb1\xe6\xb6\x05"
- "\xfe\x0c\xb9\x49\x2d\x90\x6d\xcc"
- "\x5d\x3f\xc1\xfe\x89\x0a\x2e\x2d"
- "\xa0\xa8\x89\x3b\x73\x39\xa5\x94"
- "\x4c\xa4\xa6\xbb\xa7\x14\x46\x89"
- "\x10\xff\xaf\xef\xca\xdd\x4f\x80"
- "\xb3\xdf\x3b\xab\xd4\xe5\x5a\xc7"
- "\x33\xca\x00\x8b\x8b\x3f\xea\xec"
- "\x68\x8a\xc2\x6d\xfd\xd4\x67\x0f"
- "\x22\x31\xe1\x0e\xfe\x5a\x04\xd5"
- "\x64\xa3\xf1\x1a\x76\x28\xcc\x35"
- "\x36\xa7\x0a\x74\xf7\x1c\x44\x9b"
- "\xc7\x1b\x53\x17\x02\xea\xd1\xad"
- "\x13\x51\x73\xc0\xa0\xb2\x05\x32"
- "\xa8\xa2\x37\x2e\xe1\x7a\x3a\x19"
- "\x26\xb4\x6c\x62\x5d\xb3\x1a\x1d"
- "\x59\xda\xee\x1a\x22\x18\xda\x0d"
- "\x88\x0f\x55\x8b\x72\x62\xfd\xc1"
- "\x69\x13\xcd\x0d\x5f\xc1\x09\x52"
- "\xee\xd6\xe3\x84\x4d\xee\xf6\x88"
- "\xaf\x83\xdc\x76\xf4\xc0\x93\x3f"
- "\x4a\x75\x2f\xb0\x0b\x3e\xc4\x54"
- "\x7d\x69\x8d\x00\x62\x77\x0d\x14"
- "\xbe\x7c\xa6\x7d\xc5\x24\x4f\xf3"
- "\x50\xf7\x5f\xf4\xc2\xca\x41\x97"
- "\x37\xbe\x75\x74\xcd\xf0\x75\x6e"
- "\x25\x23\x94\xbd\xda\x8d\xb0\xd4",
- .len = 512,
- }, {
- .key = "\x27\x18\x28\x18\x28\x45\x90\x45"
- "\x23\x53\x60\x28\x74\x71\x35\x26"
- "\x62\x49\x77\x57\x24\x70\x93\x69"
- "\x99\x59\x57\x49\x66\x96\x76\x27",
- .klen = 32,
- .iv = "\xff\x00\x00\x00\x00\x00\x00\x00"
- "\x00\x00\x00\x00\x00\x00\x00\x00",
- .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07"
- "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
- "\x10\x11\x12\x13\x14\x15\x16\x17"
- "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
- "\x20\x21\x22\x23\x24\x25\x26\x27"
- "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
- "\x30\x31\x32\x33\x34\x35\x36\x37"
- "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
- "\x40\x41\x42\x43\x44\x45\x46\x47"
- "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
- "\x50\x51\x52\x53\x54\x55\x56\x57"
- "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
- "\x60\x61\x62\x63\x64\x65\x66\x67"
- "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
- "\x70\x71\x72\x73\x74\x75\x76\x77"
- "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
- "\x80\x81\x82\x83\x84\x85\x86\x87"
- "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
- "\x90\x91\x92\x93\x94\x95\x96\x97"
- "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
- "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
- "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
- "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
- "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
- "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
- "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
- "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
- "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
- "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
- "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
- "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
- "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
- "\x00\x01\x02\x03\x04\x05\x06\x07"
- "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
- "\x10\x11\x12\x13\x14\x15\x16\x17"
- "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
- "\x20\x21\x22\x23\x24\x25\x26\x27"
- "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
- "\x30\x31\x32\x33\x34\x35\x36\x37"
- "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
- "\x40\x41\x42\x43\x44\x45\x46\x47"
- "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
- "\x50\x51\x52\x53\x54\x55\x56\x57"
- "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
- "\x60\x61\x62\x63\x64\x65\x66\x67"
- "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
- "\x70\x71\x72\x73\x74\x75\x76\x77"
- "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
- "\x80\x81\x82\x83\x84\x85\x86\x87"
- "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
- "\x90\x91\x92\x93\x94\x95\x96\x97"
- "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
- "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
- "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
- "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
- "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
- "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
- "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
- "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
- "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
- "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
- "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
- "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
- "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
- .ctext = "\x55\xed\x71\xd3\x02\x8e\x15\x3b"
- "\xc6\x71\x29\x2d\x3e\x89\x9f\x59"
- "\x68\x6a\xcc\x8a\x56\x97\xf3\x95"
- "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c"
- "\x78\x16\xea\x80\xdb\x33\x75\x94"
- "\xf9\x29\xc4\x2b\x76\x75\x97\xc7"
- "\xf2\x98\x2c\xf9\xff\xc8\xd5\x2b"
- "\x18\xf1\xaf\xcf\x7c\xc5\x0b\xee"
- "\xad\x3c\x76\x7c\xe6\x27\xa2\x2a"
- "\xe4\x66\xe1\xab\xa2\x39\xfc\x7c"
- "\xf5\xec\x32\x74\xa3\xb8\x03\x88"
- "\x52\xfc\x2e\x56\x3f\xa1\xf0\x9f"
- "\x84\x5e\x46\xed\x20\x89\xb6\x44"
- "\x8d\xd0\xed\x54\x47\x16\xbe\x95"
- "\x8a\xb3\x6b\x72\xc4\x32\x52\x13"
- "\x1b\xb0\x82\xbe\xac\xf9\x70\xa6"
- "\x44\x18\xdd\x8c\x6e\xca\x6e\x45"
- "\x8f\x1e\x10\x07\x57\x25\x98\x7b"
- "\x17\x8c\x78\xdd\x80\xa7\xd9\xd8"
- "\x63\xaf\xb9\x67\x57\xfd\xbc\xdb"
- "\x44\xe9\xc5\x65\xd1\xc7\x3b\xff"
- "\x20\xa0\x80\x1a\xc3\x9a\xad\x5e"
- "\x5d\x3b\xd3\x07\xd9\xf5\xfd\x3d"
- "\x4a\x8b\xa8\xd2\x6e\x7a\x51\x65"
- "\x6c\x8e\x95\xe0\x45\xc9\x5f\x4a"
- "\x09\x3c\x3d\x71\x7f\x0c\x84\x2a"
- "\xc8\x48\x52\x1a\xc2\xd5\xd6\x78"
- "\x92\x1e\xa0\x90\x2e\xea\xf0\xf3"
- "\xdc\x0f\xb1\xaf\x0d\x9b\x06\x2e"
- "\x35\x10\x30\x82\x0d\xe7\xc5\x9b"
- "\xde\x44\x18\xbd\x9f\xd1\x45\xa9"
- "\x7b\x7a\x4a\xad\x35\x65\x27\xca"
- "\xb2\xc3\xd4\x9b\x71\x86\x70\xee"
- "\xf1\x89\x3b\x85\x4b\x5b\xaa\xaf"
- "\xfc\x42\xc8\x31\x59\xbe\x16\x60"
- "\x4f\xf9\xfa\x12\xea\xd0\xa7\x14"
- "\xf0\x7a\xf3\xd5\x8d\xbd\x81\xef"
- "\x52\x7f\x29\x51\x94\x20\x67\x3c"
- "\xd1\xaf\x77\x9f\x22\x5a\x4e\x63"
- "\xe7\xff\x73\x25\xd1\xdd\x96\x8a"
- "\x98\x52\x6d\xf3\xac\x3e\xf2\x18"
- "\x6d\xf6\x0a\x29\xa6\x34\x3d\xed"
- "\xe3\x27\x0d\x9d\x0a\x02\x44\x7e"
- "\x5a\x7e\x67\x0f\x0a\x9e\xd6\xad"
- "\x91\xe6\x4d\x81\x8c\x5c\x59\xaa"
- "\xfb\xeb\x56\x53\xd2\x7d\x4c\x81"
- "\x65\x53\x0f\x41\x11\xbd\x98\x99"
- "\xf9\xc6\xfa\x51\x2e\xa3\xdd\x8d"
- "\x84\x98\xf9\x34\xed\x33\x2a\x1f"
- "\x82\xed\xc1\x73\x98\xd3\x02\xdc"
- "\xe6\xc2\x33\x1d\xa2\xb4\xca\x76"
- "\x63\x51\x34\x9d\x96\x12\xae\xce"
- "\x83\xc9\x76\x5e\xa4\x1b\x53\x37"
- "\x17\xd5\xc0\x80\x1d\x62\xf8\x3d"
- "\x54\x27\x74\xbb\x10\x86\x57\x46"
- "\x68\xe1\xed\x14\xe7\x9d\xfc\x84"
- "\x47\xbc\xc2\xf8\x19\x4b\x99\xcf"
- "\x7a\xe9\xc4\xb8\x8c\x82\x72\x4d"
- "\x7b\x4f\x38\x55\x36\x71\x64\xc1"
- "\xfc\x5c\x75\x52\x33\x02\x18\xf8"
- "\x17\xe1\x2b\xc2\x43\x39\xbd\x76"
- "\x9b\x63\x76\x32\x2f\x19\x72\x10"
- "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5"
- "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c",
- .len = 512,
- .also_non_np = 1,
- .np = 3,
- .tap = { 512 - 20, 4, 16 },
+ .ptext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
+ "\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xee\xee\xee\xee\xee\xee\xee\xee"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb",
+ .iv = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
+ .ctext = "\xac\x32\x36\xcb\x97\x0c\xc2\x07"
+ "\x91\x36\x4c\x39\x5a\x13\x42\xd1"
+ "\xa3\xcb\xc1\x87\x8c\x6f\x30\xcd"
+ "\x07\x4c\xce\x38\x5c\xdd\x70\xc7"
+ "\xf2\x34\xbc\x0e\x24\xc1\x19\x80"
+ "\xfd\x12\x86\x31\x0c\xe3\x7b\x92"
+ "\x6e\x02\xfc\xd0\xfa\xa0\xba\xf3"
+ "\x8b\x29\x33\x85\x1d\x82\x45\x14",
+ .len = 64,
+ }, { /* A.2.5.2 SM4-CTR Example 2 */
+ .key = "\xFE\xDC\xBA\x98\x76\x54\x32\x10"
+ "\x01\x23\x45\x67\x89\xAB\xCD\xEF",
+ .klen = 16,
+ .ptext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
+ "\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xee\xee\xee\xee\xee\xee\xee\xee"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb",
+ .iv = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
+ .ctext = "\x5d\xcc\xcd\x25\xb9\x5a\xb0\x74"
+ "\x17\xa0\x85\x12\xee\x16\x0e\x2f"
+ "\x8f\x66\x15\x21\xcb\xba\xb4\x4c"
+ "\xc8\x71\x38\x44\x5b\xc2\x9e\x5c"
+ "\x0a\xe0\x29\x72\x05\xd6\x27\x04"
+ "\x17\x3b\x21\x23\x9b\x88\x7f\x6c"
+ "\x8c\xb5\xb8\x00\x91\x7a\x24\x88"
+ "\x28\x4b\xde\x9e\x16\xea\x29\x06",
+ .len = 64,
}
};
@@ -13883,6 +13251,27 @@ static const struct cipher_testvec aes_lrw_tv_template[] = {
.ctext = "\x5b\x90\x8e\xc1\xab\xdd\x67\x5f"
"\x3d\x69\x8a\x95\x53\xc8\x9c\xe5",
.len = 16,
+ }, { /* Test counter wrap-around, modified from LRW-32-AES 1 */
+ .key = "\x45\x62\xac\x25\xf8\x28\x17\x6d"
+ "\x4c\x26\x84\x14\xb5\x68\x01\x85"
+ "\x25\x8e\x2a\x05\xe7\x3e\x9d\x03"
+ "\xee\x5a\x83\x0c\xcc\x09\x4c\x87",
+ .klen = 32,
+ .iv = "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff",
+ .ptext = "\x30\x31\x32\x33\x34\x35\x36\x37"
+ "\x38\x39\x41\x42\x43\x44\x45\x46"
+ "\x30\x31\x32\x33\x34\x35\x36\x37"
+ "\x38\x39\x41\x42\x43\x44\x45\x46"
+ "\x30\x31\x32\x33\x34\x35\x36\x37"
+ "\x38\x39\x41\x42\x43\x44\x45\x46",
+ .ctext = "\x47\x90\x50\xf6\xf4\x8d\x5c\x7f"
+ "\x84\xc7\x83\x95\x2d\xa2\x02\xc0"
+ "\xda\x7f\xa3\xc0\x88\x2a\x0a\x50"
+ "\xfb\xc1\x78\x03\x39\xfe\x1d\xe5"
+ "\xf1\xb2\x73\xcd\x65\xa3\xdf\x5f"
+ "\xe9\x5d\x48\x92\x54\x63\x4e\xb8",
+ .len = 48,
}, {
/* http://www.mail-archive.com/stds-p1619@listserv.ieee.org/msg00173.html */
.key = "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
diff --git a/crypto/xcbc.c b/crypto/xcbc.c
index 25c75af50d3f..c055f57fab11 100644
--- a/crypto/xcbc.c
+++ b/crypto/xcbc.c
@@ -57,15 +57,17 @@ struct xcbc_desc_ctx {
u8 ctx[];
};
+#define XCBC_BLOCKSIZE 16
+
static int crypto_xcbc_digest_setkey(struct crypto_shash *parent,
const u8 *inkey, unsigned int keylen)
{
unsigned long alignmask = crypto_shash_alignmask(parent);
struct xcbc_tfm_ctx *ctx = crypto_shash_ctx(parent);
- int bs = crypto_shash_blocksize(parent);
u8 *consts = PTR_ALIGN(&ctx->ctx[0], alignmask + 1);
int err = 0;
- u8 key1[bs];
+ u8 key1[XCBC_BLOCKSIZE];
+ int bs = sizeof(key1);
if ((err = crypto_cipher_setkey(ctx->child, inkey, keylen)))
return err;
@@ -212,7 +214,7 @@ static int xcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
return PTR_ERR(alg);
switch(alg->cra_blocksize) {
- case 16:
+ case XCBC_BLOCKSIZE:
break;
default:
goto out_put_alg;
diff --git a/crypto/xts.c b/crypto/xts.c
index ccf55fbb8bc2..847f54f76789 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -26,8 +26,6 @@
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>
-#define XTS_BUFFER_SIZE 128u
-
struct priv {
struct crypto_skcipher *child;
struct crypto_cipher *tweak;
@@ -39,19 +37,7 @@ struct xts_instance_ctx {
};
struct rctx {
- le128 buf[XTS_BUFFER_SIZE / sizeof(le128)];
-
le128 t;
-
- le128 *ext;
-
- struct scatterlist srcbuf[2];
- struct scatterlist dstbuf[2];
- struct scatterlist *src;
- struct scatterlist *dst;
-
- unsigned int left;
-
struct skcipher_request subreq;
};
@@ -96,81 +82,27 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
return err;
}
-static int post_crypt(struct skcipher_request *req)
+/*
+ * We compute the tweak masks twice (both before and after the ECB encryption or
+ * decryption) to avoid having to allocate a temporary buffer and/or make
+ * mutliple calls to the 'ecb(..)' instance, which usually would be slower than
+ * just doing the gf128mul_x_ble() calls again.
+ */
+static int xor_tweak(struct skcipher_request *req, bool second_pass)
{
struct rctx *rctx = skcipher_request_ctx(req);
- le128 *buf = rctx->ext ?: rctx->buf;
- struct skcipher_request *subreq;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
const int bs = XTS_BLOCK_SIZE;
struct skcipher_walk w;
- struct scatterlist *sg;
- unsigned offset;
+ le128 t = rctx->t;
int err;
- subreq = &rctx->subreq;
- err = skcipher_walk_virt(&w, subreq, false);
-
- while (w.nbytes) {
- unsigned int avail = w.nbytes;
- le128 *wdst;
-
- wdst = w.dst.virt.addr;
-
- do {
- le128_xor(wdst, buf++, wdst);
- wdst++;
- } while ((avail -= bs) >= bs);
-
- err = skcipher_walk_done(&w, avail);
+ if (second_pass) {
+ req = &rctx->subreq;
+ /* set to our TFM to enforce correct alignment: */
+ skcipher_request_set_tfm(req, tfm);
}
-
- rctx->left -= subreq->cryptlen;
-
- if (err || !rctx->left)
- goto out;
-
- rctx->dst = rctx->dstbuf;
-
- scatterwalk_done(&w.out, 0, 1);
- sg = w.out.sg;
- offset = w.out.offset;
-
- if (rctx->dst != sg) {
- rctx->dst[0] = *sg;
- sg_unmark_end(rctx->dst);
- scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 2);
- }
- rctx->dst[0].length -= offset - sg->offset;
- rctx->dst[0].offset = offset;
-
-out:
- return err;
-}
-
-static int pre_crypt(struct skcipher_request *req)
-{
- struct rctx *rctx = skcipher_request_ctx(req);
- le128 *buf = rctx->ext ?: rctx->buf;
- struct skcipher_request *subreq;
- const int bs = XTS_BLOCK_SIZE;
- struct skcipher_walk w;
- struct scatterlist *sg;
- unsigned cryptlen;
- unsigned offset;
- bool more;
- int err;
-
- subreq = &rctx->subreq;
- cryptlen = subreq->cryptlen;
-
- more = rctx->left > cryptlen;
- if (!more)
- cryptlen = rctx->left;
-
- skcipher_request_set_crypt(subreq, rctx->src, rctx->dst,
- cryptlen, NULL);
-
- err = skcipher_walk_virt(&w, subreq, false);
+ err = skcipher_walk_virt(&w, req, false);
while (w.nbytes) {
unsigned int avail = w.nbytes;
@@ -181,180 +113,71 @@ static int pre_crypt(struct skcipher_request *req)
wdst = w.dst.virt.addr;
do {
- *buf++ = rctx->t;
- le128_xor(wdst++, &rctx->t, wsrc++);
- gf128mul_x_ble(&rctx->t, &rctx->t);
+ le128_xor(wdst++, &t, wsrc++);
+ gf128mul_x_ble(&t, &t);
} while ((avail -= bs) >= bs);
err = skcipher_walk_done(&w, avail);
}
- skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst,
- cryptlen, NULL);
-
- if (err || !more)
- goto out;
-
- rctx->src = rctx->srcbuf;
-
- scatterwalk_done(&w.in, 0, 1);
- sg = w.in.sg;
- offset = w.in.offset;
-
- if (rctx->src != sg) {
- rctx->src[0] = *sg;
- sg_unmark_end(rctx->src);
- scatterwalk_crypto_chain(rctx->src, sg_next(sg), 2);
- }
- rctx->src[0].length -= offset - sg->offset;
- rctx->src[0].offset = offset;
-
-out:
return err;
}
-static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
+static int xor_tweak_pre(struct skcipher