diff options
Diffstat (limited to 'drivers/crypto')
94 files changed, 7711 insertions, 1583 deletions
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 477fffdb4f49..1af94e2d1a25 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -159,6 +159,19 @@ config CRYPTO_GHASH_S390 It is available as of z196. +config CRYPTO_CRC32_S390 + tristate "CRC-32 algorithms" + depends on S390 + select CRYPTO_HASH + select CRC32 + help + Select this option if you want to use hardware accelerated + implementations of CRC algorithms. With this option, you + can optimize the computation of CRC-32 (IEEE 802.3 Ethernet) + and CRC-32C (Castagnoli). + + It is available with IBM z13 or later. + config CRYPTO_DEV_MV_CESA tristate "Marvell's Cryptographic Engine" depends on PLAT_ORION @@ -279,6 +292,14 @@ config CRYPTO_DEV_PPC4XX help This option allows you to have support for AMCC crypto acceleration. +config HW_RANDOM_PPC4XX + bool "PowerPC 4xx generic true random number generator support" + depends on CRYPTO_DEV_PPC4XX && HW_RANDOM + default y + ---help--- + This option provides the kernel-side support for the TRNG hardware + found in the security function of some PowerPC 4xx SoCs. + config CRYPTO_DEV_OMAP_SHAM tristate "Support for OMAP MD5/SHA1/SHA2 hw accelerator" depends on ARCH_OMAP2PLUS @@ -302,15 +323,16 @@ config CRYPTO_DEV_OMAP_AES want to use the OMAP module for AES algorithms. config CRYPTO_DEV_OMAP_DES - tristate "Support for OMAP DES3DES hw engine" + tristate "Support for OMAP DES/3DES hw engine" depends on ARCH_OMAP2PLUS select CRYPTO_DES select CRYPTO_BLKCIPHER + select CRYPTO_ENGINE help OMAP processors have DES/3DES module accelerator. Select this if you want to use the OMAP module for DES and 3DES algorithms. Currently - the ECB and CBC modes of operation supported by the driver. Also - accesses made on unaligned boundaries are also supported. + the ECB and CBC modes of operation are supported by the driver. Also + accesses made on unaligned boundaries are supported. config CRYPTO_DEV_PICOXCELL tristate "Support for picoXcell IPSEC and Layer2 crypto engines" @@ -340,9 +362,19 @@ config CRYPTO_DEV_SAHARA This option enables support for the SAHARA HW crypto accelerator found in some Freescale i.MX chips. +config CRYPTO_DEV_MXC_SCC + tristate "Support for Freescale Security Controller (SCC)" + depends on ARCH_MXC && OF + select CRYPTO_BLKCIPHER + select CRYPTO_DES + help + This option enables support for the Security Controller (SCC) + found in Freescale i.MX25 chips. + config CRYPTO_DEV_S5P tristate "Support for Samsung S5PV210/Exynos crypto accelerator" - depends on ARCH_S5PV210 || ARCH_EXYNOS + depends on ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST + depends on HAS_IOMEM && HAS_DMA select CRYPTO_AES select CRYPTO_BLKCIPHER help diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 713de9d11148..3c6432dd09d9 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o +obj-$(CONFIG_CRYPTO_DEV_MXC_SCC) += mxc-scc.o obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/ diff --git a/drivers/crypto/amcc/Makefile b/drivers/crypto/amcc/Makefile index 5c0c62b65d69..b95539928fdf 100644 --- a/drivers/crypto/amcc/Makefile +++ b/drivers/crypto/amcc/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += crypto4xx.o crypto4xx-y := crypto4xx_core.o crypto4xx_alg.o crypto4xx_sa.o +crypto4xx-$(CONFIG_HW_RANDOM_PPC4XX) += crypto4xx_trng.o diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index 62134c8a2260..dae1e39139e9 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -40,6 +40,7 @@ #include "crypto4xx_reg_def.h" #include "crypto4xx_core.h" #include "crypto4xx_sa.h" +#include "crypto4xx_trng.h" #define PPC4XX_SEC_VERSION_STR "0.5" @@ -1225,6 +1226,7 @@ static int crypto4xx_probe(struct platform_device *ofdev) if (rc) goto err_start_dev; + ppc4xx_trng_probe(core_dev); return 0; err_start_dev: @@ -1252,6 +1254,8 @@ static int crypto4xx_remove(struct platform_device *ofdev) struct device *dev = &ofdev->dev; struct crypto4xx_core_device *core_dev = dev_get_drvdata(dev); + ppc4xx_trng_remove(core_dev); + free_irq(core_dev->irq, dev); irq_dispose_mapping(core_dev->irq); @@ -1272,7 +1276,7 @@ MODULE_DEVICE_TABLE(of, crypto4xx_match); static struct platform_driver crypto4xx_driver = { .driver = { - .name = "crypto4xx", + .name = MODULE_NAME, .of_match_table = crypto4xx_match, }, .probe = crypto4xx_probe, @@ -1284,4 +1288,3 @@ module_platform_driver(crypto4xx_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Hsiao <jhsiao@amcc.com>"); MODULE_DESCRIPTION("Driver for AMCC PPC4xx crypto accelerator"); - diff --git a/drivers/crypto/amcc/crypto4xx_core.h b/drivers/crypto/amcc/crypto4xx_core.h index bac0bdeb4b5f..ecfdcfe3698d 100644 --- a/drivers/crypto/amcc/crypto4xx_core.h +++ b/drivers/crypto/amcc/crypto4xx_core.h @@ -24,6 +24,8 @@ #include <crypto/internal/hash.h> +#define MODULE_NAME "crypto4xx" + #define PPC460SX_SDR0_SRST 0x201 #define PPC405EX_SDR0_SRST 0x200 #define PPC460EX_SDR0_SRST 0x201 @@ -72,6 +74,7 @@ struct crypto4xx_device { char *name; u64 ce_phy_address; void __iomem *ce_base; + void __iomem *trng_base; void *pdr; /* base address of packet descriptor ring */ @@ -106,6 +109,7 @@ struct crypto4xx_core_device { struct device *device; struct platform_device *ofdev; struct crypto4xx_device *dev; + struct hwrng *trng; u32 int_status; u32 irq; struct tasklet_struct tasklet; diff --git a/drivers/crypto/amcc/crypto4xx_reg_def.h b/drivers/crypto/amcc/crypto4xx_reg_def.h index 5f5fbc0716ff..46fe57c8f6eb 100644 --- a/drivers/crypto/amcc/crypto4xx_reg_def.h +++ b/drivers/crypto/amcc/crypto4xx_reg_def.h @@ -125,6 +125,7 @@ #define PPC4XX_INTERRUPT_CLR 0x3ffff #define PPC4XX_PRNG_CTRL_AUTO_EN 0x3 #define PPC4XX_DC_3DES_EN 1 +#define PPC4XX_TRNG_EN 0x00020000 #define PPC4XX_INT_DESCR_CNT 4 #define PPC4XX_INT_TIMEOUT_CNT 0 #define PPC4XX_INT_CFG 1 diff --git a/drivers/crypto/amcc/crypto4xx_trng.c b/drivers/crypto/amcc/crypto4xx_trng.c new file mode 100644 index 000000000000..677ca17fd223 --- /dev/null +++ b/drivers/crypto/amcc/crypto4xx_trng.c @@ -0,0 +1,131 @@ +/* + * Generic PowerPC 44x RNG driver + * + * Copyright 2011 IBM Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/hw_random.h> +#include <linux/delay.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> +#include <linux/io.h> + +#include "crypto4xx_core.h" +#include "crypto4xx_trng.h" +#include "crypto4xx_reg_def.h" + +#define PPC4XX_TRNG_CTRL 0x0008 +#define PPC4XX_TRNG_CTRL_DALM 0x20 +#define PPC4XX_TRNG_STAT 0x0004 +#define PPC4XX_TRNG_STAT_B 0x1 +#define PPC4XX_TRNG_DATA 0x0000 + +static int ppc4xx_trng_data_present(struct hwrng *rng, int wait) +{ + struct crypto4xx_device *dev = (void *)rng->priv; + int busy, i, present = 0; + + for (i = 0; i < 20; i++) { + busy = (in_le32(dev->trng_base + PPC4XX_TRNG_STAT) & + PPC4XX_TRNG_STAT_B); + if (!busy || !wait) { + present = 1; + break; + } + udelay(10); + } + return present; +} + +static int ppc4xx_trng_data_read(struct hwrng *rng, u32 *data) +{ + struct crypto4xx_device *dev = (void *)rng->priv; + *data = in_le32(dev->trng_base + PPC4XX_TRNG_DATA); + return 4; +} + +static void ppc4xx_trng_enable(struct crypto4xx_device *dev, bool enable) +{ + u32 device_ctrl; + + device_ctrl = readl(dev->ce_base + CRYPTO4XX_DEVICE_CTRL); + if (enable) + device_ctrl |= PPC4XX_TRNG_EN; + else + device_ctrl &= ~PPC4XX_TRNG_EN; + writel(device_ctrl, dev->ce_base + CRYPTO4XX_DEVICE_CTRL); +} + +static const struct of_device_id ppc4xx_trng_match[] = { + { .compatible = "ppc4xx-rng", }, + { .compatible = "amcc,ppc460ex-rng", }, + { .compatible = "amcc,ppc440epx-rng", }, + {}, +}; + +void ppc4xx_trng_probe(struct crypto4xx_core_device *core_dev) +{ + struct crypto4xx_device *dev = core_dev->dev; + struct device_node *trng = NULL; + struct hwrng *rng = NULL; + int err; + + /* Find the TRNG device node and map it */ + trng = of_find_matching_node(NULL, ppc4xx_trng_match); + if (!trng || !of_device_is_available(trng)) + return; + + dev->trng_base = of_iomap(trng, 0); + of_node_put(trng); + if (!dev->trng_base) + goto err_out; + + rng = kzalloc(sizeof(*rng), GFP_KERNEL); + if (!rng) + goto err_out; + + rng->name = MODULE_NAME; + rng->data_present = ppc4xx_trng_data_present; + rng->data_read = ppc4xx_trng_data_read; + rng->priv = (unsigned long) dev; + core_dev->trng = rng; + ppc4xx_trng_enable(dev, true); + out_le32(dev->trng_base + PPC4XX_TRNG_CTRL, PPC4XX_TRNG_CTRL_DALM); + err = devm_hwrng_register(core_dev->device, core_dev->trng); + if (err) { + ppc4xx_trng_enable(dev, false); + dev_err(core_dev->device, "failed to register hwrng (%d).\n", + err); + goto err_out; + } + return; + +err_out: + of_node_put(trng); + iounmap(dev->trng_base); + kfree(rng); + dev->trng_base = NULL; + core_dev->trng = NULL; +} + +void ppc4xx_trng_remove(struct crypto4xx_core_device *core_dev) +{ + if (core_dev && core_dev->trng) { + struct crypto4xx_device *dev = core_dev->dev; + + devm_hwrng_unregister(core_dev->device, core_dev->trng); + ppc4xx_trng_enable(dev, false); + iounmap(dev->trng_base); + kfree(core_dev->trng); + } +} + +MODULE_ALIAS("ppc4xx_rng"); diff --git a/drivers/crypto/amcc/crypto4xx_trng.h b/drivers/crypto/amcc/crypto4xx_trng.h new file mode 100644 index 000000000000..931d22531f51 --- /dev/null +++ b/drivers/crypto/amcc/crypto4xx_trng.h @@ -0,0 +1,34 @@ +/** + * AMCC SoC PPC4xx Crypto Driver + * + * Copyright (c) 2008 Applied Micro Circuits Corporation. + * All rights reserved. James Hsiao <jhsiao@amcc.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This file defines the security context + * associate format. + */ + +#ifndef __CRYPTO4XX_TRNG_H__ +#define __CRYPTO4XX_TRNG_H__ + +#ifdef CONFIG_HW_RANDOM_PPC4XX +void ppc4xx_trng_probe(struct crypto4xx_core_device *core_dev); +void ppc4xx_trng_remove(struct crypto4xx_core_device *core_dev); +#else +static inline void ppc4xx_trng_probe( + struct crypto4xx_device *dev __maybe_unused) { } +static inline void ppc4xx_trng_remove( + struct crypto4xx_device *dev __maybe_unused) { } +#endif + +#endif diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c index 95b73968cf72..10db7df366c8 100644 --- a/drivers/crypto/bfin_crc.c +++ b/drivers/crypto/bfin_crc.c @@ -588,11 +588,6 @@ static int bfin_crypto_crc_probe(struct platform_device *pdev) crypto_init_queue(&crc->queue, CRC_CCRYPTO_QUEUE_LENGTH); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res == NULL) { - dev_err(&pdev->dev, "Cannot get IORESOURCE_MEM\n"); - return -ENOENT; - } - crc->regs = devm_ioremap_resource(dev, res); if (IS_ERR((void *)crc->regs)) { dev_err(&pdev->dev, "Cannot map CRC IO\n"); diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index 5652a53415dc..64bf3024b680 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -1,6 +1,6 @@ config CRYPTO_DEV_FSL_CAAM tristate "Freescale CAAM-Multicore driver backend" - depends on FSL_SOC || ARCH_MXC + depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE help Enables the driver module for Freescale's Cryptographic Accelerator and Assurance Module (CAAM), also known as the SEC version 4 (SEC4). @@ -99,6 +99,18 @@ config CRYPTO_DEV_FSL_CAAM_AHASH_API To compile this as a module, choose M here: the module will be called caamhash. +config CRYPTO_DEV_FSL_CAAM_PKC_API + tristate "Register public key cryptography implementations with Crypto API" + depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR + default y + select CRYPTO_RSA + help + Selecting this will allow SEC Public key support for RSA. + Supported cryptographic primitives: encryption, decryption, + signature and verification. + To compile this as a module, choose M here: the module + will be called caam_pkc. + config CRYPTO_DEV_FSL_CAAM_RNG_API tristate "Register caam device for hwrng API" depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR @@ -116,10 +128,6 @@ config CRYPTO_DEV_FSL_CAAM_IMX def_bool SOC_IMX6 || SOC_IMX7D depends on CRYPTO_DEV_FSL_CAAM -config CRYPTO_DEV_FSL_CAAM_LE - def_bool CRYPTO_DEV_FSL_CAAM_IMX || SOC_LS1021A - depends on CRYPTO_DEV_FSL_CAAM - config CRYPTO_DEV_FSL_CAAM_DEBUG bool "Enable debug output in CAAM driver" depends on CRYPTO_DEV_FSL_CAAM diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index 550758a333e7..08bf5515ae8a 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -2,7 +2,7 @@ # Makefile for the CAAM backend and dependent components # ifeq ($(CONFIG_CRYPTO_DEV_FSL_CAAM_DEBUG), y) - EXTRA_CFLAGS := -DDEBUG + ccflags-y := -DDEBUG endif obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o @@ -10,6 +10,8 @@ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_JR) += caam_jr.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o +obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caam_pkc.o caam-objs := ctrl.o caam_jr-objs := jr.o key_gen.o error.o +caam_pkc-y := caampkc.o pkc_desc.o diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index ea8189f4b021..b3044219772c 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -441,6 +441,9 @@ static int aead_set_sh_desc(struct crypto_aead *aead) OP_ALG_AAI_CTR_MOD128); const bool is_rfc3686 = alg->caam.rfc3686; + if (!ctx->authsize) + return 0; + /* NULL encryption / decryption */ if (!ctx->enckeylen) return aead_null_set_sh_desc(aead); @@ -553,7 +556,10 @@ skip_enc: /* Read and write assoclen bytes */ append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + if (alg->caam.geniv) + append_math_add_imm_u32(desc, VARSEQOUTLEN, REG3, IMM, ivsize); + else + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); /* Skip assoc data */ append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); @@ -562,6 +568,14 @@ skip_enc: append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | KEY_VLF); + if (alg->caam.geniv) { + append_seq_load(desc, ivsize, LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + (ctx1_iv_off << LDST_OFFSET_SHIFT)); + append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_CLASS2INFIFO | + (ctx1_iv_off << MOVE_OFFSET_SHIFT) | ivsize); + } + /* Load Counter into CONTEXT1 reg */ if (is_rfc3686) append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM | @@ -614,7 +628,7 @@ skip_enc: keys_fit_inline = true; /* aead_givencrypt shared descriptor */ - desc = ctx->sh_desc_givenc; + desc = ctx->sh_desc_enc; /* Note: Context registers are saved. */ init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686); @@ -645,13 +659,13 @@ copy_iv: append_operation(desc, ctx->class2_alg_type | OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - /* ivsize + cryptlen = seqoutlen - authsize */ - append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); - /* Read and write assoclen bytes */ append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + /* ivsize + cryptlen = seqoutlen - authsize */ + append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); + /* Skip assoc data */ append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); @@ -697,7 +711,7 @@ copy_iv: ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) { + if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } @@ -2147,7 +2161,7 @@ static void init_authenc_job(struct aead_request *req, init_aead_job(req, edesc, all_contig, encrypt); - if (ivsize && (is_rfc3686 || !(alg->caam.geniv && encrypt))) + if (ivsize && ((is_rfc3686 && encrypt) || !alg->caam.geniv)) append_load_as_imm(desc, req->iv, ivsize, LDST_CLASS_1_CCB | LDST_SRCDST_BYTE_CONTEXT | @@ -2534,20 +2548,6 @@ static int aead_decrypt(struct aead_request *req) return ret; } -static int aead_givdecrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - unsigned int ivsize = crypto_aead_ivsize(aead); - - if (req->cryptlen < ivsize) - return -EINVAL; - - req->cryptlen -= ivsize; - req->assoclen += ivsize; - - return aead_decrypt(req); -} - /* * allocate and map the ablkcipher extended descriptor for ablkcipher */ @@ -3207,7 +3207,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = AES_BLOCK_SIZE, .maxauthsize = MD5_DIGEST_SIZE, }, @@ -3253,7 +3253,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = AES_BLOCK_SIZE, .maxauthsize = SHA1_DIGEST_SIZE, }, @@ -3299,7 +3299,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = AES_BLOCK_SIZE, .maxauthsize = SHA224_DIGEST_SIZE, }, @@ -3345,7 +3345,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = AES_BLOCK_SIZE, .maxauthsize = SHA256_DIGEST_SIZE, }, @@ -3391,7 +3391,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = AES_BLOCK_SIZE, .maxauthsize = SHA384_DIGEST_SIZE, }, @@ -3437,7 +3437,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = AES_BLOCK_SIZE, .maxauthsize = SHA512_DIGEST_SIZE, }, @@ -3483,7 +3483,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = MD5_DIGEST_SIZE, }, @@ -3531,7 +3531,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = SHA1_DIGEST_SIZE, }, @@ -3579,7 +3579,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = SHA224_DIGEST_SIZE, }, @@ -3627,7 +3627,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = SHA256_DIGEST_SIZE, }, @@ -3675,7 +3675,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = SHA384_DIGEST_SIZE, }, @@ -3723,7 +3723,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = SHA512_DIGEST_SIZE, }, @@ -3769,7 +3769,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES_BLOCK_SIZE, .maxauthsize = MD5_DIGEST_SIZE, }, @@ -3815,7 +3815,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES_BLOCK_SIZE, .maxauthsize = SHA1_DIGEST_SIZE, }, @@ -3861,7 +3861,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES_BLOCK_SIZE, .maxauthsize = SHA224_DIGEST_SIZE, }, @@ -3907,7 +3907,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES_BLOCK_SIZE, .maxauthsize = SHA256_DIGEST_SIZE, }, @@ -3953,7 +3953,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES_BLOCK_SIZE, .maxauthsize = SHA384_DIGEST_SIZE, }, @@ -3999,7 +3999,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = DES_BLOCK_SIZE, .maxauthsize = SHA512_DIGEST_SIZE, }, @@ -4048,7 +4048,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = MD5_DIGEST_SIZE, }, @@ -4099,7 +4099,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = SHA1_DIGEST_SIZE, }, @@ -4150,7 +4150,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = SHA224_DIGEST_SIZE, }, @@ -4201,7 +4201,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = SHA256_DIGEST_SIZE, }, @@ -4252,7 +4252,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = SHA384_DIGEST_SIZE, }, @@ -4303,7 +4303,7 @@ static struct caam_aead_alg driver_aeads[] = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, .encrypt = aead_encrypt, - .decrypt = aead_givdecrypt, + .decrypt = aead_decrypt, .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = SHA512_DIGEST_SIZE, }, diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 5845d4a08797..36365b3efdfd 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -847,7 +847,7 @@ static int ahash_update_ctx(struct ahash_request *req) *next_buflen, 0); } else { (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= - SEC4_SG_LEN_FIN; + cpu_to_caam32(SEC4_SG_LEN_FIN); } state->current_buf = !state->current_buf; @@ -949,7 +949,8 @@ static int ahash_final_ctx(struct ahash_request *req) state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, buf, state->buf_dma, buflen, last_buflen); - (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= SEC4_SG_LEN_FIN; + (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= + cpu_to_caam32(SEC4_SG_LEN_FIN); edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, sec4_sg_bytes, DMA_TO_DEVICE); @@ -1897,6 +1898,7 @@ caam_hash_alloc(struct caam_hash_template *template, template->name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->driver_name); + t_alg->ahash_alg.setkey = NULL; } alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c new file mode 100644 index 000000000000..851015e652b8 --- /dev/null +++ b/drivers/crypto/caam/caampkc.c @@ -0,0 +1,607 @@ +/* + * caam - Freescale FSL CAAM support for Public Key Cryptography + * + * Copyright 2016 Freescale Semiconductor, Inc. + * + * There is no Shared Descriptor for PKC so that the Job Descriptor must carry + * all the desired key parameters, input and output pointers. + */ +#include "compat.h" +#include "regs.h" +#include "intern.h" +#include "jr.h" +#include "error.h" +#include "desc_constr.h" +#include "sg_sw_sec4.h" +#include "caampkc.h" + +#define DESC_RSA_PUB_LEN (2 * CAAM_CMD_SZ + sizeof(struct rsa_pub_pdb)) +#define DESC_RSA_PRIV_F1_LEN (2 * CAAM_CMD_SZ + \ + sizeof(struct rsa_priv_f1_pdb)) + +static void rsa_io_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + dma_unmap_sg(dev, req->dst, edesc->dst_nents, DMA_FROM_DEVICE); + dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE); + + if (edesc->sec4_sg_bytes) + dma_unmap_single(dev, edesc->sec4_sg_dma, edesc->sec4_sg_bytes, + DMA_TO_DEVICE); +} + +static void rsa_pub_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct rsa_pub_pdb *pdb = &edesc->pdb.pub; + + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->e_dma, key->e_sz, DMA_TO_DEVICE); +} + +static void rsa_priv_f1_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1; + + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE); +} + +/* RSA Job Completion handler */ +static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context) +{ + struct akcipher_request *req = context; + struct rsa_edesc *edesc; + + if (err) + caam_jr_strstatus(dev, err); + + edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + + rsa_pub_unmap(dev, edesc, req); + rsa_io_unmap(dev, edesc, req); + kfree(edesc); + + akcipher_request_complete(req, err); +} + +static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err, + void *context) +{ + struct akcipher_request *req = context; + struct rsa_edesc *edesc; + + if (err) + caam_jr_strstatus(dev, err); + + edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + + rsa_priv_f1_unmap(dev, edesc, req); + rsa_io_unmap(dev, edesc, req); + kfree(edesc); + + akcipher_request_complete(req, err); +} + +static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req, + size_t desclen) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *dev = ctx->dev; + struct rsa_edesc *edesc; + gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; + int sgc; + int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes; + int src_nents, dst_nents; + + src_nents = sg_nents_for_len(req->src, req->src_len); + dst_nents = sg_nents_for_len(req->dst, req->dst_len); + + if (src_nents > 1) + sec4_sg_len = src_nents; + if (dst_nents > 1) + sec4_sg_len += dst_nents; + + sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); + + /* allocate space for base edesc, hw desc commands and link tables */ + edesc = kzalloc(sizeof(*edesc) + desclen + sec4_sg_bytes, + GFP_DMA | flags); + if (!edesc) + return ERR_PTR(-ENOMEM); + + sgc = dma_map_sg(dev, req->src, src_nents, DMA_TO_DEVICE); + if (unlikely(!sgc)) { + dev_err(dev, "unable to map source\n"); + goto src_fail; + } + + sgc = dma_map_sg(dev, req->dst, dst_nents, DMA_FROM_DEVICE); + if (unlikely(!sgc)) { + dev_err(dev, "unable to map destination\n"); + goto dst_fail; + } + + edesc->sec4_sg = (void *)edesc + sizeof(*edesc) + desclen; + + sec4_sg_index = 0; + if (src_nents > 1) { + sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0); + sec4_sg_index += src_nents; + } + if (dst_nents > 1) + sg_to_sec4_sg_last(req->dst, dst_nents, + edesc->sec4_sg + sec4_sg_index, 0); + + /* Save nents for later use in Job Descriptor */ + edesc->src_nents = src_nents; + edesc->dst_nents = dst_nents; + + if (!sec4_sg_bytes) + return edesc; + + edesc->sec4_sg_dma = dma_map_single(dev, edesc->sec4_sg, + sec4_sg_bytes, DMA_TO_DEVICE); + if (dma_mapping_error(dev, edesc->sec4_sg_dma)) { + dev_err(dev, "unable to map S/G table\n"); + goto sec4_sg_fail; + } + + edesc->sec4_sg_bytes = sec4_sg_bytes; + + return edesc; + +sec4_sg_fail: + dma_unmap_sg(dev, req->dst, dst_nents, DMA_FROM_DEVICE); +dst_fail: + dma_unmap_sg(dev, req->src, src_nents, DMA_TO_DEVICE); +src_fail: + kfree(edesc); + return ERR_PTR(-ENOMEM); +} + +static int set_rsa_pub_pdb(struct akcipher_request *req, + struct rsa_edesc *edesc) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *dev = ctx->dev; + struct rsa_pub_pdb *pdb = &edesc->pdb.pub; + int sec4_sg_index = 0; + + pdb->n_dma = dma_map_single(dev, key->n, key->n_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->n_dma)) { + dev_err(dev, "Unable to map RSA modulus memory\n"); + return -ENOMEM; + } + + pdb->e_dma = dma_map_single(dev, key->e, key->e_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->e_dma)) { + dev_err(dev, "Unable to map RSA public exponent memory\n"); + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + return -ENOMEM; + } + + if (edesc->src_nents > 1) { + pdb->sgf |= RSA_PDB_SGF_F; + pdb->f_dma = edesc->sec4_sg_dma; + sec4_sg_index += edesc->src_nents; + } else { + pdb->f_dma = sg_dma_address(req->src); + } + + if (edesc->dst_nents > 1) { + pdb->sgf |= RSA_PDB_SGF_G; + pdb->g_dma = edesc->sec4_sg_dma + + sec4_sg_index * sizeof(struct sec4_sg_entry); + } else { + pdb->g_dma = sg_dma_address(req->dst); + } + + pdb->sgf |= (key->e_sz << RSA_PDB_E_SHIFT) | key->n_sz; + pdb->f_len = req->src_len; + + return 0; +} + +static int set_rsa_priv_f1_pdb(struct akcipher_request *req, + struct rsa_edesc *edesc) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *dev = ctx->dev; + struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1; + int sec4_sg_index = 0; + + pdb->n_dma = dma_map_single(dev, key->n, key->n_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->n_dma)) { + dev_err(dev, "Unable to map modulus memory\n"); + return -ENOMEM; + } + + pdb->d_dma = dma_map_single(dev, key->d, key->d_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->d_dma)) { + dev_err(dev, "Unable to map RSA private exponent memory\n"); + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + return -ENOMEM; + } + + if (edesc->src_nents > 1) { + pdb->sgf |= RSA_PRIV_PDB_SGF_G; + pdb->g_dma = edesc->sec4_sg_dma; + sec4_sg_index += edesc->src_nents; + } else { + pdb->g_dma = sg_dma_address(req->src); + } + + if (edesc->dst_nents > 1) { + pdb->sgf |= RSA_PRIV_PDB_SGF_F; + pdb->f_dma = edesc->sec4_sg_dma + + sec4_sg_index * sizeof(struct sec4_sg_entry); + } else { + pdb->f_dma = sg_dma_address(req->dst); + } + + pdb->sgf |= (key->d_sz << RSA_PDB_D_SHIFT) | key->n_sz; + + return 0; +} + +static int caam_rsa_enc(struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *jrdev = ctx->dev; + struct rsa_edesc *edesc; + int ret; + + if (unlikely(!key->n || !key->e)) + return -EINVAL; + + if (req->dst_len < key->n_sz) { + req->dst_len = key->n_sz; + dev_err(jrdev, "Output buffer length less than parameter n\n"); + return -EOVERFLOW; + } + + /* Allocate extended descriptor */ + edesc = rsa_edesc_alloc(req, DESC_RSA_PUB_LEN); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + /* Set RSA Encrypt Protocol Data Block */ + ret = set_rsa_pub_pdb(req, edesc); + if (ret) + goto init_fail; + + /* Initialize Job Descriptor */ + init_rsa_pub_desc(edesc->hw_desc, &edesc->pdb.pub); + + ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_pub_done, req); + if (!ret) + return -EINPROGRESS; + + rsa_pub_unmap(jrdev, edesc, req); + +init_fail: + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + return ret; +} + +static int caam_rsa_dec(struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *jrdev = ctx->dev; + struct rsa_edesc *edesc; + int ret; + + if (unlikely(!key->n || !key->d)) + return -EINVAL; + + if (req->dst_len < key->n_sz) { + req->dst_len = key->n_sz; + dev_err(jrdev, "Output buffer length less than parameter n\n"); + return -EOVERFLOW; + } + + /* Allocate extended descriptor */ + edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F1_LEN); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + /* Set RSA Decrypt Protocol Data Block - Private Key Form #1 */ + ret = set_rsa_priv_f1_pdb(req, edesc); + if (ret) + goto init_fail; + + /* Initialize Job Descriptor */ + init_rsa_priv_f1_desc(edesc->hw_desc, &edesc->pdb.priv_f1); + + ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f1_done, req); + if (!ret) + return -EINPROGRESS; + + rsa_priv_f1_unmap(jrdev, edesc, req); + +init_fail: + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + return ret; +} + +static void caam_rsa_free_key(struct caam_rsa_key *key) +{ + kzfree(key->d); + kfree(key->e); + kfree(key->n); + key->d = NULL; + key->e = NULL; + key->n = NULL; + key->d_sz = 0; + key->e_sz = 0; + key->n_sz = 0; +} + +/** + * caam_read_raw_data - Read a raw byte stream as a positive integer. + * The function skips buffer's leading zeros, copies the remained data + * to a buffer allocated in the GFP_DMA | GFP_KERNEL zone and returns + * the address of the new buffer. + * + * @buf : The data to read + * @nbytes: The amount of data to read + */ +static inline u8 *caam_read_raw_data(const u8 *buf, size_t *nbytes) +{ + u8 *val; + + while (!*buf && *nbytes) { + buf++; + (*nbytes)--; + } + + val = kzalloc(*nbytes, GFP_DMA | GFP_KERNEL); + if (!val) + return NULL; + + memcpy(val, buf, *nbytes); + + return val; +} + +static int caam_rsa_check_key_length(unsigned int len) +{ + if (len > 4096) + return -EINVAL; + return 0; +} + +static int caam_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key = {0}; + struct caam_rsa_key *rsa_key = &ctx->key; + int ret; + + /* Free the old RSA key if any */ + caam_rsa_free_key(rsa_key); + + ret = rsa_parse_pub_key(&raw_key, key, keylen); + if (ret) + return ret; + + /* Copy key in DMA zone */ + rsa_key->e = kzalloc(raw_key.e_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->e) + goto err; + + /* + * Skip leading zeros and copy the positive integer to a buffer + * allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor + * expects a positive integer for the RSA modulus and uses its length as + * decryption output length. + */ + rsa_key->n = caam_read_raw_data(raw_key.n, &raw_key.n_sz); + if (!rsa_key->n) + goto err; + + if (caam_rsa_check_key_length(raw_key.n_sz << 3)) { + caam_rsa_free_key(rsa_key); + return -EINVAL; + } + + rsa_key->e_sz = raw_key.e_sz; + rsa_key->n_sz = raw_key.n_sz; + + memcpy(rsa_key->e, raw_key.e, raw_key.e_sz); + + return 0; +err: + caam_rsa_free_key(rsa_key); + return -ENOMEM; +} + +static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key = {0}; + struct caam_rsa_key *rsa_key = &ctx->key; + int ret; + + /* Free the old RSA key if any */ + caam_rsa_free_key(rsa_key); + + ret = rsa_parse_priv_key(&raw_key, key, keylen); + if (ret) + return ret; + + /* Copy key in DMA zone */ + rsa_key->d = kzalloc(raw_key.d_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->d) + goto err; + + rsa_key->e = kzalloc(raw_key.e_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->e) + goto err; + + /* + * Skip leading zeros and copy the positive integer to a buffer + * allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor + * expects a positive integer for the RSA modulus and uses its length as + * decryption output length. + */ + rsa_key->n = caam_read_raw_data(raw_key.n, &raw_key.n_sz); + if (!rsa_key->n) + goto err; + + if (caam_rsa_check_key_length(raw_key.n_sz << 3)) { + caam_rsa_free_key(rsa_key); + return -EINVAL; + } + + rsa_key->d_sz = raw_key.d_sz; + rsa_key->e_sz = raw_key.e_sz; + rsa_key->n_sz = raw_key.n_sz; + + memcpy(rsa_key->d, raw_key.d, raw_key.d_sz); + memcpy(rsa_key->e, raw_key.e, raw_key.e_sz); + + return 0; + +err: + caam_rsa_free_key(rsa_key); + return -ENOMEM; +} + +static int caam_rsa_max_size(struct crypto_akcipher *tfm) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + + return (key->n) ? key->n_sz : -EINVAL; +} + +/* Per session pkc's driver context creation function */ +static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + + ctx->dev = caam_jr_alloc(); + + if (IS_ERR(ctx->dev)) { + dev_err(ctx->dev, "Job Ring Device allocation for transform failed\n"); + return PTR_ERR(ctx->dev); + } + + return 0; +} + +/* Per session pkc's driver context cleanup function */ +static void caam_rsa_exit_tfm(struct crypto_akcipher *tfm) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + + caam_rsa_free_key(key); + caam_jr_free(ctx->dev); +} + +static struct akcipher_alg caam_rsa = { + .encrypt = caam_rsa_enc, + .decrypt = caam_rsa_dec, + .sign = caam_rsa_dec, + .verify = caam_rsa_enc, + .set_pub_key = caam_rsa_set_pub_key, + .set_priv_key = caam_rsa_set_priv_key, + .max_size = caam_rsa_max_size, + .init = caam_rsa_init_tfm, + .exit = caam_rsa_exit_tfm, + .base = { + .cra_name = "rsa", + .cra_driver_name = "rsa-caam", + .cra_priority = 3000, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct caam_rsa_ctx), + }, +}; + +/* Public Key Cryptography module initialization handler */ +static int __init caam_pkc_init(void) +{ + struct device_node *dev_node; + struct platform_device *pdev; + struct device *ctrldev; + struct caam_drv_private *priv; + u32 cha_inst, pk_inst; + int err; + + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); + if (!dev_node) { + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0"); + if (!dev_node) + return -ENODEV; + } + + pdev = of_find_device_by_node(dev_node); + if (!pdev) { + of_node_put(dev_node); + return -ENODEV; + } + + ctrldev = &pdev->dev; + priv = dev_get_drvdata(ctrldev); + of_node_put(dev_node); + + /* + * If priv is NULL, it's probably because the caam driver wasn't + * properly initialized (e.g. RNG4 init failed). Thus, bail out here. + */ + if (!priv) + return -ENODEV; + + /* Determine public key hardware accelerator presence. */ + cha_inst = rd_reg32(&priv->ctrl->perfmon.cha_num_ls); + pk_inst = (cha_inst & CHA_ID_LS_PK_MASK) >> CHA_ID_LS_PK_SHIFT; + + /* Do not register algorithms if PKHA is not present. */ + if (!pk_inst) + return -ENODEV; + + err = crypto_register_akcipher(&caam_rsa); + if (err) + dev_warn(ctrldev, "%s alg registration failed\n", + caam_rsa.base.cra_driver_name); + else + dev_info(ctrldev, "caam pkc algorithms registered in /proc/crypto\n"); + + return err; +} + +static void __exit caam_pkc_exit(void) +{ + crypto_unregister_akcipher(&caam_rsa); +} + +module_init(caam_pkc_init); +module_exit(caam_pkc_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("FSL CAAM support for PKC functions of crypto API"); +MODULE_AUTHOR("Freescale Semiconductor"); diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h new file mode 100644 index 000000000000..f595d159b112 --- /dev/null +++ b/drivers/crypto/caam/caampkc.h @@ -0,0 +1,70 @@ +/* + * caam - Freescale FSL CAAM support for Public Key Cryptography descriptors + * + * Copyright 2016 Freescale Semiconductor, Inc. + * + * There is no Shared Descriptor for PKC so that the Job Descriptor must carry + * all the desired key parameters, input and output pointers. + */ + +#ifndef _PKC_DESC_H_ +#define _PKC_DESC_H_ +#include "compat.h" +#include "pdb.h" + +/** + * caam_rsa_key - CAAM RSA key structure. Keys are allocated in DMA zone. + * @n : RSA modulus raw byte stream + * @e : RSA public exponent raw byte stream + * @d : RSA private exponent raw byte stream + * @n_sz : length in bytes of RSA modulus n + * @e_sz : length in bytes of RSA public exponent + * @d_sz : length in bytes of RSA private exponent + */ +struct caam_rsa_key { + u8 *n; + u8 *e; + u8 *d; + size_t n_sz; + size_t e_sz; + size_t d_sz; +}; + +/** + * caam_rsa_ctx - per session context. + * @key : RSA key in DMA zone + * @dev : device structure + */ +struct caam_rsa_ctx { + struct caam_rsa_key key; + struct device *dev; +}; + +/** + * rsa_edesc - s/w-extended rsa descriptor + * @src_nents : number of segments in input scatterlist + * @dst_nents : number of segments in output scatterlist + * @sec4_sg_bytes : length of h/w link table + * @sec4_sg_dma : dma address of h/w link table + * @sec4_sg : pointer to h/w link table + * @pdb : specific RSA Protocol Data Block (PDB) + * @hw_desc : descriptor followed by link tables if any + */ +struct rsa_edesc { + int src_nents; + int dst_nents; + int sec4_sg_bytes; + dma_addr_t sec4_sg_dma; + struct sec4_sg_entry *sec4_sg; + union { + struct rsa_pub_pdb pub; + struct rsa_priv_f1_pdb priv_f1; + } pdb; + u32 hw_desc[]; +}; + +/* Descriptor construction primitives. */ +void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb); +void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb); + +#endif diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h index b6955ecdfb3f..7149cd2492e0 100644 --- a/drivers/crypto/caam/compat.h +++ b/drivers/crypto/caam/compat.h @@ -35,8 +35,11 @@ #include <crypto/md5.h> #include <crypto/internal/aead.h> #include <crypto/authenc.h> +#include <crypto/akcipher.h> #include <crypto/scatterwalk.h> #include <crypto/internal/skcipher.h> #include <crypto/internal/hash.h> +#include <crypto/internal/rsa.h> +#include <crypto/internal/akcipher.h> #endif /* !defined(CAAM_COMPAT_H) */ diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 44d30b45f3cc..0ec112ee5204 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -15,6 +15,9 @@ #include "desc_constr.h" #include "error.h" +bool caam_little_end; +EXPORT_SYMBOL(caam_little_end); + /* * i.MX targets tend to have clock control subsystems that can * enable/disable clocking to our device. @@ -106,7 +109,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, if (ctrlpriv->virt_en == 1) { - setbits32(&ctrl->deco_rsr, DECORSR_JR0); + clrsetbits_32(&ctrl->deco_rsr, 0, DECORSR_JR0); while (!(rd_reg32(&ctrl->deco_rsr) & DECORSR_VALID) && --timeout) @@ -115,7 +118,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, timeout = 100000; } - setbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); + clrsetbits_32(&ctrl->deco_rq, 0, DECORR_RQD0ENABLE); while (!(rd_reg32(&ctrl->deco_rq) & DECORR_DEN0) && --timeout) @@ -123,12 +126,12 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, if (!timeout) { dev_err(ctrldev, "failed to acquire DECO 0\n"); - clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); + clrsetbits_32(&ctrl->deco_rq, DECORR_RQD0ENABLE, 0); return -ENODEV; } for (i = 0; i < desc_len(desc); i++) - wr_reg32(&deco->descbuf[i], *(desc + i)); + wr_reg32(&deco->descbuf[i], caam32_to_cpu(*(desc + i))); flags = DECO_JQCR_WHL; /* @@ -139,7 +142,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, flags |= DECO_JQCR_FOUR; /* Instruct the DECO to execute it */ - setbits32(&deco->jr_ctl_hi, flags); + clrsetbits_32(&deco->jr_ctl_hi, 0, flags); timeout = 10000000; do { @@ -158,10 +161,10 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, DECO_OP_STATUS_HI_ERR_MASK; if (ctrlpriv->virt_en == 1) - clrbits32(&ctrl->deco_rsr, DECORSR_JR0); + clrsetbits_32(&ctrl->deco_rsr, DECORSR_JR0, 0); /* Mark the DECO as free */ - clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); + clrsetbits_32(&ctrl->deco_rq, DECORR_RQD0ENABLE, 0); if (!timeout) return -EAGAIN; @@ -349,7 +352,7 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) r4tst = &ctrl->r4tst[0]; /* put RNG4 into program mode */ - setbits32(&r4tst->rtmctl, RTMCTL_PRGM); + clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM); /* * Performance-wise, it does not make sense to @@ -363,7 +366,7 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) >> RTSDCTL_ENT_DLY_SHIFT; if (ent_delay <= val) { /* put RNG4 into run mode */ - clrbits32(&r4tst->rtmctl, RTMCTL_PRGM); + clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, 0); return; } @@ -381,9 +384,9 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) * select raw sampling in both entropy shifter * and statistical checker */ - setbits32(&val, RTMCTL_SAMP_MODE_RAW_ES_SC); + clrsetbits_32(&val, 0, RTMCTL_SAMP_MODE_RAW_ES_SC); /* put RNG4 into run mode */ - clrbits32(&val, RTMCTL_PRGM); + clrsetbits_32(&val, RTMCTL_PRGM, 0); /* write back the control register */ wr_reg32(&r4tst->rtmctl, val); } @@ -402,10 +405,27 @@ int caam_get_era(void) ret = of_property_read_u32(caam_node, "fsl,sec-era", &prop); of_node_put(caam_node); - return IS_ERR_VALUE(ret) ? -ENOTSUPP : prop; + return ret ? -ENOTSUPP : prop; } EXPORT_SYMBOL(caam_get_era); +#ifdef CONFIG_DEBUG_FS +static int caam_debugfs_u64_get(void *data, u64 *val) +{ + *val = caam64_to_cpu(*(u64 *)data); + return 0; +} + +static int caam_debugfs_u32_get(void *data, u64 *val) +{ + *val = caam32_to_cpu(*(u32 *)data); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u32_ro, caam_debugfs_u32_get, NULL, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n"); +#endif + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { @@ -504,6 +524,10 @@ static int caam_probe(struct platform_device *pdev) ret = -ENOMEM; goto disable_caam_emi_slow; } + + caam_little_end = !(bool)(rd_reg32(&ctrl->perfmon.status) & + (CSTA_PLEND | CSTA_ALT_PLEND)); + /* Finding the page size for using the CTPR_MS register */ comp_params = rd_reg32(&ctrl->perfmon.comp_parms_ms); pg_size = (comp_params & CTPR_MS_PG_SZ_MASK) >> CTPR_MS_PG_SZ_SHIFT; @@ -559,9 +583,9 @@ static int caam_probe(struct platform_device *pdev) } if (ctrlpriv->virt_en == 1) - setbits32(&ctrl->jrstart, JRSTART_JR0_START | - JRSTART_JR1_START | JRSTART_JR2_START | - JRSTART_JR3_START); + clrsetbits_32(&ctrl->jrstart, 0, JRSTART_JR0_START | + JRSTART_JR1_START | JRSTART_JR2_START | + JRSTART_JR3_START); if (sizeof(dma_addr_t) == sizeof(u64)) if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) @@ -693,7 +717,7 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_IFMASK; /* Enable RDB bit so that RNG works faster */ - setbits32(&ctrl->scfgr, SCFGR_RDBENABLE); + clrsetbits_32(&ctrl->scfgr, 0, SCFGR_RDBENABLE); } /* NOTE: RTIC detection ought to go here, around Si time */ @@ -719,48 +743,59 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->ctl = debugfs_create_dir("ctl", ctrlpriv->dfs_root); /* Controller-level - performance monitor counters */ + ctrlpriv->ctl_rq_dequeued = - debugfs_create_u64("rq_dequeued", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->req_dequeued); + debugfs_create_file("rq_dequeued", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->req_dequeued, + &caam_fops_u64_ro); ctrlpriv->ctl_ob_enc_req = - debugfs_create_u64("ob_rq_encrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_enc_req); + debugfs_create_file("ob_rq_encrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_enc_req, + &caam_fops_u64_ro); ctrlpriv->ctl_ib_dec_req = - debugfs_create_u64("ib_rq_decrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_dec_req); + debugfs_create_file("ib_rq_decrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_dec_req, + &caam_fops_u64_ro); ctrlpriv->ctl_ob_enc_bytes = - debugfs_create_u64("ob_bytes_encrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_enc_bytes); + debugfs_create_file("ob_bytes_encrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_enc_bytes, + &caam_fops_u64_ro); ctrlpriv->ctl_ob_prot_bytes = - debugfs_create_u64("ob_bytes_protected", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_prot_bytes); + debugfs_create_file("ob_bytes_protected", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_prot_bytes, + &caam_fops_u64_ro); ctrlpriv->ctl_ib_dec_bytes = - debugfs_create_u64("ib_bytes_decrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_dec_bytes); + debugfs_create_file("ib_bytes_decrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_dec_bytes, + &caam_fops_u64_ro); ctrlpriv->ctl_ib_valid_bytes = - debugfs_create_u64("ib_bytes_validated", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_valid_bytes); + debugfs_create_file("ib_bytes_validated", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_valid_bytes, + &caam_fops_u64_ro); /* Controller level - global status values */ ctrlpriv->ctl_faultaddr = - debugfs_create_u64("fault_addr", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->faultaddr); + debugfs_create_file("fault_addr", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->faultaddr, + &caam_fops_u32_ro); ctrlpriv->ctl_faultdetail = - debugfs_create_u32("fault_detail", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->faultdetail); + debugfs_create_file("fault_detail", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->faultdetail, + &caam_fops_u32_ro); ctrlpriv->ctl_faultstatus = - debugfs_create_u32("fault_status", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->status); + debugfs_create_file("fault_status", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->status, + &caam_fops_u32_ro); /* Internal covering keys (useful in non-secure mode only) */ ctrlpriv->ctl_kek_wrap.data = &ctrlpriv->ctrl->kek[0]; diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index 1e93c6af2275..26427c11ad87 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -20,19 +20,18 @@ #define SEC4_SG_BPID_MASK 0x000000ff #define SEC4_SG_BPID_SHIFT 16 #define SEC4_SG_LEN_MASK 0x3fffffff /* Excludes EXT and FINAL */ -#define SEC4_SG_OFFS_MASK 0x00001fff +#define SEC4_SG_OFFSET_MASK 0x00001fff struct sec4_sg_entry { -#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX +#if !defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && \ + defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX) u32 rsvd1; dma_addr_t ptr; #else u64 ptr; #endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_IMX */ u32 len; - u8 rsvd2; - u8 buf_pool_id; - u16 offset; + u32 bpid_offset; }; /* Max size of any CAAM descriptor in 32-bit words, inclusive of header */ @@ -454,6 +453,8 @@ struct sec4_sg_entry { #define OP_PCLID_PUBLICKEYPAIR (0x14 << OP_PCLID_SHIFT) #define OP_PCLID_DSASIGN (0x15 << OP_PCLID_SHIFT) #define OP_PCLID_DSAVERIFY (0x16 << OP_PCLID_SHIFT) +#define OP_PCLID_RSAENC_PUBKEY (0x18 << OP_PCLID_SHIFT) +#define OP_PCLID_RSADEC_PRVKEY (0x19 << OP_PCLID_SHIFT) /* Assuming OP_TYPE = OP_TYPE_DECAP_PROTOCOL/ENCAP_PROTOCOL */ #define OP_PCLID_IPSEC (0x01 << OP_PCLID_SHIFT) diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index 98d07de24fc4..d3869b95e7b1 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -5,6 +5,7 @@ */ #include "desc.h" +#include "regs.h" #define IMMEDIATE (1 << 23) #define CAAM_CMD_SZ sizeof(u32) @@ -30,9 +31,11 @@ LDST_SRCDST_WORD_DECOCTRL | \ (LDOFF_ENABLE_AUTO_NFIFO << LDST_OFFSET_SHIFT)) +extern bool caam_little_end; + static inline int desc_len(u32 *desc) { - return *desc & HDR_DESCLEN_MASK; + return caam32_to_cpu(*desc) & HDR_DESCLEN_MASK; } static inline int desc_bytes(void *desc) @@ -52,7 +55,7 @@ static inline void *sh_desc_pdb(u32 *desc) static inline void init_desc(u32 *desc, u32 options) { - *desc = (options | HDR_ONE) + 1; + *desc = cpu_to_caam32((options | HDR_ONE) + 1); } static inline void init_sh_desc(u32 *desc, u32 options) @@ -74,13 +77,21 @@ static inline void init_job_desc(u32 *desc, u32 options) init_desc(desc, CMD_DESC_HDR | options); } +static inline void init_job_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes) +{ + u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; + + init_job_desc(desc, (((pdb_len + 1) << HDR_START_IDX_SHIFT)) | options); +} + static inline void append_ptr(u32 *desc, dma_addr_t ptr) { dma_addr_t *offset = (dma_addr_t *)desc_end(desc); - *offset = ptr; + *offset = cpu_to_caam_dma(ptr); - (*desc) += CAAM_PTR_SZ / CAAM_CMD_SZ; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + + CAAM_PTR_SZ / CAAM_CMD_SZ); } static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len, @@ -99,16 +110,17 @@ static inline void append_data(u32 *desc, void *data, int len) if (len) /* avoid sparse warning: memcpy with byte count of 0 */ memcpy(offset, data, len); - (*desc) += (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + + (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ); } static inline void append_cmd(u32 *desc, u32 command) { u32 *cmd = desc_end(desc); - *cmd = command; + *cmd = cpu_to_caam32(command); - (*desc)++; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 1); } #define append_u32 append_cmd @@ -117,16 +129,22 @@ static inline void append_u64(u32 *desc, u64 data) { u32 *offset = desc_end(desc); - *offset = upper_32_bits(data); - *(++offset) = lower_32_bits(data); + /* Only 32-bit alignment is guaranteed in descriptor buffer */ + if (caam_little_end) { + *offset = cpu_to_caam32(lower_32_bits(data)); + *(++offset) = cpu_to_caam32(upper_32_bits(data)); + } else { + *offset = cpu_to_caam32(upper_32_bits(data)); + *(++offset) = cpu_to_caam32(lower_32_bits(data)); + } - (*desc) += 2; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 2); } /* Write command without affecting header, and return pointer to next word */ static inline u32 *write_cmd(u32 *desc, u32 command) { - *desc = command; + *desc = cpu_to_caam32(command); return desc + 1; } @@ -168,14 +186,17 @@ APPEND_CMD_RET(move, MOVE) static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd) { - *jump_cmd = *jump_cmd | (desc_len(desc) - (jump_cmd - desc)); + *jump_cmd = cpu_to_caam32(caam32_to_cpu(*jump_cmd) | + (desc_len(desc) - (jump_cmd - desc))); } static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd) { - *move_cmd &= ~MOVE_OFFSET_MASK; - *move_cmd = *move_cmd | ((desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) & - MOVE_OFFSET_MASK); + u32 val = caam32_to_cpu(*move_cmd); + + val &= ~MOVE_OFFSET_MASK; + val |= (desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) & MOVE_OFFSET_MASK; + *move_cmd = cpu_to_caam32(val); } #define APPEND_CMD(cmd, op) \ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 6fd63a600614..a81f551ac222 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -31,7 +31,7 @@ static int caam_reset_hw_jr(struct device *dev) * mask interrupts since we are going to poll * for reset completion status */ - setbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JRCFG_IMSK); /* initiate flush (required prior to reset) */ wr_reg32(&jrp->rregs->jrcommand, JRCR_RESET); @@ -57,7 +57,7 @@ static int caam_reset_hw_jr(struct device *dev) } /* unmask interrupts */ - clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0); return 0; } @@ -147,7 +147,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev) } /* mask valid interrupts */ - setbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JRCFG_IMSK); /* Have valid interrupt at this point, just ACK and trigger */ wr_reg32(&jrp->rregs->jrintstatus, irqstate); @@ -182,7 +182,7 @@ static void caam_jr_dequeue(unsigned long devarg) sw_idx = (tail + i) & (JOBR_DEPTH - 1); if (jrp->outring[hw_idx].desc == - jrp->entinfo[sw_idx].desc_addr_dma) + caam_dma_to_cpu(jrp->entinfo[sw_idx].desc_addr_dma)) break; /* found */ } /* we should never fail to find a matching descriptor */ @@ -200,7 +200,7 @@ static void caam_jr_dequeue(unsigned long devarg) usercall = jrp->entinfo[sw_idx].callbk; userarg = jrp->entinfo[sw_idx].cbkarg; userdesc = jrp->entinfo[sw_idx].desc_addr_virt; - userstatus = jrp->outring[hw_idx].jrstatus; + userstatus = caam32_to_cpu(jrp->outring[hw_idx].jrstatus); /* * Make sure all information from the job has been obtained @@ -236,7 +236,7 @@ static void caam_jr_dequeue(unsigned long devarg) } /* reenable / unmask IRQs */ - clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0); } /** @@ -248,7 +248,7 @@ static void caam_jr_dequeue(unsigned long devarg) struct device *caam_jr_alloc(void) { struct caam_drv_private_jr *jrpriv, *min_jrpriv = NULL; - struct device *dev = NULL; + struct device *dev = ERR_PTR(-ENODEV); int min_tfm_cnt = INT_MAX; int tfm_cnt; @@ -330,7 +330,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, int head, tail, desc_size; dma_addr_t desc_dma; - desc_size = (*desc & HDR_JD_LENGTH_MASK) * sizeof(u32); + desc_size = (caam32_to_cpu(*desc) & HDR_JD_LENGTH_MASK) * sizeof(u32); desc_dma = dma_map_single(dev, desc, desc_size, DMA_TO_DEVICE); if (dma_mapping_error(dev, desc_dma)) { dev_err(dev, "caam_jr_enqueue(): can't map jobdesc\n"); @@ -356,7 +356,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, head_entry->cbkarg = areq; head_entry->desc_addr_dma = desc_dma; - jrp->inpring[jrp->inp_ring_write_index] = desc_dma; + jrp->inpring[jrp->inp_ring_write_index] = cpu_to_caam_dma(desc_dma); /* * Guarantee that the descriptor's DMA address has been written to @@ -444,9 +444,9 @@ static int caam_jr_init(struct device *dev) spin_lock_init(&jrp->outlock); /* Select interrupt coalescing parameters */ - setbits32(&jrp->rregs->rconfig_lo, JOBR_INTC | - (JOBR_INTC_COUNT_THLD << JRCFG_ICDCT_SHIFT) | - (JOBR_INTC_TIME_THLD << JRCFG_ICTT_SHIFT)); + clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JOBR_INTC | + (JOBR_INTC_COUNT_THLD << JRCFG_ICDCT_SHIFT) | + (JOBR_INTC_TIME_THLD << JRCFG_ICTT_SHIFT)); return 0; diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h index 3a87c0cf879a..aaa00dd1c601 100644 --- a/drivers/crypto/caam/pdb.h +++ b/drivers/crypto/caam/pdb.h @@ -1,18 +1,19 @@ /* * CAAM Protocol Data Block (PDB) definition header file * - * Copyright 2008-2012 Freescale Semiconductor, Inc. + * Copyright 2008-2016 Freescale Semiconductor, Inc. * */ #ifndef CAAM_PDB_H #define CAAM_PDB_H +#include "compat.h" /* * PDB- IPSec ESP Header Modification Options */ -#define PDBHMO_ESP_DECAP_SHIFT 12 -#define PDBHMO_ESP_ENCAP_SHIFT 4 +#define PDBHMO_ESP_DECAP_SHIFT 28 +#define PDBHMO_ESP_ENCAP_SHIFT 28 /* * Encap and Decap - Decrement TTL (Hop Limit) - Based on the value of the * Options Byte IP version (IPvsn) field: @@ -32,12 +33,23 @@ */ #define PDBHMO_ESP_DFBIT (0x04 << PDBHMO_ESP_ENCAP_SHIFT) +#define PDBNH_ESP_ENCAP_SHIFT 16 +#define PDBNH_ESP_ENCAP_MASK (0xff << PDBNH_ESP_ENCAP_SHIFT) + +#define PDBHDRLEN_ESP_DECAP_SHIFT 16 +#define PDBHDRLEN_MASK (0x0fff << PDBHDRLEN_ESP_DECAP_SHIFT) + +#define PDB_NH_OFFSET_SHIFT 8 +#define PDB_NH_OFFSET_MASK (0xff << PDB_NH_OFFSET_SHIFT) + /* * PDB - IPSec ESP Encap/Decap Options */ #define PDBOPTS_ESP_ARSNONE 0x00 /* no antireplay window */ #define PDBOPTS_ESP_ARS32 0x40 /* 32-entry antireplay window */ +#define PDBOPTS_ESP_ARS128 0x80 /* 128-entry antireplay window */ #define PDBOPTS_ESP_ARS64 0xc0 /* 64-entry antireplay window */ +#define PDBOPTS_ESP_ARS_MASK 0xc0 /* antireplay window mask */ #define PDBOPTS_ESP_IVSRC 0x20 /* IV comes from internal random gen */ #define PDBOPTS_ESP_ESN 0x10 /* extended sequence included */ #define PDBOPTS_ESP_OUTFMT 0x08 /* output only decapsulation (decap) */ @@ -54,35 +66,73 @@ /* * General IPSec encap/decap PDB definitions */ + +/** + * ipsec_encap_cbc - PDB part for IPsec CBC encapsulation + * @iv: 16-byte array initialization vector + */ struct ipsec_encap_cbc { - u32 iv[4]; + u8 iv[16]; }; +/** + * ipsec_encap_ctr - PDB part for IPsec CTR encapsulation + * @ctr_nonce: 4-byte array nonce + * @ctr_initial: initial count constant + * @iv: initialization vector + */ struct ipsec_encap_ctr { - u32 ctr_nonce; + u8 ctr_nonce[4]; u32 ctr_initial; - u32 iv[2]; + u64 iv; }; +/** + * ipsec_encap_ccm - PDB part for IPsec CCM encapsulation + * @salt: 3-byte array salt (lower 24 bits) + * @ccm_opt: CCM algorithm options - MSB-LSB description: + * b0_flags (8b) - CCM B0; use 0x5B for 8-byte ICV, 0x6B for 12-byte ICV, + * 0x7B for 16-byte ICV (cf. RFC4309, RFC3610) + * ctr_flags (8b) - counter flags; constant equal to 0x3 + * ctr_initial (16b) - initial count constant + * @iv: initialization vector + */ struct ipsec_encap_ccm { - u32 salt; /* lower 24 bits */ - u8 b0_flags; - u8 ctr_flags; - u16 ctr_initial; - u32 iv[2]; + u8 salt[4]; + u32 ccm_opt; + u64 iv; }; +/** + * ipsec_encap_gcm - PDB part for IPsec GCM encapsulation + * @salt: 3-byte array salt (lower 24 bits) + * @rsvd: reserved, do not use + * @iv: initialization vector + */ struct ipsec_encap_gcm { - u32 salt; /* lower 24 bits */ + u8 salt[4]; u32 rsvd1; - u32 iv[2]; + u64 iv; }; +/** + * ipsec_encap_pdb - PDB for IPsec encapsulation + * @options: MSB-LSB description + * hmo (header manipulation options) - 4b + * reserved - 4b + * next header - 8b + * next header offset - 8b + * option flags (depend on selected algorithm) - 8b + * @seq_num_ext_hi: (optional) IPsec Extended Sequence Number (ESN) + * @seq_num: IPsec sequence number + * @spi: IPsec SPI (Security Parameters Index) + * @ip_hdr_len: optional IP Header length (in bytes) + * reserved - 16b + * Opt. IP Hdr Len - 16b + * @ip_hdr: optional IP Header content + */ struct ipsec_encap_pdb { - u8 hmo_rsvd; - u8 ip_nh; - u8 ip_nh_offset; - u8 options; + u32 options; u32 seq_num_ext_hi; u32 seq_num; union { @@ -92,36 +142,65 @@ struct ipsec_encap_pdb { struct ipsec_encap_gcm gcm; }; u32 spi; - u16 rsvd1; - u16 ip_hdr_len; - u32 ip_hdr[0]; /* optional IP Header content */ + u32 ip_hdr_len; + u32 ip_hdr[0]; }; +/** + * ipsec_decap_cbc - PDB part for IPsec CBC decapsulation + * @rsvd: reserved, do not use + */ struct ipsec_decap_cbc { u32 rsvd[2]; }; +/** + * ipsec_decap_ctr - PDB part for IPsec CTR decapsulation + * @ctr_nonce: 4-byte array nonce + * @ctr_initial: initial count constant + */ struct ipsec_decap_ctr { - u32 salt; + u8 ctr_nonce[4]; u32 ctr_initial; }; +/** + * ipsec_decap_ccm - PDB part for IPsec CCM decapsulation + * @salt: 3-byte salt (lower 24 bits) + * @ccm_opt: CCM algorithm options - MSB-LSB description: + * b0_flags (8b) - CCM B0; use 0x5B for 8-byte ICV, 0x6B for 12-byte ICV, + * 0x7B for 16-byte ICV (cf. RFC4309, RFC3610) + * ctr_flags (8b) - counter flags; constant equal to 0x3 + * ctr_initial (16b) - initial count constant + */ struct ipsec_decap_ccm { - u32 salt; - u8 iv_flags; - u8 ctr_flags; - u16 ctr_initial; + u8 salt[4]; + u32 ccm_opt; }; +/** + * ipsec_decap_gcm - PDB part for IPsec GCN decapsulation + * @salt: 4-byte salt + * @rsvd: reserved, do not use + */ struct ipsec_decap_gcm { - u32 salt; + u8 salt[4]; u32 resvd; }; +/** + * ipsec_decap_pdb - PDB for IPsec decapsulation + * @options: MSB-LSB description + * hmo (header manipulation options) - 4b + * IP header length - 12b + * next header offset - 8b + * option flags (depend on selected algorithm) - 8b + * @seq_num_ext_hi: (optional) IPsec Extended Sequence Number (ESN) + * @seq_num: IPsec sequence number + * @anti_replay: Anti-replay window; size depends on ARS (option flags) + */ struct ipsec_decap_pdb { - u16 hmo_ip_hdr_len; - u8 ip_nh_offset; - u8 options; + u32 options; union { struct ipsec_decap_cbc cbc; struct ipsec_decap_ctr ctr; @@ -130,8 +209,7 @@ struct ipsec_decap_pdb { }; u32 seq_num_ext_hi; u32 seq_num; - u32 anti_replay[2]; - u32 end_index[0]; + __be32 anti_replay[4]; }; /* @@ -399,4 +477,52 @@ struct dsa_verify_pdb { u8 *ab; /* only used if ECC processing */ }; +/* RSA Protocol Data Block */ +#define RSA_PDB_SGF_SHIFT 28 +#define RSA_PDB_E_SHIFT 12 +#define RSA_PDB_E_MASK (0xFFF << RSA_PDB_E_SHIFT) +#define RSA_PDB_D_SHIFT 12 +#define RSA_PDB_D_MASK (0xFFF << RSA_PDB_D_SHIFT) + +#define RSA_PDB_SGF_F (0x8 << RSA_PDB_SGF_SHIFT) +#define RSA_PDB_SGF_G (0x4 << RSA_PDB_SGF_SHIFT) +#define RSA_PRIV_PDB_SGF_F (0x4 << RSA_PDB_SGF_SHIFT) +#define RSA_PRIV_PDB_SGF_G (0x8 << RSA_PDB_SGF_SHIFT) + +#define RSA_PRIV_KEY_FRM_1 0 + +/** + * RSA Encrypt Protocol Data Block + * @sgf: scatter-gather field + * @f_dma: dma address of input data + * @g_dma: dma address of encrypted output data + * @n_dma: dma address of RSA modulus + * @e_dma: dma address of RSA public exponent + * @f_len: length in octets of the input data + */ +struct rsa_pub_pdb { + u32 sgf; + dma_addr_t f_dma; + dma_addr_t g_dma; + dma_addr_t n_dma; + dma_addr_t e_dma; + u32 f_len; +} __packed; + +/** + * RSA Decrypt PDB - Private Key Form #1 + * @sgf: scatter-gather field + * @g_dma: dma address of encrypted input data + * @f_dma: dma address of output data + * @n_dma: dma address of RSA modulus + * @d_dma: dma address of RSA private exponent + */ +struct rsa_priv_f1_pdb { + u32 sgf; + dma_addr_t g_dma; + dma_addr_t f_dma; + dma_addr_t n_dma; + dma_addr_t d_dma; +} __packed; + #endif diff --git a/drivers/crypto/caam/pkc_desc.c b/drivers/crypto/caam/pkc_desc.c new file mode 100644 index 000000000000..4e4183e615ea --- /dev/null +++ b/drivers/crypto/caam/pkc_desc.c @@ -0,0 +1,36 @@ +/* + * caam - Freescale FSL CAAM support for Public Key Cryptography descriptors + * + * Copyright 2016 Freescale Semiconductor, Inc. + * + * There is no Shared Descriptor for PKC so that the Job Descriptor must carry + * all the desired key parameters, input and output pointers. + */ +#include "caampkc.h" +#include "desc_constr.h" + +/* Descriptor for RSA Public operation */ +void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb) +{ + init_job_desc_pdb(desc, 0, sizeof(*pdb)); + append_cmd(desc, pdb->sgf); + append_ptr(desc, pdb->f_dma); + append_ptr(desc, pdb->g_dma); + append_ptr(desc, pdb->n_dma); + append_ptr(desc, pdb->e_dma); + append_cmd(desc, pdb->f_len); + append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSAENC_PUBKEY); +} + +/* Descriptor for RSA Private operation - Private Key Form #1 */ +void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb) +{ + init_job_desc_pdb(desc, 0, sizeof(*pdb)); + append_cmd(desc, pdb->sgf); + append_ptr(desc, pdb->g_dma); + append_ptr(desc, pdb->f_dma); + append_ptr(desc, pdb->n_dma); + append_ptr(desc, pdb->d_dma); + append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY | + RSA_PRIV_KEY_FRM_1); +} diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 0ba9c40597dc..b3c5016f6458 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -8,6 +8,7 @@ #define REGS_H #include <linux/types.h> +#include <linux/bitops.h> #include <linux/io.h> /* @@ -65,46 +66,56 @@ * */ -#ifdef CONFIG_ARM -/* These are common macros for Power, put here for ARM */ -#define setbits32(_addr, _v) writel((readl(_addr) | (_v)), (_addr)) -#define clrbits32(_addr, _v) writel((readl(_addr) & ~(_v)), (_addr)) +extern bool caam_little_end; -#define out_arch(type, endian, a, v) __raw_write##type(cpu_to_##endian(v), a) -#define in_arch(type, endian, a) endian##_to_cpu(__raw_read##type(a)) +#define caam_to_cpu(len) \ +static inline u##len caam##len ## _to_cpu(u##len val) \ +{ \ + if (caam_little_end) \ + return le##len ## _to_cpu(val); \ + else \ + return be##len ## _to_cpu(val); \ +} -#define out_le32(a, v) out_arch(l, le32, a, v) -#define in_le32(a) in_arch(l, le32, a) +#define cpu_to_caam(len) \ +static inline u##len cpu_to_caam##len(u##len val) \ +{ \ + if (caam_little_end) \ + return cpu_to_le##len(val); \ + else \ + return cpu_to_be##len(val); \ +} -#define out_be32(a, v) out_arch(l, be32, a, v) -#define in_be32(a) in_arch(l, be32, a) +caam_to_cpu(16) +caam_to_cpu(32) +caam_to_cpu(64) +cpu_to_caam(16) +cpu_to_caam(32) +cpu_to_caam(64) -#define clrsetbits(type, addr, clear, set) \ - out_##type((addr), (in_##type(addr) & ~(clear)) | (set)) +static inline void wr_reg32(void __iomem *reg, u32 data) +{ + if (caam_little_end) + iowrite32(data, reg); + else + iowrite32be(data, reg); +} -#define clrsetbits_be32(addr, clear, set) clrsetbits(be32, addr, clear, set) -#define clrsetbits_le32(addr, clear, set) clrsetbits(le32, addr, clear, set) -#endif +static inline u32 rd_reg32(void __iomem *reg) +{ + if (caam_little_end) + return ioread32(reg); -#ifdef __BIG_ENDIAN -#define wr_reg32(reg, data) out_be32(reg, data) -#define rd_reg32(reg) in_be32(reg) -#define clrsetbits_32(addr, clear, set) clrsetbits_be32(addr, clear, set) -#ifdef CONFIG_64BIT -#define wr_reg64(reg, data) out_be64(reg, data) -#define rd_reg64(reg) in_be64(reg) -#endif -#else -#ifdef __LITTLE_ENDIAN -#define wr_reg32(reg, data) __raw_writel(data, reg) -#define rd_reg32(reg) __raw_readl(reg) -#define clrsetbits_32(addr, clear, set) clrsetbits_le32(addr, clear, set) -#ifdef CONFIG_64BIT -#define wr_reg64(reg, data) __raw_writeq(data, reg) -#define rd_reg64(reg) __raw_readq(reg) -#endif -#endif -#endif + return ioread32be(reg); +} + +static inline void clrsetbits_32(void __iomem *reg, u32 clear, u32 set) +{ + if (caam_little_end) + iowrite32((ioread32(reg) & ~clear) | set, reg); + else + iowrite32be((ioread32be(reg) & ~clear) | set, reg); +} /* * The only users of these wr/rd_reg64 functions is the Job Ring (JR). @@ -123,29 +134,67 @@ * base + 0x0000 : least-significant 32 bits * base + 0x0004 : most-significant 32 bits */ +#ifdef CONFIG_64BIT +static inline void wr_reg64(void __iomem *reg, u64 data) +{ + if (caam_little_end) + iowrite64(data, reg); + else + iowrite64be(data, reg); +} -#ifndef CONFIG_64BIT -#if !defined(CONFIG_CRYPTO_DEV_FSL_CAAM_LE) || \ - defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX) -#define REG64_MS32(reg) ((u32 __iomem *)(reg)) -#define REG64_LS32(reg) ((u32 __iomem *)(reg) + 1) -#else -#define REG64_MS32(reg) ((u32 __iomem *)(reg) + 1) -#define REG64_LS32(reg) ((u32 __iomem *)(reg)) -#endif - -static inline void wr_reg64(u64 __iomem *reg, u64 data) +static inline u64 rd_reg64(void __iomem *reg) { - wr_reg32(REG64_MS32(reg), data >> 32); - wr_reg32(REG64_LS32(reg), data); + if (caam_little_end) + return ioread64(reg); + else + return ioread64be(reg); } -static inline u64 rd_reg64(u64 __iomem *reg) +#else /* CONFIG_64BIT */ +static inline void wr_reg64(void __iomem *reg, u64 data) { - return ((u64)rd_reg32(REG64_MS32(reg)) << 32 | - (u64)rd_reg32(REG64_LS32(reg))); +#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX + if (caam_little_end) { + wr_reg32((u32 __iomem *)(reg) + 1, data >> 32); + wr_reg32((u32 __iomem *)(reg), data); + } else +#endif + { + wr_reg32((u32 __iomem *)(reg), data >> 32); + wr_reg32((u32 __iomem *)(reg) + 1, data); + } } + +static inline u64 rd_reg64(void __iomem *reg) +{ +#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX + if (caam_little_end) + return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 | + (u64)rd_reg32((u32 __iomem *)(reg))); + else #endif + return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 | + (u64)rd_reg32((u32 __iomem *)(reg) + 1)); +} +#endif /* CONFIG_64BIT */ + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT +#ifdef CONFIG_SOC_IMX7D +#define cpu_to_caam_dma(value) \ + (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \ + (u64)cpu_to_caam32(upper_32_bits(value))) +#define caam_dma_to_cpu(value) \ + (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \ + (u64)caam32_to_cpu(upper_32_bits(value))) +#else +#define cpu_to_caam_dma(value) cpu_to_caam64(value) +#define caam_dma_to_cpu(value) caam64_to_cpu(value) +#endif /* CONFIG_SOC_IMX7D */ +#else +#define cpu_to_caam_dma(value) cpu_to_caam32(value) +#define caam_dma_to_cpu(value) caam32_to_cpu(value) +#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */ /* * jr_outentry @@ -249,6 +298,8 @@ struct caam_perfmon { u32 faultliodn; /* FALR - Fault Address LIODN */ u32 faultdetail; /* FADR - Fault Addr Detail */ u32 rsvd2; +#define CSTA_PLEND BIT(10) +#define CSTA_ALT_PLEND BIT(18) u32 status; /* CSTA - CAAM Status */ u64 rsvd3; diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index 12ec6616e89d..19dc64fede0d 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -5,18 +5,19 @@ * */ +#include "regs.h" + struct sec4_sg_entry; /* * convert single dma address to h/w link table format */ static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr, - dma_addr_t dma, u32 len, u32 offset) + dma_addr_t dma, u32 len, u16 offset) { - sec4_sg_ptr->ptr = dma; - sec4_sg_ptr->len = len; - sec4_sg_ptr->buf_pool_id = 0; - sec4_sg_ptr->offset = offset; + sec4_sg_ptr->ptr = cpu_to_caam_dma(dma); + sec4_sg_ptr->len = cpu_to_caam32(len); + sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset & SEC4_SG_OFFSET_MASK); #ifdef DEBUG print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ", DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr, @@ -30,7 +31,7 @@ static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr, */ static inline struct sec4_sg_entry * sg_to_sec4_sg(struct scatterlist *sg, int sg_count, - struct sec4_sg_entry *sec4_sg_ptr, u32 offset) + struct sec4_sg_entry *sec4_sg_ptr, u16 offset) { while (sg_count) { dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg), @@ -48,10 +49,10 @@ sg_to_sec4_sg(struct scatterlist *sg, int sg_count, */ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count, struct sec4_sg_entry *sec4_sg_ptr, - u32 offset) + u16 offset) { sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset); - sec4_sg_ptr->len |= SEC4_SG_LEN_FIN; + sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN); } static inline struct sec4_sg_entry *sg_to_sec4_sg_len( diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig index 6e37845abf8f..2238f77aa248 100644 --- a/drivers/crypto/ccp/Kconfig +++ b/drivers/crypto/ccp/Kconfig @@ -3,6 +3,8 @@ config CRYPTO_DEV_CCP_DD depends on CRYPTO_DEV_CCP default m select HW_RANDOM + select DMA_ENGINE + select DMADEVICES select CRYPTO_SHA1 select CRYPTO_SHA256 help diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile index b750592cc936..ee4d2741b3ab 100644 --- a/drivers/crypto/ccp/Makefile +++ b/drivers/crypto/ccp/Makefile @@ -1,5 +1,9 @@ obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o -ccp-objs := ccp-dev.o ccp-ops.o ccp-dev-v3.o ccp-platform.o +ccp-objs := ccp-dev.o \ + ccp-ops.o \ + ccp-dev-v3.o \ + ccp-platform.o \ + ccp-dmaengine.o ccp-$(CONFIG_PCI) += ccp-pci.o obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c index 52c7395cb8d8..58a4244b4752 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c @@ -14,9 +14,8 @@ #include <linux/sched.h> #include <linux/delay.h> #include <linux/scatterlist.h> -#include <linux/crypto.h> -#include <crypto/algapi.h> #include <crypto/aes.h> +#include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include "ccp-crypto.h" @@ -110,18 +109,16 @@ static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, ctx->u.aes.key_len = key_len / 2; sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); - return crypto_ablkcipher_setkey(ctx->u.aes.tfm_ablkcipher, key, - key_len); + return crypto_skcipher_setkey(ctx->u.aes.tfm_skcipher, key, key_len); } static int ccp_aes_xts_crypt(struct ablkcipher_request *req, unsigned int encrypt) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); unsigned int unit; + u32 unit_size; int ret; if (!ctx->u.aes.key_len) @@ -133,20 +130,31 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req, if (!req->info) return -EINVAL; - for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++) - if (!(req->nbytes & (unit_size_map[unit].size - 1))) - break; + unit_size = CCP_XTS_AES_UNIT_SIZE__LAST; + if (req->nbytes <= unit_size_map[0].size) { + for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++) { + if (!(req->nbytes & (unit_size_map[unit].size - 1))) { + unit_size = unit_size_map[unit].value; + break; + } + } + } - if ((unit_size_map[unit].value == CCP_XTS_AES_UNIT_SIZE__LAST) || + if ((unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) || (ctx->u.aes.key_len != AES_KEYSIZE_128)) { + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->u.aes.tfm_skcipher); + /* Use the fallback to process the request for any * unsupported unit sizes or key sizes */ - ablkcipher_request_set_tfm(req, ctx->u.aes.tfm_ablkcipher); - ret = (encrypt) ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); - + skcipher_request_set_tfm(subreq, ctx->u.aes.tfm_skcipher); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + ret = encrypt ? crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return ret; } @@ -158,7 +166,7 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req, rctx->cmd.engine = CCP_ENGINE_XTS_AES_128; rctx->cmd.u.xts.action = (encrypt) ? CCP_AES_ACTION_ENCRYPT : CCP_AES_ACTION_DECRYPT; - rctx->cmd.u.xts.unit_size = unit_size_map[unit].value; + rctx->cmd.u.xts.unit_size = unit_size; rctx->cmd.u.xts.key = &ctx->u.aes.key_sg; rctx->cmd.u.xts.key_len = ctx->u.aes.key_len; rctx->cmd.u.xts.iv = &rctx->iv_sg; @@ -185,23 +193,21 @@ static int ccp_aes_xts_decrypt(struct ablkcipher_request *req) static int ccp_aes_xts_cra_init(struct crypto_tfm *tfm) { struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_ablkcipher *fallback_tfm; + struct crypto_skcipher *fallback_tfm; ctx->complete = ccp_aes_xts_complete; ctx->u.aes.key_len = 0; - fallback_tfm = crypto_alloc_ablkcipher(crypto_tfm_alg_name(tfm), 0, - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK); + fallback_tfm = crypto_alloc_skcipher("xts(aes)", 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback_tfm)) { - pr_warn("could not load fallback driver %s\n", - crypto_tfm_alg_name(tfm)); + pr_warn("could not load fallback driver xts(aes)\n"); return PTR_ERR(fallback_tfm); } - ctx->u.aes.tfm_ablkcipher = fallback_tfm; + ctx->u.aes.tfm_skcipher = fallback_tfm; - tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx) + - fallback_tfm->base.crt_ablkcipher.reqsize; + tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx); return 0; } @@ -210,9 +216,7 @@ static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm) { struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->u.aes.tfm_ablkcipher) - crypto_free_ablkcipher(ctx->u.aes.tfm_ablkcipher); - ctx->u.aes.tfm_ablkcipher = NULL; + crypto_free_skcipher(ctx->u.aes.tfm_skcipher); } static int ccp_register_aes_xts_alg(struct list_head *head, diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h index a326ec20bfa8..8335b32e815e 100644 --- a/drivers/crypto/ccp/ccp-crypto.h +++ b/drivers/crypto/ccp/ccp-crypto.h @@ -17,7 +17,6 @@ #include <linux/wait.h> #include <linux/pci.h> #include <linux/ccp.h> -#include <linux/crypto.h> #include <crypto/algapi.h> #include <crypto/aes.h> #include <crypto/ctr.h> @@ -69,7 +68,7 @@ static inline struct ccp_crypto_ahash_alg * /***** AES related defines *****/ struct ccp_aes_ctx { /* Fallback cipher for XTS with unsupported unit sizes */ - struct crypto_ablkcipher *tfm_ablkcipher; + struct crypto_skcipher *tfm_skcipher; /* Cipher used to generate CMAC K1/K2 keys */ struct crypto_cipher *tfm_cipher; diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c index 7d5eab49179e..d7a710347967 100644 --- a/drivers/crypto/ccp/ccp-dev-v3.c +++ b/drivers/crypto/ccp/ccp-dev-v3.c @@ -406,6 +406,11 @@ static int ccp_init(struct ccp_device *ccp) goto e_kthread; } + /* Register the DMA engine support */ + ret = ccp_dmaengine_register(ccp); + if (ret) + goto e_hwrng; + ccp_add_device(ccp); /* Enable interrupts */ @@ -413,6 +418,9 @@ static int ccp_init(struct ccp_device *ccp) return 0; +e_hwrng: + hwrng_unregister(&ccp->hwrng); + e_kthread: for (i = 0; i < ccp->cmd_q_count; i++) if (ccp->cmd_q[i].kthread) @@ -436,6 +444,9 @@ static void ccp_destroy(struct ccp_device *ccp) /* Remove this device from the list of available units first */ ccp_del_device(ccp); + /* Unregister the DMA engine */ + ccp_dmaengine_unregister(ccp); + /* Unregister the RNG */ hwrng_unregister(&ccp->hwrng); @@ -515,7 +526,7 @@ static irqreturn_t ccp_irq_handler(int irq, void *data) return IRQ_HANDLED; } -static struct ccp_actions ccp3_actions = { +static const struct ccp_actions ccp3_actions = { .perform_aes = ccp_perform_aes, .perform_xts_aes = ccp_perform_xts_aes, .perform_sha = ccp_perform_sha, diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c index 4dbc18727235..87b9f2bfa623 100644 --- a/drivers/crypto/ccp/ccp-dev.c +++ b/drivers/crypto/ccp/ccp-dev.c @@ -16,7 +16,7 @@ #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/spinlock.h> -#include <linux/rwlock_types.h> +#include <linux/spinlock_types.h> #include <linux/types.h> #include <linux/mutex.h> #include <linux/delay.h> diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index 7745d0be491d..bd41ffceff82 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h @@ -22,6 +22,9 @@ #include <linux/dmapool.h> #include <linux/hw_random.h> #include <linux/bitops.h> +#include <linux/interrupt.h> +#include <linux/irqreturn.h> +#include <linux/dmaengine.h> #define MAX_CCP_NAME_LEN 16 #define MAX_DMAPOOL_NAME_LEN 32 @@ -159,7 +162,7 @@ struct ccp_actions { /* Structure to hold CCP version-specific values */ struct ccp_vdata { unsigned int version; - struct ccp_actions *perform; + const struct ccp_actions *perform; }; extern struct ccp_vdata ccpv3; @@ -167,6 +170,39 @@ extern struct ccp_vdata ccpv3; struct ccp_device; struct ccp_cmd; +struct ccp_dma_cmd { + struct list_head entry; + + struct ccp_cmd ccp_cmd; +}; + +struct ccp_dma_desc { + struct list_head entry; + + struct ccp_device *ccp; + + struct list_head pending; + struct list_head active; + + enum dma_status status; + struct dma_async_tx_descriptor tx_desc; + size_t len; +}; + +struct ccp_dma_chan { + struct ccp_device *ccp; + + spinlock_t lock; + struct list_head pending; + struct list_head active; + struct list_head complete; + + struct tasklet_struct cleanup_tasklet; + + enum dma_status status; + struct dma_chan dma_chan; +}; + struct ccp_cmd_queue { struct ccp_device *ccp; @@ -261,6 +297,14 @@ struct ccp_device { unsigned int hwrng_retries; /* + * Support for the CCP DMA capabilities + */ + struct dma_device dma_dev; + struct ccp_dma_chan *ccp_dma_chan; + struct kmem_cache *dma_cmd_cache; + struct kmem_cache *dma_desc_cache; + + /* * A counter used to generate job-ids for cmds submitted to the CCP */ atomic_t current_id ____cacheline_aligned; @@ -418,4 +462,7 @@ int ccp_cmd_queue_thread(void *data); int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd); +int ccp_dmaengine_register(struct ccp_device *ccp); +void ccp_dmaengine_unregister(struct ccp_device *ccp); + #endif diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c new file mode 100644 index 000000000000..94f77b0f9ae7 --- /dev/null +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -0,0 +1,727 @@ +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2016 Advanced Micro Devices, Inc. + * + * Author: Gary R Hook <gary.hook@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/dmaengine.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/ccp.h> + +#include "ccp-dev.h" +#include "../../dma/dmaengine.h" + +#define CCP_DMA_WIDTH(_mask) \ +({ \ + u64 mask = _mask + 1; \ + (mask == 0) ? 64 : fls64(mask); \ +}) + +static void ccp_free_cmd_resources(struct ccp_device *ccp, + struct list_head *list) +{ + struct ccp_dma_cmd *cmd, *ctmp; + + list_for_each_entry_safe(cmd, ctmp, list, entry) { + list_del(&cmd->entry); + kmem_cache_free(ccp->dma_cmd_cache, cmd); + } +} + +static void ccp_free_desc_resources(struct ccp_device *ccp, + struct list_head *list) +{ + struct ccp_dma_desc *desc, *dtmp; + + list_for_each_entry_safe(desc, dtmp, list, entry) { + ccp_free_cmd_resources(ccp, &desc->active); + ccp_free_cmd_resources(ccp, &desc->pending); + + list_del(&desc->entry); + kmem_cache_free(ccp->dma_desc_cache, desc); + } +} + +static void ccp_free_chan_resources(struct dma_chan *dma_chan) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + unsigned long flags; + + dev_dbg(chan->ccp->dev, "%s - chan=%p\n", __func__, chan); + + spin_lock_irqsave(&chan->lock, flags); + + ccp_free_desc_resources(chan->ccp, &chan->complete); + ccp_free_desc_resources(chan->ccp, &chan->active); + ccp_free_desc_resources(chan->ccp, &chan->pending); + + spin_unlock_irqrestore(&chan->lock, flags); +} + +static void ccp_cleanup_desc_resources(struct ccp_device *ccp, + struct list_head *list) +{ + struct ccp_dma_desc *desc, *dtmp; + + list_for_each_entry_safe_reverse(desc, dtmp, list, entry) { + if (!async_tx_test_ack(&desc->tx_desc)) + continue; + + dev_dbg(ccp->dev, "%s - desc=%p\n", __func__, desc); + + ccp_free_cmd_resources(ccp, &desc->active); + ccp_free_cmd_resources(ccp, &desc->pending); + + list_del(&desc->entry); + kmem_cache_free(ccp->dma_desc_cache, desc); + } +} + +static void ccp_do_cleanup(unsigned long data) +{ + struct ccp_dma_chan *chan = (struct ccp_dma_chan *)data; + unsigned long flags; + + dev_dbg(chan->ccp->dev, "%s - chan=%s\n", __func__, + dma_chan_name(&chan->dma_chan)); + + spin_lock_irqsave(&chan->lock, flags); + + ccp_cleanup_desc_resources(chan->ccp, &chan->complete); + + spin_unlock_irqrestore(&chan->lock, flags); +} + +static int ccp_issue_next_cmd(struct ccp_dma_desc *desc) +{ + struct ccp_dma_cmd *cmd; + int ret; + + cmd = list_first_entry(&desc->pending, struct ccp_dma_cmd, entry); + list_move(&cmd->entry, &desc->active); + + dev_dbg(desc->ccp->dev, "%s - tx %d, cmd=%p\n", __func__, + desc->tx_desc.cookie, cmd); + + ret = ccp_enqueue_cmd(&cmd->ccp_cmd); + if (!ret || (ret == -EINPROGRESS) || (ret == -EBUSY)) + return 0; + + dev_dbg(desc->ccp->dev, "%s - error: ret=%d, tx %d, cmd=%p\n", __func__, + ret, desc->tx_desc.cookie, cmd); + + return ret; +} + +static void ccp_free_active_cmd(struct ccp_dma_desc *desc) +{ + struct ccp_dma_cmd *cmd; + + cmd = list_first_entry_or_null(&desc->active, struct ccp_dma_cmd, + entry); + if (!cmd) + return; + + dev_dbg(desc->ccp->dev, "%s - freeing tx %d cmd=%p\n", + __func__, desc->tx_desc.cookie, cmd); + + list_del(&cmd->entry); + kmem_cache_free(desc->ccp->dma_cmd_cache, cmd); +} + +static struct ccp_dma_desc *__ccp_next_dma_desc(struct ccp_dma_chan *chan, + struct ccp_dma_desc *desc) +{ + /* Move current DMA descriptor to the complete list */ + if (desc) + list_move(&desc->entry, &chan->complete); + + /* Get the next DMA descriptor on the active list */ + desc = list_first_entry_or_null(&chan->active, struct ccp_dma_desc, + entry); + + return desc; +} + +static struct ccp_dma_desc *ccp_handle_active_desc(struct ccp_dma_chan *chan, + struct ccp_dma_desc *desc) +{ + struct dma_async_tx_descriptor *tx_desc; + unsigned long flags; + + /* Loop over descriptors until one is found with commands */ + do { + if (desc) { + /* Remove the DMA command from the list and free it */ + ccp_free_active_cmd(desc); + + if (!list_empty(&desc->pending)) { + /* No errors, keep going */ + if (desc->status != DMA_ERROR) + return desc; + + /* Error, free remaining commands and move on */ + ccp_free_cmd_resources(desc->ccp, + &desc->pending); + } + + tx_desc = &desc->tx_desc; + } else { + tx_desc = NULL; + } + + spin_lock_irqsave(&chan->lock, flags); + + if (desc) { + if (desc->status != DMA_ERROR) + desc->status = DMA_COMPLETE; + + dev_dbg(desc->ccp->dev, + "%s - tx %d complete, status=%u\n", __func__, + desc->tx_desc.cookie, desc->status); + + dma_cookie_complete(tx_desc); + } + + desc = __ccp_next_dma_desc(chan, desc); + + spin_unlock_irqrestore(&chan->lock, flags); + + if (tx_desc) { + if (tx_desc->callback && + (tx_desc->flags & DMA_PREP_INTERRUPT)) + tx_desc->callback(tx_desc->callback_param); + + dma_run_dependencies(tx_desc); + } + } while (desc); + + return NULL; +} + +static struct ccp_dma_desc *__ccp_pending_to_active(struct ccp_dma_chan *chan) +{ + struct ccp_dma_desc *desc; + + if (list_empty(&chan->pending)) + return NULL; + + desc = list_empty(&chan->active) + ? list_first_entry(&chan->pending, struct ccp_dma_desc, entry) + : NULL; + + list_splice_tail_init(&chan->pending, &chan->active); + + return desc; +} + +static void ccp_cmd_callback(void *data, int err) +{ + struct ccp_dma_desc *desc = data; + struct ccp_dma_chan *chan; + int ret; + + if (err == -EINPROGRESS) + return; + + chan = container_of(desc->tx_desc.chan, struct ccp_dma_chan, + dma_chan); + + dev_dbg(chan->ccp->dev, "%s - tx %d callback, err=%d\n", + __func__, desc->tx_desc.cookie, err); + + if (err) + desc->status = DMA_ERROR; + + while (true) { + /* Check for DMA descriptor completion */ + desc = ccp_handle_active_desc(chan, desc); + + /* Don't submit cmd if no descriptor or DMA is paused */ + if (!desc || (chan->status == DMA_PAUSED)) + break; + + ret = ccp_issue_next_cmd(desc); + if (!ret) + break; + + desc->status = DMA_ERROR; + } + + tasklet_schedule(&chan->cleanup_tasklet); +} + +static dma_cookie_t ccp_tx_submit(struct dma_async_tx_descriptor *tx_desc) +{ + struct ccp_dma_desc *desc = container_of(tx_desc, struct ccp_dma_desc, + tx_desc); + struct ccp_dma_chan *chan; + dma_cookie_t cookie; + unsigned long flags; + + chan = container_of(tx_desc->chan, struct ccp_dma_chan, dma_chan); + + spin_lock_irqsave(&chan->lock, flags); + + cookie = dma_cookie_assign(tx_desc); + list_add_tail(&desc->entry, &chan->pending); + + spin_unlock_irqrestore(&chan->lock, flags); + + dev_dbg(chan->ccp->dev, "%s - added tx descriptor %d to pending list\n", + __func__, cookie); + + return cookie; +} + +static struct ccp_dma_cmd *ccp_alloc_dma_cmd(struct ccp_dma_chan *chan) +{ + struct ccp_dma_cmd *cmd; + + cmd = kmem_cache_alloc(chan->ccp->dma_cmd_cache, GFP_NOWAIT); + if (cmd) + memset(cmd, 0, sizeof(*cmd)); + + return cmd; +} + +static struct ccp_dma_desc *ccp_alloc_dma_desc(struct ccp_dma_chan *chan, + unsigned long flags) +{ + struct ccp_dma_desc *desc; + + desc = kmem_cache_alloc(chan->ccp->dma_desc_cache, GFP_NOWAIT); + if (!desc) + return NULL; + + memset(desc, 0, sizeof(*desc)); + + dma_async_tx_descriptor_init(&desc->tx_desc, &chan->dma_chan); + desc->tx_desc.flags = flags; + desc->tx_desc.tx_submit = ccp_tx_submit; + desc->ccp = chan->ccp; + INIT_LIST_HEAD(&desc->pending); + INIT_LIST_HEAD(&desc->active); + desc->status = DMA_IN_PROGRESS; + + return desc; +} + +static struct ccp_dma_desc *ccp_create_desc(struct dma_chan *dma_chan, + struct scatterlist *dst_sg, + unsigned int dst_nents, + struct scatterlist *src_sg, + unsigned int src_nents, + unsigned long flags) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_device *ccp = chan->ccp; + struct ccp_dma_desc *desc; + struct ccp_dma_cmd *cmd; + struct ccp_cmd *ccp_cmd; + struct ccp_passthru_nomap_engine *ccp_pt; + unsigned int src_offset, src_len; + unsigned int dst_offset, dst_len; + unsigned int len; + unsigned long sflags; + size_t total_len; + + if (!dst_sg || !src_sg) + return NULL; + + if (!dst_nents || !src_nents) + return NULL; + + desc = ccp_alloc_dma_desc(chan, flags); + if (!desc) + return NULL; + + total_len = 0; + + src_len = sg_dma_len(src_sg); + src_offset = 0; + + dst_len = sg_dma_len(dst_sg); + dst_offset = 0; + + while (true) { + if (!src_len) { + src_nents--; + if (!src_nents) + break; + + src_sg = sg_next(src_sg); + if (!src_sg) + break; + + src_len = sg_dma_len(src_sg); + src_offset = 0; + continue; + } + + if (!dst_len) { + dst_nents--; + if (!dst_nents) + break; + + dst_sg = sg_next(dst_sg); + if (!dst_sg) + break; + + dst_len = sg_dma_len(dst_sg); + dst_offset = 0; + continue; + } + + len = min(dst_len, src_len); + + cmd = ccp_alloc_dma_cmd(chan); + if (!cmd) + goto err; + + ccp_cmd = &cmd->ccp_cmd; + ccp_pt = &ccp_cmd->u.passthru_nomap; + ccp_cmd->flags = CCP_CMD_MAY_BACKLOG; + ccp_cmd->flags |= CCP_CMD_PASSTHRU_NO_DMA_MAP; + ccp_cmd->engine = CCP_ENGINE_PASSTHRU; + ccp_pt->bit_mod = CCP_PASSTHRU_BITWISE_NOOP; + ccp_pt->byte_swap = CCP_PASSTHRU_BYTESWAP_NOOP; + ccp_pt->src_dma = sg_dma_address(src_sg) + src_offset; + ccp_pt->dst_dma = sg_dma_address(dst_sg) + dst_offset; + ccp_pt->src_len = len; + ccp_pt->final = 1; + ccp_cmd->callback = ccp_cmd_callback; + ccp_cmd->data = desc; + + list_add_tail(&cmd->entry, &desc->pending); + + dev_dbg(ccp->dev, + "%s - cmd=%p, src=%pad, dst=%pad, len=%llu\n", __func__, + cmd, &ccp_pt->src_dma, + &ccp_pt->dst_dma, ccp_pt->src_len); + + total_len += len; + + src_len -= len; + src_offset += len; + + dst_len -= len; + dst_offset += len; + } + + desc->len = total_len; + + if (list_empty(&desc->pending)) + goto err; + + dev_dbg(ccp->dev, "%s - desc=%p\n", __func__, desc); + + spin_lock_irqsave(&chan->lock, sflags); + + list_add_tail(&desc->entry, &chan->pending); + + spin_unlock_irqrestore(&chan->lock, sflags); + + return desc; + +err: + ccp_free_cmd_resources(ccp, &desc->pending); + kmem_cache_free(ccp->dma_desc_cache, desc); + + return NULL; +} + +static struct dma_async_tx_descriptor *ccp_prep_dma_memcpy( + struct dma_chan *dma_chan, dma_addr_t dst, dma_addr_t src, size_t len, + unsigned long flags) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + struct scatterlist dst_sg, src_sg; + + dev_dbg(chan->ccp->dev, + "%s - src=%pad, dst=%pad, len=%zu, flags=%#lx\n", + __func__, &src, &dst, len, flags); + + sg_init_table(&dst_sg, 1); + sg_dma_address(&dst_sg) = dst; + sg_dma_len(&dst_sg) = len; + + sg_init_table(&src_sg, 1); + sg_dma_address(&src_sg) = src; + sg_dma_len(&src_sg) = len; + + desc = ccp_create_desc(dma_chan, &dst_sg, 1, &src_sg, 1, flags); + if (!desc) + return NULL; + + return &desc->tx_desc; +} + +static struct dma_async_tx_descriptor *ccp_prep_dma_sg( + struct dma_chan *dma_chan, struct scatterlist *dst_sg, + unsigned int dst_nents, struct scatterlist *src_sg, + unsigned int src_nents, unsigned long flags) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + + dev_dbg(chan->ccp->dev, + "%s - src=%p, src_nents=%u dst=%p, dst_nents=%u, flags=%#lx\n", + __func__, src_sg, src_nents, dst_sg, dst_nents, flags); + + desc = ccp_create_desc(dma_chan, dst_sg, dst_nents, src_sg, src_nents, + flags); + if (!desc) + return NULL; + + return &desc->tx_desc; +} + +static struct dma_async_tx_descriptor *ccp_prep_dma_interrupt( + struct dma_chan *dma_chan, unsigned long flags) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + + desc = ccp_alloc_dma_desc(chan, flags); + if (!desc) + return NULL; + + return &desc->tx_desc; +} + +static void ccp_issue_pending(struct dma_chan *dma_chan) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + unsigned long flags; + + dev_dbg(chan->ccp->dev, "%s\n", __func__); + + spin_lock_irqsave(&chan->lock, flags); + + desc = __ccp_pending_to_active(chan); + + spin_unlock_irqrestore(&chan->lock, flags); + + /* If there was nothing active, start processing */ + if (desc) + ccp_cmd_callback(desc, 0); +} + +static enum dma_status ccp_tx_status(struct dma_chan *dma_chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + enum dma_status ret; + unsigned long flags; + + if (chan->status == DMA_PAUSED) { + ret = DMA_PAUSED; + goto out; + } + + ret = dma_cookie_status(dma_chan, cookie, state); + if (ret == DMA_COMPLETE) { + spin_lock_irqsave(&chan->lock, flags); + + /* Get status from complete chain, if still there */ + list_for_each_entry(desc, &chan->complete, entry) { + if (desc->tx_desc.cookie != cookie) + continue; + + ret = desc->status; + break; + } + + spin_unlock_irqrestore(&chan->lock, flags); + } + +out: + dev_dbg(chan->ccp->dev, "%s - %u\n", __func__, ret); + + return ret; +} + +static int ccp_pause(struct dma_chan *dma_chan) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + + chan->status = DMA_PAUSED; + + /*TODO: Wait for active DMA to complete before returning? */ + + return 0; +} + +static int ccp_resume(struct dma_chan *dma_chan) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + + desc = list_first_entry_or_null(&chan->active, struct ccp_dma_desc, + entry); + + spin_unlock_irqrestore(&chan->lock, flags); + + /* Indicate the channel is running again */ + chan->status = DMA_IN_PROGRESS; + + /* If there was something active, re-start */ + if (desc) + ccp_cmd_callback(desc, 0); + + return 0; +} + +static int ccp_terminate_all(struct dma_chan *dma_chan) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + unsigned long flags; + + dev_dbg(chan->ccp->dev, "%s\n", __func__); + + /*TODO: Wait for active DMA to complete before continuing */ + + spin_lock_irqsave(&chan->lock, flags); + + /*TODO: Purge the complete list? */ + ccp_free_desc_resources(chan->ccp, &chan->active); + ccp_free_desc_resources(chan->ccp, &chan->pending); + + spin_unlock_irqrestore(&chan->lock, flags); + + return 0; +} + +int ccp_dmaengine_register(struct ccp_device *ccp) +{ + struct ccp_dma_chan *chan; + struct dma_device *dma_dev = &ccp->dma_dev; + struct dma_chan *dma_chan; + char *dma_cmd_cache_name; + char *dma_desc_cache_name; + unsigned int i; + int ret; + + ccp->ccp_dma_chan = devm_kcalloc(ccp->dev, ccp->cmd_q_count, + sizeof(*(ccp->ccp_dma_chan)), + GFP_KERNEL); + if (!ccp->ccp_dma_chan) + return -ENOMEM; + + dma_cmd_cache_name = devm_kasprintf(ccp->dev, GFP_KERNEL, + "%s-dmaengine-cmd-cache", + ccp->name); + if (!dma_cmd_cache_name) + return -ENOMEM; + + ccp->dma_cmd_cache = kmem_cache_create(dma_cmd_cache_name, + sizeof(struct ccp_dma_cmd), + sizeof(void *), + SLAB_HWCACHE_ALIGN, NULL); + if (!ccp->dma_cmd_cache) + return -ENOMEM; + + dma_desc_cache_name = devm_kasprintf(ccp->dev, GFP_KERNEL, + "%s-dmaengine-desc-cache", + ccp->name); + if (!dma_cmd_cache_name) + return -ENOMEM; + ccp->dma_desc_cache = kmem_cache_create(dma_desc_cache_name, + sizeof(struct ccp_dma_desc), + sizeof(void *), + SLAB_HWCACHE_ALIGN, NULL); + if (!ccp->dma_desc_cache) { + ret = -ENOMEM; + goto err_cache; + } + + dma_dev->dev = ccp->dev; + dma_dev->src_addr_widths = CCP_DMA_WIDTH(dma_get_mask(ccp->dev)); + dma_dev->dst_addr_widths = CCP_DMA_WIDTH(dma_get_mask(ccp->dev)); + dma_dev->directions = DMA_MEM_TO_MEM; + dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_cap_set(DMA_SG, dma_dev->cap_mask); + dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask); + + INIT_LIST_HEAD(&dma_dev->channels); + for (i = 0; i < ccp->cmd_q_count; i++) { + chan = ccp->ccp_dma_chan + i; + dma_chan = &chan->dma_chan; + + chan->ccp = ccp; + + spin_lock_init(&chan->lock); + INIT_LIST_HEAD(&chan->pending); + INIT_LIST_HEAD(&chan->active); + INIT_LIST_HEAD(&chan->complete); + + tasklet_init(&chan->cleanup_tasklet, ccp_do_cleanup, + (unsigned long)chan); + + dma_chan->device = dma_dev; + dma_cookie_init(dma_chan); + + list_add_tail(&dma_chan->device_node, &dma_dev->channels); + } + + dma_dev->device_free_chan_resources = ccp_free_chan_resources; + dma_dev->device_prep_dma_memcpy = ccp_prep_dma_memcpy; + dma_dev->device_prep_dma_sg = ccp_prep_dma_sg; + dma_dev->device_prep_dma_interrupt = ccp_prep_dma_interrupt; + dma_dev->device_issue_pending = ccp_issue_pending; + dma_dev->device_tx_status = ccp_tx_status; + dma_dev->device_pause = ccp_pause; + dma_dev->device_resume = ccp_resume; + dma_dev->device_terminate_all = ccp_terminate_all; + + ret = dma_async_device_register(dma_dev); + if (ret) + goto err_reg; + + return 0; + +err_reg: + kmem_cache_destroy(ccp->dma_desc_cache); + +err_cache: + kmem_cache_destroy(ccp->dma_cmd_cache); + + return ret; +} + +void ccp_dmaengine_unregister(struct ccp_device *ccp) +{ + struct dma_device *dma_dev = &ccp->dma_dev; + + dma_async_device_unregister(dma_dev); + + kmem_cache_destroy(ccp->dma_desc_cache); + kmem_cache_destroy(ccp->dma_cmd_cache); +} diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index eefdf595f758..ffa2891035ac 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -1427,6 +1427,70 @@ e_mask: return ret; } +static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q, + struct ccp_cmd *cmd) +{ + struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap; + struct ccp_dm_workarea mask; + struct ccp_op op; + int ret; + + if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1))) + return -EINVAL; + + if (!pt->src_dma || !pt->dst_dma) + return -EINVAL; + + if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { + if (pt->mask_len != CCP_PASSTHRU_MASKSIZE) + return -EINVAL; + if (!pt->mask) + return -EINVAL; + } + + BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1); + + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = ccp_gen_jobid(cmd_q->ccp); + + if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { + /* Load the mask */ + op.ksb_key = cmd_q->ksb_key; + + mask.length = pt->mask_len; + mask.dma.address = pt->mask; + mask.dma.length = pt->mask_len; + + ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key, + CCP_PASSTHRU_BYTESWAP_NOOP); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + return ret; + } + } + + /* Send data to the CCP Passthru engine */ + op.eom = 1; + op.soc = 1; + + op.src.type = CCP_MEMTYPE_SYSTEM; + op.src.u.dma.address = pt->src_dma; + op.src.u.dma.offset = 0; + op.src.u.dma.length = pt->src_len; + + op.dst.type = CCP_MEMTYPE_SYSTEM; + op.dst.u.dma.address = pt->dst_dma; + op.dst.u.dma.offset = 0; + op.dst.u.dma.length = pt->src_len; + + ret = cmd_q->ccp->vdata->perform->perform_passthru(&op); + if (ret) + cmd->engine_error = cmd_q->cmd_error; + + return ret; +} + static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_ecc_engine *ecc = &cmd->u.ecc; @@ -1762,7 +1826,10 @@ int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) ret = ccp_run_rsa_cmd(cmd_q, cmd); break; case CCP_ENGINE_PASSTHRU: - ret = ccp_run_passthru_cmd(cmd_q, cmd); + if (cmd->flags & CCP_CMD_PASSTHRU_NO_DMA_MAP) + ret = ccp_run_passthru_nomap_cmd(cmd_q, cmd); + else + ret = ccp_run_passthru_cmd(cmd_q, cmd); break; case CCP_ENGINE_ECC: ret = ccp_run_ecc_cmd(cmd_q, cmd); diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index 80239ae69527..d64af8625d7e 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -31,22 +31,42 @@ #include "cesa.h" +/* Limit of the crypto queue before reaching the backlog */ +#define CESA_CRYPTO_DEFAULT_MAX_QLEN 128 + static int allhwsupport = !IS_ENABLED(CONFIG_CRYPTO_DEV_MV_CESA); module_param_named(allhwsupport, allhwsupport, int, 0444); MODULE_PARM_DESC(allhwsupport, "Enable support for all hardware (even it if overlaps with the mv_cesa driver)"); struct mv_cesa_dev *cesa_dev; -static void mv_cesa_dequeue_req_unlocked(struct mv_cesa_engine *engine) +struct crypto_async_request * +mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine, + struct crypto_async_request **backlog) { - struct crypto_async_request *req, *backlog; + struct crypto_async_request *req; + + *backlog = crypto_get_backlog(&engine->queue); + req = crypto_dequeue_request(&engine->queue); + + if (!req) + return NULL; + + return req; +} + +static void mv_cesa_rearm_engine(struct mv_cesa_engine *engine) +{ + struct crypto_async_request *req = NULL, *backlog = NULL; struct mv_cesa_ctx *ctx; - spin_lock_bh(&cesa_dev->lock); - backlog = crypto_get_backlog(&cesa_dev->queue); - req = crypto_dequeue_request(&cesa_dev->queue); - engine->req = req; - spin_unlock_bh(&cesa_dev->lock); + + spin_lock_bh(&engine->lock); + if (!engine->req) { + req = mv_cesa_dequeue_req_locked(engine, &backlog); + engine->req = req; + } + spin_unlock_bh(&engine->lock); if (!req) return; @@ -55,8 +75,47 @@ static void mv_cesa_dequeue_req_unlocked(struct mv_cesa_engine *engine) backlog->complete(backlog, -EINPROGRESS); ctx = crypto_tfm_ctx(req->tfm); - ctx->ops->prepare(req, engine); ctx->ops->step(req); + + return; +} + +static int mv_cesa_std_process(struct mv_cesa_engine *engine, u32 status) +{ + struct crypto_async_request *req; + struct mv_cesa_ctx *ctx; + int res; + + req = engine->req; + ctx = crypto_tfm_ctx(req->tfm); + res = ctx->ops->process(req, status); + + if (res == 0) { + ctx->ops->complete(req); + mv_cesa_engine_enqueue_complete_request(engine, req); + } else if (res == -EINPROGRESS) { + ctx->ops->step(req); + } + + return res; +} + +static int mv_cesa_int_process(struct mv_cesa_engine *engine, u32 status) +{ + if (engine->chain.first && engine->chain.last) + return mv_cesa_tdma_process(engine, status); + + return mv_cesa_std_process(engine, status); +} + +static inline void +mv_cesa_complete_req(struct mv_cesa_ctx *ctx, struct crypto_async_request *req, + int res) +{ + ctx->ops->cleanup(req); + local_bh_disable(); + req->complete(req, res); + local_bh_enable(); } static irqreturn_t mv_cesa_int(int irq, void *priv) @@ -83,49 +142,55 @@ static irqreturn_t mv_cesa_int(int irq, void *priv) writel(~status, engine->regs + CESA_SA_FPGA_INT_STATUS); writel(~status, engine->regs + CESA_SA_INT_STATUS); + /* Process fetched requests */ + res = mv_cesa_int_process(engine, status & mask); ret = IRQ_HANDLED; + spin_lock_bh(&engine->lock); req = engine->req; + if (res != -EINPROGRESS) + engine->req = NULL; spin_unlock_bh(&engine->lock); - if (req) { - ctx = crypto_tfm_ctx(req->tfm); - res = ctx->ops->process(req, status & mask); - if (res != -EINPROGRESS) { - spin_lock_bh(&engine->lock); - engine->req = NULL; - mv_cesa_dequeue_req_unlocked(engine); - spin_unlock_bh(&engine->lock); - ctx->ops->cleanup(req); - local_bh_disable(); - req->complete(req, res); - local_bh_enable(); - } else { - ctx->ops->step(req); - } + + ctx = crypto_tfm_ctx(req->tfm); + + if (res && res != -EINPROGRESS) + mv_cesa_complete_req(ctx, req, res); + + /* Launch the next pending request */ + mv_cesa_rearm_engine(engine); + + /* Iterate over the complete queue */ + while (true) { + req = mv_cesa_engine_dequeue_complete_request(engine); + if (!req) + break; + + mv_cesa_complete_req(ctx, req, 0); } } return ret; } -int mv_cesa_queue_req(struct crypto_async_request *req) +int mv_cesa_queue_req(struct crypto_async_request *req, + struct mv_cesa_req *creq) { int ret; - int i; + struct mv_cesa_engine *engine = creq->engine; - spin_lock_bh(&cesa_dev->lock); - ret = crypto_enqueue_request(&cesa_dev->queue, req); - spin_unlock_bh(&cesa_dev->lock); + spin_lock_bh(&engine->lock); + ret = crypto_enqueue_request(&engine->queue, req); + if ((mv_cesa_req_get_type(creq) == CESA_DMA_REQ) && + (ret == -EINPROGRESS || + (ret == -EBUSY && req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG))) + mv_cesa_tdma_chain(engine, creq); + spin_unlock_bh(&engine->lock); if (ret != -EINPROGRESS) return ret; - for (i = 0; i < cesa_dev->caps->nengines; i++) { - spin_lock_bh(&cesa_dev->engines[i].lock); - if (!cesa_dev->engines[i].req) - mv_cesa_dequeue_req_unlocked(&cesa_dev->engines[i]); - spin_unlock_bh(&cesa_dev->engines[i].lock); - } + mv_cesa_rearm_engine(engine); return -EINPROGRESS; } @@ -309,6 +374,10 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa) if (!dma->padding_pool) return -ENOMEM; + dma->iv_pool = dmam_pool_create("cesa_iv", dev, 16, 1, 0); + if (!dma->iv_pool) + return -ENOMEM; + cesa->dma = dma; return 0; @@ -416,7 +485,7 @@ static int mv_cesa_probe(struct platform_device *pdev) return -ENOMEM; spin_lock_init(&cesa->lock); - crypto_init_queue(&cesa->queue, 50); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); cesa->regs = devm_ioremap_resource(dev, res); if (IS_ERR(cesa->regs)) @@ -475,20 +544,24 @@ static int mv_cesa_probe(struct platform_device *pdev) engine->regs = cesa->regs + CESA_ENGINE_OFF(i); if (dram && cesa->caps->has_tdma) - mv_cesa_conf_mbus_windows(&cesa->engines[i], dram); + mv_cesa_conf_mbus_windows(engine, dram); - writel(0, cesa->engines[i].regs + CESA_SA_INT_STATUS); + writel(0, engine->regs + CESA_SA_INT_STATUS); writel(CESA_SA_CFG_STOP_DIG_ERR, - cesa->engines[i].regs + CESA_SA_CFG); + engine->regs + CESA_SA_CFG); writel(engine->sram_dma & CESA_SA_SRAM_MSK, - cesa->engines[i].regs + CESA_SA_DESC_P0); + engine->regs + CESA_SA_DESC_P0); ret = devm_request_threaded_irq(dev, irq, NULL, mv_cesa_int, IRQF_ONESHOT, dev_name(&pdev->dev), - &cesa->engines[i]); + engine); if (ret) goto err_cleanup; + + crypto_init_queue(&engine->queue, CESA_CRYPTO_DEFAULT_MAX_QLEN); + atomic_set(&engine->load, 0); + INIT_LIST_HEAD(&engine->complete_queue); } cesa_dev = cesa; diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index 74071e45ada0..e423d33decd4 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -271,10 +271,13 @@ struct mv_cesa_op_ctx { /* TDMA descriptor flags */ #define CESA_TDMA_DST_IN_SRAM BIT(31) #define CESA_TDMA_SRC_IN_SRAM BIT(30) -#define CESA_TDMA_TYPE_MSK GENMASK(29, 0) +#define CESA_TDMA_END_OF_REQ BIT(29) +#define CESA_TDMA_BREAK_CHAIN BIT(28) +#define CESA_TDMA_TYPE_MSK GENMASK(27, 0) #define CESA_TDMA_DUMMY 0 #define CESA_TDMA_DATA 1 #define CESA_TDMA_OP 2 +#define CESA_TDMA_IV 3 /** * struct mv_cesa_tdma_desc - TDMA descriptor @@ -390,6 +393,7 @@ struct mv_cesa_dev_dma { struct dma_pool *op_pool; struct dma_pool *cache_pool; struct dma_pool *padding_pool; + struct dma_pool *iv_pool; }; /** @@ -398,7 +402,6 @@ struct mv_cesa_dev_dma { * @regs: device registers * @sram_size: usable SRAM size * @lock: device lock - * @queue: crypto request queue * @engines: array of engines * @dma: dma pools * @@ -410,7 +413,6 @@ struct mv_cesa_dev { struct device *dev; unsigned int sram_size; spinlock_t lock; - struct crypto_queue queue; struct mv_cesa_engine *engines; struct mv_cesa_dev_dma *dma; }; @@ -429,6 +431,11 @@ struct mv_cesa_dev { * @int_mask: interrupt mask cache * @pool: memory pool pointing to the memory region reserved in * SRAM + * @queue: fifo of the pending crypto requests + * @load: engine load counter, useful for load balancing + * @chain: list of the current tdma descriptors being processed + * by this engine. + * @complete_queue: fifo of the processed requests by the engine * * Structure storing CESA engine information. */ @@ -444,23 +451,27 @@ struct mv_cesa_engine { size_t max_req_len; u32 int_mask; struct gen_pool *pool; + struct crypto_queue queue; + atomic_t load; + struct mv_cesa_tdma_chain chain; + struct list_head complete_queue; }; /** * struct mv_cesa_req_ops - CESA request operations - * @prepare: prepare a request to be executed on the specified engine * @process: process a request chunk result (should return 0 if the * operation, -EINPROGRESS if it needs more steps or an error * code) * @step: launch the crypto operation on the next chunk * @cleanup: cleanup the crypto request (release associated data) + * @complete: complete the request, i.e copy result or context from sram when + * needed. */ struct mv_cesa_req_ops { - void (*prepare)(struct crypto_async_request *req, - struct mv_cesa_engine *engine); int (*process)(struct crypto_async_request *req, u32 status); void (*step)(struct crypto_async_request *req); void (*cleanup)(struct crypto_async_request *req); + void (*complete)(struct crypto_async_request *req); }; /** @@ -507,21 +518,11 @@ enum mv_cesa_req_type { /** * struct mv_cesa_req - CESA request - * @type: request type * @engine: engine associated with this request + * @chain: list of tdma descriptors associated with this request */ struct mv_cesa_req { - enum mv_cesa_req_type type; struct mv_cesa_engine *engine; -}; - -/** - * struct mv_cesa_tdma_req - CESA TDMA request - * @base: base information - * @chain: TDMA chain - */ -struct mv_cesa_tdma_req { - struct mv_cesa_req base; struct mv_cesa_tdma_chain chain; }; @@ -538,13 +539,11 @@ struct mv_cesa_sg_std_iter { /** * struct mv_cesa_ablkcipher_std_req - cipher standard request - * @base: base information * @op: operation context * @offset: current operation offset * @size: size of the crypto operation */ struct mv_cesa_ablkcipher_std_req { - struct mv_cesa_req base; struct mv_cesa_op_ctx op; unsigned int offset; unsigned int size; @@ -558,34 +557,27 @@ struct mv_cesa_ablkcipher_std_req { * @dst_nents: number of entries in the dest sg list */ struct mv_cesa_ablkcipher_req { - union { - struct mv_cesa_req base; - struct mv_cesa_tdma_req dma; - struct mv_cesa_ablkcipher_std_req std; - } req; + struct mv_cesa_req base; + struct mv_cesa_ablkcipher_std_req std; int src_nents; int dst_nents; }; /** * struct mv_cesa_ahash_std_req - standard hash request - * @base: base information * @offset: current operation offset */ struct mv_cesa_ahash_std_req { - struct mv_cesa_req base; unsigned int offset; }; /** * struct mv_cesa_ahash_dma_req - DMA hash request - * @base: base information * @padding: padding buffer * @padding_dma: DMA address of the padding buffer * @cache_dma: DMA address of the cache buffer */ struct mv_cesa_ahash_dma_req { - struct mv_cesa_tdma_req base; u8 *padding; dma_addr_t padding_dma; u8 *cache; @@ -604,8 +596,8 @@ struct mv_cesa_ahash_dma_req { * @state: hash state */ struct mv_cesa_ahash_req { + struct mv_cesa_req base; union { - struct mv_cesa_req base; struct mv_cesa_ahash_dma_req dma; struct mv_cesa_ahash_std_req std; } req; @@ -623,6 +615,35 @@ struct mv_cesa_ahash_req { extern struct mv_cesa_dev *cesa_dev; + +static inline void +mv_cesa_engine_enqueue_complete_request(struct mv_cesa_engine *engine, + struct crypto_async_request *req) +{ + list_add_tail(&req->list, &engine->complete_queue); +} + +static inline struct crypto_async_request * +mv_cesa_engine_dequeue_complete_request(struct mv_cesa_engine *engine) +{ + struct crypto_async_request *req; + + req = list_first_entry_or_null(&engine->complete_queue, + struct crypto_async_request, + list); + if (req) + list_del(&req->list); + + return req; +} + + +static inline enum mv_cesa_req_type +mv_cesa_req_get_type(struct mv_cesa_req *req) +{ + return req->chain.first ? CESA_DMA_REQ : CESA_STD_REQ; +} + static inline void mv_cesa_update_op_cfg(struct mv_cesa_op_ctx *op, u32 cfg, u32 mask) { @@ -695,7 +716,32 @@ static inline bool mv_cesa_mac_op_is_first_frag(const struct mv_cesa_op_ctx *op) CESA_SA_DESC_CFG_FIRST_FRAG; } -int mv_cesa_queue_req(struct crypto_async_request *req); +int mv_cesa_queue_req(struct crypto_async_request *req, + struct mv_cesa_req *creq); + +struct crypto_async_request * +mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine, + struct crypto_async_request **backlog); + +static inline struct mv_cesa_engine *mv_cesa_select_engine(int weight) +{ + int i; + u32 min_load = U32_MAX; + struct mv_cesa_engine *selected = NULL; + + for (i = 0; i < cesa_dev->caps->nengines; i++) { + struct mv_cesa_engine *engine = cesa_dev->engines + i; + u32 load = atomic_read(&engine->load); + if (load < min_load) { + min_load = load; + selected = engine; + } + } + + atomic_add(weight, &selected->load); + + return selected; +} /* * Helper function that indicates whether a crypto request needs to be @@ -765,9 +811,9 @@ static inline bool mv_cesa_req_dma_iter_next_op(struct mv_cesa_dma_iter *iter) return iter->op_len; } -void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq); +void mv_cesa_dma_step(struct mv_cesa_req *dreq); -static inline int mv_cesa_dma_process(struct mv_cesa_tdma_req *dreq, +static inline int mv_cesa_dma_process(struct mv_cesa_req *dreq, u32 status) { if (!(status & CESA_SA_INT_ACC0_IDMA_DONE)) @@ -779,10 +825,13 @@ static inline int mv_cesa_dma_process(struct mv_cesa_tdma_req *dreq, return 0; } -void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, +void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, struct mv_cesa_engine *engine); +void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq); +void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, + struct mv_cesa_req *dreq); +int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status); -void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq); static inline void mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain) @@ -790,6 +839,9 @@ mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain) memset(chain, 0, sizeof(*chain)); } +int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, + u32 size, u32 flags, gfp_t gfp_flags); + struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, const struct mv_cesa_op_ctx *op_templ, bool skip_ctx, diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index dcf1fceb9336..d19dc9614e6e 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -70,25 +70,28 @@ mv_cesa_ablkcipher_dma_cleanup(struct ablkcipher_request *req) dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_BIDIRECTIONAL); } - mv_cesa_dma_cleanup(&creq->req.dma); + mv_cesa_dma_cleanup(&creq->base); } static inline void mv_cesa_ablkcipher_cleanup(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ablkcipher_dma_cleanup(req); } static void mv_cesa_ablkcipher_std_step(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_engine *engine = creq->base.engine; size_t len = min_t(size_t, req->nbytes - sreq->offset, CESA_SA_SRAM_PAYLOAD_SIZE); + mv_cesa_adjust_op(engine, &sreq->op); + memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); + len = sg_pcopy_to_buffer(req->src, creq->src_nents, engine->sram + CESA_SA_DATA_SRAM_OFFSET, len, sreq->offset); @@ -106,6 +109,8 @@ static void mv_cesa_ablkcipher_std_step(struct ablkcipher_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); + BUG_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -113,8 +118,8 @@ static int mv_cesa_ablkcipher_std_process(struct ablkcipher_request *req, u32 status) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_engine *engine = creq->base.engine; size_t len; len = sg_pcopy_from_buffer(req->dst, creq->dst_nents, @@ -133,23 +138,12 @@ static int mv_cesa_ablkcipher_process(struct crypto_async_request *req, { struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; - int ret; - - if (creq->req.base.type == CESA_DMA_REQ) - ret = mv_cesa_dma_process(&creq->req.dma, status); - else - ret = mv_cesa_ablkcipher_std_process(ablkreq, status); - - if (ret) - return ret; + struct mv_cesa_req *basereq = &creq->base; - memcpy_fromio(ablkreq->info, - engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, - crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq))); + if (mv_cesa_req_get_type(basereq) == CESA_STD_REQ) + return mv_cesa_ablkcipher_std_process(ablkreq, status); - return 0; + return mv_cesa_dma_process(basereq, status); } static void mv_cesa_ablkcipher_step(struct crypto_async_request *req) @@ -157,8 +151,8 @@ static void mv_cesa_ablkcipher_step(struct crypto_async_request *req) struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - if (creq->req.base.type == CESA_DMA_REQ) - mv_cesa_dma_step(&creq->req.dma); + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_dma_step(&creq->base); else mv_cesa_ablkcipher_std_step(ablkreq); } @@ -167,22 +161,19 @@ static inline void mv_cesa_ablkcipher_dma_prepare(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_tdma_req *dreq = &creq->req.dma; + struct mv_cesa_req *basereq = &creq->base; - mv_cesa_dma_prepare(dreq, dreq->base.engine); + mv_cesa_dma_prepare(basereq, basereq->engine); } static inline void mv_cesa_ablkcipher_std_prepare(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; sreq->size = 0; sreq->offset = 0; - mv_cesa_adjust_op(engine, &sreq->op); - memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); } static inline void mv_cesa_ablkcipher_prepare(struct crypto_async_request *req, @@ -190,9 +181,9 @@ static inline void mv_cesa_ablkcipher_prepare(struct crypto_async_request *req, { struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - creq->req.base.engine = engine; + creq->base.engine = engine; - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ablkcipher_dma_prepare(ablkreq); else mv_cesa_ablkcipher_std_prepare(ablkreq); @@ -206,11 +197,34 @@ mv_cesa_ablkcipher_req_cleanup(struct crypto_async_request *req) mv_cesa_ablkcipher_cleanup(ablkreq); } +static void +mv_cesa_ablkcipher_complete(struct crypto_async_request *req) +{ + struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); + struct mv_cesa_engine *engine = creq->base.engine; + unsigned int ivsize; + + atomic_sub(ablkreq->nbytes, &engine->load); + ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq)); + + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) { + struct mv_cesa_req *basereq; + + basereq = &creq->base; + memcpy(ablkreq->info, basereq->chain.last->data, ivsize); + } else { + memcpy_fromio(ablkreq->info, + engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, + ivsize); + } +} + static const struct mv_cesa_req_ops mv_cesa_ablkcipher_req_ops = { .step = mv_cesa_ablkcipher_step, .process = mv_cesa_ablkcipher_process, - .prepare = mv_cesa_ablkcipher_prepare, .cleanup = mv_cesa_ablkcipher_req_cleanup, + .complete = mv_cesa_ablkcipher_complete, }; static int mv_cesa_ablkcipher_cra_init(struct crypto_tfm *tfm) @@ -295,15 +309,14 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; - struct mv_cesa_tdma_req *dreq = &creq->req.dma; + struct mv_cesa_req *basereq = &creq->base; struct mv_cesa_ablkcipher_dma_iter iter; - struct mv_cesa_tdma_chain chain; bool skip_ctx = false; int ret; + unsigned int ivsize; - dreq->base.type = CESA_DMA_REQ; - dreq->chain.first = NULL; - dreq->chain.last = NULL; + basereq->chain.first = NULL; + basereq->chain.last = NULL; if (req->src != req->dst) { ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, @@ -324,13 +337,13 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, return -ENOMEM; } - mv_cesa_tdma_desc_iter_init(&chain); + mv_cesa_tdma_desc_iter_init(&basereq->chain); mv_cesa_ablkcipher_req_iter_init(&iter, req); do { struct mv_cesa_op_ctx *op; - op = mv_cesa_dma_add_op(&chain, op_templ, skip_ctx, flags); + op = mv_cesa_dma_add_op(&basereq->chain, op_templ, skip_ctx, flags); if (IS_ERR(op)) { ret = PTR_ERR(op); goto err_free_tdma; @@ -340,30 +353,38 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, mv_cesa_set_crypt_op_len(op, iter.base.op_len); /* Add input transfers */ - ret = mv_cesa_dma_add_op_transfers(&chain, &iter.base, + ret = mv_cesa_dma_add_op_transfers(&basereq->chain, &iter.base, &iter.src, flags); if (ret) goto err_free_tdma; /* Add dummy desc to launch the crypto operation */ - ret = mv_cesa_dma_add_dummy_launch(&chain, flags); + ret = mv_cesa_dma_add_dummy_launch(&basereq->chain, flags); if (ret) goto err_free_tdma; /* Add output transfers */ - ret = mv_cesa_dma_add_op_transfers(&chain, &iter.base, + ret = mv_cesa_dma_add_op_transfers(&basereq->chain, &iter.base, &iter.dst, flags); if (ret) goto err_free_tdma; } while (mv_cesa_ablkcipher_req_iter_next_op(&iter)); - dreq->chain = chain; + /* Add output data for IV */ + ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req)); + ret = mv_cesa_dma_add_iv_op(&basereq->chain, CESA_SA_CRYPT_IV_SRAM_OFFSET, + ivsize, CESA_TDMA_SRC_IN_SRAM, flags); + + if (ret) + goto err_free_tdma; + + basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ; return 0; err_free_tdma: - mv_cesa_dma_cleanup(dreq); + mv_cesa_dma_cleanup(basereq); if (req->dst != req->src) dma_unmap_sg(cesa_dev->dev, req->dst, creq->dst_nents, DMA_FROM_DEVICE); @@ -380,11 +401,13 @@ mv_cesa_ablkcipher_std_req_init(struct ablkcipher_request *req, const struct mv_cesa_op_ctx *op_templ) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_req *basereq = &creq->base; - sreq->base.type = CESA_STD_REQ; sreq->op = *op_templ; sreq->skip_ctx = false; + basereq->chain.first = NULL; + basereq->chain.last = NULL; return 0; } @@ -414,7 +437,6 @@ static int mv_cesa_ablkcipher_req_init(struct ablkcipher_request *req, mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_OP_CRYPT_ONLY, CESA_SA_DESC_CFG_OP_MSK); - /* TODO: add a threshold for DMA usage */ if (cesa_dev->caps->has_tdma) ret = mv_cesa_ablkcipher_dma_req_init(req, tmpl); else @@ -423,28 +445,41 @@ static int mv_cesa_ablkcipher_req_init(struct ablkcipher_request *req, return ret; } -static int mv_cesa_des_op(struct ablkcipher_request *req, - struct mv_cesa_op_ctx *tmpl) +static int mv_cesa_ablkcipher_queue_req(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) { - struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm); int ret; - - mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES, - CESA_SA_DESC_CFG_CRYPTM_MSK); - - memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES_KEY_SIZE); + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); + struct mv_cesa_engine *engine; ret = mv_cesa_ablkcipher_req_init(req, tmpl); if (ret) return ret; - ret = mv_cesa_queue_req(&req->base); + engine = mv_cesa_select_engine(req->nbytes); + mv_cesa_ablkcipher_prepare(&req->base, engine); + + ret = mv_cesa_queue_req(&req->base, &creq->base); + if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ablkcipher_cleanup(req); return ret; } +static int mv_cesa_des_op(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + + mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES, + CESA_SA_DESC_CFG_CRYPTM_MSK); + + memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES_KEY_SIZE); + + return mv_cesa_ablkcipher_queue_req(req, tmpl); +} + static int mv_cesa_ecb_des_encrypt(struct ablkcipher_request *req) { struct mv_cesa_op_ctx tmpl; @@ -547,22 +582,13 @@ static int mv_cesa_des3_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { struct mv_cesa_des3_ctx *ctx = crypto_tfm_ctx(req->base.tfm); - int ret; mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_3DES, CESA_SA_DESC_CFG_CRYPTM_MSK); memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES3_EDE_KEY_SIZE); - ret = mv_cesa_ablkcipher_req_init(req, tmpl); - if (ret) - return ret; - - ret = mv_cesa_queue_req(&req->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ablkcipher_cleanup(req); - - return ret; + return mv_cesa_ablkcipher_queue_req(req, tmpl); } static int mv_cesa_ecb_des3_ede_encrypt(struct ablkcipher_request *req) @@ -673,7 +699,7 @@ static int mv_cesa_aes_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(req->base.tfm); - int ret, i; + int i; u32 *key; u32 cfg; @@ -696,15 +722,7 @@ static int mv_cesa_aes_op(struct ablkcipher_request *req, CESA_SA_DESC_CFG_CRYPTM_MSK | CESA_SA_DESC_CFG_AES_LEN_MSK); - ret = mv_cesa_ablkcipher_req_init(req, tmpl); - if (ret) - return ret; - - ret = mv_cesa_queue_req(&req->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ablkcipher_cleanup(req); - - return ret; + return mv_cesa_ablkcipher_queue_req(req, tmpl); } static int mv_cesa_ecb_aes_encrypt(struct ablkcipher_request *req) diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 7ca2e0f9dc2e..82e0f4e6eb1c 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -103,14 +103,14 @@ static inline void mv_cesa_ahash_dma_cleanup(struct ahash_request *req) dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); mv_cesa_ahash_dma_free_cache(&creq->req.dma); - mv_cesa_dma_cleanup(&creq->req.dma.base); + mv_cesa_dma_cleanup(&creq->base); } static inline void mv_cesa_ahash_cleanup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_cleanup(req); } @@ -118,7 +118,7 @@ static void mv_cesa_ahash_last_cleanup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_last_cleanup(req); } @@ -157,11 +157,23 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_ahash_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_engine *engine = creq->base.engine; struct mv_cesa_op_ctx *op; unsigned int new_cache_ptr = 0; u32 frag_mode; size_t len; + unsigned int digsize; + int i; + + mv_cesa_adjust_op(engine, &creq->op_tmpl); + memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); + + digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); + for (i = 0; i < digsize / 4; i++) + writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); + + mv_cesa_adjust_op(engine, &creq->op_tmpl); + memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); if (creq->cache_ptr) memcpy_toio(engine->sram + CESA_SA_DATA_SRAM_OFFSET, @@ -237,6 +249,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); + BUG_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -254,20 +268,17 @@ static int mv_cesa_ahash_std_process(struct ahash_request *req, u32 status) static inline void mv_cesa_ahash_dma_prepare(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); - struct mv_cesa_tdma_req *dreq = &creq->req.dma.base; + struct mv_cesa_req *basereq = &creq->base; - mv_cesa_dma_prepare(dreq, dreq->base.engine); + mv_cesa_dma_prepare(basereq, basereq->engine); } static void mv_cesa_ahash_std_prepare(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_ahash_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; sreq->offset = 0; - mv_cesa_adjust_op(engine, &creq->op_tmpl); - memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); } static void mv_cesa_ahash_step(struct crypto_async_request *req) @@ -275,8 +286,8 @@ static void mv_cesa_ahash_step(struct crypto_async_request *req) struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - if (creq->req.base.type == CESA_DMA_REQ) - mv_cesa_dma_step(&creq->req.dma.base); + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_dma_step(&creq->base); else mv_cesa_ahash_std_step(ahashreq); } @@ -285,28 +296,25 @@ static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status) { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - struct mv_cesa_engine *engine = creq->req.base.engine; - unsigned int digsize; - int ret, i; - if (creq->req.base.type == CESA_DMA_REQ) - ret = mv_cesa_dma_process(&creq->req.dma.base, status); - else - ret = mv_cesa_ahash_std_process(ahashreq, status); + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + return mv_cesa_dma_process(&creq->base, status); - if (ret == -EINPROGRESS) - return ret; + return mv_cesa_ahash_std_process(ahashreq, status); +} + +static void mv_cesa_ahash_complete(struct crypto_async_request *req) +{ + struct ahash_request *ahashreq = ahash_request_cast(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); + struct mv_cesa_engine *engine = creq->base.engine; + unsigned int digsize; + int i; digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); for (i = 0; i < digsize / 4; i++) creq->state[i] = readl_relaxed(engine->regs + CESA_IVDIG(i)); - if (creq->cache_ptr) - sg_pcopy_to_buffer(ahashreq->src, creq->src_nents, - creq->cache, - creq->cache_ptr, - ahashreq->nbytes - creq->cache_ptr); - if (creq->last_req) { /* * Hardware's MD5 digest is in little endian format, but @@ -325,7 +333,7 @@ static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status) } } - return ret; + atomic_sub(ahashreq->nbytes, &engine->load); } static void mv_cesa_ahash_prepare(struct crypto_async_request *req, @@ -333,19 +341,13 @@ static void mv_cesa_ahash_prepare(struct crypto_async_request *req, { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - unsigned int digsize; - int i; - creq->req.base.engine = engine; + creq->base.engine = engine; - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_prepare(ahashreq); else mv_cesa_ahash_std_prepare(ahashreq); - - digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); - for (i = 0; i < digsize / 4; i++) - writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); } static void mv_cesa_ahash_req_cleanup(struct crypto_async_request *req) @@ -357,13 +359,19 @@ static void mv_cesa_ahash_req_cleanup(struct crypto_async_request *req) mv_cesa_ahash_last_cleanup(ahashreq); mv_cesa_ahash_cleanup(ahashreq); + + if (creq->cache_ptr) + sg_pcopy_to_buffer(ahashreq->src, creq->src_nents, + creq->cache, + creq->cache_ptr, + ahashreq->nbytes - creq->cache_ptr); } static const struct mv_cesa_req_ops mv_cesa_ahash_req_ops = { .step = mv_cesa_ahash_step, .process = mv_cesa_ahash_process, - .prepare = mv_cesa_ahash_prepare, .cleanup = mv_cesa_ahash_req_cleanup, + .complete = mv_cesa_ahash_complete, }; static int mv_cesa_ahash_init(struct ahash_request *req, @@ -553,15 +561,14 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; - struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma; - struct mv_cesa_tdma_req *dreq = &ahashdreq->base; + struct mv_cesa_req *basereq = &creq->base; struct mv_cesa_ahash_dma_iter iter; struct mv_cesa_op_ctx *op = NULL; unsigned int frag_len; int ret; - dreq->chain.first = NULL; - dreq->chain.last = NULL; + basereq->chain.first = NULL; + basereq->chain.last = NULL; if (creq->src_nents) { ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, @@ -572,14 +579,14 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) } } - mv_cesa_tdma_desc_iter_init(&dreq->chain); + mv_cesa_tdma_desc_iter_init(&basereq->chain); mv_cesa_ahash_req_iter_init(&iter, req); /* * Add the cache (left-over data from a previous block) first. * This will never overflow the SRAM size. */ - ret = mv_cesa_ahash_dma_add_cache(&dreq->chain, &iter, creq, flags); + ret = mv_cesa_ahash_dma_add_cache(&basereq->chain, &iter, creq, flags); if (ret) goto err_free_tdma; @@ -590,7 +597,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) * data. We intentionally do not add the final op block. */ while (true) { - ret = mv_cesa_dma_add_op_transfers(&dreq->chain, + ret = mv_cesa_dma_add_op_transfers(&basereq->chain, &iter.base, &iter.src, flags); if (ret) @@ -601,7 +608,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (!mv_cesa_ahash_req_iter_next_op(&iter)) break; - op = mv_cesa_dma_add_frag(&dreq->chain, &creq->op_tmpl, + op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl, frag_len, flags); if (IS_ERR(op)) { ret = PTR_ERR(op); @@ -619,10 +626,10 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) * operation, which depends whether this is the final request. */ if (creq->last_req) - op = mv_cesa_ahash_dma_last_req(&dreq->chain, &iter, creq, + op = mv_cesa_ahash_dma_last_req(&basereq->chain, &iter, creq, frag_len, flags); else if (frag_len) - op = mv_cesa_dma_add_frag(&dreq->chain, &creq->op_tmpl, + op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl, frag_len, flags); if (IS_ERR(op)) { @@ -632,7 +639,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (op) { /* Add dummy desc to wait for crypto operation end */ - ret = mv_cesa_dma_add_dummy_end(&dreq->chain, flags); + ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags); if (ret) goto err_free_tdma; } @@ -643,10 +650,13 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) else creq->cache_ptr = 0; + basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ | + CESA_TDMA_BREAK_CHAIN); + return 0; err_free_tdma: - mv_cesa_dma_cleanup(dreq); + mv_cesa_dma_cleanup(basereq); dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); err: @@ -660,11 +670,6 @@ static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached) struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); int ret; - if (cesa_dev->caps->has_tdma) - creq->req.base.type = CESA_DMA_REQ; - else - creq->req.base.type = CESA_STD_REQ; - creq->src_nents = sg_nents_for_len(req->src, req->nbytes); if (creq->src_nents < 0) { dev_err(cesa_dev->dev, "Invalid number of src SG"); @@ -678,19 +683,19 @@ static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached) if (*cached) return 0; - if (creq->req.base.type == CESA_DMA_REQ) + if (cesa_dev->caps->has_tdma) ret = mv_cesa_ahash_dma_req_init(req); return ret; } -static int mv_cesa_ahash_update(struct ahash_request *req) +static int mv_cesa_ahash_queue_req(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_engine *engine; bool cached = false; int ret; - creq->len += req->nbytes; ret = mv_cesa_ahash_req_init(req, &cached); if (ret) return ret; @@ -698,61 +703,48 @@ static int mv_cesa_ahash_update(struct ahash_request *req) if (cached) return 0; - ret = mv_cesa_queue_req(&req->base); + engine = mv_cesa_select_engine(req->nbytes); + mv_cesa_ahash_prepare(&req->base, engine); + + ret = mv_cesa_queue_req(&req->base, &creq->base); + if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ahash_cleanup(req); return ret; } +static int mv_cesa_ahash_update(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + creq->len += req->nbytes; + + return mv_cesa_ahash_queue_req(req); +} + static int mv_cesa_ahash_final(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl; - bool cached = false; - int ret; mv_cesa_set_mac_op_total_len(tmpl, creq->len); creq->last_req = true; req->nbytes = 0; - ret = mv_cesa_ahash_req_init(req, &cached); - if (ret) - return ret; - - if (cached) - return 0; - - ret = mv_cesa_queue_req(&req->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ahash_cleanup(req); - - return ret; + return mv_cesa_ahash_queue_req(req); } static int mv_cesa_ahash_finup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl; - bool cached = false; - int ret; creq->len += req->nbytes; mv_cesa_set_mac_op_total_len(tmpl, creq->len); creq->last_req = true; - ret = mv_cesa_ahash_req_init(req, &cached); - if (ret) - return ret; - - if (cached) - return 0; - - ret = mv_cesa_queue_req(&req->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ahash_cleanup(req); - - return ret; + return mv_cesa_ahash_queue_req(req); } static int mv_cesa_ahash_export(struct ahash_request *req, void *hash, @@ -768,8 +760,7 @@ static int mv_cesa_ahash_export(struct ahash_request *req, void *hash, *len = creq->len; memcpy(hash, creq->state, digsize); memset(cache, 0, blocksize); - if (creq->cache) - memcpy(cache, creq->cache, creq->cache_ptr); + memcpy(cache, creq->cache, creq->cache_ptr); return 0; } diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index 76427981275b..86a065bcc187 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -37,9 +37,9 @@ bool mv_cesa_req_dma_iter_next_transfer(struct mv_cesa_dma_iter *iter, return true; } -void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq) +void mv_cesa_dma_step(struct mv_cesa_req *dreq) { - struct mv_cesa_engine *engine = dreq->base.engine; + struct mv_cesa_engine *engine = dreq->engine; writel_relaxed(0, engine->regs + CESA_SA_CFG); @@ -53,19 +53,25 @@ void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq) engine->regs + CESA_SA_CFG); writel_relaxed(dreq->chain.first->cur_dma, engine->regs + CESA_TDMA_NEXT_ADDR); + BUG_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } -void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq) +void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq) { struct mv_cesa_tdma_desc *tdma; for (tdma = dreq->chain.first; tdma;) { struct mv_cesa_tdma_desc *old_tdma = tdma; + u32 type = tdma->flags & CESA_TDMA_TYPE_MSK; - if (tdma->flags & CESA_TDMA_OP) + if (type == CESA_TDMA_OP) dma_pool_free(cesa_dev->dma->op_pool, tdma->op, le32_to_cpu(tdma->src)); + else if (type == CESA_TDMA_IV) + dma_pool_free(cesa_dev->dma->iv_pool, tdma->data, + le32_to_cpu(tdma->dst)); tdma = tdma->next; dma_pool_free(cesa_dev->dma->tdma_desc_pool, old_tdma, @@ -76,7 +82,7 @@ void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq) dreq->chain.last = NULL; } -void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, +void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, struct mv_cesa_engine *engine) { struct mv_cesa_tdma_desc *tdma; @@ -88,23 +94,108 @@ void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, if (tdma->flags & CESA_TDMA_SRC_IN_SRAM) tdma->src = cpu_to_le32(tdma->src + engine->sram_dma); - if (tdma->flags & CESA_TDMA_OP) + if ((tdma->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_OP) mv_cesa_adjust_op(engine, tdma->op); } } +void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, + struct mv_cesa_req *dreq) +{ + if (engine->chain.first == NULL && engine->chain.last == NULL) { + engine->chain.first = dreq->chain.first; + engine->chain.last = dreq->chain.last; + } else { + struct mv_cesa_tdma_desc *last; + + last = engine->chain.last; + last->next = dreq->chain.first; + engine->chain.last = dreq->chain.last; + + if (!(last->flags & CESA_TDMA_BREAK_CHAIN)) + last->next_dma = dreq->chain.first->cur_dma; + } +} + +int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status) +{ + struct crypto_async_request *req = NULL; + struct mv_cesa_tdma_desc *tdma = NULL, *next = NULL; + dma_addr_t tdma_cur; + int res = 0; + + tdma_cur = readl(engine->regs + CESA_TDMA_CUR); + + for (tdma = engine->chain.first; tdma; tdma = next) { + spin_lock_bh(&engine->lock); + next = tdma->next; + spin_unlock_bh(&engine->lock); + + if (tdma->flags & CESA_TDMA_END_OF_REQ) { + struct crypto_async_request *backlog = NULL; + struct mv_cesa_ctx *ctx; + u32 current_status; + + spin_lock_bh(&engine->lock); + /* + * if req is NULL, this means we're processing the + * request in engine->req. + */ + if (!req) + req = engine->req; + else + req = mv_cesa_dequeue_req_locked(engine, + &backlog); + + /* Re-chaining to the next request */ + engine->chain.first = tdma->next; + tdma->next = NULL; + + /* If this is the last request, clear the chain */ + if (engine->chain.first == NULL) + engine->chain.last = NULL; + spin_unlock_bh(&engine->lock); + + ctx = crypto_tfm_ctx(req->tfm); + current_status = (tdma->cur_dma == tdma_cur) ? + status : CESA_SA_INT_ACC0_IDMA_DONE; + res = ctx->ops->process(req, current_status); + ctx->ops->complete(req); + + if (res == 0) + mv_cesa_engine_enqueue_complete_request(engine, + req); + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + } + + if (res || tdma->cur_dma == tdma_cur) + break; + } + + /* Save the last request in error to engine->req, so that the core + * knows which request was fautly */ + if (res) { + spin_lock_bh(&engine->lock); + engine->req = req; + spin_unlock_bh(&engine->lock); + } + + return res; +} + static struct mv_cesa_tdma_desc * mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags) { struct mv_cesa_tdma_desc *new_tdma = NULL; dma_addr_t dma_handle; - new_tdma = dma_pool_alloc(cesa_dev->dma->tdma_desc_pool, flags, - &dma_handle); + new_tdma = dma_pool_zalloc(cesa_dev->dma->tdma_desc_pool, flags, + &dma_handle); if (!new_tdma) return ERR_PTR(-ENOMEM); - memset(new_tdma, 0, sizeof(*new_tdma)); new_tdma->cur_dma = dma_handle; if (chain->last) { chain->last->next_dma = cpu_to_le32(dma_handle); @@ -118,6 +209,32 @@ mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags) return new_tdma; } +int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, + u32 size, u32 flags, gfp_t gfp_flags) +{ + + struct mv_cesa_tdma_desc *tdma; + u8 *iv; + dma_addr_t dma_handle; + + tdma = mv_cesa_dma_add_desc(chain, gfp_flags); + if (IS_ERR(tdma)) + return PTR_ERR(tdma); + + iv = dma_pool_alloc(cesa_dev->dma->iv_pool, gfp_flags, &dma_handle); + if (!iv) + return -ENOMEM; + + tdma->byte_cnt = cpu_to_le32(size | BIT(31)); + tdma->src = src; + tdma->dst = cpu_to_le32(dma_handle); + tdma->data = iv; + + flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM); + tdma->flags = flags | CESA_TDMA_IV; + return 0; +} + struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, const struct mv_cesa_op_ctx *op_templ, bool skip_ctx, diff --git a/drivers/crypto/mxc-scc.c b/drivers/crypto/mxc-scc.c new file mode 100644 index 000000000000..ff383ef83871 --- /dev/null +++ b/drivers/crypto/mxc-scc.c @@ -0,0 +1,765 @@ +/* + * Copyright (C) 2016 Pengutronix, Steffen Trumtrar <kernel@pengutronix.de> + * + * The driver is based on information gathered from + * drivers/mxc/security/mxc_scc.c which can be found in + * the Freescale linux-2.6-imx.git in the imx_2.6.35_maintain branch. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/clk.h> +#include <linux/crypto.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> + +#include <crypto/algapi.h> +#include <crypto/des.h> + +/* Secure Memory (SCM) registers */ +#define SCC_SCM_RED_START 0x0000 +#define SCC_SCM_BLACK_START 0x0004 +#define SCC_SCM_LENGTH 0x0008 +#define SCC_SCM_CTRL 0x000C +#define SCC_SCM_STATUS 0x0010 +#define SCC_SCM_ERROR_STATUS 0x0014 +#define SCC_SCM_INTR_CTRL 0x0018 +#define SCC_SCM_CFG 0x001C +#define SCC_SCM_INIT_VECTOR_0 0x0020 +#define SCC_SCM_INIT_VECTOR_1 0x0024 +#define SCC_SCM_RED_MEMORY 0x0400 +#define SCC_SCM_BLACK_MEMORY 0x0800 + +/* Security Monitor (SMN) Registers */ +#define SCC_SMN_STATUS 0x1000 +#define SCC_SMN_COMMAND 0x1004 +#define SCC_SMN_SEQ_START 0x1008 +#define SCC_SMN_SEQ_END 0x100C +#define SCC_SMN_SEQ_CHECK 0x1010 +#define SCC_SMN_BIT_COUNT 0x1014 +#define SCC_SMN_BITBANK_INC_SIZE 0x1018 +#define SCC_SMN_BITBANK_DECREMENT 0x101C +#define SCC_SMN_COMPARE_SIZE 0x1020 +#define SCC_SMN_PLAINTEXT_CHECK 0x1024 +#define SCC_SMN_CIPHERTEXT_CHECK 0x1028 +#define SCC_SMN_TIMER_IV 0x102C +#define SCC_SMN_TIMER_CONTROL 0x1030 +#define SCC_SMN_DEBUG_DETECT_STAT 0x1034 +#define SCC_SMN_TIMER 0x1038 + +#define SCC_SCM_CTRL_START_CIPHER BIT(2) +#define SCC_SCM_CTRL_CBC_MODE BIT(1) +#define SCC_SCM_CTRL_DECRYPT_MODE BIT(0) + +#define SCC_SCM_STATUS_LEN_ERR BIT(12) +#define SCC_SCM_STATUS_SMN_UNBLOCKED BIT(11) +#define SCC_SCM_STATUS_CIPHERING_DONE BIT(10) +#define SCC_SCM_STATUS_ZEROIZING_DONE BIT(9) +#define SCC_SCM_STATUS_INTR_STATUS BIT(8) +#define SCC_SCM_STATUS_SEC_KEY BIT(7) +#define SCC_SCM_STATUS_INTERNAL_ERR BIT(6) +#define SCC_SCM_STATUS_BAD_SEC_KEY BIT(5) +#define SCC_SCM_STATUS_ZEROIZE_FAIL BIT(4) +#define SCC_SCM_STATUS_SMN_BLOCKED BIT(3) +#define SCC_SCM_STATUS_CIPHERING BIT(2) +#define SCC_SCM_STATUS_ZEROIZING BIT(1) +#define SCC_SCM_STATUS_BUSY BIT(0) + +#define SCC_SMN_STATUS_STATE_MASK 0x0000001F +#define SCC_SMN_STATE_START 0x0 +/* The SMN is zeroizing its RAM during reset */ +#define SCC_SMN_STATE_ZEROIZE_RAM 0x5 +/* SMN has passed internal checks */ +#define SCC_SMN_STATE_HEALTH_CHECK 0x6 +/* Fatal Security Violation. SMN is locked, SCM is inoperative. */ +#define SCC_SMN_STATE_FAIL 0x9 +/* SCC is in secure state. SCM is using secret key. */ +#define SCC_SMN_STATE_SECURE 0xA +/* SCC is not secure. SCM is using default key. */ +#define SCC_SMN_STATE_NON_SECURE 0xC + +#define SCC_SCM_INTR_CTRL_ZEROIZE_MEM BIT(2) +#define SCC_SCM_INTR_CTRL_CLR_INTR BIT(1) +#define SCC_SCM_INTR_CTRL_MASK_INTR BIT(0) + +/* Size, in blocks, of Red memory. */ +#define SCC_SCM_CFG_BLACK_SIZE_MASK 0x07fe0000 +#define SCC_SCM_CFG_BLACK_SIZE_SHIFT 17 +/* Size, in blocks, of Black memory. */ +#define SCC_SCM_CFG_RED_SIZE_MASK 0x0001ff80 +#define SCC_SCM_CFG_RED_SIZE_SHIFT 7 +/* Number of bytes per block. */ +#define SCC_SCM_CFG_BLOCK_SIZE_MASK 0x0000007f + +#define SCC_SMN_COMMAND_TAMPER_LOCK BIT(4) +#define SCC_SMN_COMMAND_CLR_INTR BIT(3) +#define SCC_SMN_COMMAND_CLR_BIT_BANK BIT(2) +#define SCC_SMN_COMMAND_EN_INTR BIT(1) +#define SCC_SMN_COMMAND_SET_SOFTWARE_ALARM BIT(0) + +#define SCC_KEY_SLOTS 20 +#define SCC_MAX_KEY_SIZE 32 +#define SCC_KEY_SLOT_SIZE 32 + +#define SCC_CRC_CCITT_START 0xFFFF + +/* + * Offset into each RAM of the base of the area which is not + * used for Stored Keys. + */ +#define SCC_NON_RESERVED_OFFSET (SCC_KEY_SLOTS * SCC_KEY_SLOT_SIZE) + +/* Fixed padding for appending to plaintext to fill out a block */ +static char scc_block_padding[8] = { 0x80, 0, 0, 0, 0, 0, 0, 0 }; + +enum mxc_scc_state { + SCC_STATE_OK, + SCC_STATE_UNIMPLEMENTED, + SCC_STATE_FAILED +}; + +struct mxc_scc { + struct device *dev; + void __iomem *base; + struct clk *clk; + bool hw_busy; + spinlock_t lock; + struct crypto_queue queue; + struct crypto_async_request *req; + int block_size_bytes; + int black_ram_size_blocks; + int memory_size_bytes; + int bytes_remaining; + + void __iomem *red_memory; + void __iomem *black_memory; +}; + +struct mxc_scc_ctx { + struct mxc_scc *scc; + struct scatterlist *sg_src; + size_t src_nents; + struct scatterlist *sg_dst; + size_t dst_nents; + unsigned int offset; + unsigned int size; + unsigned int ctrl; +}; + +struct mxc_scc_crypto_tmpl { + struct mxc_scc *scc; + struct crypto_alg alg; +}; + +static int mxc_scc_get_data(struct mxc_scc_ctx *ctx, + struct crypto_async_request *req) +{ + struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); + struct mxc_scc *scc = ctx->scc; + size_t len; + void __iomem *from; + + if (ctx->ctrl & SCC_SCM_CTRL_DECRYPT_MODE) + from = scc->red_memory; + else + from = scc->black_memory; + + dev_dbg(scc->dev, "pcopy: from 0x%p %d bytes\n", from, + ctx->dst_nents * 8); + len = sg_pcopy_from_buffer(ablkreq->dst, ctx->dst_nents, + from, ctx->size, ctx->offset); + if (!len) { + dev_err(scc->dev, "pcopy err from 0x%p (len=%d)\n", from, len); + return -EINVAL; + } + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "red memory@"__stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, + scc->red_memory, ctx->size, 1); + print_hex_dump(KERN_ERR, + "black memory@"__stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, + scc->black_memory, ctx->size, 1); +#endif + + ctx->offset += len; + + if (ctx->offset < ablkreq->nbytes) + return -EINPROGRESS; + + return 0; +} + +static int mxc_scc_ablkcipher_req_init(struct ablkcipher_request *req, + struct mxc_scc_ctx *ctx) +{ + struct mxc_scc *scc = ctx->scc; + int nents; + + nents = sg_nents_for_len(req->src, req->nbytes); + if (nents < 0) { + dev_err(scc->dev, "Invalid number of src SC"); + return nents; + } + ctx->src_nents = nents; + + nents = sg_nents_for_len(req->dst, req->nbytes); + if (nents < 0) { + dev_err(scc->dev, "Invalid number of dst SC"); + return nents; + } + ctx->dst_nents = nents; + + ctx->size = 0; + ctx->offset = 0; + + return 0; +} + +static int mxc_scc_ablkcipher_req_complete(struct crypto_async_request *req, + struct mxc_scc_ctx *ctx, + int result) +{ + struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); + struct mxc_scc *scc = ctx->scc; + + scc->req = NULL; + scc->bytes_remaining = scc->memory_size_bytes; + + if (ctx->ctrl & SCC_SCM_CTRL_CBC_MODE) + memcpy(ablkreq->info, scc->base + SCC_SCM_INIT_VECTOR_0, + scc->block_size_bytes); + + req->complete(req, result); + scc->hw_busy = false; + + return 0; +} + +static int mxc_scc_put_data(struct mxc_scc_ctx *ctx, + struct ablkcipher_request *req) +{ + u8 padding_buffer[sizeof(u16) + sizeof(scc_block_padding)]; + size_t len = min_t(size_t, req->nbytes - ctx->offset, + ctx->scc->bytes_remaining); + unsigned int padding_byte_count = 0; + struct mxc_scc *scc = ctx->scc; + void __iomem *to; + + if (ctx->ctrl & SCC_SCM_CTRL_DECRYPT_MODE) + to = scc->black_memory; + else + to = scc->red_memory; + + if (ctx->ctrl & SCC_SCM_CTRL_CBC_MODE && req->info) + memcpy(scc->base + SCC_SCM_INIT_VECTOR_0, req->info, + scc->block_size_bytes); + + len = sg_pcopy_to_buffer(req->src, ctx->src_nents, + to, len, ctx->offset); + if (!len) { + dev_err(scc->dev, "pcopy err to 0x%p (len=%d)\n", to, len); + return -EINVAL; + } + + ctx->size = len; + +#ifdef DEBUG + dev_dbg(scc->dev, "copied %d bytes to 0x%p\n", len, to); + print_hex_dump(KERN_ERR, + "init vector0@"__stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, + scc->base + SCC_SCM_INIT_VECTOR_0, scc->block_size_bytes, + 1); + print_hex_dump(KERN_ERR, + "red memory@"__stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, + scc->red_memory, ctx->size, 1); + print_hex_dump(KERN_ERR, + "black memory@"__stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, + scc->black_memory, ctx->size, 1); +#endif + + scc->bytes_remaining -= len; + + padding_byte_count = len % scc->block_size_bytes; + + if (padding_byte_count) { + memcpy(padding_buffer, scc_block_padding, padding_byte_count); + memcpy(to + len, padding_buffer, padding_byte_count); + ctx->size += padding_byte_count; + } + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "data to encrypt@"__stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, + to, ctx->size, 1); +#endif + + return 0; +} + +static void mxc_scc_ablkcipher_next(struct mxc_scc_ctx *ctx, + struct crypto_async_request *req) +{ + struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); + struct mxc_scc *scc = ctx->scc; + int err; + + dev_dbg(scc->dev, "dispatch request (nbytes=%d, src=%p, dst=%p)\n", + ablkreq->nbytes, ablkreq->src, ablkreq->dst); + + writel(0, scc->base + SCC_SCM_ERROR_STATUS); + + err = mxc_scc_put_data(ctx, ablkreq); + if (err) { + mxc_scc_ablkcipher_req_complete(req, ctx, err); + return; + } + + dev_dbg(scc->dev, "Start encryption (0x%p/0x%p)\n", + (void *)readl(scc->base + SCC_SCM_RED_START), + (void *)readl(scc->base + SCC_SCM_BLACK_START)); + + /* clear interrupt control registers */ + writel(SCC_SCM_INTR_CTRL_CLR_INTR, + scc->base + SCC_SCM_INTR_CTRL); + + writel((ctx->size / ctx->scc->block_size_bytes) - 1, + scc->base + SCC_SCM_LENGTH); + + dev_dbg(scc->dev, "Process %d block(s) in 0x%p\n", + ctx->size / ctx->scc->block_size_bytes, + (ctx->ctrl & SCC_SCM_CTRL_DECRYPT_MODE) ? scc->black_memory : + scc->red_memory); + + writel(ctx->ctrl, scc->base + SCC_SCM_CTRL); +} + +static irqreturn_t mxc_scc_int(int irq, void *priv) +{ + struct crypto_async_request *req; + struct mxc_scc_ctx *ctx; + struct mxc_scc *scc = priv; + int status; + int ret; + + status = readl(scc->base + SCC_SCM_STATUS); + + /* clear interrupt control registers */ + writel(SCC_SCM_INTR_CTRL_CLR_INTR, scc->base + SCC_SCM_INTR_CTRL); + + if (status & SCC_SCM_STATUS_BUSY) + return IRQ_NONE; + + req = scc->req; + if (req) { + ctx = crypto_tfm_ctx(req->tfm); + ret = mxc_scc_get_data(ctx, req); + if (ret != -EINPROGRESS) + mxc_scc_ablkcipher_req_complete(req, ctx, ret); + else + mxc_scc_ablkcipher_next(ctx, req); + } + + return IRQ_HANDLED; +} + +static int mxc_scc_cra_init(struct crypto_tfm *tfm) +{ + struct mxc_scc_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_alg *alg = tfm->__crt_alg; + struct mxc_scc_crypto_tmpl *algt; + + algt = container_of(alg, struct mxc_scc_crypto_tmpl, alg); + + ctx->scc = algt->scc; + return 0; +} + +static void mxc_scc_dequeue_req_unlocked(struct mxc_scc_ctx *ctx) +{ + struct crypto_async_request *req, *backlog; + + if (ctx->scc->hw_busy) + return; + + spin_lock_bh(&ctx->scc->lock); + backlog = crypto_get_backlog(&ctx->scc->queue); + req = crypto_dequeue_request(&ctx->scc->queue); + ctx->scc->req = req; + ctx->scc->hw_busy = true; + spin_unlock_bh(&ctx->scc->lock); + + if (!req) + return; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + mxc_scc_ablkcipher_next(ctx, req); +} + +static int mxc_scc_queue_req(struct mxc_scc_ctx *ctx, + struct crypto_async_request *req) +{ + int ret; + + spin_lock_bh(&ctx->scc->lock); + ret = crypto_enqueue_request(&ctx->scc->queue, req); + spin_unlock_bh(&ctx->scc->lock); + + if (ret != -EINPROGRESS) + return ret; + + mxc_scc_dequeue_req_unlocked(ctx); + + return -EINPROGRESS; +} + +static int mxc_scc_des3_op(struct mxc_scc_ctx *ctx, + struct ablkcipher_request *req) +{ + int err; + + err = mxc_scc_ablkcipher_req_init(req, ctx); + if (err) + return err; + + return mxc_scc_queue_req(ctx, &req->base); +} + +static int mxc_scc_ecb_des_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(req); + struct mxc_scc_ctx *ctx = crypto_ablkcipher_ctx(cipher); + + ctx->ctrl = SCC_SCM_CTRL_START_CIPHER; + + return mxc_scc_des3_op(ctx, req); +} + +static int mxc_scc_ecb_des_decrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(req); + struct mxc_scc_ctx *ctx = crypto_ablkcipher_ctx(cipher); + + ctx->ctrl = SCC_SCM_CTRL_START_CIPHER; + ctx->ctrl |= SCC_SCM_CTRL_DECRYPT_MODE; + + return mxc_scc_des3_op(ctx, req); +} + +static int mxc_scc_cbc_des_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(req); + struct mxc_scc_ctx *ctx = crypto_ablkcipher_ctx(cipher); + + ctx->ctrl = SCC_SCM_CTRL_START_CIPHER; + ctx->ctrl |= SCC_SCM_CTRL_CBC_MODE; + + return mxc_scc_des3_op(ctx, req); +} + +static int mxc_scc_cbc_des_decrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(req); + struct mxc_scc_ctx *ctx = crypto_ablkcipher_ctx(cipher); + + ctx->ctrl = SCC_SCM_CTRL_START_CIPHER; + ctx->ctrl |= SCC_SCM_CTRL_CBC_MODE; + ctx->ctrl |= SCC_SCM_CTRL_DECRYPT_MODE; + + return mxc_scc_des3_op(ctx, req); +} + +static void mxc_scc_hw_init(struct mxc_scc *scc) +{ + int offset; + + offset = SCC_NON_RESERVED_OFFSET / scc->block_size_bytes; + + /* Fill the RED_START register */ + writel(offset, scc->base + SCC_SCM_RED_START); + + /* Fill the BLACK_START register */ + writel(offset, scc->base + SCC_SCM_BLACK_START); + + scc->red_memory = scc->base + SCC_SCM_RED_MEMORY + + SCC_NON_RESERVED_OFFSET; + + scc->black_memory = scc->base + SCC_SCM_BLACK_MEMORY + + SCC_NON_RESERVED_OFFSET; + + scc->bytes_remaining = scc->memory_size_bytes; +} + +static int mxc_scc_get_config(struct mxc_scc *scc) +{ + int config; + + config = readl(scc->base + SCC_SCM_CFG); + + scc->block_size_bytes = config & SCC_SCM_CFG_BLOCK_SIZE_MASK; + + scc->black_ram_size_blocks = config & SCC_SCM_CFG_BLACK_SIZE_MASK; + + scc->memory_size_bytes = (scc->block_size_bytes * + scc->black_ram_size_blocks) - + SCC_NON_RESERVED_OFFSET; + + return 0; +} + +static enum mxc_scc_state mxc_scc_get_state(struct mxc_scc *scc) +{ + enum mxc_scc_state state; + int status; + + status = readl(scc->base + SCC_SMN_STATUS) & + SCC_SMN_STATUS_STATE_MASK; + + /* If in Health Check, try to bringup to secure state */ + if (status & SCC_SMN_STATE_HEALTH_CHECK) { + /* + * Write a simple algorithm to the Algorithm Sequence + * Checker (ASC) + */ + writel(0xaaaa, scc->base + SCC_SMN_SEQ_START); + writel(0x5555, scc->base + SCC_SMN_SEQ_END); + writel(0x5555, scc->base + SCC_SMN_SEQ_CHECK); + + status = readl(scc->base + SCC_SMN_STATUS) & + SCC_SMN_STATUS_STATE_MASK; + } + + switch (status) { + case SCC_SMN_STATE_NON_SECURE: + case SCC_SMN_STATE_SECURE: + state = SCC_STATE_OK; + break; + case SCC_SMN_STATE_FAIL: + state = SCC_STATE_FAILED; + break; + default: + state = SCC_STATE_UNIMPLEMENTED; + break; + } + + return state; +} + +static struct mxc_scc_crypto_tmpl scc_ecb_des = { + .alg = { + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "ecb-des3-scc", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mxc_scc_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = mxc_scc_cra_init, + .cra_u.ablkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .encrypt = mxc_scc_ecb_des_encrypt, + .decrypt = mxc_scc_ecb_des_decrypt, + } + } +}; + +static struct mxc_scc_crypto_tmpl scc_cbc_des = { + .alg = { + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "cbc-des3-scc", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mxc_scc_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = mxc_scc_cra_init, + .cra_u.ablkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .encrypt = mxc_scc_cbc_des_encrypt, + .decrypt = mxc_scc_cbc_des_decrypt, + } + } +}; + +static struct mxc_scc_crypto_tmpl *scc_crypto_algs[] = { + &scc_ecb_des, + &scc_cbc_des, +}; + +static int mxc_scc_crypto_register(struct mxc_scc *scc) +{ + int i; + int err = 0; + + for (i = 0; i < ARRAY_SIZE(scc_crypto_algs); i++) { + scc_crypto_algs[i]->scc = scc; + err = crypto_register_alg(&scc_crypto_algs[i]->alg); + if (err) + goto err_out; + } + + return 0; + +err_out: + while (--i >= 0) + crypto_unregister_alg(&scc_crypto_algs[i]->alg); + + return err; +} + +static void mxc_scc_crypto_unregister(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(scc_crypto_algs); i++) + crypto_unregister_alg(&scc_crypto_algs[i]->alg); +} + +static int mxc_scc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *res; + struct mxc_scc *scc; + enum mxc_scc_state state; + int irq; + int ret; + int i; + + scc = devm_kzalloc(dev, sizeof(*scc), GFP_KERNEL); + if (!scc) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + scc->base = devm_ioremap_resource(dev, res); + if (IS_ERR(scc->base)) + return PTR_ERR(scc->base); + + scc->clk = devm_clk_get(&pdev->dev, "ipg"); + if (IS_ERR(scc->clk)) { + dev_err(dev, "Could not get ipg clock\n"); + return PTR_ERR(scc->clk); + } + + clk_prepare_enable(scc->clk); + + /* clear error status register */ + writel(0x0, scc->base + SCC_SCM_ERROR_STATUS); + + /* clear interrupt control registers */ + writel(SCC_SCM_INTR_CTRL_CLR_INTR | + SCC_SCM_INTR_CTRL_MASK_INTR, + scc->base + SCC_SCM_INTR_CTRL); + + writel(SCC_SMN_COMMAND_CLR_INTR | + SCC_SMN_COMMAND_EN_INTR, + scc->base + SCC_SMN_COMMAND); + + scc->dev = dev; + platform_set_drvdata(pdev, scc); + + ret = mxc_scc_get_config(scc); + if (ret) + goto err_out; + + state = mxc_scc_get_state(scc); + + if (state != SCC_STATE_OK) { + dev_err(dev, "SCC in unusable state %d\n", state); + ret = -EINVAL; + goto err_out; + } + + mxc_scc_hw_init(scc); + + spin_lock_init(&scc->lock); + /* FIXME: calculate queue from RAM slots */ + crypto_init_queue(&scc->queue, 50); + + for (i = 0; i < 2; i++) { + irq = platform_get_irq(pdev, i); + if (irq < 0) { + dev_err(dev, "failed to get irq resource\n"); + ret = -EINVAL; + goto err_out; + } + + ret = devm_request_threaded_irq(dev, irq, NULL, mxc_scc_int, + IRQF_ONESHOT, dev_name(dev), scc); + if (ret) + goto err_out; + } + + ret = mxc_scc_crypto_register(scc); + if (ret) { + dev_err(dev, "could not register algorithms"); + goto err_out; + } + + dev_info(dev, "registered successfully.\n"); + + return 0; + +err_out: + clk_disable_unprepare(scc->clk); + + return ret; +} + +static int mxc_scc_remove(struct platform_device *pdev) +{ + struct mxc_scc *scc = platform_get_drvdata(pdev); + + mxc_scc_crypto_unregister(); + + clk_disable_unprepare(scc->clk); + + return 0; +} + +static const struct of_device_id mxc_scc_dt_ids[] = { + { .compatible = "fsl,imx25-scc", .data = NULL, }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, mxc_scc_dt_ids); + +static struct platform_driver mxc_scc_driver = { + .probe = mxc_scc_probe, + .remove = mxc_scc_remove, + .driver = { + .name = "mxc-scc", + .of_match_table = mxc_scc_dt_ids, + }, +}; + +module_platform_driver(mxc_scc_driver); +MODULE_AUTHOR("Steffen Trumtrar <kernel@pengutronix.de>"); +MODULE_DESCRIPTION("Freescale i.MX25 SCC Crypto driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 59ed54e464a9..625ee50fd78b 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -11,7 +11,6 @@ * http://www.gnu.org/copyleft/gpl.html */ -#include <linux/crypto.h> #include <linux/dma-mapping.h> #include <linux/interrupt.h> #include <linux/io.h> @@ -25,6 +24,7 @@ #include <crypto/aes.h> #include <crypto/sha.h> #include <crypto/internal/hash.h> +#include <crypto/internal/skcipher.h> #define DCP_MAX_CHANS 4 #define DCP_BUF_SZ PAGE_SIZE @@ -84,7 +84,7 @@ struct dcp_async_ctx { unsigned int hot:1; /* Crypto-specific context */ - struct crypto_ablkcipher *fallback; + struct crypto_skcipher *fallback; unsigned int key_len; uint8_t key[AES_KEYSIZE_128]; }; @@ -374,20 +374,22 @@ static int dcp_chan_thread_aes(void *data) static int mxs_dcp_block_fallback(struct ablkcipher_request *req, int enc) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); - struct dcp_async_ctx *ctx = crypto_ablkcipher_ctx( - crypto_ablkcipher_reqtfm(req)); + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct dcp_async_ctx *ctx = crypto_ablkcipher_ctx(tfm); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); int ret; - ablkcipher_request_set_tfm(req, ctx->fallback); + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); if (enc) - ret = crypto_ablkcipher_encrypt(req); + ret = crypto_skcipher_encrypt(subreq); else - ret = crypto_ablkcipher_decrypt(req); + ret = crypto_skcipher_decrypt(subreq); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + skcipher_request_zero(subreq); return ret; } @@ -453,28 +455,22 @@ static int mxs_dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, return 0; } - /* Check if the key size is supported by kernel at all. */ - if (len != AES_KEYSIZE_192 && len != AES_KEYSIZE_256) { - tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - /* * If the requested AES key size is not supported by the hardware, * but is supported by in-kernel software implementation, we use * software fallback. */ - actx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - actx->fallback->base.crt_flags |= - tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK; + crypto_skcipher_clear_flags(actx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(actx->fallback, + tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); - ret = crypto_ablkcipher_setkey(actx->fallback, key, len); + ret = crypto_skcipher_setkey(actx->fallback, key, len); if (!ret) return 0; tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->base.crt_flags |= - actx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK; + tfm->base.crt_flags |= crypto_skcipher_get_flags(actx->fallback) & + CRYPTO_TFM_RES_MASK; return ret; } @@ -484,9 +480,9 @@ static int mxs_dcp_aes_fallback_init(struct crypto_tfm *tfm) const char *name = crypto_tfm_alg_name(tfm); const uint32_t flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK; struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm); - struct crypto_ablkcipher *blk; + struct crypto_skcipher *blk; - blk = crypto_alloc_ablkcipher(name, 0, flags); + blk = crypto_alloc_skcipher(name, 0, flags); if (IS_ERR(blk)) return PTR_ERR(blk); @@ -499,8 +495,7 @@ static void mxs_dcp_aes_fallback_exit(struct crypto_tfm *tfm) { struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm); - crypto_free_ablkcipher(actx->fallback); - actx->fallback = NULL; + crypto_free_skcipher(actx->fallback); } /* diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index b85a7a7dbf63..c5aac25a5738 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1598,7 +1598,7 @@ static void *new_queue(unsigned long q_type) static void free_queue(void *p, unsigned long q_type) { - return kmem_cache_free(queue_cache[q_type - 1], p); + kmem_cache_free(queue_cache[q_type - 1], p); } static int queue_cache_init(void) diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 0794f1cc0018..42f0f229f7f7 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -392,7 +392,7 @@ static void nx_of_update_msc(struct device *dev, ((bytes_so_far + sizeof(struct msc_triplet)) <= lenp) && i < msc->triplets; i++) { - if (msc->fc > NX_MAX_FC || msc->mode > NX_MAX_MODE) { + if (msc->fc >= NX_MAX_FC || msc->mode >= NX_MAX_MODE) { dev_err(dev, "unknown function code/mode " "combo: %d/%d (ignored)\n", msc->fc, msc->mode); diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index d420ec751c7c..4ab53a604312 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -26,7 +26,6 @@ #include <linux/scatterlist.h> #include <linux/dma-mapping.h> #include <linux/dmaengine.h> -#include <linux/omap-dma.h> #include <linux/pm_runtime.h> #include <linux/of.h> #include <linux/of_device.h> @@ -176,9 +175,7 @@ struct omap_aes_dev { struct scatter_walk in_walk; struct scatter_walk out_walk; - int dma_in; struct dma_chan *dma_lch_in; - int dma_out; struct dma_chan *dma_lch_out; int in_sg_len; int out_sg_len; @@ -351,30 +348,21 @@ static void omap_aes_dma_out_callback(void *data) static int omap_aes_dma_init(struct omap_aes_dev *dd) { - int err = -ENOMEM; - dma_cap_mask_t mask; + int err; dd->dma_lch_out = NULL; dd->dma_lch_in = NULL; - dma_cap_zero(mask); - dma_cap_set(DMA_SLAVE, mask); - - dd->dma_lch_in = dma_request_slave_channel_compat(mask, - omap_dma_filter_fn, - &dd->dma_in, - dd->dev, "rx"); - if (!dd->dma_lch_in) { + dd->dma_lch_in = dma_request_chan(dd->dev, "rx"); + if (IS_ERR(dd->dma_lch_in)) { dev_err(dd->dev, "Unable to request in DMA channel\n"); - goto err_dma_in; + return PTR_ERR(dd->dma_lch_in); } - dd->dma_lch_out = dma_request_slave_channel_compat(mask, - omap_dma_filter_fn, - &dd->dma_out, - dd->dev, "tx"); - if (!dd->dma_lch_out) { + dd->dma_lch_out = dma_request_chan(dd->dev, "tx"); + if (IS_ERR(dd->dma_lch_out)) { dev_err(dd->dev, "Unable to request out DMA channel\n"); + err = PTR_ERR(dd->dma_lch_out); goto err_dma_out; } @@ -382,14 +370,15 @@ static int omap_aes_dma_init(struct omap_aes_dev *dd) err_dma_out: dma_release_channel(dd->dma_lch_in); -err_dma_in: - if (err) - pr_err("error: %d\n", err); + return err; } static void omap_aes_dma_cleanup(struct omap_aes_dev *dd) { + if (dd->pio_only) + return; + dma_release_channel(dd->dma_lch_out); dma_release_channel(dd->dma_lch_in); } @@ -539,8 +528,6 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) omap_aes_dma_stop(dd); - dmaengine_terminate_all(dd->dma_lch_in); - dmaengine_terminate_all(dd->dma_lch_out); return 0; } @@ -591,10 +578,12 @@ static int omap_aes_copy_sgs(struct omap_aes_dev *dd) sg_init_table(&dd->in_sgl, 1); sg_set_buf(&dd->in_sgl, buf_in, total); dd->in_sg = &dd->in_sgl; + dd->in_sg_len = 1; sg_init_table(&dd->out_sgl, 1); sg_set_buf(&dd->out_sgl, buf_out, total); dd->out_sg = &dd->out_sgl; + dd->out_sg_len = 1; return 0; } @@ -615,7 +604,6 @@ static int omap_aes_prepare_req(struct crypto_engine *engine, crypto_ablkcipher_reqtfm(req)); struct omap_aes_dev *dd = omap_aes_find_dev(ctx); struct omap_aes_reqctx *rctx; - int len; if (!dd) return -ENODEV; @@ -627,6 +615,14 @@ static int omap_aes_prepare_req(struct crypto_engine *engine, dd->in_sg = req->src; dd->out_sg = req->dst; + dd->in_sg_len = sg_nents_for_len(dd->in_sg, dd->total); + if (dd->in_sg_len < 0) + return dd->in_sg_len; + + dd->out_sg_len = sg_nents_for_len(dd->out_sg, dd->total); + if (dd->out_sg_len < 0) + return dd->out_sg_len; + if (omap_aes_check_aligned(dd->in_sg, dd->total) || omap_aes_check_aligned(dd->out_sg, dd->total)) { if (omap_aes_copy_sgs(dd)) @@ -636,11 +632,6 @@ static int omap_aes_prepare_req(struct crypto_engine *engine, dd->sgs_copied = 0; } - len = ALIGN(dd->total, AES_BLOCK_SIZE); - dd->in_sg_len = scatterwalk_bytes_sglen(dd->in_sg, len); - dd->out_sg_len = scatterwalk_bytes_sglen(dd->out_sg, len); - BUG_ON(dd->in_sg_len < 0 || dd->out_sg_len < 0); - rctx = ablkcipher_request_ctx(req); ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); rctx->mode &= FLAGS_MODE_MASK; @@ -1080,9 +1071,6 @@ static int omap_aes_get_res_of(struct omap_aes_dev *dd, goto err; } - dd->dma_out = -1; /* Dummy value that's unused */ - dd->dma_in = -1; /* Dummy value that's unused */ - dd->pdata = match->data; err: @@ -1116,24 +1104,6 @@ static int omap_aes_get_res_pdev(struct omap_aes_dev *dd, } memcpy(res, r, sizeof(*res)); - /* Get the DMA out channel */ - r = platform_get_resource(pdev, IORESOURCE_DMA, 0); - if (!r) { - dev_err(dev, "no DMA out resource info\n"); - err = -ENODEV; - goto err; - } - dd->dma_out = r->start; - - /* Get the DMA in channel */ - r = platform_get_resource(pdev, IORESOURCE_DMA, 1); - if (!r) { - dev_err(dev, "no DMA in resource info\n"); - err = -ENODEV; - goto err; - } - dd->dma_in = r->start; - /* Only OMAP2/3 can be non-DT */ dd->pdata = &omap_aes_pdata_omap2; @@ -1191,7 +1161,9 @@ static int omap_aes_probe(struct platform_device *pdev) tasklet_init(&dd->done_task, omap_aes_done_task, (unsigned long)dd); err = omap_aes_dma_init(dd); - if (err && AES_REG_IRQ_STATUS(dd) && AES_REG_IRQ_ENABLE(dd)) { + if (err == -EPROBE_DEFER) { + goto err_irq; + } else if (err && AES_REG_IRQ_STATUS(dd) && AES_REG_IRQ_ENABLE(dd)) { dd->pio_only = 1; irq = platform_get_irq(pdev, 0); @@ -1215,17 +1187,19 @@ static int omap_aes_probe(struct platform_device *pdev) spin_unlock(&list_lock); for (i = 0; i < dd->pdata->algs_info_size; i++) { - for (j = 0; j < dd->pdata->algs_info[i].size; j++) { - algp = &dd->pdata->algs_info[i].algs_list[j]; + if (!dd->pdata->algs_info[i].registered) { + for (j = 0; j < dd->pdata->algs_info[i].size; j++) { + algp = &dd->pdata->algs_info[i].algs_list[j]; - pr_debug("reg alg: %s\n", algp->cra_name); - INIT_LIST_HEAD(&algp->cra_list); + pr_debug("reg alg: %s\n", algp->cra_name); + INIT_LIST_HEAD(&algp->cra_list); - err = crypto_register_alg(algp); - if (err) - goto err_algs; + err = crypto_register_alg(algp); + if (err) + goto err_algs; - dd->pdata->algs_info[i].registered++; + dd->pdata->algs_info[i].registered++; + } } } @@ -1248,8 +1222,8 @@ err_algs: for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) crypto_unregister_alg( &dd->pdata->algs_info[i].algs_list[j]); - if (!dd->pio_only) - omap_aes_dma_cleanup(dd); + + omap_aes_dma_cleanup(dd); err_irq: tasklet_kill(&dd->done_task); pm_runtime_disable(dev); diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index dd7b93f2f94c..5691434ffb2d 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -29,7 +29,6 @@ #include <linux/scatterlist.h> #include <linux/dma-mapping.h> #include <linux/dmaengine.h> -#include <linux/omap-dma.h> #include <linux/pm_runtime.h> #include <linux/of.h> #include <linux/of_device.h> @@ -39,6 +38,7 @@ #include <linux/interrupt.h> #include <crypto/scatterwalk.h> #include <crypto/des.h> +#include <crypto/algapi.h> #define DST_MAXBURST 2 @@ -132,14 +132,10 @@ struct omap_des_dev { unsigned long flags; int err; - /* spinlock used for queues */ - spinlock_t lock; - struct crypto_queue queue; - struct tasklet_struct done_task; - struct tasklet_struct queue_task; struct ablkcipher_request *req; + struct crypto_engine *engine; /* * total is used by PIO mode for book keeping so introduce * variable total_save as need it to calc page_order @@ -158,9 +154,7 @@ struct omap_des_dev { struct scatter_walk in_walk; struct scatter_walk out_walk; - int dma_in; struct dma_chan *dma_lch_in; - int dma_out; struct dma_chan *dma_lch_out; int in_sg_len; int out_sg_len; @@ -340,30 +334,21 @@ static void omap_des_dma_out_callback(void *data) static int omap_des_dma_init(struct omap_des_dev *dd) { - int err = -ENOMEM; - dma_cap_mask_t mask; + int err; dd->dma_lch_out = NULL; dd->dma_lch_in = NULL; - dma_cap_zero(mask); - dma_cap_set(DMA_SLAVE, mask); - - dd->dma_lch_in = dma_request_slave_channel_compat(mask, - omap_dma_filter_fn, - &dd->dma_in, - dd->dev, "rx"); - if (!dd->dma_lch_in) { + dd->dma_lch_in = dma_request_chan(dd->dev, "rx"); + if (IS_ERR(dd->dma_lch_in)) { dev_err(dd->dev, "Unable to request in DMA channel\n"); - goto err_dma_in; + return PTR_ERR(dd->dma_lch_in); } - dd->dma_lch_out = dma_request_slave_channel_compat(mask, - omap_dma_filter_fn, - &dd->dma_out, - dd->dev, "tx"); - if (!dd->dma_lch_out) { + dd->dma_lch_out = dma_request_chan(dd->dev, "tx"); + if (IS_ERR(dd->dma_lch_out)) { dev_err(dd->dev, "Unable to request out DMA channel\n"); + err = PTR_ERR(dd->dma_lch_out); goto err_dma_out; } @@ -371,14 +356,15 @@ static int omap_des_dma_init(struct omap_des_dev *dd) err_dma_out: dma_release_channel(dd->dma_lch_in); -err_dma_in: - if (err) - pr_err("error: %d\n", err); + return err; } static void omap_des_dma_cleanup(struct omap_des_dev *dd) { + if (dd->pio_only) + return; + dma_release_channel(dd->dma_lch_out); dma_release_channel(dd->dma_lch_in); } @@ -520,9 +506,7 @@ static void omap_des_finish_req(struct omap_des_dev *dd, int err) pr_debug("err: %d\n", err); pm_runtime_put(dd->dev); - dd->flags &= ~FLAGS_BUSY; - - req->base.complete(&req->base, err); + crypto_finalize_request(dd->engine, req, err); } static int omap_des_crypt_dma_stop(struct omap_des_dev *dd) @@ -576,43 +560,35 @@ static int omap_des_copy_sgs(struct omap_des_dev *dd) sg_init_table(&dd->in_sgl, 1); sg_set_buf(&dd->in_sgl, buf_in, dd->total); dd->in_sg = &dd->in_sgl; + dd->in_sg_len = 1; sg_init_table(&dd->out_sgl, 1); sg_set_buf(&dd->out_sgl, buf_out, dd->total); dd->out_sg = &dd->out_sgl; + dd->out_sg_len = 1; return 0; } static int omap_des_handle_queue(struct omap_des_dev *dd, - struct ablkcipher_request *req) + struct ablkcipher_request *req) { - struct crypto_async_request *async_req, *backlog; - struct omap_des_ctx *ctx; - struct omap_des_reqctx *rctx; - unsigned long flags; - int err, ret = 0; - - spin_lock_irqsave(&dd->lock, flags); if (req) - ret = ablkcipher_enqueue_request(&dd->queue, req); - if (dd->flags & FLAGS_BUSY) { - spin_unlock_irqrestore(&dd->lock, flags); - return ret; - } - backlog = crypto_get_backlog(&dd->queue); - async_req = crypto_dequeue_request(&dd->queue); - if (async_req) - dd->flags |= FLAGS_BUSY; - spin_unlock_irqrestore(&dd->lock, flags); + return crypto_transfer_request_to_engine(dd->engine, req); - if (!async_req) - return ret; + return 0; +} - if (backlog) - backlog->complete(backlog, -EINPROGRESS); +static int omap_des_prepare_req(struct crypto_engine *engine, + struct ablkcipher_request *req) +{ + struct omap_des_ctx *ctx = crypto_ablkcipher_ctx( + crypto_ablkcipher_reqtfm(req)); + struct omap_des_dev *dd = omap_des_find_dev(ctx); + struct omap_des_reqctx *rctx; - req = ablkcipher_request_cast(async_req); + if (!dd) + return -ENODEV; /* assign new request to device */ dd->req = req; @@ -621,6 +597,14 @@ static int omap_des_handle_queue(struct omap_des_dev *dd, dd->in_sg = req->src; dd->out_sg = req->dst; + dd->in_sg_len = sg_nents_for_len(dd->in_sg, dd->total); + if (dd->in_sg_len < 0) + return dd->in_sg_len; + + dd->out_sg_len = sg_nents_for_len(dd->out_sg, dd->total); + if (dd->out_sg_len < 0) + return dd->out_sg_len; + if (omap_des_copy_needed(dd->in_sg) || omap_des_copy_needed(dd->out_sg)) { if (omap_des_copy_sgs(dd)) @@ -630,10 +614,6 @@ static int omap_des_handle_queue(struct omap_des_dev *dd, dd->sgs_copied = 0; } - dd->in_sg_len = scatterwalk_bytes_sglen(dd->in_sg, dd->total); - dd->out_sg_len = scatterwalk_bytes_sglen(dd->out_sg, dd->total); - BUG_ON(dd->in_sg_len < 0 || dd->out_sg_len < 0); - rctx = ablkcipher_request_ctx(req); ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); rctx->mode &= FLAGS_MODE_MASK; @@ -642,16 +622,20 @@ static int omap_des_handle_queue(struct omap_des_dev *dd, dd->ctx = ctx; ctx->dd = dd; - err = omap_des_write_ctrl(dd); - if (!err) - err = omap_des_crypt_dma_start(dd); - if (err) { - /* des_task will not finish it, so do it here */ - omap_des_finish_req(dd, err); - tasklet_schedule(&dd->queue_task); - } + return omap_des_write_ctrl(dd); +} - return ret; /* return ret, which is enqueue return value */ +static int omap_des_crypt_req(struct crypto_engine *engine, + struct ablkcipher_request *req) +{ + struct omap_des_ctx *ctx = crypto_ablkcipher_ctx( + crypto_ablkcipher_reqtfm(req)); + struct omap_des_dev *dd = omap_des_find_dev(ctx); + + if (!dd) + return -ENODEV; + + return omap_des_crypt_dma_start(dd); } static void omap_des_done_task(unsigned long data) @@ -683,18 +667,10 @@ static void omap_des_done_task(unsigned long data) } omap_des_finish_req(dd, 0); - omap_des_handle_queue(dd, NULL); pr_debug("exit\n"); } -static void omap_des_queue_task(unsigned long data) -{ - struct omap_des_dev *dd = (struct omap_des_dev *)data; - - omap_des_handle_queue(dd, NULL); -} - static int omap_des_crypt(struct ablkcipher_request *req, unsigned long mode) { struct omap_des_ctx *ctx = crypto_ablkcipher_ctx( @@ -999,8 +975,6 @@ static int omap_des_get_of(struct omap_des_dev *dd, return -EINVAL; } - dd->dma_out = -1; /* Dummy value that's unused */ - dd->dma_in = -1; /* Dummy value that's unused */ dd->pdata = match->data; return 0; @@ -1016,33 +990,10 @@ static int omap_des_get_of(struct omap_des_dev *dd, static int omap_des_get_pdev(struct omap_des_dev *dd, struct platform_device *pdev) { - struct device *dev = &pdev->dev; - struct resource *r; - int err = 0; - - /* Get the DMA out channel */ - r = platform_get_resource(pdev, IORESOURCE_DMA, 0); - if (!r) { - dev_err(dev, "no DMA out resource info\n"); - err = -ENODEV; - goto err; - } - dd->dma_out = r->start; - - /* Get the DMA in channel */ - r = platform_get_resource(pdev, IORESOURCE_DMA, 1); - if (!r) { - dev_err(dev, "no DMA in resource info\n"); - err = -ENODEV; - goto err; - } - dd->dma_in = r->start; - /* non-DT devices get pdata from pdev */ dd->pdata = pdev->dev.platform_data; -err: - return err; + return 0; } static int omap_des_probe(struct platform_device *pdev) @@ -1062,9 +1013,6 @@ static int omap_des_probe(struct platform_device *pdev) dd->dev = dev; platform_set_drvdata(pdev, dd); - spin_lock_init(&dd->lock); - crypto_init_queue(&dd->queue, OMAP_DES_QUEUE_LENGTH); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { dev_err(dev, "no MEM resource info\n"); @@ -1103,10 +1051,11 @@ static int omap_des_probe(struct platform_device *pdev) (reg & dd->pdata->minor_mask) >> dd->pdata->minor_shift); tasklet_init(&dd->done_task, omap_des_done_task, (unsigned long)dd); - tasklet_init(&dd->queue_task, omap_des_queue_task, (unsigned long)dd); err = omap_des_dma_init(dd); - if (err && DES_REG_IRQ_STATUS(dd) && DES_REG_IRQ_ENABLE(dd)) { + if (err == -EPROBE_DEFER) { + goto err_irq; + } else if (err && DES_REG_IRQ_STATUS(dd) && DES_REG_IRQ_ENABLE(dd)) { dd->pio_only = 1; irq = platform_get_irq(pdev, 0); @@ -1144,17 +1093,30 @@ static int omap_des_probe(struct platform_device *pdev) } } + /* Initialize des crypto engine */ + dd->engine = crypto_engine_alloc_init(dev, 1); + if (!dd->engine) + goto err_algs; + + dd->engine->prepare_request = omap_des_prepare_req; + dd->engine->crypt_one_request = omap_des_crypt_req; + err = crypto_engine_start(dd->engine); + if (err) + goto err_engine; + return 0; + +err_engine: + crypto_engine_exit(dd->engine); err_algs: for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) crypto_unregister_alg( &dd->pdata->algs_info[i].algs_list[j]); - if (!dd->pio_only) - omap_des_dma_cleanup(dd); + + omap_des_dma_cleanup(dd); err_irq: tasklet_kill(&dd->done_task); - tasklet_kill(&dd->queue_task); err_get: pm_runtime_disable(dev); err_res: @@ -1182,7 +1144,6 @@ static int omap_des_remove(struct platform_device *pdev) &dd->pdata->algs_info[i].algs_list[j]); tasklet_kill(&dd->done_task); - tasklet_kill(&dd->queue_task); omap_des_dma_cleanup(dd); pm_runtime_disable(dd->dev); dd = NULL; diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 48adb2a0903e..7fe4eef12fe2 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -29,7 +29,6 @@ #include <linux/scatterlist.h> #include <linux/dma-mapping.h> #include <linux/dmaengine.h> -#include <linux/omap-dma.h> #include <linux/pm_runtime.h> #include <linux/of.h> #include <linux/of_device.h> @@ -101,6 +100,8 @@ #define DEFAULT_TIMEOUT_INTERVAL HZ +#define DEFAULT_AUTOSUSPEND_DELAY 1000 + /* mostly device flags */ #define FLAGS_BUSY 0 #define FLAGS_FINAL 1 @@ -174,7 +175,7 @@ struct omap_sham_ctx { struct omap_sham_hmac_ctx base[0]; }; -#define OMAP_SHAM_QUEUE_LENGTH 1 +#define OMAP_SHAM_QUEUE_LENGTH 10 struct omap_sham_algs_info { struct ahash_alg *algs_list; @@ -219,7 +220,6 @@ struct omap_sham_dev { int irq; spinlock_t lock; int err; - unsigned int dma; struct dma_chan *dma_lch; struct tasklet_struct done_task; u8 polling_mode; @@ -815,7 +815,6 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - dmaengine_terminate_all(dd->dma_lch); if (ctx->flags & BIT(FLAGS_SG)) { dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE); @@ -1001,7 +1000,8 @@ static void omap_sham_finish_req(struct ahash_request *req, int err) dd->flags &= ~(BIT(FLAGS_BUSY) | BIT(FLAGS_FINAL) | BIT(FLAGS_CPU) | BIT(FLAGS_DMA_READY) | BIT(FLAGS_OUTPUT_READY)); - pm_runtime_put(dd->dev); + pm_runtime_mark_last_busy(dd->dev); + pm_runtime_put_autosuspend(dd->dev); if (req->base.complete) req->base.complete(&req->base, err); @@ -1095,7 +1095,7 @@ static int omap_sham_update(struct ahash_request *req) ctx->offset = 0; if (ctx->flags & BIT(FLAGS_FINUP)) { - if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 9) { + if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 240) { /* * OMAP HW accel works only with buffers >= 9 * will switch to bypass in final() @@ -1151,9 +1151,13 @@ static int omap_sham_final(struct ahash_request *req) if (ctx->flags & BIT(FLAGS_ERROR)) return 0; /* uncompleted hash is not needed */ - /* OMAP HW accel works only with buffers >= 9 */ - /* HMAC is always >= 9 because ipad == block size */ - if ((ctx->digcnt + ctx->bufcnt) < 9) + /* + * OMAP HW accel works only with buffers >= 9. + * HMAC is always >= 9 because ipad == block size. + * If buffersize is less than 240, we use fallback SW encoding, + * as using DMA + HW in this case doesn't provide any benefit. + */ + if ((ctx->digcnt + ctx->bufcnt) < 240) return omap_sham_final_shash(req); else if (ctx->bufcnt) return omap_sham_enqueue(req, OP_FINAL); @@ -1330,7 +1334,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "sha1", .cra_driver_name = "omap-sha1", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1353,7 +1357,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "md5", .cra_driver_name = "omap-md5", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1377,7 +1381,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "hmac(sha1)", .cra_driver_name = "omap-hmac-sha1", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1402,7 +1406,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "hmac(md5)", .cra_driver_name = "omap-hmac-md5", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1430,7 +1434,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "sha224", .cra_driver_name = "omap-sha224", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1452,7 +1456,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "sha256", .cra_driver_name = "omap-sha256", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1475,7 +1479,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "hmac(sha224)", .cra_driver_name = "omap-hmac-sha224", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1499,7 +1503,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "hmac(sha256)", .cra_driver_name = "omap-hmac-sha256", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1525,7 +1529,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "sha384", .cra_driver_name = "omap-sha384", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1547,7 +1551,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "sha512", .cra_driver_name = "omap-sha512", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1570,7 +1574,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "hmac(sha384)", .cra_driver_name = "omap-hmac-sha384", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1594,7 +1598,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "hmac(sha512)", .cra_driver_name = "omap-hmac-sha512", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1842,7 +1846,6 @@ static int omap_sham_get_res_of(struct omap_sham_dev *dd, goto err; } - dd->dma = -1; /* Dummy value that's unused */ dd->pdata = match->data; err: @@ -1884,15 +1887,6 @@ static int omap_sham_get_res_pdev(struct omap_sham_dev *dd, goto err; } - /* Get the DMA */ - r = platform_get_resource(pdev, IORESOURCE_DMA, 0); - if (!r) { - dev_err(dev, "no DMA resource info\n"); - err = -ENODEV; - goto err; - } - dd->dma = r->start; - /* Only OMAP2/3 can be non-DT */ dd->pdata = &omap_sham_pdata_omap2; @@ -1946,15 +1940,21 @@ static int omap_sham_probe(struct platform_device *pdev) dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); - dd->dma_lch = dma_request_slave_channel_compat(mask, omap_dma_filter_fn, - &dd->dma, dev, "rx"); - if (!dd->dma_lch) { + dd->dma_lch = dma_request_chan(dev, "rx"); + if (IS_ERR(dd->dma_lch)) { + err = PTR_ERR(dd->dma_lch); + if (err == -EPROBE_DEFER) + goto data_err; + dd->polling_mode = 1; dev_dbg(dev, "using polling mode instead of dma\n"); } dd->flags |= dd->pdata->flags; + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, DEFAULT_AUTOSUSPEND_DELAY); + pm_runtime_enable(dev); pm_runtime_irq_safe(dev); @@ -1995,7 +1995,7 @@ err_algs: &dd->pdata->algs_info[i].algs_list[j]); err_pm: pm_runtime_disable(dev); - if (dd->dma_lch) + if (!dd->polling_mode) dma_release_channel(dd->dma_lch); data_err: dev_err(dev, "initialization failed.\n"); @@ -2021,7 +2021,7 @@ static int omap_sham_remove(struct platform_device *pdev) tasklet_kill(&dd->done_task); pm_runtime_disable(&pdev->dev); - if (dd->dma_lch) + if (!dd->polling_mode) dma_release_channel(dd->dma_lch); return 0; diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 3b1c7ecf078f..47576098831f 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -171,7 +171,7 @@ struct spacc_ablk_ctx { * The fallback cipher. If the operation can't be done in hardware, * fallback to a software version. */ - struct crypto_ablkcipher *sw_cipher; + struct crypto_skcipher *sw_cipher; }; /* AEAD cipher context. */ @@ -789,33 +789,35 @@ static int spacc_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, * request for any other size (192 bits) then we need to do a software * fallback. */ - if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256 && - ctx->sw_cipher) { + if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256) { + if (!ctx->sw_cipher) + return -EINVAL; + /* * Set the fallback transform to use the same request flags as * the hardware transform. */ - ctx->sw_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - ctx->sw_cipher->base.crt_flags |= - cipher->base.crt_flags & CRYPTO_TFM_REQ_MASK; + crypto_skcipher_clear_flags(ctx->sw_cipher, + CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctx->sw_cipher, + cipher->base.crt_flags & + CRYPTO_TFM_REQ_MASK); + + err = crypto_skcipher_setkey(ctx->sw_cipher, key, len); + + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= + crypto_skcipher_get_flags(ctx->sw_cipher) & + CRYPTO_TFM_RES_MASK; - err = crypto_ablkcipher_setkey(ctx->sw_cipher, key, len); if (err) goto sw_setkey_failed; - } else if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256 && - !ctx->sw_cipher) - err = -EINVAL; + } memcpy(ctx->key, key, len); ctx->key_len = len; sw_setkey_failed: - if (err && ctx->sw_cipher) { - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= - ctx->sw_cipher->base.crt_flags & CRYPTO_TFM_RES_MASK; - } - return err; } @@ -910,20 +912,21 @@ static int spacc_ablk_do_fallback(struct ablkcipher_request *req, struct crypto_tfm *old_tfm = crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(old_tfm); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->sw_cipher); int err; - if (!ctx->sw_cipher) - return -EINVAL; - /* * Change the request to use the software fallback transform, and once * the ciphering has completed, put the old transform back into the * request. */ - ablkcipher_request_set_tfm(req, ctx->sw_cipher); - err = is_encrypt ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(old_tfm)); + skcipher_request_set_tfm(subreq, ctx->sw_cipher); + skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = is_encrypt ? crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -1015,12 +1018,13 @@ static int spacc_ablk_cra_init(struct crypto_tfm *tfm) ctx->generic.flags = spacc_alg->type; ctx->generic.engine = engine; if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { - ctx->sw_cipher = crypto_alloc_ablkcipher(alg->cra_name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + ctx->sw_cipher = crypto_alloc_skcipher( + alg->cra_name, 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->sw_cipher)) { dev_warn(engine->dev, "failed to allocate fallback for %s\n", alg->cra_name); - ctx->sw_cipher = NULL; + return PTR_ERR(ctx->sw_cipher); } } ctx->generic.key_offs = spacc_alg->key_offs; @@ -1035,9 +1039,7 @@ static void spacc_ablk_cra_exit(struct crypto_tfm *tfm) { struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->sw_cipher) - crypto_free_ablkcipher(ctx->sw_cipher); - ctx->sw_cipher = NULL; + crypto_free_skcipher(ctx->sw_cipher); } static int spacc_ablk_encrypt(struct ablkcipher_request *req) diff --git a/drivers/crypto/qat/Kconfig b/drivers/crypto/qat/Kconfig index 85b44e577684..ce3cae40f949 100644 --- a/drivers/crypto/qat/Kconfig +++ b/drivers/crypto/qat/Kconfig @@ -4,12 +4,13 @@ config CRYPTO_DEV_QAT select CRYPTO_AUTHENC select CRYPTO_BLKCIPHER select CRYPTO_AKCIPHER + select CRYPTO_DH select CRYPTO_HMAC + select CRYPTO_RSA select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 select FW_LOADER - select ASN1 config CRYPTO_DEV_QAT_DH895xCC tristate "Support for Intel(R) DH895xCC" diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index c5bd5a9abc4d..6bc68bc00d76 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -229,6 +229,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_flr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; } diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c index e13bd08ddd1e..640c3fc870fd 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c @@ -300,9 +300,7 @@ static void adf_remove(struct pci_dev *pdev) pr_err("QAT: Driver removal failed\n"); return; } - if (adf_dev_stop(accel_dev)) - dev_err(&GET_DEV(accel_dev), "Failed to stop QAT accel dev\n"); - + adf_dev_stop(accel_dev); adf_dev_shutdown(accel_dev); adf_disable_aer(accel_dev); adf_cleanup_accel(accel_dev); diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c index 1af321c2ce1a..d2d0ae445fd8 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c @@ -109,29 +109,6 @@ static void adf_vf_void_noop(struct adf_accel_dev *accel_dev) { } -static int adf_vf2pf_init(struct adf_accel_dev *accel_dev) -{ - u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM | - (ADF_VF2PF_MSGTYPE_INIT << ADF_VF2PF_MSGTYPE_SHIFT)); - - if (adf_iov_putmsg(accel_dev, msg, 0)) { - dev_err(&GET_DEV(accel_dev), - "Failed to send Init event to PF\n"); - return -EFAULT; - } - return 0; -} - -static void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev) -{ - u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM | - (ADF_VF2PF_MSGTYPE_SHUTDOWN << ADF_VF2PF_MSGTYPE_SHIFT)); - - if (adf_iov_putmsg(accel_dev, msg, 0)) - dev_err(&GET_DEV(accel_dev), - "Failed to send Shutdown event to PF\n"); -} - void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) { hw_data->dev_class = &c3xxxiov_class; diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c index 1ac4ae90e072..949d77b79fbe 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c @@ -238,6 +238,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_err_free_reg; + set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status); + ret = adf_dev_init(accel_dev); if (ret) goto out_err_dev_shutdown; @@ -270,9 +272,7 @@ static void adf_remove(struct pci_dev *pdev) pr_err("QAT: Driver removal failed\n"); return; } - if (adf_dev_stop(accel_dev)) - dev_err(&GET_DEV(accel_dev), "Failed to stop QAT accel dev\n"); - + adf_dev_stop(accel_dev); adf_dev_shutdown(accel_dev); adf_cleanup_accel(accel_dev); adf_cleanup_pci_dev(accel_dev); diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index 879e04cae714..618cec360b39 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -239,6 +239,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_flr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; } diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index 512c56509718..bc5cbc193aae 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -300,9 +300,7 @@ static void adf_remove(struct pci_dev *pdev) pr_err("QAT: Driver removal failed\n"); return; } - if (adf_dev_stop(accel_dev)) - dev_err(&GET_DEV(accel_dev), "Failed to stop QAT accel dev\n"); - + adf_dev_stop(accel_dev); adf_dev_shutdown(accel_dev); adf_disable_aer(accel_dev); adf_cleanup_accel(accel_dev); diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c index baf4b509c892..38e4bc04f407 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c @@ -109,29 +109,6 @@ static void adf_vf_void_noop(struct adf_accel_dev *accel_dev) { } -static int adf_vf2pf_init(struct adf_accel_dev *accel_dev) -{ - u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM | - (ADF_VF2PF_MSGTYPE_INIT << ADF_VF2PF_MSGTYPE_SHIFT)); - - if (adf_iov_putmsg(accel_dev, msg, 0)) { - dev_err(&GET_DEV(accel_dev), - "Failed to send Init event to PF\n"); - return -EFAULT; - } - return 0; -} - -static void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev) -{ - u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM | - (ADF_VF2PF_MSGTYPE_SHUTDOWN << ADF_VF2PF_MSGTYPE_SHIFT)); - - if (adf_iov_putmsg(accel_dev, msg, 0)) - dev_err(&GET_DEV(accel_dev), - "Failed to send Shutdown event to PF\n"); -} - void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data) { hw_data->dev_class = &c62xiov_class; diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c index d2e4b928f3be..7540ce13b0d0 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c @@ -238,6 +238,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_err_free_reg; + set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status); + ret = adf_dev_init(accel_dev); if (ret) goto out_err_dev_shutdown; @@ -270,9 +272,7 @@ static void adf_remove(struct pci_dev *pdev) pr_err("QAT: Driver removal failed\n"); return; } - if (adf_dev_stop(accel_dev)) - dev_err(&GET_DEV(accel_dev), "Failed to stop QAT accel dev\n"); - + adf_dev_stop(accel_dev); adf_dev_shutdown(accel_dev); adf_cleanup_accel(accel_dev); adf_cleanup_pci_dev(accel_dev); diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile index 29c7c53d2845..92fb6ffdc062 100644 --- a/drivers/crypto/qat/qat_common/Makefile +++ b/drivers/crypto/qat/qat_common/Makefile @@ -1,15 +1,6 @@ -$(obj)/qat_rsapubkey-asn1.o: $(obj)/qat_rsapubkey-asn1.c \ - $(obj)/qat_rsapubkey-asn1.h -$(obj)/qat_rsaprivkey-asn1.o: $(obj)/qat_rsaprivkey-asn1.c \ - $(obj)/qat_rsaprivkey-asn1.h - -clean-files += qat_rsapubkey-asn1.c qat_rsapubkey-asn1.h -clean-files += qat_rsaprivkey-asn1.c qat_rsaprivkey-asn1.h - obj-$(CONFIG_CRYPTO_DEV_QAT) += intel_qat.o intel_qat-objs := adf_cfg.o \ adf_isr.o \ - adf_vf_isr.o \ adf_ctl_drv.o \ adf_dev_mgr.o \ adf_init.o \ @@ -20,11 +11,10 @@ intel_qat-objs := adf_cfg.o \ adf_hw_arbiter.o \ qat_crypto.o \ qat_algs.o \ - qat_rsapubkey-asn1.o \ - qat_rsaprivkey-asn1.o \ qat_asym_algs.o \ qat_uclo.o \ qat_hal.o intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o -intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_pf2vf_msg.o +intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_pf2vf_msg.o \ + adf_vf2pf_msg.o adf_vf_isr.o diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index 5a07208ce778..e8822536530b 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -176,6 +176,7 @@ struct adf_hw_device_data { void (*disable_iov)(struct adf_accel_dev *accel_dev); void (*enable_ints)(struct adf_accel_dev *accel_dev); int (*enable_vf2pf_comms)(struct adf_accel_dev *accel_dev); + void (*reset_device)(struct adf_accel_dev *accel_dev); const char *fw_name; const char *fw_mmp_name; uint32_t fuses; diff --git a/drivers/crypto/qat/qat_common/adf_admin.c b/drivers/crypto/qat/qat_common/adf_admin.c index eb557f69e367..ce7c4626c983 100644 --- a/drivers/crypto/qat/qat_common/adf_admin.c +++ b/drivers/crypto/qat/qat_common/adf_admin.c @@ -61,7 +61,7 @@ #define ADF_DH895XCC_MAILBOX_STRIDE 0x1000 #define ADF_ADMINMSG_LEN 32 -static const u8 const_tab[1024] = { +static const u8 const_tab[1024] __aligned(1024) = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index b40d9c8dad96..2839fccdd84b 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -82,18 +82,12 @@ struct adf_reset_dev_data { struct work_struct reset_work; }; -void adf_dev_restore(struct adf_accel_dev *accel_dev) +void adf_reset_sbr(struct adf_accel_dev *accel_dev) { struct pci_dev *pdev = accel_to_pci_dev(accel_dev); struct pci_dev *parent = pdev->bus->self; uint16_t bridge_ctl = 0; - if (accel_dev->is_vf) - return; - - dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n", - accel_dev->accel_id); - if (!parent) parent = pdev; @@ -101,6 +95,8 @@ void adf_dev_restore(struct adf_accel_dev *accel_dev) dev_info(&GET_DEV(accel_dev), "Transaction still in progress. Proceeding\n"); + dev_info(&GET_DEV(accel_dev), "Secondary bus reset\n"); + pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl); bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET; pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl); @@ -108,8 +104,40 @@ void adf_dev_restore(struct adf_accel_dev *accel_dev) bridge_ctl &= ~PCI_BRIDGE_CTL_BUS_RESET; pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl); msleep(100); - pci_restore_state(pdev); - pci_save_state(pdev); +} +EXPORT_SYMBOL_GPL(adf_reset_sbr); + +void adf_reset_flr(struct adf_accel_dev *accel_dev) +{ + struct pci_dev *pdev = accel_to_pci_dev(accel_dev); + u16 control = 0; + int pos = 0; + + dev_info(&GET_DEV(accel_dev), "Function level reset\n"); + pos = pci_pcie_cap(pdev); + if (!pos) { + dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); + return; + } + pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &control); + control |= PCI_EXP_DEVCTL_BCR_FLR; + pci_write_config_word(pdev, pos + PCI_EXP_DEVCTL, control); + msleep(100); +} +EXPORT_SYMBOL_GPL(adf_reset_flr); + +void adf_dev_restore(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_device = accel_dev->hw_device; + struct pci_dev *pdev = accel_to_pci_dev(accel_dev); + + if (hw_device->reset_device) { + dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n", + accel_dev->accel_id); + hw_device->reset_device(accel_dev); + pci_restore_state(pdev); + pci_save_state(pdev); + } } static void adf_device_reset_worker(struct work_struct *work) @@ -243,7 +271,8 @@ EXPORT_SYMBOL_GPL(adf_disable_aer); int adf_init_aer(void) { - device_reset_wq = create_workqueue("qat_device_reset_wq"); + device_reset_wq = alloc_workqueue("qat_device_reset_wq", + WQ_MEM_RECLAIM, 0); return !device_reset_wq ? -EFAULT : 0; } diff --git a/drivers/crypto/qat/qat_common/adf_cfg_strings.h b/drivers/crypto/qat/qat_common/adf_cfg_strings.h index 13575111382c..7632ed0f25c5 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg_strings.h +++ b/drivers/crypto/qat/qat_common/adf_cfg_strings.h @@ -57,10 +57,8 @@ #define ADF_RING_DC_SIZE "NumConcurrentRequests" #define ADF_RING_ASYM_TX "RingAsymTx" #define ADF_RING_SYM_TX "RingSymTx" -#define ADF_RING_RND_TX "RingNrbgTx" #define ADF_RING_ASYM_RX "RingAsymRx" #define ADF_RING_SYM_RX "RingSymRx" -#define ADF_RING_RND_RX "RingNrbgRx" #define ADF_RING_DC_TX "RingTx" #define ADF_RING_DC_RX "RingRx" #define ADF_ETRMGR_BANK "Bank" diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 976b01e58afb..980e07475012 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -67,7 +67,7 @@ #define ADF_STATUS_AE_INITIALISED 4 #define ADF_STATUS_AE_UCODE_LOADED 5 #define ADF_STATUS_AE_STARTED 6 -#define ADF_STATUS_ORPHAN_TH_RUNNING 7 +#define ADF_STATUS_PF_RUNNING 7 #define ADF_STATUS_IRQ_ALLOCATED 8 enum adf_dev_reset_mode { @@ -103,7 +103,7 @@ int adf_service_unregister(struct service_hndl *service); int adf_dev_init(struct adf_accel_dev *accel_dev); int adf_dev_start(struct adf_accel_dev *accel_dev); -int adf_dev_stop(struct adf_accel_dev *accel_dev); +void adf_dev_stop(struct adf_accel_dev *accel_dev); void adf_dev_shutdown(struct adf_accel_dev *accel_dev); int adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr); @@ -141,6 +141,8 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev); int adf_enable_aer(struct adf_accel_dev *accel_dev, struct pci_driver *adf); void adf_disable_aer(struct adf_accel_dev *accel_dev); +void adf_reset_sbr(struct adf_accel_dev *accel_dev); +void adf_reset_flr(struct adf_accel_dev *accel_dev); void adf_dev_restore(struct adf_accel_dev *accel_dev); int adf_init_aer(void); void adf_exit_aer(void); @@ -236,8 +238,13 @@ void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, uint32_t vf_mask); void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); + +int adf_vf2pf_init(struct adf_accel_dev *accel_dev); +void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev); int adf_init_pf_wq(void); void adf_exit_pf_wq(void); +int adf_init_vf_wq(void); +void adf_exit_vf_wq(void); #else static inline int adf_sriov_configure(struct pci_dev *pdev, int numvfs) { @@ -256,6 +263,15 @@ static inline void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) { } +static inline int adf_vf2pf_init(struct adf_accel_dev *accel_dev) +{ + return 0; +} + +static inline void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev) +{ +} + static inline int adf_init_pf_wq(void) { return 0; @@ -264,5 +280,15 @@ static inline int adf_init_pf_wq(void) static inline void adf_exit_pf_wq(void) { } + +static inline int adf_init_vf_wq(void) +{ + return 0; +} + +static inline void adf_exit_vf_wq(void) +{ +} + #endif #endif diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c index 3c3f948290ca..abc7a7f64d64 100644 --- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c +++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c @@ -270,26 +270,33 @@ static int adf_ctl_is_device_in_use(int id) return 0; } -static int adf_ctl_stop_devices(uint32_t id) +static void adf_ctl_stop_devices(uint32_t id) { struct adf_accel_dev *accel_dev; - int ret = 0; - list_for_each_entry_reverse(accel_dev, adf_devmgr_get_head(), list) { + list_for_each_entry(accel_dev, adf_devmgr_get_head(), list) { if (id == accel_dev->accel_id || id == ADF_CFG_ALL_DEVICES) { if (!adf_dev_started(accel_dev)) continue; - if (adf_dev_stop(accel_dev)) { - dev_err(&GET_DEV(accel_dev), - "Failed to stop qat_dev%d\n", id); - ret = -EFAULT; - } else { - adf_dev_shutdown(accel_dev); - } + /* First stop all VFs */ + if (!accel_dev->is_vf) + continue; + + adf_dev_stop(accel_dev); + adf_dev_shutdown(accel_dev); + } + } + + list_for_each_entry(accel_dev, adf_devmgr_get_head(), list) { + if (id == accel_dev->accel_id || id == ADF_CFG_ALL_DEVICES) { + if (!adf_dev_started(accel_dev)) + continue; + + adf_dev_stop(accel_dev); + adf_dev_shutdown(accel_dev); } } - return ret; } static int adf_ctl_ioctl_dev_stop(struct file *fp, unsigned int cmd, @@ -318,9 +325,8 @@ static int adf_ctl_ioctl_dev_stop(struct file *fp, unsigned int cmd, pr_info("QAT: Stopping acceleration device qat_dev%d.\n", ctl_data->device_id); - ret = adf_ctl_stop_devices(ctl_data->device_id); - if (ret) - pr_err("QAT: failed to stop device.\n"); + adf_ctl_stop_devices(ctl_data->device_id); + out: kfree(ctl_data); return ret; @@ -465,12 +471,17 @@ static int __init adf_register_ctl_device_driver(void) if (adf_init_pf_wq()) goto err_pf_wq; + if (adf_init_vf_wq()) + goto err_vf_wq; + if (qat_crypto_register()) goto err_crypto_register; return 0; err_crypto_register: + adf_exit_vf_wq(); +err_vf_wq: adf_exit_pf_wq(); err_pf_wq: adf_exit_aer(); @@ -485,6 +496,7 @@ static void __exit adf_unregister_ctl_device_driver(void) { adf_chr_drv_destroy(); adf_exit_aer(); + adf_exit_vf_wq(); adf_exit_pf_wq(); qat_crypto_unregister(); adf_clean_vf_map(false); diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index ef5575e4a215..888c6675e7e5 100644 --- a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c @@ -236,9 +236,9 @@ EXPORT_SYMBOL_GPL(adf_dev_start); * is shuting down. * To be used by QAT device specific drivers. * - * Return: 0 on success, error code otherwise. + * Return: void */ -int adf_dev_stop(struct adf_accel_dev *accel_dev) +void adf_dev_stop(struct adf_accel_dev *accel_dev) { struct service_hndl *service; struct list_head *list_itr; @@ -246,9 +246,9 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev) int ret; if (!adf_dev_started(accel_dev) && - !test_bit(ADF_STATUS_STARTING, &accel_dev->status)) { - return 0; - } + !test_bit(ADF_STATUS_STARTING, &accel_dev->status)) + return; + clear_bit(ADF_STATUS_STARTING, &accel_dev->status); clear_bit(ADF_STATUS_STARTED, &accel_dev->status); @@ -279,8 +279,6 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev) else clear_bit(ADF_STATUS_AE_STARTED, &accel_dev->status); } - - return 0; } EXPORT_SYMBOL_GPL(adf_dev_stop); @@ -329,6 +327,8 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev) clear_bit(accel_dev->accel_id, &service->init_status); } + hw_data->disable_iov(accel_dev); + if (test_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status)) { hw_data->free_irq(accel_dev); clear_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status); @@ -344,7 +344,6 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev) if (hw_data->exit_admin_comms) hw_data->exit_admin_comms(accel_dev); - hw_data->disable_iov(accel_dev); adf_cleanup_etr_data(accel_dev); adf_dev_restore(accel_dev); } diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c index b81f79acc4ea..06d49017a52b 100644 --- a/drivers/crypto/qat/qat_common/adf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_isr.c @@ -302,7 +302,7 @@ static void adf_cleanup_bh(struct adf_accel_dev *accel_dev) } /** - * adf_vf_isr_resource_free() - Free IRQ for acceleration device + * adf_isr_resource_free() - Free IRQ for acceleration device * @accel_dev: Pointer to acceleration device. * * Function frees interrupts for acceleration device. @@ -317,7 +317,7 @@ void adf_isr_resource_free(struct adf_accel_dev *accel_dev) EXPORT_SYMBOL_GPL(adf_isr_resource_free); /** - * adf_vf_isr_resource_alloc() - Allocate IRQ for acceleration device + * adf_isr_resource_alloc() - Allocate IRQ for acceleration device * @accel_dev: Pointer to acceleration device. * * Function allocates interrupts for acceleration device. diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 38a0415e767d..9320ae1d005b 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -249,13 +249,7 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs) return -EBUSY; } - if (adf_dev_stop(accel_dev)) { - dev_err(&GET_DEV(accel_dev), - "Failed to stop qat_dev%d\n", - accel_dev->accel_id); - return -EFAULT; - } - + adf_dev_stop(accel_dev); adf_dev_shutdown(accel_dev); } @@ -298,7 +292,7 @@ EXPORT_SYMBOL_GPL(adf_sriov_configure); int __init adf_init_pf_wq(void) { /* Workqueue for PF2VF responses */ - pf2vf_resp_wq = create_workqueue("qat_pf2vf_resp_wq"); + pf2vf_resp_wq = alloc_workqueue("qat_pf2vf_resp_wq", WQ_MEM_RECLAIM, 0); return !pf2vf_resp_wq ? -ENOMEM : 0; } diff --git a/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c new file mode 100644 index 000000000000..cd5f37dffe8a --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c @@ -0,0 +1,92 @@ +/* + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + Copyright(c) 2015 Intel Corporation. + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Contact Information: + qat-linux@intel.com + + BSD LICENSE + Copyright(c) 2015 Intel Corporation. + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +#include "adf_accel_devices.h" +#include "adf_common_drv.h" +#include "adf_pf2vf_msg.h" + +/** + * adf_vf2pf_init() - send init msg to PF + * @accel_dev: Pointer to acceleration VF device. + * + * Function sends an init messge from the VF to a PF + * + * Return: 0 on success, error code otherwise. + */ +int adf_vf2pf_init(struct adf_accel_dev *accel_dev) +{ + u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM | + (ADF_VF2PF_MSGTYPE_INIT << ADF_VF2PF_MSGTYPE_SHIFT)); + + if (adf_iov_putmsg(accel_dev, msg, 0)) { + dev_err(&GET_DEV(accel_dev), + "Failed to send Init event to PF\n"); + return -EFAULT; + } + set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status); + return 0; +} +EXPORT_SYMBOL_GPL(adf_vf2pf_init); + +/** + * adf_vf2pf_shutdown() - send shutdown msg to PF + * @accel_dev: Pointer to acceleration VF device. + * + * Function sends a shutdown messge from the VF to a PF + * + * Return: void + */ +void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev) +{ + u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM | + (ADF_VF2PF_MSGTYPE_SHUTDOWN << ADF_VF2PF_MSGTYPE_SHIFT)); + + if (test_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status)) + if (adf_iov_putmsg(accel_dev, msg, 0)) + dev_err(&GET_DEV(accel_dev), + "Failed to send Shutdown event to PF\n"); +} +EXPORT_SYMBOL_GPL(adf_vf2pf_shutdown); diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index 09427b3d4d55..bf99e11a3403 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -51,6 +51,7 @@ #include <linux/slab.h> #include <linux/errno.h> #include <linux/interrupt.h> +#include <linux/workqueue.h> #include "adf_accel_devices.h" #include "adf_common_drv.h" #include "adf_cfg.h" @@ -64,6 +65,13 @@ #define ADF_VINTSOU_BUN BIT(0) #define ADF_VINTSOU_PF2VF BIT(1) +static struct workqueue_struct *adf_vf_stop_wq; + +struct adf_vf_stop_data { + struct adf_accel_dev *accel_dev; + struct work_struct work; +}; + static int adf_enable_msi(struct adf_accel_dev *accel_dev) { struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev; @@ -90,6 +98,20 @@ static void adf_disable_msi(struct adf_accel_dev *accel_dev) pci_disable_msi(pdev); } +static void adf_dev_stop_async(struct work_struct *work) +{ + struct adf_vf_stop_data *stop_data = + container_of(work, struct adf_vf_stop_data, work); + struct adf_accel_dev *accel_dev = stop_data->accel_dev; + + adf_dev_stop(accel_dev); + adf_dev_shutdown(accel_dev); + + /* Re-enable PF2VF interrupts */ + adf_enable_pf2vf_interrupts(accel_dev); + kfree(stop_data); +} + static void adf_pf2vf_bh_handler(void *data) { struct adf_accel_dev *accel_dev = data; @@ -107,11 +129,29 @@ static void adf_pf2vf_bh_handler(void *data) goto err; switch ((msg & ADF_PF2VF_MSGTYPE_MASK) >> ADF_PF2VF_MSGTYPE_SHIFT) { - case ADF_PF2VF_MSGTYPE_RESTARTING: + case ADF_PF2VF_MSGTYPE_RESTARTING: { + struct adf_vf_stop_data *stop_data; + dev_dbg(&GET_DEV(accel_dev), "Restarting msg received from PF 0x%x\n", msg); - adf_dev_stop(accel_dev); - break; + + clear_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status); + + stop_data = kzalloc(sizeof(*stop_data), GFP_ATOMIC); + if (!stop_data) { + dev_err(&GET_DEV(accel_dev), + "Couldn't schedule stop for vf_%d\n", + accel_dev->accel_id); + return; + } + stop_data->accel_dev = accel_dev; + INIT_WORK(&stop_data->work, adf_dev_stop_async); + queue_work(adf_vf_stop_wq, &stop_data->work); + /* To ack, clear the PF2VFINT bit */ + msg &= ~BIT(0); + ADF_CSR_WR(pmisc_bar_addr, hw_data->get_pf2vf_offset(0), msg); + return; + } case ADF_PF2VF_MSGTYPE_VERSION_RESP: dev_dbg(&GET_DEV(accel_dev), "Version resp received from PF 0x%x\n", msg); @@ -278,3 +318,18 @@ err_out: return -EFAULT; } EXPORT_SYMBOL_GPL(adf_vf_isr_resource_alloc); + +int __init adf_init_vf_wq(void) +{ + adf_vf_stop_wq = alloc_workqueue("adf_vf_stop_wq", WQ_MEM_RECLAIM, 0); + + return !adf_vf_stop_wq ? -EFAULT : 0; +} + +void adf_exit_vf_wq(void) +{ + if (adf_vf_stop_wq) + destroy_workqueue(adf_vf_stop_wq); + + adf_vf_stop_wq = NULL; +} diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 1e8852a8a057..20f35df8a01f 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -947,13 +947,13 @@ static int qat_alg_ablkcipher_setkey(struct crypto_ablkcipher *tfm, return 0; out_free_all: - memset(ctx->dec_cd, 0, sizeof(*ctx->enc_cd)); - dma_free_coherent(dev, sizeof(*ctx->enc_cd), + memset(ctx->dec_cd, 0, sizeof(*ctx->dec_cd)); + dma_free_coherent(dev, sizeof(*ctx->dec_cd), ctx->dec_cd, ctx->dec_cd_paddr); ctx->dec_cd = NULL; out_free_enc: - memset(ctx->enc_cd, 0, sizeof(*ctx->dec_cd)); - dma_free_coherent(dev, sizeof(*ctx->dec_cd), + memset(ctx->enc_cd, 0, sizeof(*ctx->enc_cd)); + dma_free_coherent(dev, sizeof(*ctx->enc_cd), ctx->enc_cd, ctx->enc_cd_paddr); ctx->enc_cd = NULL; return -ENOMEM; @@ -1260,8 +1260,8 @@ static struct crypto_alg qat_algs[] = { { .setkey = qat_alg_ablkcipher_xts_setkey, .decrypt = qat_alg_ablkcipher_decrypt, .encrypt = qat_alg_ablkcipher_encrypt, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, }, }, diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index e5c0727d4876..0d35dca2e925 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -49,11 +49,12 @@ #include <crypto/internal/rsa.h> #include <crypto/internal/akcipher.h> #include <crypto/akcipher.h> +#include <crypto/kpp.h> +#include <crypto/internal/kpp.h> +#include <crypto/dh.h> #include <linux/dma-mapping.h> #include <linux/fips.h> #include <crypto/scatterwalk.h> -#include "qat_rsapubkey-asn1.h" -#include "qat_rsaprivkey-asn1.h" #include "icp_qat_fw_pke.h" #include "adf_accel_devices.h" #include "adf_transport.h" @@ -75,6 +76,14 @@ struct qat_rsa_input_params { dma_addr_t d; dma_addr_t n; } dec; + struct { + dma_addr_t c; + dma_addr_t p; + dma_addr_t q; + dma_addr_t dp; + dma_addr_t dq; + dma_addr_t qinv; + } dec_crt; u64 in_tab[8]; }; } __packed __aligned(64); @@ -95,71 +104,480 @@ struct qat_rsa_ctx { char *n; char *e; char *d; + char *p; + char *q; + char *dp; + char *dq; + char *qinv; dma_addr_t dma_n; dma_addr_t dma_e; dma_addr_t dma_d; + dma_addr_t dma_p; + dma_addr_t dma_q; + dma_addr_t dma_dp; + dma_addr_t dma_dq; + dma_addr_t dma_qinv; unsigned int key_sz; + bool crt_mode; + struct qat_crypto_instance *inst; +} __packed __aligned(64); + +struct qat_dh_input_params { + union { + struct { + dma_addr_t b; + dma_addr_t xa; + dma_addr_t p; + } in; + struct { + dma_addr_t xa; + dma_addr_t p; + } in_g2; + u64 in_tab[8]; + }; +} __packed __aligned(64); + +struct qat_dh_output_params { + union { + dma_addr_t r; + u64 out_tab[8]; + }; +} __packed __aligned(64); + +struct qat_dh_ctx { + char *g; + char *xa; + char *p; + dma_addr_t dma_g; + dma_addr_t dma_xa; + dma_addr_t dma_p; + unsigned int p_size; + bool g2; struct qat_crypto_instance *inst; } __packed __aligned(64); -struct qat_rsa_request { - struct qat_rsa_input_params in; - struct qat_rsa_output_params out; +struct qat_asym_request { + union { + struct qat_rsa_input_params rsa; + struct qat_dh_input_params dh; + } in; + union { + struct qat_rsa_output_params rsa; + struct qat_dh_output_params dh; + } out; dma_addr_t phy_in; dma_addr_t phy_out; char *src_align; char *dst_align; struct icp_qat_fw_pke_request req; - struct qat_rsa_ctx *ctx; + union { + struct qat_rsa_ctx *rsa; + struct qat_dh_ctx *dh; + } ctx; + union { + struct akcipher_request *rsa; + struct kpp_request *dh; + } areq; int err; + void (*cb)(struct icp_qat_fw_pke_resp *resp); } __aligned(64); -static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) +static void qat_dh_cb(struct icp_qat_fw_pke_resp *resp) { - struct akcipher_request *areq = (void *)(__force long)resp->opaque; - struct qat_rsa_request *req = PTR_ALIGN(akcipher_request_ctx(areq), 64); - struct device *dev = &GET_DEV(req->ctx->inst->accel_dev); + struct qat_asym_request *req = (void *)(__force long)resp->opaque; + struct kpp_request *areq = req->areq.dh; + struct device *dev = &GET_DEV(req->ctx.dh->inst->accel_dev); int err = ICP_QAT_FW_PKE_RESP_PKE_STAT_GET( resp->pke_resp_hdr.comn_resp_flags); err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL; - if (req->src_align) - dma_free_coherent(dev, req->ctx->key_sz, req->src_align, - req->in.enc.m); - else - dma_unmap_single(dev, req->in.enc.m, req->ctx->key_sz, - DMA_TO_DEVICE); + if (areq->src) { + if (req->src_align) + dma_free_coherent(dev, req->ctx.dh->p_size, + req->src_align, req->in.dh.in.b); + else + dma_unmap_single(dev, req->in.dh.in.b, + req->ctx.dh->p_size, DMA_TO_DEVICE); + } - areq->dst_len = req->ctx->key_sz; + areq->dst_len = req->ctx.dh->p_size; if (req->dst_align) { - char *ptr = req->dst_align; + scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, + areq->dst_len, 1); - while (!(*ptr) && areq->dst_len) { - areq->dst_len--; - ptr++; - } + dma_free_coherent(dev, req->ctx.dh->p_size, req->dst_align, + req->out.dh.r); + } else { + dma_unmap_single(dev, req->out.dh.r, req->ctx.dh->p_size, + DMA_FROM_DEVICE); + } - if (areq->dst_len != req->ctx->key_sz) - memmove(req->dst_align, ptr, areq->dst_len); + dma_unmap_single(dev, req->phy_in, sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); + dma_unmap_single(dev, req->phy_out, + sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); - scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, - areq->dst_len, 1); + kpp_request_complete(areq, err); +} + +#define PKE_DH_1536 0x390c1a49 +#define PKE_DH_G2_1536 0x2e0b1a3e +#define PKE_DH_2048 0x4d0c1a60 +#define PKE_DH_G2_2048 0x3e0b1a55 +#define PKE_DH_3072 0x510c1a77 +#define PKE_DH_G2_3072 0x3a0b1a6c +#define PKE_DH_4096 0x690c1a8e +#define PKE_DH_G2_4096 0x4a0b1a83 - dma_free_coherent(dev, req->ctx->key_sz, req->dst_align, - req->out.enc.c); +static unsigned long qat_dh_fn_id(unsigned int len, bool g2) +{ + unsigned int bitslen = len << 3; + + switch (bitslen) { + case 1536: + return g2 ? PKE_DH_G2_1536 : PKE_DH_1536; + case 2048: + return g2 ? PKE_DH_G2_2048 : PKE_DH_2048; + case 3072: + return g2 ? PKE_DH_G2_3072 : PKE_DH_3072; + case 4096: + return g2 ? PKE_DH_G2_4096 : PKE_DH_4096; + default: + return 0; + }; +} + +static inline struct qat_dh_ctx *qat_dh_get_params(struct crypto_kpp *tfm) +{ + return kpp_tfm_ctx(tfm); +} + +static int qat_dh_compute_value(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct qat_crypto_instance *inst = ctx->inst; + struct device *dev = &GET_DEV(inst->accel_dev); + struct qat_asym_request *qat_req = + PTR_ALIGN(kpp_request_ctx(req), 64); + struct icp_qat_fw_pke_request *msg = &qat_req->req; + int ret, ctr = 0; + int n_input_params = 0; + + if (unlikely(!ctx->xa)) + return -EINVAL; + + if (req->dst_len < ctx->p_size) { + req->dst_len = ctx->p_size; + return -EOVERFLOW; + } + memset(msg, '\0', sizeof(*msg)); + ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, + ICP_QAT_FW_COMN_REQ_FLAG_SET); + + msg->pke_hdr.cd_pars.func_id = qat_dh_fn_id(ctx->p_size, + !req->src && ctx->g2); + if (unlikely(!msg->pke_hdr.cd_pars.func_id)) + return -EINVAL; + + qat_req->cb = qat_dh_cb; + qat_req->ctx.dh = ctx; + qat_req->areq.dh = req; + msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE; + msg->pke_hdr.comn_req_flags = + ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, + QAT_COMN_CD_FLD_TYPE_64BIT_ADR); + + /* + * If no source is provided use g as base + */ + if (req->src) { + qat_req->in.dh.in.xa = ctx->dma_xa; + qat_req->in.dh.in.p = ctx->dma_p; + n_input_params = 3; } else { - char *ptr = sg_virt(areq->dst); + if (ctx->g2) { + qat_req->in.dh.in_g2.xa = ctx->dma_xa; + qat_req->in.dh.in_g2.p = ctx->dma_p; + n_input_params = 2; + } else { + qat_req->in.dh.in.b = ctx->dma_g; + qat_req->in.dh.in.xa = ctx->dma_xa; + qat_req->in.dh.in.p = ctx->dma_p; + n_input_params = 3; + } + } - while (!(*ptr) && areq->dst_len) { - areq->dst_len--; - ptr++; + ret = -ENOMEM; + if (req->src) { + /* + * src can be of any size in valid range, but HW expects it to + * be the same as modulo p so in case it is different we need + * to allocate a new buf and copy src data. + * In other case we just need to map the user provided buffer. + * Also need to make sure that it is in contiguous buffer. + */ + if (sg_is_last(req->src) && req->src_len == ctx->p_size) { + qat_req->src_align = NULL; + qat_req->in.dh.in.b = dma_map_single(dev, + sg_virt(req->src), + req->src_len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, + qat_req->in.dh.in.b))) + return ret; + + } else { + int shift = ctx->p_size - req->src_len; + + qat_req->src_align = dma_zalloc_coherent(dev, + ctx->p_size, + &qat_req->in.dh.in.b, + GFP_KERNEL); + if (unlikely(!qat_req->src_align)) + return ret; + + scatterwalk_map_and_copy(qat_req->src_align + shift, + req->src, 0, req->src_len, 0); } + } + /* + * dst can be of any size in valid range, but HW expects it to be the + * same as modulo m so in case it is different we need to allocate a + * new buf and copy src data. + * In other case we just need to map the user provided buffer. + * Also need to make sure that it is in contiguous buffer. + */ + if (sg_is_last(req->dst) && req->dst_len == ctx->p_size) { + qat_req->dst_align = NULL; + qat_req->out.dh.r = dma_map_single(dev, sg_virt(req->dst), + req->dst_len, + DMA_FROM_DEVICE); + + if (unlikely(dma_mapping_error(dev, qat_req->out.dh.r))) + goto unmap_src; + + } else { + qat_req->dst_align = dma_zalloc_coherent(dev, ctx->p_size, + &qat_req->out.dh.r, + GFP_KERNEL); + if (unlikely(!qat_req->dst_align)) + goto unmap_src; + } + + qat_req->in.dh.in_tab[n_input_params] = 0; + qat_req->out.dh.out_tab[1] = 0; + /* Mapping in.in.b or in.in_g2.xa is the same */ + qat_req->phy_in = dma_map_single(dev, &qat_req->in.dh.in.b, + sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) + goto unmap_dst; + + qat_req->phy_out = dma_map_single(dev, &qat_req->out.dh.r, + sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) + goto unmap_in_params; + + msg->pke_mid.src_data_addr = qat_req->phy_in; + msg->pke_mid.dest_data_addr = qat_req->phy_out; + msg->pke_mid.opaque = (uint64_t)(__force long)qat_req; + msg->input_param_count = n_input_params; + msg->output_param_count = 1; + + do { + ret = adf_send_message(ctx->inst->pke_tx, (uint32_t *)msg); + } while (ret == -EBUSY && ctr++ < 100); + + if (!ret) + return -EINPROGRESS; + + if (!dma_mapping_error(dev, qat_req->phy_out)) + dma_unmap_single(dev, qat_req->phy_out, + sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); +unmap_in_params: + if (!dma_mapping_error(dev, qat_req->phy_in)) + dma_unmap_single(dev, qat_req->phy_in, + sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); +unmap_dst: + if (qat_req->dst_align) + dma_free_coherent(dev, ctx->p_size, qat_req->dst_align, + qat_req->out.dh.r); + else + if (!dma_mapping_error(dev, qat_req->out.dh.r)) + dma_unmap_single(dev, qat_req->out.dh.r, ctx->p_size, + DMA_FROM_DEVICE); +unmap_src: + if (req->src) { + if (qat_req->src_align) + dma_free_coherent(dev, ctx->p_size, qat_req->src_align, + qat_req->in.dh.in.b); + else + if (!dma_mapping_error(dev, qat_req->in.dh.in.b)) + dma_unmap_single(dev, qat_req->in.dh.in.b, + ctx->p_size, + DMA_TO_DEVICE); + } + return ret; +} + +static int qat_dh_check_params_length(unsigned int p_len) +{ + switch (p_len) { + case 1536: + case 2048: + case 3072: + case 4096: + return 0; + } + return -EINVAL; +} + +static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh *params) +{ + struct qat_crypto_instance *inst = ctx->inst; + struct device *dev = &GET_DEV(inst->accel_dev); + + if (unlikely(!params->p || !params->g)) + return -EINVAL; + + if (qat_dh_check_params_length(params->p_size << 3)) + return -EINVAL; + + ctx->p_size = params->p_size; + ctx->p = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_p, GFP_KERNEL); + if (!ctx->p) + return -ENOMEM; + memcpy(ctx->p, params->p, ctx->p_size); + + /* If g equals 2 don't copy it */ + if (params->g_size == 1 && *(char *)params->g == 0x02) { + ctx->g2 = true; + return 0; + } + + ctx->g = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_g, GFP_KERNEL); + if (!ctx->g) { + dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p); + ctx->p = NULL; + return -ENOMEM; + } + memcpy(ctx->g + (ctx->p_size - params->g_size), params->g, + params->g_size); + + return 0; +} + +static void qat_dh_clear_ctx(struct device *dev, struct qat_dh_ctx *ctx) +{ + if (ctx->g) { + dma_free_coherent(dev, ctx->p_size, ctx->g, ctx->dma_g); + ctx->g = NULL; + } + if (ctx->xa) { + dma_free_coherent(dev, ctx->p_size, ctx->xa, ctx->dma_xa); + ctx->xa = NULL; + } + if (ctx->p) { + dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p); + ctx->p = NULL; + } + ctx->p_size = 0; + ctx->g2 = false; +} + +static int qat_dh_set_secret(struct crypto_kpp *tfm, void *buf, + unsigned int len) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + struct dh params; + int ret; + + if (crypto_dh_decode_key(buf, len, ¶ms) < 0) + return -EINVAL; + + /* Free old secret if any */ + qat_dh_clear_ctx(dev, ctx); + + ret = qat_dh_set_params(ctx, ¶ms); + if (ret < 0) + return ret; + + ctx->xa = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_xa, + GFP_KERNEL); + if (!ctx->xa) { + qat_dh_clear_ctx(dev, ctx); + return -ENOMEM; + } + memcpy(ctx->xa + (ctx->p_size - params.key_size), params.key, + params.key_size); + + return 0; +} + +static int qat_dh_max_size(struct crypto_kpp *tfm) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + + return ctx->p ? ctx->p_size : -EINVAL; +} + +static int qat_dh_init_tfm(struct crypto_kpp *tfm) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct qat_crypto_instance *inst = + qat_crypto_get_instance_node(get_current_node()); + + if (!inst) + return -EINVAL; + + ctx->p_size = 0; + ctx->g2 = false; + ctx->inst = inst; + return 0; +} + +static void qat_dh_exit_tfm(struct crypto_kpp *tfm) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + + qat_dh_clear_ctx(dev, ctx); + qat_crypto_put_instance(ctx->inst); +} + +static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) +{ + struct qat_asym_request *req = (void *)(__force long)resp->opaque; + struct akcipher_request *areq = req->areq.rsa; + struct device *dev = &GET_DEV(req->ctx.rsa->inst->accel_dev); + int err = ICP_QAT_FW_PKE_RESP_PKE_STAT_GET( + resp->pke_resp_hdr.comn_resp_flags); + + err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL; - if (sg_virt(areq->dst) != ptr && areq->dst_len) - memmove(sg_virt(areq->dst), ptr, areq->dst_len); + if (req->src_align) + dma_free_coherent(dev, req->ctx.rsa->key_sz, req->src_align, + req->in.rsa.enc.m); + else + dma_unmap_single(dev, req->in.rsa.enc.m, req->ctx.rsa->key_sz, + DMA_TO_DEVICE); - dma_unmap_single(dev, req->out.enc.c, req->ctx->key_sz, + areq->dst_len = req->ctx.rsa->key_sz; + if (req->dst_align) { + scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, + areq->dst_len, 1); + + dma_free_coherent(dev, req->ctx.rsa->key_sz, req->dst_align, + req->out.rsa.enc.c); + } else { + dma_unmap_single(dev, req->out.rsa.enc.c, req->ctx.rsa->key_sz, DMA_FROM_DEVICE); } @@ -175,8 +593,9 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) void qat_alg_asym_callback(void *_resp) { struct icp_qat_fw_pke_resp *resp = _resp; + struct qat_asym_request *areq = (void *)(__force long)resp->opaque; - qat_rsa_cb(resp); + areq->cb(resp); } #define PKE_RSA_EP_512 0x1c161b21 @@ -237,13 +656,42 @@ static unsigned long qat_rsa_dec_fn_id(unsigned int len) }; } +#define PKE_RSA_DP2_512 0x1c131b57 +#define PKE_RSA_DP2_1024 0x26131c2d +#define PKE_RSA_DP2_1536 0x45111d12 +#define PKE_RSA_DP2_2048 0x59121dfa +#define PKE_RSA_DP2_3072 0x81121ed9 +#define PKE_RSA_DP2_4096 0xb1111fb2 + +static unsigned long qat_rsa_dec_fn_id_crt(unsigned int len) +{ + unsigned int bitslen = len << 3; + + switch (bitslen) { + case 512: + return PKE_RSA_DP2_512; + case 1024: + return PKE_RSA_DP2_1024; + case 1536: + return PKE_RSA_DP2_1536; + case 2048: + return PKE_RSA_DP2_2048; + case 3072: + return PKE_RSA_DP2_3072; + case 4096: + return PKE_RSA_DP2_4096; + default: + return 0; + }; +} + static int qat_rsa_enc(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); - struct qat_rsa_request *qat_req = + struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; int ret, ctr = 0; @@ -262,14 +710,16 @@ static int qat_rsa_enc(struct akcipher_request *req) if (unlikely(!msg->pke_hdr.cd_pars.func_id)) return -EINVAL; - qat_req->ctx = ctx; + qat_req->cb = qat_rsa_cb; + qat_req->ctx.rsa = ctx; + qat_req->areq.rsa = req; msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE; msg->pke_hdr.comn_req_flags = ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, QAT_COMN_CD_FLD_TYPE_64BIT_ADR); - qat_req->in.enc.e = ctx->dma_e; - qat_req->in.enc.n = ctx->dma_n; + qat_req->in.rsa.enc.e = ctx->dma_e; + qat_req->in.rsa.enc.n = ctx->dma_n; ret = -ENOMEM; /* @@ -281,16 +731,16 @@ static int qat_rsa_enc(struct akcipher_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) { qat_req->src_align = NULL; - qat_req->in.enc.m = dma_map_single(dev, sg_virt(req->src), + qat_req->in.rsa.enc.m = dma_map_single(dev, sg_virt(req->src), req->src_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->in.enc.m))) + if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.enc.m))) return ret; } else { int shift = ctx->key_sz - req->src_len; qat_req->src_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->in.enc.m, + &qat_req->in.rsa.enc.m, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; @@ -300,30 +750,30 @@ static int qat_rsa_enc(struct akcipher_request *req) } if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { qat_req->dst_align = NULL; - qat_req->out.enc.c = dma_map_single(dev, sg_virt(req->dst), - req->dst_len, - DMA_FROM_DEVICE); + qat_req->out.rsa.enc.c = dma_map_single(dev, sg_virt(req->dst), + req->dst_len, + DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->out.enc.c))) + if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.enc.c))) goto unmap_src; } else { qat_req->dst_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->out.enc.c, + &qat_req->out.rsa.enc.c, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; } - qat_req->in.in_tab[3] = 0; - qat_req->out.out_tab[1] = 0; - qat_req->phy_in = dma_map_single(dev, &qat_req->in.enc.m, + qat_req->in.rsa.in_tab[3] = 0; + qat_req->out.rsa.out_tab[1] = 0; + qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.enc.m, sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) goto unmap_dst; - qat_req->phy_out = dma_map_single(dev, &qat_req->out.enc.c, + qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.enc.c, sizeof(struct qat_rsa_output_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) @@ -331,7 +781,7 @@ static int qat_rsa_enc(struct akcipher_request *req) msg->pke_mid.src_data_addr = qat_req->phy_in; msg->pke_mid.dest_data_addr = qat_req->phy_out; - msg->pke_mid.opaque = (uint64_t)(__force long)req; + msg->pke_mid.opaque = (uint64_t)(__force long)qat_req; msg->input_param_count = 3; msg->output_param_count = 1; do { @@ -353,19 +803,19 @@ unmap_in_params: unmap_dst: if (qat_req->dst_align) dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align, - qat_req->out.enc.c); + qat_req->out.rsa.enc.c); else - if (!dma_mapping_error(dev, qat_req->out.enc.c)) - dma_unmap_single(dev, qat_req->out.enc.c, ctx->key_sz, - DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.rsa.enc.c)) + dma_unmap_single(dev, qat_req->out.rsa.enc.c, + ctx->key_sz, DMA_FROM_DEVICE); unmap_src: if (qat_req->src_align) dma_free_coherent(dev, ctx->key_sz, qat_req->src_align, - qat_req->in.enc.m); + qat_req->in.rsa.enc.m); else - if (!dma_mapping_error(dev, qat_req->in.enc.m)) - dma_unmap_single(dev, qat_req->in.enc.m, ctx->key_sz, - DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.rsa.enc.m)) + dma_unmap_single(dev, qat_req->in.rsa.enc.m, + ctx->key_sz, DMA_TO_DEVICE); return ret; } @@ -375,7 +825,7 @@ static int qat_rsa_dec(struct akcipher_request *req) struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); - struct qat_rsa_request *qat_req = + struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; int ret, ctr = 0; @@ -390,18 +840,30 @@ static int qat_rsa_dec(struct akcipher_request *req) memset(msg, '\0', sizeof(*msg)); ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, ICP_QAT_FW_COMN_REQ_FLAG_SET); - msg->pke_hdr.cd_pars.func_id = qat_rsa_dec_fn_id(ctx->key_sz); + msg->pke_hdr.cd_pars.func_id = ctx->crt_mode ? + qat_rsa_dec_fn_id_crt(ctx->key_sz) : + qat_rsa_dec_fn_id(ctx->key_sz); if (unlikely(!msg->pke_hdr.cd_pars.func_id)) return -EINVAL; - qat_req->ctx = ctx; + qat_req->cb = qat_rsa_cb; + qat_req->ctx.rsa = ctx; + qat_req->areq.rsa = req; msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE; msg->pke_hdr.comn_req_flags = ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, QAT_COMN_CD_FLD_TYPE_64BIT_ADR); - qat_req->in.dec.d = ctx->dma_d; - qat_req->in.dec.n = ctx->dma_n; + if (ctx->crt_mode) { + qat_req->in.rsa.dec_crt.p = ctx->dma_p; + qat_req->in.rsa.dec_crt.q = ctx->dma_q; + qat_req->in.rsa.dec_crt.dp = ctx->dma_dp; + qat_req->in.rsa.dec_crt.dq = ctx->dma_dq; + qat_req->in.rsa.dec_crt.qinv = ctx->dma_qinv; + } else { + qat_req->in.rsa.dec.d = ctx->dma_d; + qat_req->in.rsa.dec.n = ctx->dma_n; + } ret = -ENOMEM; /* @@ -413,16 +875,16 @@ static int qat_rsa_dec(struct akcipher_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) { qat_req->src_align = NULL; - qat_req->in.dec.c = dma_map_single(dev, sg_virt(req->src), + qat_req->in.rsa.dec.c = dma_map_single(dev, sg_virt(req->src), req->dst_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->in.dec.c))) + if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.dec.c))) return ret; } else { int shift = ctx->key_sz - req->src_len; qat_req->src_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->in.dec.c, + &qat_req->in.rsa.dec.c, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; @@ -432,31 +894,34 @@ static int qat_rsa_dec(struct akcipher_request *req) } if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { qat_req->dst_align = NULL; - qat_req->out.dec.m = dma_map_single(dev, sg_virt(req->dst), + qat_req->out.rsa.dec.m = dma_map_single(dev, sg_virt(req->dst), req->dst_len, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->out.dec.m))) + if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.dec.m))) goto unmap_src; } else { qat_req->dst_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->out.dec.m, + &qat_req->out.rsa.dec.m, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; } - qat_req->in.in_tab[3] = 0; - qat_req->out.out_tab[1] = 0; - qat_req->phy_in = dma_map_single(dev, &qat_req->in.dec.c, + if (ctx->crt_mode) + qat_req->in.rsa.in_tab[6] = 0; + else + qat_req->in.rsa.in_tab[3] = 0; + qat_req->out.rsa.out_tab[1] = 0; + qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.dec.c, sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) goto unmap_dst; - qat_req->phy_out = dma_map_single(dev, &qat_req->out.dec.m, + qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.dec.m, sizeof(struct qat_rsa_output_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) @@ -464,8 +929,12 @@ static int qat_rsa_dec(struct akcipher_request *req) msg->pke_mid.src_data_addr = qat_req->phy_in; msg->pke_mid.dest_data_addr = qat_req->phy_out; - msg->pke_mid.opaque = (uint64_t)(__force long)req; - msg->input_param_count = 3; + msg->pke_mid.opaque = (uint64_t)(__force long)qat_req; + if (ctx->crt_mode) + msg->input_param_count = 6; + else + msg->input_param_count = 3; + msg->output_param_count = 1; do { ret = adf_send_message(ctx->inst->pke_tx, (uint32_t *)msg); @@ -486,26 +955,24 @@ unmap_in_params: unmap_dst: if (qat_req->dst_align) dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align, - qat_req->out.dec.m); + qat_req->out.rsa.dec.m); else - if (!dma_mapping_error(dev, qat_req->out.dec.m)) - dma_unmap_single(dev, qat_req->out.dec.m, ctx->key_sz, - DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.rsa.dec.m)) + dma_unmap_single(dev, qat_req->out.rsa.dec.m, + ctx->key_sz, DMA_FROM_DEVICE); unmap_src: if (qat_req->src_align) dma_free_coherent(dev, ctx->key_sz, qat_req->src_align, - qat_req->in.dec.c); + qat_req->in.rsa.dec.c); else - if (!dma_mapping_error(dev, qat_req->in.dec.c)) - dma_unmap_single(dev, qat_req->in.dec.c, ctx->key_sz, - DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.rsa.dec.c)) + dma_unmap_single(dev, qat_req->in.rsa.dec.c, + ctx->key_sz, DMA_TO_DEVICE); return ret; } -int qat_rsa_get_n(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) +int qat_rsa_set_n(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) { - struct qat_rsa_ctx *ctx = context; struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); const char *ptr = value; @@ -518,11 +985,6 @@ int qat_rsa_get_n(void *context, size_t hdrlen, unsigned char tag, ctx->key_sz = vlen; ret = -EINVAL; - /* In FIPS mode only allow key size 2K & 3K */ - if (fips_enabled && (ctx->key_sz != 256 && ctx->key_sz != 384)) { - pr_err("QAT: RSA: key size not allowed in FIPS mode\n"); - goto err; - } /* invalid key size provided */ if (!qat_rsa_enc_fn_id(ctx->key_sz)) goto err; @@ -540,10 +1002,8 @@ err: return ret; } -int qat_rsa_get_e(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) +int qat_rsa_set_e(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) { - struct qat_rsa_ctx *ctx = context; struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); const char *ptr = value; @@ -559,18 +1019,15 @@ int qat_rsa_get_e(void *context, size_t hdrlen, unsigned char tag, } ctx->e = dma_zalloc_coherent(dev, ctx->key_sz, &ctx->dma_e, GFP_KERNEL); - if (!ctx->e) { - ctx->e = NULL; + if (!ctx->e) return -ENOMEM; - } + memcpy(ctx->e + (ctx->key_sz - vlen), ptr, vlen); return 0; } -int qat_rsa_get_d(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) +int qat_rsa_set_d(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) { - struct qat_rsa_ctx *ctx = context; struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); const char *ptr = value; @@ -585,15 +1042,9 @@ int qat_rsa_get_d(void *context, size_t hdrlen, unsigned char tag, if (!ctx->key_sz || !vlen || vlen > ctx->key_sz) goto err; - /* In FIPS mode only allow key size 2K & 3K */ - if (fips_enabled && (vlen != 256 && vlen != 384)) { - pr_err("QAT: RSA: key size not allowed in FIPS mode\n"); - goto err; - } - ret = -ENOMEM; ctx->d = dma_zalloc_coherent(dev, ctx->key_sz, &ctx->dma_d, GFP_KERNEL); - if (!ctx->n) + if (!ctx->d) goto err; memcpy(ctx->d + (ctx->key_sz - vlen), ptr, vlen); @@ -603,12 +1054,106 @@ err: return ret; } -static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, - unsigned int keylen, bool private) +static void qat_rsa_drop_leading_zeros(const char **ptr, unsigned int *len) { - struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); - struct device *dev = &GET_DEV(ctx->inst->accel_dev); - int ret; + while (!**ptr && *len) { + (*ptr)++; + (*len)--; + } +} + +static void qat_rsa_setkey_crt(struct qat_rsa_ctx *ctx, struct rsa_key *rsa_key) +{ + struct qat_crypto_instance *inst = ctx->inst; + struct device *dev = &GET_DEV(inst->accel_dev); + const char *ptr; + unsigned int len; + unsigned int half_key_sz = ctx->key_sz / 2; + + /* p */ + ptr = rsa_key->p; + len = rsa_key->p_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto err; + ctx->p = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_p, GFP_KERNEL); + if (!ctx->p) + goto err; + memcpy(ctx->p + (half_key_sz - len), ptr, len); + + /* q */ + ptr = rsa_key->q; + len = rsa_key->q_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_p; + ctx->q = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_q, GFP_KERNEL); + if (!ctx->q) + goto free_p; + memcpy(ctx->q + (half_key_sz - len), ptr, len); + + /* dp */ + ptr = rsa_key->dp; + len = rsa_key->dp_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_q; + ctx->dp = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_dp, + GFP_KERNEL); + if (!ctx->dp) + goto free_q; + memcpy(ctx->dp + (half_key_sz - len), ptr, len); + + /* dq */ + ptr = rsa_key->dq; + len = rsa_key->dq_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_dp; + ctx->dq = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_dq, + GFP_KERNEL); + if (!ctx->dq) + goto free_dp; + memcpy(ctx->dq + (half_key_sz - len), ptr, len); + + /* qinv */ + ptr = rsa_key->qinv; + len = rsa_key->qinv_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_dq; + ctx->qinv = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_qinv, + GFP_KERNEL); + if (!ctx->qinv) + goto free_dq; + memcpy(ctx->qinv + (half_key_sz - len), ptr, len); + + ctx->crt_mode = true; + return; + +free_dq: + memset(ctx->dq, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dq, ctx->dma_dq); + ctx->dq = NULL; +free_dp: + memset(ctx->dp, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dp, ctx->dma_dp); + ctx->dp = NULL; +free_q: + memset(ctx->q, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->q, ctx->dma_q); + ctx->q = NULL; +free_p: + memset(ctx->p, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->p, ctx->dma_p); + ctx->p = NULL; +err: + ctx->crt_mode = false; +} + +static void qat_rsa_clear_ctx(struct device *dev, struct qat_rsa_ctx *ctx) +{ + unsigned int half_key_sz = ctx->key_sz / 2; /* Free the old key if any */ if (ctx->n) @@ -619,19 +1164,68 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, memset(ctx->d, '\0', ctx->key_sz); dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d); } + if (ctx->p) { + memset(ctx->p, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->p, ctx->dma_p); + } + if (ctx->q) { + memset(ctx->q, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->q, ctx->dma_q); + } + if (ctx->dp) { + memset(ctx->dp, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dp, ctx->dma_dp); + } + if (ctx->dq) { + memset(ctx->dq, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dq, ctx->dma_dq); + } + if (ctx->qinv) { + memset(ctx->qinv, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->qinv, ctx->dma_qinv); + } ctx->n = NULL; ctx->e = NULL; ctx->d = NULL; + ctx->p = NULL; + ctx->q = NULL; + ctx->dp = NULL; + ctx->dq = NULL; + ctx->qinv = NULL; + ctx->crt_mode = false; + ctx->key_sz = 0; +} + +static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen, bool private) +{ + struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + struct rsa_key rsa_key; + int ret; + + qat_rsa_clear_ctx(dev, ctx); if (private) - ret = asn1_ber_decoder(&qat_rsaprivkey_decoder, ctx, key, - keylen); + ret = rsa_parse_priv_key(&rsa_key, key, keylen); else - ret = asn1_ber_decoder(&qat_rsapubkey_decoder, ctx, key, - keylen); + ret = rsa_parse_pub_key(&rsa_key, key, keylen); + if (ret < 0) + goto free; + + ret = qat_rsa_set_n(ctx, rsa_key.n, rsa_key.n_sz); + if (ret < 0) + goto free; + ret = qat_rsa_set_e(ctx, rsa_key.e, rsa_key.e_sz); if (ret < 0) goto free; + if (private) { + ret = qat_rsa_set_d(ctx, rsa_key.d, rsa_key.d_sz); + if (ret < 0) + goto free; + qat_rsa_setkey_crt(ctx, &rsa_key); + } if (!ctx->n || !ctx->e) { /* invalid key provided */ @@ -646,20 +1240,7 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, return 0; free: - if (ctx->d) { - memset(ctx->d, '\0', ctx->key_sz); - dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d); - ctx->d = NULL; - } - if (ctx->e) { - dma_free_coherent(dev, ctx->key_sz, ctx->e, ctx->dma_e); - ctx->e = NULL; - } - if (ctx->n) { - dma_free_coherent(dev, ctx->key_sz, ctx->n, ctx->dma_n); - ctx->n = NULL; - ctx->key_sz = 0; - } + qat_rsa_clear_ctx(dev, ctx); return ret; } @@ -711,7 +1292,7 @@ static void qat_rsa_exit_tfm(struct crypto_akcipher *tfm) } qat_crypto_put_instance(ctx->inst); ctx->n = NULL; - ctx->d = NULL; + ctx->e = NULL; ctx->d = NULL; } @@ -725,7 +1306,7 @@ static struct akcipher_alg rsa = { .max_size = qat_rsa_max_size, .init = qat_rsa_init_tfm, .exit = qat_rsa_exit_tfm, - .reqsize = sizeof(struct qat_rsa_request) + 64, + .reqsize = sizeof(struct qat_asym_request) + 64, .base = { .cra_name = "rsa", .cra_driver_name = "qat-rsa", @@ -735,6 +1316,23 @@ static struct akcipher_alg rsa = { }, }; +static struct kpp_alg dh = { + .set_secret = qat_dh_set_secret, + .generate_public_key = qat_dh_compute_value, + .compute_shared_secret = qat_dh_compute_value, + .max_size = qat_dh_max_size, + .init = qat_dh_init_tfm, + .exit = qat_dh_exit_tfm, + .reqsize = sizeof(struct qat_asym_request) + 64, + .base = { + .cra_name = "dh", + .cra_driver_name = "qat-dh", + .cra_priority = 1000, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct qat_dh_ctx), + }, +}; + int qat_asym_algs_register(void) { int ret = 0; @@ -743,7 +1341,11 @@ int qat_asym_algs_register(void) if (++active_devs == 1) { rsa.base.cra_flags = 0; ret = crypto_register_akcipher(&rsa); + if (ret) + goto unlock; + ret = crypto_register_kpp(&dh); } +unlock: mutex_unlock(&algs_lock); return ret; } @@ -751,7 +1353,9 @@ int qat_asym_algs_register(void) void qat_asym_algs_unregister(void) { mutex_lock(&algs_lock); - if (--active_devs == 0) + if (--active_devs == 0) { crypto_unregister_akcipher(&rsa); + crypto_unregister_kpp(&dh); + } mutex_unlock(&algs_lock); } diff --git a/drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 b/drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 deleted file mode 100644 index f0066adb79b8..000000000000 --- a/drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 +++ /dev/null @@ -1,11 +0,0 @@ -RsaPrivKey ::= SEQUENCE { - version INTEGER, - n INTEGER ({ qat_rsa_get_n }), - e INTEGER ({ qat_rsa_get_e }), - d INTEGER ({ qat_rsa_get_d }), - prime1 INTEGER, - prime2 INTEGER, - exponent1 INTEGER, - exponent2 INTEGER, - coefficient INTEGER -} diff --git a/drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 b/drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 deleted file mode 100644 index bd667b31a21a..000000000000 --- a/drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 +++ /dev/null @@ -1,4 +0,0 @@ -RsaPubKey ::= SEQUENCE { - n INTEGER ({ qat_rsa_get_n }), - e INTEGER ({ qat_rsa_get_e }) -} diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 6e1d5e185526..1dfcab317bed 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -252,6 +252,7 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_sbr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; } diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index a8c4b92a7cbd..4d2de2838451 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -302,9 +302,7 @@ static void adf_remove(struct pci_dev *pdev) pr_err("QAT: Driver removal failed\n"); return; } - if (adf_dev_stop(accel_dev)) - dev_err(&GET_DEV(accel_dev), "Failed to stop QAT accel dev\n"); - + adf_dev_stop(accel_dev); adf_dev_shutdown(accel_dev); adf_disable_aer(accel_dev); adf_cleanup_accel(accel_dev); diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c index dc04ab68d24d..a3b4dd8099a7 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c @@ -109,29 +109,6 @@ static void adf_vf_void_noop(struct adf_accel_dev *accel_dev) { } -static int adf_vf2pf_init(struct adf_accel_dev *accel_dev) -{ - u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM | - (ADF_VF2PF_MSGTYPE_INIT << ADF_VF2PF_MSGTYPE_SHIFT)); - - if (adf_iov_putmsg(accel_dev, msg, 0)) { - dev_err(&GET_DEV(accel_dev), - "Failed to send Init event to PF\n"); - return -EFAULT; - } - return 0; -} - -static void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev) -{ - u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM | - (ADF_VF2PF_MSGTYPE_SHUTDOWN << ADF_VF2PF_MSGTYPE_SHIFT)); - - if (adf_iov_putmsg(accel_dev, msg, 0)) - dev_err(&GET_DEV(accel_dev), - "Failed to send Shutdown event to PF\n"); -} - void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data) { hw_data->dev_class = &dh895xcciov_class; diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c index f8cc4bf0a50c..60df98632fa2 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c @@ -238,6 +238,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_err_free_reg; + set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status); + ret = adf_dev_init(accel_dev); if (ret) goto out_err_dev_shutdown; @@ -270,9 +272,7 @@ static void adf_remove(struct pci_dev *pdev) pr_err("QAT: Driver removal failed\n"); return; } - if (adf_dev_stop(accel_dev)) - dev_err(&GET_DEV(accel_dev), "Failed to stop QAT accel dev\n"); - + adf_dev_stop(accel_dev); adf_dev_shutdown(accel_dev); adf_cleanup_accel(accel_dev); adf_cleanup_pci_dev(accel_dev); diff --git a/drivers/crypto/qce/ablkcipher.c b/drivers/crypto/qce/ablkcipher.c index dbcbbe242bd6..b04b42f48366 100644 --- a/drivers/crypto/qce/ablkcipher.c +++ b/drivers/crypto/qce/ablkcipher.c @@ -15,8 +15,8 @@ #include <linux/interrupt.h> #include <linux/types.h> #include <crypto/aes.h> -#include <crypto/algapi.h> #include <crypto/des.h> +#include <crypto/internal/skcipher.h> #include "cipher.h" @@ -189,7 +189,7 @@ static int qce_ablkcipher_setkey(struct crypto_ablkcipher *ablk, const u8 *key, memcpy(ctx->enc_key, key, keylen); return 0; fallback: - ret = crypto_ablkcipher_setkey(ctx->fallback, key, keylen); + ret = crypto_skcipher_setkey(ctx->fallback, key, keylen); if (!ret) ctx->enc_keylen = keylen; return ret; @@ -212,10 +212,16 @@ static int qce_ablkcipher_crypt(struct ablkcipher_request *req, int encrypt) if (IS_AES(rctx->flags) && ctx->enc_keylen != AES_KEYSIZE_128 && ctx->enc_keylen != AES_KEYSIZE_256) { - ablkcipher_request_set_tfm(req, ctx->fallback); - ret = encrypt ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + ret = encrypt ? crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return ret; } @@ -239,10 +245,9 @@ static int qce_ablkcipher_init(struct crypto_tfm *tfm) memset(ctx, 0, sizeof(*ctx)); tfm->crt_ablkcipher.reqsize = sizeof(struct qce_cipher_reqctx); - ctx->fallback = crypto_alloc_ablkcipher(crypto_tfm_alg_name(tfm), - CRYPTO_ALG_TYPE_ABLKCIPHER, - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK); + ctx->fallback = crypto_alloc_skcipher(crypto_tfm_alg_name(tfm), 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback)) return PTR_ERR(ctx->fallback); @@ -253,7 +258,7 @@ static void qce_ablkcipher_exit(struct crypto_tfm *tfm) { struct qce_cipher_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_ablkcipher(ctx->fallback); + crypto_free_skcipher(ctx->fallback); } struct qce_ablkcipher_def { diff --git a/drivers/crypto/qce/cipher.h b/drivers/crypto/qce/cipher.h index 5c6a5f8633e5..2b0278bb6e92 100644 --- a/drivers/crypto/qce/cipher.h +++ b/drivers/crypto/qce/cipher.h @@ -22,7 +22,7 @@ struct qce_cipher_ctx { u8 enc_key[QCE_MAX_KEY_SIZE]; unsigned int enc_keylen; - struct crypto_ablkcipher *fallback; + struct crypto_skcipher *fallback; }; /** diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index 5f161a9777e3..dce1af0ce85c 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -11,65 +11,64 @@ * */ -#include <linux/delay.h> +#include <linux/clk.h> +#include <linux/crypto.h> +#include <linux/dma-mapping.h> #include <linux/err.h> -#include <linux/module.h> -#include <linux/init.h> #include <linux/errno.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> #include <linux/kernel.h> -#include <linux/clk.h> +#include <linux/module.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/scatterlist.h> -#include <linux/dma-mapping.h> -#include <linux/io.h> -#include <linux/of.h> -#include <linux/crypto.h> -#include <linux/interrupt.h> -#include <crypto/algapi.h> -#include <crypto/aes.h> #include <crypto/ctr.h> +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <crypto/scatterwalk.h> #define _SBF(s, v) ((v) << (s)) -#define _BIT(b) _SBF(b, 1) /* Feed control registers */ #define SSS_REG_FCINTSTAT 0x0000 -#define SSS_FCINTSTAT_BRDMAINT _BIT(3) -#define SSS_FCINTSTAT_BTDMAINT _BIT(2) -#define SSS_FCINTSTAT_HRDMAINT _BIT(1) -#define SSS_FCINTSTAT_PKDMAINT _BIT(0) +#define SSS_FCINTSTAT_BRDMAINT BIT(3) +#define SSS_FCINTSTAT_BTDMAINT BIT(2) +#define SSS_FCINTSTAT_HRDMAINT BIT(1) +#define SSS_FCINTSTAT_PKDMAINT BIT(0) #define SSS_REG_FCINTENSET 0x0004 -#define SSS_FCINTENSET_BRDMAINTENSET _BIT(3) -#define SSS_FCINTENSET_BTDMAINTENSET _BIT(2) -#define SSS_FCINTENSET_HRDMAINTENSET _BIT(1) -#define SSS_FCINTENSET_PKDMAINTENSET _BIT(0) +#define SSS_FCINTENSET_BRDMAINTENSET BIT(3) +#define SSS_FCINTENSET_BTDMAINTENSET BIT(2) +#define SSS_FCINTENSET_HRDMAINTENSET BIT(1) +#define SSS_FCINTENSET_PKDMAINTENSET BIT(0) #define SSS_REG_FCINTENCLR 0x0008 -#define SSS_FCINTENCLR_BRDMAINTENCLR _BIT(3) -#define SSS_FCINTENCLR_BTDMAINTENCLR _BIT(2) -#define SSS_FCINTENCLR_HRDMAINTENCLR _BIT(1) -#define SSS_FCINTENCLR_PKDMAINTENCLR _BIT(0) +#define SSS_FCINTENCLR_BRDMAINTENCLR BIT(3) +#define SSS_FCINTENCLR_BTDMAINTENCLR BIT(2) +#define SSS_FCINTENCLR_HRDMAINTENCLR BIT(1) +#define SSS_FCINTENCLR_PKDMAINTENCLR BIT(0) #define SSS_REG_FCINTPEND 0x000C -#define SSS_FCINTPEND_BRDMAINTP _BIT(3) -#define SSS_FCINTPEND_BTDMAINTP _BIT(2) -#define SSS_FCINTPEND_HRDMAINTP _BIT(1) -#define SSS_FCINTPEND_PKDMAINTP _BIT(0) +#define SSS_FCINTPEND_BRDMAINTP BIT(3) +#define SSS_FCINTPEND_BTDMAINTP BIT(2) +#define SSS_FCINTPEND_HRDMAINTP BIT(1) +#define SSS_FCINTPEND_PKDMAINTP BIT(0) #define SSS_REG_FCFIFOSTAT 0x0010 -#define SSS_FCFIFOSTAT_BRFIFOFUL _BIT(7) -#define SSS_FCFIFOSTAT_BRFIFOEMP _BIT(6) -#define SSS_FCFIFOSTAT_BTFIFOFUL _BIT(5) -#define SSS_FCFIFOSTAT_BTFIFOEMP _BIT(4) -#define SSS_FCFIFOSTAT_HRFIFOFUL _BIT(3) -#define SSS_FCFIFOSTAT_HRFIFOEMP _BIT(2) -#define SSS_FCFIFOSTAT_PKFIFOFUL _BIT(1) -#define SSS_FCFIFOSTAT_PKFIFOEMP _BIT(0) +#define SSS_FCFIFOSTAT_BRFIFOFUL BIT(7) +#define SSS_FCFIFOSTAT_BRFIFOEMP BIT(6) +#define SSS_FCFIFOSTAT_BTFIFOFUL BIT(5) +#define SSS_FCFIFOSTAT_BTFIFOEMP BIT(4) +#define SSS_FCFIFOSTAT_HRFIFOFUL BIT(3) +#define SSS_FCFIFOSTAT_HRFIFOEMP BIT(2) +#define SSS_FCFIFOSTAT_PKFIFOFUL BIT(1) +#define SSS_FCFIFOSTAT_PKFIFOEMP BIT(0) #define SSS_REG_FCFIFOCTRL 0x0014 -#define SSS_FCFIFOCTRL_DESSEL _BIT(2) +#define SSS_FCFIFOCTRL_DESSEL BIT(2) #define SSS_HASHIN_INDEPENDENT _SBF(0, 0x00) #define SSS_HASHIN_CIPHER_INPUT _SBF(0, 0x01) #define SSS_HASHIN_CIPHER_OUTPUT _SBF(0, 0x02) @@ -77,52 +76,52 @@ #define SSS_REG_FCBRDMAS 0x0020 #define SSS_REG_FCBRDMAL 0x0024 #define SSS_REG_FCBRDMAC 0x0028 -#define SSS_FCBRDMAC_BYTESWAP _BIT(1) -#define SSS_FCBRDMAC_FLUSH _BIT(0) +#define SSS_FCBRDMAC_BYTESWAP BIT(1) +#define SSS_FCBRDMAC_FLUSH BIT(0) #define SSS_REG_FCBTDMAS 0x0030 #define SSS_REG_FCBTDMAL 0x0034 #define SSS_REG_FCBTDMAC 0x0038 -#define SSS_FCBTDMAC_BYTESWAP _BIT(1) -#define SSS_FCBTDMAC_FLUSH _BIT(0) +#define SSS_FCBTDMAC_BYTESWAP BIT(1) +#define SSS_FCBTDMAC_FLUSH BIT(0) #define SSS_REG_FCHRDMAS 0x0040 #define SSS_REG_FCHRDMAL 0x0044 #define SSS_REG_FCHRDMAC 0x0048 -#define SSS_FCHRDMAC_BYTESWAP _BIT(1) -#define SSS_FCHRDMAC_FLUSH _BIT(0) +#define SSS_FCHRDMAC_BYTESWAP BIT(1) +#define SSS_FCHRDMAC_FLUSH BIT(0) #define SSS_REG_FCPKDMAS 0x0050 #define SSS_REG_FCPKDMAL 0x0054 #define SSS_REG_FCPKDMAC 0x0058 -#define SSS_FCPKDMAC_BYTESWAP _BIT(3) -#define SSS_FCPKDMAC_DESCEND _BIT(2) -#define SSS_FCPKDMAC_TRANSMIT _BIT(1) -#define SSS_FCPKDMAC_FLUSH _BIT(0) +#define SSS_FCPKDMAC_BYTESWAP BIT(3) +#define SSS_FCPKDMAC_DESCEND BIT(2) +#define SSS_FCPKDMAC_TRANSMIT BIT(1) +#define SSS_FCPKDMAC_FLUSH BIT(0) #define SSS_REG_FCPKDMAO 0x005C /* AES registers */ #define SSS_REG_AES_CONTROL 0x00 -#define SSS_AES_BYTESWAP_DI _BIT(11) -#define SSS_AES_BYTESWAP_DO _BIT(10) -#define SSS_AES_BYTESWAP_IV _BIT(9) -#define SSS_AES_BYTESWAP_CNT _BIT(8) -#define SSS_AES_BYTESWAP_KEY _BIT(7) -#define SSS_AES_KEY_CHANGE_MODE _BIT(6) +#define SSS_AES_BYTESWAP_DI BIT(11) +#define SSS_AES_BYTESWAP_DO BIT(10) +#define SSS_AES_BYTESWAP_IV BIT(9) +#define SSS_AES_BYTESWAP_CNT BIT(8) +#define SSS_AES_BYTESWAP_KEY BIT(7) +#define SSS_AES_KEY_CHANGE_MODE BIT(6) #define SSS_AES_KEY_SIZE_128 _SBF(4, 0x00) #define SSS_AES_KEY_SIZE_192 _SBF(4, 0x01) #define SSS_AES_KEY_SIZE_256 _SBF(4, 0x02) -#define SSS_AES_FIFO_MODE _BIT(3) +#define SSS_AES_FIFO_MODE BIT(3) #define SSS_AES_CHAIN_MODE_ECB _SBF(1, 0x00) #define SSS_AES_CHAIN_MODE_CBC _SBF(1, 0x01) #define SSS_AES_CHAIN_MODE_CTR _SBF(1, 0x02) -#define SSS_AES_MODE_DECRYPT _BIT(0) +#define SSS_AES_MODE_DECRYPT BIT(0) #define SSS_REG_AES_STATUS 0x04 -#define SSS_AES_BUSY _BIT(2) -#define SSS_AES_INPUT_READY _BIT(1) -#define SSS_AES_OUTPUT_READY _BIT(0) +#define SSS_AES_BUSY BIT(2) +#define SSS_AES_INPUT_READY BIT(1) +#define SSS_AES_OUTPUT_READY BIT(0) #define SSS_REG_AES_IN_DATA(s) (0x10 + (s << 2)) #define SSS_REG_AES_OUT_DATA(s) (0x20 + (s << 2)) @@ -139,7 +138,7 @@ SSS_AES_REG(dev, reg)) /* HW engine modes */ -#define FLAGS_AES_DECRYPT _BIT(0) +#define FLAGS_AES_DECRYPT BIT(0) #define FLAGS_AES_MODE_MASK _SBF(1, 0x03) #define FLAGS_AES_CBC _SBF(1, 0x01) #define FLAGS_AES_CTR _SBF(1, 0x02) @@ -149,7 +148,6 @@ /** * struct samsung_aes_variant - platform specific SSS driver data - * @has_hash_irq: true if SSS module uses hash interrupt, false otherwise * @aes_offset: AES register offset from SSS module's base. * * Specifies platform specific configuration of SSS module. @@ -157,52 +155,52 @@ * expansion of its usage. */ struct samsung_aes_variant { - bool has_hash_irq; - unsigned int aes_offset; + unsigned int aes_offset; }; struct s5p_aes_reqctx { - unsigned long mode; + unsigned long mode; }; struct s5p_aes_ctx { - struct s5p_aes_dev *dev; + struct s5p_aes_dev *dev; - uint8_t aes_key[AES_MAX_KEY_SIZE]; - uint8_t nonce[CTR_RFC3686_NONCE_SIZE]; - int keylen; + uint8_t aes_key[AES_MAX_KEY_SIZE]; + uint8_t nonce[CTR_RFC3686_NONCE_SIZE]; + int keylen; }; struct s5p_aes_dev { - struct device *dev; - struct clk *clk; - void __iomem *ioaddr; - void __iomem *aes_ioaddr; - int irq_hash; - int irq_fc; - - struct ablkcipher_request *req; - struct s5p_aes_ctx *ctx; - struct scatterlist *sg_src; - struct scatterlist *sg_dst; - - struct tasklet_struct tasklet; - struct crypto_queue queue; - bool busy; - spinlock_t lock; - - struct samsung_aes_variant *variant; + struct device *dev; + struct clk *clk; + void __iomem *ioaddr; + void __iomem *aes_ioaddr; + int irq_fc; + + struct ablkcipher_request *req; + struct s5p_aes_ctx *ctx; + struct scatterlist *sg_src; + struct scatterlist *sg_dst; + + /* In case of unaligned access: */ + struct scatterlist *sg_src_cpy; + struct scatterlist *sg_dst_cpy; + + struct tasklet_struct tasklet; + struct crypto_queue queue; + bool busy; + spinlock_t lock; + + struct samsung_aes_variant *variant; }; static struct s5p_aes_dev *s5p_dev; static const struct samsung_aes_variant s5p_aes_data = { - .has_hash_irq = true, .aes_offset = 0x4000, }; static const struct samsung_aes_variant exynos_aes_data = { - .has_hash_irq = false, .aes_offset = 0x200, }; @@ -245,8 +243,45 @@ static void s5p_set_dma_outdata(struct s5p_aes_dev *dev, struct scatterlist *sg) SSS_WRITE(dev, FCBTDMAL, sg_dma_len(sg)); } +static void s5p_free_sg_cpy(struct s5p_aes_dev *dev, struct scatterlist **sg) +{ + int len; + + if (!*sg) + return; + + len = ALIGN(dev->req->nbytes, AES_BLOCK_SIZE); + free_pages((unsigned long)sg_virt(*sg), get_order(len)); + + kfree(*sg); + *sg = NULL; +} + +static void s5p_sg_copy_buf(void *buf, struct scatterlist *sg, + unsigned int nbytes, int out) +{ + struct scatter_walk walk; + + if (!nbytes) + return; + + scatterwalk_start(&walk, sg); + scatterwalk_copychunks(buf, &walk, nbytes, out); + scatterwalk_done(&walk, out, 0); +} + static void s5p_aes_complete(struct s5p_aes_dev *dev, int err) { + if (dev->sg_dst_cpy) { + dev_dbg(dev->dev, + "Copying %d bytes of output data back to original place\n", + dev->req->nbytes); + s5p_sg_copy_buf(sg_virt(dev->sg_dst_cpy), dev->req->dst, + dev->req->nbytes, 1); + } + s5p_free_sg_cpy(dev, &dev->sg_src_cpy); + s5p_free_sg_cpy(dev, &dev->sg_dst_cpy); + /* holding a lock outside */ dev->req->base.complete(&dev->req->base, err); dev->busy = false; @@ -262,15 +297,37 @@ static void s5p_unset_indata(struct s5p_aes_dev *dev) dma_unmap_sg(dev->dev, dev->sg_src, 1, DMA_TO_DEVICE); } +static int s5p_make_sg_cpy(struct s5p_aes_dev *dev, struct scatterlist *src, + struct scatterlist **dst) +{ + void *pages; + int len; + + *dst = kmalloc(sizeof(**dst), GFP_ATOMIC); + if (!*dst) + return -ENOMEM; + + len = ALIGN(dev->req->nbytes, AES_BLOCK_SIZE); + pages = (void *)__get_free_pages(GFP_ATOMIC, get_order(len)); + if (!pages) { + kfree(*dst); + *dst = NULL; + return -ENOMEM; + } + + s5p_sg_copy_buf(pages, src, dev->req->nbytes, 0); + + sg_init_table(*dst, 1); + sg_set_buf(*dst, pages, len); + + return 0; +} + static int s5p_set_outdata(struct s5p_aes_dev *dev, struct scatterlist *sg) { int err; - if (!IS_ALIGNED(sg_dma_len(sg), AES_BLOCK_SIZE)) { - err = -EINVAL; - goto exit; - } - if (!sg_dma_len(sg)) { + if (!sg->length) { err = -EINVAL; goto exit; } @@ -284,7 +341,7 @@ static int s5p_set_outdata(struct s5p_aes_dev *dev, struct scatterlist *sg) dev->sg_dst = sg; err = 0; - exit: +exit: return err; } @@ -292,11 +349,7 @@ static int s5p_set_indata(struct s5p_aes_dev *dev, struct scatterlist *sg) { int err; - if (!IS_ALIGNED(sg_dma_len(sg), AES_BLOCK_SIZE)) { - err = -EINVAL; - goto exit; - } - if (!sg_dma_len(sg)) { + if (!sg->length) { err = -EINVAL; goto exit; } @@ -310,67 +363,90 @@ static int s5p_set_indata(struct s5p_aes_dev *dev, struct scatterlist *sg) dev->sg_src = sg; err = 0; - exit: +exit: return err; } -static void s5p_aes_tx(struct s5p_aes_dev *dev) +/* + * Returns true if new transmitting (output) data is ready and its + * address+length have to be written to device (by calling + * s5p_set_dma_outdata()). False otherwise. + */ +static bool s5p_aes_tx(struct s5p_aes_dev *dev) { int err = 0; + bool ret = false; s5p_unset_outdata(dev); if (!sg_is_last(dev->sg_dst)) { err = s5p_set_outdata(dev, sg_next(dev->sg_dst)); - if (err) { + if (err) s5p_aes_complete(dev, err); - return; - } - - s5p_set_dma_outdata(dev, dev->sg_dst); + else + ret = true; } else { s5p_aes_complete(dev, err); dev->busy = true; tasklet_schedule(&dev->tasklet); } + + return ret; } -static void s5p_aes_rx(struct s5p_aes_dev *dev) +/* + * Returns true if new receiving (input) data is ready and its + * address+length have to be written to device (by calling + * s5p_set_dma_indata()). False otherwise. + */ +static bool s5p_aes_rx(struct s5p_aes_dev *dev) { int err; + bool ret = false; s5p_unset_indata(dev); if (!sg_is_last(dev->sg_src)) { err = s5p_set_indata(dev, sg_next(dev->sg_src)); - if (err) { + if (err) s5p_aes_complete(dev, err); - return; - } - - s5p_set_dma_indata(dev, dev->sg_src); + else + ret = true; } + + return ret; } static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id) { struct platform_device *pdev = dev_id; - struct s5p_aes_dev *dev = platform_get_drvdata(pdev); - uint32_t status; - unsigned long flags; + struct s5p_aes_dev *dev = platform_get_drvdata(pdev); + bool set_dma_tx = false; + bool set_dma_rx = false; + unsigned long flags; + uint32_t status; spin_lock_irqsave(&dev->lock, flags); - if (irq == dev->irq_fc) { - status = SSS_READ(dev, FCINTSTAT); - if (status & SSS_FCINTSTAT_BRDMAINT) - s5p_aes_rx(dev); - if (status & SSS_FCINTSTAT_BTDMAINT) - s5p_aes_tx(dev); - - SSS_WRITE(dev, FCINTPEND, status); - } + status = SSS_READ(dev, FCINTSTAT); + if (status & SSS_FCINTSTAT_BRDMAINT) + set_dma_rx = s5p_aes_rx(dev); + if (status & SSS_FCINTSTAT_BTDMAINT) + set_dma_tx = s5p_aes_tx(dev); + + SSS_WRITE(dev, FCINTPEND, status); + + /* + * Writing length of DMA block (either receiving or transmitting) + * will start the operation immediately, so this should be done + * at the end (even after clearing pending interrupts to not miss the + * interrupt). + */ + if (set_dma_tx) + s5p_set_dma_outdata(dev, dev->sg_dst); + if (set_dma_rx) + s5p_set_dma_indata(dev, dev->sg_src); spin_unlock_irqrestore(&dev->lock, flags); @@ -395,12 +471,77 @@ static void s5p_set_aes(struct s5p_aes_dev *dev, memcpy_toio(keystart, key, keylen); } +static bool s5p_is_sg_aligned(struct scatterlist *sg) +{ + while (sg) { + if (!IS_ALIGNED(sg->length, AES_BLOCK_SIZE)) + return false; + sg = sg_next(sg); + } + + return true; +} + +static int s5p_set_indata_start(struct s5p_aes_dev *dev, + struct ablkcipher_request *req) +{ + struct scatterlist *sg; + int err; + + dev->sg_src_cpy = NULL; + sg = req->src; + if (!s5p_is_sg_aligned(sg)) { + dev_dbg(dev->dev, + "At least one unaligned source scatter list, making a copy\n"); + err = s5p_make_sg_cpy(dev, sg, &dev->sg_src_cpy); + if (err) + return err; + + sg = dev->sg_src_cpy; + } + + err = s5p_set_indata(dev, sg); + if (err) { + s5p_free_sg_cpy(dev, &dev->sg_src_cpy); + return err; + } + + return 0; +} + +static int s5p_set_outdata_start(struct s5p_aes_dev *dev, + struct ablkcipher_request *req) +{ + struct scatterlist *sg; + int err; + + dev->sg_dst_cpy = NULL; + sg = req->dst; + if (!s5p_is_sg_aligned(sg)) { + dev_dbg(dev->dev, + "At least one unaligned dest scatter list, making a copy\n"); + err = s5p_make_sg_cpy(dev, sg, &dev->sg_dst_cpy); + if (err) + return err; + + sg = dev->sg_dst_cpy; + } + + err = s5p_set_outdata(dev, sg); + if (err) { + s5p_free_sg_cpy(dev, &dev->sg_dst_cpy); + return err; + } + + return 0; +} + static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode) { - struct ablkcipher_request *req = dev->req; - uint32_t aes_control; - int err; - unsigned long flags; + struct ablkcipher_request *req = dev->req; + uint32_t aes_control; + unsigned long flags; + int err; aes_control = SSS_AES_KEY_CHANGE_MODE; if (mode & FLAGS_AES_DECRYPT) @@ -431,19 +572,19 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode) SSS_FCINTENCLR_BTDMAINTENCLR | SSS_FCINTENCLR_BRDMAINTENCLR); SSS_WRITE(dev, FCFIFOCTRL, 0x00); - err = s5p_set_indata(dev, req->src); + err = s5p_set_indata_start(dev, req); if (err) goto indata_error; - err = s5p_set_outdata(dev, req->dst); + err = s5p_set_outdata_start(dev, req); if (err) goto outdata_error; SSS_AES_WRITE(dev, AES_CONTROL, aes_control); s5p_set_aes(dev, dev->ctx->aes_key, req->info, dev->ctx->keylen); - s5p_set_dma_indata(dev, req->src); - s5p_set_dma_outdata(dev, req->dst); + s5p_set_dma_indata(dev, dev->sg_src); + s5p_set_dma_outdata(dev, dev->sg_dst); SSS_WRITE(dev, FCINTENSET, SSS_FCINTENSET_BTDMAINTENSET | SSS_FCINTENSET_BRDMAINTENSET); @@ -452,10 +593,10 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode) return; - outdata_error: +outdata_error: s5p_unset_indata(dev); - indata_error: +indata_error: s5p_aes_complete(dev, err); spin_unlock_irqrestore(&dev->lock, flags); } @@ -506,16 +647,16 @@ static int s5p_aes_handle_req(struct s5p_aes_dev *dev, tasklet_schedule(&dev->tasklet); - exit: +exit: return err; } static int s5p_aes_crypt(struct ablkcipher_request *req, unsigned long mode) { - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct s5p_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct s5p_aes_reqctx *reqctx = ablkcipher_request_ctx(req); - struct s5p_aes_dev *dev = ctx->dev; + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct s5p_aes_reqctx *reqctx = ablkcipher_request_ctx(req); + struct s5p_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct s5p_aes_dev *dev = ctx->dev; if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) { dev_err(dev->dev, "request size is not exact amount of AES blocks\n"); @@ -530,7 +671,7 @@ static int s5p_aes_crypt(struct ablkcipher_request *req, unsigned long mode) static int s5p_aes_setkey(struct crypto_ablkcipher *cipher, const uint8_t *key, unsigned int keylen) { - struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); struct s5p_aes_ctx *ctx = crypto_tfm_ctx(tfm); if (keylen != AES_KEYSIZE_128 && @@ -622,11 +763,11 @@ static struct crypto_alg algs[] = { static int s5p_aes_probe(struct platform_device *pdev) { - int i, j, err = -ENODEV; - struct s5p_aes_dev *pdata; - struct device *dev = &pdev->dev; - struct resource *res; + struct device *dev = &pdev->dev; + int i, j, err = -ENODEV; struct samsung_aes_variant *variant; + struct s5p_aes_dev *pdata; + struct resource *res; if (s5p_dev) return -EEXIST; @@ -671,21 +812,6 @@ static int s5p_aes_probe(struct platform_device *pdev) goto err_irq; } - if (variant->has_hash_irq) { - pdata->irq_hash = platform_get_irq(pdev, 1); - if (pdata->irq_hash < 0) { - err = pdata->irq_hash; - dev_warn(dev, "hash interrupt is not available.\n"); - goto err_irq; - } - err = devm_request_irq(dev, pdata->irq_hash, s5p_aes_interrupt, - IRQF_SHARED, pdev->name, pdev); - if (err < 0) { - dev_warn(dev, "hash interrupt is not available.\n"); - goto err_irq; - } - } - pdata->busy = false; pdata->variant = variant; pdata->dev = dev; @@ -705,7 +831,7 @@ static int s5p_aes_probe(struct platform_device *pdev) return 0; - err_algs: +err_algs: dev_err(dev, "can't register '%s': %d\n", algs[i].cra_name, err); for (j = 0; j < i; j++) @@ -713,7 +839,7 @@ static int s5p_aes_probe(struct platform_device *pdev) tasklet_kill(&pdata->tasklet); - err_irq: +err_irq: clk_disable_unprepare(pdata->clk); s5p_dev = NULL; diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index c3f3d89e4831..0c49956ee0ce 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -14,10 +14,9 @@ * Based on omap-aes.c and tegra-aes.c */ -#include <crypto/algapi.h> #include <crypto/aes.h> -#include <crypto/hash.h> #include <crypto/internal/hash.h> +#include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <crypto/sha.h> @@ -150,10 +149,7 @@ struct sahara_ctx { /* AES-specific context */ int keylen; u8 key[AES_KEYSIZE_128]; - struct crypto_ablkcipher *fallback; - - /* SHA-specific context */ - struct crypto_shash *shash_fallback; + struct crypto_skcipher *fallback; }; struct sahara_aes_reqctx { @@ -620,25 +616,21 @@ static int sahara_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, return 0; } - if (keylen != AES_KEYSIZE_128 && - keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_256) + if (keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_256) return -EINVAL; /* * The requested key size is not supported by HW, do a fallback. */ - ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - ctx->fallback->base.crt_flags |= - (tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); + crypto_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags & + CRYPTO_TFM_REQ_MASK); - ret = crypto_ablkcipher_setkey(ctx->fallback, key, keylen); - if (ret) { - struct crypto_tfm *tfm_aux = crypto_ablkcipher_tfm(tfm); + ret = crypto_skcipher_setkey(ctx->fallback, key, keylen); - tfm_aux->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm_aux->crt_flags |= - (ctx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK); - } + tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->base.crt_flags |= crypto_skcipher_get_flags(ctx->fallback) & + CRYPTO_TFM_RES_MASK; return ret; } @@ -670,16 +662,20 @@ static int sahara_aes_crypt(struct ablkcipher_request *req, unsigned long mode) static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_encrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_encrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -688,16 +684,20 @@ static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req) static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -706,16 +706,20 @@ static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req) static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_encrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_encrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -724,16 +728,20 @@ static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req) static int sahara_aes_cbc_decrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -745,8 +753,9 @@ static int sahara_aes_cra_init(struct crypto_tfm *tfm) const char *name = crypto_tfm_alg_name(tfm); struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - ctx->fallback = crypto_alloc_ablkcipher(name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + ctx->fallback = crypto_alloc_skcipher(name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback)) { pr_err("Error allocating fallback algo %s\n", name); return PTR_ERR(ctx->fallback); @@ -761,9 +770,7 @@ static void sahara_aes_cra_exit(struct crypto_tfm *tfm) { struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->fallback) - crypto_free_ablkcipher(ctx->fallback); - ctx->fallback = NULL; + crypto_free_skcipher(ctx->fallback); } static u32 sahara_sha_init_hdr(struct sahara_dev *dev, @@ -1180,15 +1187,6 @@ static int sahara_sha_import(struct ahash_request *req, const void *in) static int sahara_sha_cra_init(struct crypto_tfm *tfm) { - const char *name = crypto_tfm_alg_name(tfm); - struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - - ctx->shash_fallback = crypto_alloc_shash(name, 0, - CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(ctx->shash_fallback)) { - pr_err("Error allocating fallback algo %s\n", name); - return PTR_ERR(ctx->shash_fallback); - } crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct sahara_sha_reqctx) + SHA_BUFFER_LEN + SHA256_BLOCK_SIZE); @@ -1196,14 +1194,6 @@ static int sahara_sha_cra_init(struct crypto_tfm *tfm) return 0; } -static void sahara_sha_cra_exit(struct crypto_tfm *tfm) -{ - struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - - crypto_free_shash(ctx->shash_fallback); - ctx->shash_fallback = NULL; -} - static struct crypto_alg aes_algs[] = { { .cra_name = "ecb(aes)", @@ -1272,7 +1262,6 @@ static struct ahash_alg sha_v3_algs[] = { .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_init = sahara_sha_cra_init, - .cra_exit = sahara_sha_cra_exit, } }, }; @@ -1300,7 +1289,6 @@ static struct ahash_alg sha_v4_algs[] = { .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_init = sahara_sha_cra_init, - .cra_exit = sahara_sha_cra_exit, } }, }; diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c index 7be3fbcd8d78..3830d7c4e138 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c @@ -35,6 +35,7 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq) unsigned int todo; struct sg_mapping_iter mi, mo; unsigned int oi, oo; /* offset for in and out */ + unsigned long flags; if (areq->nbytes == 0) return 0; @@ -49,7 +50,7 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq) return -EINVAL; } - spin_lock_bh(&ss->slock); + spin_lock_irqsave(&ss->slock, flags); for (i = 0; i < op->keylen; i += 4) writel(*(op->key + i / 4), ss->base + SS_KEY0 + i); @@ -117,7 +118,7 @@ release_ss: sg_miter_stop(&mi); sg_miter_stop(&mo); writel(0, ss->base + SS_CTL); - spin_unlock_bh(&ss->slock); + spin_unlock_irqrestore(&ss->slock, flags); return err; } @@ -149,6 +150,7 @@ static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq) unsigned int ob = 0; /* offset in buf */ unsigned int obo = 0; /* offset in bufo*/ unsigned int obl = 0; /* length of data in bufo */ + unsigned long flags; if (areq->nbytes == 0) return 0; @@ -181,7 +183,7 @@ static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq) if (no_chunk == 1) return sun4i_ss_opti_poll(areq); - spin_lock_bh(&ss->slock); + spin_lock_irqsave(&ss->slock, flags); for (i = 0; i < op->keylen; i += 4) writel(*(op->key + i / 4), ss->base + SS_KEY0 + i); @@ -307,7 +309,7 @@ release_ss: sg_miter_stop(&mi); sg_miter_stop(&mo); writel(0, ss->base + SS_CTL); - spin_unlock_bh(&ss->slock); + spin_unlock_irqrestore(&ss->slock, flags); return err; } diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index aae05547b924..0418a2f41dc0 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -91,10 +91,17 @@ static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr, return be16_to_cpu(ptr->len); } -static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr, bool is_sec1) +static void to_talitos_ptr_ext_set(struct talitos_ptr *ptr, u8 val, + bool is_sec1) { if (!is_sec1) - ptr->j_extent = 0; + ptr->j_extent = val; +} + +static void to_talitos_ptr_ext_or(struct talitos_ptr *ptr, u8 val, bool is_sec1) +{ + if (!is_sec1) + ptr->j_extent |= val; } /* @@ -111,7 +118,7 @@ static void map_single_talitos_ptr(struct device *dev, to_talitos_ptr_len(ptr, len, is_sec1); to_talitos_ptr(ptr, dma_addr, is_sec1); - to_talitos_ptr_extent_clear(ptr, is_sec1); + to_talitos_ptr_ext_set(ptr, 0, is_sec1); } /* @@ -804,6 +811,11 @@ static void talitos_unregister_rng(struct device *dev) * crypto alg */ #define TALITOS_CRA_PRIORITY 3000 +/* + * Defines a priority for doing AEAD with descriptors type + * HMAC_SNOOP_NO_AFEA (HSNA) instead of type IPSEC_ESP + */ +#define TALITOS_CRA_PRIORITY_AEAD_HSNA (TALITOS_CRA_PRIORITY - 1) #define TALITOS_MAX_KEY_SIZE 96 #define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ @@ -835,6 +847,16 @@ struct talitos_ahash_req_ctx { struct scatterlist *psrc; }; +struct talitos_export_state { + u32 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE / sizeof(u32)]; + u8 buf[HASH_MAX_BLOCK_SIZE]; + unsigned int swinit; + unsigned int first; + unsigned int last; + unsigned int to_hash_later; + unsigned int nbuf; +}; + static int aead_setkey(struct crypto_aead *authenc, const u8 *key, unsigned int keylen) { @@ -894,35 +916,59 @@ struct talitos_edesc { static void talitos_sg_unmap(struct device *dev, struct talitos_edesc *edesc, struct scatterlist *src, - struct scatterlist *dst) + struct scatterlist *dst, + unsigned int len, unsigned int offset) { + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); unsigned int src_nents = edesc->src_nents ? : 1; unsigned int dst_nents = edesc->dst_nents ? : 1; + if (is_sec1 && dst && dst_nents > 1) { + dma_sync_single_for_device(dev, edesc->dma_link_tbl + offset, + len, DMA_FROM_DEVICE); + sg_pcopy_from_buffer(dst, dst_nents, edesc->buf + offset, len, + offset); + } if (src != dst) { - dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); + if (src_nents == 1 || !is_sec1) + dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); - if (dst) { + if (dst && (dst_nents == 1 || !is_sec1)) dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE); - } - } else + } else if (src_nents == 1 || !is_sec1) { dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL); + } } static void ipsec_esp_unmap(struct device *dev, struct talitos_edesc *edesc, struct aead_request *areq) { - unmap_single_talitos_ptr(dev, &edesc->desc.ptr[6], DMA_FROM_DEVICE); + struct crypto_aead *aead = crypto_aead_reqtfm(areq); + struct talitos_ctx *ctx = crypto_aead_ctx(aead); + unsigned int ivsize = crypto_aead_ivsize(aead); + + if (edesc->desc.hdr & DESC_HDR_TYPE_IPSEC_ESP) + unmap_single_talitos_ptr(dev, &edesc->desc.ptr[6], + DMA_FROM_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[3], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[0], DMA_TO_DEVICE); - talitos_sg_unmap(dev, edesc, areq->src, areq->dst); + talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->cryptlen, + areq->assoclen); if (edesc->dma_len) dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); + + if (!(edesc->desc.hdr & DESC_HDR_TYPE_IPSEC_ESP)) { + unsigned int dst_nents = edesc->dst_nents ? : 1; + + sg_pcopy_to_buffer(areq->dst, dst_nents, ctx->iv, ivsize, + areq->assoclen + areq->cryptlen - ivsize); + } } /* @@ -932,6 +978,8 @@ static void ipsec_esp_encrypt_done(struct device *dev, struct talitos_desc *desc, void *context, int err) { + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); struct aead_request *areq = context; struct crypto_aead *authenc = crypto_aead_reqtfm(areq); unsigned int authsize = crypto_aead_authsize(authenc); @@ -945,8 +993,11 @@ static void ipsec_esp_encrypt_done(struct device *dev, /* copy the generated ICV to dst */ if (edesc->icv_ool) { - icvdata = &edesc->link_tbl[edesc->src_nents + - edesc->dst_nents + 2]; + if (is_sec1) + icvdata = edesc->buf + areq->assoclen + areq->cryptlen; + else + icvdata = &edesc->link_tbl[edesc->src_nents + + edesc->dst_nents + 2]; sg = sg_last(areq->dst, edesc->dst_nents); memcpy((char *)sg_virt(sg) + sg->length - authsize, icvdata, authsize); @@ -967,6 +1018,8 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev, struct talitos_edesc *edesc; struct scatterlist *sg; char *oicv, *icv; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); edesc = container_of(desc, struct talitos_edesc, desc); @@ -978,7 +1031,12 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev, icv = (char *)sg_virt(sg) + sg->length - authsize; if (edesc->dma_len) { - oicv = (char *)&edesc->link_tbl[edesc->src_nents + + if (is_sec1) + oicv = (char *)&edesc->dma_link_tbl + + req->assoclen + req->cryptlen; + else + oicv = (char *) + &edesc->link_tbl[edesc->src_nents + edesc->dst_nents + 2]; if (edesc->icv_ool) icv = oicv + authsize; @@ -1040,8 +1098,8 @@ static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count, to_talitos_ptr(link_tbl_ptr + count, sg_dma_address(sg) + offset, 0); - link_tbl_ptr[count].len = cpu_to_be16(len); - link_tbl_ptr[count].j_extent = 0; + to_talitos_ptr_len(link_tbl_ptr + count, len, 0); + to_talitos_ptr_ext_set(link_tbl_ptr + count, 0, 0); count++; cryptlen -= len; offset = 0; @@ -1052,17 +1110,43 @@ next: /* tag end of link table */ if (count > 0) - link_tbl_ptr[count - 1].j_extent = DESC_PTR_LNKTBL_RETURN; + to_talitos_ptr_ext_set(link_tbl_ptr + count - 1, + DESC_PTR_LNKTBL_RETURN, 0); return count; } -static inline int sg_to_link_tbl(struct scatterlist *sg, int sg_count, - int cryptlen, - struct talitos_ptr *link_tbl_ptr) +int talitos_sg_map(struct device *dev, struct scatterlist *src, + unsigned int len, struct talitos_edesc *edesc, + struct talitos_ptr *ptr, + int sg_count, unsigned int offset, int tbl_off) { - return sg_to_link_tbl_offset(sg, sg_count, 0, cryptlen, - link_tbl_ptr); + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + to_talitos_ptr_len(ptr, len, is_sec1); + to_talitos_ptr_ext_set(ptr, 0, is_sec1); + + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(src) + offset, is_sec1); + return sg_count; + } + if (is_sec1) { + to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, is_sec1); + return sg_count; + } + sg_count = sg_to_link_tbl_offset(src, sg_count, offset, len, + &edesc->link_tbl[tbl_off]); + if (sg_count == 1) { + /* Only one segment now, so no link tbl needed*/ + copy_talitos_ptr(ptr, &edesc->link_tbl[tbl_off], is_sec1); + return sg_count; + } + to_talitos_ptr(ptr, edesc->dma_link_tbl + + tbl_off * sizeof(struct talitos_ptr), is_sec1); + to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, is_sec1); + + return sg_count; } /* @@ -1083,42 +1167,52 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, int tbl_off = 0; int sg_count, ret; int sg_link_tbl_len; + bool sync_needed = false; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); /* hmac key */ map_single_talitos_ptr(dev, &desc->ptr[0], ctx->authkeylen, &ctx->key, DMA_TO_DEVICE); - sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ?: 1, - (areq->src == areq->dst) ? DMA_BIDIRECTIONAL - : DMA_TO_DEVICE); - /* hmac data */ - desc->ptr[1].len = cpu_to_be16(areq->assoclen); - if (sg_count > 1 && - (ret = sg_to_link_tbl_offset(areq->src, sg_count, 0, - areq->assoclen, - &edesc->link_tbl[tbl_off])) > 1) { - to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off * - sizeof(struct talitos_ptr), 0); - desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP; + sg_count = edesc->src_nents ?: 1; + if (is_sec1 && sg_count > 1) + sg_copy_to_buffer(areq->src, sg_count, edesc->buf, + areq->assoclen + cryptlen); + else + sg_count = dma_map_sg(dev, areq->src, sg_count, + (areq->src == areq->dst) ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, DMA_BIDIRECTIONAL); + /* hmac data */ + ret = talitos_sg_map(dev, areq->src, areq->assoclen, edesc, + &desc->ptr[1], sg_count, 0, tbl_off); + if (ret > 1) { tbl_off += ret; - } else { - to_talitos_ptr(&desc->ptr[1], sg_dma_address(areq->src), 0); - desc->ptr[1].j_extent = 0; + sync_needed = true; } /* cipher iv */ - to_talitos_ptr(&desc->ptr[2], edesc->iv_dma, 0); - desc->ptr[2].len = cpu_to_be16(ivsize); - desc->ptr[2].j_extent = 0; + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) { + to_talitos_ptr(&desc->ptr[2], edesc->iv_dma, is_sec1); + to_talitos_ptr_len(&desc->ptr[2], ivsize, is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[2], 0, is_sec1); + } else { + to_talitos_ptr(&desc->ptr[3], edesc->iv_dma, is_sec1); + to_talitos_ptr_len(&desc->ptr[3], ivsize, is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[3], 0, is_sec1); + } /* cipher key */ - map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, - (char *)&ctx->key + ctx->authkeylen, - DMA_TO_DEVICE); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) + map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, + (char *)&ctx->key + ctx->authkeylen, + DMA_TO_DEVICE); + else + map_single_talitos_ptr(dev, &desc->ptr[2], ctx->enckeylen, + (char *)&ctx->key + ctx->authkeylen, + DMA_TO_DEVICE); /* * cipher in @@ -1126,78 +1220,82 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, * extent is bytes of HMAC postpended to ciphertext, * typically 12 for ipsec */ - desc->ptr[4].len = cpu_to_be16(cryptlen); - desc->ptr[4].j_extent = authsize; + to_talitos_ptr_len(&desc->ptr[4], cryptlen, is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[4], 0, is_sec1); sg_link_tbl_len = cryptlen; - if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) - sg_link_tbl_len += authsize; - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src) + - areq->assoclen, 0); - } else if ((ret = sg_to_link_tbl_offset(areq->src, sg_count, - areq->assoclen, sg_link_tbl_len, - &edesc->link_tbl[tbl_off])) > - 1) { - desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; - to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl + - tbl_off * - sizeof(struct talitos_ptr), 0); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - tbl_off += ret; - } else { - copy_talitos_ptr(&desc->ptr[4], &edesc->link_tbl[tbl_off], 0); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) { + to_talitos_ptr_ext_set(&desc->ptr[4], authsize, is_sec1); + + if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) + sg_link_tbl_len += authsize; } - /* cipher out */ - desc->ptr[5].len = cpu_to_be16(cryptlen); - desc->ptr[5].j_extent = authsize; + sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc, + &desc->ptr[4], sg_count, areq->assoclen, + tbl_off); - if (areq->src != areq->dst) - sg_count = dma_map_sg(dev, areq->dst, edesc->dst_nents ? : 1, - DMA_FROM_DEVICE); + if (sg_count > 1) { + tbl_off += sg_count; + sync_needed = true; + } - edesc->icv_ool = false; + /* cipher out */ + if (areq->src != areq->dst) { + sg_count = edesc->dst_nents ? : 1; + if (!is_sec1 || sg_count == 1) + dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE); + } - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst) + - areq->assoclen, 0); - } else if ((sg_count = - sg_to_link_tbl_offset(areq->dst, sg_count, - areq->assoclen, cryptlen, - &edesc->link_tbl[tbl_off])) > 1) { - struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; - - to_talitos_ptr(&desc->ptr[5], edesc->dma_link_tbl + - tbl_off * sizeof(struct talitos_ptr), 0); - - /* Add an entry to the link table for ICV data */ - tbl_ptr += sg_count - 1; - tbl_ptr->j_extent = 0; - tbl_ptr++; - tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; - tbl_ptr->len = cpu_to_be16(authsize); - - /* icv data follows link tables */ - to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + - (edesc->src_nents + edesc->dst_nents + - 2) * sizeof(struct talitos_ptr) + - authsize, 0); - desc->ptr[5].j_extent |= DESC_PTR_LNKTBL_JUMP; - dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, - edesc->dma_len, DMA_BIDIRECTIONAL); + sg_count = talitos_sg_map(dev, areq->dst, cryptlen, edesc, + &desc->ptr[5], sg_count, areq->assoclen, + tbl_off); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) + to_talitos_ptr_ext_or(&desc->ptr[5], authsize, is_sec1); + + if (sg_count > 1) { edesc->icv_ool = true; + sync_needed = true; + + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) { + struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; + int offset = (edesc->src_nents + edesc->dst_nents + 2) * + sizeof(struct talitos_ptr) + authsize; + + /* Add an entry to the link table for ICV data */ + tbl_ptr += sg_count - 1; + to_talitos_ptr_ext_set(tbl_ptr, 0, is_sec1); + tbl_ptr++; + to_talitos_ptr_ext_set(tbl_ptr, DESC_PTR_LNKTBL_RETURN, + is_sec1); + to_talitos_ptr_len(tbl_ptr, authsize, is_sec1); + + /* icv data follows link tables */ + to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + offset, + is_sec1); + } } else { - copy_talitos_ptr(&desc->ptr[5], &edesc->link_tbl[tbl_off], 0); + edesc->icv_ool = false; + } + + /* ICV data */ + if (!(desc->hdr & DESC_HDR_TYPE_IPSEC_ESP)) { + to_talitos_ptr_len(&desc->ptr[6], authsize, is_sec1); + to_talitos_ptr(&desc->ptr[6], edesc->dma_link_tbl + + areq->assoclen + cryptlen, is_sec1); } /* iv out */ - map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, - DMA_FROM_DEVICE); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) + map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, + DMA_FROM_DEVICE); + + if (sync_needed) + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { @@ -1223,7 +1321,7 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, bool encrypt) { struct talitos_edesc *edesc; - int src_nents, dst_nents, alloc_len, dma_len; + int src_nents, dst_nents, alloc_len, dma_len, src_len, dst_len; dma_addr_t iv_dma = 0; gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; @@ -1241,8 +1339,8 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, iv_dma = dma_map_single(dev, iv, ivsize, DMA_TO_DEVICE); if (!dst || dst == src) { - src_nents = sg_nents_for_len(src, - assoclen + cryptlen + authsize); + src_len = assoclen + cryptlen + authsize; + src_nents = sg_nents_for_len(src, src_len); if (src_nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); err = ERR_PTR(-EINVAL); @@ -1250,17 +1348,18 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, } src_nents = (src_nents == 1) ? 0 : src_nents; dst_nents = dst ? src_nents : 0; + dst_len = 0; } else { /* dst && dst != src*/ - src_nents = sg_nents_for_len(src, assoclen + cryptlen + - (encrypt ? 0 : authsize)); + src_len = assoclen + cryptlen + (encrypt ? 0 : authsize); + src_nents = sg_nents_for_len(src, src_len); if (src_nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); err = ERR_PTR(-EINVAL); goto error_sg; } src_nents = (src_nents == 1) ? 0 : src_nents; - dst_nents = sg_nents_for_len(dst, assoclen + cryptlen + - (encrypt ? authsize : 0)); + dst_len = assoclen + cryptlen + (encrypt ? authsize : 0); + dst_nents = sg_nents_for_len(dst, dst_len); if (dst_nents < 0) { dev_err(dev, "Invalid number of dst SG.\n"); err = ERR_PTR(-EINVAL); @@ -1277,8 +1376,8 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, alloc_len = sizeof(struct talitos_edesc); if (src_nents || dst_nents) { if (is_sec1) - dma_len = (src_nents ? cryptlen : 0) + - (dst_nents ? cryptlen : 0); + dma_len = (src_nents ? src_len : 0) + + (dst_nents ? dst_len : 0); else dma_len = (src_nents + dst_nents + 2) * sizeof(struct talitos_ptr) + authsize * 2; @@ -1402,40 +1501,13 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, return 0; } -static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src, - struct scatterlist *dst, unsigned int len, - struct talitos_edesc *edesc) -{ - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - if (is_sec1) { - if (!edesc->src_nents) { - dma_unmap_sg(dev, src, 1, - dst != src ? DMA_TO_DEVICE - : DMA_BIDIRECTIONAL); - } - if (dst && edesc->dst_nents) { - dma_sync_single_for_device(dev, - edesc->dma_link_tbl + len, - len, DMA_FROM_DEVICE); - sg_copy_from_buffer(dst, edesc->dst_nents ? : 1, - edesc->buf + len, len); - } else if (dst && dst != src) { - dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE); - } - } else { - talitos_sg_unmap(dev, edesc, src, dst); - } -} - static void common_nonsnoop_unmap(struct device *dev, struct talitos_edesc *edesc, struct ablkcipher_request *areq) { unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); - unmap_sg_talitos_ptr(dev, areq->src, areq->dst, areq->nbytes, edesc); + talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->nbytes, 0); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE); @@ -1460,100 +1532,6 @@ static void ablkcipher_done(struct device *dev, areq->base.complete(&areq->base, err); } -int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, - unsigned int len, struct talitos_edesc *edesc, - enum dma_data_direction dir, struct talitos_ptr *ptr) -{ - int sg_count; - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - to_talitos_ptr_len(ptr, len, is_sec1); - - if (is_sec1) { - sg_count = edesc->src_nents ? : 1; - - if (sg_count == 1) { - dma_map_sg(dev, src, 1, dir); - to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); - } else { - sg_copy_to_buffer(src, sg_count, edesc->buf, len); - to_talitos_ptr(ptr, edesc->dma_link_tbl, is_sec1); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - len, DMA_TO_DEVICE); - } - } else { - to_talitos_ptr_extent_clear(ptr, is_sec1); - - sg_count = dma_map_sg(dev, src, edesc->src_nents ? : 1, dir); - - if (sg_count == 1) { - to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); - } else { - sg_count = sg_to_link_tbl(src, sg_count, len, - &edesc->link_tbl[0]); - if (sg_count > 1) { - to_talitos_ptr(ptr, edesc->dma_link_tbl, 0); - ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; - dma_sync_single_for_device(dev, - edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } else { - /* Only one segment now, so no link tbl needed*/ - to_talitos_ptr(ptr, sg_dma_address(src), - is_sec1); - } - } - } - return sg_count; -} - -void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, - unsigned int len, struct talitos_edesc *edesc, - enum dma_data_direction dir, - struct talitos_ptr *ptr, int sg_count) -{ - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - if (dir != DMA_NONE) - sg_count = dma_map_sg(dev, dst, edesc->dst_nents ? : 1, dir); - - to_talitos_ptr_len(ptr, len, is_sec1); - - if (is_sec1) { - if (sg_count == 1) { - if (dir != DMA_NONE) - dma_map_sg(dev, dst, 1, dir); - to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); - } else { - to_talitos_ptr(ptr, edesc->dma_link_tbl + len, is_sec1); - dma_sync_single_for_device(dev, - edesc->dma_link_tbl + len, - len, DMA_FROM_DEVICE); - } - } else { - to_talitos_ptr_extent_clear(ptr, is_sec1); - - if (sg_count == 1) { - to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); - } else { - struct talitos_ptr *link_tbl_ptr = - &edesc->link_tbl[edesc->src_nents + 1]; - - to_talitos_ptr(ptr, edesc->dma_link_tbl + - (edesc->src_nents + 1) * - sizeof(struct talitos_ptr), 0); - ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; - sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } - } -} - static int common_nonsnoop(struct talitos_edesc *edesc, struct ablkcipher_request *areq, void (*callback) (struct device *dev, @@ -1567,6 +1545,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, unsigned int cryptlen = areq->nbytes; unsigned int ivsize = crypto_ablkcipher_ivsize(cipher); int sg_count, ret; + bool sync_needed = false; struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); @@ -1576,25 +1555,39 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* cipher iv */ to_talitos_ptr(&desc->ptr[1], edesc->iv_dma, is_sec1); to_talitos_ptr_len(&desc->ptr[1], ivsize, is_sec1); - to_talitos_ptr_extent_clear(&desc->ptr[1], is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[1], 0, is_sec1); /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, (char *)&ctx->key, DMA_TO_DEVICE); + sg_count = edesc->src_nents ?: 1; + if (is_sec1 && sg_count > 1) + sg_copy_to_buffer(areq->src, sg_count, edesc->buf, + cryptlen); + else + sg_count = dma_map_sg(dev, areq->src, sg_count, + (areq->src == areq->dst) ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE); /* * cipher in */ - sg_count = map_sg_in_talitos_ptr(dev, areq->src, cryptlen, edesc, - (areq->src == areq->dst) ? - DMA_BIDIRECTIONAL : DMA_TO_DEVICE, - &desc->ptr[3]); + sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc, + &desc->ptr[3], sg_count, 0, 0); + if (sg_count > 1) + sync_needed = true; /* cipher out */ - map_sg_out_talitos_ptr(dev, areq->dst, cryptlen, edesc, - (areq->src == areq->dst) ? DMA_NONE - : DMA_FROM_DEVICE, - &desc->ptr[4], sg_count); + if (areq->src != areq->dst) { + sg_count = edesc->dst_nents ? : 1; + if (!is_sec1 || sg_count == 1) + dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE); + } + + ret = talitos_sg_map(dev, areq->dst, cryptlen, edesc, &desc->ptr[4], + sg_count, 0, (edesc->src_nents + 1)); + if (ret > 1) + sync_needed = true; /* iv out */ map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, @@ -1603,6 +1596,10 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* last DWORD empty */ desc->ptr[6] = zero_entry; + if (sync_needed) + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, DMA_BIDIRECTIONAL); + ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { common_nonsnoop_unmap(dev, edesc, areq); @@ -1666,7 +1663,7 @@ static void common_nonsnoop_hash_unmap(struct device *dev, unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); - unmap_sg_talitos_ptr(dev, req_ctx->psrc, NULL, 0, edesc); + talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0); /* When using hashctx-in, must unmap it. */ if (from_talitos_ptr_len(&edesc->desc.ptr[1], is_sec1)) @@ -1737,8 +1734,10 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, struct device *dev = ctx->dev; struct talitos_desc *desc = &edesc->desc; int ret; + bool sync_needed = false; struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); + int sg_count; /* first DWORD empty */ desc->ptr[0] = zero_entry; @@ -1763,11 +1762,19 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, else desc->ptr[2] = zero_entry; + sg_count = edesc->src_nents ?: 1; + if (is_sec1 && sg_count > 1) + sg_copy_to_buffer(areq->src, sg_count, edesc->buf, length); + else + sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count, + DMA_TO_DEVICE); /* * data in */ - map_sg_in_talitos_ptr(dev, req_ctx->psrc, length, edesc, - DMA_TO_DEVICE, &desc->ptr[3]); + sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc, + &desc->ptr[3], sg_count, 0, 0); + if (sg_count > 1) + sync_needed = true; /* fifth DWORD empty */ desc->ptr[4] = zero_entry; @@ -1788,6 +1795,10 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, if (is_sec1 && from_talitos_ptr_len(&desc->ptr[3], true) == 0) talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]); + if (sync_needed) + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, DMA_BIDIRECTIONAL); + ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { common_nonsnoop_hash_unmap(dev, edesc, areq); @@ -1981,6 +1992,46 @@ static int ahash_digest(struct ahash_request *areq) return ahash_process_req(areq, areq->nbytes); } +static int ahash_export(struct ahash_request *areq, void *out) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct talitos_export_state *export = out; + + memcpy(export->hw_context, req_ctx->hw_context, + req_ctx->hw_context_size); + memcpy(export->buf, req_ctx->buf, req_ctx->nbuf); + export->swinit = req_ctx->swinit; + export->first = req_ctx->first; + export->last = req_ctx->last; + export->to_hash_later = req_ctx->to_hash_later; + export->nbuf = req_ctx->nbuf; + + return 0; +} + +static int ahash_import(struct ahash_request *areq, const void *in) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + const struct talitos_export_state *export = in; + + memset(req_ctx, 0, sizeof(*req_ctx)); + req_ctx->hw_context_size = + (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE) + ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256 + : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512; + memcpy(req_ctx->hw_context, export->hw_context, + req_ctx->hw_context_size); + memcpy(req_ctx->buf, export->buf, export->nbuf); + req_ctx->swinit = export->swinit; + req_ctx->first = export->first; + req_ctx->last = export->last; + req_ctx->to_hash_later = export->to_hash_later; + req_ctx->nbuf = export->nbuf; + + return 0; +} + struct keyhash_result { struct completion completion; int err; @@ -2074,6 +2125,7 @@ static int ahash_setkey(struct crypto_ahash *tfm, const u8 *key, struct talitos_alg_template { u32 type; + u32 priority; union { struct crypto_alg crypto; struct ahash_alg hash; @@ -2105,6 +2157,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha1-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA1_HMAC, + }, + { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { .cra_name = "authenc(hmac(sha1)," @@ -2126,6 +2199,29 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1)," + "cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha1-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA1_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2146,6 +2242,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA224_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha224-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA224_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2169,6 +2286,29 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA224_HMAC, }, { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224)," + "cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha224-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA224_HMAC, + }, + { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { .cra_name = "authenc(hmac(sha256),cbc(aes))", @@ -2189,6 +2329,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha256-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA256_HMAC, + }, + { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { .cra_name = "authenc(hmac(sha256)," @@ -2211,6 +2372,29 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256)," + "cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha256-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA256_HMAC, + }, + { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { .cra_name = "authenc(hmac(sha384),cbc(aes))", @@ -2315,6 +2499,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_MD5_HMAC, }, { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(aes))", + .cra_driver_name = "authenc-hmac-md5-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_MD5_HMAC, + }, + { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { .cra_name = "authenc(hmac(md5),cbc(des3_ede))", @@ -2335,6 +2540,28 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_MD5_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-md5-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_MD5_HMAC, + }, /* ABLKCIPHER algorithms. */ { .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .alg.crypto = { @@ -2458,6 +2685,7 @@ static struct talitos_alg_template driver_algs[] = { { .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .halg.digestsize = MD5_DIGEST_SIZE, + .halg.statesize = sizeof(struct talitos_export_state), .halg.base = { .cra_name = "md5", .cra_driver_name = "md5-talitos", @@ -2473,6 +2701,7 @@ static struct talitos_alg_template driver_algs[] = { { .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.statesize = sizeof(struct talitos_export_state), .halg.base = { .cra_name = "sha1", .cra_driver_name = "sha1-talitos", @@ -2488,6 +2717,7 @@ static struct talitos_alg_template driver_algs[] = { { .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct talitos_export_state), .halg.base = { .cra_name = "sha224", .cra_driver_name = "sha224-talitos", @@ -2503,6 +2733,7 @@ static struct talitos_alg_template driver_algs[] = { { .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct talitos_export_state), .halg.base = { .cra_name = "sha256", .cra_driver_name = "sha256-talitos", @@ -2518,6 +2749,7 @@ static struct talitos_alg_template driver_algs[] = { { .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct talitos_export_state), .halg.base = { .cra_name = "sha384", .cra_driver_name = "sha384-talitos", @@ -2533,6 +2765,7 @@ static struct talitos_alg_template driver_algs[] = { { .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct talitos_export_state), .halg.base = { .cra_name = "sha512", .cra_driver_name = "sha512-talitos", @@ -2548,6 +2781,7 @@ static struct talitos_alg_template driver_algs[] = { { .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .halg.digestsize = MD5_DIGEST_SIZE, + .halg.statesize = sizeof(struct talitos_export_state), .halg.base = { .cra_name = "hmac(md5)", .cra_driver_name = "hmac-md5-talitos", @@ -2563,6 +2797,7 @@ static struct talitos_alg_template driver_algs[] = { { .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.statesize = sizeof(struct talitos_export_state), .halg.base = { .cra_name = "hmac(sha1)", .cra_driver_name = "hmac-sha1-talitos", @@ -2578,6 +2813,7 @@ static struct talitos_alg_template driver_algs[] = { { .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct talitos_export_state), .halg.base = { .cra_name = "hmac(sha224)", .cra_driver_name = "hmac-sha224-talitos", @@ -2593,6 +2829,7 @@ static struct talitos_alg_template driver_algs[] = { { .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct talitos_export_state), .halg.base = { .cra_name = "hmac(sha256)", .cra_driver_name = "hmac-sha256-talitos", @@ -2608,6 +2845,7 @@ static struct talitos_alg_template driver_algs[] = { { .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct talitos_export_state), .halg.base = { .cra_name = "hmac(sha384)", .cra_driver_name = "hmac-sha384-talitos", @@ -2623,6 +2861,7 @@ static struct talitos_alg_template driver_algs[] = { { .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct talitos_export_state), .halg.base = { .cra_name = "hmac(sha512)", .cra_driver_name = "hmac-sha512-talitos", @@ -2814,6 +3053,8 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, t_alg->algt.alg.hash.finup = ahash_finup; t_alg->algt.alg.hash.digest = ahash_digest; t_alg->algt.alg.hash.setkey = ahash_setkey; + t_alg->algt.alg.hash.import = ahash_import; + t_alg->algt.alg.hash.export = ahash_export; if (!(priv->features & TALITOS_FTR_HMAC_OK) && !strncmp(alg->cra_name, "hmac", 4)) { @@ -2837,7 +3078,10 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, } alg->cra_module = THIS_MODULE; - alg->cra_priority = TALITOS_CRA_PRIORITY; + if (t_alg->algt.priority) + alg->cra_priority = t_alg->algt.priority; + else + alg->cra_priority = TALITOS_CRA_PRIORITY; alg->cra_alignmask = 0; alg->cra_ctxsize = sizeof(struct talitos_ctx); alg->cra_flags |= CRYPTO_ALG_KERN_DRIVER_ONLY; diff --git a/drivers/crypto/ux500/cryp/Makefile b/drivers/crypto/ux500/cryp/Makefile index e5d362a6f680..b497ae3dde07 100644 --- a/drivers/crypto/ux500/cryp/Makefile +++ b/drivers/crypto/ux500/cryp/Makefile @@ -4,9 +4,9 @@ # * License terms: GNU General Public License (GPL) version 2 */ ifdef CONFIG_CRYPTO_DEV_UX500_DEBUG -CFLAGS_cryp_core.o := -DDEBUG -O0 -CFLAGS_cryp.o := -DDEBUG -O0 -CFLAGS_cryp_irq.o := -DDEBUG -O0 +CFLAGS_cryp_core.o := -DDEBUG +CFLAGS_cryp.o := -DDEBUG +CFLAGS_cryp_irq.o := -DDEBUG endif obj-$(CONFIG_CRYPTO_DEV_UX500_CRYP) += ux500_cryp.o diff --git a/drivers/crypto/ux500/hash/Makefile b/drivers/crypto/ux500/hash/Makefile index b2f90d9bac72..784d9c0a8853 100644 --- a/drivers/crypto/ux500/hash/Makefile +++ b/drivers/crypto/ux500/hash/Makefile @@ -4,7 +4,7 @@ # License terms: GNU General Public License (GPL) version 2 # ifdef CONFIG_CRYPTO_DEV_UX500_DEBUG -CFLAGS_hash_core.o := -DDEBUG -O0 +CFLAGS_hash_core.o := -DDEBUG endif obj-$(CONFIG_CRYPTO_DEV_UX500_HASH) += ux500_hash.o diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 574e87c7f2b8..9acccad26928 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -781,7 +781,7 @@ static int hash_process_data(struct hash_device_data *device_data, &device_data->state); memmove(req_ctx->state.buffer, device_data->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "%s: hash_resume_state() failed!\n", @@ -832,7 +832,7 @@ static int hash_process_data(struct hash_device_data *device_data, memmove(device_data->state.buffer, req_ctx->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "%s: hash_save_state() failed!\n", __func__); diff --git a/drivers/crypto/vmx/.gitignore b/drivers/crypto/vmx/.gitignore new file mode 100644 index 000000000000..af4a7ce4738d --- /dev/null +++ b/drivers/crypto/vmx/.gitignore @@ -0,0 +1,2 @@ +aesp8-ppc.S +ghashp8-ppc.S diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig index 89d8208d9851..a83ead109d5f 100644 --- a/drivers/crypto/vmx/Kconfig +++ b/drivers/crypto/vmx/Kconfig @@ -1,7 +1,7 @@ config CRYPTO_DEV_VMX_ENCRYPT tristate "Encryption acceleration support on P8 CPU" depends on CRYPTO_DEV_VMX - default y + default m help Support for VMX cryptographic acceleration instructions on Power8 CPU. This module supports acceleration for AES and GHASH in hardware. If you diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index d28ab96a2475..de6e241b0866 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o -vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o ghash.o +vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) TARGET := linux-ppc64le diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 495577b6d31b..94ad5c0adbcb 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -182,7 +182,7 @@ struct crypto_alg p8_aes_cbc_alg = { .cra_name = "cbc(aes)", .cra_driver_name = "p8_aes_cbc", .cra_module = THIS_MODULE, - .cra_priority = 1000, + .cra_priority = 2000, .cra_type = &crypto_blkcipher_type, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, .cra_alignmask = 0, diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index 0a3c1b04cf3c..38ed10d761d0 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -166,7 +166,7 @@ struct crypto_alg p8_aes_ctr_alg = { .cra_name = "ctr(aes)", .cra_driver_name = "p8_aes_ctr", .cra_module = THIS_MODULE, - .cra_priority = 1000, + .cra_priority = 2000, .cra_type = &crypto_blkcipher_type, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, .cra_alignmask = 0, diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c new file mode 100644 index 000000000000..24353ec336c5 --- /dev/null +++ b/drivers/crypto/vmx/aes_xts.c @@ -0,0 +1,190 @@ +/** + * AES XTS routines supporting VMX In-core instructions on Power 8 + * + * Copyright (C) 2015 International Business Machines Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundations; version 2 only. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY of FITNESS FOR A PARTICUPAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com> + */ + +#include <linux/types.h> +#include <linux/err.h> +#include <linux/crypto.h> +#include <linux/delay.h> +#include <linux/hardirq.h> +#include <asm/switch_to.h> +#include <crypto/aes.h> +#include <crypto/scatterwalk.h> +#include <crypto/xts.h> + +#include "aesp8-ppc.h" + +struct p8_aes_xts_ctx { + struct crypto_blkcipher *fallback; + struct aes_key enc_key; + struct aes_key dec_key; + struct aes_key tweak_key; +}; + +static int p8_aes_xts_init(struct crypto_tfm *tfm) +{ + const char *alg; + struct crypto_blkcipher *fallback; + struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!(alg = crypto_tfm_alg_name(tfm))) { + printk(KERN_ERR "Failed to get algorithm name.\n"); + return -ENOENT; + } + + fallback = + crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { + printk(KERN_ERR + "Failed to allocate transformation for '%s': %ld\n", + alg, PTR_ERR(fallback)); + return PTR_ERR(fallback); + } + printk(KERN_INFO "Using '%s' as fallback implementation.\n", + crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + + crypto_blkcipher_set_flags( + fallback, + crypto_blkcipher_get_flags((struct crypto_blkcipher *)tfm)); + ctx->fallback = fallback; + + return 0; +} + +static void p8_aes_xts_exit(struct crypto_tfm *tfm) +{ + struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->fallback) { + crypto_free_blkcipher(ctx->fallback); + ctx->fallback = NULL; + } +} + +static int p8_aes_xts_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + int ret; + struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + ret = xts_check_key(tfm, key, keylen); + if (ret) + return ret; + + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); + ret = aes_p8_set_encrypt_key(key + keylen/2, (keylen/2) * 8, &ctx->tweak_key); + ret += aes_p8_set_encrypt_key(key, (keylen/2) * 8, &ctx->enc_key); + ret += aes_p8_set_decrypt_key(key, (keylen/2) * 8, &ctx->dec_key); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + + ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + return ret; +} + +static int p8_aes_xts_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes, int enc) +{ + int ret; + u8 tweak[AES_BLOCK_SIZE]; + u8 *iv; + struct blkcipher_walk walk; + struct p8_aes_xts_ctx *ctx = + crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); + struct blkcipher_desc fallback_desc = { + .tfm = ctx->fallback, + .info = desc->info, + .flags = desc->flags + }; + + if (in_interrupt()) { + ret = enc ? crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes) : + crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes); + } else { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); + + blkcipher_walk_init(&walk, dst, src, nbytes); + + ret = blkcipher_walk_virt(desc, &walk); + iv = walk.iv; + memset(tweak, 0, AES_BLOCK_SIZE); + aes_p8_encrypt(iv, tweak, &ctx->tweak_key); + + while ((nbytes = walk.nbytes)) { + if (enc) + aes_p8_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, + nbytes & AES_BLOCK_MASK, &ctx->enc_key, NULL, tweak); + else + aes_p8_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, + nbytes & AES_BLOCK_MASK, &ctx->dec_key, NULL, tweak); + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, &walk, nbytes); + } + + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + } + return ret; +} + +static int p8_aes_xts_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return p8_aes_xts_crypt(desc, dst, src, nbytes, 1); +} + +static int p8_aes_xts_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return p8_aes_xts_crypt(desc, dst, src, nbytes, 0); +} + +struct crypto_alg p8_aes_xts_alg = { + .cra_name = "xts(aes)", + .cra_driver_name = "p8_aes_xts", + .cra_module = THIS_MODULE, + .cra_priority = 2000, + .cra_type = &crypto_blkcipher_type, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, + .cra_alignmask = 0, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct p8_aes_xts_ctx), + .cra_init = p8_aes_xts_init, + .cra_exit = p8_aes_xts_exit, + .cra_blkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .setkey = p8_aes_xts_setkey, + .encrypt = p8_aes_xts_encrypt, + .decrypt = p8_aes_xts_decrypt, + } +}; diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h index 4cd34ee54a94..01972e16a6c0 100644 --- a/drivers/crypto/vmx/aesp8-ppc.h +++ b/drivers/crypto/vmx/aesp8-ppc.h @@ -19,3 +19,7 @@ void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len, void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, size_t len, const struct aes_key *key, const u8 *iv); +void aes_p8_xts_encrypt(const u8 *in, u8 *out, size_t len, + const struct aes_key *key1, const struct aes_key *key2, u8 *iv); +void aes_p8_xts_decrypt(const u8 *in, u8 *out, size_t len, + const struct aes_key *key1, const struct aes_key *key2, u8 *iv); diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl index 228053921b3f..0b4a293b8a1e 100644 --- a/drivers/crypto/vmx/aesp8-ppc.pl +++ b/drivers/crypto/vmx/aesp8-ppc.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -20,6 +27,19 @@ # instructions are interleaved. It's reckoned that eventual # misalignment penalties at page boundaries are in average lower # than additional overhead in pure AltiVec approach. +# +# May 2016 +# +# Add XTS subroutine, 9x on little- and 12x improvement on big-endian +# systems were measured. +# +###################################################################### +# Current large-block performance in cycles per byte processed with +# 128-bit key (less is better). +# +# CBC en-/decrypt CTR XTS +# POWER8[le] 3.96/0.72 0.74 1.1 +# POWER8[be] 3.75/0.65 0.66 1.0 $flavour = shift; @@ -1875,6 +1895,1845 @@ Lctr32_enc8x_done: ___ }} }}} +######################################################################### +{{{ # XTS procedures # +# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # +# const AES_KEY *key1, const AES_KEY *key2, # +# [const] unsigned char iv[16]); # +# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # +# input tweak value is assumed to be encrypted already, and last tweak # +# value, one suitable for consecutive call on same chunk of data, is # +# written back to original buffer. In addition, in "tweak chaining" # +# mode only complete input blocks are processed. # + +my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); +my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); +my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); +my $taillen = $key2; + + ($inp,$idx) = ($idx,$inp); # reassign + +$code.=<<___; +.globl .${prefix}_xts_encrypt + mr $inp,r3 # reassign + li r3,-1 + ${UCMP}i $len,16 + bltlr- + + lis r0,0xfff0 + mfspr r12,256 # save vrsave + li r11,0 + mtspr 256,r0 + + vspltisb $seven,0x07 # 0x070707..07 + le?lvsl $leperm,r11,r11 + le?vspltisb $tmp,0x0f + le?vxor $leperm,$leperm,$seven + + li $idx,15 + lvx $tweak,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $tweak,$tweak,$inptail,$inpperm + + neg r11,$inp + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inout,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ${UCMP}i $key2,0 # key2==NULL? + beq Lxts_enc_no_key2 + + ?lvsl $keyperm,0,$key2 # prepare for unaligned key + lwz $rounds,240($key2) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + lvx $rndkey0,0,$key2 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + mtctr $rounds + +Ltweak_xts_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + bdnz Ltweak_xts_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $tweak,$tweak,$rndkey0 + + li $ivp,0 # don't chain the tweak + b Lxts_enc + +Lxts_enc_no_key2: + li $idx,-16 + and $len,$len,$idx # in "tweak chaining" + # mode only complete + # blocks are processed +Lxts_enc: + lvx $inptail,0,$inp + addi $inp,$inp,16 + + ?lvsl $keyperm,0,$key1 # prepare for unaligned key + lwz $rounds,240($key1) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + vslb $eighty7,$seven,$seven # 0x808080..80 + vor $eighty7,$eighty7,$seven # 0x878787..87 + vspltisb $tmp,1 # 0x010101..01 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 + + ${UCMP}i $len,96 + bge _aesp8_xts_encrypt6x + + andi. $taillen,$len,15 + subic r0,$len,32 + subi $taillen,$taillen,16 + subfe r0,r0,r0 + and r0,r0,$taillen + add $inp,$inp,r0 + + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + mtctr $rounds + b Loop_xts_enc + +.align 5 +Loop_xts_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak + vcipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + addi $out,$out,16 + + subic. $len,$len,16 + beq Lxts_enc_done + + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + + subic r0,$len,32 + subfe r0,r0,r0 + and r0,r0,$taillen + add $inp,$inp,r0 + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $output,$output,$rndkey0 # just in case $len<16 + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + mtctr $rounds + ${UCMP}i $len,16 + bge Loop_xts_enc + + vxor $output,$output,$tweak + lvsr $inpperm,0,$len # $inpperm is no longer needed + vxor $inptail,$inptail,$inptail # $inptail is no longer needed + vspltisb $tmp,-1 + vperm $inptail,$inptail,$tmp,$inpperm + vsel $inout,$inout,$output,$inptail + + subi r11,$out,17 + subi $out,$out,16 + mtctr $len + li $len,16 +Loop_xts_enc_steal: + lbzu r0,1(r11) + stb r0,16(r11) + bdnz Loop_xts_enc_steal + + mtctr $rounds + b Loop_xts_enc # one more time... + +Lxts_enc_done: + ${UCMP}i $ivp,0 + beq Lxts_enc_ret + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_enc_ret: + mtspr 256,r12 # restore vrsave + li r3,0 + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt + +.globl .${prefix}_xts_decrypt + mr $inp,r3 # reassign + li r3,-1 + ${UCMP}i $len,16 + bltlr- + + lis r0,0xfff8 + mfspr r12,256 # save vrsave + li r11,0 + mtspr 256,r0 + + andi. r0,$len,15 + neg r0,r0 + andi. r0,r0,16 + sub $len,$len,r0 + + vspltisb $seven,0x07 # 0x070707..07 + le?lvsl $leperm,r11,r11 + le?vspltisb $tmp,0x0f + le?vxor $leperm,$leperm,$seven + + li $idx,15 + lvx $tweak,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $tweak,$tweak,$inptail,$inpperm + + neg r11,$inp + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inout,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ${UCMP}i $key2,0 # key2==NULL? + beq Lxts_dec_no_key2 + + ?lvsl $keyperm,0,$key2 # prepare for unaligned key + lwz $rounds,240($key2) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + lvx $rndkey0,0,$key2 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + mtctr $rounds + +Ltweak_xts_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + bdnz Ltweak_xts_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $tweak,$tweak,$rndkey0 + + li $ivp,0 # don't chain the tweak + b Lxts_dec + +Lxts_dec_no_key2: + neg $idx,$len + andi. $idx,$idx,15 + add $len,$len,$idx # in "tweak chaining" + # mode only complete + # blocks are processed +Lxts_dec: + lvx $inptail,0,$inp + addi $inp,$inp,16 + + ?lvsl $keyperm,0,$key1 # prepare for unaligned key + lwz $rounds,240($key1) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + vslb $eighty7,$seven,$seven # 0x808080..80 + vor $eighty7,$eighty7,$seven # 0x878787..87 + vspltisb $tmp,1 # 0x010101..01 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 + + ${UCMP}i $len,96 + bge _aesp8_xts_decrypt6x + + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + mtctr $rounds + + ${UCMP}i $len,16 + blt Ltail_xts_dec + be?b Loop_xts_dec + +.align 5 +Loop_xts_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak + vncipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + addi $out,$out,16 + + subic. $len,$len,16 + beq Lxts_dec_done + + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + mtctr $rounds + ${UCMP}i $len,16 + bge Loop_xts_dec + +Ltail_xts_dec: + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak1,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak1,$tweak1,$tmp + + subi $inp,$inp,16 + add $inp,$inp,$len + + vxor $inout,$inout,$tweak # :-( + vxor $inout,$inout,$tweak1 # :-) + +Loop_xts_dec_short: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_dec_short + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak1 + vncipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + + vmr $inout,$inptail + lvx $inptail,0,$inp + #addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + + lvsr $inpperm,0,$len # $inpperm is no longer needed + vxor $inptail,$inptail,$inptail # $inptail is no longer needed + vspltisb $tmp,-1 + vperm $inptail,$inptail,$tmp,$inpperm + vsel $inout,$inout,$output,$inptail + + vxor $rndkey0,$rndkey0,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + subi r11,$out,1 + mtctr $len + li $len,16 +Loop_xts_dec_steal: + lbzu r0,1(r11) + stb r0,16(r11) + bdnz Loop_xts_dec_steal + + mtctr $rounds + b Loop_xts_dec # one more time... + +Lxts_dec_done: + ${UCMP}i $ivp,0 + beq Lxts_dec_ret + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_dec_ret: + mtspr 256,r12 # restore vrsave + li r3,0 + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt +___ +######################################################################### +{{ # Optimized XTS procedures # +my $key_=$key2; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); + $x00=0 if ($flavour =~ /osx/); +my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); +my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); +my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($keyperm)=($out0); # aliases with "caller", redundant assignment +my $taillen=$x70; + +$code.=<<___; +.align 5 +_aesp8_xts_encrypt6x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr r11 + li r7,`$FRAME+8*16+15` + li r3,`$FRAME+8*16+31` + $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + stvx v20,r7,$sp # ABI says so + addi r7,r7,32 + stvx v21,r3,$sp + addi r3,r3,32 + stvx v22,r7,$sp + addi r7,r7,32 + stvx v23,r3,$sp + addi r3,r3,32 + stvx v24,r7,$sp + addi r7,r7,32 + stvx v25,r3,$sp + addi r3,r3,32 + stvx v26,r7,$sp + addi r7,r7,32 + stvx v27,r3,$sp + addi r3,r3,32 + stvx v28,r7,$sp + addi r7,r7,32 + stvx v29,r3,$sp + addi r3,r3,32 + stvx v30,r7,$sp + stvx v31,r3,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key1 # load key schedule + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + lvx v31,$x00,$key1 + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_xts_enc_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key1 + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_xts_enc_key + + lvx v26,$x10,$key1 + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key1 + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key1 + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key1 + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key1 + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key1 + ?vperm v29,v29,v30,$keyperm + lvx $twk5,$x70,$key1 # borrow $twk5 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$twk5,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vperm $in0,$inout,$inptail,$inpperm + subi $inp,$inp,31 # undo "caller" + vxor $twk0,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $out0,$in0,$twk0 + vxor $tweak,$tweak,$tmp + + lvx_u $in1,$x10,$inp + vxor $twk1,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in1,$in1,$in1,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out1,$in1,$twk1 + vxor $tweak,$tweak,$tmp + + lvx_u $in2,$x20,$inp + andi. $taillen,$len,15 + vxor $twk2,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in2,$in2,$in2,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out2,$in2,$twk2 + vxor $tweak,$tweak,$tmp + + lvx_u $in3,$x30,$inp + sub $len,$len,$taillen + vxor $twk3,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in3,$in3,$in3,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out3,$in3,$twk3 + vxor $tweak,$tweak,$tmp + + lvx_u $in4,$x40,$inp + subi $len,$len,0x60 + vxor $twk4,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in4,$in4,$in4,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out4,$in4,$twk4 + vxor $tweak,$tweak,$tmp + + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + vxor $twk5,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in5,$in5,$in5,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out5,$in5,$twk5 + vxor $tweak,$tweak,$tmp + + vxor v31,v31,$rndkey0 + mtctr $rounds + b Loop_xts_enc6x + +.align 5 +Loop_xts_enc6x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_enc6x + + subic $len,$len,96 # $len-=96 + vxor $in0,$twk0,v31 # xor with last round key + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk0,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + + subfe. r0,r0,r0 # borrow?-1:0 + vand $tmp,$tmp,$eighty7 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vxor $tweak,$tweak,$tmp + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vxor $in1,$twk1,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk1,$tweak,$rndkey0 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + + and r0,r0,$len + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out0,$out0,v26 + vcipher $out1,$out1,v26 + vand $tmp,$tmp,$eighty7 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vxor $tweak,$tweak,$tmp + vcipher $out4,$out4,v26 + vcipher $out5,$out5,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in5 are loaded + # with last "words" + vxor $in2,$twk2,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk2,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vcipher $out0,$out0,v27 + vcipher $out1,$out1,v27 + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vand $tmp,$tmp,$eighty7 + vcipher $out4,$out4,v27 + vcipher $out5,$out5,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vxor $tweak,$tweak,$tmp + vcipher $out0,$out0,v28 + vcipher $out1,$out1,v28 + vxor $in3,$twk3,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk3,$tweak,$rndkey0 + vcipher $out2,$out2,v28 + vcipher $out3,$out3,v28 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out4,$out4,v28 + vcipher $out5,$out5,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vand $tmp,$tmp,$eighty7 + + vcipher $out0,$out0,v29 + vcipher $out1,$out1,v29 + vxor $tweak,$tweak,$tmp + vcipher $out2,$out2,v29 + vcipher $out3,$out3,v29 + vxor $in4,$twk4,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk4,$tweak,$rndkey0 + vcipher $out4,$out4,v29 + vcipher $out5,$out5,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + + vcipher $out0,$out0,v30 + vcipher $out1,$out1,v30 + vand $tmp,$tmp,$eighty7 + vcipher $out2,$out2,v30 + vcipher $out3,$out3,v30 + vxor $tweak,$tweak,$tmp + vcipher $out4,$out4,v30 + vcipher $out5,$out5,v30 + vxor $in5,$twk5,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk5,$tweak,$rndkey0 + + vcipherlast $out0,$out0,$in0 + lvx_u $in0,$x00,$inp # load next input block + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipherlast $out1,$out1,$in1 + lvx_u $in1,$x10,$inp + vcipherlast $out2,$out2,$in2 + le?vperm $in0,$in0,$in0,$leperm + lvx_u $in2,$x20,$inp + vand $tmp,$tmp,$eighty7 + vcipherlast $out3,$out3,$in3 + le?vperm $in1,$in1,$in1,$leperm + lvx_u $in3,$x30,$inp + vcipherlast $out4,$out4,$in4 + le?vperm $in2,$in2,$in2,$leperm + lvx_u $in4,$x40,$inp + vxor $tweak,$tweak,$tmp + vcipherlast $tmp,$out5,$in5 # last block might be needed + # in stealing mode + le?vperm $in3,$in3,$in3,$leperm + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + le?vperm $in4,$in4,$in4,$leperm + le?vperm $in5,$in5,$in5,$leperm + + le?vperm $out0,$out0,$out0,$leperm + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk0 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $out1,$in1,$twk1 + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$twk2 + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$twk3 + le?vperm $out5,$tmp,$tmp,$leperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$twk4 + le?stvx_u $out5,$x50,$out + be?stvx_u $tmp, $x50,$out + vxor $out5,$in5,$twk5 + addi $out,$out,0x60 + + mtctr $rounds + beq Loop_xts_enc6x # did $len-=96 borrow? + + addic. $len,$len,0x60 + beq Lxts_enc6x_zero + cmpwi $len,0x20 + blt Lxts_enc6x_one + nop + beq Lxts_enc6x_two + cmpwi $len,0x40 + blt Lxts_enc6x_three + nop + beq Lxts_enc6x_four + +Lxts_enc6x_five: + vxor $out0,$in1,$twk0 + vxor $out1,$in2,$twk1 + vxor $out2,$in3,$twk2 + vxor $out3,$in4,$twk3 + vxor $out4,$in5,$twk4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk5 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $tmp,$out4,$twk5 # last block prep for stealing + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_four: + vxor $out0,$in2,$twk0 + vxor $out1,$in3,$twk1 + vxor $out2,$in4,$twk2 + vxor $out3,$in5,$twk3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk4 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $tmp,$out3,$twk4 # last block prep for stealing + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_three: + vxor $out0,$in3,$twk0 + vxor $out1,$in4,$twk1 + vxor $out2,$in5,$twk2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk3 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $tmp,$out2,$twk3 # last block prep for stealing + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_two: + vxor $out0,$in4,$twk0 + vxor $out1,$in5,$twk1 + vxor $out2,$out2,$out2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk2 # unused tweak + vxor $tmp,$out1,$twk2 # last block prep for stealing + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_one: + vxor $out0,$in5,$twk0 + nop +Loop_xts_enc1x: + vcipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_enc1x + + add $inp,$inp,$taillen + cmpwi $taillen,0 + vcipher $out0,$out0,v24 + + subi $inp,$inp,16 + vcipher $out0,$out0,v25 + + lvsr $inpperm,0,$taillen + vcipher $out0,$out0,v26 + + lvx_u $in0,0,$inp + vcipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vcipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk0,$twk0,v31 + + le?vperm $in0,$in0,$in0,$leperm + vcipher $out0,$out0,v30 + + vperm $in0,$in0,$in0,$inpperm + vcipherlast $out0,$out0,$twk0 + + vmr $twk0,$twk1 # unused tweak + vxor $tmp,$out0,$twk1 # last block prep for stealing + le?vperm $out0,$out0,$out0,$leperm + stvx_u $out0,$x00,$out # store output + addi $out,$out,0x10 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_zero: + cmpwi $taillen,0 + beq Lxts_enc6x_done + + add $inp,$inp,$taillen + subi $inp,$inp,16 + lvx_u $in0,0,$inp + lvsr $inpperm,0,$taillen # $in5 is no more + le?vperm $in0,$in0,$in0,$leperm + vperm $in0,$in0,$in0,$inpperm + vxor $tmp,$tmp,$twk0 +Lxts_enc6x_steal: + vxor $in0,$in0,$twk0 + vxor $out0,$out0,$out0 + vspltisb $out1,-1 + vperm $out0,$out0,$out1,$inpperm + vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? + + subi r30,$out,17 + subi $out,$out,16 + mtctr $taillen +Loop_xts_enc6x_steal: + lbzu r0,1(r30) + stb r0,16(r30) + bdnz Loop_xts_enc6x_steal + + li $taillen,0 + mtctr $rounds + b Loop_xts_enc1x # one more time... + +.align 4 +Lxts_enc6x_done: + ${UCMP}i $ivp,0 + beq Lxts_enc6x_ret + + vxor $tweak,$twk0,$rndkey0 + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_enc6x_ret: + mtlr r11 + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $seven,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,1,0x80,6,6,0 + .long 0 + +.align 5 +_aesp8_xts_enc5x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + lvx v25,$x10,$key_ # round[4] + bdnz _aesp8_xts_enc5x + + add $inp,$inp,$taillen + cmpwi $taillen,0 + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + + subi $inp,$inp,16 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vxor $twk0,$twk0,v31 + + vcipher $out0,$out0,v26 + lvsr $inpperm,r0,$taillen # $in5 is no more + vcipher $out1,$out1,v26 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vcipher $out4,$out4,v26 + vxor $in1,$twk1,v31 + + vcipher $out0,$out0,v27 + lvx_u $in0,0,$inp + vcipher $out1,$out1,v27 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vcipher $out4,$out4,v27 + vxor $in2,$twk2,v31 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v28 + vcipher $out1,$out1,v28 + vcipher $out2,$out2,v28 + vcipher $out3,$out3,v28 + vcipher $out4,$out4,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vxor $in3,$twk3,v31 + + vcipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$leperm + vcipher $out1,$out1,v29 + vcipher $out2,$out2,v29 + vcipher $out3,$out3,v29 + vcipher $out4,$out4,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $in4,$twk4,v31 + + vcipher $out0,$out0,v30 + vperm $in0,$in0,$in0,$inpperm + vcipher $out1,$out1,v30 + vcipher $out2,$out2,v30 + vcipher $out3,$out3,v30 + vcipher $out4,$out4,v30 + + vcipherlast $out0,$out0,$twk0 + vcipherlast $out1,$out1,$in1 + vcipherlast $out2,$out2,$in2 + vcipherlast $out3,$out3,$in3 + vcipherlast $out4,$out4,$in4 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +.align 5 +_aesp8_xts_decrypt6x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr r11 + li r7,`$FRAME+8*16+15` + li r3,`$FRAME+8*16+31` + $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + stvx v20,r7,$sp # ABI says so + addi r7,r7,32 + stvx v21,r3,$sp + addi r3,r3,32 + stvx v22,r7,$sp + addi r7,r7,32 + stvx v23,r3,$sp + addi r3,r3,32 + stvx v24,r7,$sp + addi r7,r7,32 + stvx v25,r3,$sp + addi r3,r3,32 + stvx v26,r7,$sp + addi r7,r7,32 + stvx v27,r3,$sp + addi r3,r3,32 + stvx v28,r7,$sp + addi r7,r7,32 + stvx v29,r3,$sp + addi r3,r3,32 + stvx v30,r7,$sp + stvx v31,r3,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key1 # load key schedule + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + lvx v31,$x00,$key1 + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_xts_dec_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key1 + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_xts_dec_key + + lvx v26,$x10,$key1 + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key1 + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key1 + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key1 + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key1 + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key1 + ?vperm v29,v29,v30,$keyperm + lvx $twk5,$x70,$key1 # borrow $twk5 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$twk5,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vperm $in0,$inout,$inptail,$inpperm + subi $inp,$inp,31 # undo "caller" + vxor $twk0,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $out0,$in0,$twk0 + vxor $tweak,$tweak,$tmp + + lvx_u $in1,$x10,$inp + vxor $twk1,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in1,$in1,$in1,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out1,$in1,$twk1 + vxor $tweak,$tweak,$tmp + + lvx_u $in2,$x20,$inp + andi. $taillen,$len,15 + vxor $twk2,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in2,$in2,$in2,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out2,$in2,$twk2 + vxor $tweak,$tweak,$tmp + + lvx_u $in3,$x30,$inp + sub $len,$len,$taillen + vxor $twk3,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in3,$in3,$in3,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out3,$in3,$twk3 + vxor $tweak,$tweak,$tmp + + lvx_u $in4,$x40,$inp + subi $len,$len,0x60 + vxor $twk4,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in4,$in4,$in4,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out4,$in4,$twk4 + vxor $tweak,$tweak,$tmp + + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + vxor $twk5,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in5,$in5,$in5,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out5,$in5,$twk5 + vxor $tweak,$tweak,$tmp + + vxor v31,v31,$rndkey0 + mtctr $rounds + b Loop_xts_dec6x + +.align 5 +Loop_xts_dec6x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_dec6x + + subic $len,$len,96 # $len-=96 + vxor $in0,$twk0,v31 # xor with last round key + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk0,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + + subfe. r0,r0,r0 # borrow?-1:0 + vand $tmp,$tmp,$eighty7 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vxor $tweak,$tweak,$tmp + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vxor $in1,$twk1,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk1,$tweak,$rndkey0 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + + and r0,r0,$len + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vand $tmp,$tmp,$eighty7 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vxor $tweak,$tweak,$tmp + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in5 are loaded + # with last "words" + vxor $in2,$twk2,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk2,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vncipher $out0,$out0,v27 + vncipher $out1,$out1,v27 + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vand $tmp,$tmp,$eighty7 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vxor $tweak,$tweak,$tmp + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vxor $in3,$twk3,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk3,$tweak,$rndkey0 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vand $tmp,$tmp,$eighty7 + + vncipher $out0,$out0,v29 + vncipher $out1,$out1,v29 + vxor $tweak,$tweak,$tmp + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vxor $in4,$twk4,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk4,$tweak,$rndkey0 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + + vncipher $out0,$out0,v30 + vncipher $out1,$out1,v30 + vand $tmp,$tmp,$eighty7 + vncipher $out2,$out2,v30 + vncipher $out3,$out3,v30 + vxor $tweak,$tweak,$tmp + vncipher $out4,$out4,v30 + vncipher $out5,$out5,v30 + vxor $in5,$twk5,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk5,$tweak,$rndkey0 + + vncipherlast $out0,$out0,$in0 + lvx_u $in0,$x00,$inp # load next input block + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipherlast $out1,$out1,$in1 + lvx_u $in1,$x10,$inp + vncipherlast $out2,$out2,$in2 + le?vperm $in0,$in0,$in0,$leperm + lvx_u $in2,$x20,$inp + vand $tmp,$tmp,$eighty7 + vncipherlast $out3,$out3,$in3 + le?vperm $in1,$in1,$in1,$leperm + lvx_u $in3,$x30,$inp + vncipherlast $out4,$out4,$in4 + le?vperm $in2,$in2,$in2,$leperm + lvx_u $in4,$x40,$inp + vxor $tweak,$tweak,$tmp + vncipherlast $out5,$out5,$in5 + le?vperm $in3,$in3,$in3,$leperm + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + le?vperm $in4,$in4,$in4,$leperm + le?vperm $in5,$in5,$in5,$leperm + + le?vperm $out0,$out0,$out0,$leperm + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk0 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $out1,$in1,$twk1 + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$twk2 + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$twk3 + le?vperm $out5,$out5,$out5,$leperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$twk4 + stvx_u $out5,$x50,$out + vxor $out5,$in5,$twk5 + addi $out,$out,0x60 + + mtctr $rounds + beq Loop_xts_dec6x # did $len-=96 borrow? + + addic. $len,$len,0x60 + beq Lxts_dec6x_zero + cmpwi $len,0x20 + blt Lxts_dec6x_one + nop + beq Lxts_dec6x_two + cmpwi $len,0x40 + blt Lxts_dec6x_three + nop + beq Lxts_dec6x_four + +Lxts_dec6x_five: + vxor $out0,$in1,$twk0 + vxor $out1,$in2,$twk1 + vxor $out2,$in3,$twk2 + vxor $out3,$in4,$twk3 + vxor $out4,$in5,$twk4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk5 # unused tweak + vxor $twk1,$tweak,$rndkey0 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk1 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_four: + vxor $out0,$in2,$twk0 + vxor $out1,$in3,$twk1 + vxor $out2,$in4,$twk2 + vxor $out3,$in5,$twk3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk4 # unused tweak + vmr $twk1,$twk5 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk5 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_three: + vxor $out0,$in3,$twk0 + vxor $out1,$in4,$twk1 + vxor $out2,$in5,$twk2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk3 # unused tweak + vmr $twk1,$twk4 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk4 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_two: + vxor $out0,$in4,$twk0 + vxor $out1,$in5,$twk1 + vxor $out2,$out2,$out2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk2 # unused tweak + vmr $twk1,$twk3 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk3 + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_one: + vxor $out0,$in5,$twk0 + nop +Loop_xts_dec1x: + vncipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_dec1x + + subi r0,$taillen,1 + vncipher $out0,$out0,v24 + + andi. r0,r0,16 + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + + sub $inp,$inp,r0 + vncipher $out0,$out0,v26 + + lvx_u $in0,0,$inp + vncipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk0,$twk0,v31 + + le?vperm $in0,$in0,$in0,$leperm + vncipher $out0,$out0,v30 + + mtctr $rounds + vncipherlast $out0,$out0,$twk0 + + vmr $twk0,$twk1 # unused tweak + vmr $twk1,$twk2 + le?vperm $out0,$out0,$out0,$leperm + stvx_u $out0,$x00,$out # store output + addi $out,$out,0x10 + vxor $out0,$in0,$twk2 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_zero: + cmpwi $taillen,0 + beq Lxts_dec6x_done + + lvx_u $in0,0,$inp + le?vperm $in0,$in0,$in0,$leperm + vxor $out0,$in0,$twk1 +Lxts_dec6x_steal: + vncipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Lxts_dec6x_steal + + add $inp,$inp,$taillen + vncipher $out0,$out0,v24 + + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + + lvx_u $in0,0,$inp + vncipher $out0,$out0,v26 + + lvsr $inpperm,0,$taillen # $in5 is no more + vncipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk1,$twk1,v31 + + le?vperm $in0,$in0,$in0,$leperm + vncipher $out0,$out0,v30 + + vperm $in0,$in0,$in0,$inpperm + vncipherlast $tmp,$out0,$twk1 + + le?vperm $out0,$tmp,$tmp,$leperm + le?stvx_u $out0,0,$out + be?stvx_u $tmp,0,$out + + vxor $out0,$out0,$out0 + vspltisb $out1,-1 + vperm $out0,$out0,$out1,$inpperm + vsel $out0,$in0,$tmp,$out0 + vxor $out0,$out0,$twk0 + + subi r30,$out,1 + mtctr $taillen +Loop_xts_dec6x_steal: + lbzu r0,1(r30) + stb r0,16(r30) + bdnz Loop_xts_dec6x_steal + + li $taillen,0 + mtctr $rounds + b Loop_xts_dec1x # one more time... + +.align 4 +Lxts_dec6x_done: + ${UCMP}i $ivp,0 + beq Lxts_dec6x_ret + + vxor $tweak,$twk0,$rndkey0 + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_dec6x_ret: + mtlr r11 + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $seven,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,1,0x80,6,6,0 + .long 0 + +.align 5 +_aesp8_xts_dec5x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + lvx v25,$x10,$key_ # round[4] + bdnz _aesp8_xts_dec5x + + subi r0,$taillen,1 + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + + andi. r0,r0,16 + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vxor $twk0,$twk0,v31 + + sub $inp,$inp,r0 + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vxor $in1,$twk1,v31 + + vncipher $out0,$out0,v27 + lvx_u $in0,0,$inp + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vxor $in2,$twk2,v31 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vxor $in3,$twk3,v31 + + vncipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$leperm + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $in4,$twk4,v31 + + vncipher $out0,$out0,v30 + vncipher $out1,$out1,v30 + vncipher $out2,$out2,v30 + vncipher $out3,$out3,v30 + vncipher $out4,$out4,v30 + + vncipherlast $out0,$out0,$twk0 + vncipherlast $out1,$out1,$in1 + vncipherlast $out2,$out2,$in2 + vncipherlast $out3,$out3,$in3 + vncipherlast $out4,$out4,$in4 + mtctr $rounds + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +___ +}} }}} + my $consts=1; foreach(split("\n",$code)) { s/\`([^\`]*)\`/eval($1)/geo; @@ -1898,7 +3757,7 @@ foreach(split("\n",$code)) { if ($flavour =~ /le$/o) { SWITCH: for($conv) { /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; - /\?rev/ && do { @bytes=reverse(@bytes); last; }; + /\?rev/ && do { @bytes=reverse(@bytes); last; }; } } diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl index b9997335f193..b18e67d0e065 100644 --- a/drivers/crypto/vmx/ppc-xlate.pl +++ b/drivers/crypto/vmx/ppc-xlate.pl @@ -139,6 +139,26 @@ my $vmr = sub { " vor $vx,$vy,$vy"; }; +# Some ABIs specify vrsave, special-purpose register #256, as reserved +# for system use. +my $no_vrsave = ($flavour =~ /linux-ppc64le/); +my $mtspr = sub { + my ($f,$idx,$ra) = @_; + if ($idx == 256 && $no_vrsave) { + " or $ra,$ra,$ra"; + } else { + " mtspr $idx,$ra"; + } +}; +my $mfspr = sub { + my ($f,$rd,$idx) = @_; + if ($idx == 256 && $no_vrsave) { + " li $rd,-1"; + } else { + " mfspr $rd,$idx"; + } +}; + # PowerISA 2.06 stuff sub vsxmem_op { my ($f, $vrt, $ra, $rb, $op) = @_; diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c index e163d5770438..31a98dc6f849 100644 --- a/drivers/crypto/vmx/vmx.c +++ b/drivers/crypto/vmx/vmx.c @@ -23,6 +23,7 @@ #include <linux/moduleparam.h> #include <linux/types.h> #include <linux/err.h> +#include <linux/cpufeature.h> #include <linux/crypto.h> #include <asm/cputable.h> #include <crypto/internal/hash.h> @@ -31,10 +32,12 @@ extern struct shash_alg p8_ghash_alg; extern struct crypto_alg p8_aes_alg; extern struct crypto_alg p8_aes_cbc_alg; extern struct crypto_alg p8_aes_ctr_alg; +extern struct crypto_alg p8_aes_xts_alg; static struct crypto_alg *algs[] = { &p8_aes_alg, &p8_aes_cbc_alg, &p8_aes_ctr_alg, + &p8_aes_xts_alg, NULL, }; @@ -43,9 +46,6 @@ int __init p8_init(void) int ret = 0; struct crypto_alg **alg_it; - if (!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) - return -ENODEV; - for (alg_it = algs; *alg_it; alg_it++) { ret = crypto_register_alg(*alg_it); printk(KERN_INFO "crypto_register_alg '%s' = %d\n", @@ -78,7 +78,7 @@ void __exit p8_exit(void) crypto_unregister_shash(&p8_ghash_alg); } -module_init(p8_init); +module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, p8_init); module_exit(p8_exit); MODULE_AUTHOR("Marcelo Cerri<mhcerri@br.ibm.com>"); |