aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/crypto
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2015-04-20 11:32:26 +1000
committerDave Airlie <airlied@redhat.com>2015-04-20 13:05:20 +1000
commit2c33ce009ca2389dbf0535d0672214d09738e35e (patch)
tree6186a6458c3c160385d794a23eaf07c786a9e61b /drivers/crypto
parentmedia-bus: Fixup RGB444_1X12, RGB565_1X16, and YUV8_1X24 media bus format (diff)
parentMerge branch 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux (diff)
downloadlinux-dev-2c33ce009ca2389dbf0535d0672214d09738e35e.tar.xz
linux-dev-2c33ce009ca2389dbf0535d0672214d09738e35e.zip
Merge Linus master into drm-next
The merge is clean, but the arm build fails afterwards, due to API changes in the regulator tree. I've included the patch into the merge to fix the build. Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/crypto')
-rw-r--r--drivers/crypto/Kconfig24
-rw-r--r--drivers/crypto/Makefile2
-rw-r--r--drivers/crypto/amcc/crypto4xx_core.c6
-rw-r--r--drivers/crypto/atmel-aes.c26
-rw-r--r--drivers/crypto/atmel-sha.c37
-rw-r--r--drivers/crypto/atmel-tdes.c3
-rw-r--r--drivers/crypto/caam/caamhash.c1
-rw-r--r--drivers/crypto/caam/caamrng.c6
-rw-r--r--drivers/crypto/ccp/Makefile9
-rw-r--r--drivers/crypto/ccp/ccp-crypto-aes-cmac.c12
-rw-r--r--drivers/crypto/ccp/ccp-crypto-aes-xts.c4
-rw-r--r--drivers/crypto/ccp/ccp-crypto-aes.c3
-rw-r--r--drivers/crypto/ccp/ccp-crypto-main.c5
-rw-r--r--drivers/crypto/ccp/ccp-crypto-sha.c12
-rw-r--r--drivers/crypto/ccp/ccp-crypto.h3
-rw-r--r--drivers/crypto/ccp/ccp-dev.c7
-rw-r--r--drivers/crypto/ccp/ccp-dev.h12
-rw-r--r--drivers/crypto/ccp/ccp-ops.c24
-rw-r--r--drivers/crypto/ccp/ccp-pci.c21
-rw-r--r--drivers/crypto/ccp/ccp-platform.c111
-rw-r--r--drivers/crypto/img-hash.c1029
-rw-r--r--drivers/crypto/mxs-dcp.c2
-rw-r--r--drivers/crypto/omap-aes.c14
-rw-r--r--drivers/crypto/omap-sham.c2
-rw-r--r--drivers/crypto/qat/qat_common/adf_accel_devices.h1
-rw-r--r--drivers/crypto/qat/qat_common/adf_accel_engine.c35
-rw-r--r--drivers/crypto/qat/qat_common/adf_aer.c21
-rw-r--r--drivers/crypto/qat/qat_common/adf_cfg.c5
-rw-r--r--drivers/crypto/qat/qat_common/adf_cfg_strings.h10
-rw-r--r--drivers/crypto/qat/qat_common/adf_common_drv.h2
-rw-r--r--drivers/crypto/qat/qat_common/adf_ctl_drv.c66
-rw-r--r--drivers/crypto/qat/qat_common/adf_dev_mgr.c3
-rw-r--r--drivers/crypto/qat/qat_common/adf_init.c88
-rw-r--r--drivers/crypto/qat/qat_common/adf_transport.c31
-rw-r--r--drivers/crypto/qat/qat_common/adf_transport_debug.c2
-rw-r--r--drivers/crypto/qat/qat_common/icp_qat_hw.h2
-rw-r--r--drivers/crypto/qat/qat_common/qat_crypto.c9
-rw-r--r--drivers/crypto/qat/qat_common/qat_hal.c6
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_admin.c3
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c3
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h6
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_drv.c4
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_isr.c15
-rw-r--r--drivers/crypto/sahara.c51
-rw-r--r--drivers/crypto/talitos.c17
-rw-r--r--drivers/crypto/ux500/hash/hash_core.c2
-rw-r--r--drivers/crypto/vmx/Kconfig8
-rw-r--r--drivers/crypto/vmx/Makefile19
-rw-r--r--drivers/crypto/vmx/aes.c139
-rw-r--r--drivers/crypto/vmx/aes_cbc.c184
-rw-r--r--drivers/crypto/vmx/aes_ctr.c167
-rw-r--r--drivers/crypto/vmx/aesp8-ppc.h20
-rw-r--r--drivers/crypto/vmx/aesp8-ppc.pl1930
-rw-r--r--drivers/crypto/vmx/ghash.c214
-rw-r--r--drivers/crypto/vmx/ghashp8-ppc.pl228
-rw-r--r--drivers/crypto/vmx/ppc-xlate.pl207
-rw-r--r--drivers/crypto/vmx/vmx.c88
57 files changed, 4686 insertions, 275 deletions
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 2fb0fdfc87df..800bf41718e1 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -391,7 +391,7 @@ config CRYPTO_DEV_ATMEL_SHA
config CRYPTO_DEV_CCP
bool "Support for AMD Cryptographic Coprocessor"
- depends on (X86 && PCI) || ARM64
+ depends on ((X86 && PCI) || (ARM64 && (OF_ADDRESS || ACPI))) && HAS_IOMEM
default n
help
The AMD Cryptographic Coprocessor provides hardware support
@@ -436,4 +436,26 @@ config CRYPTO_DEV_QCE
hardware. To compile this driver as a module, choose M here. The
module will be called qcrypto.
+config CRYPTO_DEV_VMX
+ bool "Support for VMX cryptographic acceleration instructions"
+ depends on PPC64
+ default n
+ help
+ Support for VMX cryptographic acceleration instructions.
+
+source "drivers/crypto/vmx/Kconfig"
+
+config CRYPTO_DEV_IMGTEC_HASH
+ depends on MIPS || COMPILE_TEST
+ tristate "Imagination Technologies hardware hash accelerator"
+ select CRYPTO_ALGAPI
+ select CRYPTO_MD5
+ select CRYPTO_SHA1
+ select CRYPTO_SHA256
+ select CRYPTO_HASH
+ help
+ This driver interfaces with the Imagination Technologies
+ hardware hash accelerator. Supporting MD5/SHA1/SHA224/SHA256
+ hashing algorithms.
+
endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 3924f93d5774..fb84be7e6be5 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
+obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o
obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o
@@ -25,3 +26,4 @@ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/
obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
+obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
index d02b77150070..3b28e8c3de28 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -1155,7 +1155,7 @@ struct crypto4xx_alg_common crypto4xx_alg[] = {
/**
* Module Initialization Routine
*/
-static int __init crypto4xx_probe(struct platform_device *ofdev)
+static int crypto4xx_probe(struct platform_device *ofdev)
{
int rc;
struct resource res;
@@ -1263,7 +1263,7 @@ err_alloc_dev:
return rc;
}
-static int __exit crypto4xx_remove(struct platform_device *ofdev)
+static int crypto4xx_remove(struct platform_device *ofdev)
{
struct device *dev = &ofdev->dev;
struct crypto4xx_core_device *core_dev = dev_get_drvdata(dev);
@@ -1291,7 +1291,7 @@ static struct platform_driver crypto4xx_driver = {
.of_match_table = crypto4xx_match,
},
.probe = crypto4xx_probe,
- .remove = __exit_p(crypto4xx_remove),
+ .remove = crypto4xx_remove,
};
module_platform_driver(crypto4xx_driver);
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 6597aac9905d..0f9a9dc06a83 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -315,10 +315,10 @@ static int atmel_aes_crypt_dma(struct atmel_aes_dev *dd,
dd->dma_size = length;
- if (!(dd->flags & AES_FLAGS_FAST)) {
- dma_sync_single_for_device(dd->dev, dma_addr_in, length,
- DMA_TO_DEVICE);
- }
+ dma_sync_single_for_device(dd->dev, dma_addr_in, length,
+ DMA_TO_DEVICE);
+ dma_sync_single_for_device(dd->dev, dma_addr_out, length,
+ DMA_FROM_DEVICE);
if (dd->flags & AES_FLAGS_CFB8) {
dd->dma_lch_in.dma_conf.dst_addr_width =
@@ -391,6 +391,11 @@ static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd)
{
dd->flags &= ~AES_FLAGS_DMA;
+ dma_sync_single_for_cpu(dd->dev, dd->dma_addr_in,
+ dd->dma_size, DMA_TO_DEVICE);
+ dma_sync_single_for_cpu(dd->dev, dd->dma_addr_out,
+ dd->dma_size, DMA_FROM_DEVICE);
+
/* use cache buffers */
dd->nb_in_sg = atmel_aes_sg_length(dd->req, dd->in_sg);
if (!dd->nb_in_sg)
@@ -459,6 +464,9 @@ static int atmel_aes_crypt_dma_start(struct atmel_aes_dev *dd)
dd->flags |= AES_FLAGS_FAST;
} else {
+ dma_sync_single_for_cpu(dd->dev, dd->dma_addr_in,
+ dd->dma_size, DMA_TO_DEVICE);
+
/* use cache buffers */
count = atmel_aes_sg_copy(&dd->in_sg, &dd->in_offset,
dd->buf_in, dd->buflen, dd->total, 0);
@@ -619,7 +627,7 @@ static int atmel_aes_crypt_dma_stop(struct atmel_aes_dev *dd)
dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE);
dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
} else {
- dma_sync_single_for_device(dd->dev, dd->dma_addr_out,
+ dma_sync_single_for_cpu(dd->dev, dd->dma_addr_out,
dd->dma_size, DMA_FROM_DEVICE);
/* copy data */
@@ -1246,6 +1254,11 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
/* keep only major version number */
switch (dd->hw_version & 0xff0) {
+ case 0x200:
+ dd->caps.has_dualbuff = 1;
+ dd->caps.has_cfb64 = 1;
+ dd->caps.max_burst_size = 4;
+ break;
case 0x130:
dd->caps.has_dualbuff = 1;
dd->caps.has_cfb64 = 1;
@@ -1336,6 +1349,7 @@ static int atmel_aes_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, aes_dd);
INIT_LIST_HEAD(&aes_dd->list);
+ spin_lock_init(&aes_dd->lock);
tasklet_init(&aes_dd->done_task, atmel_aes_done_task,
(unsigned long)aes_dd);
@@ -1374,7 +1388,7 @@ static int atmel_aes_probe(struct platform_device *pdev)
/* Initializing the clock */
aes_dd->iclk = clk_get(&pdev->dev, "aes_clk");
if (IS_ERR(aes_dd->iclk)) {
- dev_err(dev, "clock intialization failed.\n");
+ dev_err(dev, "clock initialization failed.\n");
err = PTR_ERR(aes_dd->iclk);
goto clk_err;
}
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 34db04addc18..5b35433c5399 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -163,8 +163,20 @@ static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx)
count = min(ctx->sg->length - ctx->offset, ctx->total);
count = min(count, ctx->buflen - ctx->bufcnt);
- if (count <= 0)
- break;
+ if (count <= 0) {
+ /*
+ * Check if count <= 0 because the buffer is full or
+ * because the sg length is 0. In the latest case,
+ * check if there is another sg in the list, a 0 length
+ * sg doesn't necessarily mean the end of the sg list.
+ */
+ if ((ctx->sg->length == 0) && !sg_is_last(ctx->sg)) {
+ ctx->sg = sg_next(ctx->sg);
+ continue;
+ } else {
+ break;
+ }
+ }
scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg,
ctx->offset, count, 0);
@@ -420,14 +432,8 @@ static int atmel_sha_xmit_dma(struct atmel_sha_dev *dd, dma_addr_t dma_addr1,
dev_dbg(dd->dev, "xmit_dma: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n",
ctx->digcnt[1], ctx->digcnt[0], length1, final);
- if (ctx->flags & (SHA_FLAGS_SHA1 | SHA_FLAGS_SHA224 |
- SHA_FLAGS_SHA256)) {
- dd->dma_lch_in.dma_conf.src_maxburst = 16;
- dd->dma_lch_in.dma_conf.dst_maxburst = 16;
- } else {
- dd->dma_lch_in.dma_conf.src_maxburst = 32;
- dd->dma_lch_in.dma_conf.dst_maxburst = 32;
- }
+ dd->dma_lch_in.dma_conf.src_maxburst = 16;
+ dd->dma_lch_in.dma_conf.dst_maxburst = 16;
dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf);
@@ -529,7 +535,7 @@ static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd)
if (final)
atmel_sha_fill_padding(ctx, 0);
- if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
+ if (final || (ctx->bufcnt == ctx->buflen)) {
count = ctx->bufcnt;
ctx->bufcnt = 0;
return atmel_sha_xmit_dma_map(dd, ctx, count, final);
@@ -1266,6 +1272,12 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd)
/* keep only major version number */
switch (dd->hw_version & 0xff0) {
+ case 0x420:
+ dd->caps.has_dma = 1;
+ dd->caps.has_dualbuff = 1;
+ dd->caps.has_sha224 = 1;
+ dd->caps.has_sha_384_512 = 1;
+ break;
case 0x410:
dd->caps.has_dma = 1;
dd->caps.has_dualbuff = 1;
@@ -1349,6 +1361,7 @@ static int atmel_sha_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, sha_dd);
INIT_LIST_HEAD(&sha_dd->list);
+ spin_lock_init(&sha_dd->lock);
tasklet_init(&sha_dd->done_task, atmel_sha_done_task,
(unsigned long)sha_dd);
@@ -1385,7 +1398,7 @@ static int atmel_sha_probe(struct platform_device *pdev)
/* Initializing the clock */
sha_dd->iclk = clk_get(&pdev->dev, "sha_clk");
if (IS_ERR(sha_dd->iclk)) {
- dev_err(dev, "clock intialization failed.\n");
+ dev_err(dev, "clock initialization failed.\n");
err = PTR_ERR(sha_dd->iclk);
goto clk_err;
}
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index 258772d9b22f..ca2999709eb4 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -1370,6 +1370,7 @@ static int atmel_tdes_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, tdes_dd);
INIT_LIST_HEAD(&tdes_dd->list);
+ spin_lock_init(&tdes_dd->lock);
tasklet_init(&tdes_dd->done_task, atmel_tdes_done_task,
(unsigned long)tdes_dd);
@@ -1408,7 +1409,7 @@ static int atmel_tdes_probe(struct platform_device *pdev)
/* Initializing the clock */
tdes_dd->iclk = clk_get(&pdev->dev, "tdes_clk");
if (IS_ERR(tdes_dd->iclk)) {
- dev_err(dev, "clock intialization failed.\n");
+ dev_err(dev, "clock initialization failed.\n");
err = PTR_ERR(tdes_dd->iclk);
goto clk_err;
}
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index f347ab7eea95..ba0532efd3ae 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -1172,6 +1172,7 @@ static int ahash_final_no_ctx(struct ahash_request *req)
return -ENOMEM;
}
+ edesc->sec4_sg_bytes = 0;
sh_len = desc_len(sh_desc);
desc = edesc->hw_desc;
init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index ae31e555793c..26a544b505f1 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -52,7 +52,7 @@
/* length of descriptors */
#define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ * 2)
-#define DESC_RNG_LEN (10 * CAAM_CMD_SZ)
+#define DESC_RNG_LEN (4 * CAAM_CMD_SZ)
/* Buffer, its dma address and lock */
struct buf_data {
@@ -90,8 +90,8 @@ static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx)
struct device *jrdev = ctx->jrdev;
if (ctx->sh_desc_dma)
- dma_unmap_single(jrdev, ctx->sh_desc_dma, DESC_RNG_LEN,
- DMA_TO_DEVICE);
+ dma_unmap_single(jrdev, ctx->sh_desc_dma,
+ desc_bytes(ctx->sh_desc), DMA_TO_DEVICE);
rng_unmap_buf(jrdev, &ctx->bufs[0]);
rng_unmap_buf(jrdev, &ctx->bufs[1]);
}
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
index 7f592d8d07bb..55a1f3951578 100644
--- a/drivers/crypto/ccp/Makefile
+++ b/drivers/crypto/ccp/Makefile
@@ -1,11 +1,6 @@
obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o
-ccp-objs := ccp-dev.o ccp-ops.o
-ifdef CONFIG_X86
-ccp-objs += ccp-pci.o
-endif
-ifdef CONFIG_ARM64
-ccp-objs += ccp-platform.o
-endif
+ccp-objs := ccp-dev.o ccp-ops.o ccp-platform.o
+ccp-$(CONFIG_PCI) += ccp-pci.o
obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o
ccp-crypto-objs := ccp-crypto-main.o \
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
index 8e162ad82085..ea7e8446956a 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -23,7 +23,6 @@
#include "ccp-crypto.h"
-
static int ccp_aes_cmac_complete(struct crypto_async_request *async_req,
int ret)
{
@@ -38,11 +37,13 @@ static int ccp_aes_cmac_complete(struct crypto_async_request *async_req,
if (rctx->hash_rem) {
/* Save remaining data to buffer */
unsigned int offset = rctx->nbytes - rctx->hash_rem;
+
scatterwalk_map_and_copy(rctx->buf, rctx->src,
offset, rctx->hash_rem, 0);
rctx->buf_count = rctx->hash_rem;
- } else
+ } else {
rctx->buf_count = 0;
+ }
/* Update result area if supplied */
if (req->result)
@@ -202,7 +203,7 @@ static int ccp_aes_cmac_digest(struct ahash_request *req)
}
static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key,
- unsigned int key_len)
+ unsigned int key_len)
{
struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
struct ccp_crypto_ahash_alg *alg =
@@ -292,7 +293,8 @@ static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm)
crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx));
cipher_tfm = crypto_alloc_cipher("aes", 0,
- CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(cipher_tfm)) {
pr_warn("could not load aes cipher driver\n");
return PTR_ERR(cipher_tfm);
@@ -354,7 +356,7 @@ int ccp_register_aes_cmac_algs(struct list_head *head)
ret = crypto_register_ahash(alg);
if (ret) {
pr_err("%s ahash algorithm registration error (%d)\n",
- base->cra_name, ret);
+ base->cra_name, ret);
kfree(ccp_alg);
return ret;
}
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
index 0cc5594b7de3..52c7395cb8d8 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -21,7 +21,6 @@
#include "ccp-crypto.h"
-
struct ccp_aes_xts_def {
const char *name;
const char *drv_name;
@@ -216,7 +215,6 @@ static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm)
ctx->u.aes.tfm_ablkcipher = NULL;
}
-
static int ccp_register_aes_xts_alg(struct list_head *head,
const struct ccp_aes_xts_def *def)
{
@@ -255,7 +253,7 @@ static int ccp_register_aes_xts_alg(struct list_head *head,
ret = crypto_register_alg(alg);
if (ret) {
pr_err("%s ablkcipher algorithm registration error (%d)\n",
- alg->cra_name, ret);
+ alg->cra_name, ret);
kfree(ccp_alg);
return ret;
}
diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c
index e46490db0f63..7984f910884d 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes.c
@@ -22,7 +22,6 @@
#include "ccp-crypto.h"
-
static int ccp_aes_complete(struct crypto_async_request *async_req, int ret)
{
struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
@@ -345,7 +344,7 @@ static int ccp_register_aes_alg(struct list_head *head,
ret = crypto_register_alg(alg);
if (ret) {
pr_err("%s ablkcipher algorithm registration error (%d)\n",
- alg->cra_name, ret);
+ alg->cra_name, ret);
kfree(ccp_alg);
return ret;
}
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index 4d4e016d755b..bdec01ec608f 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -33,7 +33,6 @@ static unsigned int sha_disable;
module_param(sha_disable, uint, 0444);
MODULE_PARM_DESC(sha_disable, "Disable use of SHA - any non-zero value");
-
/* List heads for the supported algorithms */
static LIST_HEAD(hash_algs);
static LIST_HEAD(cipher_algs);
@@ -48,6 +47,7 @@ struct ccp_crypto_queue {
struct list_head *backlog;
unsigned int cmd_count;
};
+
#define CCP_CRYPTO_MAX_QLEN 100
static struct ccp_crypto_queue req_queue;
@@ -77,7 +77,6 @@ struct ccp_crypto_cpu {
int err;
};
-
static inline bool ccp_crypto_success(int err)
{
if (err && (err != -EINPROGRESS) && (err != -EBUSY))
@@ -143,7 +142,7 @@ static void ccp_crypto_complete(void *data, int err)
int ret;
if (err == -EINPROGRESS) {
- /* Only propogate the -EINPROGRESS if necessary */
+ /* Only propagate the -EINPROGRESS if necessary */
if (crypto_cmd->ret == -EBUSY) {
crypto_cmd->ret = -EINPROGRESS;
req->complete(req, -EINPROGRESS);
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index 96531571f7cf..507b34e0cc19 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -23,7 +23,6 @@
#include "ccp-crypto.h"
-
static int ccp_sha_complete(struct crypto_async_request *async_req, int ret)
{
struct ahash_request *req = ahash_request_cast(async_req);
@@ -37,11 +36,13 @@ static int ccp_sha_complete(struct crypto_async_request *async_req, int ret)
if (rctx->hash_rem) {
/* Save remaining data to buffer */
unsigned int offset = rctx->nbytes - rctx->hash_rem;
+
scatterwalk_map_and_copy(rctx->buf, rctx->src,
offset, rctx->hash_rem, 0);
rctx->buf_count = rctx->hash_rem;
- } else
+ } else {
rctx->buf_count = 0;
+ }
/* Update result area if supplied */
if (req->result)
@@ -227,8 +228,9 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
}
key_len = digest_size;
- } else
+ } else {
memcpy(ctx->u.sha.key, key, key_len);
+ }
for (i = 0; i < block_size; i++) {
ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36;
@@ -355,7 +357,7 @@ static int ccp_register_hmac_alg(struct list_head *head,
ret = crypto_register_ahash(alg);
if (ret) {
pr_err("%s ahash algorithm registration error (%d)\n",
- base->cra_name, ret);
+ base->cra_name, ret);
kfree(ccp_alg);
return ret;
}
@@ -410,7 +412,7 @@ static int ccp_register_sha_alg(struct list_head *head,
ret = crypto_register_ahash(alg);
if (ret) {
pr_err("%s ahash algorithm registration error (%d)\n",
- base->cra_name, ret);
+ base->cra_name, ret);
kfree(ccp_alg);
return ret;
}
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
index 9aa4ae184f7f..76a96f0f44c6 100644
--- a/drivers/crypto/ccp/ccp-crypto.h
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -13,7 +13,6 @@
#ifndef __CCP_CRYPTO_H__
#define __CCP_CRYPTO_H__
-
#include <linux/list.h>
#include <linux/wait.h>
#include <linux/pci.h>
@@ -25,7 +24,6 @@
#include <crypto/hash.h>
#include <crypto/sha.h>
-
#define CCP_CRA_PRIORITY 300
struct ccp_crypto_ablkcipher_alg {
@@ -68,7 +66,6 @@ static inline struct ccp_crypto_ahash_alg *
return container_of(ahash_alg, struct ccp_crypto_ahash_alg, alg);
}
-
/***** AES related defines *****/
struct ccp_aes_ctx {
/* Fallback cipher for XTS with unsupported unit sizes */
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index ca29c120b85f..861bacc1bb94 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -37,7 +37,6 @@ struct ccp_tasklet_data {
struct ccp_cmd *cmd;
};
-
static struct ccp_device *ccp_dev;
static inline struct ccp_device *ccp_get_device(void)
{
@@ -296,11 +295,9 @@ struct ccp_device *ccp_alloc_struct(struct device *dev)
{
struct ccp_device *ccp;
- ccp = kzalloc(sizeof(*ccp), GFP_KERNEL);
- if (ccp == NULL) {
- dev_err(dev, "unable to allocate device struct\n");
+ ccp = devm_kzalloc(dev, sizeof(*ccp), GFP_KERNEL);
+ if (!ccp)
return NULL;
- }
ccp->dev = dev;
INIT_LIST_HEAD(&ccp->cmd);
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 62ff35a6b9ec..6ff89031fb96 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -21,7 +21,7 @@
#include <linux/wait.h>
#include <linux/dmapool.h>
#include <linux/hw_random.h>
-
+#include <linux/bitops.h>
#define MAX_DMAPOOL_NAME_LEN 32
@@ -33,7 +33,6 @@
#define CACHE_NONE 0x00
#define CACHE_WB_NO_ALLOC 0xb7
-
/****** Register Mappings ******/
#define Q_MASK_REG 0x000
#define TRNG_OUT_REG 0x00c
@@ -54,8 +53,8 @@
#define CMD_Q_CACHE_BASE 0x228
#define CMD_Q_CACHE_INC 0x20
-#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f);
-#define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f);
+#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f)
+#define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f)
/****** REQ0 Related Values ******/
#define REQ0_WAIT_FOR_WRITE 0x00000004
@@ -103,7 +102,6 @@
/****** REQ6 Related Values ******/
#define REQ6_MEMTYPE_SHIFT 16
-
/****** Key Storage Block ******/
#define KSB_START 77
#define KSB_END 127
@@ -114,7 +112,7 @@
#define CCP_JOBID_MASK 0x0000003f
#define CCP_DMAPOOL_MAX_SIZE 64
-#define CCP_DMAPOOL_ALIGN (1 << 5)
+#define CCP_DMAPOOL_ALIGN BIT(5)
#define CCP_REVERSE_BUF_SIZE 64
@@ -142,7 +140,6 @@
#define CCP_ECC_RESULT_OFFSET 60
#define CCP_ECC_RESULT_SUCCESS 0x0001
-
struct ccp_device;
struct ccp_cmd;
@@ -261,7 +258,6 @@ struct ccp_device {
unsigned int axcache;
};
-
int ccp_pci_init(void);
void ccp_pci_exit(void);
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 8729364261d7..71f2e3c89424 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -27,7 +27,6 @@
#include "ccp-dev.h"
-
enum ccp_memtype {
CCP_MEMTYPE_SYSTEM = 0,
CCP_MEMTYPE_KSB,
@@ -515,7 +514,6 @@ static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
if (!wa->dma_count)
return -ENOMEM;
-
return 0;
}
@@ -763,8 +761,9 @@ static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
op_len = min(sg_src_len, sg_dst_len);
- } else
+ } else {
op_len = sg_src_len;
+ }
/* The data operation length will be at least block_size in length
* or the smaller of available sg room remaining for the source or
@@ -1131,9 +1130,9 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
if (ret)
goto e_ctx;
- if (in_place)
+ if (in_place) {
dst = src;
- else {
+ } else {
ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
AES_BLOCK_SIZE, DMA_FROM_DEVICE);
if (ret)
@@ -1304,9 +1303,9 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
if (ret)
goto e_ctx;
- if (in_place)
+ if (in_place) {
dst = src;
- else {
+ } else {
ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
unit_size, DMA_FROM_DEVICE);
if (ret)
@@ -1451,8 +1450,9 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
goto e_ctx;
}
memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
- } else
+ } else {
ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
+ }
ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
CCP_PASSTHRU_BYTESWAP_256BIT);
@@ -1732,9 +1732,9 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
if (ret)
goto e_mask;
- if (in_place)
+ if (in_place) {
dst = src;
- else {
+ } else {
ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
if (ret)
@@ -1974,7 +1974,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
src.address += CCP_ECC_OPERAND_SIZE;
/* Set the first point Z coordianate to 1 */
- *(src.address) = 0x01;
+ *src.address = 0x01;
src.address += CCP_ECC_OPERAND_SIZE;
if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
@@ -1989,7 +1989,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
src.address += CCP_ECC_OPERAND_SIZE;
/* Set the second point Z coordianate to 1 */
- *(src.address) = 0x01;
+ *src.address = 0x01;
src.address += CCP_ECC_OPERAND_SIZE;
} else {
/* Copy the Domain "a" parameter */
diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c
index 7f89c946adfe..af190d4795a8 100644
--- a/drivers/crypto/ccp/ccp-pci.c
+++ b/drivers/crypto/ccp/ccp-pci.c
@@ -174,11 +174,10 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (!ccp)
goto e_err;
- ccp_pci = kzalloc(sizeof(*ccp_pci), GFP_KERNEL);
- if (!ccp_pci) {
- ret = -ENOMEM;
- goto e_free1;
- }
+ ccp_pci = devm_kzalloc(dev, sizeof(*ccp_pci), GFP_KERNEL);
+ if (!ccp_pci)
+ goto e_err;
+
ccp->dev_specific = ccp_pci;
ccp->get_irq = ccp_get_irqs;
ccp->free_irq = ccp_free_irqs;
@@ -186,7 +185,7 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
ret = pci_request_regions(pdev, "ccp");
if (ret) {
dev_err(dev, "pci_request_regions failed (%d)\n", ret);
- goto e_free2;
+ goto e_err;
}
ret = pci_enable_device(pdev);
@@ -204,7 +203,7 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
ret = -EIO;
ccp->io_map = pci_iomap(pdev, bar, 0);
- if (ccp->io_map == NULL) {
+ if (!ccp->io_map) {
dev_err(dev, "pci_iomap failed\n");
goto e_device;
}
@@ -239,12 +238,6 @@ e_device:
e_regions:
pci_release_regions(pdev);
-e_free2:
- kfree(ccp_pci);
-
-e_free1:
- kfree(ccp);
-
e_err:
dev_notice(dev, "initialization failed\n");
return ret;
@@ -266,8 +259,6 @@ static void ccp_pci_remove(struct pci_dev *pdev)
pci_release_regions(pdev);
- kfree(ccp);
-
dev_notice(dev, "disabled\n");
}
diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c
index 8c50bad25f7e..b1c20b2b5647 100644
--- a/drivers/crypto/ccp/ccp-platform.c
+++ b/drivers/crypto/ccp/ccp-platform.c
@@ -23,9 +23,15 @@
#include <linux/delay.h>
#include <linux/ccp.h>
#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/acpi.h>
#include "ccp-dev.h"
+struct ccp_platform {
+ int use_acpi;
+ int coherent;
+};
static int ccp_get_irq(struct ccp_device *ccp)
{
@@ -84,10 +90,64 @@ static struct resource *ccp_find_mmio_area(struct ccp_device *ccp)
return NULL;
}
+#ifdef CONFIG_ACPI
+static int ccp_acpi_support(struct ccp_device *ccp)
+{
+ struct ccp_platform *ccp_platform = ccp->dev_specific;
+ struct acpi_device *adev = ACPI_COMPANION(ccp->dev);
+ acpi_handle handle;
+ acpi_status status;
+ unsigned long long data;
+ int cca;
+
+ /* Retrieve the device cache coherency value */
+ handle = adev->handle;
+ do {
+ status = acpi_evaluate_integer(handle, "_CCA", NULL, &data);
+ if (!ACPI_FAILURE(status)) {
+ cca = data;
+ break;
+ }
+ } while (!ACPI_FAILURE(status));
+
+ if (ACPI_FAILURE(status)) {
+ dev_err(ccp->dev, "error obtaining acpi coherency value\n");
+ return -EINVAL;
+ }
+
+ ccp_platform->coherent = !!cca;
+
+ return 0;
+}
+#else /* CONFIG_ACPI */
+static int ccp_acpi_support(struct ccp_device *ccp)
+{
+ return -EINVAL;
+}
+#endif
+
+#ifdef CONFIG_OF
+static int ccp_of_support(struct ccp_device *ccp)
+{
+ struct ccp_platform *ccp_platform = ccp->dev_specific;
+
+ ccp_platform->coherent = of_dma_is_coherent(ccp->dev->of_node);
+
+ return 0;
+}
+#else
+static int ccp_of_support(struct ccp_device *ccp)
+{
+ return -EINVAL;
+}
+#endif
+
static int ccp_platform_probe(struct platform_device *pdev)
{
struct ccp_device *ccp;
+ struct ccp_platform *ccp_platform;
struct device *dev = &pdev->dev;
+ struct acpi_device *adev = ACPI_COMPANION(dev);
struct resource *ior;
int ret;
@@ -96,24 +156,40 @@ static int ccp_platform_probe(struct platform_device *pdev)
if (!ccp)
goto e_err;
- ccp->dev_specific = NULL;
+ ccp_platform = devm_kzalloc(dev, sizeof(*ccp_platform), GFP_KERNEL);
+ if (!ccp_platform)
+ goto e_err;
+
+ ccp->dev_specific = ccp_platform;
ccp->get_irq = ccp_get_irqs;
ccp->free_irq = ccp_free_irqs;
+ ccp_platform->use_acpi = (!adev || acpi_disabled) ? 0 : 1;
+
ior = ccp_find_mmio_area(ccp);
ccp->io_map = devm_ioremap_resource(dev, ior);
if (IS_ERR(ccp->io_map)) {
ret = PTR_ERR(ccp->io_map);
- goto e_free;
+ goto e_err;
}
ccp->io_regs = ccp->io_map;
if (!dev->dma_mask)
dev->dma_mask = &dev->coherent_dma_mask;
- *(dev->dma_mask) = DMA_BIT_MASK(48);
- dev->coherent_dma_mask = DMA_BIT_MASK(48);
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+ if (ret) {
+ dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret);
+ goto e_err;
+ }
+
+ if (ccp_platform->use_acpi)
+ ret = ccp_acpi_support(ccp);
+ else
+ ret = ccp_of_support(ccp);
+ if (ret)
+ goto e_err;
- if (of_property_read_bool(dev->of_node, "dma-coherent"))
+ if (ccp_platform->coherent)
ccp->axcache = CACHE_WB_NO_ALLOC;
else
ccp->axcache = CACHE_NONE;
@@ -122,15 +198,12 @@ static int ccp_platform_probe(struct platform_device *pdev)
ret = ccp_init(ccp);
if (ret)
- goto e_free;
+ goto e_err;
dev_notice(dev, "enabled\n");
return 0;
-e_free:
- kfree(ccp);
-
e_err:
dev_notice(dev, "initialization failed\n");
return ret;
@@ -143,8 +216,6 @@ static int ccp_platform_remove(struct platform_device *pdev)
ccp_destroy(ccp);
- kfree(ccp);
-
dev_notice(dev, "disabled\n");
return 0;
@@ -200,15 +271,29 @@ static int ccp_platform_resume(struct platform_device *pdev)
}
#endif
-static const struct of_device_id ccp_platform_ids[] = {
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id ccp_acpi_match[] = {
+ { "AMDI0C00", 0 },
+ { },
+};
+#endif
+
+#ifdef CONFIG_OF
+static const struct of_device_id ccp_of_match[] = {
{ .compatible = "amd,ccp-seattle-v1a" },
{ },
};
+#endif
static struct platform_driver ccp_platform_driver = {
.driver = {
.name = "AMD Cryptographic Coprocessor",
- .of_match_table = ccp_platform_ids,
+#ifdef CONFIG_ACPI
+ .acpi_match_table = ccp_acpi_match,
+#endif
+#ifdef CONFIG_OF
+ .of_match_table = ccp_of_match,
+#endif
},
.probe = ccp_platform_probe,
.remove = ccp_platform_remove,
diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
new file mode 100644
index 000000000000..ad47d0d61098
--- /dev/null
+++ b/drivers/crypto/img-hash.c
@@ -0,0 +1,1029 @@
+/*
+ * Copyright (c) 2014 Imagination Technologies
+ * Authors: Will Thomas, James Hartley
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Interface structure taken from omap-sham driver
+ */
+
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+
+#include <crypto/internal/hash.h>
+#include <crypto/md5.h>
+#include <crypto/sha.h>
+
+#define CR_RESET 0
+#define CR_RESET_SET 1
+#define CR_RESET_UNSET 0
+
+#define CR_MESSAGE_LENGTH_H 0x4
+#define CR_MESSAGE_LENGTH_L 0x8
+
+#define CR_CONTROL 0xc
+#define CR_CONTROL_BYTE_ORDER_3210 0
+#define CR_CONTROL_BYTE_ORDER_0123 1
+#define CR_CONTROL_BYTE_ORDER_2310 2
+#define CR_CONTROL_BYTE_ORDER_1032 3
+#define CR_CONTROL_BYTE_ORDER_SHIFT 8
+#define CR_CONTROL_ALGO_MD5 0
+#define CR_CONTROL_ALGO_SHA1 1
+#define CR_CONTROL_ALGO_SHA224 2
+#define CR_CONTROL_ALGO_SHA256 3
+
+#define CR_INTSTAT 0x10
+#define CR_INTENAB 0x14
+#define CR_INTCLEAR 0x18
+#define CR_INT_RESULTS_AVAILABLE BIT(0)
+#define CR_INT_NEW_RESULTS_SET BIT(1)
+#define CR_INT_RESULT_READ_ERR BIT(2)
+#define CR_INT_MESSAGE_WRITE_ERROR BIT(3)
+#define CR_INT_STATUS BIT(8)
+
+#define CR_RESULT_QUEUE 0x1c
+#define CR_RSD0 0x40
+#define CR_CORE_REV 0x50
+#define CR_CORE_DES1 0x60
+#define CR_CORE_DES2 0x70
+
+#define DRIVER_FLAGS_BUSY BIT(0)
+#define DRIVER_FLAGS_FINAL BIT(1)
+#define DRIVER_FLAGS_DMA_ACTIVE BIT(2)
+#define DRIVER_FLAGS_OUTPUT_READY BIT(3)
+#define DRIVER_FLAGS_INIT BIT(4)
+#define DRIVER_FLAGS_CPU BIT(5)
+#define DRIVER_FLAGS_DMA_READY BIT(6)
+#define DRIVER_FLAGS_ERROR BIT(7)
+#define DRIVER_FLAGS_SG BIT(8)
+#define DRIVER_FLAGS_SHA1 BIT(18)
+#define DRIVER_FLAGS_SHA224 BIT(19)
+#define DRIVER_FLAGS_SHA256 BIT(20)
+#define DRIVER_FLAGS_MD5 BIT(21)
+
+#define IMG_HASH_QUEUE_LENGTH 20
+#define IMG_HASH_DMA_THRESHOLD 64
+
+#ifdef __LITTLE_ENDIAN
+#define IMG_HASH_BYTE_ORDER CR_CONTROL_BYTE_ORDER_3210
+#else
+#define IMG_HASH_BYTE_ORDER CR_CONTROL_BYTE_ORDER_0123
+#endif
+
+struct img_hash_dev;
+
+struct img_hash_request_ctx {
+ struct img_hash_dev *hdev;
+ u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
+ unsigned long flags;
+ size_t digsize;
+
+ dma_addr_t dma_addr;
+ size_t dma_ct;
+
+ /* sg root */
+ struct scatterlist *sgfirst;
+ /* walk state */
+ struct scatterlist *sg;
+ size_t nents;
+ size_t offset;
+ unsigned int total;
+ size_t sent;
+
+ unsigned long op;
+
+ size_t bufcnt;
+ u8 buffer[0] __aligned(sizeof(u32));
+ struct ahash_request fallback_req;
+};
+
+struct img_hash_ctx {
+ struct img_hash_dev *hdev;
+ unsigned long flags;
+ struct crypto_ahash *fallback;
+};
+
+struct img_hash_dev {
+ struct list_head list;
+ struct device *dev;
+ struct clk *hash_clk;
+ struct clk *sys_clk;
+ void __iomem *io_base;
+
+ phys_addr_t bus_addr;
+ void __iomem *cpu_addr;
+
+ spinlock_t lock;
+ int err;
+ struct tasklet_struct done_task;
+ struct tasklet_struct dma_task;
+
+ unsigned long flags;
+ struct crypto_queue queue;
+ struct ahash_request *req;
+
+ struct dma_chan *dma_lch;
+};
+
+struct img_hash_drv {
+ struct list_head dev_list;
+ spinlock_t lock;
+};
+
+static struct img_hash_drv img_hash = {
+ .dev_list = LIST_HEAD_INIT(img_hash.dev_list),
+ .lock = __SPIN_LOCK_UNLOCKED(img_hash.lock),
+};
+
+static inline u32 img_hash_read(struct img_hash_dev *hdev, u32 offset)
+{
+ return readl_relaxed(hdev->io_base + offset);
+}
+
+static inline void img_hash_write(struct img_hash_dev *hdev,
+ u32 offset, u32 value)
+{
+ writel_relaxed(value, hdev->io_base + offset);
+}
+
+static inline u32 img_hash_read_result_queue(struct img_hash_dev *hdev)
+{
+ return be32_to_cpu(img_hash_read(hdev, CR_RESULT_QUEUE));
+}
+
+static void img_hash_start(struct img_hash_dev *hdev, bool dma)
+{
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+ u32 cr = IMG_HASH_BYTE_ORDER << CR_CONTROL_BYTE_ORDER_SHIFT;
+
+ if (ctx->flags & DRIVER_FLAGS_MD5)
+ cr |= CR_CONTROL_ALGO_MD5;
+ else if (ctx->flags & DRIVER_FLAGS_SHA1)
+ cr |= CR_CONTROL_ALGO_SHA1;
+ else if (ctx->flags & DRIVER_FLAGS_SHA224)
+ cr |= CR_CONTROL_ALGO_SHA224;
+ else if (ctx->flags & DRIVER_FLAGS_SHA256)
+ cr |= CR_CONTROL_ALGO_SHA256;
+ dev_dbg(hdev->dev, "Starting hash process\n");
+ img_hash_write(hdev, CR_CONTROL, cr);
+
+ /*
+ * The hardware block requires two cycles between writing the control
+ * register and writing the first word of data in non DMA mode, to
+ * ensure the first data write is not grouped in burst with the control
+ * register write a read is issued to 'flush' the bus.
+ */
+ if (!dma)
+ img_hash_read(hdev, CR_CONTROL);
+}
+
+static int img_hash_xmit_cpu(struct img_hash_dev *hdev, const u8 *buf,
+ size_t length, int final)
+{
+ u32 count, len32;
+ const u32 *buffer = (const u32 *)buf;
+
+ dev_dbg(hdev->dev, "xmit_cpu: length: %zu bytes\n", length);
+
+ if (final)
+ hdev->flags |= DRIVER_FLAGS_FINAL;
+
+ len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+ for (count = 0; count < len32; count++)
+ writel_relaxed(buffer[count], hdev->cpu_addr);
+
+ return -EINPROGRESS;
+}
+
+static void img_hash_dma_callback(void *data)
+{
+ struct img_hash_dev *hdev = (struct img_hash_dev *)data;
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+
+ if (ctx->bufcnt) {
+ img_hash_xmit_cpu(hdev, ctx->buffer, ctx->bufcnt, 0);
+ ctx->bufcnt = 0;
+ }
+ if (ctx->sg)
+ tasklet_schedule(&hdev->dma_task);
+}
+
+static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg)
+{
+ struct dma_async_tx_descriptor *desc;
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+
+ ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
+ if (ctx->dma_ct == 0) {
+ dev_err(hdev->dev, "Invalid DMA sg\n");
+ hdev->err = -EINVAL;
+ return -EINVAL;
+ }
+
+ desc = dmaengine_prep_slave_sg(hdev->dma_lch,
+ sg,
+ ctx->dma_ct,
+ DMA_MEM_TO_DEV,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ if (!desc) {
+ dev_err(hdev->dev, "Null DMA descriptor\n");
+ hdev->err = -EINVAL;
+ dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
+ return -EINVAL;
+ }
+ desc->callback = img_hash_dma_callback;
+ desc->callback_param = hdev;
+ dmaengine_submit(desc);
+ dma_async_issue_pending(hdev->dma_lch);
+
+ return 0;
+}
+
+static int img_hash_write_via_cpu(struct img_hash_dev *hdev)
+{
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+
+ ctx->bufcnt = sg_copy_to_buffer(hdev->req->src, sg_nents(ctx->sg),
+ ctx->buffer, hdev->req->nbytes);
+
+ ctx->total = hdev->req->nbytes;
+ ctx->bufcnt = 0;
+
+ hdev->flags |= (DRIVER_FLAGS_CPU | DRIVER_FLAGS_FINAL);
+
+ img_hash_start(hdev, false);
+
+ return img_hash_xmit_cpu(hdev, ctx->buffer, ctx->total, 1);
+}
+
+static int img_hash_finish(struct ahash_request *req)
+{
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
+
+ if (!req->result)
+ return -EINVAL;
+
+ memcpy(req->result, ctx->digest, ctx->digsize);
+
+ return 0;
+}
+
+static void img_hash_copy_hash(struct ahash_request *req)
+{
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
+ u32 *hash = (u32 *)ctx->digest;
+ int i;
+
+ for (i = (ctx->digsize / sizeof(u32)) - 1; i >= 0; i--)
+ hash[i] = img_hash_read_result_queue(ctx->hdev);
+}
+
+static void img_hash_finish_req(struct ahash_request *req, int err)
+{
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
+ struct img_hash_dev *hdev = ctx->hdev;
+
+ if (!err) {
+ img_hash_copy_hash(req);
+ if (DRIVER_FLAGS_FINAL & hdev->flags)
+ err = img_hash_finish(req);
+ } else {
+ dev_warn(hdev->dev, "Hash failed with error %d\n", err);
+ ctx->flags |= DRIVER_FLAGS_ERROR;
+ }
+
+ hdev->flags &= ~(DRIVER_FLAGS_DMA_READY | DRIVER_FLAGS_OUTPUT_READY |
+ DRIVER_FLAGS_CPU | DRIVER_FLAGS_BUSY | DRIVER_FLAGS_FINAL);
+
+ if (req->base.complete)
+ req->base.complete(&req->base, err);
+}
+
+static int img_hash_write_via_dma(struct img_hash_dev *hdev)
+{
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+
+ img_hash_start(hdev, true);
+
+ dev_dbg(hdev->dev, "xmit dma size: %d\n", ctx->total);
+
+ if (!ctx->total)
+ hdev->flags |= DRIVER_FLAGS_FINAL;
+
+ hdev->flags |= DRIVER_FLAGS_DMA_ACTIVE | DRIVER_FLAGS_FINAL;
+
+ tasklet_schedule(&hdev->dma_task);
+
+ return -EINPROGRESS;
+}
+
+static int img_hash_dma_init(struct img_hash_dev *hdev)
+{
+ struct dma_slave_config dma_conf;
+ int err = -EINVAL;
+
+ hdev->dma_lch = dma_request_slave_channel(hdev->dev, "tx");
+ if (!hdev->dma_lch) {
+ dev_err(hdev->dev, "Couldn't aquire a slave DMA channel.\n");
+ return -EBUSY;
+ }
+ dma_conf.direction = DMA_MEM_TO_DEV;
+ dma_conf.dst_addr = hdev->bus_addr;
+ dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ dma_conf.dst_maxburst = 16;
+ dma_conf.device_fc = false;
+
+ err = dmaengine_slave_config(hdev->dma_lch, &dma_conf);
+ if (err) {
+ dev_err(hdev->dev, "Couldn't configure DMA slave.\n");
+ dma_release_channel(hdev->dma_lch);
+ return err;
+ }
+
+ return 0;
+}
+
+static void img_hash_dma_task(unsigned long d)
+{
+ struct img_hash_dev *hdev = (struct img_hash_dev *)d;
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+ u8 *addr;
+ size_t nbytes, bleft, wsend, len, tbc;
+ struct scatterlist tsg;
+
+ if (!ctx->sg)
+ return;
+
+ addr = sg_virt(ctx->sg);
+ nbytes = ctx->sg->length - ctx->offset;
+
+ /*
+ * The hash accelerator does not support a data valid mask. This means
+ * that if each dma (i.e. per page) is not a multiple of 4 bytes, the
+ * padding bytes in the last word written by that dma would erroneously
+ * be included in the hash. To avoid this we round down the transfer,
+ * and add the excess to the start of the next dma. It does not matter
+ * that the final dma may not be a multiple of 4 bytes as the hashing
+ * block is programmed to accept the correct number of bytes.
+ */
+
+ bleft = nbytes % 4;
+ wsend = (nbytes / 4);
+
+ if (wsend) {
+ sg_init_one(&tsg, addr + ctx->offset, wsend * 4);
+ if (img_hash_xmit_dma(hdev, &tsg)) {
+ dev_err(hdev->dev, "DMA failed, falling back to CPU");
+ ctx->flags |= DRIVER_FLAGS_CPU;
+ hdev->err = 0;
+ img_hash_xmit_cpu(hdev, addr + ctx->offset,
+ wsend * 4, 0);
+ ctx->sent += wsend * 4;
+ wsend = 0;
+ } else {
+ ctx->sent += wsend * 4;
+ }
+ }
+
+ if (bleft) {
+ ctx->bufcnt = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
+ ctx->buffer, bleft, ctx->sent);
+ tbc = 0;
+ ctx->sg = sg_next(ctx->sg);
+ while (ctx->sg && (ctx->bufcnt < 4)) {
+ len = ctx->sg->length;
+ if (likely(len > (4 - ctx->bufcnt)))
+ len = 4 - ctx->bufcnt;
+ tbc = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
+ ctx->buffer + ctx->bufcnt, len,
+ ctx->sent + ctx->bufcnt);
+ ctx->bufcnt += tbc;
+ if (tbc >= ctx->sg->length) {
+ ctx->sg = sg_next(ctx->sg);
+ tbc = 0;
+ }
+ }
+
+ ctx->sent += ctx->bufcnt;
+ ctx->offset = tbc;
+
+ if (!wsend)
+ img_hash_dma_callback(hdev);
+ } else {
+ ctx->offset = 0;
+ ctx->sg = sg_next(ctx->sg);
+ }
+}
+
+static int img_hash_write_via_dma_stop(struct img_hash_dev *hdev)
+{
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+
+ if (ctx->flags & DRIVER_FLAGS_SG)
+ dma_unmap_sg(hdev->dev, ctx->sg, ctx->dma_ct, DMA_TO_DEVICE);
+
+ return 0;
+}
+
+static int img_hash_process_data(struct img_hash_dev *hdev)
+{
+ struct ahash_request *req = hdev->req;
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
+ int err = 0;
+
+ ctx->bufcnt = 0;
+
+ if (req->nbytes >= IMG_HASH_DMA_THRESHOLD) {
+ dev_dbg(hdev->dev, "process data request(%d bytes) using DMA\n",
+ req->nbytes);
+ err = img_hash_write_via_dma(hdev);
+ } else {
+ dev_dbg(hdev->dev, "process data request(%d bytes) using CPU\n",
+ req->nbytes);
+ err = img_hash_write_via_cpu(hdev);
+ }
+ return err;
+}
+
+static int img_hash_hw_init(struct img_hash_dev *hdev)
+{
+ unsigned long long nbits;
+ u32 u, l;
+
+ img_hash_write(hdev, CR_RESET, CR_RESET_SET);
+ img_hash_write(hdev, CR_RESET, CR_RESET_UNSET);
+ img_hash_write(hdev, CR_INTENAB, CR_INT_NEW_RESULTS_SET);
+
+ nbits = (u64)hdev->req->nbytes << 3;
+ u = nbits >> 32;
+ l = nbits;
+ img_hash_write(hdev, CR_MESSAGE_LENGTH_H, u);
+ img_hash_write(hdev, CR_MESSAGE_LENGTH_L, l);
+
+ if (!(DRIVER_FLAGS_INIT & hdev->flags)) {
+ hdev->flags |= DRIVER_FLAGS_INIT;
+ hdev->err = 0;
+ }
+ dev_dbg(hdev->dev, "hw initialized, nbits: %llx\n", nbits);
+ return 0;
+}
+
+static int img_hash_init(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+ struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+ rctx->fallback_req.base.flags = req->base.flags
+ & CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ return crypto_ahash_init(&rctx->fallback_req);
+}
+
+static int img_hash_handle_queue(struct img_hash_dev *hdev,
+ struct ahash_request *req)
+{
+ struct crypto_async_request *async_req, *backlog;
+ struct img_hash_request_ctx *ctx;
+ unsigned long flags;
+ int err = 0, res = 0;
+
+ spin_lock_irqsave(&hdev->lock, flags);
+
+ if (req)
+ res = ahash_enqueue_request(&hdev->queue, req);
+
+ if (DRIVER_FLAGS_BUSY & hdev->flags) {
+ spin_unlock_irqrestore(&hdev->lock, flags);
+ return res;
+ }
+
+ backlog = crypto_get_backlog(&hdev->queue);
+ async_req = crypto_dequeue_request(&hdev->queue);
+ if (async_req)
+ hdev->flags |= DRIVER_FLAGS_BUSY;
+
+ spin_unlock_irqrestore(&hdev->lock, flags);
+
+ if (!async_req)
+ return res;
+
+ if (backlog)
+ backlog->complete(backlog, -EINPROGRESS);
+
+ req = ahash_request_cast(async_req);
+ hdev->req = req;
+
+ ctx = ahash_request_ctx(req);
+
+ dev_info(hdev->dev, "processing req, op: %lu, bytes: %d\n",
+ ctx->op, req->nbytes);
+
+ err = img_hash_hw_init(hdev);
+
+ if (!err)
+ err = img_hash_process_data(hdev);
+
+ if (err != -EINPROGRESS) {
+ /* done_task will not finish so do it here */
+ img_hash_finish_req(req, err);
+ }
+ return res;
+}
+
+static int img_hash_update(struct ahash_request *req)
+{
+ struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+ rctx->fallback_req.base.flags = req->base.flags
+ & CRYPTO_TFM_REQ_MAY_SLEEP;
+ rctx->fallback_req.nbytes = req->nbytes;
+ rctx->fallback_req.src = req->src;
+
+ return crypto_ahash_update(&rctx->fallback_req);
+}
+
+static int img_hash_final(struct ahash_request *req)
+{
+ struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+ rctx->fallback_req.base.flags = req->base.flags
+ & CRYPTO_TFM_REQ_MAY_SLEEP;
+ rctx->fallback_req.result = req->result;
+
+ return crypto_ahash_final(&rctx->fallback_req);
+}
+
+static int img_hash_finup(struct ahash_request *req)
+{
+ struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+ rctx->fallback_req.base.flags = req->base.flags
+ & CRYPTO_TFM_REQ_MAY_SLEEP;
+ rctx->fallback_req.nbytes = req->nbytes;
+ rctx->fallback_req.src = req->src;
+ rctx->fallback_req.result = req->result;
+
+ return crypto_ahash_finup(&rctx->fallback_req);
+}
+
+static int img_hash_digest(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct img_hash_ctx *tctx = crypto_ahash_ctx(tfm);
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
+ struct img_hash_dev *hdev = NULL;
+ struct img_hash_dev *tmp;
+ int err;
+
+ spin_lock(&img_hash.lock);
+ if (!tctx->hdev) {
+ list_for_each_entry(tmp, &img_hash.dev_list, list) {
+ hdev = tmp;
+ break;
+ }
+ tctx->hdev = hdev;
+
+ } else {
+ hdev = tctx->hdev;
+ }
+
+ spin_unlock(&img_hash.lock);
+ ctx->hdev = hdev;
+ ctx->flags = 0;
+ ctx->digsize = crypto_ahash_digestsize(tfm);
+
+ switch (ctx->digsize) {
+ case SHA1_DIGEST_SIZE:
+ ctx->flags |= DRIVER_FLAGS_SHA1;
+ break;
+ case SHA256_DIGEST_SIZE:
+ ctx->flags |= DRIVER_FLAGS_SHA256;
+ break;
+ case SHA224_DIGEST_SIZE:
+ ctx->flags |= DRIVER_FLAGS_SHA224;
+ break;
+ case MD5_DIGEST_SIZE:
+ ctx->flags |= DRIVER_FLAGS_MD5;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ctx->bufcnt = 0;
+ ctx->offset = 0;
+ ctx->sent = 0;
+ ctx->total = req->nbytes;
+ ctx->sg = req->src;
+ ctx->sgfirst = req->src;
+ ctx->nents = sg_nents(ctx->sg);
+
+ err = img_hash_handle_queue(tctx->hdev, req);
+
+ return err;
+}
+
+static int img_hash_cra_init(struct crypto_tfm *tfm)
+{
+ struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+ const char *alg_name = crypto_tfm_alg_name(tfm);
+ int err = -ENOMEM;
+
+ ctx->fallback = crypto_alloc_ahash(alg_name, 0,
+ CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(ctx->fallback)) {
+ pr_err("img_hash: Could not load fallback driver.\n");
+ err = PTR_ERR(ctx->fallback);
+ goto err;
+ }
+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+ sizeof(struct img_hash_request_ctx) +
+ IMG_HASH_DMA_THRESHOLD);
+
+ return 0;
+
+err:
+ return err;
+}
+
+static void img_hash_cra_exit(struct crypto_tfm *tfm)
+{
+ struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm);
+
+ crypto_free_ahash(tctx->fallback);
+}
+
+static irqreturn_t img_irq_handler(int irq, void *dev_id)
+{
+ struct img_hash_dev *hdev = dev_id;
+ u32 reg;
+
+ reg = img_hash_read(hdev, CR_INTSTAT);
+ img_hash_write(hdev, CR_INTCLEAR, reg);
+
+ if (reg & CR_INT_NEW_RESULTS_SET) {
+ dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n");
+ if (DRIVER_FLAGS_BUSY & hdev->flags) {
+ hdev->flags |= DRIVER_FLAGS_OUTPUT_READY;
+ if (!(DRIVER_FLAGS_CPU & hdev->flags))
+ hdev->flags |= DRIVER_FLAGS_DMA_READY;
+ tasklet_schedule(&hdev->done_task);
+ } else {
+ dev_warn(hdev->dev,
+ "HASH interrupt when no active requests.\n");
+ }
+ } else if (reg & CR_INT_RESULTS_AVAILABLE) {
+ dev_warn(hdev->dev,
+ "IRQ triggered before the hash had completed\n");
+ } else if (reg & CR_INT_RESULT_READ_ERR) {
+ dev_warn(hdev->dev,
+ "Attempt to read from an empty result queue\n");
+ } else if (reg & CR_INT_MESSAGE_WRITE_ERROR) {
+ dev_warn(hdev->dev,
+ "Data written before the hardware was configured\n");
+ }
+ return IRQ_HANDLED;
+}
+
+static struct ahash_alg img_algs[] = {
+ {
+ .init = img_hash_init,
+ .update = img_hash_update,
+ .final = img_hash_final,
+ .finup = img_hash_finup,
+ .digest = img_hash_digest,
+ .halg = {
+ .digestsize = MD5_DIGEST_SIZE,
+ .base = {
+ .cra_name = "md5",
+ .cra_driver_name = "img-md5",
+ .cra_priority = 300,
+ .cra_flags =
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct img_hash_ctx),
+ .cra_init = img_hash_cra_init,
+ .cra_exit = img_hash_cra_exit,
+ .cra_module = THIS_MODULE,
+ }
+ }
+ },
+ {
+ .init = img_hash_init,
+ .update = img_hash_update,
+ .final = img_hash_final,
+ .finup = img_hash_finup,
+ .digest = img_hash_digest,
+ .halg = {
+ .digestsize = SHA1_DIGEST_SIZE,
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name = "img-sha1",
+ .cra_priority = 300,
+ .cra_flags =
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct img_hash_ctx),
+ .cra_init = img_hash_cra_init,
+ .cra_exit = img_hash_cra_exit,
+ .cra_module = THIS_MODULE,
+ }
+ }
+ },
+ {
+ .init = img_hash_init,
+ .update = img_hash_update,
+ .final = img_hash_final,
+ .finup = img_hash_finup,
+ .digest = img_hash_digest,
+ .halg = {
+ .digestsize = SHA224_DIGEST_SIZE,
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "img-sha224",
+ .cra_priority = 300,
+ .cra_flags =
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct img_hash_ctx),
+ .cra_init = img_hash_cra_init,
+ .cra_exit = img_hash_cra_exit,
+ .cra_module = THIS_MODULE,
+ }
+ }
+ },
+ {
+ .init = img_hash_init,
+ .update = img_hash_update,
+ .final = img_hash_final,
+ .finup = img_hash_finup,
+ .digest = img_hash_digest,
+ .halg = {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "img-sha256",
+ .cra_priority = 300,
+ .cra_flags =
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct img_hash_ctx),
+ .cra_init = img_hash_cra_init,
+ .cra_exit = img_hash_cra_exit,
+ .cra_module = THIS_MODULE,
+ }
+ }
+ }
+};
+
+static int img_register_algs(struct img_hash_dev *hdev)
+{
+ int i, err;
+
+ for (i = 0; i < ARRAY_SIZE(img_algs); i++) {
+ err = crypto_register_ahash(&img_algs[i]);
+ if (err)
+ goto err_reg;
+ }
+ return 0;
+
+err_reg:
+ for (; i--; )
+ crypto_unregister_ahash(&img_algs[i]);
+
+ return err;
+}
+
+static int img_unregister_algs(struct img_hash_dev *hdev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(img_algs); i++)
+ crypto_unregister_ahash(&img_algs[i]);
+ return 0;
+}
+
+static void img_hash_done_task(unsigned long data)
+{
+ struct img_hash_dev *hdev = (struct img_hash_dev *)data;
+ int err = 0;
+
+ if (hdev->err == -EINVAL) {
+ err = hdev->err;
+ goto finish;
+ }
+
+ if (!(DRIVER_FLAGS_BUSY & hdev->flags)) {
+ img_hash_handle_queue(hdev, NULL);
+ return;
+ }
+
+ if (DRIVER_FLAGS_CPU & hdev->flags) {
+ if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
+ hdev->flags &= ~DRIVER_FLAGS_OUTPUT_READY;
+ goto finish;
+ }
+ } else if (DRIVER_FLAGS_DMA_READY & hdev->flags) {
+ if (DRIVER_FLAGS_DMA_ACTIVE & hdev->flags) {
+ hdev->flags &= ~DRIVER_FLAGS_DMA_ACTIVE;
+ img_hash_write_via_dma_stop(hdev);
+ if (hdev->err) {
+ err = hdev->err;
+ goto finish;
+ }
+ }
+ if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
+ hdev->flags &= ~(DRIVER_FLAGS_DMA_READY |
+ DRIVER_FLAGS_OUTPUT_READY);
+ goto finish;
+ }
+ }
+ return;
+
+finish:
+ img_hash_finish_req(hdev->req, err);
+}
+
+static const struct of_device_id img_hash_match[] = {
+ { .compatible = "img,hash-accelerator" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, img_hash_match);
+
+static int img_hash_probe(struct platform_device *pdev)
+{
+ struct img_hash_dev *hdev;
+ struct device *dev = &pdev->dev;
+ struct resource *hash_res;
+ int irq;
+ int err;
+
+ hdev = devm_kzalloc(dev, sizeof(*hdev), GFP_KERNEL);
+ if (hdev == NULL)
+ return -ENOMEM;
+
+ spin_lock_init(&hdev->lock);
+
+ hdev->dev = dev;
+
+ platform_set_drvdata(pdev, hdev);
+
+ INIT_LIST_HEAD(&hdev->list);
+
+ tasklet_init(&hdev->done_task, img_hash_done_task, (unsigned long)hdev);
+ tasklet_init(&hdev->dma_task, img_hash_dma_task, (unsigned long)hdev);
+
+ crypto_init_queue(&hdev->queue, IMG_HASH_QUEUE_LENGTH);
+
+ /* Register bank */
+ hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+ hdev->io_base = devm_ioremap_resource(dev, hash_res);
+ if (IS_ERR(hdev->io_base)) {
+ err = PTR_ERR(hdev->io_base);
+ dev_err(dev, "can't ioremap, returned %d\n", err);
+
+ goto res_err;
+ }
+
+ /* Write port (DMA or CPU) */
+ hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ hdev->cpu_addr = devm_ioremap_resource(dev, hash_res);
+ if (IS_ERR(hdev->cpu_addr)) {
+ dev_err(dev, "can't ioremap write port\n");
+ err = PTR_ERR(hdev->cpu_addr);
+ goto res_err;
+ }
+ hdev->bus_addr = hash_res->start;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(dev, "no IRQ resource info\n");
+ err = irq;
+ goto res_err;
+ }
+
+ err = devm_request_irq(dev, irq, img_irq_handler, 0,
+ dev_name(dev), hdev);
+ if (err) {
+ dev_err(dev, "unable to request irq\n");
+ goto res_err;
+ }
+ dev_dbg(dev, "using IRQ channel %d\n", irq);
+
+ hdev->hash_clk = devm_clk_get(&pdev->dev, "hash");
+ if (IS_ERR(hdev->hash_clk)) {
+ dev_err(dev, "clock initialization failed.\n");
+ err = PTR_ERR(hdev->hash_clk);
+ goto res_err;
+ }
+
+ hdev->sys_clk = devm_clk_get(&pdev->dev, "sys");
+ if (IS_ERR(hdev->sys_clk)) {
+ dev_err(dev, "clock initialization failed.\n");
+ err = PTR_ERR(hdev->sys_clk);
+ goto res_err;
+ }
+
+ err = clk_prepare_enable(hdev->hash_clk);
+ if (err)
+ goto res_err;
+
+ err = clk_prepare_enable(hdev->sys_clk);
+ if (err)
+ goto clk_err;
+
+ err = img_hash_dma_init(hdev);
+ if (err)
+ goto dma_err;
+
+ dev_dbg(dev, "using %s for DMA transfers\n",
+ dma_chan_name(hdev->dma_lch));
+
+ spin_lock(&img_hash.lock);
+ list_add_tail(&hdev->list, &img_hash.dev_list);
+ spin_unlock(&img_hash.lock);
+
+ err = img_register_algs(hdev);
+ if (err)
+ goto err_algs;
+ dev_dbg(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator initialized\n");
+
+ return 0;
+
+err_algs:
+ spin_lock(&img_hash.lock);
+ list_del(&hdev->list);
+ spin_unlock(&img_hash.lock);
+ dma_release_channel(hdev->dma_lch);
+dma_err:
+ clk_disable_unprepare(hdev->sys_clk);
+clk_err:
+ clk_disable_unprepare(hdev->hash_clk);
+res_err:
+ tasklet_kill(&hdev->done_task);
+ tasklet_kill(&hdev->dma_task);
+
+ return err;
+}
+
+static int img_hash_remove(struct platform_device *pdev)
+{
+ static struct img_hash_dev *hdev;
+
+ hdev = platform_get_drvdata(pdev);
+ spin_lock(&img_hash.lock);
+ list_del(&hdev->list);
+ spin_unlock(&img_hash.lock);
+
+ img_unregister_algs(hdev);
+
+ tasklet_kill(&hdev->done_task);
+ tasklet_kill(&hdev->dma_task);
+
+ dma_release_channel(hdev->dma_lch);
+
+ clk_disable_unprepare(hdev->hash_clk);
+ clk_disable_unprepare(hdev->sys_clk);
+
+ return 0;
+}
+
+static struct platform_driver img_hash_driver = {
+ .probe = img_hash_probe,
+ .remove = img_hash_remove,
+ .driver = {
+ .name = "img-hash-accelerator",
+ .of_match_table = of_match_ptr(img_hash_match),
+ }
+};
+module_platform_driver(img_hash_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Imgtec SHA1/224/256 & MD5 hw accelerator driver");
+MODULE_AUTHOR("Will Thomas.");
+MODULE_AUTHOR("James Hartley <james.hartley@imgtec.com>");
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index 829d6394fb33..59ed54e464a9 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -153,7 +153,7 @@ static int mxs_dcp_start_dma(struct dcp_async_ctx *actx)
struct dcp *sdcp = global_sdcp;
const int chan = actx->chan;
uint32_t stat;
- int ret;
+ unsigned long ret;
struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc),
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index 42f95a4326b0..9a28b7e07c71 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -554,15 +554,23 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
return err;
}
-static int omap_aes_check_aligned(struct scatterlist *sg)
+static int omap_aes_check_aligned(struct scatterlist *sg, int total)
{
+ int len = 0;
+
while (sg) {
if (!IS_ALIGNED(sg->offset, 4))
return -1;
if (!IS_ALIGNED(sg->length, AES_BLOCK_SIZE))
return -1;
+
+ len += sg->length;
sg = sg_next(sg);
}
+
+ if (len != total)
+ return -1;
+
return 0;
}
@@ -633,8 +641,8 @@ static int omap_aes_handle_queue(struct omap_aes_dev *dd,
dd->in_sg = req->src;
dd->out_sg = req->dst;
- if (omap_aes_check_aligned(dd->in_sg) ||
- omap_aes_check_aligned(dd->out_sg)) {
+ if (omap_aes_check_aligned(dd->in_sg, dd->total) ||
+ omap_aes_check_aligned(dd->out_sg, dd->total)) {
if (omap_aes_copy_sgs(dd))
pr_err("Failed to copy SGs for unaligned cases\n");
dd->sgs_copied = 1;
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 3c76696ee578..4d63e0d4da9a 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -640,6 +640,7 @@ static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx)
while (ctx->sg) {
vaddr = kmap_atomic(sg_page(ctx->sg));
+ vaddr += ctx->sg->offset;
count = omap_sham_append_buffer(ctx,
vaddr + ctx->offset,
@@ -1945,6 +1946,7 @@ static int omap_sham_probe(struct platform_device *pdev)
dd->flags |= dd->pdata->flags;
pm_runtime_enable(dev);
+ pm_runtime_irq_safe(dev);
pm_runtime_get_sync(dev);
rev = omap_sham_read(dd, SHA_REG_REV(dd));
pm_runtime_put_sync(&pdev->dev);
diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h
index 19c0efa29ab3..f22ce7169fa5 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h
@@ -52,7 +52,6 @@
#include <linux/io.h>
#include "adf_cfg_common.h"
-#define PCI_VENDOR_ID_INTEL 0x8086
#define ADF_DH895XCC_DEVICE_NAME "dh895xcc"
#define ADF_DH895XCC_PCI_DEVICE_ID 0x435
#define ADF_PCI_MAX_BARS 3
diff --git a/drivers/crypto/qat/qat_common/adf_accel_engine.c b/drivers/crypto/qat/qat_common/adf_accel_engine.c
index c77453b900a3..7f8b66c915ed 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_engine.c
+++ b/drivers/crypto/qat/qat_common/adf_accel_engine.c
@@ -60,36 +60,40 @@ int adf_ae_fw_load(struct adf_accel_dev *accel_dev)
if (request_firmware(&loader_data->uof_fw, hw_device->fw_name,
&accel_dev->accel_pci_dev.pci_dev->dev)) {
- pr_err("QAT: Failed to load firmware %s\n", hw_device->fw_name);
+ dev_err(&GET_DEV(accel_dev), "Failed to load firmware %s\n",
+ hw_device->fw_name);
return -EFAULT;
}
uof_size = loader_data->uof_fw->size;
uof_addr = (void *)loader_data->uof_fw->data;
if (qat_uclo_map_uof_obj(loader_data->fw_loader, uof_addr, uof_size)) {
- pr_err("QAT: Failed to map UOF\n");
+ dev_err(&GET_DEV(accel_dev), "Failed to map UOF\n");
goto out_err;
}
if (qat_uclo_wr_all_uimage(loader_data->fw_loader)) {
- pr_err("QAT: Failed to map UOF\n");
+ dev_err(&GET_DEV(accel_dev), "Failed to map UOF\n");
goto out_err;
}
return 0;
out_err:
- release_firmware(loader_data->uof_fw);
+ adf_ae_fw_release(accel_dev);
return -EFAULT;
}
-int adf_ae_fw_release(struct adf_accel_dev *accel_dev)
+void adf_ae_fw_release(struct adf_accel_dev *accel_dev)
{
struct adf_fw_loader_data *loader_data = accel_dev->fw_loader;
- release_firmware(loader_data->uof_fw);
qat_uclo_del_uof_obj(loader_data->fw_loader);
qat_hal_deinit(loader_data->fw_loader);
+
+ if (loader_data->uof_fw)
+ release_firmware(loader_data->uof_fw);
+
+ loader_data->uof_fw = NULL;
loader_data->fw_loader = NULL;
- return 0;
}
int adf_ae_start(struct adf_accel_dev *accel_dev)
@@ -104,8 +108,9 @@ int adf_ae_start(struct adf_accel_dev *accel_dev)
ae_ctr++;
}
}
- pr_info("QAT: qat_dev%d started %d acceleration engines\n",
- accel_dev->accel_id, ae_ctr);
+ dev_info(&GET_DEV(accel_dev),
+ "qat_dev%d started %d acceleration engines\n",
+ accel_dev->accel_id, ae_ctr);
return 0;
}
@@ -121,8 +126,9 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev)
ae_ctr++;
}
}
- pr_info("QAT: qat_dev%d stopped %d acceleration engines\n",
- accel_dev->accel_id, ae_ctr);
+ dev_info(&GET_DEV(accel_dev),
+ "qat_dev%d stopped %d acceleration engines\n",
+ accel_dev->accel_id, ae_ctr);
return 0;
}
@@ -147,12 +153,12 @@ int adf_ae_init(struct adf_accel_dev *accel_dev)
accel_dev->fw_loader = loader_data;
if (qat_hal_init(accel_dev)) {
- pr_err("QAT: Failed to init the AEs\n");
+ dev_err(&GET_DEV(accel_dev), "Failed to init the AEs\n");
kfree(loader_data);
return -EFAULT;
}
if (adf_ae_reset(accel_dev, 0)) {
- pr_err("QAT: Failed to reset the AEs\n");
+ dev_err(&GET_DEV(accel_dev), "Failed to reset the AEs\n");
qat_hal_deinit(loader_data->fw_loader);
kfree(loader_data);
return -EFAULT;
@@ -162,6 +168,9 @@ int adf_ae_init(struct adf_accel_dev *accel_dev)
int adf_ae_shutdown(struct adf_accel_dev *accel_dev)
{
+ struct adf_fw_loader_data *loader_data = accel_dev->fw_loader;
+
+ qat_hal_deinit(loader_data->fw_loader);
kfree(accel_dev->fw_loader);
accel_dev->fw_loader = NULL;
return 0;
diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c
index fa1fef824de2..2dbc733b8ab2 100644
--- a/drivers/crypto/qat/qat_common/adf_aer.c
+++ b/drivers/crypto/qat/qat_common/adf_aer.c
@@ -60,14 +60,14 @@ static pci_ers_result_t adf_error_detected(struct pci_dev *pdev,
{
struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
- pr_info("QAT: Acceleration driver hardware error detected.\n");
+ dev_info(&pdev->dev, "Acceleration driver hardware error detected.\n");
if (!accel_dev) {
- pr_err("QAT: Can't find acceleration device\n");
+ dev_err(&pdev->dev, "Can't find acceleration device\n");
return PCI_ERS_RESULT_DISCONNECT;
}
if (state == pci_channel_io_perm_failure) {
- pr_err("QAT: Can't recover from device error\n");
+ dev_err(&pdev->dev, "Can't recover from device error\n");
return PCI_ERS_RESULT_DISCONNECT;
}
@@ -88,10 +88,12 @@ static void adf_dev_restore(struct adf_accel_dev *accel_dev)
struct pci_dev *parent = pdev->bus->self;
uint16_t bridge_ctl = 0;
- pr_info("QAT: Resetting device qat_dev%d\n", accel_dev->accel_id);
+ dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n",
+ accel_dev->accel_id);
if (!pci_wait_for_pending_transaction(pdev))
- pr_info("QAT: Transaction still in progress. Proceeding\n");
+ dev_info(&GET_DEV(accel_dev),
+ "Transaction still in progress. Proceeding\n");
pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl);
bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET;
@@ -158,7 +160,8 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
unsigned long timeout = wait_for_completion_timeout(
&reset_data->compl, wait_jiffies);
if (!timeout) {
- pr_err("QAT: Reset device timeout expired\n");
+ dev_err(&GET_DEV(accel_dev),
+ "Reset device timeout expired\n");
ret = -EFAULT;
}
kfree(reset_data);
@@ -184,8 +187,8 @@ static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev)
static void adf_resume(struct pci_dev *pdev)
{
- pr_info("QAT: Acceleration driver reset completed\n");
- pr_info("QAT: Device is up and runnig\n");
+ dev_info(&pdev->dev, "Acceleration driver reset completed\n");
+ dev_info(&pdev->dev, "Device is up and runnig\n");
}
static struct pci_error_handlers adf_err_handler = {
@@ -236,7 +239,7 @@ EXPORT_SYMBOL_GPL(adf_disable_aer);
int adf_init_aer(void)
{
device_reset_wq = create_workqueue("qat_device_reset_wq");
- return (device_reset_wq == NULL) ? -EFAULT : 0;
+ return !device_reset_wq ? -EFAULT : 0;
}
void adf_exit_aer(void)
diff --git a/drivers/crypto/qat/qat_common/adf_cfg.c b/drivers/crypto/qat/qat_common/adf_cfg.c
index de16da9070a5..ab65bc274561 100644
--- a/drivers/crypto/qat/qat_common/adf_cfg.c
+++ b/drivers/crypto/qat/qat_common/adf_cfg.c
@@ -142,7 +142,8 @@ int adf_cfg_dev_add(struct adf_accel_dev *accel_dev)
dev_cfg_data,
&qat_dev_cfg_fops);
if (!dev_cfg_data->debug) {
- pr_err("QAT: Failed to create qat cfg debugfs entry.\n");
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to create qat cfg debugfs entry.\n");
kfree(dev_cfg_data);
accel_dev->cfg = NULL;
return -EFAULT;
@@ -305,7 +306,7 @@ int adf_cfg_add_key_value_param(struct adf_accel_dev *accel_dev,
snprintf(key_val->val, ADF_CFG_MAX_VAL_LEN_IN_BYTES,
"0x%lx", (unsigned long)val);
} else {
- pr_err("QAT: Unknown type given.\n");
+ dev_err(&GET_DEV(accel_dev), "Unknown type given.\n");
kfree(key_val);
return -1;
}
diff --git a/drivers/crypto/qat/qat_common/adf_cfg_strings.h b/drivers/crypto/qat/qat_common/adf_cfg_strings.h
index c7ac758ebc90..13575111382c 100644
--- a/drivers/crypto/qat/qat_common/adf_cfg_strings.h
+++ b/drivers/crypto/qat/qat_common/adf_cfg_strings.h
@@ -59,7 +59,7 @@
#define ADF_RING_SYM_TX "RingSymTx"
#define ADF_RING_RND_TX "RingNrbgTx"
#define ADF_RING_ASYM_RX "RingAsymRx"
-#define ADF_RING_SYM_RX "RinSymRx"
+#define ADF_RING_SYM_RX "RingSymRx"
#define ADF_RING_RND_RX "RingNrbgRx"
#define ADF_RING_DC_TX "RingTx"
#define ADF_RING_DC_RX "RingRx"
@@ -69,15 +69,15 @@
#define ADF_DC "Dc"
#define ADF_ETRMGR_COALESCING_ENABLED "InterruptCoalescingEnabled"
#define ADF_ETRMGR_COALESCING_ENABLED_FORMAT \
- ADF_ETRMGR_BANK"%d"ADF_ETRMGR_COALESCING_ENABLED
+ ADF_ETRMGR_BANK "%d" ADF_ETRMGR_COALESCING_ENABLED
#define ADF_ETRMGR_COALESCE_TIMER "InterruptCoalescingTimerNs"
#define ADF_ETRMGR_COALESCE_TIMER_FORMAT \
- ADF_ETRMGR_BANK"%d"ADF_ETRMGR_COALESCE_TIMER
+ ADF_ETRMGR_BANK "%d" ADF_ETRMGR_COALESCE_TIMER
#define ADF_ETRMGR_COALESCING_MSG_ENABLED "InterruptCoalescingNumResponses"
#define ADF_ETRMGR_COALESCING_MSG_ENABLED_FORMAT \
- ADF_ETRMGR_BANK"%d"ADF_ETRMGR_COALESCING_MSG_ENABLED
+ ADF_ETRMGR_BANK "%d" ADF_ETRMGR_COALESCING_MSG_ENABLED
#define ADF_ETRMGR_CORE_AFFINITY "CoreAffinity"
#define ADF_ETRMGR_CORE_AFFINITY_FORMAT \
- ADF_ETRMGR_BANK"%d"ADF_ETRMGR_CORE_AFFINITY
+ ADF_ETRMGR_BANK "%d" ADF_ETRMGR_CORE_AFFINITY
#define ADF_ACCEL_STR "Accelerator%d"
#endif
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index a62e485c8786..0666ee6a3360 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -115,7 +115,7 @@ int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev);
int adf_ae_init(struct adf_accel_dev *accel_dev);
int adf_ae_shutdown(struct adf_accel_dev *accel_dev);
int adf_ae_fw_load(struct adf_accel_dev *accel_dev);
-int adf_ae_fw_release(struct adf_accel_dev *accel_dev);
+void adf_ae_fw_release(struct adf_accel_dev *accel_dev);
int adf_ae_start(struct adf_accel_dev *accel_dev);
int adf_ae_stop(struct adf_accel_dev *accel_dev);
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
index 74207a6f0516..cb5f066e93a6 100644
--- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c
+++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
@@ -77,14 +77,14 @@ struct adf_ctl_drv_info {
struct class *drv_class;
};
-static struct adf_ctl_drv_info adt_ctl_drv;
+static struct adf_ctl_drv_info adf_ctl_drv;
static void adf_chr_drv_destroy(void)
{
- device_destroy(adt_ctl_drv.drv_class, MKDEV(adt_ctl_drv.major, 0));
- cdev_del(&adt_ctl_drv.drv_cdev);
- class_destroy(adt_ctl_drv.drv_class);
- unregister_chrdev_region(MKDEV(adt_ctl_drv.major, 0), 1);
+ device_destroy(adf_ctl_drv.drv_class, MKDEV(adf_ctl_drv.major, 0));
+ cdev_del(&adf_ctl_drv.drv_cdev);
+ class_destroy(adf_ctl_drv.drv_class);
+ unregister_chrdev_region(MKDEV(adf_ctl_drv.major, 0), 1);
}
static int adf_chr_drv_create(void)
@@ -97,20 +97,20 @@ static int adf_chr_drv_create(void)
return -EFAULT;
}
- adt_ctl_drv.drv_class = class_create(THIS_MODULE, DEVICE_NAME);
- if (IS_ERR(adt_ctl_drv.drv_class)) {
+ adf_ctl_drv.drv_class = class_create(THIS_MODULE, DEVICE_NAME);
+ if (IS_ERR(adf_ctl_drv.drv_class)) {
pr_err("QAT: class_create failed for adf_ctl\n");
goto err_chrdev_unreg;
}
- adt_ctl_drv.major = MAJOR(dev_id);
- cdev_init(&adt_ctl_drv.drv_cdev, &adf_ctl_ops);
- if (cdev_add(&adt_ctl_drv.drv_cdev, dev_id, 1)) {
+ adf_ctl_drv.major = MAJOR(dev_id);
+ cdev_init(&adf_ctl_drv.drv_cdev, &adf_ctl_ops);
+ if (cdev_add(&adf_ctl_drv.drv_cdev, dev_id, 1)) {
pr_err("QAT: cdev add failed\n");
goto err_class_destr;
}
- drv_device = device_create(adt_ctl_drv.drv_class, NULL,
- MKDEV(adt_ctl_drv.major, 0),
+ drv_device = device_create(adf_ctl_drv.drv_class, NULL,
+ MKDEV(adf_ctl_drv.major, 0),
NULL, DEVICE_NAME);
if (IS_ERR(drv_device)) {
pr_err("QAT: failed to create device\n");
@@ -118,9 +118,9 @@ static int adf_chr_drv_create(void)
}
return 0;
err_cdev_del:
- cdev_del(&adt_ctl_drv.drv_cdev);
+ cdev_del(&adf_ctl_drv.drv_cdev);
err_class_destr:
- class_destroy(adt_ctl_drv.drv_class);
+ class_destroy(adf_ctl_drv.drv_class);
err_chrdev_unreg:
unregister_chrdev_region(dev_id, 1);
return -EFAULT;
@@ -159,14 +159,16 @@ static int adf_add_key_value_data(struct adf_accel_dev *accel_dev,
if (adf_cfg_add_key_value_param(accel_dev, section,
key_val->key, (void *)val,
key_val->type)) {
- pr_err("QAT: failed to add keyvalue.\n");
+ dev_err(&GET_DEV(accel_dev),
+ "failed to add hex keyvalue.\n");
return -EFAULT;
}
} else {
if (adf_cfg_add_key_value_param(accel_dev, section,
key_val->key, key_val->val,
key_val->type)) {
- pr_err("QAT: failed to add keyvalue.\n");
+ dev_err(&GET_DEV(accel_dev),
+ "failed to add keyvalue.\n");
return -EFAULT;
}
}
@@ -185,12 +187,14 @@ static int adf_copy_key_value_data(struct adf_accel_dev *accel_dev,
while (section_head) {
if (copy_from_user(&section, (void __user *)section_head,
sizeof(*section_head))) {
- pr_err("QAT: failed to copy section info\n");
+ dev_err(&GET_DEV(accel_dev),
+ "failed to copy section info\n");
goto out_err;
}
if (adf_cfg_section_add(accel_dev, section.name)) {
- pr_err("QAT: failed to add section.\n");
+ dev_err(&GET_DEV(accel_dev),
+ "failed to add section.\n");
goto out_err;
}
@@ -199,7 +203,8 @@ static int adf_copy_key_value_data(struct adf_accel_dev *accel_dev,
while (params_head) {
if (copy_from_user(&key_val, (void __user *)params_head,
sizeof(key_val))) {
- pr_err("QAT: Failed to copy keyvalue.\n");
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to copy keyvalue.\n");
goto out_err;
}
if (adf_add_key_value_data(accel_dev, section.name,
@@ -258,8 +263,9 @@ static int adf_ctl_is_device_in_use(int id)
if (id == dev->accel_id || id == ADF_CFG_ALL_DEVICES) {
if (adf_devmgr_in_reset(dev) || adf_dev_in_use(dev)) {
- pr_info("QAT: device qat_dev%d is busy\n",
- dev->accel_id);
+ dev_info(&GET_DEV(dev),
+ "device qat_dev%d is busy\n",
+ dev->accel_id);
return -EBUSY;
}
}
@@ -280,7 +286,8 @@ static int adf_ctl_stop_devices(uint32_t id)
continue;
if (adf_dev_stop(accel_dev)) {
- pr_err("QAT: Failed to stop qat_dev%d\n", id);
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to stop qat_dev%d\n", id);
ret = -EFAULT;
} else {
adf_dev_shutdown(accel_dev);
@@ -343,17 +350,20 @@ static int adf_ctl_ioctl_dev_start(struct file *fp, unsigned int cmd,
}
if (!adf_dev_started(accel_dev)) {
- pr_info("QAT: Starting acceleration device qat_dev%d.\n",
- ctl_data->device_id);
+ dev_info(&GET_DEV(accel_dev),
+ "Starting acceleration device qat_dev%d.\n",
+ ctl_data->device_id);
ret = adf_dev_init(accel_dev);
if (!ret)
ret = adf_dev_start(accel_dev);
} else {
- pr_info("QAT: Acceleration device qat_dev%d already started.\n",
- ctl_data->device_id);
+ dev_info(&GET_DEV(accel_dev),
+ "Acceleration device qat_dev%d already started.\n",
+ ctl_data->device_id);
}
if (ret) {
- pr_err("QAT: Failed to start qat_dev%d\n", ctl_data->device_id);
+ dev_err(&GET_DEV(accel_dev), "Failed to start qat_dev%d\n",
+ ctl_data->device_id);
adf_dev_stop(accel_dev);
adf_dev_shutdown(accel_dev);
}
@@ -408,7 +418,7 @@ static int adf_ctl_ioctl_get_status(struct file *fp, unsigned int cmd,
if (copy_to_user((void __user *)arg, &dev_info,
sizeof(struct adf_dev_status_info))) {
- pr_err("QAT: failed to copy status.\n");
+ dev_err(&GET_DEV(accel_dev), "failed to copy status.\n");
return -EFAULT;
}
return 0;
diff --git a/drivers/crypto/qat/qat_common/adf_dev_mgr.c b/drivers/crypto/qat/qat_common/adf_dev_mgr.c
index 4a0a829d4500..3f0ff9e7d840 100644
--- a/drivers/crypto/qat/qat_common/adf_dev_mgr.c
+++ b/drivers/crypto/qat/qat_common/adf_dev_mgr.c
@@ -67,7 +67,8 @@ int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev)
struct list_head *itr;
if (num_devices == ADF_MAX_DEVICES) {
- pr_err("QAT: Only support up to %d devices\n", ADF_MAX_DEVICES);
+ dev_err(&GET_DEV(accel_dev), "Only support up to %d devices\n",
+ ADF_MAX_DEVICES);
return -EFAULT;
}
diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c
index 8f0ca498ab87..245f43237a2d 100644
--- a/drivers/crypto/qat/qat_common/adf_init.c
+++ b/drivers/crypto/qat/qat_common/adf_init.c
@@ -124,12 +124,12 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
if (!hw_data) {
dev_err(&GET_DEV(accel_dev),
- "QAT: Failed to init device - hw_data not set\n");
+ "Failed to init device - hw_data not set\n");
return -EFAULT;
}
if (!test_bit(ADF_STATUS_CONFIGURED, &accel_dev->status)) {
- pr_info("QAT: Device not configured\n");
+ dev_err(&GET_DEV(accel_dev), "Device not configured\n");
return -EFAULT;
}
@@ -151,20 +151,21 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
hw_data->enable_ints(accel_dev);
if (adf_ae_init(accel_dev)) {
- pr_err("QAT: Failed to initialise Acceleration Engine\n");
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to initialise Acceleration Engine\n");
return -EFAULT;
}
set_bit(ADF_STATUS_AE_INITIALISED, &accel_dev->status);
if (adf_ae_fw_load(accel_dev)) {
- pr_err("QAT: Failed to load acceleration FW\n");
- adf_ae_fw_release(accel_dev);
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to load acceleration FW\n");
return -EFAULT;
}
set_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status);
if (hw_data->alloc_irq(accel_dev)) {
- pr_err("QAT: Failed to allocate interrupts\n");
+ dev_err(&GET_DEV(accel_dev), "Failed to allocate interrupts\n");
return -EFAULT;
}
set_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status);
@@ -179,8 +180,9 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
if (!service->admin)
continue;
if (service->event_hld(accel_dev, ADF_EVENT_INIT)) {
- pr_err("QAT: Failed to initialise service %s\n",
- service->name);
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to initialise service %s\n",
+ service->name);
return -EFAULT;
}
set_bit(accel_dev->accel_id, &service->init_status);
@@ -190,8 +192,9 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
if (service->admin)
continue;
if (service->event_hld(accel_dev, ADF_EVENT_INIT)) {
- pr_err("QAT: Failed to initialise service %s\n",
- service->name);
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to initialise service %s\n",
+ service->name);
return -EFAULT;
}
set_bit(accel_dev->accel_id, &service->init_status);
@@ -221,7 +224,7 @@ int adf_dev_start(struct adf_accel_dev *accel_dev)
set_bit(ADF_STATUS_STARTING, &accel_dev->status);
if (adf_ae_start(accel_dev)) {
- pr_err("QAT: AE Start Failed\n");
+ dev_err(&GET_DEV(accel_dev), "AE Start Failed\n");
return -EFAULT;
}
set_bit(ADF_STATUS_AE_STARTED, &accel_dev->status);
@@ -231,8 +234,9 @@ int adf_dev_start(struct adf_accel_dev *accel_dev)
if (!service->admin)
continue;
if (service->event_hld(accel_dev, ADF_EVENT_START)) {
- pr_err("QAT: Failed to start service %s\n",
- service->name);
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to start service %s\n",
+ service->name);
return -EFAULT;
}
set_bit(accel_dev->accel_id, &service->start_status);
@@ -242,8 +246,9 @@ int adf_dev_start(struct adf_accel_dev *accel_dev)
if (service->admin)
continue;
if (service->event_hld(accel_dev, ADF_EVENT_START)) {
- pr_err("QAT: Failed to start service %s\n",
- service->name);
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to start service %s\n",
+ service->name);
return -EFAULT;
}
set_bit(accel_dev->accel_id, &service->start_status);
@@ -253,7 +258,8 @@ int adf_dev_start(struct adf_accel_dev *accel_dev)
set_bit(ADF_STATUS_STARTED, &accel_dev->status);
if (qat_algs_register()) {
- pr_err("QAT: Failed to register crypto algs\n");
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to register crypto algs\n");
set_bit(ADF_STATUS_STARTING, &accel_dev->status);
clear_bit(ADF_STATUS_STARTED, &accel_dev->status);
return -EFAULT;
@@ -287,7 +293,8 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev)
clear_bit(ADF_STATUS_STARTED, &accel_dev->status);
if (qat_algs_unregister())
- pr_err("QAT: Failed to unregister crypto algs\n");
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to unregister crypto algs\n");
list_for_each(list_itr, &service_table) {
service = list_entry(list_itr, struct service_hndl, list);
@@ -310,8 +317,9 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev)
if (!test_bit(accel_dev->accel_id, &service->start_status))
continue;
if (service->event_hld(accel_dev, ADF_EVENT_STOP))
- pr_err("QAT: Failed to shutdown service %s\n",
- service->name);
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to shutdown service %s\n",
+ service->name);
else
clear_bit(accel_dev->accel_id, &service->start_status);
}
@@ -321,7 +329,7 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev)
if (test_bit(ADF_STATUS_AE_STARTED, &accel_dev->status)) {
if (adf_ae_stop(accel_dev))
- pr_err("QAT: failed to stop AE\n");
+ dev_err(&GET_DEV(accel_dev), "failed to stop AE\n");
else
clear_bit(ADF_STATUS_AE_STARTED, &accel_dev->status);
}
@@ -350,16 +358,14 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev)
}
if (test_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status)) {
- if (adf_ae_fw_release(accel_dev))
- pr_err("QAT: Failed to release the ucode\n");
- else
- clear_bit(ADF_STATUS_AE_UCODE_LOADED,
- &accel_dev->status);
+ adf_ae_fw_release(accel_dev);
+ clear_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status);
}
if (test_bit(ADF_STATUS_AE_INITIALISED, &accel_dev->status)) {
if (adf_ae_shutdown(accel_dev))
- pr_err("QAT: Failed to shutdown Accel Engine\n");
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to shutdown Accel Engine\n");
else
clear_bit(ADF_STATUS_AE_INITIALISED,
&accel_dev->status);
@@ -372,8 +378,9 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev)
if (!test_bit(accel_dev->accel_id, &service->init_status))
continue;
if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN))
- pr_err("QAT: Failed to shutdown service %s\n",
- service->name);
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to shutdown service %s\n",
+ service->name);
else
clear_bit(accel_dev->accel_id, &service->init_status);
}
@@ -384,8 +391,9 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev)
if (!test_bit(accel_dev->accel_id, &service->init_status))
continue;
if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN))
- pr_err("QAT: Failed to shutdown service %s\n",
- service->name);
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to shutdown service %s\n",
+ service->name);
else
clear_bit(accel_dev->accel_id, &service->init_status);
}
@@ -419,16 +427,18 @@ int adf_dev_restarting_notify(struct adf_accel_dev *accel_dev)
if (service->admin)
continue;
if (service->event_hld(accel_dev, ADF_EVENT_RESTARTING))
- pr_err("QAT: Failed to restart service %s.\n",
- service->name);
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to restart service %s.\n",
+ service->name);
}
list_for_each(list_itr, &service_table) {
service = list_entry(list_itr, struct service_hndl, list);
if (!service->admin)
continue;
if (service->event_hld(accel_dev, ADF_EVENT_RESTARTING))
- pr_err("QAT: Failed to restart service %s.\n",
- service->name);
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to restart service %s.\n",
+ service->name);
}
return 0;
}
@@ -443,16 +453,18 @@ int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev)
if (service->admin)
continue;
if (service->event_hld(accel_dev, ADF_EVENT_RESTARTED))
- pr_err("QAT: Failed to restart service %s.\n",
- service->name);
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to restart service %s.\n",
+ service->name);
}
list_for_each(list_itr, &service_table) {
service = list_entry(list_itr, struct service_hndl, list);
if (!service->admin)
continue;
if (service->event_hld(accel_dev, ADF_EVENT_RESTARTED))
- pr_err("QAT: Failed to restart service %s.\n",
- service->name);
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to restart service %s.\n",
+ service->name);
}
return 0;
}
diff --git a/drivers/crypto/qat/qat_common/adf_transport.c b/drivers/crypto/qat/qat_common/adf_transport.c
index 7dd54aaee9fa..ccec327489da 100644
--- a/drivers/crypto/qat/qat_common/adf_transport.c
+++ b/drivers/crypto/qat/qat_common/adf_transport.c
@@ -195,7 +195,7 @@ static int adf_init_ring(struct adf_etr_ring_data *ring)
memset(ring->base_addr, 0x7F, ring_size_bytes);
/* The base_addr has to be aligned to the size of the buffer */
if (adf_check_ring_alignment(ring->dma_addr, ring_size_bytes)) {
- pr_err("QAT: Ring address not aligned\n");
+ dev_err(&GET_DEV(accel_dev), "Ring address not aligned\n");
dma_free_coherent(&GET_DEV(accel_dev), ring_size_bytes,
ring->base_addr, ring->dma_addr);
return -EFAULT;
@@ -242,32 +242,33 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
int ret;
if (bank_num >= GET_MAX_BANKS(accel_dev)) {
- pr_err("QAT: Invalid bank number\n");
+ dev_err(&GET_DEV(accel_dev), "Invalid bank number\n");
return -EFAULT;
}
if (msg_size > ADF_MSG_SIZE_TO_BYTES(ADF_MAX_MSG_SIZE)) {
- pr_err("QAT: Invalid msg size\n");
+ dev_err(&GET_DEV(accel_dev), "Invalid msg size\n");
return -EFAULT;
}
if (ADF_MAX_INFLIGHTS(adf_verify_ring_size(msg_size, num_msgs),
ADF_BYTES_TO_MSG_SIZE(msg_size)) < 2) {
- pr_err("QAT: Invalid ring size for given msg size\n");
+ dev_err(&GET_DEV(accel_dev),
+ "Invalid ring size for given msg size\n");
return -EFAULT;
}
if (adf_cfg_get_param_value(accel_dev, section, ring_name, val)) {
- pr_err("QAT: Section %s, no such entry : %s\n",
- section, ring_name);
+ dev_err(&GET_DEV(accel_dev), "Section %s, no such entry : %s\n",
+ section, ring_name);
return -EFAULT;
}
if (kstrtouint(val, 10, &ring_num)) {
- pr_err("QAT: Can't get ring number\n");
+ dev_err(&GET_DEV(accel_dev), "Can't get ring number\n");
return -EFAULT;
}
bank = &transport_data->banks[bank_num];
if (adf_reserve_ring(bank, ring_num)) {
- pr_err("QAT: Ring %d, %s already exists.\n",
- ring_num, ring_name);
+ dev_err(&GET_DEV(accel_dev), "Ring %d, %s already exists.\n",
+ ring_num, ring_name);
return -EFAULT;
}
ring = &bank->rings[ring_num];
@@ -287,7 +288,8 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
accel_dev->hw_device->hw_arb_ring_enable(ring);
if (adf_ring_debugfs_add(ring, ring_name)) {
- pr_err("QAT: Couldn't add ring debugfs entry\n");
+ dev_err(&GET_DEV(accel_dev),
+ "Couldn't add ring debugfs entry\n");
ret = -EFAULT;
goto err;
}
@@ -428,7 +430,8 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev,
goto err;
} else {
if (i < hw_data->tx_rx_gap) {
- pr_err("QAT: Invalid tx rings mask config\n");
+ dev_err(&GET_DEV(accel_dev),
+ "Invalid tx rings mask config\n");
goto err;
}
tx_ring = &bank->rings[i - hw_data->tx_rx_gap];
@@ -436,7 +439,8 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev,
}
}
if (adf_bank_debugfs_add(bank)) {
- pr_err("QAT: Failed to add bank debugfs entry\n");
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to add bank debugfs entry\n");
goto err;
}
@@ -492,7 +496,8 @@ int adf_init_etr_data(struct adf_accel_dev *accel_dev)
etr_data->debug = debugfs_create_dir("transport",
accel_dev->debugfs_dir);
if (!etr_data->debug) {
- pr_err("QAT: Unable to create transport debugfs entry\n");
+ dev_err(&GET_DEV(accel_dev),
+ "Unable to create transport debugfs entry\n");
ret = -ENOENT;
goto err_bank_debug;
}
diff --git a/drivers/crypto/qat/qat_common/adf_transport_debug.c b/drivers/crypto/qat/qat_common/adf_transport_debug.c
index 6b6974553514..e41986967294 100644
--- a/drivers/crypto/qat/qat_common/adf_transport_debug.c
+++ b/drivers/crypto/qat/qat_common/adf_transport_debug.c
@@ -100,6 +100,8 @@ static int adf_ring_show(struct seq_file *sfile, void *v)
empty = READ_CSR_E_STAT(csr, bank->bank_number);
seq_puts(sfile, "------- Ring configuration -------\n");
+ seq_printf(sfile, "ring name: %s\n",
+ ring->ring_debug->ring_name);
seq_printf(sfile, "ring num %d, bank num %d\n",
ring->ring_number, ring->bank->bank_number);
seq_printf(sfile, "head %x, tail %x, empty: %d\n",
diff --git a/drivers/crypto/qat/qat_common/icp_qat_hw.h b/drivers/crypto/qat/qat_common/icp_qat_hw.h
index 68f191b653b0..121d5e6e46ca 100644
--- a/drivers/crypto/qat/qat_common/icp_qat_hw.h
+++ b/drivers/crypto/qat/qat_common/icp_qat_hw.h
@@ -145,7 +145,7 @@ struct icp_qat_hw_auth_setup {
};
#define QAT_HW_DEFAULT_ALIGNMENT 8
-#define QAT_HW_ROUND_UP(val, n) (((val) + ((n)-1)) & (~(n-1)))
+#define QAT_HW_ROUND_UP(val, n) (((val) + ((n) - 1)) & (~(n - 1)))
#define ICP_QAT_HW_NULL_STATE1_SZ 32
#define ICP_QAT_HW_MD5_STATE1_SZ 16
#define ICP_QAT_HW_SHA1_STATE1_SZ 20
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c
index 828f2a686aab..3bd705ca5973 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.c
+++ b/drivers/crypto/qat/qat_common/qat_crypto.c
@@ -110,13 +110,13 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node)
list_for_each(itr, adf_devmgr_get_head()) {
accel_dev = list_entry(itr, struct adf_accel_dev, list);
if ((node == dev_to_node(&GET_DEV(accel_dev)) ||
- dev_to_node(&GET_DEV(accel_dev)) < 0)
- && adf_dev_started(accel_dev))
+ dev_to_node(&GET_DEV(accel_dev)) < 0) &&
+ adf_dev_started(accel_dev))
break;
accel_dev = NULL;
}
if (!accel_dev) {
- pr_err("QAT: Could not find device on node %d\n", node);
+ pr_err("QAT: Could not find a device on node %d\n", node);
accel_dev = adf_devmgr_get_first();
}
if (!accel_dev || !adf_dev_started(accel_dev))
@@ -137,7 +137,8 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node)
if (atomic_add_return(1, &inst_best->refctr) == 1) {
if (adf_dev_get(accel_dev)) {
atomic_dec(&inst_best->refctr);
- pr_err("QAT: Could increment dev refctr\n");
+ dev_err(&GET_DEV(accel_dev),
+ "Could not increment dev refctr\n");
return NULL;
}
}
diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c
index b818c19713bf..274ff7e9de6e 100644
--- a/drivers/crypto/qat/qat_common/qat_hal.c
+++ b/drivers/crypto/qat/qat_common/qat_hal.c
@@ -434,8 +434,8 @@ static void qat_hal_reset_timestamp(struct icp_qat_fw_loader_handle *handle)
SET_GLB_CSR(handle, MISC_CONTROL, misc_ctl | MC_TIMESTAMP_ENABLE);
}
-#define ESRAM_AUTO_TINIT (1<<2)
-#define ESRAM_AUTO_TINIT_DONE (1<<3)
+#define ESRAM_AUTO_TINIT BIT(2)
+#define ESRAM_AUTO_TINIT_DONE BIT(3)
#define ESRAM_AUTO_INIT_USED_CYCLES (1640)
#define ESRAM_AUTO_INIT_CSR_OFFSET 0xC1C
static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle)
@@ -718,7 +718,7 @@ int qat_hal_init(struct adf_accel_dev *accel_dev)
handle->hal_handle->ae_max_num = max_en_ae_id + 1;
/* take all AEs out of reset */
if (qat_hal_clr_reset(handle)) {
- pr_err("QAT: qat_hal_clr_reset error\n");
+ dev_err(&GET_DEV(accel_dev), "qat_hal_clr_reset error\n");
goto out_err;
}
if (qat_hal_clear_gpr(handle))
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_admin.c b/drivers/crypto/qat/qat_dh895xcc/adf_admin.c
index 53c491b59f07..e4666065c399 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_admin.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_admin.c
@@ -93,7 +93,8 @@ int adf_put_admin_msg_sync(struct adf_accel_dev *accel_dev,
memcpy(out, admin->virt_addr + offset +
ADF_ADMINMSG_LEN, ADF_ADMINMSG_LEN);
else
- pr_err("QAT: Failed to send admin msg to accelerator\n");
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to send admin msg to accelerator\n");
mutex_unlock(&admin->lock);
return received ? 0 : -EFAULT;
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
index 6a735d5c0e37..b1386922d7a2 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
@@ -150,7 +150,8 @@ void adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev,
*arb_map_config = thrd_to_arb_map_sku6;
break;
default:
- pr_err("QAT: The configuration doesn't match any SKU");
+ dev_err(&GET_DEV(accel_dev),
+ "The configuration doesn't match any SKU");
*arb_map_config = NULL;
}
}
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
index 01e0be21e93a..25269a9f24a2 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
@@ -73,11 +73,11 @@
/* Error detection and correction */
#define ADF_DH895XCC_AE_CTX_ENABLES(i) (i * 0x1000 + 0x20818)
#define ADF_DH895XCC_AE_MISC_CONTROL(i) (i * 0x1000 + 0x20960)
-#define ADF_DH895XCC_ENABLE_AE_ECC_ERR (1 << 28)
-#define ADF_DH895XCC_ENABLE_AE_ECC_PARITY_CORR (1 << 24 | 1 << 12)
+#define ADF_DH895XCC_ENABLE_AE_ECC_ERR BIT(28)
+#define ADF_DH895XCC_ENABLE_AE_ECC_PARITY_CORR (BIT(24) | BIT(12))
#define ADF_DH895XCC_UERRSSMSH(i) (i * 0x4000 + 0x18)
#define ADF_DH895XCC_CERRSSMSH(i) (i * 0x4000 + 0x10)
-#define ADF_DH895XCC_ERRSSMSH_EN (1 << 3)
+#define ADF_DH895XCC_ERRSSMSH_EN BIT(3)
/* Admin Messages Registers */
#define ADF_DH895XCC_ADMINMSGUR_OFFSET (0x3A000 + 0x574)
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
index 8ffdb95c9804..9decea2779c6 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
@@ -236,7 +236,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
accel_dev = kzalloc_node(sizeof(*accel_dev), GFP_KERNEL,
- dev_to_node(&pdev->dev));
+ dev_to_node(&pdev->dev));
if (!accel_dev)
return -ENOMEM;
@@ -379,7 +379,7 @@ out_err:
return ret;
}
-static void __exit adf_remove(struct pci_dev *pdev)
+static void adf_remove(struct pci_dev *pdev)
{
struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_isr.c b/drivers/crypto/qat/qat_dh895xcc/adf_isr.c
index fe8f89697ad8..0d03c109c2d3 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_isr.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_isr.c
@@ -73,7 +73,7 @@ static int adf_enable_msix(struct adf_accel_dev *accel_dev)
if (pci_enable_msix_exact(pci_dev_info->pci_dev,
pci_dev_info->msix_entries.entries,
msix_num_entries)) {
- pr_err("QAT: Failed to enable MSIX IRQ\n");
+ dev_err(&GET_DEV(accel_dev), "Failed to enable MSIX IRQ\n");
return -EFAULT;
}
return 0;
@@ -97,7 +97,8 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
{
struct adf_accel_dev *accel_dev = dev_ptr;
- pr_info("QAT: qat_dev%d spurious AE interrupt\n", accel_dev->accel_id);
+ dev_info(&GET_DEV(accel_dev), "qat_dev%d spurious AE interrupt\n",
+ accel_dev->accel_id);
return IRQ_HANDLED;
}
@@ -121,8 +122,9 @@ static int adf_request_irqs(struct adf_accel_dev *accel_dev)
ret = request_irq(msixe[i].vector,
adf_msix_isr_bundle, 0, name, bank);
if (ret) {
- pr_err("QAT: failed to enable irq %d for %s\n",
- msixe[i].vector, name);
+ dev_err(&GET_DEV(accel_dev),
+ "failed to enable irq %d for %s\n",
+ msixe[i].vector, name);
return ret;
}
@@ -136,8 +138,9 @@ static int adf_request_irqs(struct adf_accel_dev *accel_dev)
"qat%d-ae-cluster", accel_dev->accel_id);
ret = request_irq(msixe[i].vector, adf_msix_isr_ae, 0, name, accel_dev);
if (ret) {
- pr_err("QAT: failed to enable irq %d, for %s\n",
- msixe[i].vector, name);
+ dev_err(&GET_DEV(accel_dev),
+ "failed to enable irq %d, for %s\n",
+ msixe[i].vector, name);
return ret;
}
return ret;
diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index 290a7f0a681f..6be377f6b9e7 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -479,6 +479,7 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
struct scatterlist *sg;
int ret;
int i, j;
+ int idx = 0;
/* Copy new key if necessary */
if (ctx->flags & FLAGS_NEW_KEY) {
@@ -486,17 +487,20 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
ctx->flags &= ~FLAGS_NEW_KEY;
if (dev->flags & FLAGS_CBC) {
- dev->hw_desc[0]->len1 = AES_BLOCK_SIZE;
- dev->hw_desc[0]->p1 = dev->iv_phys_base;
+ dev->hw_desc[idx]->len1 = AES_BLOCK_SIZE;
+ dev->hw_desc[idx]->p1 = dev->iv_phys_base;
} else {
- dev->hw_desc[0]->len1 = 0;
- dev->hw_desc[0]->p1 = 0;
+ dev->hw_desc[idx]->len1 = 0;
+ dev->hw_desc[idx]->p1 = 0;
}
- dev->hw_desc[0]->len2 = ctx->keylen;
- dev->hw_desc[0]->p2 = dev->key_phys_base;
- dev->hw_desc[0]->next = dev->hw_phys_desc[1];
+ dev->hw_desc[idx]->len2 = ctx->keylen;
+ dev->hw_desc[idx]->p2 = dev->key_phys_base;
+ dev->hw_desc[idx]->next = dev->hw_phys_desc[1];
+
+ dev->hw_desc[idx]->hdr = sahara_aes_key_hdr(dev);
+
+ idx++;
}
- dev->hw_desc[0]->hdr = sahara_aes_key_hdr(dev);
dev->nb_in_sg = sahara_sg_length(dev->in_sg, dev->total);
dev->nb_out_sg = sahara_sg_length(dev->out_sg, dev->total);
@@ -520,7 +524,7 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
}
/* Create input links */
- dev->hw_desc[1]->p1 = dev->hw_phys_link[0];
+ dev->hw_desc[idx]->p1 = dev->hw_phys_link[0];
sg = dev->in_sg;
for (i = 0; i < dev->nb_in_sg; i++) {
dev->hw_link[i]->len = sg->length;
@@ -534,7 +538,7 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
}
/* Create output links */
- dev->hw_desc[1]->p2 = dev->hw_phys_link[i];
+ dev->hw_desc[idx]->p2 = dev->hw_phys_link[i];
sg = dev->out_sg;
for (j = i; j < dev->nb_out_sg + i; j++) {
dev->hw_link[j]->len = sg->length;
@@ -548,10 +552,10 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
}
/* Fill remaining fields of hw_desc[1] */
- dev->hw_desc[1]->hdr = sahara_aes_data_link_hdr(dev);
- dev->hw_desc[1]->len1 = dev->total;
- dev->hw_desc[1]->len2 = dev->total;
- dev->hw_desc[1]->next = 0;
+ dev->hw_desc[idx]->hdr = sahara_aes_data_link_hdr(dev);
+ dev->hw_desc[idx]->len1 = dev->total;
+ dev->hw_desc[idx]->len2 = dev->total;
+ dev->hw_desc[idx]->next = 0;
sahara_dump_descriptors(dev);
sahara_dump_links(dev);
@@ -576,6 +580,7 @@ static int sahara_aes_process(struct ablkcipher_request *req)
struct sahara_ctx *ctx;
struct sahara_aes_reqctx *rctx;
int ret;
+ unsigned long timeout;
/* Request is ready to be dispatched by the device */
dev_dbg(dev->device,
@@ -601,10 +606,12 @@ static int sahara_aes_process(struct ablkcipher_request *req)
reinit_completion(&dev->dma_completion);
ret = sahara_hw_descriptor_create(dev);
+ if (ret)
+ return -EINVAL;
- ret = wait_for_completion_timeout(&dev->dma_completion,
+ timeout = wait_for_completion_timeout(&dev->dma_completion,
msecs_to_jiffies(SAHARA_TIMEOUT_MS));
- if (!ret) {
+ if (!timeout) {
dev_err(dev->device, "AES timeout\n");
return -ETIMEDOUT;
}
@@ -1044,7 +1051,8 @@ static int sahara_sha_process(struct ahash_request *req)
{
struct sahara_dev *dev = dev_ptr;
struct sahara_sha_reqctx *rctx = ahash_request_ctx(req);
- int ret = -EINPROGRESS;
+ int ret;
+ unsigned long timeout;
ret = sahara_sha_prepare_request(req);
if (!ret)
@@ -1070,9 +1078,9 @@ static int sahara_sha_process(struct ahash_request *req)
sahara_write(dev, dev->hw_phys_desc[0], SAHARA_REG_DAR);
- ret = wait_for_completion_timeout(&dev->dma_completion,
+ timeout = wait_for_completion_timeout(&dev->dma_completion,
msecs_to_jiffies(SAHARA_TIMEOUT_MS));
- if (!ret) {
+ if (!timeout) {
dev_err(dev->device, "SHA timeout\n");
return -ETIMEDOUT;
}
@@ -1092,15 +1100,20 @@ static int sahara_queue_manage(void *data)
{
struct sahara_dev *dev = (struct sahara_dev *)data;
struct crypto_async_request *async_req;
+ struct crypto_async_request *backlog;
int ret = 0;
do {
__set_current_state(TASK_INTERRUPTIBLE);
mutex_lock(&dev->queue_mutex);
+ backlog = crypto_get_backlog(&dev->queue);
async_req = crypto_dequeue_request(&dev->queue);
mutex_unlock(&dev->queue_mutex);
+ if (backlog)
+ backlog->complete(backlog, -EINPROGRESS);
+
if (async_req) {
if (crypto_tfm_alg_type(async_req->tfm) ==
CRYPTO_ALG_TYPE_AHASH) {
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index ebbae8d3ce0d..857414afa29a 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -637,8 +637,6 @@ static void talitos_unregister_rng(struct device *dev)
#define TALITOS_MAX_KEY_SIZE 96
#define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */
-#define MD5_BLOCK_SIZE 64
-
struct talitos_ctx {
struct device *dev;
int ch;
@@ -2195,7 +2193,7 @@ static struct talitos_alg_template driver_algs[] = {
.halg.base = {
.cra_name = "md5",
.cra_driver_name = "md5-talitos",
- .cra_blocksize = MD5_BLOCK_SIZE,
+ .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC,
}
@@ -2285,7 +2283,7 @@ static struct talitos_alg_template driver_algs[] = {
.halg.base = {
.cra_name = "hmac(md5)",
.cra_driver_name = "hmac-md5-talitos",
- .cra_blocksize = MD5_BLOCK_SIZE,
+ .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC,
}
@@ -2706,20 +2704,16 @@ static int talitos_probe(struct platform_device *ofdev)
goto err_out;
}
+ priv->fifo_len = roundup_pow_of_two(priv->chfifo_len);
+
for (i = 0; i < priv->num_channels; i++) {
priv->chan[i].reg = priv->reg + TALITOS_CH_STRIDE * (i + 1);
if (!priv->irq[1] || !(i & 1))
priv->chan[i].reg += TALITOS_CH_BASE_OFFSET;
- }
- for (i = 0; i < priv->num_channels; i++) {
spin_lock_init(&priv->chan[i].head_lock);
spin_lock_init(&priv->chan[i].tail_lock);
- }
- priv->fifo_len = roundup_pow_of_two(priv->chfifo_len);
-
- for (i = 0; i < priv->num_channels; i++) {
priv->chan[i].fifo = kzalloc(sizeof(struct talitos_request) *
priv->fifo_len, GFP_KERNEL);
if (!priv->chan[i].fifo) {
@@ -2727,11 +2721,10 @@ static int talitos_probe(struct platform_device *ofdev)
err = -ENOMEM;
goto err_out;
}
- }
- for (i = 0; i < priv->num_channels; i++)
atomic_set(&priv->chan[i].submit_count,
-(priv->chfifo_len - 1));
+ }
dma_set_mask(dev, DMA_BIT_MASK(36));
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index 187a8fd7eee7..5f5f360628fc 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -184,7 +184,7 @@ static int hash_set_dma_transfer(struct hash_ctx *ctx, struct scatterlist *sg,
direction, DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
if (!desc) {
dev_err(ctx->device->dev,
- "%s: device_prep_slave_sg() failed!\n", __func__);
+ "%s: dmaengine_prep_slave_sg() failed!\n", __func__);
return -EFAULT;
}
diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig
new file mode 100644
index 000000000000..771babf16aa0
--- /dev/null
+++ b/drivers/crypto/vmx/Kconfig
@@ -0,0 +1,8 @@
+config CRYPTO_DEV_VMX_ENCRYPT
+ tristate "Encryption acceleration support on P8 CPU"
+ depends on PPC64 && CRYPTO_DEV_VMX
+ default y
+ help
+ Support for VMX cryptographic acceleration instructions on Power8 CPU.
+ This module supports acceleration for AES and GHASH in hardware. If you
+ choose 'M' here, this module will be called vmx-crypto.
diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile
new file mode 100644
index 000000000000..c699c6e6c82e
--- /dev/null
+++ b/drivers/crypto/vmx/Makefile
@@ -0,0 +1,19 @@
+obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o
+vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o ghash.o
+
+ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
+TARGET := linux-ppc64le
+else
+TARGET := linux-pcc64
+endif
+
+quiet_cmd_perl = PERL $@
+ cmd_perl = $(PERL) $(<) $(TARGET) > $(@)
+
+$(src)/aesp8-ppc.S: $(src)/aesp8-ppc.pl
+ $(call cmd,perl)
+
+$(src)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl
+ $(call cmd,perl)
+
+.PRECIOUS: $(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S
diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c
new file mode 100644
index 000000000000..ab300ea19434
--- /dev/null
+++ b/drivers/crypto/vmx/aes.c
@@ -0,0 +1,139 @@
+/**
+ * AES routines supporting VMX instructions on the Power 8
+ *
+ * Copyright (C) 2015 International Business Machines Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 only.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/crypto.h>
+#include <linux/delay.h>
+#include <linux/hardirq.h>
+#include <asm/switch_to.h>
+#include <crypto/aes.h>
+
+#include "aesp8-ppc.h"
+
+struct p8_aes_ctx {
+ struct crypto_cipher *fallback;
+ struct aes_key enc_key;
+ struct aes_key dec_key;
+};
+
+static int p8_aes_init(struct crypto_tfm *tfm)
+{
+ const char *alg;
+ struct crypto_cipher *fallback;
+ struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (!(alg = crypto_tfm_alg_name(tfm))) {
+ printk(KERN_ERR "Failed to get algorithm name.\n");
+ return -ENOENT;
+ }
+
+ fallback = crypto_alloc_cipher(alg, 0 ,CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(fallback)) {
+ printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n",
+ alg, PTR_ERR(fallback));
+ return PTR_ERR(fallback);
+ }
+ printk(KERN_INFO "Using '%s' as fallback implementation.\n",
+ crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback));
+
+ crypto_cipher_set_flags(fallback,
+ crypto_cipher_get_flags((struct crypto_cipher *) tfm));
+ ctx->fallback = fallback;
+
+ return 0;
+}
+
+static void p8_aes_exit(struct crypto_tfm *tfm)
+{
+ struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (ctx->fallback) {
+ crypto_free_cipher(ctx->fallback);
+ ctx->fallback = NULL;
+ }
+}
+
+static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ int ret;
+ struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ pagefault_disable();
+ enable_kernel_altivec();
+ ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
+ ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key);
+ pagefault_enable();
+
+ ret += crypto_cipher_setkey(ctx->fallback, key, keylen);
+ return ret;
+}
+
+static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (in_interrupt()) {
+ crypto_cipher_encrypt_one(ctx->fallback, dst, src);
+ } else {
+ pagefault_disable();
+ enable_kernel_altivec();
+ aes_p8_encrypt(src, dst, &ctx->enc_key);
+ pagefault_enable();
+ }
+}
+
+static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (in_interrupt()) {
+ crypto_cipher_decrypt_one(ctx->fallback, dst, src);
+ } else {
+ pagefault_disable();
+ enable_kernel_altivec();
+ aes_p8_decrypt(src, dst, &ctx->dec_key);
+ pagefault_enable();
+ }
+}
+
+struct crypto_alg p8_aes_alg = {
+ .cra_name = "aes",
+ .cra_driver_name = "p8_aes",
+ .cra_module = THIS_MODULE,
+ .cra_priority = 1000,
+ .cra_type = NULL,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_alignmask = 0,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct p8_aes_ctx),
+ .cra_init = p8_aes_init,
+ .cra_exit = p8_aes_exit,
+ .cra_cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = p8_aes_setkey,
+ .cia_encrypt = p8_aes_encrypt,
+ .cia_decrypt = p8_aes_decrypt,
+ },
+};
+
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
new file mode 100644
index 000000000000..1a559b7dddb5
--- /dev/null
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -0,0 +1,184 @@
+/**
+ * AES CBC routines supporting VMX instructions on the Power 8
+ *
+ * Copyright (C) 2015 International Business Machines Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 only.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/crypto.h>
+#include <linux/delay.h>
+#include <linux/hardirq.h>
+#include <asm/switch_to.h>
+#include <crypto/aes.h>
+#include <crypto/scatterwalk.h>
+
+#include "aesp8-ppc.h"
+
+struct p8_aes_cbc_ctx {
+ struct crypto_blkcipher *fallback;
+ struct aes_key enc_key;
+ struct aes_key dec_key;
+};
+
+static int p8_aes_cbc_init(struct crypto_tfm *tfm)
+{
+ const char *alg;
+ struct crypto_blkcipher *fallback;
+ struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (!(alg = crypto_tfm_alg_name(tfm))) {
+ printk(KERN_ERR "Failed to get algorithm name.\n");
+ return -ENOENT;
+ }
+
+ fallback = crypto_alloc_blkcipher(alg, 0 ,CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(fallback)) {
+ printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n",
+ alg, PTR_ERR(fallback));
+ return PTR_ERR(fallback);
+ }
+ printk(KERN_INFO "Using '%s' as fallback implementation.\n",
+ crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback));
+
+ crypto_blkcipher_set_flags(fallback,
+ crypto_blkcipher_get_flags((struct crypto_blkcipher *) tfm));
+ ctx->fallback = fallback;
+
+ return 0;
+}
+
+static void p8_aes_cbc_exit(struct crypto_tfm *tfm)
+{
+ struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (ctx->fallback) {
+ crypto_free_blkcipher(ctx->fallback);
+ ctx->fallback = NULL;
+ }
+}
+
+static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ int ret;
+ struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ pagefault_disable();
+ enable_kernel_altivec();
+ ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
+ ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key);
+ pagefault_enable();
+
+ ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen);
+ return ret;
+}
+
+static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ int ret;
+ struct blkcipher_walk walk;
+ struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(
+ crypto_blkcipher_tfm(desc->tfm));
+ struct blkcipher_desc fallback_desc = {
+ .tfm = ctx->fallback,
+ .info = desc->info,
+ .flags = desc->flags
+ };
+
+ if (in_interrupt()) {
+ ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes);
+ } else {
+ pagefault_disable();
+ enable_kernel_altivec();
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ ret = blkcipher_walk_virt(desc, &walk);
+ while ((nbytes = walk.nbytes)) {
+ aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
+ nbytes & AES_BLOCK_MASK, &ctx->enc_key, walk.iv, 1);
+ nbytes &= AES_BLOCK_SIZE - 1;
+ ret = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+
+ pagefault_enable();
+ }
+
+ return ret;
+}
+
+static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ int ret;
+ struct blkcipher_walk walk;
+ struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(
+ crypto_blkcipher_tfm(desc->tfm));
+ struct blkcipher_desc fallback_desc = {
+ .tfm = ctx->fallback,
+ .info = desc->info,
+ .flags = desc->flags
+ };
+
+ if (in_interrupt()) {
+ ret = crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes);
+ } else {
+ pagefault_disable();
+ enable_kernel_altivec();
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ ret = blkcipher_walk_virt(desc, &walk);
+ while ((nbytes = walk.nbytes)) {
+ aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
+ nbytes & AES_BLOCK_MASK, &ctx->dec_key, walk.iv, 0);
+ nbytes &= AES_BLOCK_SIZE - 1;
+ ret = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+
+ pagefault_enable();
+ }
+
+ return ret;
+}
+
+
+struct crypto_alg p8_aes_cbc_alg = {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "p8_aes_cbc",
+ .cra_module = THIS_MODULE,
+ .cra_priority = 1000,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_alignmask = 0,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct p8_aes_cbc_ctx),
+ .cra_init = p8_aes_cbc_init,
+ .cra_exit = p8_aes_cbc_exit,
+ .cra_blkcipher = {
+ .ivsize = 0,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = p8_aes_cbc_setkey,
+ .encrypt = p8_aes_cbc_encrypt,
+ .decrypt = p8_aes_cbc_decrypt,
+ },
+};
+
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
new file mode 100644
index 000000000000..96dbee4bf4a6
--- /dev/null
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -0,0 +1,167 @@
+/**
+ * AES CTR routines supporting VMX instructions on the Power 8
+ *
+ * Copyright (C) 2015 International Business Machines Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 only.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/crypto.h>
+#include <linux/delay.h>
+#include <linux/hardirq.h>
+#include <asm/switch_to.h>
+#include <crypto/aes.h>
+#include <crypto/scatterwalk.h>
+#include "aesp8-ppc.h"
+
+struct p8_aes_ctr_ctx {
+ struct crypto_blkcipher *fallback;
+ struct aes_key enc_key;
+};
+
+static int p8_aes_ctr_init(struct crypto_tfm *tfm)
+{
+ const char *alg;
+ struct crypto_blkcipher *fallback;
+ struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (!(alg = crypto_tfm_alg_name(tfm))) {
+ printk(KERN_ERR "Failed to get algorithm name.\n");
+ return -ENOENT;
+ }
+
+ fallback = crypto_alloc_blkcipher(alg, 0 ,CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(fallback)) {
+ printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n",
+ alg, PTR_ERR(fallback));
+ return PTR_ERR(fallback);
+ }
+ printk(KERN_INFO "Using '%s' as fallback implementation.\n",
+ crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback));
+
+ crypto_blkcipher_set_flags(fallback,
+ crypto_blkcipher_get_flags((struct crypto_blkcipher *) tfm));
+ ctx->fallback = fallback;
+
+ return 0;
+}
+
+static void p8_aes_ctr_exit(struct crypto_tfm *tfm)
+{
+ struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (ctx->fallback) {
+ crypto_free_blkcipher(ctx->fallback);
+ ctx->fallback = NULL;
+ }
+}
+
+static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ int ret;
+ struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ pagefault_disable();
+ enable_kernel_altivec();
+ ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
+ pagefault_enable();
+
+ ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen);
+ return ret;
+}
+
+static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx,
+ struct blkcipher_walk *walk)
+{
+ u8 *ctrblk = walk->iv;
+ u8 keystream[AES_BLOCK_SIZE];
+ u8 *src = walk->src.virt.addr;
+ u8 *dst = walk->dst.virt.addr;
+ unsigned int nbytes = walk->nbytes;
+
+ pagefault_disable();
+ enable_kernel_altivec();
+ aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key);
+ pagefault_enable();
+
+ crypto_xor(keystream, src, nbytes);
+ memcpy(dst, keystream, nbytes);
+ crypto_inc(ctrblk, AES_BLOCK_SIZE);
+}
+
+static int p8_aes_ctr_crypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ int ret;
+ struct blkcipher_walk walk;
+ struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(
+ crypto_blkcipher_tfm(desc->tfm));
+ struct blkcipher_desc fallback_desc = {
+ .tfm = ctx->fallback,
+ .info = desc->info,
+ .flags = desc->flags
+ };
+
+ if (in_interrupt()) {
+ ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes);
+ } else {
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ ret = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+ while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
+ pagefault_disable();
+ enable_kernel_altivec();
+ aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr, walk.dst.virt.addr,
+ (nbytes & AES_BLOCK_MASK)/AES_BLOCK_SIZE, &ctx->enc_key, walk.iv);
+ pagefault_enable();
+
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ nbytes &= AES_BLOCK_SIZE - 1;
+ ret = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ if (walk.nbytes) {
+ p8_aes_ctr_final(ctx, &walk);
+ ret = blkcipher_walk_done(desc, &walk, 0);
+ }
+ }
+
+ return ret;
+}
+
+struct crypto_alg p8_aes_ctr_alg = {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "p8_aes_ctr",
+ .cra_module = THIS_MODULE,
+ .cra_priority = 1000,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_alignmask = 0,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct p8_aes_ctr_ctx),
+ .cra_init = p8_aes_ctr_init,
+ .cra_exit = p8_aes_ctr_exit,
+ .cra_blkcipher = {
+ .ivsize = 0,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = p8_aes_ctr_setkey,
+ .encrypt = p8_aes_ctr_crypt,
+ .decrypt = p8_aes_ctr_crypt,
+ },
+};
diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h
new file mode 100644
index 000000000000..e963945a83e1
--- /dev/null
+++ b/drivers/crypto/vmx/aesp8-ppc.h
@@ -0,0 +1,20 @@
+#include <linux/types.h>
+#include <crypto/aes.h>
+
+#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
+
+struct aes_key {
+ u8 key[AES_MAX_KEYLENGTH];
+ int rounds;
+};
+
+int aes_p8_set_encrypt_key(const u8 *userKey, const int bits,
+ struct aes_key *key);
+int aes_p8_set_decrypt_key(const u8 *userKey, const int bits,
+ struct aes_key *key);
+void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key);
+void aes_p8_decrypt(const u8 *in, u8 *out,const struct aes_key *key);
+void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len,
+ const struct aes_key *key, u8 *iv, const int enc);
+void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out,
+ size_t len, const struct aes_key *key, const u8 *iv);
diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl
new file mode 100644
index 000000000000..6c5c20c6108e
--- /dev/null
+++ b/drivers/crypto/vmx/aesp8-ppc.pl
@@ -0,0 +1,1930 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements support for AES instructions as per PowerISA
+# specification version 2.07, first implemented by POWER8 processor.
+# The module is endian-agnostic in sense that it supports both big-
+# and little-endian cases. Data alignment in parallelizable modes is
+# handled with VSX loads and stores, which implies MSR.VSX flag being
+# set. It should also be noted that ISA specification doesn't prohibit
+# alignment exceptions for these instructions on page boundaries.
+# Initially alignment was handled in pure AltiVec/VMX way [when data
+# is aligned programmatically, which in turn guarantees exception-
+# free execution], but it turned to hamper performance when vcipher
+# instructions are interleaved. It's reckoned that eventual
+# misalignment penalties at page boundaries are in average lower
+# than additional overhead in pure AltiVec approach.
+
+$flavour = shift;
+
+if ($flavour =~ /64/) {
+ $SIZE_T =8;
+ $LRSAVE =2*$SIZE_T;
+ $STU ="stdu";
+ $POP ="ld";
+ $PUSH ="std";
+ $UCMP ="cmpld";
+ $SHL ="sldi";
+} elsif ($flavour =~ /32/) {
+ $SIZE_T =4;
+ $LRSAVE =$SIZE_T;
+ $STU ="stwu";
+ $POP ="lwz";
+ $PUSH ="stw";
+ $UCMP ="cmplw";
+ $SHL ="slwi";
+} else { die "nonsense $flavour"; }
+
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
+
+$FRAME=8*$SIZE_T;
+$prefix="aes_p8";
+
+$sp="r1";
+$vrsave="r12";
+
+#########################################################################
+{{{ # Key setup procedures #
+my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
+my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
+my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
+
+$code.=<<___;
+.machine "any"
+
+.text
+
+.align 7
+rcon:
+.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
+.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
+.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
+.long 0,0,0,0 ?asis
+Lconsts:
+ mflr r0
+ bcl 20,31,\$+4
+ mflr $ptr #vvvvv "distance between . and rcon
+ addi $ptr,$ptr,-0x48
+ mtlr r0
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+
+.globl .${prefix}_set_encrypt_key
+Lset_encrypt_key:
+ mflr r11
+ $PUSH r11,$LRSAVE($sp)
+
+ li $ptr,-1
+ ${UCMP}i $inp,0
+ beq- Lenc_key_abort # if ($inp==0) return -1;
+ ${UCMP}i $out,0
+ beq- Lenc_key_abort # if ($out==0) return -1;
+ li $ptr,-2
+ cmpwi $bits,128
+ blt- Lenc_key_abort
+ cmpwi $bits,256
+ bgt- Lenc_key_abort
+ andi. r0,$bits,0x3f
+ bne- Lenc_key_abort
+
+ lis r0,0xfff0
+ mfspr $vrsave,256
+ mtspr 256,r0
+
+ bl Lconsts
+ mtlr r11
+
+ neg r9,$inp
+ lvx $in0,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ lvsr $key,0,r9 # borrow $key
+ li r8,0x20
+ cmpwi $bits,192
+ lvx $in1,0,$inp
+ le?vspltisb $mask,0x0f # borrow $mask
+ lvx $rcon,0,$ptr
+ le?vxor $key,$key,$mask # adjust for byte swap
+ lvx $mask,r8,$ptr
+ addi $ptr,$ptr,0x10
+ vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
+ li $cnt,8
+ vxor $zero,$zero,$zero
+ mtctr $cnt
+
+ ?lvsr $outperm,0,$out
+ vspltisb $outmask,-1
+ lvx $outhead,0,$out
+ ?vperm $outmask,$zero,$outmask,$outperm
+
+ blt Loop128
+ addi $inp,$inp,8
+ beq L192
+ addi $inp,$inp,8
+ b L256
+
+.align 4
+Loop128:
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+ bdnz Loop128
+
+ lvx $rcon,0,$ptr # last two round keys
+
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vxor $in0,$in0,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,0x50
+
+ li $rounds,10
+ b Ldone
+
+.align 4
+L192:
+ lvx $tmp,0,$inp
+ li $cnt,4
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $out,$out,16
+ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
+ vspltisb $key,8 # borrow $key
+ mtctr $cnt
+ vsububm $mask,$mask,$key # adjust the mask
+
+Loop192:
+ vperm $key,$in1,$in1,$mask # roate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vcipherlast $key,$key,$rcon
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+
+ vsldoi $stage,$zero,$in1,8
+ vspltw $tmp,$in0,3
+ vxor $tmp,$tmp,$in1
+ vsldoi $in1,$zero,$in1,12 # >>32
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in1,$in1,$tmp
+ vxor $in0,$in0,$key
+ vxor $in1,$in1,$key
+ vsldoi $stage,$stage,$in0,8
+
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$stage,$stage,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vsldoi $stage,$in0,$in1,8
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vperm $outtail,$stage,$stage,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vspltw $tmp,$in0,3
+ vxor $tmp,$tmp,$in1
+ vsldoi $in1,$zero,$in1,12 # >>32
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in1,$in1,$tmp
+ vxor $in0,$in0,$key
+ vxor $in1,$in1,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,16
+ bdnz Loop192
+
+ li $rounds,12
+ addi $out,$out,0x20
+ b Ldone
+
+.align 4
+L256:
+ lvx $tmp,0,$inp
+ li $cnt,7
+ li $rounds,14
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $out,$out,16
+ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
+ mtctr $cnt
+
+Loop256:
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in1,$in1,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,16
+ bdz Ldone
+
+ vspltw $key,$in0,3 # just splat
+ vsldoi $tmp,$zero,$in1,12 # >>32
+ vsbox $key,$key
+
+ vxor $in1,$in1,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in1,$in1,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in1,$in1,$tmp
+
+ vxor $in1,$in1,$key
+ b Loop256
+
+.align 4
+Ldone:
+ lvx $in1,0,$inp # redundant in aligned case
+ vsel $in1,$outhead,$in1,$outmask
+ stvx $in1,0,$inp
+ li $ptr,0
+ mtspr 256,$vrsave
+ stw $rounds,0($out)
+
+Lenc_key_abort:
+ mr r3,$ptr
+ blr
+ .long 0
+ .byte 0,12,0x14,1,0,0,3,0
+ .long 0
+.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
+
+.globl .${prefix}_set_decrypt_key
+ $STU $sp,-$FRAME($sp)
+ mflr r10
+ $PUSH r10,$FRAME+$LRSAVE($sp)
+ bl Lset_encrypt_key
+ mtlr r10
+
+ cmpwi r3,0
+ bne- Ldec_key_abort
+
+ slwi $cnt,$rounds,4
+ subi $inp,$out,240 # first round key
+ srwi $rounds,$rounds,1
+ add $out,$inp,$cnt # last round key
+ mtctr $rounds
+
+Ldeckey:
+ lwz r0, 0($inp)
+ lwz r6, 4($inp)
+ lwz r7, 8($inp)
+ lwz r8, 12($inp)
+ addi $inp,$inp,16
+ lwz r9, 0($out)
+ lwz r10,4($out)
+ lwz r11,8($out)
+ lwz r12,12($out)
+ stw r0, 0($out)
+ stw r6, 4($out)
+ stw r7, 8($out)
+ stw r8, 12($out)
+ subi $out,$out,16
+ stw r9, -16($inp)
+ stw r10,-12($inp)
+ stw r11,-8($inp)
+ stw r12,-4($inp)
+ bdnz Ldeckey
+
+ xor r3,r3,r3 # return value
+Ldec_key_abort:
+ addi $sp,$sp,$FRAME
+ blr
+ .long 0
+ .byte 0,12,4,1,0x80,0,3,0
+ .long 0
+.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
+___
+}}}
+#########################################################################
+{{{ # Single block en- and decrypt procedures #
+sub gen_block () {
+my $dir = shift;
+my $n = $dir eq "de" ? "n" : "";
+my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
+
+$code.=<<___;
+.globl .${prefix}_${dir}crypt
+ lwz $rounds,240($key)
+ lis r0,0xfc00
+ mfspr $vrsave,256
+ li $idx,15 # 15 is not typo
+ mtspr 256,r0
+
+ lvx v0,0,$inp
+ neg r11,$out
+ lvx v1,$idx,$inp
+ lvsl v2,0,$inp # inpperm
+ le?vspltisb v4,0x0f
+ ?lvsl v3,0,r11 # outperm
+ le?vxor v2,v2,v4
+ li $idx,16
+ vperm v0,v0,v1,v2 # align [and byte swap in LE]
+ lvx v1,0,$key
+ ?lvsl v5,0,$key # keyperm
+ srwi $rounds,$rounds,1
+ lvx v2,$idx,$key
+ addi $idx,$idx,16
+ subi $rounds,$rounds,1
+ ?vperm v1,v1,v2,v5 # align round key
+
+ vxor v0,v0,v1
+ lvx v1,$idx,$key
+ addi $idx,$idx,16
+ mtctr $rounds
+
+Loop_${dir}c:
+ ?vperm v2,v2,v1,v5
+ v${n}cipher v0,v0,v2
+ lvx v2,$idx,$key
+ addi $idx,$idx,16
+ ?vperm v1,v1,v2,v5
+ v${n}cipher v0,v0,v1
+ lvx v1,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_${dir}c
+
+ ?vperm v2,v2,v1,v5
+ v${n}cipher v0,v0,v2
+ lvx v2,$idx,$key
+ ?vperm v1,v1,v2,v5
+ v${n}cipherlast v0,v0,v1
+
+ vspltisb v2,-1
+ vxor v1,v1,v1
+ li $idx,15 # 15 is not typo
+ ?vperm v2,v1,v2,v3 # outmask
+ le?vxor v3,v3,v4
+ lvx v1,0,$out # outhead
+ vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
+ vsel v1,v1,v0,v2
+ lvx v4,$idx,$out
+ stvx v1,0,$out
+ vsel v0,v0,v4,v2
+ stvx v0,$idx,$out
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,3,0
+ .long 0
+.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
+___
+}
+&gen_block("en");
+&gen_block("de");
+}}}
+#########################################################################
+{{{ # CBC en- and decrypt procedures #
+my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
+ map("v$_",(4..10));
+$code.=<<___;
+.globl .${prefix}_cbc_encrypt
+ ${UCMP}i $len,16
+ bltlr-
+
+ cmpwi $enc,0 # test direction
+ lis r0,0xffe0
+ mfspr $vrsave,256
+ mtspr 256,r0
+
+ li $idx,15
+ vxor $rndkey0,$rndkey0,$rndkey0
+ le?vspltisb $tmp,0x0f
+
+ lvx $ivec,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $ivec,$ivec,$inptail,$inpperm
+
+ neg r11,$inp
+ ?lvsl $keyperm,0,$key # prepare for unaligned key
+ lwz $rounds,240($key)
+
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inptail,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ ?lvsr $outperm,0,$out # prepare for unaligned store
+ vspltisb $outmask,-1
+ lvx $outhead,0,$out
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
+ le?vxor $outperm,$outperm,$tmp
+
+ srwi $rounds,$rounds,1
+ li $idx,16
+ subi $rounds,$rounds,1
+ beq Lcbc_dec
+
+Lcbc_enc:
+ vmr $inout,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ mtctr $rounds
+ subi $len,$len,16 # len-=16
+
+ lvx $rndkey0,0,$key
+ vperm $inout,$inout,$inptail,$inpperm
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ vxor $inout,$inout,$ivec
+
+Loop_cbc_enc:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_cbc_enc
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipherlast $ivec,$inout,$rndkey0
+ ${UCMP}i $len,16
+
+ vperm $tmp,$ivec,$ivec,$outperm
+ vsel $inout,$outhead,$tmp,$outmask
+ vmr $outhead,$tmp
+ stvx $inout,0,$out
+ addi $out,$out,16
+ bge Lcbc_enc
+
+ b Lcbc_done
+
+.align 4
+Lcbc_dec:
+ ${UCMP}i $len,128
+ bge _aesp8_cbc_decrypt8x
+ vmr $tmp,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ mtctr $rounds
+ subi $len,$len,16 # len-=16
+
+ lvx $rndkey0,0,$key
+ vperm $tmp,$tmp,$inptail,$inpperm
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$tmp,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+
+Loop_cbc_dec:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_cbc_dec
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipherlast $inout,$inout,$rndkey0
+ ${UCMP}i $len,16
+
+ vxor $inout,$inout,$ivec
+ vmr $ivec,$tmp
+ vperm $tmp,$inout,$inout,$outperm
+ vsel $inout,$outhead,$tmp,$outmask
+ vmr $outhead,$tmp
+ stvx $inout,0,$out
+ addi $out,$out,16
+ bge Lcbc_dec
+
+Lcbc_done:
+ addi $out,$out,-1
+ lvx $inout,0,$out # redundant in aligned case
+ vsel $inout,$outhead,$inout,$outmask
+ stvx $inout,0,$out
+
+ neg $enc,$ivp # write [unaligned] iv
+ li $idx,15 # 15 is not typo
+ vxor $rndkey0,$rndkey0,$rndkey0
+ vspltisb $outmask,-1
+ le?vspltisb $tmp,0x0f
+ ?lvsl $outperm,0,$enc
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
+ le?vxor $outperm,$outperm,$tmp
+ lvx $outhead,0,$ivp
+ vperm $ivec,$ivec,$ivec,$outperm
+ vsel $inout,$outhead,$ivec,$outmask
+ lvx $inptail,$idx,$ivp
+ stvx $inout,0,$ivp
+ vsel $inout,$ivec,$inptail,$outmask
+ stvx $inout,$idx,$ivp
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,6,0
+ .long 0
+___
+#########################################################################
+{{ # Optimized CBC decrypt procedure #
+my $key_="r11";
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
+my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
+ # v26-v31 last 6 round keys
+my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
+
+$code.=<<___;
+.align 5
+_aesp8_cbc_decrypt8x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ li r10,`$FRAME+8*16+15`
+ li r11,`$FRAME+8*16+31`
+ stvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ subi $rounds,$rounds,3 # -4 in total
+ subi $len,$len,128 # bias
+
+ lvx $rndkey0,$x00,$key # load key schedule
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ lvx v31,$x00,$key
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,$FRAME+15
+ mtctr $rounds
+
+Load_cbc_dec_key:
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_cbc_dec_key
+
+ lvx v26,$x10,$key
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key
+ ?vperm v29,v29,v30,$keyperm
+ lvx $out0,$x70,$key # borrow $out0
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$out0,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ #lvx $inptail,0,$inp # "caller" already did this
+ #addi $inp,$inp,15 # 15 is not typo
+ subi $inp,$inp,15 # undo "caller"
+
+ le?li $idx,8
+ lvx_u $in0,$x00,$inp # load first 8 "words"
+ le?lvsl $inpperm,0,$idx
+ le?vspltisb $tmp,0x0f
+ lvx_u $in1,$x10,$inp
+ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
+ lvx_u $in2,$x20,$inp
+ le?vperm $in0,$in0,$in0,$inpperm
+ lvx_u $in3,$x30,$inp
+ le?vperm $in1,$in1,$in1,$inpperm
+ lvx_u $in4,$x40,$inp
+ le?vperm $in2,$in2,$in2,$inpperm
+ vxor $out0,$in0,$rndkey0
+ lvx_u $in5,$x50,$inp
+ le?vperm $in3,$in3,$in3,$inpperm
+ vxor $out1,$in1,$rndkey0
+ lvx_u $in6,$x60,$inp
+ le?vperm $in4,$in4,$in4,$inpperm
+ vxor $out2,$in2,$rndkey0
+ lvx_u $in7,$x70,$inp
+ addi $inp,$inp,0x80
+ le?vperm $in5,$in5,$in5,$inpperm
+ vxor $out3,$in3,$rndkey0
+ le?vperm $in6,$in6,$in6,$inpperm
+ vxor $out4,$in4,$rndkey0
+ le?vperm $in7,$in7,$in7,$inpperm
+ vxor $out5,$in5,$rndkey0
+ vxor $out6,$in6,$rndkey0
+ vxor $out7,$in7,$rndkey0
+
+ mtctr $rounds
+ b Loop_cbc_dec8x
+.align 5
+Loop_cbc_dec8x:
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_cbc_dec8x
+
+ subic $len,$len,128 # $len-=128
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+
+ subfe. r0,r0,r0 # borrow?-1:0
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+
+ and r0,r0,$len
+ vncipher $out0,$out0,v26
+ vncipher $out1,$out1,v26
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vncipher $out4,$out4,v26
+ vncipher $out5,$out5,v26
+ vncipher $out6,$out6,v26
+ vncipher $out7,$out7,v26
+
+ add $inp,$inp,r0 # $inp is adjusted in such
+ # way that at exit from the
+ # loop inX-in7 are loaded
+ # with last "words"
+ vncipher $out0,$out0,v27
+ vncipher $out1,$out1,v27
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vncipher $out4,$out4,v27
+ vncipher $out5,$out5,v27
+ vncipher $out6,$out6,v27
+ vncipher $out7,$out7,v27
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vncipher $out0,$out0,v28
+ vncipher $out1,$out1,v28
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vncipher $out4,$out4,v28
+ vncipher $out5,$out5,v28
+ vncipher $out6,$out6,v28
+ vncipher $out7,$out7,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vncipher $out0,$out0,v29
+ vncipher $out1,$out1,v29
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vncipher $out4,$out4,v29
+ vncipher $out5,$out5,v29
+ vncipher $out6,$out6,v29
+ vncipher $out7,$out7,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+
+ vncipher $out0,$out0,v30
+ vxor $ivec,$ivec,v31 # xor with last round key
+ vncipher $out1,$out1,v30
+ vxor $in0,$in0,v31
+ vncipher $out2,$out2,v30
+ vxor $in1,$in1,v31
+ vncipher $out3,$out3,v30
+ vxor $in2,$in2,v31
+ vncipher $out4,$out4,v30
+ vxor $in3,$in3,v31
+ vncipher $out5,$out5,v30
+ vxor $in4,$in4,v31
+ vncipher $out6,$out6,v30
+ vxor $in5,$in5,v31
+ vncipher $out7,$out7,v30
+ vxor $in6,$in6,v31
+
+ vncipherlast $out0,$out0,$ivec
+ vncipherlast $out1,$out1,$in0
+ lvx_u $in0,$x00,$inp # load next input block
+ vncipherlast $out2,$out2,$in1
+ lvx_u $in1,$x10,$inp
+ vncipherlast $out3,$out3,$in2
+ le?vperm $in0,$in0,$in0,$inpperm
+ lvx_u $in2,$x20,$inp
+ vncipherlast $out4,$out4,$in3
+ le?vperm $in1,$in1,$in1,$inpperm
+ lvx_u $in3,$x30,$inp
+ vncipherlast $out5,$out5,$in4
+ le?vperm $in2,$in2,$in2,$inpperm
+ lvx_u $in4,$x40,$inp
+ vncipherlast $out6,$out6,$in5
+ le?vperm $in3,$in3,$in3,$inpperm
+ lvx_u $in5,$x50,$inp
+ vncipherlast $out7,$out7,$in6
+ le?vperm $in4,$in4,$in4,$inpperm
+ lvx_u $in6,$x60,$inp
+ vmr $ivec,$in7
+ le?vperm $in5,$in5,$in5,$inpperm
+ lvx_u $in7,$x70,$inp
+ addi $inp,$inp,0x80
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $in6,$in6,$in6,$inpperm
+ vxor $out0,$in0,$rndkey0
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $in7,$in7,$in7,$inpperm
+ vxor $out1,$in1,$rndkey0
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ vxor $out2,$in2,$rndkey0
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ vxor $out3,$in3,$rndkey0
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ vxor $out4,$in4,$rndkey0
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x50,$out
+ vxor $out5,$in5,$rndkey0
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x60,$out
+ vxor $out6,$in6,$rndkey0
+ stvx_u $out7,$x70,$out
+ addi $out,$out,0x80
+ vxor $out7,$in7,$rndkey0
+
+ mtctr $rounds
+ beq Loop_cbc_dec8x # did $len-=128 borrow?
+
+ addic. $len,$len,128
+ beq Lcbc_dec8x_done
+ nop
+ nop
+
+Loop_cbc_dec8x_tail: # up to 7 "words" tail...
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_cbc_dec8x_tail
+
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+
+ vncipher $out1,$out1,v26
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vncipher $out4,$out4,v26
+ vncipher $out5,$out5,v26
+ vncipher $out6,$out6,v26
+ vncipher $out7,$out7,v26
+
+ vncipher $out1,$out1,v27
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vncipher $out4,$out4,v27
+ vncipher $out5,$out5,v27
+ vncipher $out6,$out6,v27
+ vncipher $out7,$out7,v27
+
+ vncipher $out1,$out1,v28
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vncipher $out4,$out4,v28
+ vncipher $out5,$out5,v28
+ vncipher $out6,$out6,v28
+ vncipher $out7,$out7,v28
+
+ vncipher $out1,$out1,v29
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vncipher $out4,$out4,v29
+ vncipher $out5,$out5,v29
+ vncipher $out6,$out6,v29
+ vncipher $out7,$out7,v29
+
+ vncipher $out1,$out1,v30
+ vxor $ivec,$ivec,v31 # last round key
+ vncipher $out2,$out2,v30
+ vxor $in1,$in1,v31
+ vncipher $out3,$out3,v30
+ vxor $in2,$in2,v31
+ vncipher $out4,$out4,v30
+ vxor $in3,$in3,v31
+ vncipher $out5,$out5,v30
+ vxor $in4,$in4,v31
+ vncipher $out6,$out6,v30
+ vxor $in5,$in5,v31
+ vncipher $out7,$out7,v30
+ vxor $in6,$in6,v31
+
+ cmplwi $len,32 # switch($len)
+ blt Lcbc_dec8x_one
+ nop
+ beq Lcbc_dec8x_two
+ cmplwi $len,64
+ blt Lcbc_dec8x_three
+ nop
+ beq Lcbc_dec8x_four
+ cmplwi $len,96
+ blt Lcbc_dec8x_five
+ nop
+ beq Lcbc_dec8x_six
+
+Lcbc_dec8x_seven:
+ vncipherlast $out1,$out1,$ivec
+ vncipherlast $out2,$out2,$in1
+ vncipherlast $out3,$out3,$in2
+ vncipherlast $out4,$out4,$in3
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out1,$out1,$out1,$inpperm
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x00,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x10,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x20,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x30,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x40,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x50,$out
+ stvx_u $out7,$x60,$out
+ addi $out,$out,0x70
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_six:
+ vncipherlast $out2,$out2,$ivec
+ vncipherlast $out3,$out3,$in2
+ vncipherlast $out4,$out4,$in3
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out2,$out2,$out2,$inpperm
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x00,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x10,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x20,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x30,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x40,$out
+ stvx_u $out7,$x50,$out
+ addi $out,$out,0x60
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_five:
+ vncipherlast $out3,$out3,$ivec
+ vncipherlast $out4,$out4,$in3
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out3,$out3,$out3,$inpperm
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x00,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x10,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x20,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x30,$out
+ stvx_u $out7,$x40,$out
+ addi $out,$out,0x50
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_four:
+ vncipherlast $out4,$out4,$ivec
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out4,$out4,$out4,$inpperm
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x00,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x10,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x20,$out
+ stvx_u $out7,$x30,$out
+ addi $out,$out,0x40
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_three:
+ vncipherlast $out5,$out5,$ivec
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out5,$out5,$out5,$inpperm
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x00,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x10,$out
+ stvx_u $out7,$x20,$out
+ addi $out,$out,0x30
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_two:
+ vncipherlast $out6,$out6,$ivec
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out6,$out6,$out6,$inpperm
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x00,$out
+ stvx_u $out7,$x10,$out
+ addi $out,$out,0x20
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_one:
+ vncipherlast $out7,$out7,$ivec
+ vmr $ivec,$in7
+
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out7,0,$out
+ addi $out,$out,0x10
+
+Lcbc_dec8x_done:
+ le?vperm $ivec,$ivec,$ivec,$inpperm
+ stvx_u $ivec,0,$ivp # write [unaligned] iv
+
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $inpperm,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
+___
+}} }}}
+
+#########################################################################
+{{{ # CTR procedure[s] #
+my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
+ map("v$_",(4..11));
+my $dat=$tmp;
+
+$code.=<<___;
+.globl .${prefix}_ctr32_encrypt_blocks
+ ${UCMP}i $len,1
+ bltlr-
+
+ lis r0,0xfff0
+ mfspr $vrsave,256
+ mtspr 256,r0
+
+ li $idx,15
+ vxor $rndkey0,$rndkey0,$rndkey0
+ le?vspltisb $tmp,0x0f
+
+ lvx $ivec,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ vspltisb $one,1
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $ivec,$ivec,$inptail,$inpperm
+ vsldoi $one,$rndkey0,$one,1
+
+ neg r11,$inp
+ ?lvsl $keyperm,0,$key # prepare for unaligned key
+ lwz $rounds,240($key)
+
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inptail,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ srwi $rounds,$rounds,1
+ li $idx,16
+ subi $rounds,$rounds,1
+
+ ${UCMP}i $len,8
+ bge _aesp8_ctr32_encrypt8x
+
+ ?lvsr $outperm,0,$out # prepare for unaligned store
+ vspltisb $outmask,-1
+ lvx $outhead,0,$out
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
+ le?vxor $outperm,$outperm,$tmp
+
+ lvx $rndkey0,0,$key
+ mtctr $rounds
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$ivec,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ b Loop_ctr32_enc
+
+.align 5
+Loop_ctr32_enc:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_ctr32_enc
+
+ vadduwm $ivec,$ivec,$one
+ vmr $dat,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ subic. $len,$len,1 # blocks--
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ vperm $dat,$dat,$inptail,$inpperm
+ li $idx,16
+ ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
+ lvx $rndkey0,0,$key
+ vxor $dat,$dat,$rndkey1 # last round key
+ vcipherlast $inout,$inout,$dat
+
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inout,$outperm
+ vsel $dat,$outhead,$inout,$outmask
+ mtctr $rounds
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vmr $outhead,$inout
+ vxor $inout,$ivec,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ stvx $dat,0,$out
+ addi $out,$out,16
+ bne Loop_ctr32_enc
+
+ addi $out,$out,-1
+ lvx $inout,0,$out # redundant in aligned case
+ vsel $inout,$outhead,$inout,$outmask
+ stvx $inout,0,$out
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,6,0
+ .long 0
+___
+#########################################################################
+{{ # Optimized CTR procedure #
+my $key_="r11";
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
+my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
+ # v26-v31 last 6 round keys
+my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
+my ($two,$three,$four)=($outhead,$outperm,$outmask);
+
+$code.=<<___;
+.align 5
+_aesp8_ctr32_encrypt8x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ li r10,`$FRAME+8*16+15`
+ li r11,`$FRAME+8*16+31`
+ stvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ subi $rounds,$rounds,3 # -4 in total
+
+ lvx $rndkey0,$x00,$key # load key schedule
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ lvx v31,$x00,$key
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,$FRAME+15
+ mtctr $rounds
+
+Load_ctr32_enc_key:
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_ctr32_enc_key
+
+ lvx v26,$x10,$key
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key
+ ?vperm v29,v29,v30,$keyperm
+ lvx $out0,$x70,$key # borrow $out0
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$out0,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ vadduwm $two,$one,$one
+ subi $inp,$inp,15 # undo "caller"
+ $SHL $len,$len,4
+
+ vadduwm $out1,$ivec,$one # counter values ...
+ vadduwm $out2,$ivec,$two
+ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
+ le?li $idx,8
+ vadduwm $out3,$out1,$two
+ vxor $out1,$out1,$rndkey0
+ le?lvsl $inpperm,0,$idx
+ vadduwm $out4,$out2,$two
+ vxor $out2,$out2,$rndkey0
+ le?vspltisb $tmp,0x0f
+ vadduwm $out5,$out3,$two
+ vxor $out3,$out3,$rndkey0
+ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
+ vadduwm $out6,$out4,$two
+ vxor $out4,$out4,$rndkey0
+ vadduwm $out7,$out5,$two
+ vxor $out5,$out5,$rndkey0
+ vadduwm $ivec,$out6,$two # next counter value
+ vxor $out6,$out6,$rndkey0
+ vxor $out7,$out7,$rndkey0
+
+ mtctr $rounds
+ b Loop_ctr32_enc8x
+.align 5
+Loop_ctr32_enc8x:
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+ vcipher $out6,$out6,v24
+ vcipher $out7,$out7,v24
+Loop_ctr32_enc8x_middle:
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+ vcipher $out6,$out6,v25
+ vcipher $out7,$out7,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_ctr32_enc8x
+
+ subic r11,$len,256 # $len-256, borrow $key_
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+ vcipher $out6,$out6,v24
+ vcipher $out7,$out7,v24
+
+ subfe r0,r0,r0 # borrow?-1:0
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+ vcipher $out6,$out6,v25
+ vcipher $out7,$out7,v25
+
+ and r0,r0,r11
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vcipher $out0,$out0,v26
+ vcipher $out1,$out1,v26
+ vcipher $out2,$out2,v26
+ vcipher $out3,$out3,v26
+ vcipher $out4,$out4,v26
+ vcipher $out5,$out5,v26
+ vcipher $out6,$out6,v26
+ vcipher $out7,$out7,v26
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ subic $len,$len,129 # $len-=129
+ vcipher $out0,$out0,v27
+ addi $len,$len,1 # $len-=128 really
+ vcipher $out1,$out1,v27
+ vcipher $out2,$out2,v27
+ vcipher $out3,$out3,v27
+ vcipher $out4,$out4,v27
+ vcipher $out5,$out5,v27
+ vcipher $out6,$out6,v27
+ vcipher $out7,$out7,v27
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+
+ vcipher $out0,$out0,v28
+ lvx_u $in0,$x00,$inp # load input
+ vcipher $out1,$out1,v28
+ lvx_u $in1,$x10,$inp
+ vcipher $out2,$out2,v28
+ lvx_u $in2,$x20,$inp
+ vcipher $out3,$out3,v28
+ lvx_u $in3,$x30,$inp
+ vcipher $out4,$out4,v28
+ lvx_u $in4,$x40,$inp
+ vcipher $out5,$out5,v28
+ lvx_u $in5,$x50,$inp
+ vcipher $out6,$out6,v28
+ lvx_u $in6,$x60,$inp
+ vcipher $out7,$out7,v28
+ lvx_u $in7,$x70,$inp
+ addi $inp,$inp,0x80
+
+ vcipher $out0,$out0,v29
+ le?vperm $in0,$in0,$in0,$inpperm
+ vcipher $out1,$out1,v29
+ le?vperm $in1,$in1,$in1,$inpperm
+ vcipher $out2,$out2,v29
+ le?vperm $in2,$in2,$in2,$inpperm
+ vcipher $out3,$out3,v29
+ le?vperm $in3,$in3,$in3,$inpperm
+ vcipher $out4,$out4,v29
+ le?vperm $in4,$in4,$in4,$inpperm
+ vcipher $out5,$out5,v29
+ le?vperm $in5,$in5,$in5,$inpperm
+ vcipher $out6,$out6,v29
+ le?vperm $in6,$in6,$in6,$inpperm
+ vcipher $out7,$out7,v29
+ le?vperm $in7,$in7,$in7,$inpperm
+
+ add $inp,$inp,r0 # $inp is adjusted in such
+ # way that at exit from the
+ # loop inX-in7 are loaded
+ # with last "words"
+ subfe. r0,r0,r0 # borrow?-1:0
+ vcipher $out0,$out0,v30
+ vxor $in0,$in0,v31 # xor with last round key
+ vcipher $out1,$out1,v30
+ vxor $in1,$in1,v31
+ vcipher $out2,$out2,v30
+ vxor $in2,$in2,v31
+ vcipher $out3,$out3,v30
+ vxor $in3,$in3,v31
+ vcipher $out4,$out4,v30
+ vxor $in4,$in4,v31
+ vcipher $out5,$out5,v30
+ vxor $in5,$in5,v31
+ vcipher $out6,$out6,v30
+ vxor $in6,$in6,v31
+ vcipher $out7,$out7,v30
+ vxor $in7,$in7,v31
+
+ bne Lctr32_enc8x_break # did $len-129 borrow?
+
+ vcipherlast $in0,$out0,$in0
+ vcipherlast $in1,$out1,$in1
+ vadduwm $out1,$ivec,$one # counter values ...
+ vcipherlast $in2,$out2,$in2
+ vadduwm $out2,$ivec,$two
+ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
+ vcipherlast $in3,$out3,$in3
+ vadduwm $out3,$out1,$two
+ vxor $out1,$out1,$rndkey0
+ vcipherlast $in4,$out4,$in4
+ vadduwm $out4,$out2,$two
+ vxor $out2,$out2,$rndkey0
+ vcipherlast $in5,$out5,$in5
+ vadduwm $out5,$out3,$two
+ vxor $out3,$out3,$rndkey0
+ vcipherlast $in6,$out6,$in6
+ vadduwm $out6,$out4,$two
+ vxor $out4,$out4,$rndkey0
+ vcipherlast $in7,$out7,$in7
+ vadduwm $out7,$out5,$two
+ vxor $out5,$out5,$rndkey0
+ le?vperm $in0,$in0,$in0,$inpperm
+ vadduwm $ivec,$out6,$two # next counter value
+ vxor $out6,$out6,$rndkey0
+ le?vperm $in1,$in1,$in1,$inpperm
+ vxor $out7,$out7,$rndkey0
+ mtctr $rounds
+
+ vcipher $out0,$out0,v24
+ stvx_u $in0,$x00,$out
+ le?vperm $in2,$in2,$in2,$inpperm
+ vcipher $out1,$out1,v24
+ stvx_u $in1,$x10,$out
+ le?vperm $in3,$in3,$in3,$inpperm
+ vcipher $out2,$out2,v24
+ stvx_u $in2,$x20,$out
+ le?vperm $in4,$in4,$in4,$inpperm
+ vcipher $out3,$out3,v24
+ stvx_u $in3,$x30,$out
+ le?vperm $in5,$in5,$in5,$inpperm
+ vcipher $out4,$out4,v24
+ stvx_u $in4,$x40,$out
+ le?vperm $in6,$in6,$in6,$inpperm
+ vcipher $out5,$out5,v24
+ stvx_u $in5,$x50,$out
+ le?vperm $in7,$in7,$in7,$inpperm
+ vcipher $out6,$out6,v24
+ stvx_u $in6,$x60,$out
+ vcipher $out7,$out7,v24
+ stvx_u $in7,$x70,$out
+ addi $out,$out,0x80
+
+ b Loop_ctr32_enc8x_middle
+
+.align 5
+Lctr32_enc8x_break:
+ cmpwi $len,-0x60
+ blt Lctr32_enc8x_one
+ nop
+ beq Lctr32_enc8x_two
+ cmpwi $len,-0x40
+ blt Lctr32_enc8x_three
+ nop
+ beq Lctr32_enc8x_four
+ cmpwi $len,-0x20
+ blt Lctr32_enc8x_five
+ nop
+ beq Lctr32_enc8x_six
+ cmpwi $len,0x00
+ blt Lctr32_enc8x_seven
+
+Lctr32_enc8x_eight:
+ vcipherlast $out0,$out0,$in0
+ vcipherlast $out1,$out1,$in1
+ vcipherlast $out2,$out2,$in2
+ vcipherlast $out3,$out3,$in3
+ vcipherlast $out4,$out4,$in4
+ vcipherlast $out5,$out5,$in5
+ vcipherlast $out6,$out6,$in6
+ vcipherlast $out7,$out7,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x50,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x60,$out
+ stvx_u $out7,$x70,$out
+ addi $out,$out,0x80
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_seven:
+ vcipherlast $out0,$out0,$in1
+ vcipherlast $out1,$out1,$in2
+ vcipherlast $out2,$out2,$in3
+ vcipherlast $out3,$out3,$in4
+ vcipherlast $out4,$out4,$in5
+ vcipherlast $out5,$out5,$in6
+ vcipherlast $out6,$out6,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x50,$out
+ stvx_u $out6,$x60,$out
+ addi $out,$out,0x70
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_six:
+ vcipherlast $out0,$out0,$in2
+ vcipherlast $out1,$out1,$in3
+ vcipherlast $out2,$out2,$in4
+ vcipherlast $out3,$out3,$in5
+ vcipherlast $out4,$out4,$in6
+ vcipherlast $out5,$out5,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ stvx_u $out5,$x50,$out
+ addi $out,$out,0x60
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_five:
+ vcipherlast $out0,$out0,$in3
+ vcipherlast $out1,$out1,$in4
+ vcipherlast $out2,$out2,$in5
+ vcipherlast $out3,$out3,$in6
+ vcipherlast $out4,$out4,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ stvx_u $out4,$x40,$out
+ addi $out,$out,0x50
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_four:
+ vcipherlast $out0,$out0,$in4
+ vcipherlast $out1,$out1,$in5
+ vcipherlast $out2,$out2,$in6
+ vcipherlast $out3,$out3,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ stvx_u $out3,$x30,$out
+ addi $out,$out,0x40
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_three:
+ vcipherlast $out0,$out0,$in5
+ vcipherlast $out1,$out1,$in6
+ vcipherlast $out2,$out2,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ stvx_u $out2,$x20,$out
+ addi $out,$out,0x30
+ b Lcbc_dec8x_done
+
+.align 5
+Lctr32_enc8x_two:
+ vcipherlast $out0,$out0,$in6
+ vcipherlast $out1,$out1,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ stvx_u $out1,$x10,$out
+ addi $out,$out,0x20
+ b Lcbc_dec8x_done
+
+.align 5
+Lctr32_enc8x_one:
+ vcipherlast $out0,$out0,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ stvx_u $out0,0,$out
+ addi $out,$out,0x10
+
+Lctr32_enc8x_done:
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $inpperm,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
+___
+}} }}}
+
+my $consts=1;
+foreach(split("\n",$code)) {
+ s/\`([^\`]*)\`/eval($1)/geo;
+
+ # constants table endian-specific conversion
+ if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
+ my $conv=$3;
+ my @bytes=();
+
+ # convert to endian-agnostic format
+ if ($1 eq "long") {
+ foreach (split(/,\s*/,$2)) {
+ my $l = /^0/?oct:int;
+ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
+ }
+ } else {
+ @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
+ }
+
+ # little-endian conversion
+ if ($flavour =~ /le$/o) {
+ SWITCH: for($conv) {
+ /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
+ /\?rev/ && do { @bytes=reverse(@bytes); last; };
+ }
+ }
+
+ #emit
+ print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
+ next;
+ }
+ $consts=0 if (m/Lconsts:/o); # end of table
+
+ # instructions prefixed with '?' are endian-specific and need
+ # to be adjusted accordingly...
+ if ($flavour =~ /le$/o) { # little-endian
+ s/le\?//o or
+ s/be\?/#be#/o or
+ s/\?lvsr/lvsl/o or
+ s/\?lvsl/lvsr/o or
+ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
+ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
+ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
+ } else { # big-endian
+ s/le\?/#le#/o or
+ s/be\?//o or
+ s/\?([a-z]+)/$1/o;
+ }
+
+ print $_,"\n";
+}
+
+close STDOUT;
diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c
new file mode 100644
index 000000000000..d0ffe277af5c
--- /dev/null
+++ b/drivers/crypto/vmx/ghash.c
@@ -0,0 +1,214 @@
+/**
+ * GHASH routines supporting VMX instructions on the Power 8
+ *
+ * Copyright (C) 2015 International Business Machines Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 only.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/crypto.h>
+#include <linux/delay.h>
+#include <linux/hardirq.h>
+#include <asm/switch_to.h>
+#include <crypto/aes.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/hash.h>
+#include <crypto/b128ops.h>
+
+#define IN_INTERRUPT in_interrupt()
+
+#define GHASH_BLOCK_SIZE (16)
+#define GHASH_DIGEST_SIZE (16)
+#define GHASH_KEY_LEN (16)
+
+void gcm_init_p8(u128 htable[16], const u64 Xi[2]);
+void gcm_gmult_p8(u64 Xi[2], const u128 htable[16]);
+void gcm_ghash_p8(u64 Xi[2], const u128 htable[16],
+ const u8 *in,size_t len);
+
+struct p8_ghash_ctx {
+ u128 htable[16];
+ struct crypto_shash *fallback;
+};
+
+struct p8_ghash_desc_ctx {
+ u64 shash[2];
+ u8 buffer[GHASH_DIGEST_SIZE];
+ int bytes;
+ struct shash_desc fallback_desc;
+};
+
+static int p8_ghash_init_tfm(struct crypto_tfm *tfm)
+{
+ const char *alg;
+ struct crypto_shash *fallback;
+ struct crypto_shash *shash_tfm = __crypto_shash_cast(tfm);
+ struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (!(alg = crypto_tfm_alg_name(tfm))) {
+ printk(KERN_ERR "Failed to get algorithm name.\n");
+ return -ENOENT;
+ }
+
+ fallback = crypto_alloc_shash(alg, 0 ,CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(fallback)) {
+ printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n",
+ alg, PTR_ERR(fallback));
+ return PTR_ERR(fallback);
+ }
+ printk(KERN_INFO "Using '%s' as fallback implementation.\n",
+ crypto_tfm_alg_driver_name(crypto_shash_tfm(fallback)));
+
+ crypto_shash_set_flags(fallback,
+ crypto_shash_get_flags((struct crypto_shash *) tfm));
+ ctx->fallback = fallback;
+
+ shash_tfm->descsize = sizeof(struct p8_ghash_desc_ctx)
+ + crypto_shash_descsize(fallback);
+
+ return 0;
+}
+
+static void p8_ghash_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (ctx->fallback) {
+ crypto_free_shash(ctx->fallback);
+ ctx->fallback = NULL;
+ }
+}
+
+static int p8_ghash_init(struct shash_desc *desc)
+{
+ struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
+ struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ dctx->bytes = 0;
+ memset(dctx->shash, 0, GHASH_DIGEST_SIZE);
+ dctx->fallback_desc.tfm = ctx->fallback;
+ dctx->fallback_desc.flags = desc->flags;
+ return crypto_shash_init(&dctx->fallback_desc);
+}
+
+static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(tfm));
+
+ if (keylen != GHASH_KEY_LEN)
+ return -EINVAL;
+
+ pagefault_disable();
+ enable_kernel_altivec();
+ enable_kernel_fp();
+ gcm_init_p8(ctx->htable, (const u64 *) key);
+ pagefault_enable();
+ return crypto_shash_setkey(ctx->fallback, key, keylen);
+}
+
+static int p8_ghash_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ unsigned int len;
+ struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
+ struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (IN_INTERRUPT) {
+ return crypto_shash_update(&dctx->fallback_desc, src, srclen);
+ } else {
+ if (dctx->bytes) {
+ if (dctx->bytes + srclen < GHASH_DIGEST_SIZE) {
+ memcpy(dctx->buffer + dctx->bytes, src, srclen);
+ dctx->bytes += srclen;
+ return 0;
+ }
+ memcpy(dctx->buffer + dctx->bytes, src,
+ GHASH_DIGEST_SIZE - dctx->bytes);
+ pagefault_disable();
+ enable_kernel_altivec();
+ enable_kernel_fp();
+ gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer,
+ GHASH_DIGEST_SIZE);
+ pagefault_enable();
+ src += GHASH_DIGEST_SIZE - dctx->bytes;
+ srclen -= GHASH_DIGEST_SIZE - dctx->bytes;
+ dctx->bytes = 0;
+ }
+ len = srclen & ~(GHASH_DIGEST_SIZE - 1);
+ if (len) {
+ pagefault_disable();
+ enable_kernel_altivec();
+ enable_kernel_fp();
+ gcm_ghash_p8(dctx->shash, ctx->htable, src, len);
+ pagefault_enable();
+ src += len;
+ srclen -= len;
+ }
+ if (srclen) {
+ memcpy(dctx->buffer, src, srclen);
+ dctx->bytes = srclen;
+ }
+ return 0;
+ }
+}
+
+static int p8_ghash_final(struct shash_desc *desc, u8 *out)
+{
+ int i;
+ struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
+ struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (IN_INTERRUPT) {
+ return crypto_shash_final(&dctx->fallback_desc, out);
+ } else {
+ if (dctx->bytes) {
+ for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++)
+ dctx->buffer[i] = 0;
+ pagefault_disable();
+ enable_kernel_altivec();
+ enable_kernel_fp();
+ gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer,
+ GHASH_DIGEST_SIZE);
+ pagefault_enable();
+ dctx->bytes = 0;
+ }
+ memcpy(out, dctx->shash, GHASH_DIGEST_SIZE);
+ return 0;
+ }
+}
+
+struct shash_alg p8_ghash_alg = {
+ .digestsize = GHASH_DIGEST_SIZE,
+ .init = p8_ghash_init,
+ .update = p8_ghash_update,
+ .final = p8_ghash_final,
+ .setkey = p8_ghash_setkey,
+ .descsize = sizeof(struct p8_ghash_desc_ctx),
+ .base = {
+ .cra_name = "ghash",
+ .cra_driver_name = "p8_ghash",
+ .cra_priority = 1000,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = GHASH_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct p8_ghash_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = p8_ghash_init_tfm,
+ .cra_exit = p8_ghash_exit_tfm,
+ },
+};
diff --git a/drivers/crypto/vmx/ghashp8-ppc.pl b/drivers/crypto/vmx/ghashp8-ppc.pl
new file mode 100644
index 000000000000..0a6f899839dd
--- /dev/null
+++ b/drivers/crypto/vmx/ghashp8-ppc.pl
@@ -0,0 +1,228 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# GHASH for for PowerISA v2.07.
+#
+# July 2014
+#
+# Accurate performance measurements are problematic, because it's
+# always virtualized setup with possibly throttled processor.
+# Relative comparison is therefore more informative. This initial
+# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
+# faster than "4-bit" integer-only compiler-generated 64-bit code.
+# "Initial version" means that there is room for futher improvement.
+
+$flavour=shift;
+$output =shift;
+
+if ($flavour =~ /64/) {
+ $SIZE_T=8;
+ $LRSAVE=2*$SIZE_T;
+ $STU="stdu";
+ $POP="ld";
+ $PUSH="std";
+} elsif ($flavour =~ /32/) {
+ $SIZE_T=4;
+ $LRSAVE=$SIZE_T;
+ $STU="stwu";
+ $POP="lwz";
+ $PUSH="stw";
+} else { die "nonsense $flavour"; }
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
+
+my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
+
+my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
+my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
+my $vrsave="r12";
+
+$code=<<___;
+.machine "any"
+
+.text
+
+.globl .gcm_init_p8
+ lis r0,0xfff0
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $H,0,r4 # load H
+
+ vspltisb $xC2,-16 # 0xf0
+ vspltisb $t0,1 # one
+ vaddubm $xC2,$xC2,$xC2 # 0xe0
+ vxor $zero,$zero,$zero
+ vor $xC2,$xC2,$t0 # 0xe1
+ vsldoi $xC2,$xC2,$zero,15 # 0xe1...
+ vsldoi $t1,$zero,$t0,1 # ...1
+ vaddubm $xC2,$xC2,$xC2 # 0xc2...
+ vspltisb $t2,7
+ vor $xC2,$xC2,$t1 # 0xc2....01
+ vspltb $t1,$H,0 # most significant byte
+ vsl $H,$H,$t0 # H<<=1
+ vsrab $t1,$t1,$t2 # broadcast carry bit
+ vand $t1,$t1,$xC2
+ vxor $H,$H,$t1 # twisted H
+
+ vsldoi $H,$H,$H,8 # twist even more ...
+ vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
+ vsldoi $Hl,$zero,$H,8 # ... and split
+ vsldoi $Hh,$H,$zero,8
+
+ stvx_u $xC2,0,r3 # save pre-computed table
+ stvx_u $Hl,r8,r3
+ stvx_u $H, r9,r3
+ stvx_u $Hh,r10,r3
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
+.size .gcm_init_p8,.-.gcm_init_p8
+
+.globl .gcm_gmult_p8
+ lis r0,0xfff8
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $IN,0,$Xip # load Xi
+
+ lvx_u $Hl,r8,$Htbl # load pre-computed table
+ le?lvsl $lemask,r0,r0
+ lvx_u $H, r9,$Htbl
+ le?vspltisb $t0,0x07
+ lvx_u $Hh,r10,$Htbl
+ le?vxor $lemask,$lemask,$t0
+ lvx_u $xC2,0,$Htbl
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $zero,$zero,$zero
+
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $Xl,$Xl,$t1
+
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
+.size .gcm_gmult_p8,.-.gcm_gmult_p8
+
+.globl .gcm_ghash_p8
+ lis r0,0xfff8
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $Xl,0,$Xip # load Xi
+
+ lvx_u $Hl,r8,$Htbl # load pre-computed table
+ le?lvsl $lemask,r0,r0
+ lvx_u $H, r9,$Htbl
+ le?vspltisb $t0,0x07
+ lvx_u $Hh,r10,$Htbl
+ le?vxor $lemask,$lemask,$t0
+ lvx_u $xC2,0,$Htbl
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ vxor $zero,$zero,$zero
+
+ lvx_u $IN,0,$inp
+ addi $inp,$inp,16
+ subi $len,$len,16
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $IN,$IN,$Xl
+ b Loop
+
+.align 5
+Loop:
+ subic $len,$len,16
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ subfe. r0,r0,r0 # borrow?-1:0
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ and r0,r0,$len
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+ add $inp,$inp,r0
+
+ vpmsumd $t2,$Xl,$xC2 # 1st phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+ lvx_u $IN,0,$inp
+ addi $inp,$inp,16
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd phase
+ vpmsumd $Xl,$Xl,$xC2
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $t1,$t1,$Xh
+ vxor $IN,$IN,$t1
+ vxor $IN,$IN,$Xl
+ beq Loop # did $len-=16 borrow?
+
+ vxor $Xl,$Xl,$t1
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,4,0
+ .long 0
+.size .gcm_ghash_p8,.-.gcm_ghash_p8
+
+.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+___
+
+foreach (split("\n",$code)) {
+ if ($flavour =~ /le$/o) { # little-endian
+ s/le\?//o or
+ s/be\?/#be#/o;
+ } else {
+ s/le\?/#le#/o or
+ s/be\?//o;
+ }
+ print $_,"\n";
+}
+
+close STDOUT; # enforce flush
diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl
new file mode 100644
index 000000000000..a59188494af8
--- /dev/null
+++ b/drivers/crypto/vmx/ppc-xlate.pl
@@ -0,0 +1,207 @@
+#!/usr/bin/env perl
+
+# PowerPC assembler distiller by <appro>.
+
+my $flavour = shift;
+my $output = shift;
+open STDOUT,">$output" || die "can't open $output: $!";
+
+my %GLOBALS;
+my $dotinlocallabels=($flavour=~/linux/)?1:0;
+
+################################################################
+# directives which need special treatment on different platforms
+################################################################
+my $globl = sub {
+ my $junk = shift;
+ my $name = shift;
+ my $global = \$GLOBALS{$name};
+ my $ret;
+
+ $name =~ s|^[\.\_]||;
+
+ SWITCH: for ($flavour) {
+ /aix/ && do { $name = ".$name";
+ last;
+ };
+ /osx/ && do { $name = "_$name";
+ last;
+ };
+ /linux/
+ && do { $ret = "_GLOBAL($name)";
+ last;
+ };
+ }
+
+ $ret = ".globl $name\nalign 5\n$name:" if (!$ret);
+ $$global = $name;
+ $ret;
+};
+my $text = sub {
+ my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
+ $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/);
+ $ret;
+};
+my $machine = sub {
+ my $junk = shift;
+ my $arch = shift;
+ if ($flavour =~ /osx/)
+ { $arch =~ s/\"//g;
+ $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
+ }
+ ".machine $arch";
+};
+my $size = sub {
+ if ($flavour =~ /linux/)
+ { shift;
+ my $name = shift; $name =~ s|^[\.\_]||;
+ my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name;
+ $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/);
+ $ret;
+ }
+ else
+ { ""; }
+};
+my $asciz = sub {
+ shift;
+ my $line = join(",",@_);
+ if ($line =~ /^"(.*)"$/)
+ { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
+ else
+ { ""; }
+};
+my $quad = sub {
+ shift;
+ my @ret;
+ my ($hi,$lo);
+ for (@_) {
+ if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
+ { $hi=$1?"0x$1":"0"; $lo="0x$2"; }
+ elsif (/^([0-9]+)$/o)
+ { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
+ else
+ { $hi=undef; $lo=$_; }
+
+ if (defined($hi))
+ { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
+ else
+ { push(@ret,".quad $lo"); }
+ }
+ join("\n",@ret);
+};
+
+################################################################
+# simplified mnemonics not handled by at least one assembler
+################################################################
+my $cmplw = sub {
+ my $f = shift;
+ my $cr = 0; $cr = shift if ($#_>1);
+ # Some out-of-date 32-bit GNU assembler just can't handle cmplw...
+ ($flavour =~ /linux.*32/) ?
+ " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
+ " cmplw ".join(',',$cr,@_);
+};
+my $bdnz = sub {
+ my $f = shift;
+ my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
+ " bc $bo,0,".shift;
+} if ($flavour!~/linux/);
+my $bltlr = sub {
+ my $f = shift;
+ my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
+ " bclr $bo,0";
+};
+my $bnelr = sub {
+ my $f = shift;
+ my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
+ " bclr $bo,2";
+};
+my $beqlr = sub {
+ my $f = shift;
+ my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
+ " bclr $bo,2";
+};
+# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
+# arguments is 64, with "operand out of range" error.
+my $extrdi = sub {
+ my ($f,$ra,$rs,$n,$b) = @_;
+ $b = ($b+$n)&63; $n = 64-$n;
+ " rldicl $ra,$rs,$b,$n";
+};
+my $vmr = sub {
+ my ($f,$vx,$vy) = @_;
+ " vor $vx,$vy,$vy";
+};
+
+# PowerISA 2.06 stuff
+sub vsxmem_op {
+ my ($f, $vrt, $ra, $rb, $op) = @_;
+ " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
+}
+# made-up unaligned memory reference AltiVec/VMX instructions
+my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
+my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
+my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
+my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
+my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
+my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
+
+# PowerISA 2.07 stuff
+sub vcrypto_op {
+ my ($f, $vrt, $vra, $vrb, $op) = @_;
+ " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
+}
+my $vcipher = sub { vcrypto_op(@_, 1288); };
+my $vcipherlast = sub { vcrypto_op(@_, 1289); };
+my $vncipher = sub { vcrypto_op(@_, 1352); };
+my $vncipherlast= sub { vcrypto_op(@_, 1353); };
+my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
+my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
+my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
+my $vpmsumb = sub { vcrypto_op(@_, 1032); };
+my $vpmsumd = sub { vcrypto_op(@_, 1224); };
+my $vpmsubh = sub { vcrypto_op(@_, 1096); };
+my $vpmsumw = sub { vcrypto_op(@_, 1160); };
+my $vaddudm = sub { vcrypto_op(@_, 192); };
+
+my $mtsle = sub {
+ my ($f, $arg) = @_;
+ " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
+};
+
+print "#include <asm/ppc_asm.h>\n" if $flavour =~ /linux/;
+
+while($line=<>) {
+
+ $line =~ s|[#!;].*$||; # get rid of asm-style comments...
+ $line =~ s|/\*.*\*/||; # ... and C-style comments...
+ $line =~ s|^\s+||; # ... and skip white spaces in beginning...
+ $line =~ s|\s+$||; # ... and at the end
+
+ {
+ $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel
+ $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
+ }
+
+ {
+ $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
+ my $c = $1; $c = "\t" if ($c eq "");
+ my $mnemonic = $2;
+ my $f = $3;
+ my $opcode = eval("\$$mnemonic");
+ $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
+ if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
+ elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
+ }
+
+ print $line if ($line);
+ print "\n";
+}
+
+close STDOUT;
diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c
new file mode 100644
index 000000000000..44d8d5cfe40d
--- /dev/null
+++ b/drivers/crypto/vmx/vmx.c
@@ -0,0 +1,88 @@
+/**
+ * Routines supporting VMX instructions on the Power 8
+ *
+ * Copyright (C) 2015 International Business Machines Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 only.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/crypto.h>
+#include <asm/cputable.h>
+#include <crypto/internal/hash.h>
+
+extern struct shash_alg p8_ghash_alg;
+extern struct crypto_alg p8_aes_alg;
+extern struct crypto_alg p8_aes_cbc_alg;
+extern struct crypto_alg p8_aes_ctr_alg;
+static struct crypto_alg *algs[] = {
+ &p8_aes_alg,
+ &p8_aes_cbc_alg,
+ &p8_aes_ctr_alg,
+ NULL,
+};
+
+int __init p8_init(void)
+{
+ int ret = 0;
+ struct crypto_alg **alg_it;
+
+ if (!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO))
+ return -ENODEV;
+
+ for (alg_it = algs; *alg_it; alg_it++) {
+ ret = crypto_register_alg(*alg_it);
+ printk(KERN_INFO "crypto_register_alg '%s' = %d\n",
+ (*alg_it)->cra_name, ret);
+ if (ret) {
+ for (alg_it--; alg_it >= algs; alg_it--)
+ crypto_unregister_alg(*alg_it);
+ break;
+ }
+ }
+ if (ret)
+ return ret;
+
+ ret = crypto_register_shash(&p8_ghash_alg);
+ if (ret) {
+ for (alg_it = algs; *alg_it; alg_it++)
+ crypto_unregister_alg(*alg_it);
+ }
+ return ret;
+}
+
+void __exit p8_exit(void)
+{
+ struct crypto_alg **alg_it;
+
+ for (alg_it = algs; *alg_it; alg_it++) {
+ printk(KERN_INFO "Removing '%s'\n", (*alg_it)->cra_name);
+ crypto_unregister_alg(*alg_it);
+ }
+ crypto_unregister_shash(&p8_ghash_alg);
+}
+
+module_init(p8_init);
+module_exit(p8_exit);
+
+MODULE_AUTHOR("Marcelo Cerri<mhcerri@br.ibm.com>");
+MODULE_DESCRIPTION("IBM VMX cryptogaphic acceleration instructions support on Power 8");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0.0");
+