Diffstat (limited to 'crypto')
-rw-r--r--  crypto/Kconfig                      |  35
-rw-r--r--  crypto/Makefile                     |   1
-rw-r--r--  crypto/aes_generic.c                |   8
-rw-r--r--  crypto/af_alg.c                     |   2
-rw-r--r--  crypto/api.c                        |   9
-rw-r--r--  crypto/async_tx/async_memcpy.c      |  37
-rw-r--r--  crypto/async_tx/async_pq.c          | 174
-rw-r--r--  crypto/async_tx/async_raid6_recov.c |  61
-rw-r--r--  crypto/async_tx/async_tx.c          |   4
-rw-r--r--  crypto/async_tx/async_xor.c         | 123
-rw-r--r--  crypto/async_tx/raid6test.c         |  10
-rw-r--r--  crypto/camellia_generic.c           |  48
-rw-r--r--  crypto/cast_common.c                |   8
-rw-r--r--  crypto/crct10dif_common.c           |  82
-rw-r--r--  crypto/crct10dif_generic.c          | 127
-rw-r--r--  crypto/fcrypt.c                     |   2
-rw-r--r--  crypto/scatterwalk.c                |  22
-rw-r--r--  crypto/tcrypt.c                     |  12
-rw-r--r--  crypto/testmgr.c                    |  24
-rw-r--r--  crypto/testmgr.h                    |  33
20 files changed, 608 insertions, 214 deletions
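A recurring theme in the async_tx hunks below is the switch from open-coded dma_map_page()/dma_unmap_page() pairs to the reference-counted dmaengine_unmap_data API. A minimal sketch of that pattern, distilled from the async_memcpy() hunk below (the function name sketch_async_copy is illustrative; error paths and the synchronous fallback are omitted):

#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>

static struct dma_async_tx_descriptor *
sketch_async_copy(struct dma_chan *chan, struct page *dest, struct page *src,
		  size_t len)
{
	struct dma_device *device = chan->device;
	struct dmaengine_unmap_data *unmap;
	struct dma_async_tx_descriptor *tx;

	/* two address slots: one source, one destination */
	unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO);
	if (!unmap)
		return NULL;

	unmap->to_cnt = 1;	/* addr[0] is mapped DMA_TO_DEVICE */
	unmap->addr[0] = dma_map_page(device->dev, src, 0, len, DMA_TO_DEVICE);
	unmap->from_cnt = 1;	/* addr[1] is mapped DMA_FROM_DEVICE */
	unmap->addr[1] = dma_map_page(device->dev, dest, 0, len, DMA_FROM_DEVICE);
	unmap->len = len;

	tx = device->device_prep_dma_memcpy(chan, unmap->addr[1],
					    unmap->addr[0], len, 0);
	if (tx)
		dma_set_unmap(tx, unmap);	/* descriptor takes a reference */

	dmaengine_unmap_put(unmap);		/* drop the local reference */
	return tx;
}

The pages stay mapped until the last reference is dropped, which is what lets the old DMA_COMPL_SKIP_DEST_UNMAP juggling disappear from do_async_gen_syndrome() and do_async_xor() in the hunks that follow.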
diff --git a/crypto/Kconfig b/crypto/Kconfig index aca01164f002..71f337aefa39 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -376,6 +376,25 @@ config CRYPTO_CRC32_PCLMUL which will enable any routine to use the CRC-32-IEEE 802.3 checksum and gain better performance as compared with the table implementation. +config CRYPTO_CRCT10DIF + tristate "CRCT10DIF algorithm" + select CRYPTO_HASH + help + CRC T10 Data Integrity Field computation is being cast as + a crypto transform. This allows for faster crc t10 dif + transforms to be used if they are available. + +config CRYPTO_CRCT10DIF_PCLMUL + tristate "CRCT10DIF PCLMULQDQ hardware acceleration" + depends on X86 && 64BIT && CRC_T10DIF + select CRYPTO_HASH + help + For x86_64 processors with SSE4.2 and PCLMULQDQ support, + CRC T10 DIF computation can be hardware + accelerated using the PCLMULQDQ instruction. This option will create + the 'crct10dif-pclmul' module, which is faster when computing the + crct10dif checksum as compared with the generic table implementation. + config CRYPTO_GHASH tristate "GHASH digest algorithm" select CRYPTO_GF128MUL @@ -757,6 +776,22 @@ config CRYPTO_AES_ARM See <http://csrc.nist.gov/encryption/aes/> for more information. +config CRYPTO_AES_ARM_BS + tristate "Bit sliced AES using NEON instructions" + depends on ARM && KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AES_ARM + select CRYPTO_ABLK_HELPER + help + Use a faster and more secure NEON based implementation of AES in CBC, + CTR and XTS modes. + + Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode + and for XTS mode encryption; CBC and XTS mode decryption speedup is + around 25%. (CBC encryption speed is not affected by this driver.) + This implementation does not rely on any lookup tables so it is + believed to be invulnerable to cache timing attacks.
+ config CRYPTO_ANUBIS tristate "Anubis cipher algorithm" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index 2ba0df2f908f..80019ba8da3a 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -83,6 +83,7 @@ obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o obj-$(CONFIG_CRYPTO_CRC32) += crc32.o +obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c index 47f2e5c71759..fd0d6b454975 100644 --- a/crypto/aes_generic.c +++ b/crypto/aes_generic.c @@ -62,7 +62,7 @@ static inline u8 byte(const u32 x, const unsigned n) static const u32 rco_tab[10] = { 1, 2, 4, 8, 16, 32, 64, 128, 27, 54 }; -const u32 crypto_ft_tab[4][256] = { +__visible const u32 crypto_ft_tab[4][256] = { { 0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591, @@ -326,7 +326,7 @@ const u32 crypto_ft_tab[4][256] = { } }; -const u32 crypto_fl_tab[4][256] = { +__visible const u32 crypto_fl_tab[4][256] = { { 0x00000063, 0x0000007c, 0x00000077, 0x0000007b, 0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5, @@ -590,7 +590,7 @@ const u32 crypto_fl_tab[4][256] = { } }; -const u32 crypto_it_tab[4][256] = { +__visible const u32 crypto_it_tab[4][256] = { { 0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b, @@ -854,7 +854,7 @@ const u32 crypto_it_tab[4][256] = { } }; -const u32 crypto_il_tab[4][256] = { +__visible const u32 crypto_il_tab[4][256] = { { 0x00000052, 0x00000009, 0x0000006a, 0x000000d5, 0x00000030, 0x00000036, 0x000000a5, 0x00000038, diff --git a/crypto/af_alg.c b/crypto/af_alg.c index ac33d5f30778..966f893711b3 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -434,7 +434,7 @@ int af_alg_wait_for_completion(int err, struct af_alg_completion *completion) case -EINPROGRESS: case -EBUSY: wait_for_completion(&completion->completion); - INIT_COMPLETION(completion->completion); + reinit_completion(&completion->completion); err = completion->err; break; }; diff --git a/crypto/api.c b/crypto/api.c index 3b6180336d3d..a2b39c5f3649 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -34,6 +34,8 @@ EXPORT_SYMBOL_GPL(crypto_alg_sem); BLOCKING_NOTIFIER_HEAD(crypto_chain); EXPORT_SYMBOL_GPL(crypto_chain); +static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg); + struct crypto_alg *crypto_mod_get(struct crypto_alg *alg) { return try_module_get(alg->cra_module) ? crypto_alg_get(alg) : NULL; @@ -144,8 +146,11 @@ static struct crypto_alg *crypto_larval_add(const char *name, u32 type, } up_write(&crypto_alg_sem); - if (alg != &larval->alg) + if (alg != &larval->alg) { kfree(larval); + if (crypto_is_larval(alg)) + alg = crypto_larval_wait(alg); + } return alg; } @@ -391,7 +396,7 @@ EXPORT_SYMBOL_GPL(__crypto_alloc_tfm); * @mask: Mask for type comparison * * This function should not be used by new algorithm types. - * Plesae use crypto_alloc_tfm instead. + * Please use crypto_alloc_tfm instead. * * crypto_alloc_base() will first attempt to locate an already loaded * algorithm. 
If that fails and the kernel supports dynamically loadable diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index 9e62feffb374..f8c0b8dbeb75 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -50,33 +50,36 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, &dest, 1, &src, 1, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; + struct dmaengine_unmap_data *unmap = NULL; - if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) { - dma_addr_t dma_dest, dma_src; + if (device) + unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO); + + if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) { unsigned long dma_prep_flags = 0; if (submit->cb_fn) dma_prep_flags |= DMA_PREP_INTERRUPT; if (submit->flags & ASYNC_TX_FENCE) dma_prep_flags |= DMA_PREP_FENCE; - dma_dest = dma_map_page(device->dev, dest, dest_offset, len, - DMA_FROM_DEVICE); - - dma_src = dma_map_page(device->dev, src, src_offset, len, - DMA_TO_DEVICE); - - tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src, - len, dma_prep_flags); - if (!tx) { - dma_unmap_page(device->dev, dma_dest, len, - DMA_FROM_DEVICE); - dma_unmap_page(device->dev, dma_src, len, - DMA_TO_DEVICE); - } + + unmap->to_cnt = 1; + unmap->addr[0] = dma_map_page(device->dev, src, src_offset, len, + DMA_TO_DEVICE); + unmap->from_cnt = 1; + unmap->addr[1] = dma_map_page(device->dev, dest, dest_offset, len, + DMA_FROM_DEVICE); + unmap->len = len; + + tx = device->device_prep_dma_memcpy(chan, unmap->addr[1], + unmap->addr[0], len, + dma_prep_flags); } if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); + + dma_set_unmap(tx, unmap); async_tx_submit(chan, tx, submit); } else { void *dest_buf, *src_buf; @@ -96,6 +99,8 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, async_tx_sync_epilog(submit); } + dmaengine_unmap_put(unmap); + return tx; } EXPORT_SYMBOL_GPL(async_memcpy); diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 91d5d385899e..d05327caf69d 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -46,49 +46,24 @@ static struct page *pq_scribble_page; * do_async_gen_syndrome - asynchronously calculate P and/or Q */ static __async_inline struct dma_async_tx_descriptor * -do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, - const unsigned char *scfs, unsigned int offset, int disks, - size_t len, dma_addr_t *dma_src, +do_async_gen_syndrome(struct dma_chan *chan, + const unsigned char *scfs, int disks, + struct dmaengine_unmap_data *unmap, + enum dma_ctrl_flags dma_flags, struct async_submit_ctl *submit) { struct dma_async_tx_descriptor *tx = NULL; struct dma_device *dma = chan->device; - enum dma_ctrl_flags dma_flags = 0; enum async_tx_flags flags_orig = submit->flags; dma_async_tx_callback cb_fn_orig = submit->cb_fn; dma_async_tx_callback cb_param_orig = submit->cb_param; int src_cnt = disks - 2; - unsigned char coefs[src_cnt]; unsigned short pq_src_cnt; dma_addr_t dma_dest[2]; int src_off = 0; - int idx; - int i; - /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */ - if (P(blocks, disks)) - dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset, - len, DMA_BIDIRECTIONAL); - else - dma_flags |= DMA_PREP_PQ_DISABLE_P; - if (Q(blocks, disks)) - dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset, - len, DMA_BIDIRECTIONAL); - else - dma_flags |= 
DMA_PREP_PQ_DISABLE_Q; - - /* convert source addresses being careful to collapse 'empty' - * sources and update the coefficients accordingly - */ - for (i = 0, idx = 0; i < src_cnt; i++) { - if (blocks[i] == NULL) - continue; - dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len, - DMA_TO_DEVICE); - coefs[idx] = scfs[i]; - idx++; - } - src_cnt = idx; + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; while (src_cnt > 0) { submit->flags = flags_orig; @@ -100,28 +75,25 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, if (src_cnt > pq_src_cnt) { submit->flags &= ~ASYNC_TX_ACK; submit->flags |= ASYNC_TX_FENCE; - dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP; submit->cb_fn = NULL; submit->cb_param = NULL; } else { - dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP; submit->cb_fn = cb_fn_orig; submit->cb_param = cb_param_orig; if (cb_fn_orig) dma_flags |= DMA_PREP_INTERRUPT; } - if (submit->flags & ASYNC_TX_FENCE) - dma_flags |= DMA_PREP_FENCE; - /* Since we have clobbered the src_list we are committed - * to doing this asynchronously. Drivers force forward - * progress in case they can not provide a descriptor + /* Drivers force forward progress in case they can not provide + * a descriptor */ for (;;) { + dma_dest[0] = unmap->addr[disks - 2]; + dma_dest[1] = unmap->addr[disks - 1]; tx = dma->device_prep_dma_pq(chan, dma_dest, - &dma_src[src_off], + &unmap->addr[src_off], pq_src_cnt, - &coefs[src_off], len, + &scfs[src_off], unmap->len, dma_flags); if (likely(tx)) break; @@ -129,6 +101,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, dma_async_issue_pending(chan); } + dma_set_unmap(tx, unmap); async_tx_submit(chan, tx, submit); submit->depend_tx = tx; @@ -188,10 +161,6 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, * set to NULL those buffers will be replaced with the raid6_zero_page * in the synchronous path and omitted in the hardware-asynchronous * path. - * - * 'blocks' note: if submit->scribble is NULL then the contents of - * 'blocks' may be overwritten to perform address conversions - * (dma_map_page() or page_address()). */ struct dma_async_tx_descriptor * async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, @@ -202,26 +171,69 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, &P(blocks, disks), 2, blocks, src_cnt, len); struct dma_device *device = chan ? 
chan->device : NULL; - dma_addr_t *dma_src = NULL; + struct dmaengine_unmap_data *unmap = NULL; BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks))); - if (submit->scribble) - dma_src = submit->scribble; - else if (sizeof(dma_addr_t) <= sizeof(struct page *)) - dma_src = (dma_addr_t *) blocks; + if (device) + unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO); - if (dma_src && device && + if (unmap && (src_cnt <= dma_maxpq(device, 0) || dma_maxpq(device, DMA_PREP_CONTINUE) > 0) && is_dma_pq_aligned(device, offset, 0, len)) { + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = 0; + unsigned char coefs[src_cnt]; + int i, j; + /* run the p+q asynchronously */ pr_debug("%s: (async) disks: %d len: %zu\n", __func__, disks, len); - return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset, - disks, len, dma_src, submit); + + /* convert source addresses being careful to collapse 'empty' + * sources and update the coefficients accordingly + */ + unmap->len = len; + for (i = 0, j = 0; i < src_cnt; i++) { + if (blocks[i] == NULL) + continue; + unmap->addr[j] = dma_map_page(device->dev, blocks[i], offset, + len, DMA_TO_DEVICE); + coefs[j] = raid6_gfexp[i]; + unmap->to_cnt++; + j++; + } + + /* + * DMAs use destinations as sources, + * so use BIDIRECTIONAL mapping + */ + unmap->bidi_cnt++; + if (P(blocks, disks)) + unmap->addr[j++] = dma_map_page(device->dev, P(blocks, disks), + offset, len, DMA_BIDIRECTIONAL); + else { + unmap->addr[j++] = 0; + dma_flags |= DMA_PREP_PQ_DISABLE_P; + } + + unmap->bidi_cnt++; + if (Q(blocks, disks)) + unmap->addr[j++] = dma_map_page(device->dev, Q(blocks, disks), + offset, len, DMA_BIDIRECTIONAL); + else { + unmap->addr[j++] = 0; + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + } + + tx = do_async_gen_syndrome(chan, coefs, j, unmap, dma_flags, submit); + dmaengine_unmap_put(unmap); + return tx; } + dmaengine_unmap_put(unmap); + /* run the pq synchronously */ pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len); @@ -277,50 +289,60 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, struct dma_async_tx_descriptor *tx; unsigned char coefs[disks-2]; enum dma_ctrl_flags dma_flags = submit->cb_fn ? 
DMA_PREP_INTERRUPT : 0; - dma_addr_t *dma_src = NULL; - int src_cnt = 0; + struct dmaengine_unmap_data *unmap = NULL; BUG_ON(disks < 4); - if (submit->scribble) - dma_src = submit->scribble; - else if (sizeof(dma_addr_t) <= sizeof(struct page *)) - dma_src = (dma_addr_t *) blocks; + if (device) + unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO); - if (dma_src && device && disks <= dma_maxpq(device, 0) && + if (unmap && disks <= dma_maxpq(device, 0) && is_dma_pq_aligned(device, offset, 0, len)) { struct device *dev = device->dev; - dma_addr_t *pq = &dma_src[disks-2]; - int i; + dma_addr_t pq[2]; + int i, j = 0, src_cnt = 0; pr_debug("%s: (async) disks: %d len: %zu\n", __func__, disks, len); - if (!P(blocks, disks)) + + unmap->len = len; + for (i = 0; i < disks-2; i++) + if (likely(blocks[i])) { + unmap->addr[j] = dma_map_page(dev, blocks[i], + offset, len, + DMA_TO_DEVICE); + coefs[j] = raid6_gfexp[i]; + unmap->to_cnt++; + src_cnt++; + j++; + } + + if (!P(blocks, disks)) { + pq[0] = 0; dma_flags |= DMA_PREP_PQ_DISABLE_P; - else + } else { pq[0] = dma_map_page(dev, P(blocks, disks), offset, len, DMA_TO_DEVICE); - if (!Q(blocks, disks)) + unmap->addr[j++] = pq[0]; + unmap->to_cnt++; + } + if (!Q(blocks, disks)) { + pq[1] = 0; dma_flags |= DMA_PREP_PQ_DISABLE_Q; - else + } else { pq[1] = dma_map_page(dev, Q(blocks, disks), offset, len, DMA_TO_DEVICE); + unmap->addr[j++] = pq[1]; + unmap->to_cnt++; + } if (submit->flags & ASYNC_TX_FENCE) dma_flags |= DMA_PREP_FENCE; - for (i = 0; i < disks-2; i++) - if (likely(blocks[i])) { - dma_src[src_cnt] = dma_map_page(dev, blocks[i], - offset, len, - DMA_TO_DEVICE); - coefs[src_cnt] = raid6_gfexp[i]; - src_cnt++; - } - for (;;) { - tx = device->device_prep_dma_pq_val(chan, pq, dma_src, + tx = device->device_prep_dma_pq_val(chan, pq, + unmap->addr, src_cnt, coefs, len, pqres, @@ -330,6 +352,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, async_tx_quiesce(&submit->depend_tx); dma_async_issue_pending(chan); } + + dma_set_unmap(tx, unmap); async_tx_submit(chan, tx, submit); return tx; diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index a9f08a6a582e..934a84981495 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c @@ -26,6 +26,7 @@ #include <linux/dma-mapping.h> #include <linux/raid/pq.h> #include <linux/async_tx.h> +#include <linux/dmaengine.h> static struct dma_async_tx_descriptor * async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, @@ -34,35 +35,45 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, &dest, 1, srcs, 2, len); struct dma_device *dma = chan ? 
chan->device : NULL; + struct dmaengine_unmap_data *unmap = NULL; const u8 *amul, *bmul; u8 ax, bx; u8 *a, *b, *c; - if (dma) { - dma_addr_t dma_dest[2]; - dma_addr_t dma_src[2]; + if (dma) + unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO); + + if (unmap) { struct device *dev = dma->dev; + dma_addr_t pq[2]; struct dma_async_tx_descriptor *tx; enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; if (submit->flags & ASYNC_TX_FENCE) dma_flags |= DMA_PREP_FENCE; - dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); - dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); - dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); - tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef, + unmap->addr[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); + unmap->addr[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); + unmap->to_cnt = 2; + + unmap->addr[2] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + unmap->bidi_cnt = 1; + /* engine only looks at Q, but expects it to follow P */ + pq[1] = unmap->addr[2]; + + unmap->len = len; + tx = dma->device_prep_dma_pq(chan, pq, unmap->addr, 2, coef, len, dma_flags); if (tx) { + dma_set_unmap(tx, unmap); async_tx_submit(chan, tx, submit); + dmaengine_unmap_put(unmap); return tx; } /* could not get a descriptor, unmap and fall through to * the synchronous path */ - dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); - dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); - dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE); + dmaengine_unmap_put(unmap); } /* run the operation synchronously */ @@ -89,23 +100,38 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len, struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, &dest, 1, &src, 1, len); struct dma_device *dma = chan ? 
chan->device : NULL; + struct dmaengine_unmap_data *unmap = NULL; const u8 *qmul; /* Q multiplier table */ u8 *d, *s; - if (dma) { + if (dma) + unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO); + + if (unmap) { dma_addr_t dma_dest[2]; - dma_addr_t dma_src[1]; struct device *dev = dma->dev; struct dma_async_tx_descriptor *tx; enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; if (submit->flags & ASYNC_TX_FENCE) dma_flags |= DMA_PREP_FENCE; - dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); - dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); - tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, - len, dma_flags); + unmap->addr[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); + unmap->to_cnt++; + unmap->addr[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_dest[1] = unmap->addr[1]; + unmap->bidi_cnt++; + unmap->len = len; + + /* this looks funny, but the engine looks for Q at + * dma_dest[1] and ignores dma_dest[0] as a dest + * due to DMA_PREP_PQ_DISABLE_P + */ + tx = dma->device_prep_dma_pq(chan, dma_dest, unmap->addr, + 1, &coef, len, dma_flags); + if (tx) { + dma_set_unmap(tx, unmap); + dmaengine_unmap_put(unmap); async_tx_submit(chan, tx, submit); return tx; } @@ -113,8 +139,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len, /* could not get a descriptor, unmap and fall through to * the synchronous path */ - dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); - dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); + dmaengine_unmap_put(unmap); } /* no channel available, or failed to allocate a descriptor, so diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 7be34248b450..39ea4791a3c9 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -128,7 +128,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, } device->device_issue_pending(chan); } else { - if (dma_wait_for_async_tx(depend_tx) != DMA_SUCCESS) + if (dma_wait_for_async_tx(depend_tx) != DMA_COMPLETE) panic("%s: DMA error waiting for depend_tx\n", __func__); tx->tx_submit(tx); @@ -280,7 +280,7 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx) * we are referring to the correct operation */ BUG_ON(async_tx_test_ack(*tx)); - if (dma_wait_for_async_tx(*tx) != DMA_SUCCESS) + if (dma_wait_for_async_tx(*tx) != DMA_COMPLETE) panic("%s: DMA error waiting for transaction\n", __func__); async_tx_ack(*tx); diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 8ade0a0481c6..3c562f5a60bb 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -33,48 +33,31 @@ /* do_async_xor - dma map the pages and perform the xor with an engine */ static __async_inline struct dma_async_tx_descriptor * -do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src, +do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap, struct async_submit_ctl *submit) { struct dma_device *dma = chan->device; struct dma_async_tx_descriptor *tx = NULL; - int src_off = 0; - int i; dma_async_tx_callback cb_fn_orig = submit->cb_fn; void *cb_param_orig = submit->cb_param; enum async_tx_flags flags_orig = submit->flags; - enum dma_ctrl_flags dma_flags; - int xor_src_cnt = 0; - dma_addr_t dma_dest; - - /* map the dest bidrectional in case it is re-used as a source */ - dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_BIDIRECTIONAL); - for (i = 0; i < src_cnt; i++) { 
- /* only map the dest once */ - if (!src_list[i]) - continue; - if (unlikely(src_list[i] == dest)) { - dma_src[xor_src_cnt++] = dma_dest; - continue; - } - dma_src[xor_src_cnt++] = dma_map_page(dma->dev, src_list[i], offset, - len, DMA_TO_DEVICE); - } - src_cnt = xor_src_cnt; + enum dma_ctrl_flags dma_flags = 0; + int src_cnt = unmap->to_cnt; + int xor_src_cnt; + dma_addr_t dma_dest = unmap->addr[unmap->to_cnt]; + dma_addr_t *src_list = unmap->addr; while (src_cnt) { + dma_addr_t tmp; + submit->flags = flags_orig; - dma_flags = 0; xor_src_cnt = min(src_cnt, (int)dma->max_xor); - /* if we are submitting additional xors, leave the chain open, - * clear the callback parameters, and leave the destination - * buffer mapped + /* if we are submitting additional xors, leave the chain open + * and clear the callback parameters */ if (src_cnt > xor_src_cnt) { submit->flags &= ~ASYNC_TX_ACK; submit->flags |= ASYNC_TX_FENCE; - dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; submit->cb_fn = NULL; submit->cb_param = NULL; } else { @@ -85,12 +68,18 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, dma_flags |= DMA_PREP_INTERRUPT; if (submit->flags & ASYNC_TX_FENCE) dma_flags |= DMA_PREP_FENCE; - /* Since we have clobbered the src_list we are committed - * to doing this asynchronously. Drivers force forward progress - * in case they can not provide a descriptor + + /* Drivers force forward progress in case they can not provide a + * descriptor */ - tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off], - xor_src_cnt, len, dma_flags); + tmp = src_list[0]; + if (src_list > unmap->addr) + src_list[0] = dma_dest; + tx = dma->device_prep_dma_xor(chan, dma_dest, src_list, + xor_src_cnt, unmap->len, + dma_flags); + src_list[0] = tmp; + if (unlikely(!tx)) async_tx_quiesce(&submit->depend_tx); @@ -99,22 +88,21 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, while (unlikely(!tx)) { dma_async_issue_pending(chan); tx = dma->device_prep_dma_xor(chan, dma_dest, - &dma_src[src_off], - xor_src_cnt, len, + src_list, + xor_src_cnt, unmap->len, dma_flags); } + dma_set_unmap(tx, unmap); async_tx_submit(chan, tx, submit); submit->depend_tx = tx; if (src_cnt > xor_src_cnt) { /* drop completed sources */ src_cnt -= xor_src_cnt; - src_off += xor_src_cnt; - /* use the intermediate result a source */ - dma_src[--src_off] = dma_dest; src_cnt++; + src_list += xor_src_cnt - 1; } else break; } @@ -189,22 +177,40 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR, &dest, 1, src_list, src_cnt, len); - dma_addr_t *dma_src = NULL; + struct dma_device *device = chan ? 
chan->device : NULL; + struct dmaengine_unmap_data *unmap = NULL; BUG_ON(src_cnt <= 1); - if (submit->scribble) - dma_src = submit->scribble; - else if (sizeof(dma_addr_t) <= sizeof(struct page *)) - dma_src = (dma_addr_t *) src_list; + if (device) + unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOIO); + + if (unmap && is_dma_xor_aligned(device, offset, 0, len)) { + struct dma_async_tx_descriptor *tx; + int i, j; - if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) { /* run the xor asynchronously */ pr_debug("%s (async): len: %zu\n", __func__, len); - return do_async_xor(chan, dest, src_list, offset, src_cnt, len, - dma_src, submit); + unmap->len = len; + for (i = 0, j = 0; i < src_cnt; i++) { + if (!src_list[i]) + continue; + unmap->to_cnt++; + unmap->addr[j++] = dma_map_page(device->dev, src_list[i], + offset, len, DMA_TO_DEVICE); + } + + /* map it bidirectional as it may be re-used as a source */ + unmap->addr[j] = dma_map_page(device->dev, dest, offset, len, + DMA_BIDIRECTIONAL); + unmap->bidi_cnt = 1; + + tx = do_async_xor(chan, unmap, submit); + dmaengine_unmap_put(unmap); + return tx; } else { + dmaengine_unmap_put(unmap); /* run the xor synchronously */ pr_debug("%s (sync): len: %zu\n", __func__, len); WARN_ONCE(chan, "%s: no space for dma address conversion\n", @@ -268,16 +274,14 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; - dma_addr_t *dma_src = NULL; + struct dmaengine_unmap_data *unmap = NULL; BUG_ON(src_cnt <= 1); - if (submit->scribble) - dma_src = submit->scribble; - else if (sizeof(dma_addr_t) <= sizeof(struct page *)) - dma_src = (dma_addr_t *) src_list; + if (device) + unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOIO); - if (dma_src && device && src_cnt <= device->max_xor && + if (unmap && src_cnt <= device->max_xor && is_dma_xor_aligned(device, offset, 0, len)) { unsigned long dma_prep_flags = 0; int i; @@ -288,11 +292,15 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, dma_prep_flags |= DMA_PREP_INTERRUPT; if (submit->flags & ASYNC_TX_FENCE) dma_prep_flags |= DMA_PREP_FENCE; - for (i = 0; i < src_cnt; i++) - dma_src[i] = dma_map_page(device->dev, src_list[i], - offset, len, DMA_TO_DEVICE); - tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt, + for (i = 0; i < src_cnt; i++) { + unmap->addr[i] = dma_map_page(device->dev, src_list[i], + offset, len, DMA_TO_DEVICE); + unmap->to_cnt++; + } + unmap->len = len; + + tx = device->device_prep_dma_xor_val(chan, unmap->addr, src_cnt, len, result, dma_prep_flags); if (unlikely(!tx)) { @@ -301,11 +309,11 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, while (!tx) { dma_async_issue_pending(chan); tx = device->device_prep_dma_xor_val(chan, - dma_src, src_cnt, len, result, + unmap->addr, src_cnt, len, result, dma_prep_flags); } } - + dma_set_unmap(tx, unmap); async_tx_submit(chan, tx, submit); } else { enum async_tx_flags flags_orig = submit->flags; @@ -327,6 +335,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, async_tx_sync_epilog(submit); submit->flags = flags_orig; } + dmaengine_unmap_put(unmap); return tx; } diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c index 4a92bac744dc..dad95f45b88f 100644 --- a/crypto/async_tx/raid6test.c +++ 
b/crypto/async_tx/raid6test.c @@ -28,7 +28,7 @@ #undef pr #define pr(fmt, args...) pr_info("raid6test: " fmt, ##args) -#define NDISKS 16 /* Including P and Q */ +#define NDISKS 64 /* Including P and Q */ static struct page *dataptrs[NDISKS]; static addr_conv_t addr_conv[NDISKS]; @@ -219,6 +219,14 @@ static int raid6_test(void) err += test(11, &tests); err += test(12, &tests); } + + /* the 24 disk case is special for ioatdma as it is the boundary point + * at which it needs to switch from 8-source ops to 16-source + * ops for continuation (assumes DMA_HAS_PQ_CONTINUE is not set) + */ + if (NDISKS > 24) + err += test(24, &tests); + err += test(NDISKS, &tests); pr("\n"); diff --git a/crypto/camellia_generic.c b/crypto/camellia_generic.c index 75efa2052305..26bcd7a2d6b4 100644 --- a/crypto/camellia_generic.c +++ b/crypto/camellia_generic.c @@ -388,8 +388,8 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) /* round 6 */ subL[7] ^= subL[1]; subR[7] ^= subR[1]; subL[1] ^= subR[1] & ~subR[9]; - dw = subL[1] & subL[9], - subR[1] ^= rol32(dw, 1); /* modified for FLinv(kl2) */ + dw = subL[1] & subL[9]; + subR[1] ^= rol32(dw, 1); /* modified for FLinv(kl2) */ /* round 8 */ subL[11] ^= subL[1]; subR[11] ^= subR[1]; /* round 10 */ @@ -397,8 +397,8 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) /* round 12 */ subL[15] ^= subL[1]; subR[15] ^= subR[1]; subL[1] ^= subR[1] & ~subR[17]; - dw = subL[1] & subL[17], - subR[1] ^= rol32(dw, 1); /* modified for FLinv(kl4) */ + dw = subL[1] & subL[17]; + subR[1] ^= rol32(dw, 1); /* modified for FLinv(kl4) */ /* round 14 */ subL[19] ^= subL[1]; subR[19] ^= subR[1]; /* round 16 */ @@ -413,8 +413,8 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) kw4l = subL[25]; kw4r = subR[25]; } else { subL[1] ^= subR[1] & ~subR[25]; - dw = subL[1] & subL[25], - subR[1] ^= rol32(dw, 1); /* modified for FLinv(kl6) */ + dw = subL[1] & subL[25]; + subR[1] ^= rol32(dw, 1); /* modified for FLinv(kl6) */ /* round 20 */ subL[27] ^= subL[1]; subR[27] ^= subR[1]; /* round 22 */ @@ -433,8 +433,8 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) /* round 19 */ subL[26] ^= kw4l; subR[26] ^= kw4r; kw4l ^= kw4r & ~subR[24]; - dw = kw4l & subL[24], - kw4r ^= rol32(dw, 1); /* modified for FL(kl5) */ + dw = kw4l & subL[24]; + kw4r ^= rol32(dw, 1); /* modified for FL(kl5) */ } /* round 17 */ subL[22] ^= kw4l; subR[22] ^= kw4r; @@ -443,8 +443,8 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) /* round 13 */ subL[18] ^= kw4l; subR[18] ^= kw4r; kw4l ^= kw4r & ~subR[16]; - dw = kw4l & subL[16], - kw4r ^= rol32(dw, 1); /* modified for FL(kl3) */ + dw = kw4l & subL[16]; + kw4r ^= rol32(dw, 1); /* modified for FL(kl3) */ /* round 11 */ subL[14] ^= kw4l; subR[14] ^= kw4r; /* round 9 */ @@ -452,8 +452,8 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) /* round 7 */ subL[10] ^= kw4l; subR[10] ^= kw4r; kw4l ^= kw4r & ~subR[8]; - dw = kw4l & subL[8], - kw4r ^= rol32(dw, 1); /* modified for FL(kl1) */ + dw = kw4l & subL[8]; + kw4r ^= rol32(dw, 1); /* modified for FL(kl1) */ /* round 5 */ subL[6] ^= kw4l; subR[6] ^= kw4r; /* round 3 */ @@ -477,8 +477,8 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) SUBKEY_L(6) = subL[5] ^ subL[7]; /* round 5 */ SUBKEY_R(6) = subR[5] ^ subR[7]; tl = subL[10] ^ (subR[10] & ~subR[8]); - dw = tl & subL[8], /*
FL(kl1) */ + tr = subR[10] ^ rol32(dw, 1); SUBKEY_L(7) = subL[6] ^ tl; /* round 6 */ SUBKEY_R(7) = subR[6] ^ tr; SUBKEY_L(8) = subL[8]; /* FL(kl1) */ @@ -486,8 +486,8 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) SUBKEY_L(9) = subL[9]; /* FLinv(kl2) */ SUBKEY_R(9) = subR[9]; tl = subL[7] ^ (subR[7] & ~subR[9]); - dw = tl & subL[9], /* FLinv(kl2) */ - tr = subR[7] ^ rol32(dw, 1); + dw = tl & subL[9]; /* FLinv(kl2) */ + tr = subR[7] ^ rol32(dw, 1); SUBKEY_L(10) = tl ^ subL[11]; /* round 7 */ SUBKEY_R(10) = tr ^ subR[11]; SUBKEY_L(11) = subL[10] ^ subL[12]; /* round 8 */ @@ -499,8 +499,8 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) SUBKEY_L(14) = subL[13] ^ subL[15]; /* round 11 */ SUBKEY_R(14) = subR[13] ^ subR[15]; tl = subL[18] ^ (subR[18] & ~subR[16]); - dw = tl & subL[16], /* FL(kl3) */ - tr = subR[18] ^ rol32(dw, 1); + dw = tl & subL[16]; /* FL(kl3) */ + tr = subR[18] ^ rol32(dw, 1); SUBKEY_L(15) = subL[14] ^ tl; /* round 12 */ SUBKEY_R(15) = subR[14] ^ tr; SUBKEY_L(16) = subL[16]; /* FL(kl3) */ @@ -508,8 +508,8 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) SUBKEY_L(17) = subL[17]; /* FLinv(kl4) */ SUBKEY_R(17) = subR[17]; tl = subL[15] ^ (subR[15] & ~subR[17]); - dw = tl & subL[17], /* FLinv(kl4) */ - tr = subR[15] ^ rol32(dw, 1); + dw = tl & subL[17]; /* FLinv(kl4) */ + tr = subR[15] ^ rol32(dw, 1); SUBKEY_L(18) = tl ^ subL[19]; /* round 13 */ SUBKEY_R(18) = tr ^ subR[19]; SUBKEY_L(19) = subL[18] ^ subL[20]; /* round 14 */ @@ -527,8 +527,8 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) SUBKEY_R(24) = subR[24] ^ subR[23]; } else { tl = subL[26] ^ (subR[26] & ~subR[24]); - dw = tl & subL[24], /* FL(kl5) */ - tr = subR[26] ^ rol32(dw, 1); + dw = tl & subL[24]; /* FL(kl5) */ + tr = subR[26] ^ rol32(dw, 1); SUBKEY_L(23) = subL[22] ^ tl; /* round 18 */ SUBKEY_R(23) = subR[22] ^ tr; SUBKEY_L(24) = subL[24]; /* FL(kl5) */ @@ -536,8 +536,8 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) SUBKEY_L(25) = subL[25]; /* FLinv(kl6) */ SUBKEY_R(25) = subR[25]; tl = subL[23] ^ (subR[23] & ~subR[25]); - dw = tl & subL[25], /* FLinv(kl6) */ - tr = subR[23] ^ rol32(dw, 1); + dw = tl & subL[25]; /* FLinv(kl6) */ + tr = subR[23] ^ rol32(dw, 1); SUBKEY_L(26) = tl ^ subL[27]; /* round 19 */ SUBKEY_R(26) = tr ^ subR[27]; SUBKEY_L(27) = subL[26] ^ subL[28]; /* round 20 */ diff --git a/crypto/cast_common.c b/crypto/cast_common.c index a15f523d5f56..117dd8250f27 100644 --- a/crypto/cast_common.c +++ b/crypto/cast_common.c @@ -15,7 +15,7 @@ #include <linux/module.h> #include <crypto/cast_common.h> -const u32 cast_s1[256] = { +__visible const u32 cast_s1[256] = { 0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f, 0x9c004dd3, 0x6003e540, 0xcf9fc949, 0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, 0x6e63a0e0, @@ -83,7 +83,7 @@ const u32 cast_s1[256] = { }; EXPORT_SYMBOL_GPL(cast_s1); -const u32 cast_s2[256] = { +__visible const u32 cast_s2[256] = { 0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a, 0xeec5207a, 0x55889c94, 0x72fc0651, 0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, 0x99c430ef, @@ -151,7 +151,7 @@ const u32 cast_s2[256] = { }; EXPORT_SYMBOL_GPL(cast_s2); -const u32 cast_s3[256] = { +__visible const u32 cast_s3[256] = { 0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff, 0x369fe44b, 0x8c1fc644, 0xaececa90, 0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806, @@ -219,7 +219,7 @@ const u32 cast_s3[256] = { }; 
EXPORT_SYMBOL_GPL(cast_s3); -const u32 cast_s4[256] = { +__visible const u32 cast_s4[256] = { 0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb, 0x64ad8c57, 0x85510443, 0xfa020ed1, 0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, 0xfd059d43, diff --git a/crypto/crct10dif_common.c b/crypto/crct10dif_common.c new file mode 100644 index 000000000000..b2fab366f518 --- /dev/null +++ b/crypto/crct10dif_common.c @@ -0,0 +1,82 @@ +/* + * Cryptographic API. + * + * T10 Data Integrity Field CRC16 Crypto Transform + * + * Copyright (c) 2007 Oracle Corporation. All rights reserved. + * Written by Martin K. Petersen <martin.petersen@oracle.com> + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen <tim.c.chen@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/crc-t10dif.h> +#include <linux/module.h> +#include <linux/kernel.h> + +/* Table generated using the following polynomial: + * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 + * gt: 0x8bb7 + */ +static const __u16 t10_dif_crc_table[256] = { + 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, + 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, + 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, + 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, + 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, + 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, + 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, + 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, + 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, + 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, + 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, + 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, + 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, + 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, + 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, + 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, + 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, + 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, + 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, + 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, + 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, + 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, + 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, + 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, + 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, + 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, + 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, + 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, + 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, + 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, + 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, + 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 +}; + +__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len) +{ + unsigned int i; + + for (i = 0 ; i < len ; i++) + crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; + + return crc; +} +EXPORT_SYMBOL(crc_t10dif_generic); + +MODULE_DESCRIPTION("T10 DIF CRC calculation common code"); +MODULE_LICENSE("GPL"); diff --git a/crypto/crct10dif_generic.c b/crypto/crct10dif_generic.c new file mode 100644 index 000000000000..877e7114ec5c --- /dev/null +++ b/crypto/crct10dif_generic.c @@ -0,0 +1,127 @@ +/* + * Cryptographic API. + * + * T10 Data Integrity Field CRC16 Crypto Transform + * + * Copyright (c) 2007 Oracle Corporation. All rights reserved. + * Written by Martin K. Petersen <martin.petersen@oracle.com> + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen <tim.c.chen@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/module.h> +#include <linux/crc-t10dif.h> +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/kernel.h> + +struct chksum_desc_ctx { + __u16 crc; +}; + +/* + * Steps through buffer one byte at a time, calculates the + * crc using the table.
+ */ + +static int chksum_init(struct shash_desc *desc) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = 0; + + return 0; +} + +static int chksum_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = crc_t10dif_generic(ctx->crc, data, length); + return 0; +} + +static int chksum_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + *(__u16 *)out = ctx->crc; + return 0; +} + +static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); + return 0; +} + +static int chksum_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(&ctx->crc, data, len, out); +} + +static int chksum_digest(struct shash_desc *desc, const u8 *data, + unsigned int length, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(&ctx->crc, data, length, out); +} + +static struct shash_alg alg = { + .digestsize = CRC_T10DIF_DIGEST_SIZE, + .init = chksum_init, + .update = chksum_update, + .final = chksum_final, + .finup = chksum_finup, + .digest = chksum_digest, + .descsize = sizeof(struct chksum_desc_ctx), + .base = { + .cra_name = "crct10dif", + .cra_driver_name = "crct10dif-generic", + .cra_priority = 100, + .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init crct10dif_mod_init(void) +{ + int ret; + + ret = crypto_register_shash(&alg); + return ret; +} + +static void __exit crct10dif_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crct10dif_mod_init); +module_exit(crct10dif_mod_fini); + +MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); +MODULE_DESCRIPTION("T10 DIF CRC calculation."); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("crct10dif"); diff --git a/crypto/fcrypt.c b/crypto/fcrypt.c index 3b2cf569c684..021d7fec6bc8 100644 --- a/crypto/fcrypt.c +++ b/crypto/fcrypt.c @@ -110,7 +110,7 @@ static const __be32 sbox0[256] = { }; #undef Z -#define Z(x) cpu_to_be32((x << 27) | (x >> 5)) +#define Z(x) cpu_to_be32(((x & 0x1f) << 27) | (x >> 5)) static const __be32 sbox1[256] = { Z(0x77), Z(0x14), Z(0xa6), Z(0xfe), Z(0xb2), Z(0x5e), Z(0x8c), Z(0x3e), Z(0x67), Z(0x6c), Z(0xa1), Z(0x0d), Z(0xc2), Z(0xa2), Z(0xc1), Z(0x85), diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index 7281b8a93ad3..79ca2278c2a3 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -124,3 +124,25 @@ void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg, scatterwalk_done(&walk, out, 0); } EXPORT_SYMBOL_GPL(scatterwalk_map_and_copy); + +int scatterwalk_bytes_sglen(struct scatterlist *sg, int num_bytes) +{ + int offset = 0, n = 0; + + /* num_bytes is too small */ + if (num_bytes < sg->length) + return -1; + + do { + offset += sg->length; + n++; + sg = scatterwalk_sg_next(sg); + + /* num_bytes is too large */ + if (unlikely(!sg && (num_bytes < offset))) + return -1; + } while (sg && (num_bytes > offset)); + + return n; +} +EXPORT_SYMBOL_GPL(scatterwalk_bytes_sglen); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 66d254ce0d11..1ab8258fcf56 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -493,7 +493,7 @@ static inline int do_one_ahash_op(struct ahash_request *req, int ret) ret = wait_for_completion_interruptible(&tr->completion); if (!ret) ret = tr->err; - 
INIT_COMPLETION(tr->completion); + reinit_completion(&tr->completion); } return ret; } @@ -721,7 +721,7 @@ static inline int do_one_acipher_op(struct ablkcipher_request *req, int ret) ret = wait_for_completion_interruptible(&tr->completion); if (!ret) ret = tr->err; - INIT_COMPLETION(tr->completion); + reinit_completion(&tr->completion); } return ret; @@ -1174,6 +1174,10 @@ static int do_test(int m) ret += tcrypt_test("ghash"); break; + case 47: + ret += tcrypt_test("crct10dif"); + break; + case 100: ret += tcrypt_test("hmac(md5)"); break; @@ -1498,6 +1502,10 @@ static int do_test(int m) test_hash_speed("crc32c", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; + case 320: + test_hash_speed("crct10dif", sec, generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + case 399: break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index ecddf921a9db..432afc03e7c3 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -179,7 +179,7 @@ static int do_one_async_hash_op(struct ahash_request *req, ret = wait_for_completion_interruptible(&tr->completion); if (!ret) ret = tr->err; - INIT_COMPLETION(tr->completion); + reinit_completion(&tr->completion); } return ret; } @@ -336,7 +336,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, ret = wait_for_completion_interruptible( &tresult.completion); if (!ret && !(ret = tresult.err)) { - INIT_COMPLETION(tresult.completion); + reinit_completion(&tresult.completion); break; } /* fall through */ @@ -543,7 +543,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc, ret = wait_for_completion_interruptible( &result.completion); if (!ret && !(ret = result.err)) { - INIT_COMPLETION(result.completion); + reinit_completion(&result.completion); break; } case -EBADMSG: @@ -697,7 +697,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc, ret = wait_for_completion_interruptible( &result.completion); if (!ret && !(ret = result.err)) { - INIT_COMPLETION(result.completion); + reinit_completion(&result.completion); break; } case -EBADMSG: @@ -983,7 +983,7 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, ret = wait_for_completion_interruptible( &result.completion); if (!ret && !((ret = result.err))) { - INIT_COMPLETION(result.completion); + reinit_completion(&result.completion); break; } /* fall through */ @@ -1086,7 +1086,7 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, ret = wait_for_completion_interruptible( &result.completion); if (!ret && !((ret = result.err))) { - INIT_COMPLETION(result.completion); + reinit_completion(&result.completion); break; } /* fall through */ @@ -2046,6 +2046,16 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "crct10dif", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = { + .vecs = crct10dif_tv_template, + .count = CRCT10DIF_TEST_VECTORS + } + } + }, { .alg = "cryptd(__driver-cbc-aes-aesni)", .test = alg_test_null, .fips_allowed = 1, @@ -3224,7 +3234,7 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask) if (i >= 0) rc |= alg_test_descs[i].test(alg_test_descs + i, driver, type, mask); - if (j >= 0) + if (j >= 0 && j != i) rc |= alg_test_descs[j].test(alg_test_descs + j, driver, type, mask); diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 1e701bc075b9..7d44aa3d6b44 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -450,6 +450,39 @@ static struct hash_testvec rmd320_tv_template[] = { } }; +#define CRCT10DIF_TEST_VECTORS 3 +static struct 
hash_testvec crct10dif_tv_template[] = { + { + .plaintext = "abc", + .psize = 3, +#ifdef __LITTLE_ENDIAN + .digest = "\x3b\x44", +#else + .digest = "\x44\x3b", +#endif + }, { + .plaintext = "1234567890123456789012345678901234567890" + "123456789012345678901234567890123456789", + .psize = 79, +#ifdef __LITTLE_ENDIAN + .digest = "\x70\x4b", +#else + .digest = "\x4b\x70", +#endif + }, { + .plaintext = + "abcddddddddddddddddddddddddddddddddddddddddddddddddddddd", + .psize = 56, +#ifdef __LITTLE_ENDIAN + .digest = "\xe3\x9c", +#else + .digest = "\x9c\xe3", +#endif + .np = 2, + .tap = { 28, 28 } + } +}; + /* * SHA1 test vectors from FIPS PUB 180-1 * Long vector from CAVS 5.0
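As a closing illustration (not part of the patch), here is a hedged sketch of driving the new "crct10dif" transform through the kernel's shash API and checking it against the first test vector above ("abc" gives CRC 0x443b, stored least-significant byte first on little-endian). The function name crct10dif_demo is hypothetical; error handling is trimmed.

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/slab.h>

static int crct10dif_demo(void)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	u8 out[2];	/* CRC_T10DIF_DIGEST_SIZE is 2 bytes */
	int err;

	tfm = crypto_alloc_shash("crct10dif", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* a shash_desc is followed in memory by the per-request context */
	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}
	desc->tfm = tfm;

	err = crypto_shash_digest(desc, "abc", 3, out);
	/* on little-endian, out[] should now be { 0x3b, 0x44 }, i.e. 0x443b */

	kfree(desc);
	crypto_free_shash(tfm);
	return err;
}

Note that chksum_final() stores the CRC with *(__u16 *)out = ctx->crc, so the digest's byte order follows host endianness; that is exactly why the vectors above carry #ifdef __LITTLE_ENDIAN alternatives.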