From 099f53cb50e45ef617a9f1d63ceec799e489418b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 8 Apr 2009 14:28:37 -0700 Subject: async_tx: rename zero_sum to val 'zero_sum' does not properly describe the operation of generating parity and checking that it validates against an existing buffer. Change the name of the operation to 'val' (for 'validate'). This is in anticipation of the p+q case where it is a requirement to identify the target parity buffers separately from the source buffers, because the target parity buffers will not have corresponding pq coefficients. Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- crypto/async_tx/async_xor.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 95fe2c8d6c51..e0580b0ea533 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -222,7 +222,7 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) } /** - * async_xor_zero_sum - attempt a xor parity check with a dma engine. + * async_xor_val - attempt a xor parity check with a dma engine. * @dest: destination page used if the xor is performed synchronously * @src_list: array of source pages. The dest page must be listed as a source * at index zero. The contents of this array may be overwritten. @@ -236,13 +236,13 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) * @cb_param: parameter to pass to the callback routine */ struct dma_async_tx_descriptor * -async_xor_zero_sum(struct page *dest, struct page **src_list, +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, u32 *result, enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, dma_async_tx_callback cb_fn, void *cb_param) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM, + struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR_VAL, &dest, 1, src_list, src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; @@ -261,15 +261,15 @@ async_xor_zero_sum(struct page *dest, struct page **src_list, dma_src[i] = dma_map_page(device->dev, src_list[i], offset, len, DMA_TO_DEVICE); - tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt, - len, result, - dma_prep_flags); + tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt, + len, result, + dma_prep_flags); if (unlikely(!tx)) { async_tx_quiesce(&depend_tx); while (!tx) { dma_async_issue_pending(chan); - tx = device->device_prep_dma_zero_sum(chan, + tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt, len, result, dma_prep_flags); } @@ -296,7 +296,7 @@ async_xor_zero_sum(struct page *dest, struct page **src_list, return tx; } -EXPORT_SYMBOL_GPL(async_xor_zero_sum); +EXPORT_SYMBOL_GPL(async_xor_val); static int __init async_xor_init(void) { -- cgit v1.2.3-59-g8ed1b From 88ba2aa586c874681c072101287e15d40de7e6e2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Apr 2009 16:16:18 -0700 Subject: async_tx: kill ASYNC_TX_DEP_ACK flag In support of inter-channel chaining async_tx utilizes an ack flag to gate whether a dependent operation can be chained to another. While the flag is not set the chain can be considered open for appending. Setting the ack flag closes the chain and flags the descriptor for garbage collection. The ASYNC_TX_DEP_ACK flag essentially means "close the chain after adding this dependency". Since each operation can only have one child the api now implicitly sets the ack flag at dependency submission time. This removes an unnecessary management burden from clients of the api. [ Impact: clean up and enforce one dependency per operation ] Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 9 ++++----- crypto/async_tx/async_memcpy.c | 2 +- crypto/async_tx/async_memset.c | 2 +- crypto/async_tx/async_tx.c | 4 ++-- crypto/async_tx/async_xor.c | 6 ++---- drivers/md/raid5.c | 25 +++++++++++-------------- include/linux/async_tx.h | 4 +--- 7 files changed, 22 insertions(+), 30 deletions(-) (limited to 'crypto') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 4af12180d191..76feda8541dc 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -80,8 +80,8 @@ acknowledged by the application before the offload engine driver is allowed to recycle (or free) the descriptor. A descriptor can be acked by one of the following methods: 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted -2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent - descriptor of a new operation. +2/ submitting an unacknowledged descriptor as a dependency to another + async_tx call will implicitly set the acknowledged state. 3/ calling async_tx_ack() on the descriptor. 3.4 When does the operation execute? @@ -136,10 +136,9 @@ int run_xor_copy_xor(struct page **xor_srcs, tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL); - tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, - ASYNC_TX_DEP_ACK, tx, NULL, NULL); + tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, tx, NULL, NULL); tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, - ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, + ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx, complete_xor_copy_xor, NULL); async_tx_issue_pending_all(); diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index ddccfb01c416..7117ec6f1b74 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -35,7 +35,7 @@ * @src: src page * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, + * @flags: ASYNC_TX_ACK * @depend_tx: memcpy depends on the result of this transaction * @cb_fn: function to call when the memcpy completes * @cb_param: parameter to pass to the callback routine diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index 5b5eb99bb244..b2f133885b7f 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -35,7 +35,7 @@ * @val: fill value * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_ACK * @depend_tx: memset depends on the result of this transaction * @cb_fn: function to call when the memcpy completes * @cb_param: parameter to pass to the callback routine diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 06eb6cc09fef..3766bc3d7d89 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -223,7 +223,7 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, if (flags & ASYNC_TX_ACK) async_tx_ack(tx); - if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) + if (depend_tx) async_tx_ack(depend_tx); } EXPORT_SYMBOL_GPL(async_tx_submit); @@ -231,7 +231,7 @@ EXPORT_SYMBOL_GPL(async_tx_submit); /** * async_trigger_callback - schedules the callback function to be run after * any dependent operations have been completed. - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_ACK * @depend_tx: 'callback' requires the completion of this transaction * @cb_fn: function to call after depend_tx completes * @cb_param: parameter to pass to the callback routine diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index e0580b0ea533..3cc5dc763b54 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -105,7 +105,6 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, _cb_param); depend_tx = tx; - flags |= ASYNC_TX_DEP_ACK; if (src_cnt > xor_src_cnt) { /* drop completed sources */ @@ -168,8 +167,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, * @offset: offset in pages to start transaction * @src_cnt: number of source pages * @len: length in bytes - * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, - * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, ASYNC_TX_ACK * @depend_tx: xor depends on the result of this transaction. * @cb_fn: function to call when the xor completes * @cb_param: parameter to pass to the callback routine @@ -230,7 +228,7 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) * @src_cnt: number of source pages * @len: length in bytes * @result: 0 if sum == 0 else non-zero - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_ACK * @depend_tx: xor depends on the result of this transaction. * @cb_fn: function to call when the xor completes * @cb_param: parameter to pass to the callback routine diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f8d2d35ed298..0ef5362c8d02 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -525,14 +525,12 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, bio_page = bio_iovec_idx(bio, i)->bv_page; if (frombio) tx = async_memcpy(page, bio_page, page_offset, - b_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + b_offset, clen, 0, + tx, NULL, NULL); else tx = async_memcpy(bio_page, page, b_offset, - page_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + page_offset, clen, 0, + tx, NULL, NULL); } if (clen < len) /* hit end of page */ break; @@ -615,8 +613,7 @@ static void ops_run_biofill(struct stripe_head *sh) } atomic_inc(&sh->count); - async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, - ops_complete_biofill, sh); + async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_biofill, sh); } static void ops_complete_compute5(void *stripe_head_ref) @@ -701,8 +698,8 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) } tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, - ops_complete_prexor, sh); + ASYNC_TX_XOR_DROP_DST, tx, + ops_complete_prexor, sh); return tx; } @@ -809,7 +806,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST * for the synchronous xor case */ - flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | + flags = ASYNC_TX_ACK | (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); atomic_inc(&sh->count); @@ -858,7 +855,7 @@ static void ops_run_check(struct stripe_head *sh) &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); atomic_inc(&sh->count); - tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, + tx = async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_check, sh); } @@ -2687,8 +2684,8 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, /* place all the copies on one channel */ tx = async_memcpy(sh2->dev[dd_idx].page, - sh->dev[i].page, 0, 0, STRIPE_SIZE, - ASYNC_TX_DEP_ACK, tx, NULL, NULL); + sh->dev[i].page, 0, 0, STRIPE_SIZE, + 0, tx, NULL, NULL); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 513150d8c25b..9f14cd540cd2 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -58,13 +58,11 @@ struct dma_chan_ref { * array. * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a * dependency chain - * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. */ enum async_tx_flags { ASYNC_TX_XOR_ZERO_DST = (1 << 0), ASYNC_TX_XOR_DROP_DST = (1 << 1), - ASYNC_TX_ACK = (1 << 3), - ASYNC_TX_DEP_ACK = (1 << 4), + ASYNC_TX_ACK = (1 << 2), }; #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3-59-g8ed1b From a08abd8ca890a377521d65d493d174bebcaf694b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 3 Jun 2009 11:43:59 -0700 Subject: async_tx: structify submission arguments, add scribble Prepare the api for the arrival of a new parameter, 'scribble'. This will allow callers to identify scratchpad memory for dma address or page address conversions. As this adds yet another parameter, take this opportunity to convert the common submission parameters (flags, dependency, callback, and callback argument) into an object that is passed by reference. Also, take this opportunity to fix up the kerneldoc and add notes about the relevant ASYNC_TX_* flags for each routine. [ Impact: moves api pass-by-value parameters to a pass-by-reference struct ] Signed-off-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 6 +- crypto/async_tx/async_memcpy.c | 26 +++---- crypto/async_tx/async_memset.c | 25 +++---- crypto/async_tx/async_tx.c | 51 +++++++------- crypto/async_tx/async_xor.c | 123 +++++++++++++++++----------------- drivers/md/raid5.c | 59 +++++++++------- include/linux/async_tx.h | 84 ++++++++++++++--------- 7 files changed, 200 insertions(+), 174 deletions(-) (limited to 'crypto') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 76feda8541dc..dfe0475f7919 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -54,11 +54,7 @@ features surfaced as a result: 3.1 General format of the API: struct dma_async_tx_descriptor * -async_(, - enum async_tx_flags flags, - struct dma_async_tx_descriptor *dependency, - dma_async_tx_callback callback_routine, - void *callback_parameter); +async_(, struct async_submit ctl *submit) 3.2 Supported operations: memcpy - memory copy between a source and a destination buffer diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index 7117ec6f1b74..89e05556f3df 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -33,28 +33,28 @@ * async_memcpy - attempt to copy memory with a dma engine. * @dest: destination page * @src: src page - * @offset: offset in pages to start transaction + * @dest_offset: offset into 'dest' to start transaction + * @src_offset: offset into 'src' to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK - * @depend_tx: memcpy depends on the result of this transaction - * @cb_fn: function to call when the memcpy completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK */ struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY, &dest, 1, &src, 1, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; if (device) { dma_addr_t dma_dest, dma_src; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags; + dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; dma_dest = dma_map_page(device->dev, dest, dest_offset, len, DMA_FROM_DEVICE); @@ -67,13 +67,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { void *dest_buf, *src_buf; pr_debug("%s: (sync) len: %zu\n", __func__, len); /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; src_buf = kmap_atomic(src, KM_USER1) + src_offset; @@ -83,7 +83,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, kunmap_atomic(dest_buf, KM_USER0); kunmap_atomic(src_buf, KM_USER1); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index b2f133885b7f..c14437238f4c 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -35,26 +35,23 @@ * @val: fill value * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK - * @depend_tx: memset depends on the result of this transaction - * @cb_fn: function to call when the memcpy completes - * @cb_param: parameter to pass to the callback routine + * + * honored flags: ASYNC_TX_ACK */ struct dma_async_tx_descriptor * -async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_memset(struct page *dest, int val, unsigned int offset, size_t len, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET, &dest, 1, NULL, 0, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; if (device) { dma_addr_t dma_dest; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags; + dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; dma_dest = dma_map_page(device->dev, dest, offset, len, DMA_FROM_DEVICE); @@ -64,19 +61,19 @@ async_memset(struct page *dest, int val, unsigned int offset, if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { /* run the memset synchronously */ void *dest_buf; pr_debug("%s: (sync) len: %zu\n", __func__, len); - dest_buf = (void *) (((char *) page_address(dest)) + offset); + dest_buf = page_address(dest) + offset; /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); memset(dest_buf, val, len); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 3766bc3d7d89..802a5ce437d9 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -45,13 +45,15 @@ static void __exit async_tx_exit(void) /** * __async_tx_find_channel - find a channel to carry out the operation or let * the transaction execute synchronously - * @depend_tx: transaction dependency + * @submit: transaction dependency and submission modifiers * @tx_type: transaction type */ struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type) +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type) { + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; + /* see if we can keep the chain on one channel */ if (depend_tx && dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) @@ -144,13 +146,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, /** - * submit_disposition - while holding depend_tx->lock we must avoid submitting - * new operations to prevent a circular locking dependency with - * drivers that already hold a channel lock when calling - * async_tx_run_dependencies. + * submit_disposition - flags for routing an incoming operation * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly + * + * while holding depend_tx->lock we must avoid submitting new operations + * to prevent a circular locking dependency with drivers that already + * hold a channel lock when calling async_tx_run_dependencies. */ enum submit_disposition { ASYNC_TX_SUBMITTED, @@ -160,11 +163,12 @@ enum submit_disposition { void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + struct async_submit_ctl *submit) { - tx->callback = cb_fn; - tx->callback_param = cb_param; + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; + + tx->callback = submit->cb_fn; + tx->callback_param = submit->cb_param; if (depend_tx) { enum submit_disposition s; @@ -220,7 +224,7 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, tx->tx_submit(tx); } - if (flags & ASYNC_TX_ACK) + if (submit->flags & ASYNC_TX_ACK) async_tx_ack(tx); if (depend_tx) @@ -229,21 +233,20 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, EXPORT_SYMBOL_GPL(async_tx_submit); /** - * async_trigger_callback - schedules the callback function to be run after - * any dependent operations have been completed. - * @flags: ASYNC_TX_ACK - * @depend_tx: 'callback' requires the completion of this transaction - * @cb_fn: function to call after depend_tx completes - * @cb_param: parameter to pass to the callback routine + * async_trigger_callback - schedules the callback function to be run + * @submit: submission and completion parameters + * + * honored flags: ASYNC_TX_ACK + * + * The callback is run after any dependent operations have completed. */ struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_trigger_callback(struct async_submit_ctl *submit) { struct dma_chan *chan; struct dma_device *device; struct dma_async_tx_descriptor *tx; + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; if (depend_tx) { chan = depend_tx->chan; @@ -262,14 +265,14 @@ async_trigger_callback(enum async_tx_flags flags, if (tx) { pr_debug("%s: (async)\n", __func__); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { pr_debug("%s: (sync)\n", __func__); /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 3cc5dc763b54..691fa98a18c4 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -34,18 +34,16 @@ static __async_inline struct dma_async_tx_descriptor * do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, - enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + struct async_submit_ctl *submit) { struct dma_device *dma = chan->device; dma_addr_t *dma_src = (dma_addr_t *) src_list; struct dma_async_tx_descriptor *tx = NULL; int src_off = 0; int i; - dma_async_tx_callback _cb_fn; - void *_cb_param; - enum async_tx_flags async_flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + void *cb_param_orig = submit->cb_param; + enum async_tx_flags flags_orig = submit->flags; enum dma_ctrl_flags dma_flags; int xor_src_cnt; dma_addr_t dma_dest; @@ -63,7 +61,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, } while (src_cnt) { - async_flags = flags; + submit->flags = flags_orig; dma_flags = 0; xor_src_cnt = min(src_cnt, dma->max_xor); /* if we are submitting additional xors, leave the chain open, @@ -71,15 +69,15 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, * buffer mapped */ if (src_cnt > xor_src_cnt) { - async_flags &= ~ASYNC_TX_ACK; + submit->flags &= ~ASYNC_TX_ACK; dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; - _cb_fn = NULL; - _cb_param = NULL; + submit->cb_fn = NULL; + submit->cb_param = NULL; } else { - _cb_fn = cb_fn; - _cb_param = cb_param; + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; } - if (_cb_fn) + if (submit->cb_fn) dma_flags |= DMA_PREP_INTERRUPT; /* Since we have clobbered the src_list we are committed @@ -90,7 +88,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, xor_src_cnt, len, dma_flags); if (unlikely(!tx)) - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); /* spin wait for the preceeding transactions to complete */ while (unlikely(!tx)) { @@ -101,10 +99,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, dma_flags); } - async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn, - _cb_param); - - depend_tx = tx; + async_tx_submit(chan, tx, submit); + submit->depend_tx = tx; if (src_cnt > xor_src_cnt) { /* drop completed sources */ @@ -123,8 +119,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, static void do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - dma_async_tx_callback cb_fn, void *cb_param) + int src_cnt, size_t len, struct async_submit_ctl *submit) { int i; int xor_src_cnt; @@ -139,7 +134,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, /* set destination address */ dest_buf = page_address(dest) + offset; - if (flags & ASYNC_TX_XOR_ZERO_DST) + if (submit->flags & ASYNC_TX_XOR_ZERO_DST) memset(dest_buf, 0, len); while (src_cnt > 0) { @@ -152,33 +147,35 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, src_off += xor_src_cnt; } - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } /** * async_xor - attempt to xor a set of blocks with a dma engine. - * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST - * flag must be set to not include dest data in the calculation. The - * assumption with dma eninges is that they only use the destination - * buffer as a source when it is explicity specified in the source list. * @dest: destination page - * @src_list: array of source pages (if the dest is also a source it must be - * at index zero). The contents of this array may be overwritten. - * @offset: offset in pages to start transaction + * @src_list: array of source pages + * @offset: common src/dst offset to start transaction * @src_cnt: number of source pages * @len: length in bytes - * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, ASYNC_TX_ACK - * @depend_tx: xor depends on the result of this transaction. - * @cb_fn: function to call when the xor completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST + * + * xor_blocks always uses the dest as a source so the + * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in + * the calculation. The assumption with dma eninges is that they only + * use the destination buffer as a source when it is explicity specified + * in the source list. + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. */ struct dma_async_tx_descriptor * async_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + int src_cnt, size_t len, struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR, &dest, 1, src_list, src_cnt, len); BUG_ON(src_cnt <= 1); @@ -188,7 +185,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, pr_debug("%s (async): len: %zu\n", __func__, len); return do_async_xor(chan, dest, src_list, offset, src_cnt, len, - flags, depend_tx, cb_fn, cb_param); + submit); } else { /* run the xor synchronously */ pr_debug("%s (sync): len: %zu\n", __func__, len); @@ -196,16 +193,15 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, /* in the sync case the dest is an implied source * (assumes the dest is the first source) */ - if (flags & ASYNC_TX_XOR_DROP_DST) { + if (submit->flags & ASYNC_TX_XOR_DROP_DST) { src_cnt--; src_list++; } /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); - do_sync_xor(dest, src_list, offset, src_cnt, len, - flags, cb_fn, cb_param); + do_sync_xor(dest, src_list, offset, src_cnt, len, submit); return NULL; } @@ -222,25 +218,25 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) /** * async_xor_val - attempt a xor parity check with a dma engine. * @dest: destination page used if the xor is performed synchronously - * @src_list: array of source pages. The dest page must be listed as a source - * at index zero. The contents of this array may be overwritten. + * @src_list: array of source pages * @offset: offset in pages to start transaction * @src_cnt: number of source pages * @len: length in bytes * @result: 0 if sum == 0 else non-zero - * @flags: ASYNC_TX_ACK - * @depend_tx: xor depends on the result of this transaction. - * @cb_fn: function to call when the xor completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. */ struct dma_async_tx_descriptor * -async_xor_val(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, u32 *result, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR_VAL, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, &dest, 1, src_list, src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; @@ -250,11 +246,12 @@ async_xor_val(struct page *dest, struct page **src_list, if (device && src_cnt <= device->max_xor) { dma_addr_t *dma_src = (dma_addr_t *) src_list; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags; int i; pr_debug("%s: (async) len: %zu\n", __func__, len); + dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; for (i = 0; i < src_cnt; i++) dma_src[i] = dma_map_page(device->dev, src_list[i], offset, len, DMA_TO_DEVICE); @@ -263,7 +260,7 @@ async_xor_val(struct page *dest, struct page **src_list, len, result, dma_prep_flags); if (unlikely(!tx)) { - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); while (!tx) { dma_async_issue_pending(chan); @@ -273,23 +270,23 @@ async_xor_val(struct page *dest, struct page **src_list, } } - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { - unsigned long xor_flags = flags; + enum async_tx_flags flags_orig = submit->flags; pr_debug("%s: (sync) len: %zu\n", __func__, len); - xor_flags |= ASYNC_TX_XOR_DROP_DST; - xor_flags &= ~ASYNC_TX_ACK; + submit->flags |= ASYNC_TX_XOR_DROP_DST; + submit->flags &= ~ASYNC_TX_ACK; - tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, - depend_tx, NULL, NULL); + tx = async_xor(dest, src_list, offset, src_cnt, len, submit); async_tx_quiesce(&tx); *result = page_is_zero(dest, offset, len) ? 0 : 1; - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); + submit->flags = flags_orig; } return tx; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0ef5362c8d02..e1920f23579f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -499,11 +499,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, struct page *bio_page; int i; int page_offset; + struct async_submit_ctl submit; if (bio->bi_sector >= sector) page_offset = (signed)(bio->bi_sector - sector) * 512; else page_offset = (signed)(sector - bio->bi_sector) * -512; + + init_async_submit(&submit, 0, tx, NULL, NULL, NULL); bio_for_each_segment(bvl, bio, i) { int len = bio_iovec_idx(bio, i)->bv_len; int clen; @@ -525,13 +528,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, bio_page = bio_iovec_idx(bio, i)->bv_page; if (frombio) tx = async_memcpy(page, bio_page, page_offset, - b_offset, clen, 0, - tx, NULL, NULL); + b_offset, clen, &submit); else tx = async_memcpy(bio_page, page, b_offset, - page_offset, clen, 0, - tx, NULL, NULL); + page_offset, clen, &submit); } + /* chain the operations */ + submit.depend_tx = tx; + if (clen < len) /* hit end of page */ break; page_offset += len; @@ -590,6 +594,7 @@ static void ops_run_biofill(struct stripe_head *sh) { struct dma_async_tx_descriptor *tx = NULL; raid5_conf_t *conf = sh->raid_conf; + struct async_submit_ctl submit; int i; pr_debug("%s: stripe %llu\n", __func__, @@ -613,7 +618,8 @@ static void ops_run_biofill(struct stripe_head *sh) } atomic_inc(&sh->count); - async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_biofill, sh); + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL); + async_trigger_callback(&submit); } static void ops_complete_compute5(void *stripe_head_ref) @@ -645,6 +651,7 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) struct page *xor_dest = tgt->page; int count = 0; struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; int i; pr_debug("%s: stripe %llu block: %d\n", @@ -657,13 +664,12 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) atomic_inc(&sh->count); + init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, + ops_complete_compute5, sh, NULL); if (unlikely(count == 1)) - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, - 0, NULL, ops_complete_compute5, sh); + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_XOR_ZERO_DST, NULL, - ops_complete_compute5, sh); + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); return tx; } @@ -683,6 +689,7 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) int disks = sh->disks; struct page *xor_srcs[disks]; int count = 0, pd_idx = sh->pd_idx, i; + struct async_submit_ctl submit; /* existing parity data subtracted */ struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; @@ -697,9 +704,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) xor_srcs[count++] = dev->page; } - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_XOR_DROP_DST, tx, - ops_complete_prexor, sh); + init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx, + ops_complete_prexor, sh, NULL); + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); return tx; } @@ -772,7 +779,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) /* kernel stack size limits the total number of disks */ int disks = sh->disks; struct page *xor_srcs[disks]; - + struct async_submit_ctl submit; int count = 0, pd_idx = sh->pd_idx, i; struct page *xor_dest; int prexor = 0; @@ -811,13 +818,11 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) atomic_inc(&sh->count); - if (unlikely(count == 1)) { - flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, - flags, tx, ops_complete_postxor, sh); - } else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - flags, tx, ops_complete_postxor, sh); + init_async_submit(&submit, flags, tx, ops_complete_postxor, sh, NULL); + if (unlikely(count == 1)) + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); + else + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); } static void ops_complete_check(void *stripe_head_ref) @@ -838,6 +843,7 @@ static void ops_run_check(struct stripe_head *sh) int disks = sh->disks; struct page *xor_srcs[disks]; struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; int count = 0, pd_idx = sh->pd_idx, i; struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; @@ -851,12 +857,13 @@ static void ops_run_check(struct stripe_head *sh) xor_srcs[count++] = dev->page; } + init_async_submit(&submit, 0, NULL, NULL, NULL, NULL); tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); + &sh->ops.zero_sum_result, &submit); atomic_inc(&sh->count); - tx = async_trigger_callback(ASYNC_TX_ACK, tx, - ops_complete_check, sh); + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL); + tx = async_trigger_callback(&submit); } static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) @@ -2664,6 +2671,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, if (i != sh->pd_idx && i != sh->qd_idx) { int dd_idx, j; struct stripe_head *sh2; + struct async_submit_ctl submit; sector_t bn = compute_blocknr(sh, i, 1); sector_t s = raid5_compute_sector(conf, bn, 0, @@ -2683,9 +2691,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, } /* place all the copies on one channel */ + init_async_submit(&submit, 0, tx, NULL, NULL, NULL); tx = async_memcpy(sh2->dev[dd_idx].page, sh->dev[i].page, 0, 0, STRIPE_SIZE, - 0, tx, NULL, NULL); + &submit); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 9f14cd540cd2..00cfb637ddf2 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -65,6 +65,22 @@ enum async_tx_flags { ASYNC_TX_ACK = (1 << 2), }; +/** + * struct async_submit_ctl - async_tx submission/completion modifiers + * @flags: submission modifiers + * @depend_tx: parent dependency of the current operation being submitted + * @cb_fn: callback routine to run at operation completion + * @cb_param: parameter for the callback routine + * @scribble: caller provided space for dma/page address conversions + */ +struct async_submit_ctl { + enum async_tx_flags flags; + struct dma_async_tx_descriptor *depend_tx; + dma_async_tx_callback cb_fn; + void *cb_param; + void *scribble; +}; + #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL @@ -73,8 +89,8 @@ enum async_tx_flags { #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ __async_tx_find_channel(dep, type) struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type); +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type); #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ #else static inline void async_tx_issue_pending_all(void) @@ -83,9 +99,10 @@ static inline void async_tx_issue_pending_all(void) } static inline struct dma_chan * -async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type, struct page **dst, int dst_count, - struct page **src, int src_count, size_t len) +async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type, struct page **dst, + int dst_count, struct page **src, int src_count, + size_t len) { return NULL; } @@ -97,46 +114,53 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, * @cb_fn_param: parameter to pass to the callback routine */ static inline void -async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) +async_tx_sync_epilog(struct async_submit_ctl *submit) +{ + if (submit->cb_fn) + submit->cb_fn(submit->cb_param); +} + +typedef union { + unsigned long addr; + struct page *page; + dma_addr_t dma; +} addr_conv_t; + +static inline void +init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags, + struct dma_async_tx_descriptor *tx, + dma_async_tx_callback cb_fn, void *cb_param, + addr_conv_t *scribble) { - if (cb_fn) - cb_fn(cb_fn_param); + args->flags = flags; + args->depend_tx = tx; + args->cb_fn = cb_fn; + args->cb_param = cb_param; + args->scribble = scribble; } -void -async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + int src_cnt, size_t len, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_xor_val(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, u32 *result, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + size_t len, struct async_submit_ctl *submit); -struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ -- cgit v1.2.3-59-g8ed1b From 04ce9ab385dc97eb55299d533cd3af79b8fc7529 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 3 Jun 2009 14:22:28 -0700 Subject: async_xor: permit callers to pass in a 'dma/page scribble' region async_xor() needs space to perform dma and page address conversions. In most cases the code can simply reuse the struct page * array because the size of the native pointer matches the size of a dma/page address. In order to support archs where sizeof(dma_addr_t) is larger than sizeof(struct page *), or to preserve the input parameters, we utilize a memory region passed in by the caller. Since the code is now prepared to handle the case where it cannot perform address conversions on the stack, we no longer need the !HIGHMEM64G dependency in drivers/dma/Kconfig. [ Impact: don't clobber input buffers for address conversions ] Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 43 ++++++++++++++++--------- crypto/async_tx/async_xor.c | 60 +++++++++++++++++------------------ drivers/dma/Kconfig | 2 +- 3 files changed, 58 insertions(+), 47 deletions(-) (limited to 'crypto') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index dfe0475f7919..6b15e488c0e7 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -115,29 +115,42 @@ of an operation. Perform a xor->copy->xor operation where each operation depends on the result from the previous operation: -void complete_xor_copy_xor(void *param) +void callback(void *param) { - printk("complete\n"); + struct completion *cmp = param; + + complete(cmp); } -int run_xor_copy_xor(struct page **xor_srcs, - int xor_src_cnt, - struct page *xor_dest, - size_t xor_len, - struct page *copy_src, - struct page *copy_dest, - size_t copy_len) +void run_xor_copy_xor(struct page **xor_srcs, + int xor_src_cnt, + struct page *xor_dest, + size_t xor_len, + struct page *copy_src, + struct page *copy_dest, + size_t copy_len) { struct dma_async_tx_descriptor *tx; + addr_conv_t addr_conv[xor_src_cnt]; + struct async_submit_ctl submit; + addr_conv_t addr_conv[NDISKS]; + struct completion cmp; + + init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL, + addr_conv); + tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit) - tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, - ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL); - tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, tx, NULL, NULL); - tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, - ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, - tx, complete_xor_copy_xor, NULL); + submit->depend_tx = tx; + tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit); + + init_completion(&cmp); + init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx, + callback, &cmp, addr_conv); + tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit); async_tx_issue_pending_all(); + + wait_for_completion(&cmp); } See include/linux/async_tx.h for more information on the flags. See the diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 691fa98a18c4..1e96c4df7061 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -33,11 +33,10 @@ /* do_async_xor - dma map the pages and perform the xor with an engine */ static __async_inline struct dma_async_tx_descriptor * do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, + unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src, struct async_submit_ctl *submit) { struct dma_device *dma = chan->device; - dma_addr_t *dma_src = (dma_addr_t *) src_list; struct dma_async_tx_descriptor *tx = NULL; int src_off = 0; int i; @@ -125,9 +124,14 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, int xor_src_cnt; int src_off = 0; void *dest_buf; - void **srcs = (void **) src_list; + void **srcs; - /* reuse the 'src_list' array to convert to buffer pointers */ + if (submit->scribble) + srcs = submit->scribble; + else + srcs = (void **) src_list; + + /* convert to buffer pointers */ for (i = 0; i < src_cnt; i++) srcs[i] = page_address(src_list[i]) + offset; @@ -178,17 +182,26 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR, &dest, 1, src_list, src_cnt, len); + dma_addr_t *dma_src = NULL; + BUG_ON(src_cnt <= 1); - if (chan) { + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) src_list; + + if (dma_src && chan) { /* run the xor asynchronously */ pr_debug("%s (async): len: %zu\n", __func__, len); return do_async_xor(chan, dest, src_list, offset, src_cnt, len, - submit); + dma_src, submit); } else { /* run the xor synchronously */ pr_debug("%s (sync): len: %zu\n", __func__, len); + WARN_ONCE(chan, "%s: no space for dma address conversion\n", + __func__); /* in the sync case the dest is an implied source * (assumes the dest is the first source) @@ -241,11 +254,16 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; + dma_addr_t *dma_src = NULL; BUG_ON(src_cnt <= 1); - if (device && src_cnt <= device->max_xor) { - dma_addr_t *dma_src = (dma_addr_t *) src_list; + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) src_list; + + if (dma_src && device && src_cnt <= device->max_xor) { unsigned long dma_prep_flags; int i; @@ -275,6 +293,9 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, enum async_tx_flags flags_orig = submit->flags; pr_debug("%s: (sync) len: %zu\n", __func__, len); + WARN_ONCE(device && src_cnt <= device->max_xor, + "%s: no space for dma address conversion\n", + __func__); submit->flags |= ASYNC_TX_XOR_DROP_DST; submit->flags &= ~ASYNC_TX_ACK; @@ -293,29 +314,6 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, } EXPORT_SYMBOL_GPL(async_xor_val); -static int __init async_xor_init(void) -{ - #ifdef CONFIG_DMA_ENGINE - /* To conserve stack space the input src_list (array of page pointers) - * is reused to hold the array of dma addresses passed to the driver. - * This conversion is only possible when dma_addr_t is less than the - * the size of a pointer. HIGHMEM64G is known to violate this - * assumption. - */ - BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *)); - #endif - - return 0; -} - -static void __exit async_xor_exit(void) -{ - do { } while (0); -} - -module_init(async_xor_init); -module_exit(async_xor_exit); - MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api"); MODULE_LICENSE("GPL"); diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 3b3c01b6f1ee..912a51b5cbd3 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -4,7 +4,7 @@ menuconfig DMADEVICES bool "DMA Engine support" - depends on !HIGHMEM64G && HAS_DMA + depends on HAS_DMA help DMA engines can do asynchronous data transfers without involving the host CPU. Currently, this framework can be -- cgit v1.2.3-59-g8ed1b From ad283ea4a3ce82cda2efe33163748a397b31b1eb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 29 Aug 2009 19:09:26 -0700 Subject: async_tx: add sum check flags Replace the flat zero_sum_result with a collection of flags to contain the P (xor) zero-sum result, and the soon to be utilized Q (raid6 reed solomon syndrome) zero-sum result. Use the SUM_CHECK_ namespace instead of DMA_ since these flags will be used on non-dma-zero-sum enabled platforms. Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- arch/arm/include/asm/hardware/iop3xx-adma.h | 5 +++-- arch/arm/mach-iop13xx/include/mach/adma.h | 12 +++++++----- crypto/async_tx/async_xor.c | 4 ++-- drivers/md/raid5.c | 2 +- drivers/md/raid5.h | 5 +++-- include/linux/async_tx.h | 2 +- include/linux/dmaengine.h | 21 ++++++++++++++++++++- 7 files changed, 37 insertions(+), 14 deletions(-) (limited to 'crypto') diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h index 83e6ba338e2c..26eefea02314 100644 --- a/arch/arm/include/asm/hardware/iop3xx-adma.h +++ b/arch/arm/include/asm/hardware/iop3xx-adma.h @@ -756,13 +756,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, hw_desc->src[0] = val; } -static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +static inline enum sum_check_flags +iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) { struct iop3xx_desc_aau *hw_desc = desc->hw_desc; struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); - return desc_ctrl.zero_result_err; + return desc_ctrl.zero_result_err << SUM_CHECK_P; } static inline void iop_chan_append(struct iop_adma_chan *chan) diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h index 5722e86f2174..1cd31df8924d 100644 --- a/arch/arm/mach-iop13xx/include/mach/adma.h +++ b/arch/arm/mach-iop13xx/include/mach/adma.h @@ -428,18 +428,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, hw_desc->block_fill_data = val; } -static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +static inline enum sum_check_flags +iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) { struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; + enum sum_check_flags flags; BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); - if (desc_ctrl.pq_xfer_en) - return byte_count.zero_result_err_q; - else - return byte_count.zero_result_err; + flags = byte_count.zero_result_err_q << SUM_CHECK_Q; + flags |= byte_count.zero_result_err << SUM_CHECK_P; + + return flags; } static inline void iop_chan_append(struct iop_adma_chan *chan) diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 1e96c4df7061..78fb7780272a 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -246,7 +246,7 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) */ struct dma_async_tx_descriptor * async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, u32 *result, + int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit) { struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, @@ -304,7 +304,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, async_tx_quiesce(&tx); - *result = page_is_zero(dest, offset, len) ? 0 : 1; + *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P; async_tx_sync_epilog(submit); submit->flags = flags_orig; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7727954cf726..1f2a266f3cf7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2590,7 +2590,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, * we are done. Otherwise update the mismatch count and repair * parity if !MD_RECOVERY_CHECK */ - if (sh->ops.zero_sum_result == 0) + if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0) /* parity is correct (on disc, * not in buffer any more) */ diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index e7baabffee86..75f2c6c4cf90 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -2,6 +2,7 @@ #define _RAID5_H #include +#include /* * @@ -215,8 +216,8 @@ struct stripe_head { * @target - STRIPE_OP_COMPUTE_BLK target */ struct stripe_operations { - int target; - u32 zero_sum_result; + int target; + enum sum_check_flags zero_sum_result; } ops; struct r5dev { struct bio req; diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 00cfb637ddf2..3d21a2517518 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -148,7 +148,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, struct dma_async_tx_descriptor * async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, u32 *result, + int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 6768727d00d7..02447afcebad 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -86,6 +86,25 @@ enum dma_ctrl_flags { DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), }; +/** + * enum sum_check_bits - bit position of pq_check_flags + */ +enum sum_check_bits { + SUM_CHECK_P = 0, + SUM_CHECK_Q = 1, +}; + +/** + * enum pq_check_flags - result of async_{xor,pq}_zero_sum operations + * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise + * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise + */ +enum sum_check_flags { + SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P), + SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q), +}; + + /** * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. * See linux/cpumask.h @@ -245,7 +264,7 @@ struct dma_device { unsigned int src_cnt, size_t len, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, - size_t len, u32 *result, unsigned long flags); + size_t len, enum sum_check_flags *result, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); -- cgit v1.2.3-59-g8ed1b From af1f951eb6ef27b01cbfb3f6c21b770af4368a6d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 29 Aug 2009 19:09:26 -0700 Subject: async_tx: kill needless module_{init|exit} If module_init and module_exit are nops then neither need to be defined. [ Impact: pure cleanup ] Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- crypto/async_tx/async_memcpy.c | 13 ------------- crypto/async_tx/async_memset.c | 13 ------------- crypto/async_tx/async_tx.c | 17 +++-------------- 3 files changed, 3 insertions(+), 40 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index 89e05556f3df..98e15bd0dcb5 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -90,19 +90,6 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, } EXPORT_SYMBOL_GPL(async_memcpy); -static int __init async_memcpy_init(void) -{ - return 0; -} - -static void __exit async_memcpy_exit(void) -{ - do { } while (0); -} - -module_init(async_memcpy_init); -module_exit(async_memcpy_exit); - MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("asynchronous memcpy api"); MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index c14437238f4c..b896a6e5f673 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -80,19 +80,6 @@ async_memset(struct page *dest, int val, unsigned int offset, size_t len, } EXPORT_SYMBOL_GPL(async_memset); -static int __init async_memset_init(void) -{ - return 0; -} - -static void __exit async_memset_exit(void) -{ - do { } while (0); -} - -module_init(async_memset_init); -module_exit(async_memset_exit); - MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("asynchronous memset api"); MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 802a5ce437d9..6e37ad3f4417 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -42,6 +42,9 @@ static void __exit async_tx_exit(void) async_dmaengine_put(); } +module_init(async_tx_init); +module_exit(async_tx_exit); + /** * __async_tx_find_channel - find a channel to carry out the operation or let * the transaction execute synchronously @@ -61,17 +64,6 @@ __async_tx_find_channel(struct async_submit_ctl *submit, return async_dma_find_channel(tx_type); } EXPORT_SYMBOL_GPL(__async_tx_find_channel); -#else -static int __init async_tx_init(void) -{ - printk(KERN_INFO "async_tx: api initialized (sync-only)\n"); - return 0; -} - -static void __exit async_tx_exit(void) -{ - do { } while (0); -} #endif @@ -298,9 +290,6 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx) } EXPORT_SYMBOL_GPL(async_tx_quiesce); -module_init(async_tx_init); -module_exit(async_tx_exit); - MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API"); MODULE_LICENSE("GPL"); -- cgit v1.2.3-59-g8ed1b From 95475e57113c66aac7583925736ed2e2d58c990d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:19:02 -0700 Subject: async_tx: remove walk of tx->parent chain in dma_wait_for_async_tx We currently walk the parent chain when waiting for a given tx to complete however this walk may race with the driver cleanup routine. The routines in async_raid6_recov.c may fall back to the synchronous path at any point so we need to be prepared to call async_tx_quiesce() (which calls dma_wait_for_async_tx). To remove the ->parent walk we guarantee that every time a dependency is attached ->issue_pending() is invoked, then we can simply poll the initial descriptor until completion. This also allows for a lighter weight 'issue pending' implementation as there is no longer a requirement to iterate through all the channels' ->issue_pending() routines as long as operations have been submitted in an ordered chain. async_tx_issue_pending() is added for this case. Signed-off-by: Dan Williams --- crypto/async_tx/async_tx.c | 13 +++++++------ drivers/dma/dmaengine.c | 45 ++++++++++----------------------------------- include/linux/async_tx.h | 23 +++++++++++++++++++++++ 3 files changed, 40 insertions(+), 41 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 6e37ad3f4417..60615fedcf5e 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -77,8 +77,8 @@ static void async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, struct dma_async_tx_descriptor *tx) { - struct dma_chan *chan; - struct dma_device *device; + struct dma_chan *chan = depend_tx->chan; + struct dma_device *device = chan->device; struct dma_async_tx_descriptor *intr_tx = (void *) ~0; /* first check to see if we can still append to depend_tx */ @@ -90,11 +90,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, } spin_unlock_bh(&depend_tx->lock); - if (!intr_tx) + /* attached dependency, flush the parent channel */ + if (!intr_tx) { + device->device_issue_pending(chan); return; - - chan = depend_tx->chan; - device = chan->device; + } /* see if we can schedule an interrupt * otherwise poll for completion @@ -128,6 +128,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, intr_tx->tx_submit(intr_tx); async_tx_ack(intr_tx); } + device->device_issue_pending(chan); } else { if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) panic("%s: DMA_ERROR waiting for depend_tx\n", diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 6781e8f3c064..e002e0e0d055 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -934,49 +934,24 @@ EXPORT_SYMBOL(dma_async_tx_descriptor_init); /* dma_wait_for_async_tx - spin wait for a transaction to complete * @tx: in-flight transaction to wait on - * - * This routine assumes that tx was obtained from a call to async_memcpy, - * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped - * and submitted). Walking the parent chain is only meant to cover for DMA - * drivers that do not implement the DMA_INTERRUPT capability and may race with - * the driver's descriptor cleanup routine. */ enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) { - enum dma_status status; - struct dma_async_tx_descriptor *iter; - struct dma_async_tx_descriptor *parent; + unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); if (!tx) return DMA_SUCCESS; - WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for" - " %s\n", __func__, dma_chan_name(tx->chan)); - - /* poll through the dependency chain, return when tx is complete */ - do { - iter = tx; - - /* find the root of the unsubmitted dependency chain */ - do { - parent = iter->parent; - if (!parent) - break; - else - iter = parent; - } while (parent); - - /* there is a small window for ->parent == NULL and - * ->cookie == -EBUSY - */ - while (iter->cookie == -EBUSY) - cpu_relax(); - - status = dma_sync_wait(iter->chan, iter->cookie); - } while (status == DMA_IN_PROGRESS || (iter != tx)); - - return status; + while (tx->cookie == -EBUSY) { + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { + pr_err("%s timeout waiting for descriptor submission\n", + __func__); + return DMA_ERROR; + } + cpu_relax(); + } + return dma_sync_wait(tx->chan, tx->cookie); } EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 3d21a2517518..12a2efcbd565 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -83,6 +83,24 @@ struct async_submit_ctl { #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all + +/** + * async_tx_issue_pending - send pending descriptor to the hardware channel + * @tx: descriptor handle to retrieve hardware context + * + * Note: any dependent operations will have already been issued by + * async_tx_channel_switch, or (in the case of no channel switch) will + * be already pending on this channel. + */ +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + if (likely(tx)) { + struct dma_chan *chan = tx->chan; + struct dma_device *dma = chan->device; + + dma->device_issue_pending(chan); + } +} #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #include #else @@ -98,6 +116,11 @@ static inline void async_tx_issue_pending_all(void) do { } while (0); } +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + do { } while (0); +} + static inline struct dma_chan * async_tx_find_channel(struct async_submit_ctl *submit, enum dma_transaction_type tx_type, struct page **dst, -- cgit v1.2.3-59-g8ed1b From b2f46fd8ef3dff2ab30f31126833f78b7480283a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:36 -0700 Subject: async_tx: add support for asynchronous GF multiplication [ Based on an original patch by Yuri Tikhonov ] This adds support for doing asynchronous GF multiplication by adding two additional functions to the async_tx API: async_gen_syndrome() does simultaneous XOR and Galois field multiplication of sources. async_syndrome_val() validates the given source buffers against known P and Q values. When a request is made to run async_pq against more than the hardware maximum number of supported sources we need to reuse the previous generated P and Q values as sources into the next operation. Care must be taken to remove Q from P' and P from Q'. For example to perform a 5 source pq op with hardware that only supports 4 sources at a time the following approach is taken: p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08})) p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10})) p' = p + q + q + src4 = p + src4 q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10}*src4 Note: 4 is the minimum acceptable maxpq otherwise we punt to synchronous-software path. The DMA_PREP_CONTINUE flag indicates to the driver to reuse p and q as sources (in the above manner) and fill the remaining slots up to maxpq with the new sources/coefficients. Note1: Some devices have native support for P+Q continuation and can skip this extra work. Devices with this capability can advertise it with dma_set_maxpq. It is up to each driver how to handle the DMA_PREP_CONTINUE flag. Note2: The api supports disabling the generation of P when generating Q, this is ignored by the synchronous path but is implemented by some dma devices to save unnecessary writes. In this case the continuation algorithm is simplified to only reuse Q as a source. Cc: H. Peter Anvin Cc: David Woodhouse Signed-off-by: Yuri Tikhonov Signed-off-by: Ilya Yanok Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 3 + arch/arm/mach-iop13xx/setup.c | 2 +- crypto/async_tx/Kconfig | 4 + crypto/async_tx/Makefile | 1 + crypto/async_tx/async_pq.c | 388 ++++++++++++++++++++++++++++++++++ crypto/async_tx/async_xor.c | 2 +- drivers/dma/dmaengine.c | 4 + drivers/dma/iop-adma.c | 2 +- include/linux/async_tx.h | 9 + include/linux/dmaengine.h | 87 +++++++- 10 files changed, 493 insertions(+), 9 deletions(-) create mode 100644 crypto/async_tx/async_pq.c (limited to 'crypto') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 6b15e488c0e7..0e48e054d69a 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -64,6 +64,9 @@ xor - xor a series of source buffers and write the result to a xor_val - xor a series of source buffers and set a flag if the result is zero. The implementation attempts to prevent writes to memory +pq - generate the p+q (raid6 syndrome) from a series of source buffers +pq_val - validate that a p and or q buffer are in sync with a given series of + sources 3.3 Descriptor management: The return value is non-NULL and points to a 'descriptor' when the operation diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index 9800228b71d3..2e7ca0d75f8a 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -506,7 +506,7 @@ void __init iop13xx_platform_init(void) dma_cap_set(DMA_MEMSET, plat_data->cap_mask); dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); - dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask); + dma_cap_set(DMA_PQ, plat_data->cap_mask); dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask); dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask); break; diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index d8fb39145986..cb6d7314f198 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -14,3 +14,7 @@ config ASYNC_MEMSET tristate select ASYNC_CORE +config ASYNC_PQ + tristate + select ASYNC_CORE + diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 27baa7d52fbc..1b9926588259 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o +obj-$(CONFIG_ASYNC_PQ) += async_pq.o diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c new file mode 100644 index 000000000000..108b21efb499 --- /dev/null +++ b/crypto/async_tx/async_pq.c @@ -0,0 +1,388 @@ +/* + * Copyright(c) 2007 Yuri Tikhonov + * Copyright(c) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#include +#include +#include +#include +#include + +/** + * scribble - space to hold throwaway P buffer for synchronous gen_syndrome + */ +static struct page *scribble; + +static bool is_raid6_zero_block(struct page *p) +{ + return p == (void *) raid6_empty_zero_page; +} + +/* the struct page *blocks[] parameter passed to async_gen_syndrome() + * and async_syndrome_val() contains the 'P' destination address at + * blocks[disks-2] and the 'Q' destination address at blocks[disks-1] + * + * note: these are macros as they are used as lvalues + */ +#define P(b, d) (b[d-2]) +#define Q(b, d) (b[d-1]) + +/** + * do_async_gen_syndrome - asynchronously calculate P and/or Q + */ +static __async_inline struct dma_async_tx_descriptor * +do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, + const unsigned char *scfs, unsigned int offset, int disks, + size_t len, dma_addr_t *dma_src, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct dma_device *dma = chan->device; + enum dma_ctrl_flags dma_flags = 0; + enum async_tx_flags flags_orig = submit->flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + dma_async_tx_callback cb_param_orig = submit->cb_param; + int src_cnt = disks - 2; + unsigned char coefs[src_cnt]; + unsigned short pq_src_cnt; + dma_addr_t dma_dest[2]; + int src_off = 0; + int idx; + int i; + + /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */ + if (P(blocks, disks)) + dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset, + len, DMA_BIDIRECTIONAL); + else + dma_flags |= DMA_PREP_PQ_DISABLE_P; + if (Q(blocks, disks)) + dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset, + len, DMA_BIDIRECTIONAL); + else + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + + /* convert source addresses being careful to collapse 'empty' + * sources and update the coefficients accordingly + */ + for (i = 0, idx = 0; i < src_cnt; i++) { + if (is_raid6_zero_block(blocks[i])) + continue; + dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len, + DMA_TO_DEVICE); + coefs[idx] = scfs[i]; + idx++; + } + src_cnt = idx; + + while (src_cnt > 0) { + submit->flags = flags_orig; + pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags)); + /* if we are submitting additional pqs, leave the chain open, + * clear the callback parameters, and leave the destination + * buffers mapped + */ + if (src_cnt > pq_src_cnt) { + submit->flags &= ~ASYNC_TX_ACK; + dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP; + submit->cb_fn = NULL; + submit->cb_param = NULL; + } else { + dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP; + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; + if (cb_fn_orig) + dma_flags |= DMA_PREP_INTERRUPT; + } + + /* Since we have clobbered the src_list we are committed + * to doing this asynchronously. Drivers force forward + * progress in case they can not provide a descriptor + */ + for (;;) { + tx = dma->device_prep_dma_pq(chan, dma_dest, + &dma_src[src_off], + pq_src_cnt, + &coefs[src_off], len, + dma_flags); + if (likely(tx)) + break; + async_tx_quiesce(&submit->depend_tx); + dma_async_issue_pending(chan); + } + + async_tx_submit(chan, tx, submit); + submit->depend_tx = tx; + + /* drop completed sources */ + src_cnt -= pq_src_cnt; + src_off += pq_src_cnt; + + dma_flags |= DMA_PREP_CONTINUE; + } + + return tx; +} + +/** + * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome + */ +static void +do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, + size_t len, struct async_submit_ctl *submit) +{ + void **srcs; + int i; + + if (submit->scribble) + srcs = submit->scribble; + else + srcs = (void **) blocks; + + for (i = 0; i < disks; i++) { + if (is_raid6_zero_block(blocks[i])) { + BUG_ON(i > disks - 3); /* P or Q can't be zero */ + srcs[i] = blocks[i]; + } else + srcs[i] = page_address(blocks[i]) + offset; + } + raid6_call.gen_syndrome(disks, len, srcs); + async_tx_sync_epilog(submit); +} + +/** + * async_gen_syndrome - asynchronously calculate a raid6 syndrome + * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 + * @offset: common offset into each block (src and dest) to start transaction + * @disks: number of blocks (including missing P or Q, see below) + * @len: length of operation in bytes + * @submit: submission/completion modifiers + * + * General note: This routine assumes a field of GF(2^8) with a + * primitive polynomial of 0x11d and a generator of {02}. + * + * 'disks' note: callers can optionally omit either P or Q (but not + * both) from the calculation by setting blocks[disks-2] or + * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <= + * PAGE_SIZE as a temporary buffer of this size is used in the + * synchronous path. 'disks' always accounts for both destination + * buffers. + * + * 'blocks' note: if submit->scribble is NULL then the contents of + * 'blocks' may be overridden + */ +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, + size_t len, struct async_submit_ctl *submit) +{ + int src_cnt = disks - 2; + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &P(blocks, disks), 2, + blocks, src_cnt, len); + struct dma_device *device = chan ? chan->device : NULL; + dma_addr_t *dma_src = NULL; + + BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks))); + + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) blocks; + + if (dma_src && device && + (src_cnt <= dma_maxpq(device, 0) || + dma_maxpq(device, DMA_PREP_CONTINUE) > 0)) { + /* run the p+q asynchronously */ + pr_debug("%s: (async) disks: %d len: %zu\n", + __func__, disks, len); + return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset, + disks, len, dma_src, submit); + } + + /* run the pq synchronously */ + pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len); + + /* wait for any prerequisite operations */ + async_tx_quiesce(&submit->depend_tx); + + if (!P(blocks, disks)) { + P(blocks, disks) = scribble; + BUG_ON(len + offset > PAGE_SIZE); + } + if (!Q(blocks, disks)) { + Q(blocks, disks) = scribble; + BUG_ON(len + offset > PAGE_SIZE); + } + do_sync_gen_syndrome(blocks, offset, disks, len, submit); + + return NULL; +} +EXPORT_SYMBOL_GPL(async_gen_syndrome); + +/** + * async_syndrome_val - asynchronously validate a raid6 syndrome + * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 + * @offset: common offset into each block (src and dest) to start transaction + * @disks: number of blocks (including missing P or Q, see below) + * @len: length of operation in bytes + * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set + * @spare: temporary result buffer for the synchronous case + * @submit: submission / completion modifiers + * + * The same notes from async_gen_syndrome apply to the 'blocks', + * and 'disks' parameters of this routine. The synchronous path + * requires a temporary result buffer and submit->scribble to be + * specified. + */ +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int disks, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL, + NULL, 0, blocks, disks, + len); + struct dma_device *device = chan ? chan->device : NULL; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; + dma_addr_t *dma_src = NULL; + + BUG_ON(disks < 4); + + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) blocks; + + if (dma_src && device && disks <= dma_maxpq(device, 0)) { + struct device *dev = device->dev; + dma_addr_t *pq = &dma_src[disks-2]; + int i; + + pr_debug("%s: (async) disks: %d len: %zu\n", + __func__, disks, len); + if (!P(blocks, disks)) + dma_flags |= DMA_PREP_PQ_DISABLE_P; + if (!Q(blocks, disks)) + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + for (i = 0; i < disks; i++) + if (likely(blocks[i])) { + BUG_ON(is_raid6_zero_block(blocks[i])); + dma_src[i] = dma_map_page(dev, blocks[i], + offset, len, + DMA_TO_DEVICE); + } + + for (;;) { + tx = device->device_prep_dma_pq_val(chan, pq, dma_src, + disks - 2, + raid6_gfexp, + len, pqres, + dma_flags); + if (likely(tx)) + break; + async_tx_quiesce(&submit->depend_tx); + dma_async_issue_pending(chan); + } + async_tx_submit(chan, tx, submit); + + return tx; + } else { + struct page *p_src = P(blocks, disks); + struct page *q_src = Q(blocks, disks); + enum async_tx_flags flags_orig = submit->flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + void *scribble = submit->scribble; + void *cb_param_orig = submit->cb_param; + void *p, *q, *s; + + pr_debug("%s: (sync) disks: %d len: %zu\n", + __func__, disks, len); + + /* caller must provide a temporary result buffer and + * allow the input parameters to be preserved + */ + BUG_ON(!spare || !scribble); + + /* wait for any prerequisite operations */ + async_tx_quiesce(&submit->depend_tx); + + /* recompute p and/or q into the temporary buffer and then + * check to see the result matches the current value + */ + tx = NULL; + *pqres = 0; + if (p_src) { + init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL, + NULL, NULL, scribble); + tx = async_xor(spare, blocks, offset, disks-2, len, submit); + async_tx_quiesce(&tx); + p = page_address(p_src) + offset; + s = page_address(spare) + offset; + *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P; + } + + if (q_src) { + P(blocks, disks) = NULL; + Q(blocks, disks) = spare; + init_async_submit(submit, 0, NULL, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, offset, disks, len, submit); + async_tx_quiesce(&tx); + q = page_address(q_src) + offset; + s = page_address(spare) + offset; + *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q; + } + + /* restore P, Q and submit */ + P(blocks, disks) = p_src; + Q(blocks, disks) = q_src; + + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; + submit->flags = flags_orig; + async_tx_sync_epilog(submit); + + return NULL; + } +} +EXPORT_SYMBOL_GPL(async_syndrome_val); + +static int __init async_pq_init(void) +{ + scribble = alloc_page(GFP_KERNEL); + + if (scribble) + return 0; + + pr_err("%s: failed to allocate required spare page\n", __func__); + + return -ENOMEM; +} + +static void __exit async_pq_exit(void) +{ + put_page(scribble); +} + +module_init(async_pq_init); +module_exit(async_pq_exit); + +MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation"); +MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 78fb7780272a..56b5f98da463 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -62,7 +62,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, while (src_cnt) { submit->flags = flags_orig; dma_flags = 0; - xor_src_cnt = min(src_cnt, dma->max_xor); + xor_src_cnt = min(src_cnt, (int)dma->max_xor); /* if we are submitting additional xors, leave the chain open, * clear the callback parameters, and leave the destination * buffer mapped diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index e002e0e0d055..cd5673d3043b 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -646,6 +646,10 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_xor); BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) && !device->device_prep_dma_xor_val); + BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) && + !device->device_prep_dma_pq); + BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && + !device->device_prep_dma_pq_val); BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 6ff79a672699..4496bc606662 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -1257,7 +1257,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " "( %s%s%s%s%s%s%s%s%s%s)\n", - dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", + dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 12a2efcbd565..e6ce5f004f98 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -185,5 +185,14 @@ async_memset(struct page *dest, int val, unsigned int offset, struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit); + void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 02447afcebad..ce010cd991d2 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -52,7 +52,7 @@ enum dma_status { enum dma_transaction_type { DMA_MEMCPY, DMA_XOR, - DMA_PQ_XOR, + DMA_PQ, DMA_DUAL_XOR, DMA_PQ_UPDATE, DMA_XOR_VAL, @@ -70,20 +70,28 @@ enum dma_transaction_type { /** * enum dma_ctrl_flags - DMA flags to augment operation preparation, - * control completion, and communicate status. + * control completion, and communicate status. * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of - * this transaction + * this transaction * @DMA_CTRL_ACK - the descriptor cannot be reused until the client - * acknowledges receipt, i.e. has has a chance to establish any - * dependency chains + * acknowledges receipt, i.e. has has a chance to establish any dependency + * chains * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) + * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q + * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P + * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as + * sources that were the result of a previous operation, in the case of a PQ + * operation it continues the calculation with new sources */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), DMA_CTRL_ACK = (1 << 1), DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2), DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), + DMA_PREP_PQ_DISABLE_P = (1 << 4), + DMA_PREP_PQ_DISABLE_Q = (1 << 5), + DMA_PREP_CONTINUE = (1 << 6), }; /** @@ -226,6 +234,7 @@ struct dma_async_tx_descriptor { * @global_node: list_head for global dma_device_list * @cap_mask: one or more dma_capability flags * @max_xor: maximum number of xor sources, 0 if no capability + * @max_pq: maximum number of PQ sources and PQ-continue capability * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the @@ -234,6 +243,8 @@ struct dma_async_tx_descriptor { * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation * @device_prep_dma_xor_val: prepares a xor validation operation + * @device_prep_dma_pq: prepares a pq operation + * @device_prep_dma_pq_val: prepares a pqzero_sum operation * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation @@ -248,7 +259,9 @@ struct dma_device { struct list_head channels; struct list_head global_node; dma_cap_mask_t cap_mask; - int max_xor; + unsigned short max_xor; + unsigned short max_pq; + #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; struct device *dev; @@ -265,6 +278,14 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, enum sum_check_flags *result, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq)( + struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)( + struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); @@ -283,6 +304,60 @@ struct dma_device { void (*device_issue_pending)(struct dma_chan *chan); }; +static inline void +dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) +{ + dma->max_pq = maxpq; + if (has_pq_continue) + dma->max_pq |= DMA_HAS_PQ_CONTINUE; +} + +static inline bool dmaf_continue(enum dma_ctrl_flags flags) +{ + return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE; +} + +static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags) +{ + enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P; + + return (flags & mask) == mask; +} + +static inline bool dma_dev_has_pq_continue(struct dma_device *dma) +{ + return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; +} + +static unsigned short dma_dev_to_maxpq(struct dma_device *dma) +{ + return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; +} + +/* dma_maxpq - reduce maxpq in the face of continued operations + * @dma - dma device with PQ capability + * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set + * + * When an engine does not support native continuation we need 3 extra + * source slots to reuse P and Q with the following coefficients: + * 1/ {00} * P : remove P from Q', but use it as a source for P' + * 2/ {01} * Q : use Q to continue Q' calculation + * 3/ {00} * Q : subtract Q from P' to cancel (2) + * + * In the case where P is disabled we only need 1 extra source: + * 1/ {01} * Q : use Q to continue Q' calculation + */ +static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags) +{ + if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags)) + return dma_dev_to_maxpq(dma); + else if (dmaf_p_disabled_continue(flags)) + return dma_dev_to_maxpq(dma) - 1; + else if (dmaf_continue(flags)) + return dma_dev_to_maxpq(dma) - 3; + BUG(); +} + /* --- public DMA engine API --- */ #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3-59-g8ed1b From 0a82a6239beecc95db6e05fe43ee62d16b381d38 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:37 -0700 Subject: async_tx: add support for asynchronous RAID6 recovery operations async_raid6_2data_recov() recovers two data disk failures async_raid6_datap_recov() recovers a data disk and the P disk These routines are a port of the synchronous versions found in drivers/md/raid6recov.c. The primary difference is breaking out the xor operations into separate calls to async_xor. Two helper routines are introduced to perform scalar multiplication where needed. async_sum_product() multiplies two sources by scalar coefficients and then sums (xor) the result. async_mult() simply multiplies a single source by a scalar. This implemention also includes, in contrast to the original synchronous-only code, special case handling for the 4-disk and 5-disk array cases. In these situations the default N-disk algorithm will present 0-source or 1-source operations to dma devices. To cover for dma devices where the minimum source count is 2 we implement 4-disk and 5-disk handling in the recovery code. [ Impact: asynchronous raid6 recovery routines for 2data and datap cases ] Cc: Yuri Tikhonov Cc: Ilya Yanok Cc: H. Peter Anvin Cc: David Woodhouse Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 4 + crypto/async_tx/Kconfig | 5 + crypto/async_tx/Makefile | 1 + crypto/async_tx/async_raid6_recov.c | 448 ++++++++++++++++++++++++++++++++++ include/linux/async_tx.h | 8 + 5 files changed, 466 insertions(+) create mode 100644 crypto/async_tx/async_raid6_recov.c (limited to 'crypto') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 0e48e054d69a..ba046b8fa92f 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -67,6 +67,10 @@ xor_val - xor a series of source buffers and set a flag if the pq - generate the p+q (raid6 syndrome) from a series of source buffers pq_val - validate that a p and or q buffer are in sync with a given series of sources +datap - (raid6_datap_recov) recover a raid6 data block and the p block + from the given sources +2data - (raid6_2data_recov) recover 2 raid6 data blocks from the given + sources 3.3 Descriptor management: The return value is non-NULL and points to a 'descriptor' when the operation diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index cb6d7314f198..e5aeb2b79e6f 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -18,3 +18,8 @@ config ASYNC_PQ tristate select ASYNC_CORE +config ASYNC_RAID6_RECOV + tristate + select ASYNC_CORE + select ASYNC_PQ + diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 1b9926588259..9a1a76811b80 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o obj-$(CONFIG_ASYNC_PQ) += async_pq.o +obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c new file mode 100644 index 000000000000..0c14d48c9896 --- /dev/null +++ b/crypto/async_tx/async_raid6_recov.c @@ -0,0 +1,448 @@ +/* + * Asynchronous RAID-6 recovery calculations ASYNC_TX API. + * Copyright(c) 2009 Intel Corporation + * + * based on raid6recov.c: + * Copyright 2002 H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#include +#include +#include +#include +#include + +static struct dma_async_tx_descriptor * +async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, + size_t len, struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &dest, 1, srcs, 2, len); + struct dma_device *dma = chan ? chan->device : NULL; + const u8 *amul, *bmul; + u8 ax, bx; + u8 *a, *b, *c; + + if (dma) { + dma_addr_t dma_dest[2]; + dma_addr_t dma_src[2]; + struct device *dev = dma->dev; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); + dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef, + len, dma_flags); + if (tx) { + async_tx_submit(chan, tx, submit); + return tx; + } + } + + /* run the operation synchronously */ + async_tx_quiesce(&submit->depend_tx); + amul = raid6_gfmul[coef[0]]; + bmul = raid6_gfmul[coef[1]]; + a = page_address(srcs[0]); + b = page_address(srcs[1]); + c = page_address(dest); + + while (len--) { + ax = amul[*a++]; + bx = bmul[*b++]; + *c++ = ax ^ bx; + } + + return NULL; +} + +static struct dma_async_tx_descriptor * +async_mult(struct page *dest, struct page *src, u8 coef, size_t len, + struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &dest, 1, &src, 1, len); + struct dma_device *dma = chan ? chan->device : NULL; + const u8 *qmul; /* Q multiplier table */ + u8 *d, *s; + + if (dma) { + dma_addr_t dma_dest[2]; + dma_addr_t dma_src[1]; + struct device *dev = dma->dev; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, + len, dma_flags); + if (tx) { + async_tx_submit(chan, tx, submit); + return tx; + } + } + + /* no channel available, or failed to allocate a descriptor, so + * perform the operation synchronously + */ + async_tx_quiesce(&submit->depend_tx); + qmul = raid6_gfmul[coef]; + d = page_address(dest); + s = page_address(src); + + while (len--) + *d++ = qmul[*s++]; + + return NULL; +} + +static struct dma_async_tx_descriptor * +__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *a, *b; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + + p = blocks[4-2]; + q = blocks[4-1]; + + a = blocks[faila]; + b = blocks[failb]; + + /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */ + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = p; + srcs[1] = q; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_sum_product(b, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = p; + srcs[1] = b; + init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(a, srcs, 0, 2, bytes, submit); + + return tx; + +} + +static struct dma_async_tx_descriptor * +__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *g, *dp, *dq; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + int uninitialized_var(good); + int i; + + for (i = 0; i < 3; i++) { + if (i == faila || i == failb) + continue; + else { + good = i; + break; + } + } + BUG_ON(i >= 3); + + p = blocks[5-2]; + q = blocks[5-1]; + g = blocks[good]; + + /* Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for delta p and + * delta q + */ + dp = blocks[faila]; + dq = blocks[failb]; + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_memcpy(dp, g, 0, 0, bytes, submit); + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + + /* compute P + Pxy */ + srcs[0] = dp; + srcs[1] = p; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + /* compute Q + Qxy */ + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = dp; + srcs[1] = dq; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_sum_product(dq, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = dp; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + return tx; +} + +static struct dma_async_tx_descriptor * +__2data_recov_n(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *dp, *dq; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + + p = blocks[disks-2]; + q = blocks[disks-1]; + + /* Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = blocks[faila]; + blocks[faila] = (void *)raid6_empty_zero_page; + blocks[disks-2] = dp; + dq = blocks[failb]; + blocks[failb] = (void *)raid6_empty_zero_page; + blocks[disks-1] = dq; + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + + /* Restore pointer table */ + blocks[faila] = dp; + blocks[failb] = dq; + blocks[disks-2] = p; + blocks[disks-1] = q; + + /* compute P + Pxy */ + srcs[0] = dp; + srcs[1] = p; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + /* compute Q + Qxy */ + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = dp; + srcs[1] = dq; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_sum_product(dq, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = dp; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + return tx; +} + +/** + * async_raid6_2data_recov - asynchronously calculate two missing data blocks + * @disks: number of disks in the RAID-6 array + * @bytes: block size + * @faila: first failed drive index + * @failb: second failed drive index + * @blocks: array of source pointers where the last two entries are p and q + * @submit: submission/completion modifiers + */ +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) +{ + BUG_ON(faila == failb); + if (failb < faila) + swap(faila, failb); + + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); + + /* we need to preserve the contents of 'blocks' for the async + * case, so punt to synchronous if a scribble buffer is not available + */ + if (!submit->scribble) { + void **ptrs = (void **) blocks; + int i; + + async_tx_quiesce(&submit->depend_tx); + for (i = 0; i < disks; i++) + ptrs[i] = page_address(blocks[i]); + + raid6_2data_recov(disks, bytes, faila, failb, ptrs); + + async_tx_sync_epilog(submit); + + return NULL; + } + + switch (disks) { + case 4: + /* dma devices do not uniformly understand a zero source pq + * operation (in contrast to the synchronous case), so + * explicitly handle the 4 disk special case + */ + return __2data_recov_4(bytes, faila, failb, blocks, submit); + case 5: + /* dma devices do not uniformly understand a single + * source pq operation (in contrast to the synchronous + * case), so explicitly handle the 5 disk special case + */ + return __2data_recov_5(bytes, faila, failb, blocks, submit); + default: + return __2data_recov_n(disks, bytes, faila, failb, blocks, submit); + } +} +EXPORT_SYMBOL_GPL(async_raid6_2data_recov); + +/** + * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block + * @disks: number of disks in the RAID-6 array + * @bytes: block size + * @faila: failed drive index + * @blocks: array of source pointers where the last two entries are p and q + * @submit: submission/completion modifiers + */ +struct dma_async_tx_descriptor * +async_raid6_datap_recov(int disks, size_t bytes, int faila, + struct page **blocks, struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *dq; + u8 coef; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + struct page *srcs[2]; + + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); + + /* we need to preserve the contents of 'blocks' for the async + * case, so punt to synchronous if a scribble buffer is not available + */ + if (!scribble) { + void **ptrs = (void **) blocks; + int i; + + async_tx_quiesce(&submit->depend_tx); + for (i = 0; i < disks; i++) + ptrs[i] = page_address(blocks[i]); + + raid6_datap_recov(disks, bytes, faila, ptrs); + + async_tx_sync_epilog(submit); + + return NULL; + } + + p = blocks[disks-2]; + q = blocks[disks-1]; + + /* Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = blocks[faila]; + blocks[faila] = (void *)raid6_empty_zero_page; + blocks[disks-1] = dq; + + /* in the 4 disk case we only need to perform a single source + * multiplication + */ + if (disks == 4) { + int good = faila == 0 ? 1 : 0; + struct page *g = blocks[good]; + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_memcpy(p, g, 0, 0, bytes, submit); + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + } else { + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + } + + /* Restore pointer table */ + blocks[faila] = dq; + blocks[disks-1] = q; + + /* calculate g^{-faila} */ + coef = raid6_gfinv[raid6_gfexp[faila]]; + + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_mult(dq, dq, coef, bytes, submit); + + srcs[0] = p; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(p, srcs, 0, 2, bytes, submit); + + return tx; +} +EXPORT_SYMBOL_GPL(async_raid6_datap_recov); + +MODULE_AUTHOR("Dan Williams "); +MODULE_DESCRIPTION("asynchronous RAID-6 recovery api"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index e6ce5f004f98..866e61c4e2e0 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -194,5 +194,13 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, size_t len, enum sum_check_flags *pqres, struct page *spare, struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb, + struct page **ptrs, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_raid6_datap_recov(int src_num, size_t bytes, int faila, + struct page **ptrs, struct async_submit_ctl *submit); + void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ -- cgit v1.2.3-59-g8ed1b From cb3c82992f62f838e6476a0bff12909158007fc6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:37 -0700 Subject: async_tx: raid6 recovery self test Port drivers/md/raid6test/test.c to use the async raid6 recovery routines. This is meant as a unit test for raid6 acceleration drivers. In addition to the 16-drive test case this implements tests for the 4-disk and 5-disk special cases (dma devices can not generically handle less than 2 sources), and adds a test for the D+Q case. Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- crypto/async_tx/Makefile | 1 + crypto/async_tx/raid6test.c | 241 ++++++++++++++++++++++++++++++++++++++++++++ drivers/md/Kconfig | 13 +++ 3 files changed, 255 insertions(+) create mode 100644 crypto/async_tx/raid6test.c (limited to 'crypto') diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 9a1a76811b80..d1e0e6f72bc1 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o obj-$(CONFIG_ASYNC_PQ) += async_pq.o obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o +obj-$(CONFIG_ASYNC_RAID6_TEST) += raid6test.o diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c new file mode 100644 index 000000000000..98c83ca96c83 --- /dev/null +++ b/crypto/async_tx/raid6test.c @@ -0,0 +1,241 @@ +/* + * asynchronous raid6 recovery self test + * Copyright (c) 2009, Intel Corporation. + * + * based on drivers/md/raid6test/test.c: + * Copyright 2002-2007 H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#include +#include + +#undef pr +#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args) + +#define NDISKS 16 /* Including P and Q */ + +static struct page *dataptrs[NDISKS]; +static struct page *data[NDISKS+3]; +static struct page *spare; +static struct page *recovi; +static struct page *recovj; + +static void callback(void *param) +{ + struct completion *cmp = param; + + complete(cmp); +} + +static void makedata(int disks) +{ + int i, j; + + for (i = 0; i < disks; i++) { + for (j = 0; j < PAGE_SIZE/sizeof(u32); j += sizeof(u32)) { + u32 *p = page_address(data[i]) + j; + + *p = random32(); + } + + dataptrs[i] = data[i]; + } +} + +static char disk_type(int d, int disks) +{ + if (d == disks - 2) + return 'P'; + else if (d == disks - 1) + return 'Q'; + else + return 'D'; +} + +/* Recover two failed blocks. */ +static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs) +{ + struct async_submit_ctl submit; + addr_conv_t addr_conv[disks]; + struct completion cmp; + struct dma_async_tx_descriptor *tx = NULL; + enum sum_check_flags result = ~0; + + if (faila > failb) + swap(faila, failb); + + if (failb == disks-1) { + if (faila == disks-2) { + /* P+Q failure. Just rebuild the syndrome. */ + init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); + tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); + } else { + struct page *blocks[disks]; + struct page *dest; + int count = 0; + int i; + + /* data+Q failure. Reconstruct data from P, + * then rebuild syndrome + */ + for (i = disks; i-- ; ) { + if (i == faila || i == failb) + continue; + blocks[count++] = ptrs[i]; + } + dest = ptrs[faila]; + init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, + NULL, NULL, addr_conv); + tx = async_xor(dest, blocks, 0, count, bytes, &submit); + + init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv); + tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); + } + } else { + if (failb == disks-2) { + /* data+P failure. */ + init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); + tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit); + } else { + /* data+data failure. */ + init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); + tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit); + } + } + init_completion(&cmp); + init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv); + tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit); + async_tx_issue_pending(tx); + + if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) + pr("%s: timeout! (faila: %d failb: %d disks: %d)\n", + __func__, faila, failb, disks); + + if (result != 0) + pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n", + __func__, faila, failb, result); +} + +static int test_disks(int i, int j, int disks) +{ + int erra, errb; + + memset(page_address(recovi), 0xf0, PAGE_SIZE); + memset(page_address(recovj), 0xba, PAGE_SIZE); + + dataptrs[i] = recovi; + dataptrs[j] = recovj; + + raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs); + + erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE); + errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE); + + pr("%s(%d, %d): faila=%3d(%c) failb=%3d(%c) %s\n", + __func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks), + (!erra && !errb) ? "OK" : !erra ? "ERRB" : !errb ? "ERRA" : "ERRAB"); + + dataptrs[i] = data[i]; + dataptrs[j] = data[j]; + + return erra || errb; +} + +static int test(int disks, int *tests) +{ + addr_conv_t addr_conv[disks]; + struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; + struct completion cmp; + int err = 0; + int i, j; + + recovi = data[disks]; + recovj = data[disks+1]; + spare = data[disks+2]; + + makedata(disks); + + /* Nuke syndromes */ + memset(page_address(data[disks-2]), 0xee, PAGE_SIZE); + memset(page_address(data[disks-1]), 0xee, PAGE_SIZE); + + /* Generate assumed good syndrome */ + init_completion(&cmp); + init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv); + tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit); + async_tx_issue_pending(tx); + + if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) { + pr("error: initial gen_syndrome(%d) timed out\n", disks); + return 1; + } + + pr("testing the %d-disk case...\n", disks); + for (i = 0; i < disks-1; i++) + for (j = i+1; j < disks; j++) { + (*tests)++; + err += test_disks(i, j, disks); + } + + return err; +} + + +static int raid6_test(void) +{ + int err = 0; + int tests = 0; + int i; + + for (i = 0; i < NDISKS+3; i++) { + data[i] = alloc_page(GFP_KERNEL); + if (!data[i]) { + while (i--) + put_page(data[i]); + return -ENOMEM; + } + } + + /* the 4-disk and 5-disk cases are special for the recovery code */ + if (NDISKS > 4) + err += test(4, &tests); + if (NDISKS > 5) + err += test(5, &tests); + err += test(NDISKS, &tests); + + pr("\n"); + pr("complete (%d tests, %d failure%s)\n", + tests, err, err == 1 ? "" : "s"); + + for (i = 0; i < NDISKS+3; i++) + put_page(data[i]); + + return 0; +} + +static void raid6_test_exit(void) +{ +} + +/* when compiled-in wait for drivers to load first (assumes dma drivers + * are also compliled-in) + */ +late_initcall(raid6_test); +module_exit(raid6_test_exit); +MODULE_AUTHOR("Dan Williams "); +MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests"); +MODULE_LICENSE("GPL"); diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 36e0675be9f7..41b3ae25b813 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -155,6 +155,19 @@ config MD_RAID456 config MD_RAID6_PQ tristate +config ASYNC_RAID6_TEST + tristate "Self test for hardware accelerated raid6 recovery" + depends on MD_RAID6_PQ + select ASYNC_RAID6_RECOV + ---help--- + This is a one-shot self test that permutes through the + recovery of all the possible two disk failure scenarios for a + N-disk array. Recovery is performed with the asynchronous + raid6 recovery routines, and will optionally use an offload + engine if one is available. + + If unsure, say N. + config MD_MULTIPATH tristate "Multipath I/O support" depends on BLK_DEV_MD -- cgit v1.2.3-59-g8ed1b From 0403e3827788d878163f9ef0541b748b0f88ca5d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:50 -0700 Subject: dmaengine: add fence support Some engines optimize operation by reading ahead in the descriptor chain such that descriptor2 may start execution before descriptor1 completes. If descriptor2 depends on the result from descriptor1 then a fence is required (on descriptor2) to disable this optimization. The async_tx api could implicitly identify dependencies via the 'depend_tx' parameter, but that would constrain cases where the dependency chain only specifies a completion order rather than a data dependency. So, provide an ASYNC_TX_FENCE to explicitly identify data dependencies. Signed-off-by: Dan Williams --- crypto/async_tx/async_memcpy.c | 7 ++++-- crypto/async_tx/async_memset.c | 7 ++++-- crypto/async_tx/async_pq.c | 5 ++++ crypto/async_tx/async_raid6_recov.c | 47 +++++++++++++++++++++---------------- crypto/async_tx/async_xor.c | 11 ++++++--- drivers/md/raid5.c | 37 ++++++++++++++++++----------- include/linux/async_tx.h | 3 +++ include/linux/dmaengine.h | 3 +++ 8 files changed, 79 insertions(+), 41 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index 98e15bd0dcb5..b38cbb3fd527 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -52,9 +52,12 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, if (device) { dma_addr_t dma_dest, dma_src; - unsigned long dma_prep_flags; + unsigned long dma_prep_flags = 0; - dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; + if (submit->cb_fn) + dma_prep_flags |= DMA_PREP_INTERRUPT; + if (submit->flags & ASYNC_TX_FENCE) + dma_prep_flags |= DMA_PREP_FENCE; dma_dest = dma_map_page(device->dev, dest, dest_offset, len, DMA_FROM_DEVICE); diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index b896a6e5f673..a374784e3329 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -49,9 +49,12 @@ async_memset(struct page *dest, int val, unsigned int offset, size_t len, if (device) { dma_addr_t dma_dest; - unsigned long dma_prep_flags; + unsigned long dma_prep_flags = 0; - dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; + if (submit->cb_fn) + dma_prep_flags |= DMA_PREP_INTERRUPT; + if (submit->flags & ASYNC_TX_FENCE) + dma_prep_flags |= DMA_PREP_FENCE; dma_dest = dma_map_page(device->dev, dest, offset, len, DMA_FROM_DEVICE); diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 108b21efb499..a25e290c39fb 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -101,6 +101,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, */ if (src_cnt > pq_src_cnt) { submit->flags &= ~ASYNC_TX_ACK; + submit->flags |= ASYNC_TX_FENCE; dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP; submit->cb_fn = NULL; submit->cb_param = NULL; @@ -111,6 +112,8 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, if (cb_fn_orig) dma_flags |= DMA_PREP_INTERRUPT; } + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; /* Since we have clobbered the src_list we are committed * to doing this asynchronously. Drivers force forward @@ -282,6 +285,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, dma_flags |= DMA_PREP_PQ_DISABLE_P; if (!Q(blocks, disks)) dma_flags |= DMA_PREP_PQ_DISABLE_Q; + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; for (i = 0; i < disks; i++) if (likely(blocks[i])) { BUG_ON(is_raid6_zero_block(blocks[i])); diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index 0c14d48c9896..822a42d10061 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c @@ -44,6 +44,8 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, struct dma_async_tx_descriptor *tx; enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); @@ -89,6 +91,8 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len, struct dma_async_tx_descriptor *tx; enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, @@ -138,7 +142,7 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, srcs[1] = q; coef[0] = raid6_gfexi[failb-faila]; coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); tx = async_sum_product(b, srcs, coef, bytes, submit); /* Dy = P+Pxy+Dx */ @@ -188,23 +192,23 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, dp = blocks[faila]; dq = blocks[failb]; - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); tx = async_memcpy(dp, g, 0, 0, bytes, submit); - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); /* compute P + Pxy */ srcs[0] = dp; srcs[1] = p; - init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, - scribble); + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); tx = async_xor(dp, srcs, 0, 2, bytes, submit); /* compute Q + Qxy */ srcs[0] = dq; srcs[1] = q; - init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, - scribble); + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); tx = async_xor(dq, srcs, 0, 2, bytes, submit); /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ @@ -212,7 +216,7 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, srcs[1] = dq; coef[0] = raid6_gfexi[failb-faila]; coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); tx = async_sum_product(dq, srcs, coef, bytes, submit); /* Dy = P+Pxy+Dx */ @@ -252,7 +256,7 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb, blocks[failb] = (void *)raid6_empty_zero_page; blocks[disks-1] = dq; - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); /* Restore pointer table */ @@ -264,15 +268,15 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb, /* compute P + Pxy */ srcs[0] = dp; srcs[1] = p; - init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, - scribble); + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); tx = async_xor(dp, srcs, 0, 2, bytes, submit); /* compute Q + Qxy */ srcs[0] = dq; srcs[1] = q; - init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, - scribble); + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); tx = async_xor(dq, srcs, 0, 2, bytes, submit); /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ @@ -280,7 +284,7 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb, srcs[1] = dq; coef[0] = raid6_gfexi[failb-faila]; coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); tx = async_sum_product(dq, srcs, coef, bytes, submit); /* Dy = P+Pxy+Dx */ @@ -407,13 +411,16 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, int good = faila == 0 ? 1 : 0; struct page *g = blocks[good]; - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, + scribble); tx = async_memcpy(p, g, 0, 0, bytes, submit); - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, + scribble); tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); } else { - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, + scribble); tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); } @@ -426,11 +433,11 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, srcs[0] = dq; srcs[1] = q; - init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, - scribble); + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); tx = async_xor(dq, srcs, 0, 2, bytes, submit); - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); tx = async_mult(dq, dq, coef, bytes, submit); srcs[0] = p; diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 56b5f98da463..db279872ef3d 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -69,6 +69,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, */ if (src_cnt > xor_src_cnt) { submit->flags &= ~ASYNC_TX_ACK; + submit->flags |= ASYNC_TX_FENCE; dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; submit->cb_fn = NULL; submit->cb_param = NULL; @@ -78,7 +79,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, } if (submit->cb_fn) dma_flags |= DMA_PREP_INTERRUPT; - + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; /* Since we have clobbered the src_list we are committed * to doing this asynchronously. Drivers force forward progress * in case they can not provide a descriptor @@ -264,12 +266,15 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, dma_src = (dma_addr_t *) src_list; if (dma_src && device && src_cnt <= device->max_xor) { - unsigned long dma_prep_flags; + unsigned long dma_prep_flags = 0; int i; pr_debug("%s: (async) len: %zu\n", __func__, len); - dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; + if (submit->cb_fn) + dma_prep_flags |= DMA_PREP_INTERRUPT; + if (submit->flags & ASYNC_TX_FENCE) + dma_prep_flags |= DMA_PREP_FENCE; for (i = 0; i < src_cnt; i++) dma_src[i] = dma_map_page(device->dev, src_list[i], offset, len, DMA_TO_DEVICE); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0a5cf2171214..54ef8d75541d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -502,13 +502,17 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, int i; int page_offset; struct async_submit_ctl submit; + enum async_tx_flags flags = 0; if (bio->bi_sector >= sector) page_offset = (signed)(bio->bi_sector - sector) * 512; else page_offset = (signed)(sector - bio->bi_sector) * -512; - init_async_submit(&submit, 0, tx, NULL, NULL, NULL); + if (frombio) + flags |= ASYNC_TX_FENCE; + init_async_submit(&submit, flags, tx, NULL, NULL, NULL); + bio_for_each_segment(bvl, bio, i) { int len = bio_iovec_idx(bio, i)->bv_len; int clen; @@ -685,7 +689,7 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) atomic_inc(&sh->count); - init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, + init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, ops_complete_compute, sh, to_addr_conv(sh, percpu)); if (unlikely(count == 1)) tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); @@ -763,7 +767,8 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) count = set_syndrome_sources(blocks, sh); blocks[count] = NULL; /* regenerating p is not necessary */ BUG_ON(blocks[count+1] != dest); /* q should already be set */ - init_async_submit(&submit, 0, NULL, ops_complete_compute, sh, + init_async_submit(&submit, ASYNC_TX_FENCE, NULL, + ops_complete_compute, sh, to_addr_conv(sh, percpu)); tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); } else { @@ -775,8 +780,8 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) blocks[count++] = sh->dev[i].page; } - init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, - ops_complete_compute, sh, + init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, + NULL, ops_complete_compute, sh, to_addr_conv(sh, percpu)); tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit); } @@ -837,8 +842,9 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) /* Q disk is one of the missing disks */ if (faila == syndrome_disks) { /* Missing P+Q, just recompute */ - init_async_submit(&submit, 0, NULL, ops_complete_compute, - sh, to_addr_conv(sh, percpu)); + init_async_submit(&submit, ASYNC_TX_FENCE, NULL, + ops_complete_compute, sh, + to_addr_conv(sh, percpu)); return async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); } else { @@ -859,21 +865,24 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) blocks[count++] = sh->dev[i].page; } dest = sh->dev[data_target].page; - init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, - NULL, NULL, to_addr_conv(sh, percpu)); + init_async_submit(&submit, + ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, + NULL, NULL, NULL, + to_addr_conv(sh, percpu)); tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit); count = set_syndrome_sources(blocks, sh); - init_async_submit(&submit, 0, tx, ops_complete_compute, - sh, to_addr_conv(sh, percpu)); + init_async_submit(&submit, ASYNC_TX_FENCE, tx, + ops_complete_compute, sh, + to_addr_conv(sh, percpu)); return async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); } } - init_async_submit(&submit, 0, NULL, ops_complete_compute, sh, - to_addr_conv(sh, percpu)); + init_async_submit(&submit, ASYNC_TX_FENCE, NULL, ops_complete_compute, + sh, to_addr_conv(sh, percpu)); if (failb == syndrome_disks) { /* We're missing D+P. */ return async_raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, @@ -916,7 +925,7 @@ ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu, xor_srcs[count++] = dev->page; } - init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx, + init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, ops_complete_prexor, sh, to_addr_conv(sh, percpu)); tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 866e61c4e2e0..a1c486a88e88 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -58,11 +58,14 @@ struct dma_chan_ref { * array. * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a * dependency chain + * @ASYNC_TX_FENCE: specify that the next operation in the dependency + * chain uses this operation's result as an input */ enum async_tx_flags { ASYNC_TX_XOR_ZERO_DST = (1 << 0), ASYNC_TX_XOR_DROP_DST = (1 << 1), ASYNC_TX_ACK = (1 << 2), + ASYNC_TX_FENCE = (1 << 3), }; /** diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 1012f1abcb54..4d6c1c925fd4 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -87,6 +87,8 @@ enum dma_transaction_type { * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as * sources that were the result of a previous operation, in the case of a PQ * operation it continues the calculation with new sources + * @DMA_PREP_FENCE - tell the driver that subsequent operations depend + * on the result of this operation */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), @@ -98,6 +100,7 @@ enum dma_ctrl_flags { DMA_PREP_PQ_DISABLE_P = (1 << 6), DMA_PREP_PQ_DISABLE_Q = (1 << 7), DMA_PREP_CONTINUE = (1 << 8), + DMA_PREP_FENCE = (1 << 9), }; /** -- cgit v1.2.3-59-g8ed1b From 138f4c359d23d2ec38d18bd70dd9613ae515fe93 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:51 -0700 Subject: dmaengine, async_tx: add a "no channel switch" allocator Channel switching is problematic for some dmaengine drivers as the architecture precludes separating the ->prep from ->submit. In these cases the driver can select ASYNC_TX_DISABLE_CHANNEL_SWITCH to modify the async_tx allocator to only return channels that support all of the required asynchronous operations. For example MD_RAID456=y selects support for asynchronous xor, xor validate, pq, pq validate, and memcpy. When ASYNC_TX_DISABLE_CHANNEL_SWITCH=y any channel with all these capabilities is marked DMA_ASYNC_TX allowing async_tx_find_channel() to quickly locate compatible channels with the guarantee that dependency chains will remain on one channel. When ASYNC_TX_DISABLE_CHANNEL_SWITCH=n async_tx_find_channel() may select channels that lead to operation chains that need to cross channel boundaries using the async_tx channel switch capability. Signed-off-by: Dan Williams --- crypto/async_tx/async_tx.c | 4 ++++ drivers/dma/Kconfig | 4 ++++ drivers/dma/dmaengine.c | 40 ++++++++++++++++++++++++++++++++++++++++ include/linux/dmaengine.h | 10 +++++++++- 4 files changed, 57 insertions(+), 1 deletion(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 60615fedcf5e..f9cdf04fe7c0 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -81,6 +81,10 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, struct dma_device *device = chan->device; struct dma_async_tx_descriptor *intr_tx = (void *) ~0; + #ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH + BUG(); + #endif + /* first check to see if we can still append to depend_tx */ spin_lock_bh(&depend_tx->lock); if (depend_tx->parent && depend_tx->chan == tx->chan) { diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 912a51b5cbd3..ddcd9793b25c 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -17,11 +17,15 @@ if DMADEVICES comment "DMA Devices" +config ASYNC_TX_DISABLE_CHANNEL_SWITCH + bool + config INTEL_IOATDMA tristate "Intel I/OAT DMA support" depends on PCI && X86 select DMA_ENGINE select DCA + select ASYNC_TX_DISABLE_CHANNEL_SWITCH help Enable support for the Intel(R) I/OAT DMA engine present in recent Intel Xeon chipsets. diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 96598479eece..d5bc628d207c 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -608,6 +608,40 @@ void dmaengine_put(void) } EXPORT_SYMBOL(dmaengine_put); +static bool device_has_all_tx_types(struct dma_device *device) +{ + /* A device that satisfies this test has channels that will never cause + * an async_tx channel switch event as all possible operation types can + * be handled. + */ + #ifdef CONFIG_ASYNC_TX_DMA + if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE) + if (!dma_has_cap(DMA_MEMCPY, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE) + if (!dma_has_cap(DMA_MEMSET, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE) + if (!dma_has_cap(DMA_XOR, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE) + if (!dma_has_cap(DMA_PQ, device->cap_mask)) + return false; + #endif + + return true; +} + static int get_dma_id(struct dma_device *device) { int rc; @@ -665,6 +699,12 @@ int dma_async_device_register(struct dma_device *device) BUG_ON(!device->device_issue_pending); BUG_ON(!device->dev); + /* note: this only matters in the + * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case + */ + if (device_has_all_tx_types(device)) + dma_cap_set(DMA_ASYNC_TX, device->cap_mask); + idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL); if (!idr_ref) return -ENOMEM; diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 4d6c1c925fd4..86853ed7970b 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -48,6 +48,9 @@ enum dma_status { /** * enum dma_transaction_type - DMA transaction types/indexes + * + * Note: The DMA_ASYNC_TX capability is not to be set by drivers. It is + * automatically set as dma devices are registered. */ enum dma_transaction_type { DMA_MEMCPY, @@ -61,6 +64,7 @@ enum dma_transaction_type { DMA_MEMCPY_CRC32C, DMA_INTERRUPT, DMA_PRIVATE, + DMA_ASYNC_TX, DMA_SLAVE, }; @@ -396,7 +400,11 @@ static inline void net_dmaengine_put(void) #ifdef CONFIG_ASYNC_TX_DMA #define async_dmaengine_get() dmaengine_get() #define async_dmaengine_put() dmaengine_put() +#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH +#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX) +#else #define async_dma_find_channel(type) dma_find_channel(type) +#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */ #else static inline void async_dmaengine_get(void) { @@ -409,7 +417,7 @@ async_dma_find_channel(enum dma_transaction_type type) { return NULL; } -#endif +#endif /* CONFIG_ASYNC_TX_DMA */ dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); -- cgit v1.2.3-59-g8ed1b From 83544ae9f3991bfc7d5e0fe9a3008cd05a8d57b7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:53 -0700 Subject: dmaengine, async_tx: support alignment checks Some engines have transfer size and address alignment restrictions. Add a per-operation alignment property to struct dma_device that the async routines and dmatest can use to check alignment capabilities. Signed-off-by: Dan Williams --- crypto/async_tx/async_memcpy.c | 2 +- crypto/async_tx/async_memset.c | 2 +- crypto/async_tx/async_pq.c | 6 ++++-- crypto/async_tx/async_xor.c | 5 +++-- drivers/dma/dmatest.c | 14 ++++++++++++++ include/linux/dmaengine.h | 44 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 67 insertions(+), 6 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index b38cbb3fd527..0ec1fb69d4ea 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -50,7 +50,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; - if (device) { + if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) { dma_addr_t dma_dest, dma_src; unsigned long dma_prep_flags = 0; diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index a374784e3329..58e4a8752aee 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -47,7 +47,7 @@ async_memset(struct page *dest, int val, unsigned int offset, size_t len, struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; - if (device) { + if (device && is_dma_fill_aligned(device, offset, 0, len)) { dma_addr_t dma_dest; unsigned long dma_prep_flags = 0; diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index a25e290c39fb..b88db6d1dc65 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -211,7 +211,8 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, if (dma_src && device && (src_cnt <= dma_maxpq(device, 0) || - dma_maxpq(device, DMA_PREP_CONTINUE) > 0)) { + dma_maxpq(device, DMA_PREP_CONTINUE) > 0) && + is_dma_pq_aligned(device, offset, 0, len)) { /* run the p+q asynchronously */ pr_debug("%s: (async) disks: %d len: %zu\n", __func__, disks, len); @@ -274,7 +275,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, else if (sizeof(dma_addr_t) <= sizeof(struct page *)) dma_src = (dma_addr_t *) blocks; - if (dma_src && device && disks <= dma_maxpq(device, 0)) { + if (dma_src && device && disks <= dma_maxpq(device, 0) && + is_dma_pq_aligned(device, offset, 0, len)) { struct device *dev = device->dev; dma_addr_t *pq = &dma_src[disks-2]; int i; diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index db279872ef3d..b459a9034aac 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -193,7 +193,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, else if (sizeof(dma_addr_t) <= sizeof(struct page *)) dma_src = (dma_addr_t *) src_list; - if (dma_src && chan) { + if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) { /* run the xor asynchronously */ pr_debug("%s (async): len: %zu\n", __func__, len); @@ -265,7 +265,8 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, else if (sizeof(dma_addr_t) <= sizeof(struct page *)) dma_src = (dma_addr_t *) src_list; - if (dma_src && device && src_cnt <= device->max_xor) { + if (dma_src && device && src_cnt <= device->max_xor && + is_dma_xor_aligned(device, offset, 0, len)) { unsigned long dma_prep_flags = 0; int i; diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 58e49e41c7a3..a3722a7384b5 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -288,6 +288,7 @@ static int dmatest_func(void *data) dma_addr_t dma_dsts[dst_cnt]; struct completion cmp; unsigned long tmo = msecs_to_jiffies(3000); + u8 align = 0; total_tests++; @@ -295,6 +296,18 @@ static int dmatest_func(void *data) src_off = dmatest_random() % (test_buf_size - len + 1); dst_off = dmatest_random() % (test_buf_size - len + 1); + /* honor alignment restrictions */ + if (thread->type == DMA_MEMCPY) + align = dev->copy_align; + else if (thread->type == DMA_XOR) + align = dev->xor_align; + else if (thread->type == DMA_PQ) + align = dev->pq_align; + + len = (len >> align) << align; + src_off = (src_off >> align) << align; + dst_off = (dst_off >> align) << align; + dmatest_init_srcs(thread->srcs, src_off, len); dmatest_init_dsts(thread->dsts, dst_off, len); @@ -311,6 +324,7 @@ static int dmatest_func(void *data) DMA_BIDIRECTIONAL); } + if (thread->type == DMA_MEMCPY) tx = dev->device_prep_dma_memcpy(chan, dma_dsts[0] + dst_off, diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index db23fd583f98..835b9c7bf1c2 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -245,6 +245,10 @@ struct dma_async_tx_descriptor { * @cap_mask: one or more dma_capability flags * @max_xor: maximum number of xor sources, 0 if no capability * @max_pq: maximum number of PQ sources and PQ-continue capability + * @copy_align: alignment shift for memcpy operations + * @xor_align: alignment shift for xor operations + * @pq_align: alignment shift for pq operations + * @fill_align: alignment shift for memset operations * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the @@ -271,6 +275,10 @@ struct dma_device { dma_cap_mask_t cap_mask; unsigned short max_xor; unsigned short max_pq; + u8 copy_align; + u8 xor_align; + u8 pq_align; + u8 fill_align; #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; @@ -314,6 +322,42 @@ struct dma_device { void (*device_issue_pending)(struct dma_chan *chan); }; +static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len) +{ + size_t mask; + + if (!align) + return true; + mask = (1 << align) - 1; + if (mask & (off1 | off2 | len)) + return false; + return true; +} + +static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->copy_align, off1, off2, len); +} + +static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->xor_align, off1, off2, len); +} + +static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->pq_align, off1, off2, len); +} + +static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->fill_align, off1, off2, len); +} + static inline void dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) { -- cgit v1.2.3-59-g8ed1b From 1b6df6930994d5d027375b07ac9da63644eb5758 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 16 Sep 2009 21:03:29 -0700 Subject: raid6test: fix stack overflow Testing on x86_64 with NDISKS=255 yields: do_IRQ: modprobe near stack overflow (cur:ffff88007d19c000,sp:ffff88007d19c128) ...and eventually general protection fault: 0000 [#1] Moving the scribble buffers off the stack allows the test to complete successfully. Signed-off-by: Dan Williams --- crypto/async_tx/raid6test.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c index 98c83ca96c83..3ec27c7e62ea 100644 --- a/crypto/async_tx/raid6test.c +++ b/crypto/async_tx/raid6test.c @@ -28,6 +28,7 @@ #define NDISKS 16 /* Including P and Q */ static struct page *dataptrs[NDISKS]; +static addr_conv_t addr_conv[NDISKS]; static struct page *data[NDISKS+3]; static struct page *spare; static struct page *recovi; @@ -69,7 +70,6 @@ static char disk_type(int d, int disks) static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs) { struct async_submit_ctl submit; - addr_conv_t addr_conv[disks]; struct completion cmp; struct dma_async_tx_descriptor *tx = NULL; enum sum_check_flags result = ~0; @@ -156,7 +156,6 @@ static int test_disks(int i, int j, int disks) static int test(int disks, int *tests) { - addr_conv_t addr_conv[disks]; struct dma_async_tx_descriptor *tx; struct async_submit_ctl submit; struct completion cmp; -- cgit v1.2.3-59-g8ed1b From 1f6672d44c1ae7408b43c06170ec34eb0a0e9b9f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 21 Sep 2009 10:47:40 -0700 Subject: async_tx/raid6: add missing dma_unmap calls to the async fail case If we are unable to offload async_mult() or async_sum_product(), then unmap the buffers before falling through to the synchronous path. Signed-off-by: Dan Williams --- crypto/async_tx/async_raid6_recov.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'crypto') diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index 822a42d10061..6d73dde4786d 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c @@ -55,6 +55,13 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, async_tx_submit(chan, tx, submit); return tx; } + + /* could not get a descriptor, unmap and fall through to + * the synchronous path + */ + dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); + dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE); } /* run the operation synchronously */ @@ -101,6 +108,12 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len, async_tx_submit(chan, tx, submit); return tx; } + + /* could not get a descriptor, unmap and fall through to + * the synchronous path + */ + dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); } /* no channel available, or failed to allocate a descriptor, so -- cgit v1.2.3-59-g8ed1b From d43c36dc6b357fa1806800f18aa30123c747a6d1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 7 Oct 2009 17:09:06 +0400 Subject: headers: remove sched.h from interrupt.h After m68k's task_thread_info() doesn't refer to current, it's possible to remove sched.h from interrupt.h and not break m68k! Many thanks to Heiko Carstens for allowing this. Signed-off-by: Alexey Dobriyan --- arch/arm/kernel/time.c | 1 + arch/arm/mach-integrator/pci_v3.c | 1 + arch/arm/plat-s3c24xx/adc.c | 1 + arch/blackfin/kernel/time.c | 1 + arch/m32r/kernel/smp.c | 1 + arch/um/drivers/line.c | 1 + arch/um/drivers/port_kern.c | 1 + arch/um/kernel/irq.c | 1 + arch/x86/kernel/cpu/mcheck/mce_intel.c | 1 + arch/x86/kernel/pci-gart_64.c | 1 + arch/x86/kernel/reboot.c | 1 + arch/xtensa/kernel/time.c | 1 + crypto/aead.c | 1 + drivers/char/applicom.c | 1 + drivers/char/epca.c | 1 + drivers/char/generic_serial.c | 1 + drivers/char/istallion.c | 1 + drivers/char/nozomi.c | 1 + drivers/char/pty.c | 1 + drivers/char/rio/riocmd.c | 1 + drivers/char/rio/rioctrl.c | 1 + drivers/char/rio/riotty.c | 1 + drivers/char/ser_a2232.c | 1 + drivers/char/stallion.c | 1 + drivers/char/tlclk.c | 1 + drivers/hwmon/sht15.c | 1 + drivers/ieee1394/raw1394.c | 1 + drivers/ieee1394/video1394.c | 1 + drivers/infiniband/core/iwcm.c | 1 + drivers/infiniband/core/ucma.c | 1 + drivers/infiniband/hw/cxgb3/iwch_provider.c | 1 + drivers/infiniband/hw/cxgb3/iwch_qp.c | 1 + drivers/infiniband/hw/ipath/ipath_driver.c | 1 + drivers/infiniband/hw/ipath/ipath_iba7220.c | 1 + drivers/infiniband/hw/ipath/ipath_intr.c | 1 + drivers/infiniband/hw/ipath/ipath_qp.c | 1 + drivers/infiniband/hw/ipath/ipath_ruc.c | 1 + drivers/infiniband/hw/ipath/ipath_ud.c | 1 + drivers/infiniband/hw/ipath/ipath_user_pages.c | 1 + drivers/infiniband/hw/ipath/ipath_user_sdma.c | 1 + drivers/infiniband/hw/ipath/ipath_verbs_mcast.c | 1 + drivers/input/keyboard/hilkbd.c | 1 + drivers/input/keyboard/sunkbd.c | 1 + drivers/input/serio/libps2.c | 1 + drivers/input/serio/serio_raw.c | 1 + drivers/input/serio/serport.c | 1 + drivers/isdn/capi/kcapi.c | 1 + drivers/isdn/hisax/arcofi.c | 1 + drivers/isdn/hisax/hfc_2bds0.c | 1 + drivers/isdn/hisax/hfc_pci.c | 1 + drivers/isdn/hysdn/hysdn_procconf.c | 1 + drivers/isdn/hysdn/hysdn_proclog.c | 1 + drivers/isdn/pcbit/drv.c | 1 + drivers/isdn/pcbit/layer2.c | 1 + drivers/isdn/sc/init.c | 1 + drivers/lguest/interrupts_and_traps.c | 1 + drivers/media/dvb/dvb-core/dvb_net.c | 1 + drivers/media/video/meye.c | 1 + drivers/media/video/videobuf-core.c | 1 + drivers/media/video/videobuf-dma-sg.c | 1 + drivers/message/fusion/mptlan.c | 1 + drivers/mfd/ucb1x00-core.c | 1 + drivers/misc/hpilo.c | 1 + drivers/misc/ibmasm/command.c | 1 + drivers/misc/ibmasm/event.c | 1 + drivers/misc/ibmasm/r_heartbeat.c | 1 + drivers/misc/phantom.c | 1 + drivers/mtd/devices/m25p80.c | 1 + drivers/mtd/devices/sst25l.c | 1 + drivers/net/bonding/bond_sysfs.c | 1 + drivers/net/depca.c | 1 + drivers/net/e100.c | 1 + drivers/net/eql.c | 1 + drivers/net/ethoc.c | 1 + drivers/net/ewrk3.c | 1 + drivers/net/forcedeth.c | 1 + drivers/net/hamachi.c | 1 + drivers/net/hamradio/baycom_epp.c | 1 + drivers/net/hamradio/baycom_ser_fdx.c | 1 + drivers/net/hamradio/baycom_ser_hdx.c | 1 + drivers/net/hamradio/hdlcdrv.c | 1 + drivers/net/hp100.c | 1 + drivers/net/igb/igb_ethtool.c | 1 + drivers/net/irda/toim3232-sir.c | 1 + drivers/net/ns83820.c | 1 + drivers/net/pcnet32.c | 1 + drivers/net/sb1000.c | 1 + drivers/net/sis900.c | 1 + drivers/net/skfp/skfddi.c | 1 + drivers/net/skge.c | 1 + drivers/net/slip.c | 1 + drivers/net/sungem.c | 1 + drivers/net/tokenring/ibmtr.c | 1 + drivers/net/typhoon.c | 1 + drivers/net/wan/cosa.c | 1 + drivers/net/wan/cycx_x25.c | 1 + drivers/net/wan/dscc4.c | 1 + drivers/net/wan/farsync.c | 1 + drivers/net/wireless/b43/pio.c | 1 + drivers/net/wireless/b43legacy/main.c | 1 + drivers/net/wireless/b43legacy/phy.c | 1 + drivers/net/wireless/hostap/hostap_info.c | 1 + drivers/net/wireless/hostap/hostap_ioctl.c | 1 + drivers/net/wireless/ipw2x00/ipw2200.c | 1 + drivers/net/wireless/iwlwifi/iwl-3945.c | 1 + drivers/net/wireless/iwlwifi/iwl-4965.c | 1 + drivers/net/wireless/iwlwifi/iwl-5000.c | 1 + drivers/net/wireless/iwlwifi/iwl-agn.c | 1 + drivers/net/wireless/iwlwifi/iwl-core.c | 1 + drivers/net/wireless/iwlwifi/iwl-hcmd.c | 1 + drivers/net/wireless/iwlwifi/iwl-tx.c | 1 + drivers/net/wireless/iwlwifi/iwl3945-base.c | 1 + drivers/net/wireless/iwmc3200wifi/cfg80211.c | 1 + drivers/net/wireless/iwmc3200wifi/commands.c | 1 + drivers/net/wireless/iwmc3200wifi/main.c | 1 + drivers/net/wireless/iwmc3200wifi/rx.c | 1 + drivers/net/wireless/libertas/cmd.c | 1 + drivers/net/wireless/libertas/tx.c | 1 + drivers/net/wireless/prism54/isl_ioctl.c | 1 + drivers/net/wireless/prism54/islpci_dev.c | 1 + drivers/net/wireless/prism54/islpci_mgt.c | 1 + drivers/net/wireless/rt2x00/rt2x00debug.c | 1 + drivers/pci/pcie/aer/aerdrv.c | 1 + drivers/rtc/interface.c | 1 + drivers/rtc/rtc-dev.c | 1 + drivers/uio/uio.c | 1 + drivers/uwb/whc-rc.c | 1 + fs/file.c | 1 + include/linux/interrupt.h | 2 +- include/linux/mmc/host.h | 1 + kernel/irq/handle.c | 1 + kernel/mutex-debug.c | 1 + kernel/time/timekeeping.c | 1 + lib/debugobjects.c | 1 + lib/fault-inject.c | 1 + mm/vmalloc.c | 1 + net/irda/ircomm/ircomm_tty_attach.c | 1 + net/irda/irlan/irlan_common.c | 1 + net/irda/irlan/irlan_eth.c | 1 + net/irda/irnet/irnet_irda.c | 1 + net/irda/irnet/irnet_ppp.c | 1 + net/mac80211/rc80211_pid_debugfs.c | 1 + net/netfilter/nf_conntrack_core.c | 1 + net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 + net/wireless/core.c | 1 + 145 files changed, 145 insertions(+), 1 deletion(-) (limited to 'crypto') diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 4cdc4a0bd02d..d38cdf2c8276 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c index 901cc205015e..148d25fc636f 100644 --- a/arch/arm/mach-integrator/pci_v3.c +++ b/arch/arm/mach-integrator/pci_v3.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/plat-s3c24xx/adc.c b/arch/arm/plat-s3c24xx/adc.c index 11117a7ba911..4d36b784fb8b 100644 --- a/arch/arm/plat-s3c24xx/adc.c +++ b/arch/arm/plat-s3c24xx/adc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c index e5069fe6861e..bd3b53da295e 100644 --- a/arch/blackfin/kernel/time.c +++ b/arch/blackfin/kernel/time.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c index 1b7598e6f6e8..8a88f1f0a3e2 100644 --- a/arch/m32r/kernel/smp.c +++ b/arch/m32r/kernel/smp.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 14a102e877d6..cf8a97f34518 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -5,6 +5,7 @@ #include "linux/irqreturn.h" #include "linux/kd.h" +#include "linux/sched.h" #include "chan_kern.h" #include "irq_kern.h" #include "irq_user.h" diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c index 19930081d3d8..4ebc8a34738f 100644 --- a/arch/um/drivers/port_kern.c +++ b/arch/um/drivers/port_kern.c @@ -7,6 +7,7 @@ #include "linux/interrupt.h" #include "linux/list.h" #include "linux/mutex.h" +#include "linux/workqueue.h" #include "asm/atomic.h" #include "init.h" #include "irq_kern.h" diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 454cdb43e351..039270b9b73b 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -10,6 +10,7 @@ #include "linux/interrupt.h" #include "linux/kernel_stat.h" #include "linux/module.h" +#include "linux/sched.h" #include "linux/seq_file.h" #include "as-layout.h" #include "kern_util.h" diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 889f665fe93d..7c785634af2b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 98a827ee9ed7..a7f1b64f86e0 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 27349f92a6d7..a1a3cdda06e1 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 19085ff0484a..19f7df30937f 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/crypto/aead.c b/crypto/aead.c index d9aa733db164..0a55da70845e 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c index 73a0765344b6..fe2cb2f5db17 100644 --- a/drivers/char/applicom.c +++ b/drivers/char/applicom.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/epca.c b/drivers/char/epca.c index 9d589e3144de..dde5134713e2 100644 --- a/drivers/char/epca.c +++ b/drivers/char/epca.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/generic_serial.c b/drivers/char/generic_serial.c index 9e4e569dc00d..d400cbd280f2 100644 --- a/drivers/char/generic_serial.c +++ b/drivers/char/generic_serial.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index ab2f3349c5c4..402838f4083e 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -19,6 +19,7 @@ /*****************************************************************************/ #include +#include #include #include #include diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c index ec58d8c387ff..d3400b20444f 100644 --- a/drivers/char/nozomi.c +++ b/drivers/char/nozomi.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/pty.c b/drivers/char/pty.c index e066c4fdf81b..62f282e67638 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/rio/riocmd.c b/drivers/char/rio/riocmd.c index 01f2654d5a2e..f121357e5af0 100644 --- a/drivers/char/rio/riocmd.c +++ b/drivers/char/rio/riocmd.c @@ -32,6 +32,7 @@ */ #include +#include #include #include #include diff --git a/drivers/char/rio/rioctrl.c b/drivers/char/rio/rioctrl.c index 74339559f0b9..780506326a73 100644 --- a/drivers/char/rio/rioctrl.c +++ b/drivers/char/rio/rioctrl.c @@ -31,6 +31,7 @@ */ #include +#include #include #include #include diff --git a/drivers/char/rio/riotty.c b/drivers/char/rio/riotty.c index 2fb49e89b324..47fab7c33073 100644 --- a/drivers/char/rio/riotty.c +++ b/drivers/char/rio/riotty.c @@ -33,6 +33,7 @@ #define __EXPLICIT_DEF_H__ #include +#include #include #include #include diff --git a/drivers/char/ser_a2232.c b/drivers/char/ser_a2232.c index 33a2b531802e..9610861d1f5f 100644 --- a/drivers/char/ser_a2232.c +++ b/drivers/char/ser_a2232.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c index 53e504f41b20..db6dcfa35ba0 100644 --- a/drivers/char/stallion.c +++ b/drivers/char/stallion.c @@ -27,6 +27,7 @@ /*****************************************************************************/ #include +#include #include #include #include diff --git a/drivers/char/tlclk.c b/drivers/char/tlclk.c index 8f2284be68e1..80ea6bcfffdc 100644 --- a/drivers/char/tlclk.c +++ b/drivers/char/tlclk.c @@ -32,6 +32,7 @@ #include /* printk() */ #include /* everything... */ #include /* error codes */ +#include #include #include #include diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c index 303c02694c3c..2da6fb2c325e 100644 --- a/drivers/hwmon/sht15.c +++ b/drivers/hwmon/sht15.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c index 0bc3d78ce7b1..8aa56ac07e29 100644 --- a/drivers/ieee1394/raw1394.c +++ b/drivers/ieee1394/raw1394.c @@ -29,6 +29,7 @@ #include #include +#include #include #include #include diff --git a/drivers/ieee1394/video1394.c b/drivers/ieee1394/video1394.c index d287ba79821d..949064a05675 100644 --- a/drivers/ieee1394/video1394.c +++ b/drivers/ieee1394/video1394.c @@ -30,6 +30,7 @@ */ #include #include +#include #include #include #include diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 625fec5a741c..0f89909abce9 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 4346a24568fb..bb96d3c4b0f4 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 03cfaecc3bb7..ed7175549ebd 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 6e8653471941..1cecf98829ac 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -29,6 +29,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include #include "iwch_provider.h" #include "iwch.h" #include "iwch_cm.h" diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 04e88b600558..013d1380e77c 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -31,6 +31,7 @@ * SOFTWARE. */ +#include #include #include #include diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c index b2a9d4c155d1..a805402dd4ae 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba7220.c +++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c @@ -37,6 +37,7 @@ #include #include +#include #include #include #include diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 6c21b4b5ec71..c0a03ac03ee7 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -33,6 +33,7 @@ #include #include +#include #include "ipath_kernel.h" #include "ipath_verbs.h" diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 3a5a89b609c4..cb2d3ef2ae12 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -32,6 +32,7 @@ */ #include +#include #include #include "ipath_verbs.h" diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 2296832f94da..1f95bbaf7602 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -31,6 +31,7 @@ * SOFTWARE. */ +#include #include #include "ipath_verbs.h" diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 6076cb61bf6a..7420715256a9 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -31,6 +31,7 @@ * SOFTWARE. */ +#include #include #include "ipath_verbs.h" diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c index 855911e7396d..82878e348627 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c @@ -33,6 +33,7 @@ #include #include +#include #include "ipath_kernel.h" diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c index 7bff4b9baa0a..be78f6643c06 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c index d73e32232879..6923e1d986da 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -32,6 +32,7 @@ */ #include +#include #include "ipath_verbs.h" diff --git a/drivers/input/keyboard/hilkbd.c b/drivers/input/keyboard/hilkbd.c index e9d639ec283d..5f72440b50c8 100644 --- a/drivers/input/keyboard/hilkbd.c +++ b/drivers/input/keyboard/hilkbd.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_HP300 diff --git a/drivers/input/keyboard/sunkbd.c b/drivers/input/keyboard/sunkbd.c index 472b56639cdb..a99a04b03ee4 100644 --- a/drivers/input/keyboard/sunkbd.c +++ b/drivers/input/keyboard/sunkbd.c @@ -27,6 +27,7 @@ */ #include +#include #include #include #include diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c index 769ba65a585a..f3876acc3e83 100644 --- a/drivers/input/serio/libps2.c +++ b/drivers/input/serio/libps2.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/drivers/input/serio/serio_raw.c b/drivers/input/serio/serio_raw.c index b03009bb7468..27fdaaffbb40 100644 --- a/drivers/input/serio/serio_raw.c +++ b/drivers/input/serio/serio_raw.c @@ -9,6 +9,7 @@ * the Free Software Foundation. */ +#include #include #include #include diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c index b9694b6445d0..6d345112bcb7 100644 --- a/drivers/input/serio/serport.c +++ b/drivers/input/serio/serport.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index 57d26360f64e..dc506ab99cac 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/isdn/hisax/arcofi.c b/drivers/isdn/hisax/arcofi.c index d30ce5b978c2..85a8fd8dd0b7 100644 --- a/drivers/isdn/hisax/arcofi.c +++ b/drivers/isdn/hisax/arcofi.c @@ -10,6 +10,7 @@ * */ +#include #include "hisax.h" #include "isdnl1.h" #include "isac.h" diff --git a/drivers/isdn/hisax/hfc_2bds0.c b/drivers/isdn/hisax/hfc_2bds0.c index 5c46a7130e06..8d22f50760eb 100644 --- a/drivers/isdn/hisax/hfc_2bds0.c +++ b/drivers/isdn/hisax/hfc_2bds0.c @@ -11,6 +11,7 @@ */ #include +#include #include "hisax.h" #include "hfc_2bds0.h" #include "isdnl1.h" diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c index d110a77940a4..10914731b304 100644 --- a/drivers/isdn/hisax/hfc_pci.c +++ b/drivers/isdn/hisax/hfc_pci.c @@ -20,6 +20,7 @@ #include "hfc_pci.h" #include "isdnl1.h" #include +#include #include static const char *hfcpci_revision = "$Revision: 1.48.2.4 $"; diff --git a/drivers/isdn/hysdn/hysdn_procconf.c b/drivers/isdn/hysdn/hysdn_procconf.c index 8f9f4912de32..90b35e1a4b7e 100644 --- a/drivers/isdn/hysdn/hysdn_procconf.c +++ b/drivers/isdn/hysdn/hysdn_procconf.c @@ -11,6 +11,7 @@ * */ +#include #include #include #include diff --git a/drivers/isdn/hysdn/hysdn_proclog.c b/drivers/isdn/hysdn/hysdn_proclog.c index 8991d2c8ee4a..8bcae28c4409 100644 --- a/drivers/isdn/hysdn/hysdn_proclog.c +++ b/drivers/isdn/hysdn/hysdn_proclog.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "hysdn_defs.h" diff --git a/drivers/isdn/pcbit/drv.c b/drivers/isdn/pcbit/drv.c index 8c66bcb953a1..123c1d6c43b4 100644 --- a/drivers/isdn/pcbit/drv.c +++ b/drivers/isdn/pcbit/drv.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include diff --git a/drivers/isdn/pcbit/layer2.c b/drivers/isdn/pcbit/layer2.c index e075e8d2fce0..30f0f45e3139 100644 --- a/drivers/isdn/pcbit/layer2.c +++ b/drivers/isdn/pcbit/layer2.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c index dd0acd06750b..5a0774880d56 100644 --- a/drivers/isdn/sc/init.c +++ b/drivers/isdn/sc/init.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "includes.h" #include "hardware.h" #include "card.h" diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 18648180db02..daaf86631647 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "lg.h" /* Allow Guests to use a non-128 (ie. non-Linux) syscall trap. */ diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c index 8c9ae0a3a272..0241a7c5c34a 100644 --- a/drivers/media/dvb/dvb-core/dvb_net.c +++ b/drivers/media/dvb/dvb-core/dvb_net.c @@ -63,6 +63,7 @@ #include #include #include +#include #include "dvb_demux.h" #include "dvb_net.h" diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c index 4b1bc05a462c..01e1eefcf1eb 100644 --- a/drivers/media/video/meye.c +++ b/drivers/media/video/meye.c @@ -28,6 +28,7 @@ */ #include #include +#include #include #include #include diff --git a/drivers/media/video/videobuf-core.c b/drivers/media/video/videobuf-core.c index f1ccf98c0a6f..8e93c6f25c83 100644 --- a/drivers/media/video/videobuf-core.c +++ b/drivers/media/video/videobuf-core.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c index 53cdd67cebe1..032ebae0134a 100644 --- a/drivers/media/video/videobuf-dma-sg.c +++ b/drivers/media/video/videobuf-dma-sg.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c index bc2ec2182c00..34f3f36f819b 100644 --- a/drivers/message/fusion/mptlan.c +++ b/drivers/message/fusion/mptlan.c @@ -56,6 +56,7 @@ #include #include #include +#include #define my_VERSION MPT_LINUX_VERSION_COMMON #define MYNAM "mptlan" diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index fea9085fe52c..60c3988f3cf3 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -18,6 +18,7 @@ */ #include #include +#include #include #include #include diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c index 1ad27c6abcca..a92a3a742b43 100644 --- a/drivers/misc/hpilo.c +++ b/drivers/misc/hpilo.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/ibmasm/command.c b/drivers/misc/ibmasm/command.c index 276d3fb68094..e2031739aa29 100644 --- a/drivers/misc/ibmasm/command.c +++ b/drivers/misc/ibmasm/command.c @@ -22,6 +22,7 @@ * */ +#include #include "ibmasm.h" #include "lowlevel.h" diff --git a/drivers/misc/ibmasm/event.c b/drivers/misc/ibmasm/event.c index 68a0a5b94795..572d41ffc186 100644 --- a/drivers/misc/ibmasm/event.c +++ b/drivers/misc/ibmasm/event.c @@ -22,6 +22,7 @@ * */ +#include #include "ibmasm.h" #include "lowlevel.h" diff --git a/drivers/misc/ibmasm/r_heartbeat.c b/drivers/misc/ibmasm/r_heartbeat.c index bec9e2c44bef..2de487ac788c 100644 --- a/drivers/misc/ibmasm/r_heartbeat.c +++ b/drivers/misc/ibmasm/r_heartbeat.c @@ -20,6 +20,7 @@ * */ +#include #include "ibmasm.h" #include "dot_command.h" diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c index 90a95ce8dc34..04c27266f567 100644 --- a/drivers/misc/phantom.c +++ b/drivers/misc/phantom.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index 379c316f329e..4c19269de91a 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/drivers/mtd/devices/sst25l.c b/drivers/mtd/devices/sst25l.c index c2baf3353f84..0a11721f146e 100644 --- a/drivers/mtd/devices/sst25l.c +++ b/drivers/mtd/devices/sst25l.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index ff449de6f3c0..8762a27a2a18 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/depca.c b/drivers/net/depca.c index 9686c1fa28f1..7a3bdac84abe 100644 --- a/drivers/net/depca.c +++ b/drivers/net/depca.c @@ -237,6 +237,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 679965c2bb86..5d2f48f02251 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -151,6 +151,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/eql.c b/drivers/net/eql.c index d4d9a3eda695..f5b96cadeb25 100644 --- a/drivers/net/eql.c +++ b/drivers/net/eql.c @@ -111,6 +111,7 @@ * Sorry, I had to rewrite most of this for 2.5.x -DaveM */ +#include #include #include #include diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c index 34d0c69e67f7..96f5b2a2d2c5 100644 --- a/drivers/net/ethoc.c +++ b/drivers/net/ethoc.c @@ -17,6 +17,7 @@ #include #include #include +#include #include static int buffer_size = 0x8000; /* 32 KBytes */ diff --git a/drivers/net/ewrk3.c b/drivers/net/ewrk3.c index b2a5ec8f3721..dd4ba01fd92d 100644 --- a/drivers/net/ewrk3.c +++ b/drivers/net/ewrk3.c @@ -145,6 +145,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 0a1c2bb27d4d..e1da4666f204 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c index 1d5064a09aca..f7519a594945 100644 --- a/drivers/net/hamachi.c +++ b/drivers/net/hamachi.c @@ -145,6 +145,7 @@ static int tx_params[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; /* Time in jiffies before concluding the transmitter is hung. */ #define TX_TIMEOUT (5*HZ) +#include #include #include #include diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 7bcaf7c66243..e344c84c0ef9 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c index aa4488e871b2..ed60fd664273 100644 --- a/drivers/net/hamradio/baycom_ser_fdx.c +++ b/drivers/net/hamradio/baycom_ser_fdx.c @@ -71,6 +71,7 @@ /*****************************************************************************/ +#include #include #include #include diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c index 88c593596020..1686f6dcbbce 100644 --- a/drivers/net/hamradio/baycom_ser_hdx.c +++ b/drivers/net/hamradio/baycom_ser_hdx.c @@ -61,6 +61,7 @@ /*****************************************************************************/ +#include #include #include #include diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index 0013c409782c..91c5790c9581 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -42,6 +42,7 @@ /*****************************************************************************/ +#include #include #include #include diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c index a9a1a99f02dd..dd8665138062 100644 --- a/drivers/net/hp100.c +++ b/drivers/net/hp100.c @@ -98,6 +98,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/igb/igb_ethtool.c b/drivers/net/igb/igb_ethtool.c index d004c359244c..deaea8fa1032 100644 --- a/drivers/net/igb/igb_ethtool.c +++ b/drivers/net/igb/igb_ethtool.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "igb.h" diff --git a/drivers/net/irda/toim3232-sir.c b/drivers/net/irda/toim3232-sir.c index fcf287b749db..99e1ec02a011 100644 --- a/drivers/net/irda/toim3232-sir.c +++ b/drivers/net/irda/toim3232-sir.c @@ -120,6 +120,7 @@ #include #include #include +#include #include diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index c594e1946476..57fd483dbb1f 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -111,6 +111,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index 6d28b18e7e28..c1b3f09f452c 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -31,6 +31,7 @@ static const char *const version = #include #include +#include #include #include #include diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c index ee366c5a8fa3..c9c70ab0cce0 100644 --- a/drivers/net/sb1000.c +++ b/drivers/net/sb1000.c @@ -36,6 +36,7 @@ static char version[] = "sb1000.c:v1.1.2 6/01/98 (fventuri@mediaone.net)\n"; #include #include +#include #include #include #include diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c index 97949d0a699b..c072f7f36acf 100644 --- a/drivers/net/sis900.c +++ b/drivers/net/sis900.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c index 38a508b4aad9..b27156eaf267 100644 --- a/drivers/net/skfp/skfddi.c +++ b/drivers/net/skfp/skfddi.c @@ -73,6 +73,7 @@ static const char * const boot_msg = /* Include files */ +#include #include #include #include diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 01f6811f1324..8f5414348e86 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/slip.c b/drivers/net/slip.c index e17c535a577e..fe3cebb984de 100644 --- a/drivers/net/slip.c +++ b/drivers/net/slip.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index 305ec3d783db..7019a0d1a82b 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/tokenring/ibmtr.c b/drivers/net/tokenring/ibmtr.c index 525bbc5b9c9d..36cb2423bcf1 100644 --- a/drivers/net/tokenring/ibmtr.c +++ b/drivers/net/tokenring/ibmtr.c @@ -108,6 +108,7 @@ in the event that chatty debug messages are desired - jjs 12/30/98 */ #define IBMTR_DEBUG_MESSAGES 0 #include +#include #ifdef PCMCIA /* required for ibmtr_cs.c to build */ #undef MODULE /* yes, really */ diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c index d6d345229fe9..5921f5bdd764 100644 --- a/drivers/net/typhoon.c +++ b/drivers/net/typhoon.c @@ -108,6 +108,7 @@ static const int multicast_filter_limit = 32; #include #include +#include #include #include #include diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 66360a2a14c2..e2c33c06190b 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -76,6 +76,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c index 2573c18b6aa5..cd8cb95c5bd7 100644 --- a/drivers/net/wan/cycx_x25.c +++ b/drivers/net/wan/cycx_x25.c @@ -84,6 +84,7 @@ #include /* printk(), and other useful stuff */ #include #include /* inline memset(), etc. */ +#include #include /* kmalloc(), kfree() */ #include /* offsetof(), etc. */ #include /* WAN router definitions */ diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index 81c8aec9df92..07d00b4cf48a 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -81,6 +81,7 @@ */ #include +#include #include #include #include diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 3e90eb816181..beda387f2fc7 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/b43/pio.c b/drivers/net/wireless/b43/pio.c index 9c1397996e0a..5e87650b07fb 100644 --- a/drivers/net/wireless/b43/pio.c +++ b/drivers/net/wireless/b43/pio.c @@ -30,6 +30,7 @@ #include "xmit.h" #include +#include static u16 generate_cookie(struct b43_pio_txqueue *q, diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 1d9223b3d4c4..4b60148a5e61 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/b43legacy/phy.c b/drivers/net/wireless/b43legacy/phy.c index 11319ec2d64a..aaf227203a98 100644 --- a/drivers/net/wireless/b43legacy/phy.c +++ b/drivers/net/wireless/b43legacy/phy.c @@ -31,6 +31,7 @@ #include #include +#include #include #include "b43legacy.h" diff --git a/drivers/net/wireless/hostap/hostap_info.c b/drivers/net/wireless/hostap/hostap_info.c index 6fa14a4e4b53..4dfb40a84c96 100644 --- a/drivers/net/wireless/hostap/hostap_info.c +++ b/drivers/net/wireless/hostap/hostap_info.c @@ -1,6 +1,7 @@ /* Host AP driver Info Frame processing (part of hostap.o module) */ #include +#include #include "hostap_wlan.h" #include "hostap.h" #include "hostap_ap.h" diff --git a/drivers/net/wireless/hostap/hostap_ioctl.c b/drivers/net/wireless/hostap/hostap_ioctl.c index 3f2bda881a4f..9419cebca8a5 100644 --- a/drivers/net/wireless/hostap/hostap_ioctl.c +++ b/drivers/net/wireless/hostap/hostap_ioctl.c @@ -1,6 +1,7 @@ /* ioctl() (mostly Linux Wireless Extensions) routines for Host AP driver */ #include +#include #include #include #include diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c index 8d58e6ed4e7d..827824d45de9 100644 --- a/drivers/net/wireless/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/ipw2x00/ipw2200.c @@ -30,6 +30,7 @@ ******************************************************************************/ +#include #include "ipw2200.h" diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c index e70c5b0af364..68136172b823 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c index a22a0501c190..6f703a041847 100644 --- a/drivers/net/wireless/iwlwifi/iwl-4965.c +++ b/drivers/net/wireless/iwlwifi/iwl-4965.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c index eb08f4411000..d6bc0e051043 100644 --- a/drivers/net/wireless/iwlwifi/iwl-5000.c +++ b/drivers/net/wireless/iwlwifi/iwl-5000.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index cdc07c477457..313d3e5ee84b 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 484d5c1a7312..2dc928755454 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include "iwl-eeprom.h" diff --git a/drivers/net/wireless/iwlwifi/iwl-hcmd.c b/drivers/net/wireless/iwlwifi/iwl-hcmd.c index 532c8d6cd8da..a6856daf14cb 100644 --- a/drivers/net/wireless/iwlwifi/iwl-hcmd.c +++ b/drivers/net/wireless/iwlwifi/iwl-hcmd.c @@ -28,6 +28,7 @@ #include #include +#include #include #include "iwl-dev.h" /* FIXME: remove */ diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index c18907544701..fb9bcfa6d947 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -28,6 +28,7 @@ *****************************************************************************/ #include +#include #include #include "iwl-eeprom.h" #include "iwl-dev.h" diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index c390dbd877e4..aa49230422f3 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/iwmc3200wifi/cfg80211.c b/drivers/net/wireless/iwmc3200wifi/cfg80211.c index a56a2b0ac99a..f3c55658225b 100644 --- a/drivers/net/wireless/iwmc3200wifi/cfg80211.c +++ b/drivers/net/wireless/iwmc3200wifi/cfg80211.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/wireless/iwmc3200wifi/commands.c b/drivers/net/wireless/iwmc3200wifi/commands.c index 23b52fa2605f..84158b6d35d8 100644 --- a/drivers/net/wireless/iwmc3200wifi/commands.c +++ b/drivers/net/wireless/iwmc3200wifi/commands.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "iwm.h" #include "bus.h" diff --git a/drivers/net/wireless/iwmc3200wifi/main.c b/drivers/net/wireless/iwmc3200wifi/main.c index d668e4756324..222eb2cf1b30 100644 --- a/drivers/net/wireless/iwmc3200wifi/main.c +++ b/drivers/net/wireless/iwmc3200wifi/main.c @@ -38,6 +38,7 @@ #include #include +#include #include #include diff --git a/drivers/net/wireless/iwmc3200wifi/rx.c b/drivers/net/wireless/iwmc3200wifi/rx.c index 40dbcbc16593..771a301003c9 100644 --- a/drivers/net/wireless/iwmc3200wifi/rx.c +++ b/drivers/net/wireless/iwmc3200wifi/rx.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c index 685098148e10..0a324dcd264c 100644 --- a/drivers/net/wireless/libertas/cmd.c +++ b/drivers/net/wireless/libertas/cmd.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "host.h" #include "hostcmd.h" #include "decl.h" diff --git a/drivers/net/wireless/libertas/tx.c b/drivers/net/wireless/libertas/tx.c index 4c018f7a0a8d..8c3766a6e8e7 100644 --- a/drivers/net/wireless/libertas/tx.c +++ b/drivers/net/wireless/libertas/tx.c @@ -3,6 +3,7 @@ */ #include #include +#include #include "hostcmd.h" #include "radiotap.h" diff --git a/drivers/net/wireless/prism54/isl_ioctl.c b/drivers/net/wireless/prism54/isl_ioctl.c index 4c97c6ad6f5d..bc08464d8323 100644 --- a/drivers/net/wireless/prism54/isl_ioctl.c +++ b/drivers/net/wireless/prism54/isl_ioctl.c @@ -19,6 +19,7 @@ * */ +#include #include #include #include diff --git a/drivers/net/wireless/prism54/islpci_dev.c b/drivers/net/wireless/prism54/islpci_dev.c index e26d7b3ceab5..2505be56ae39 100644 --- a/drivers/net/wireless/prism54/islpci_dev.c +++ b/drivers/net/wireless/prism54/islpci_dev.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/prism54/islpci_mgt.c b/drivers/net/wireless/prism54/islpci_mgt.c index f7c677e2094d..69d2f882fd06 100644 --- a/drivers/net/wireless/prism54/islpci_mgt.c +++ b/drivers/net/wireless/prism54/islpci_mgt.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/wireless/rt2x00/rt2x00debug.c b/drivers/net/wireless/rt2x00/rt2x00debug.c index 7b3ee8c2eaef..68bc9bb1dbf9 100644 --- a/drivers/net/wireless/rt2x00/rt2x00debug.c +++ b/drivers/net/wireless/rt2x00/rt2x00debug.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "rt2x00.h" diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 2ce8f9ccc66e..d49ecc94bd49 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 4cdb31a362ca..a0c816238aa9 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -12,6 +12,7 @@ */ #include +#include #include int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm) diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c index 8a11de9552cd..62227cd52410 100644 --- a/drivers/rtc/rtc-dev.c +++ b/drivers/rtc/rtc-dev.c @@ -13,6 +13,7 @@ #include #include +#include #include "rtc-core.h" static dev_t rtc_devt; diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index a9d707047202..e941367dd28f 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/uwb/whc-rc.c b/drivers/uwb/whc-rc.c index 1d9a6f54658e..01950c62dc8d 100644 --- a/drivers/uwb/whc-rc.c +++ b/drivers/uwb/whc-rc.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/file.c b/fs/file.c index f313314f996f..87e129030ab1 100644 --- a/fs/file.c +++ b/fs/file.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index b78cf8194957..7ca72b74eec7 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -610,6 +609,7 @@ extern void debug_poll_all_shared_irqs(void); static inline void debug_poll_all_shared_irqs(void) { } #endif +struct seq_file; int show_interrupts(struct seq_file *p, void *v); struct irq_desc; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 81bb42358595..eaf36364b7d4 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -11,6 +11,7 @@ #define LINUX_MMC_HOST_H #include +#include #include diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index a81cf80554db..17c71bb565c6 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index 50d022e5a560..ec815a960b5d 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index fb0f46fa1ecd..c3a4e2907eaa 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 2755a3bd16a1..eae56fddfa3b 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -9,6 +9,7 @@ */ #include #include +#include #include #include #include diff --git a/lib/fault-inject.c b/lib/fault-inject.c index f97af55bdd96..7e65af70635e 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 5e7aed0802bf..0f551a4a44cd 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index eafc010907c2..3c1754023022 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -30,6 +30,7 @@ ********************************************************************/ #include +#include #include #include diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 62116829b817..315ead3cb926 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 7b6b631f647f..d340110f5c0c 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c index cf9a4b531a98..cccc2e93234f 100644 --- a/net/irda/irnet/irnet_irda.c +++ b/net/irda/irnet/irnet_irda.c @@ -9,6 +9,7 @@ */ #include "irnet_irda.h" /* Private header */ +#include #include #include diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 68cbcb19cbd8..7dea882dbb75 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -13,6 +13,7 @@ * 2) as a control channel (write commands, read events) */ +#include #include #include "irnet_ppp.h" /* Private header */ /* Please put other headers in irnet.h - Thanks */ diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c index a59043fbb0ff..45667054a5f3 100644 --- a/net/mac80211/rc80211_pid_debugfs.c +++ b/net/mac80211/rc80211_pid_debugfs.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7c9ec3dee96e..ca6e68dcd8a8 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 0cf5e8c27a10..3fa5751af0ec 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include diff --git a/net/wireless/core.c b/net/wireless/core.c index 45b2be3274db..a595f712b5bf 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include "nl80211.h" -- cgit v1.2.3-59-g8ed1b From 5dd33c9a4c29015f6d87568d33521c98931a387e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 16 Oct 2009 16:40:25 +1100 Subject: md/async: don't pass a memory pointer as a page pointer. md/raid6 passes a list of 'struct page *' to the async_tx routines, which then either DMA map them for offload, or take the page_address for CPU based calculations. For RAID6 we sometime leave 'blanks' in the list of pages. For CPU based calcs, we want to treat theses as a page of zeros. For offloaded calculations, we simply don't pass a page to the hardware. Currently the 'blanks' are encoded as a pointer to raid6_empty_zero_page. This is a 4096 byte memory region, not a 'struct page'. This is mostly handled correctly but is rather ugly. So change the code to pass and expect a NULL pointer for the blanks. When taking page_address of a page, we need to check for a NULL and in that case use raid6_empty_zero_page. Signed-off-by: NeilBrown --- crypto/async_tx/async_pq.c | 15 ++++----------- crypto/async_tx/async_raid6_recov.c | 16 +++++++++++----- drivers/md/raid5.c | 4 ++-- 3 files changed, 17 insertions(+), 18 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index b88db6d1dc65..9ab1ce4af3cc 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -30,11 +30,6 @@ */ static struct page *scribble; -static bool is_raid6_zero_block(struct page *p) -{ - return p == (void *) raid6_empty_zero_page; -} - /* the struct page *blocks[] parameter passed to async_gen_syndrome() * and async_syndrome_val() contains the 'P' destination address at * blocks[disks-2] and the 'Q' destination address at blocks[disks-1] @@ -83,7 +78,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, * sources and update the coefficients accordingly */ for (i = 0, idx = 0; i < src_cnt; i++) { - if (is_raid6_zero_block(blocks[i])) + if (blocks[i] == NULL) continue; dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len, DMA_TO_DEVICE); @@ -160,9 +155,9 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, srcs = (void **) blocks; for (i = 0; i < disks; i++) { - if (is_raid6_zero_block(blocks[i])) { + if (blocks[i] == NULL) { BUG_ON(i > disks - 3); /* P or Q can't be zero */ - srcs[i] = blocks[i]; + srcs[i] = (void*)raid6_empty_zero_page; } else srcs[i] = page_address(blocks[i]) + offset; } @@ -290,12 +285,10 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, if (submit->flags & ASYNC_TX_FENCE) dma_flags |= DMA_PREP_FENCE; for (i = 0; i < disks; i++) - if (likely(blocks[i])) { - BUG_ON(is_raid6_zero_block(blocks[i])); + if (likely(blocks[i])) dma_src[i] = dma_map_page(dev, blocks[i], offset, len, DMA_TO_DEVICE); - } for (;;) { tx = device->device_prep_dma_pq_val(chan, pq, dma_src, diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index 6d73dde4786d..8e30b6ed0789 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c @@ -263,10 +263,10 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb, * delta p and delta q */ dp = blocks[faila]; - blocks[faila] = (void *)raid6_empty_zero_page; + blocks[faila] = NULL; blocks[disks-2] = dp; dq = blocks[failb]; - blocks[failb] = (void *)raid6_empty_zero_page; + blocks[failb] = NULL; blocks[disks-1] = dq; init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); @@ -338,7 +338,10 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, async_tx_quiesce(&submit->depend_tx); for (i = 0; i < disks; i++) - ptrs[i] = page_address(blocks[i]); + if (blocks[i] == NULL) + ptrs[i] = (void*)raid6_empty_zero_page; + else + ptrs[i] = page_address(blocks[i]); raid6_2data_recov(disks, bytes, faila, failb, ptrs); @@ -398,7 +401,10 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, async_tx_quiesce(&submit->depend_tx); for (i = 0; i < disks; i++) - ptrs[i] = page_address(blocks[i]); + if (blocks[i] == NULL) + ptrs[i] = (void*)raid6_empty_zero_page; + else + ptrs[i] = page_address(blocks[i]); raid6_datap_recov(disks, bytes, faila, ptrs); @@ -414,7 +420,7 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = blocks[faila]; - blocks[faila] = (void *)raid6_empty_zero_page; + blocks[faila] = NULL; blocks[disks-1] = dq; /* in the 4 disk case we only need to perform a single source diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c4366c9373c5..dcd9e659ed9d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -720,7 +720,7 @@ static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh) int i; for (i = 0; i < disks; i++) - srcs[i] = (void *)raid6_empty_zero_page; + srcs[i] = NULL; count = 0; i = d0_idx; @@ -816,7 +816,7 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) * slot number conversion for 'faila' and 'failb' */ for (i = 0; i < disks ; i++) - blocks[i] = (void *)raid6_empty_zero_page; + blocks[i] = NULL; count = 0; i = d0_idx; do { -- cgit v1.2.3-59-g8ed1b From b2141e6951ad56c8f65e70547baeabd5698e390a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 16 Oct 2009 16:40:34 +1100 Subject: raid6/async_tx: handle holes in block list in async_syndrome_val async_syndrome_val check the P and Q blocks used for RAID6 calculations. With DDF raid6, some of the data blocks might be NULL, so this needs to be handled in the same way that async_gen_syndrome handles it. As async_syndrome_val calls async_xor, also enhance async_xor to detect and skip NULL blocks in the list. Signed-off-by: NeilBrown --- crypto/async_tx/async_pq.c | 31 ++++++++++++++++++++++++------- crypto/async_tx/async_xor.c | 18 +++++++++++------- 2 files changed, 35 insertions(+), 14 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 9ab1ce4af3cc..43b1436bd968 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -260,8 +260,10 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx; + unsigned char coefs[disks-2]; enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; dma_addr_t *dma_src = NULL; + int src_cnt = 0; BUG_ON(disks < 4); @@ -280,20 +282,35 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, __func__, disks, len); if (!P(blocks, disks)) dma_flags |= DMA_PREP_PQ_DISABLE_P; + else + pq[0] = dma_map_page(dev, P(blocks,disks), + offset, len, + DMA_TO_DEVICE); if (!Q(blocks, disks)) dma_flags |= DMA_PREP_PQ_DISABLE_Q; + else + pq[1] = dma_map_page(dev, Q(blocks,disks), + offset, len, + DMA_TO_DEVICE); + if (submit->flags & ASYNC_TX_FENCE) dma_flags |= DMA_PREP_FENCE; - for (i = 0; i < disks; i++) - if (likely(blocks[i])) - dma_src[i] = dma_map_page(dev, blocks[i], - offset, len, - DMA_TO_DEVICE); + for (i = 0; i < disks-2; i++) + if (likely(blocks[i])) { + dma_src[src_cnt] = dma_map_page(dev, blocks[i], + offset, len, + DMA_TO_DEVICE); + coefs[src_cnt] = raid6_gfexp[i]; + src_cnt++; + } + pq[1] = dma_map_page(dev, Q(blocks,disks), + offset, len, + DMA_TO_DEVICE); for (;;) { tx = device->device_prep_dma_pq_val(chan, pq, dma_src, - disks - 2, - raid6_gfexp, + src_cnt, + coefs, len, pqres, dma_flags); if (likely(tx)) diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index b459a9034aac..79182dcb91b7 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -44,20 +44,23 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, void *cb_param_orig = submit->cb_param; enum async_tx_flags flags_orig = submit->flags; enum dma_ctrl_flags dma_flags; - int xor_src_cnt; + int xor_src_cnt = 0; dma_addr_t dma_dest; /* map the dest bidrectional in case it is re-used as a source */ dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_BIDIRECTIONAL); for (i = 0; i < src_cnt; i++) { /* only map the dest once */ + if (!src_list[i]) + continue; if (unlikely(src_list[i] == dest)) { - dma_src[i] = dma_dest; + dma_src[xor_src_cnt++] = dma_dest; continue; } - dma_src[i] = dma_map_page(dma->dev, src_list[i], offset, - len, DMA_TO_DEVICE); + dma_src[xor_src_cnt++] = dma_map_page(dma->dev, src_list[i], offset, + len, DMA_TO_DEVICE); } + src_cnt = xor_src_cnt; while (src_cnt) { submit->flags = flags_orig; @@ -123,7 +126,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, struct async_submit_ctl *submit) { int i; - int xor_src_cnt; + int xor_src_cnt = 0; int src_off = 0; void *dest_buf; void **srcs; @@ -135,8 +138,9 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, /* convert to buffer pointers */ for (i = 0; i < src_cnt; i++) - srcs[i] = page_address(src_list[i]) + offset; - + if (src_list[i]) + srcs[xor_src_cnt++] = page_address(src_list[i]) + offset; + src_cnt = xor_src_cnt; /* set destination address */ dest_buf = page_address(dest) + offset; -- cgit v1.2.3-59-g8ed1b From 5676470f06f783aebf545c8f17ca772911022068 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 19 Oct 2009 18:09:32 -0700 Subject: async_pq: kill a stray dma_map() call and other cleanups - update the kernel doc for async_syndrome to indicate what NULL in the source list means - whitespace fixups Signed-off-by: Dan Williams --- crypto/async_tx/async_pq.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 43b1436bd968..60476560e0b0 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -181,10 +181,14 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <= * PAGE_SIZE as a temporary buffer of this size is used in the * synchronous path. 'disks' always accounts for both destination - * buffers. + * buffers. If any source buffers (blocks[i] where i < disks - 2) are + * set to NULL those buffers will be replaced with the raid6_zero_page + * in the synchronous path and omitted in the hardware-asynchronous + * path. * * 'blocks' note: if submit->scribble is NULL then the contents of - * 'blocks' may be overridden + * 'blocks' may be overwritten to perform address conversions + * (dma_map_page() or page_address()). */ struct dma_async_tx_descriptor * async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, @@ -283,13 +287,13 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, if (!P(blocks, disks)) dma_flags |= DMA_PREP_PQ_DISABLE_P; else - pq[0] = dma_map_page(dev, P(blocks,disks), + pq[0] = dma_map_page(dev, P(blocks, disks), offset, len, DMA_TO_DEVICE); if (!Q(blocks, disks)) dma_flags |= DMA_PREP_PQ_DISABLE_Q; else - pq[1] = dma_map_page(dev, Q(blocks,disks), + pq[1] = dma_map_page(dev, Q(blocks, disks), offset, len, DMA_TO_DEVICE); @@ -303,9 +307,6 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, coefs[src_cnt] = raid6_gfexp[i]; src_cnt++; } - pq[1] = dma_map_page(dev, Q(blocks,disks), - offset, len, - DMA_TO_DEVICE); for (;;) { tx = device->device_prep_dma_pq_val(chan, pq, dma_src, -- cgit v1.2.3-59-g8ed1b From 030b07720be0f3bfada12ff6bfa3c61a91212f32 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 19 Oct 2009 18:09:32 -0700 Subject: async_pq: rename scribble page The global scribble page is used as a temporary destination buffer when disabling the P or Q result is requested. The local scribble buffer contains memory for performing address conversions. Rename the global variable to avoid confusion. Signed-off-by: Dan Williams --- crypto/async_tx/async_pq.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 60476560e0b0..6b5cc4fba59f 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -26,9 +26,10 @@ #include /** - * scribble - space to hold throwaway P buffer for synchronous gen_syndrome + * pq_scribble_page - space to hold throwaway P or Q buffer for + * synchronous gen_syndrome */ -static struct page *scribble; +static struct page *pq_scribble_page; /* the struct page *blocks[] parameter passed to async_gen_syndrome() * and async_syndrome_val() contains the 'P' destination address at @@ -226,11 +227,11 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, async_tx_quiesce(&submit->depend_tx); if (!P(blocks, disks)) { - P(blocks, disks) = scribble; + P(blocks, disks) = pq_scribble_page; BUG_ON(len + offset > PAGE_SIZE); } if (!Q(blocks, disks)) { - Q(blocks, disks) = scribble; + Q(blocks, disks) = pq_scribble_page; BUG_ON(len + offset > PAGE_SIZE); } do_sync_gen_syndrome(blocks, offset, disks, len, submit); @@ -384,9 +385,9 @@ EXPORT_SYMBOL_GPL(async_syndrome_val); static int __init async_pq_init(void) { - scribble = alloc_page(GFP_KERNEL); + pq_scribble_page = alloc_page(GFP_KERNEL); - if (scribble) + if (pq_scribble_page) return 0; pr_err("%s: failed to allocate required spare page\n", __func__); @@ -396,7 +397,7 @@ static int __init async_pq_init(void) static void __exit async_pq_exit(void) { - put_page(scribble); + put_page(pq_scribble_page); } module_init(async_pq_init); -- cgit v1.2.3-59-g8ed1b From da17bf4306fd3a52e938b121df82a7baa10eb282 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 19 Oct 2009 14:05:12 -0700 Subject: async_tx: fix asynchronous raid6 recovery for ddf layouts The raid6 recovery code currently requires special handling of the 4-disk and 5-disk recovery scenarios for the native layout. Quoting from commit 0a82a623: In these situations the default N-disk algorithm will present 0-source or 1-source operations to dma devices. To cover for dma devices where the minimum source count is 2 we implement 4-disk and 5-disk handling in the recovery code. The ddf layout presents disks=6 and disks=7 to the recovery code in these situations. Instead of looking at the number of disks count the number of non-zero sources in the list and call the special case code when the number of non-failed sources is 0 or 1. [neilb@suse.de: replace 'ddf' flag with counting good sources] Signed-off-by: Dan Williams --- crypto/async_tx/async_raid6_recov.c | 86 ++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 30 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index 8e30b6ed0789..943f2abac9b4 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c @@ -131,8 +131,8 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len, } static struct dma_async_tx_descriptor * -__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, - struct async_submit_ctl *submit) +__2data_recov_4(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) { struct dma_async_tx_descriptor *tx = NULL; struct page *p, *q, *a, *b; @@ -143,8 +143,8 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, void *cb_param = submit->cb_param; void *scribble = submit->scribble; - p = blocks[4-2]; - q = blocks[4-1]; + p = blocks[disks-2]; + q = blocks[disks-1]; a = blocks[faila]; b = blocks[failb]; @@ -170,8 +170,8 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, } static struct dma_async_tx_descriptor * -__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, - struct async_submit_ctl *submit) +__2data_recov_5(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) { struct dma_async_tx_descriptor *tx = NULL; struct page *p, *q, *g, *dp, *dq; @@ -181,21 +181,22 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, dma_async_tx_callback cb_fn = submit->cb_fn; void *cb_param = submit->cb_param; void *scribble = submit->scribble; - int uninitialized_var(good); - int i; + int good_srcs, good, i; - for (i = 0; i < 3; i++) { + good_srcs = 0; + good = -1; + for (i = 0; i < disks-2; i++) { + if (blocks[i] == NULL) + continue; if (i == faila || i == failb) continue; - else { - good = i; - break; - } + good = i; + good_srcs++; } - BUG_ON(i >= 3); + BUG_ON(good_srcs > 1); - p = blocks[5-2]; - q = blocks[5-1]; + p = blocks[disks-2]; + q = blocks[disks-1]; g = blocks[good]; /* Compute syndrome with zero for the missing data pages @@ -323,6 +324,8 @@ struct dma_async_tx_descriptor * async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, struct page **blocks, struct async_submit_ctl *submit) { + int non_zero_srcs, i; + BUG_ON(faila == failb); if (failb < faila) swap(faila, failb); @@ -334,12 +337,11 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, */ if (!submit->scribble) { void **ptrs = (void **) blocks; - int i; async_tx_quiesce(&submit->depend_tx); for (i = 0; i < disks; i++) if (blocks[i] == NULL) - ptrs[i] = (void*)raid6_empty_zero_page; + ptrs[i] = (void *) raid6_empty_zero_page; else ptrs[i] = page_address(blocks[i]); @@ -350,19 +352,30 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, return NULL; } - switch (disks) { - case 4: + non_zero_srcs = 0; + for (i = 0; i < disks-2 && non_zero_srcs < 4; i++) + if (blocks[i]) + non_zero_srcs++; + switch (non_zero_srcs) { + case 0: + case 1: + /* There must be at least 2 sources - the failed devices. */ + BUG(); + + case 2: /* dma devices do not uniformly understand a zero source pq * operation (in contrast to the synchronous case), so - * explicitly handle the 4 disk special case + * explicitly handle the special case of a 4 disk array with + * both data disks missing. */ - return __2data_recov_4(bytes, faila, failb, blocks, submit); - case 5: + return __2data_recov_4(disks, bytes, faila, failb, blocks, submit); + case 3: /* dma devices do not uniformly understand a single * source pq operation (in contrast to the synchronous - * case), so explicitly handle the 5 disk special case + * case), so explicitly handle the special case of a 5 disk + * array with 2 of 3 data disks missing. */ - return __2data_recov_5(bytes, faila, failb, blocks, submit); + return __2data_recov_5(disks, bytes, faila, failb, blocks, submit); default: return __2data_recov_n(disks, bytes, faila, failb, blocks, submit); } @@ -388,6 +401,7 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, dma_async_tx_callback cb_fn = submit->cb_fn; void *cb_param = submit->cb_param; void *scribble = submit->scribble; + int good_srcs, good, i; struct page *srcs[2]; pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); @@ -397,7 +411,6 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, */ if (!scribble) { void **ptrs = (void **) blocks; - int i; async_tx_quiesce(&submit->depend_tx); for (i = 0; i < disks; i++) @@ -413,6 +426,20 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, return NULL; } + good_srcs = 0; + good = -1; + for (i = 0; i < disks-2; i++) { + if (i == faila) + continue; + if (blocks[i]) { + good = i; + good_srcs++; + if (good_srcs > 1) + break; + } + } + BUG_ON(good_srcs == 0); + p = blocks[disks-2]; q = blocks[disks-1]; @@ -423,11 +450,10 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, blocks[faila] = NULL; blocks[disks-1] = dq; - /* in the 4 disk case we only need to perform a single source - * multiplication + /* in the 4-disk case we only need to perform a single source + * multiplication with the one good data block. */ - if (disks == 4) { - int good = faila == 0 ? 1 : 0; + if (good_srcs == 1) { struct page *g = blocks[good]; init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, -- cgit v1.2.3-59-g8ed1b From 62c5593aea4b71d61dc0f37fea96c556c158a042 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 16 Nov 2009 21:52:22 +0800 Subject: crypto: gcm - fix another complete call in complete fuction The flow of the complete function (xxx_done) in gcm.c is as follow: void complete(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; if (!err) { err = async_next_step(); if (err == -EINPROGRESS || err == -EBUSY) return; } complete_for_next_step(areq, err); } But *areq may be destroyed in async_next_step(), this makes complete_for_next_step() can not work properly. To fix this, one of following methods is used for each complete function. - Add a __complete() for each complete(), which accept struct aead_request *req instead of areq, so avoid using areq after it is destroyed. - Expand complete_for_next_step(). The fixing method is based on the idea of Herbert Xu. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- crypto/gcm.c | 107 ++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 73 insertions(+), 34 deletions(-) (limited to 'crypto') diff --git a/crypto/gcm.c b/crypto/gcm.c index 5fc3292483ef..c6547130624c 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -40,7 +40,7 @@ struct crypto_rfc4106_ctx { struct crypto_gcm_ghash_ctx { unsigned int cryptlen; struct scatterlist *src; - crypto_completion_t complete; + void (*complete)(struct aead_request *req, int err); }; struct crypto_gcm_req_priv_ctx { @@ -267,23 +267,26 @@ static int gcm_hash_final(struct aead_request *req, return crypto_ahash_final(ahreq); } -static void gcm_hash_final_done(struct crypto_async_request *areq, - int err) +static void __gcm_hash_final_done(struct aead_request *req, int err) { - struct aead_request *req = areq->data; struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; if (!err) crypto_xor(pctx->auth_tag, pctx->iauth_tag, 16); - gctx->complete(areq, err); + gctx->complete(req, err); } -static void gcm_hash_len_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_final_done(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; + + __gcm_hash_final_done(req, err); +} + +static void __gcm_hash_len_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); if (!err) { @@ -292,13 +295,18 @@ static void gcm_hash_len_done(struct crypto_async_request *areq, return; } - gcm_hash_final_done(areq, err); + __gcm_hash_final_done(req, err); } -static void gcm_hash_crypt_remain_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_len_done(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; + + __gcm_hash_len_done(req, err); +} + +static void __gcm_hash_crypt_remain_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); if (!err) { @@ -307,13 +315,19 @@ static void gcm_hash_crypt_remain_done(struct crypto_async_request *areq, return; } - gcm_hash_len_done(areq, err); + __gcm_hash_len_done(req, err); } -static void gcm_hash_crypt_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_crypt_remain_done(struct crypto_async_request *areq, + int err) { struct aead_request *req = areq->data; + + __gcm_hash_crypt_remain_done(req, err); +} + +static void __gcm_hash_crypt_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; unsigned int remain; @@ -327,13 +341,18 @@ static void gcm_hash_crypt_done(struct crypto_async_request *areq, return; } - gcm_hash_crypt_remain_done(areq, err); + __gcm_hash_crypt_remain_done(req, err); } -static void gcm_hash_assoc_remain_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_crypt_done(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; + + __gcm_hash_crypt_done(req, err); +} + +static void __gcm_hash_assoc_remain_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; crypto_completion_t complete; @@ -350,15 +369,21 @@ static void gcm_hash_assoc_remain_done(struct crypto_async_request *areq, } if (remain) - gcm_hash_crypt_done(areq, err); + __gcm_hash_crypt_done(req, err); else - gcm_hash_crypt_remain_done(areq, err); + __gcm_hash_crypt_remain_done(req, err); } -static void gcm_hash_assoc_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_assoc_remain_done(struct crypto_async_request *areq, + int err) { struct aead_request *req = areq->data; + + __gcm_hash_assoc_remain_done(req, err); +} + +static void __gcm_hash_assoc_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); unsigned int remain; @@ -371,13 +396,18 @@ static void gcm_hash_assoc_done(struct crypto_async_request *areq, return; } - gcm_hash_assoc_remain_done(areq, err); + __gcm_hash_assoc_remain_done(req, err); } -static void gcm_hash_init_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_assoc_done(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; + + __gcm_hash_assoc_done(req, err); +} + +static void __gcm_hash_init_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); crypto_completion_t complete; unsigned int remain = 0; @@ -393,9 +423,16 @@ static void gcm_hash_init_done(struct crypto_async_request *areq, } if (remain) - gcm_hash_assoc_done(areq, err); + __gcm_hash_assoc_done(req, err); else - gcm_hash_assoc_remain_done(areq, err); + __gcm_hash_assoc_remain_done(req, err); +} + +static void gcm_hash_init_done(struct crypto_async_request *areq, int err) +{ + struct aead_request *req = areq->data; + + __gcm_hash_init_done(req, err); } static int gcm_hash(struct aead_request *req, @@ -457,10 +494,8 @@ static void gcm_enc_copy_hash(struct aead_request *req, crypto_aead_authsize(aead), 1); } -static void gcm_enc_hash_done(struct crypto_async_request *areq, - int err) +static void gcm_enc_hash_done(struct aead_request *req, int err) { - struct aead_request *req = areq->data; struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); if (!err) @@ -469,8 +504,7 @@ static void gcm_enc_hash_done(struct crypto_async_request *areq, aead_request_complete(req, err); } -static void gcm_encrypt_done(struct crypto_async_request *areq, - int err) +static void gcm_encrypt_done(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); @@ -479,9 +513,13 @@ static void gcm_encrypt_done(struct crypto_async_request *areq, err = gcm_hash(req, pctx); if (err == -EINPROGRESS || err == -EBUSY) return; + else if (!err) { + crypto_xor(pctx->auth_tag, pctx->iauth_tag, 16); + gcm_enc_copy_hash(req, pctx); + } } - gcm_enc_hash_done(areq, err); + aead_request_complete(req, err); } static int crypto_gcm_encrypt(struct aead_request *req) @@ -538,9 +576,8 @@ static void gcm_decrypt_done(struct crypto_async_request *areq, int err) aead_request_complete(req, err); } -static void gcm_dec_hash_done(struct crypto_async_request *areq, int err) +static void gcm_dec_hash_done(struct aead_request *req, int err) { - struct aead_request *req = areq->data; struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); struct ablkcipher_request *abreq = &pctx->u.abreq; struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; @@ -552,9 +589,11 @@ static void gcm_dec_hash_done(struct crypto_async_request *areq, int err) err = crypto_ablkcipher_decrypt(abreq); if (err == -EINPROGRESS || err == -EBUSY) return; + else if (!err) + err = crypto_gcm_verify(req, pctx); } - gcm_decrypt_done(areq, err); + aead_request_complete(req, err); } static int crypto_gcm_decrypt(struct aead_request *req) -- cgit v1.2.3-59-g8ed1b From 7b3cc2b1fc2066391e498f3387204908c4eced21 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Nov 2009 17:10:37 -0700 Subject: async_tx: build-time toggling of async_{syndrome,xor}_val dma support ioat3.2 does not support asynchronous error notifications which makes the driver experience latencies when non-zero pq validate results are expected. Provide a mechanism for turning off async_xor_val and async_syndrome_val via Kconfig. This approach is generally useful for any driver that specifies ASYNC_TX_DISABLE_CHANNEL_SWITCH and would like to force the async_tx api to fall back to the synchronous path for certain operations. Signed-off-by: Dan Williams --- crypto/async_tx/Kconfig | 5 +++++ crypto/async_tx/async_pq.c | 14 +++++++++++--- crypto/async_tx/async_xor.c | 15 ++++++++++++--- drivers/dma/Kconfig | 2 ++ drivers/dma/dmaengine.c | 6 ++++++ drivers/dma/ioat/dma_v3.c | 10 ++++++++++ 6 files changed, 46 insertions(+), 6 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index e5aeb2b79e6f..e28e276ac611 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -23,3 +23,8 @@ config ASYNC_RAID6_RECOV select ASYNC_CORE select ASYNC_PQ +config ASYNC_TX_DISABLE_PQ_VAL_DMA + bool + +config ASYNC_TX_DISABLE_XOR_VAL_DMA + bool diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 6b5cc4fba59f..ec87f53d5059 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -240,6 +240,16 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, } EXPORT_SYMBOL_GPL(async_gen_syndrome); +static inline struct dma_chan * +pq_val_chan(struct async_submit_ctl *submit, struct page **blocks, int disks, size_t len) +{ + #ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA + return NULL; + #endif + return async_tx_find_channel(submit, DMA_PQ_VAL, NULL, 0, blocks, + disks, len); +} + /** * async_syndrome_val - asynchronously validate a raid6 syndrome * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 @@ -260,9 +270,7 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, size_t len, enum sum_check_flags *pqres, struct page *spare, struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL, - NULL, 0, blocks, disks, - len); + struct dma_chan *chan = pq_val_chan(submit, blocks, disks, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx; unsigned char coefs[disks-2]; diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 79182dcb91b7..079ae8ca590b 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -234,6 +234,17 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) memcmp(a, a + 4, len - 4) == 0); } +static inline struct dma_chan * +xor_val_chan(struct async_submit_ctl *submit, struct page *dest, + struct page **src_list, int src_cnt, size_t len) +{ + #ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA + return NULL; + #endif + return async_tx_find_channel(submit, DMA_XOR_VAL, &dest, 1, src_list, + src_cnt, len); +} + /** * async_xor_val - attempt a xor parity check with a dma engine. * @dest: destination page used if the xor is performed synchronously @@ -255,9 +266,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, - &dest, 1, src_list, - src_cnt, len); + struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; dma_addr_t *dma_src = NULL; diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 5903a88351bf..b401dadad4a8 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -26,6 +26,8 @@ config INTEL_IOATDMA select DMA_ENGINE select DCA select ASYNC_TX_DISABLE_CHANNEL_SWITCH + select ASYNC_TX_DISABLE_PQ_VAL_DMA + select ASYNC_TX_DISABLE_XOR_VAL_DMA help Enable support for the Intel(R) I/OAT DMA engine present in recent Intel Xeon chipsets. diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index b6442f09d0fe..8f99354082ce 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -632,16 +632,22 @@ static bool device_has_all_tx_types(struct dma_device *device) #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE) if (!dma_has_cap(DMA_XOR, device->cap_mask)) return false; + + #ifndef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA if (!dma_has_cap(DMA_XOR_VAL, device->cap_mask)) return false; #endif + #endif #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE) if (!dma_has_cap(DMA_PQ, device->cap_mask)) return false; + + #ifndef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA if (!dma_has_cap(DMA_PQ_VAL, device->cap_mask)) return false; #endif + #endif return true; } diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 252cf2d3f1da..189788f6351e 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -1206,6 +1206,16 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) device->timer_fn = ioat2_timer_event; } + #ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA + dma_cap_clear(DMA_PQ_VAL, dma->cap_mask); + dma->device_prep_dma_pq_val = NULL; + #endif + + #ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA + dma_cap_clear(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = NULL; + #endif + /* -= IOAT ver.3 workarounds =- */ /* Write CHANERRMSK_INT with 3E07h to mask out the errors * that can cause stability issues for IOAT ver.3 -- cgit v1.2.3-59-g8ed1b