From 099f53cb50e45ef617a9f1d63ceec799e489418b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 8 Apr 2009 14:28:37 -0700 Subject: async_tx: rename zero_sum to val 'zero_sum' does not properly describe the operation of generating parity and checking that it validates against an existing buffer. Change the name of the operation to 'val' (for 'validate'). This is in anticipation of the p+q case where it is a requirement to identify the target parity buffers separately from the source buffers, because the target parity buffers will not have corresponding pq coefficients. Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- crypto/async_tx/async_xor.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 95fe2c8d6c5..e0580b0ea53 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -222,7 +222,7 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) } /** - * async_xor_zero_sum - attempt a xor parity check with a dma engine. + * async_xor_val - attempt a xor parity check with a dma engine. * @dest: destination page used if the xor is performed synchronously * @src_list: array of source pages. The dest page must be listed as a source * at index zero. The contents of this array may be overwritten. @@ -236,13 +236,13 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) * @cb_param: parameter to pass to the callback routine */ struct dma_async_tx_descriptor * -async_xor_zero_sum(struct page *dest, struct page **src_list, +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, u32 *result, enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, dma_async_tx_callback cb_fn, void *cb_param) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM, + struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR_VAL, &dest, 1, src_list, src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; @@ -261,15 +261,15 @@ async_xor_zero_sum(struct page *dest, struct page **src_list, dma_src[i] = dma_map_page(device->dev, src_list[i], offset, len, DMA_TO_DEVICE); - tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt, - len, result, - dma_prep_flags); + tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt, + len, result, + dma_prep_flags); if (unlikely(!tx)) { async_tx_quiesce(&depend_tx); while (!tx) { dma_async_issue_pending(chan); - tx = device->device_prep_dma_zero_sum(chan, + tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt, len, result, dma_prep_flags); } @@ -296,7 +296,7 @@ async_xor_zero_sum(struct page *dest, struct page **src_list, return tx; } -EXPORT_SYMBOL_GPL(async_xor_zero_sum); +EXPORT_SYMBOL_GPL(async_xor_val); static int __init async_xor_init(void) { -- cgit v1.2.3 From 88ba2aa586c874681c072101287e15d40de7e6e2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Apr 2009 16:16:18 -0700 Subject: async_tx: kill ASYNC_TX_DEP_ACK flag In support of inter-channel chaining async_tx utilizes an ack flag to gate whether a dependent operation can be chained to another. While the flag is not set the chain can be considered open for appending. Setting the ack flag closes the chain and flags the descriptor for garbage collection. The ASYNC_TX_DEP_ACK flag essentially means "close the chain after adding this dependency". Since each operation can only have one child the api now implicitly sets the ack flag at dependency submission time. This removes an unnecessary management burden from clients of the api. [ Impact: clean up and enforce one dependency per operation ] Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 9 ++++----- crypto/async_tx/async_memcpy.c | 2 +- crypto/async_tx/async_memset.c | 2 +- crypto/async_tx/async_tx.c | 4 ++-- crypto/async_tx/async_xor.c | 6 ++---- drivers/md/raid5.c | 25 +++++++++++-------------- include/linux/async_tx.h | 4 +--- 7 files changed, 22 insertions(+), 30 deletions(-) (limited to 'crypto') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 4af12180d19..76feda8541d 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -80,8 +80,8 @@ acknowledged by the application before the offload engine driver is allowed to recycle (or free) the descriptor. A descriptor can be acked by one of the following methods: 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted -2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent - descriptor of a new operation. +2/ submitting an unacknowledged descriptor as a dependency to another + async_tx call will implicitly set the acknowledged state. 3/ calling async_tx_ack() on the descriptor. 3.4 When does the operation execute? @@ -136,10 +136,9 @@ int run_xor_copy_xor(struct page **xor_srcs, tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL); - tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, - ASYNC_TX_DEP_ACK, tx, NULL, NULL); + tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, tx, NULL, NULL); tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, - ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, + ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx, complete_xor_copy_xor, NULL); async_tx_issue_pending_all(); diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index ddccfb01c41..7117ec6f1b7 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -35,7 +35,7 @@ * @src: src page * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, + * @flags: ASYNC_TX_ACK * @depend_tx: memcpy depends on the result of this transaction * @cb_fn: function to call when the memcpy completes * @cb_param: parameter to pass to the callback routine diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index 5b5eb99bb24..b2f133885b7 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -35,7 +35,7 @@ * @val: fill value * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_ACK * @depend_tx: memset depends on the result of this transaction * @cb_fn: function to call when the memcpy completes * @cb_param: parameter to pass to the callback routine diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 06eb6cc09fe..3766bc3d7d8 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -223,7 +223,7 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, if (flags & ASYNC_TX_ACK) async_tx_ack(tx); - if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) + if (depend_tx) async_tx_ack(depend_tx); } EXPORT_SYMBOL_GPL(async_tx_submit); @@ -231,7 +231,7 @@ EXPORT_SYMBOL_GPL(async_tx_submit); /** * async_trigger_callback - schedules the callback function to be run after * any dependent operations have been completed. - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_ACK * @depend_tx: 'callback' requires the completion of this transaction * @cb_fn: function to call after depend_tx completes * @cb_param: parameter to pass to the callback routine diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index e0580b0ea53..3cc5dc763b5 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -105,7 +105,6 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, _cb_param); depend_tx = tx; - flags |= ASYNC_TX_DEP_ACK; if (src_cnt > xor_src_cnt) { /* drop completed sources */ @@ -168,8 +167,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, * @offset: offset in pages to start transaction * @src_cnt: number of source pages * @len: length in bytes - * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, - * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, ASYNC_TX_ACK * @depend_tx: xor depends on the result of this transaction. * @cb_fn: function to call when the xor completes * @cb_param: parameter to pass to the callback routine @@ -230,7 +228,7 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) * @src_cnt: number of source pages * @len: length in bytes * @result: 0 if sum == 0 else non-zero - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_ACK * @depend_tx: xor depends on the result of this transaction. * @cb_fn: function to call when the xor completes * @cb_param: parameter to pass to the callback routine diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f8d2d35ed29..0ef5362c8d0 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -525,14 +525,12 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, bio_page = bio_iovec_idx(bio, i)->bv_page; if (frombio) tx = async_memcpy(page, bio_page, page_offset, - b_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + b_offset, clen, 0, + tx, NULL, NULL); else tx = async_memcpy(bio_page, page, b_offset, - page_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + page_offset, clen, 0, + tx, NULL, NULL); } if (clen < len) /* hit end of page */ break; @@ -615,8 +613,7 @@ static void ops_run_biofill(struct stripe_head *sh) } atomic_inc(&sh->count); - async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, - ops_complete_biofill, sh); + async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_biofill, sh); } static void ops_complete_compute5(void *stripe_head_ref) @@ -701,8 +698,8 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) } tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, - ops_complete_prexor, sh); + ASYNC_TX_XOR_DROP_DST, tx, + ops_complete_prexor, sh); return tx; } @@ -809,7 +806,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST * for the synchronous xor case */ - flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | + flags = ASYNC_TX_ACK | (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); atomic_inc(&sh->count); @@ -858,7 +855,7 @@ static void ops_run_check(struct stripe_head *sh) &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); atomic_inc(&sh->count); - tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, + tx = async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_check, sh); } @@ -2687,8 +2684,8 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, /* place all the copies on one channel */ tx = async_memcpy(sh2->dev[dd_idx].page, - sh->dev[i].page, 0, 0, STRIPE_SIZE, - ASYNC_TX_DEP_ACK, tx, NULL, NULL); + sh->dev[i].page, 0, 0, STRIPE_SIZE, + 0, tx, NULL, NULL); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 513150d8c25..9f14cd540cd 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -58,13 +58,11 @@ struct dma_chan_ref { * array. * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a * dependency chain - * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. */ enum async_tx_flags { ASYNC_TX_XOR_ZERO_DST = (1 << 0), ASYNC_TX_XOR_DROP_DST = (1 << 1), - ASYNC_TX_ACK = (1 << 3), - ASYNC_TX_DEP_ACK = (1 << 4), + ASYNC_TX_ACK = (1 << 2), }; #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3 From a08abd8ca890a377521d65d493d174bebcaf694b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 3 Jun 2009 11:43:59 -0700 Subject: async_tx: structify submission arguments, add scribble Prepare the api for the arrival of a new parameter, 'scribble'. This will allow callers to identify scratchpad memory for dma address or page address conversions. As this adds yet another parameter, take this opportunity to convert the common submission parameters (flags, dependency, callback, and callback argument) into an object that is passed by reference. Also, take this opportunity to fix up the kerneldoc and add notes about the relevant ASYNC_TX_* flags for each routine. [ Impact: moves api pass-by-value parameters to a pass-by-reference struct ] Signed-off-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 6 +- crypto/async_tx/async_memcpy.c | 26 +++---- crypto/async_tx/async_memset.c | 25 +++---- crypto/async_tx/async_tx.c | 51 +++++++------- crypto/async_tx/async_xor.c | 123 +++++++++++++++++----------------- drivers/md/raid5.c | 59 +++++++++------- include/linux/async_tx.h | 84 ++++++++++++++--------- 7 files changed, 200 insertions(+), 174 deletions(-) (limited to 'crypto') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 76feda8541d..dfe0475f791 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -54,11 +54,7 @@ features surfaced as a result: 3.1 General format of the API: struct dma_async_tx_descriptor * -async_(, - enum async_tx_flags flags, - struct dma_async_tx_descriptor *dependency, - dma_async_tx_callback callback_routine, - void *callback_parameter); +async_(, struct async_submit ctl *submit) 3.2 Supported operations: memcpy - memory copy between a source and a destination buffer diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index 7117ec6f1b7..89e05556f3d 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -33,28 +33,28 @@ * async_memcpy - attempt to copy memory with a dma engine. * @dest: destination page * @src: src page - * @offset: offset in pages to start transaction + * @dest_offset: offset into 'dest' to start transaction + * @src_offset: offset into 'src' to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK - * @depend_tx: memcpy depends on the result of this transaction - * @cb_fn: function to call when the memcpy completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK */ struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY, &dest, 1, &src, 1, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; if (device) { dma_addr_t dma_dest, dma_src; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags; + dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; dma_dest = dma_map_page(device->dev, dest, dest_offset, len, DMA_FROM_DEVICE); @@ -67,13 +67,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { void *dest_buf, *src_buf; pr_debug("%s: (sync) len: %zu\n", __func__, len); /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; src_buf = kmap_atomic(src, KM_USER1) + src_offset; @@ -83,7 +83,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, kunmap_atomic(dest_buf, KM_USER0); kunmap_atomic(src_buf, KM_USER1); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index b2f133885b7..c14437238f4 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -35,26 +35,23 @@ * @val: fill value * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK - * @depend_tx: memset depends on the result of this transaction - * @cb_fn: function to call when the memcpy completes - * @cb_param: parameter to pass to the callback routine + * + * honored flags: ASYNC_TX_ACK */ struct dma_async_tx_descriptor * -async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_memset(struct page *dest, int val, unsigned int offset, size_t len, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET, &dest, 1, NULL, 0, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; if (device) { dma_addr_t dma_dest; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags; + dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; dma_dest = dma_map_page(device->dev, dest, offset, len, DMA_FROM_DEVICE); @@ -64,19 +61,19 @@ async_memset(struct page *dest, int val, unsigned int offset, if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { /* run the memset synchronously */ void *dest_buf; pr_debug("%s: (sync) len: %zu\n", __func__, len); - dest_buf = (void *) (((char *) page_address(dest)) + offset); + dest_buf = page_address(dest) + offset; /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); memset(dest_buf, val, len); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 3766bc3d7d8..802a5ce437d 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -45,13 +45,15 @@ static void __exit async_tx_exit(void) /** * __async_tx_find_channel - find a channel to carry out the operation or let * the transaction execute synchronously - * @depend_tx: transaction dependency + * @submit: transaction dependency and submission modifiers * @tx_type: transaction type */ struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type) +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type) { + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; + /* see if we can keep the chain on one channel */ if (depend_tx && dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) @@ -144,13 +146,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, /** - * submit_disposition - while holding depend_tx->lock we must avoid submitting - * new operations to prevent a circular locking dependency with - * drivers that already hold a channel lock when calling - * async_tx_run_dependencies. + * submit_disposition - flags for routing an incoming operation * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly + * + * while holding depend_tx->lock we must avoid submitting new operations + * to prevent a circular locking dependency with drivers that already + * hold a channel lock when calling async_tx_run_dependencies. */ enum submit_disposition { ASYNC_TX_SUBMITTED, @@ -160,11 +163,12 @@ enum submit_disposition { void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + struct async_submit_ctl *submit) { - tx->callback = cb_fn; - tx->callback_param = cb_param; + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; + + tx->callback = submit->cb_fn; + tx->callback_param = submit->cb_param; if (depend_tx) { enum submit_disposition s; @@ -220,7 +224,7 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, tx->tx_submit(tx); } - if (flags & ASYNC_TX_ACK) + if (submit->flags & ASYNC_TX_ACK) async_tx_ack(tx); if (depend_tx) @@ -229,21 +233,20 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, EXPORT_SYMBOL_GPL(async_tx_submit); /** - * async_trigger_callback - schedules the callback function to be run after - * any dependent operations have been completed. - * @flags: ASYNC_TX_ACK - * @depend_tx: 'callback' requires the completion of this transaction - * @cb_fn: function to call after depend_tx completes - * @cb_param: parameter to pass to the callback routine + * async_trigger_callback - schedules the callback function to be run + * @submit: submission and completion parameters + * + * honored flags: ASYNC_TX_ACK + * + * The callback is run after any dependent operations have completed. */ struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_trigger_callback(struct async_submit_ctl *submit) { struct dma_chan *chan; struct dma_device *device; struct dma_async_tx_descriptor *tx; + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; if (depend_tx) { chan = depend_tx->chan; @@ -262,14 +265,14 @@ async_trigger_callback(enum async_tx_flags flags, if (tx) { pr_debug("%s: (async)\n", __func__); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { pr_debug("%s: (sync)\n", __func__); /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 3cc5dc763b5..691fa98a18c 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -34,18 +34,16 @@ static __async_inline struct dma_async_tx_descriptor * do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, - enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + struct async_submit_ctl *submit) { struct dma_device *dma = chan->device; dma_addr_t *dma_src = (dma_addr_t *) src_list; struct dma_async_tx_descriptor *tx = NULL; int src_off = 0; int i; - dma_async_tx_callback _cb_fn; - void *_cb_param; - enum async_tx_flags async_flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + void *cb_param_orig = submit->cb_param; + enum async_tx_flags flags_orig = submit->flags; enum dma_ctrl_flags dma_flags; int xor_src_cnt; dma_addr_t dma_dest; @@ -63,7 +61,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, } while (src_cnt) { - async_flags = flags; + submit->flags = flags_orig; dma_flags = 0; xor_src_cnt = min(src_cnt, dma->max_xor); /* if we are submitting additional xors, leave the chain open, @@ -71,15 +69,15 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, * buffer mapped */ if (src_cnt > xor_src_cnt) { - async_flags &= ~ASYNC_TX_ACK; + submit->flags &= ~ASYNC_TX_ACK; dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; - _cb_fn = NULL; - _cb_param = NULL; + submit->cb_fn = NULL; + submit->cb_param = NULL; } else { - _cb_fn = cb_fn; - _cb_param = cb_param; + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; } - if (_cb_fn) + if (submit->cb_fn) dma_flags |= DMA_PREP_INTERRUPT; /* Since we have clobbered the src_list we are committed @@ -90,7 +88,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, xor_src_cnt, len, dma_flags); if (unlikely(!tx)) - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); /* spin wait for the preceeding transactions to complete */ while (unlikely(!tx)) { @@ -101,10 +99,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, dma_flags); } - async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn, - _cb_param); - - depend_tx = tx; + async_tx_submit(chan, tx, submit); + submit->depend_tx = tx; if (src_cnt > xor_src_cnt) { /* drop completed sources */ @@ -123,8 +119,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, static void do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - dma_async_tx_callback cb_fn, void *cb_param) + int src_cnt, size_t len, struct async_submit_ctl *submit) { int i; int xor_src_cnt; @@ -139,7 +134,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, /* set destination address */ dest_buf = page_address(dest) + offset; - if (flags & ASYNC_TX_XOR_ZERO_DST) + if (submit->flags & ASYNC_TX_XOR_ZERO_DST) memset(dest_buf, 0, len); while (src_cnt > 0) { @@ -152,33 +147,35 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, src_off += xor_src_cnt; } - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } /** * async_xor - attempt to xor a set of blocks with a dma engine. - * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST - * flag must be set to not include dest data in the calculation. The - * assumption with dma eninges is that they only use the destination - * buffer as a source when it is explicity specified in the source list. * @dest: destination page - * @src_list: array of source pages (if the dest is also a source it must be - * at index zero). The contents of this array may be overwritten. - * @offset: offset in pages to start transaction + * @src_list: array of source pages + * @offset: common src/dst offset to start transaction * @src_cnt: number of source pages * @len: length in bytes - * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, ASYNC_TX_ACK - * @depend_tx: xor depends on the result of this transaction. - * @cb_fn: function to call when the xor completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST + * + * xor_blocks always uses the dest as a source so the + * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in + * the calculation. The assumption with dma eninges is that they only + * use the destination buffer as a source when it is explicity specified + * in the source list. + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. */ struct dma_async_tx_descriptor * async_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + int src_cnt, size_t len, struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR, &dest, 1, src_list, src_cnt, len); BUG_ON(src_cnt <= 1); @@ -188,7 +185,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, pr_debug("%s (async): len: %zu\n", __func__, len); return do_async_xor(chan, dest, src_list, offset, src_cnt, len, - flags, depend_tx, cb_fn, cb_param); + submit); } else { /* run the xor synchronously */ pr_debug("%s (sync): len: %zu\n", __func__, len); @@ -196,16 +193,15 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, /* in the sync case the dest is an implied source * (assumes the dest is the first source) */ - if (flags & ASYNC_TX_XOR_DROP_DST) { + if (submit->flags & ASYNC_TX_XOR_DROP_DST) { src_cnt--; src_list++; } /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); - do_sync_xor(dest, src_list, offset, src_cnt, len, - flags, cb_fn, cb_param); + do_sync_xor(dest, src_list, offset, src_cnt, len, submit); return NULL; } @@ -222,25 +218,25 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) /** * async_xor_val - attempt a xor parity check with a dma engine. * @dest: destination page used if the xor is performed synchronously - * @src_list: array of source pages. The dest page must be listed as a source - * at index zero. The contents of this array may be overwritten. + * @src_list: array of source pages * @offset: offset in pages to start transaction * @src_cnt: number of source pages * @len: length in bytes * @result: 0 if sum == 0 else non-zero - * @flags: ASYNC_TX_ACK - * @depend_tx: xor depends on the result of this transaction. - * @cb_fn: function to call when the xor completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. */ struct dma_async_tx_descriptor * -async_xor_val(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, u32 *result, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR_VAL, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, &dest, 1, src_list, src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; @@ -250,11 +246,12 @@ async_xor_val(struct page *dest, struct page **src_list, if (device && src_cnt <= device->max_xor) { dma_addr_t *dma_src = (dma_addr_t *) src_list; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags; int i; pr_debug("%s: (async) len: %zu\n", __func__, len); + dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; for (i = 0; i < src_cnt; i++) dma_src[i] = dma_map_page(device->dev, src_list[i], offset, len, DMA_TO_DEVICE); @@ -263,7 +260,7 @@ async_xor_val(struct page *dest, struct page **src_list, len, result, dma_prep_flags); if (unlikely(!tx)) { - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); while (!tx) { dma_async_issue_pending(chan); @@ -273,23 +270,23 @@ async_xor_val(struct page *dest, struct page **src_list, } } - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { - unsigned long xor_flags = flags; + enum async_tx_flags flags_orig = submit->flags; pr_debug("%s: (sync) len: %zu\n", __func__, len); - xor_flags |= ASYNC_TX_XOR_DROP_DST; - xor_flags &= ~ASYNC_TX_ACK; + submit->flags |= ASYNC_TX_XOR_DROP_DST; + submit->flags &= ~ASYNC_TX_ACK; - tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, - depend_tx, NULL, NULL); + tx = async_xor(dest, src_list, offset, src_cnt, len, submit); async_tx_quiesce(&tx); *result = page_is_zero(dest, offset, len) ? 0 : 1; - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); + submit->flags = flags_orig; } return tx; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0ef5362c8d0..e1920f23579 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -499,11 +499,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, struct page *bio_page; int i; int page_offset; + struct async_submit_ctl submit; if (bio->bi_sector >= sector) page_offset = (signed)(bio->bi_sector - sector) * 512; else page_offset = (signed)(sector - bio->bi_sector) * -512; + + init_async_submit(&submit, 0, tx, NULL, NULL, NULL); bio_for_each_segment(bvl, bio, i) { int len = bio_iovec_idx(bio, i)->bv_len; int clen; @@ -525,13 +528,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, bio_page = bio_iovec_idx(bio, i)->bv_page; if (frombio) tx = async_memcpy(page, bio_page, page_offset, - b_offset, clen, 0, - tx, NULL, NULL); + b_offset, clen, &submit); else tx = async_memcpy(bio_page, page, b_offset, - page_offset, clen, 0, - tx, NULL, NULL); + page_offset, clen, &submit); } + /* chain the operations */ + submit.depend_tx = tx; + if (clen < len) /* hit end of page */ break; page_offset += len; @@ -590,6 +594,7 @@ static void ops_run_biofill(struct stripe_head *sh) { struct dma_async_tx_descriptor *tx = NULL; raid5_conf_t *conf = sh->raid_conf; + struct async_submit_ctl submit; int i; pr_debug("%s: stripe %llu\n", __func__, @@ -613,7 +618,8 @@ static void ops_run_biofill(struct stripe_head *sh) } atomic_inc(&sh->count); - async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_biofill, sh); + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL); + async_trigger_callback(&submit); } static void ops_complete_compute5(void *stripe_head_ref) @@ -645,6 +651,7 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) struct page *xor_dest = tgt->page; int count = 0; struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; int i; pr_debug("%s: stripe %llu block: %d\n", @@ -657,13 +664,12 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) atomic_inc(&sh->count); + init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, + ops_complete_compute5, sh, NULL); if (unlikely(count == 1)) - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, - 0, NULL, ops_complete_compute5, sh); + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_XOR_ZERO_DST, NULL, - ops_complete_compute5, sh); + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); return tx; } @@ -683,6 +689,7 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) int disks = sh->disks; struct page *xor_srcs[disks]; int count = 0, pd_idx = sh->pd_idx, i; + struct async_submit_ctl submit; /* existing parity data subtracted */ struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; @@ -697,9 +704,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) xor_srcs[count++] = dev->page; } - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_XOR_DROP_DST, tx, - ops_complete_prexor, sh); + init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx, + ops_complete_prexor, sh, NULL); + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); return tx; } @@ -772,7 +779,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) /* kernel stack size limits the total number of disks */ int disks = sh->disks; struct page *xor_srcs[disks]; - + struct async_submit_ctl submit; int count = 0, pd_idx = sh->pd_idx, i; struct page *xor_dest; int prexor = 0; @@ -811,13 +818,11 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) atomic_inc(&sh->count); - if (unlikely(count == 1)) { - flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, - flags, tx, ops_complete_postxor, sh); - } else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - flags, tx, ops_complete_postxor, sh); + init_async_submit(&submit, flags, tx, ops_complete_postxor, sh, NULL); + if (unlikely(count == 1)) + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); + else + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); } static void ops_complete_check(void *stripe_head_ref) @@ -838,6 +843,7 @@ static void ops_run_check(struct stripe_head *sh) int disks = sh->disks; struct page *xor_srcs[disks]; struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; int count = 0, pd_idx = sh->pd_idx, i; struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; @@ -851,12 +857,13 @@ static void ops_run_check(struct stripe_head *sh) xor_srcs[count++] = dev->page; } + init_async_submit(&submit, 0, NULL, NULL, NULL, NULL); tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); + &sh->ops.zero_sum_result, &submit); atomic_inc(&sh->count); - tx = async_trigger_callback(ASYNC_TX_ACK, tx, - ops_complete_check, sh); + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL); + tx = async_trigger_callback(&submit); } static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) @@ -2664,6 +2671,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, if (i != sh->pd_idx && i != sh->qd_idx) { int dd_idx, j; struct stripe_head *sh2; + struct async_submit_ctl submit; sector_t bn = compute_blocknr(sh, i, 1); sector_t s = raid5_compute_sector(conf, bn, 0, @@ -2683,9 +2691,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, } /* place all the copies on one channel */ + init_async_submit(&submit, 0, tx, NULL, NULL, NULL); tx = async_memcpy(sh2->dev[dd_idx].page, sh->dev[i].page, 0, 0, STRIPE_SIZE, - 0, tx, NULL, NULL); + &submit); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 9f14cd540cd..00cfb637ddf 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -65,6 +65,22 @@ enum async_tx_flags { ASYNC_TX_ACK = (1 << 2), }; +/** + * struct async_submit_ctl - async_tx submission/completion modifiers + * @flags: submission modifiers + * @depend_tx: parent dependency of the current operation being submitted + * @cb_fn: callback routine to run at operation completion + * @cb_param: parameter for the callback routine + * @scribble: caller provided space for dma/page address conversions + */ +struct async_submit_ctl { + enum async_tx_flags flags; + struct dma_async_tx_descriptor *depend_tx; + dma_async_tx_callback cb_fn; + void *cb_param; + void *scribble; +}; + #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL @@ -73,8 +89,8 @@ enum async_tx_flags { #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ __async_tx_find_channel(dep, type) struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type); +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type); #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ #else static inline void async_tx_issue_pending_all(void) @@ -83,9 +99,10 @@ static inline void async_tx_issue_pending_all(void) } static inline struct dma_chan * -async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type, struct page **dst, int dst_count, - struct page **src, int src_count, size_t len) +async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type, struct page **dst, + int dst_count, struct page **src, int src_count, + size_t len) { return NULL; } @@ -97,46 +114,53 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, * @cb_fn_param: parameter to pass to the callback routine */ static inline void -async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) +async_tx_sync_epilog(struct async_submit_ctl *submit) +{ + if (submit->cb_fn) + submit->cb_fn(submit->cb_param); +} + +typedef union { + unsigned long addr; + struct page *page; + dma_addr_t dma; +} addr_conv_t; + +static inline void +init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags, + struct dma_async_tx_descriptor *tx, + dma_async_tx_callback cb_fn, void *cb_param, + addr_conv_t *scribble) { - if (cb_fn) - cb_fn(cb_fn_param); + args->flags = flags; + args->depend_tx = tx; + args->cb_fn = cb_fn; + args->cb_param = cb_param; + args->scribble = scribble; } -void -async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + int src_cnt, size_t len, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_xor_val(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, u32 *result, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + size_t len, struct async_submit_ctl *submit); -struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ -- cgit v1.2.3 From 04ce9ab385dc97eb55299d533cd3af79b8fc7529 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 3 Jun 2009 14:22:28 -0700 Subject: async_xor: permit callers to pass in a 'dma/page scribble' region async_xor() needs space to perform dma and page address conversions. In most cases the code can simply reuse the struct page * array because the size of the native pointer matches the size of a dma/page address. In order to support archs where sizeof(dma_addr_t) is larger than sizeof(struct page *), or to preserve the input parameters, we utilize a memory region passed in by the caller. Since the code is now prepared to handle the case where it cannot perform address conversions on the stack, we no longer need the !HIGHMEM64G dependency in drivers/dma/Kconfig. [ Impact: don't clobber input buffers for address conversions ] Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 43 ++++++++++++++++--------- crypto/async_tx/async_xor.c | 60 +++++++++++++++++------------------ drivers/dma/Kconfig | 2 +- 3 files changed, 58 insertions(+), 47 deletions(-) (limited to 'crypto') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index dfe0475f791..6b15e488c0e 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -115,29 +115,42 @@ of an operation. Perform a xor->copy->xor operation where each operation depends on the result from the previous operation: -void complete_xor_copy_xor(void *param) +void callback(void *param) { - printk("complete\n"); + struct completion *cmp = param; + + complete(cmp); } -int run_xor_copy_xor(struct page **xor_srcs, - int xor_src_cnt, - struct page *xor_dest, - size_t xor_len, - struct page *copy_src, - struct page *copy_dest, - size_t copy_len) +void run_xor_copy_xor(struct page **xor_srcs, + int xor_src_cnt, + struct page *xor_dest, + size_t xor_len, + struct page *copy_src, + struct page *copy_dest, + size_t copy_len) { struct dma_async_tx_descriptor *tx; + addr_conv_t addr_conv[xor_src_cnt]; + struct async_submit_ctl submit; + addr_conv_t addr_conv[NDISKS]; + struct completion cmp; + + init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL, + addr_conv); + tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit) - tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, - ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL); - tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, tx, NULL, NULL); - tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, - ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, - tx, complete_xor_copy_xor, NULL); + submit->depend_tx = tx; + tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit); + + init_completion(&cmp); + init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx, + callback, &cmp, addr_conv); + tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit); async_tx_issue_pending_all(); + + wait_for_completion(&cmp); } See include/linux/async_tx.h for more information on the flags. See the diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 691fa98a18c..1e96c4df706 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -33,11 +33,10 @@ /* do_async_xor - dma map the pages and perform the xor with an engine */ static __async_inline struct dma_async_tx_descriptor * do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, + unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src, struct async_submit_ctl *submit) { struct dma_device *dma = chan->device; - dma_addr_t *dma_src = (dma_addr_t *) src_list; struct dma_async_tx_descriptor *tx = NULL; int src_off = 0; int i; @@ -125,9 +124,14 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, int xor_src_cnt; int src_off = 0; void *dest_buf; - void **srcs = (void **) src_list; + void **srcs; - /* reuse the 'src_list' array to convert to buffer pointers */ + if (submit->scribble) + srcs = submit->scribble; + else + srcs = (void **) src_list; + + /* convert to buffer pointers */ for (i = 0; i < src_cnt; i++) srcs[i] = page_address(src_list[i]) + offset; @@ -178,17 +182,26 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR, &dest, 1, src_list, src_cnt, len); + dma_addr_t *dma_src = NULL; + BUG_ON(src_cnt <= 1); - if (chan) { + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) src_list; + + if (dma_src && chan) { /* run the xor asynchronously */ pr_debug("%s (async): len: %zu\n", __func__, len); return do_async_xor(chan, dest, src_list, offset, src_cnt, len, - submit); + dma_src, submit); } else { /* run the xor synchronously */ pr_debug("%s (sync): len: %zu\n", __func__, len); + WARN_ONCE(chan, "%s: no space for dma address conversion\n", + __func__); /* in the sync case the dest is an implied source * (assumes the dest is the first source) @@ -241,11 +254,16 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; + dma_addr_t *dma_src = NULL; BUG_ON(src_cnt <= 1); - if (device && src_cnt <= device->max_xor) { - dma_addr_t *dma_src = (dma_addr_t *) src_list; + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) src_list; + + if (dma_src && device && src_cnt <= device->max_xor) { unsigned long dma_prep_flags; int i; @@ -275,6 +293,9 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, enum async_tx_flags flags_orig = submit->flags; pr_debug("%s: (sync) len: %zu\n", __func__, len); + WARN_ONCE(device && src_cnt <= device->max_xor, + "%s: no space for dma address conversion\n", + __func__); submit->flags |= ASYNC_TX_XOR_DROP_DST; submit->flags &= ~ASYNC_TX_ACK; @@ -293,29 +314,6 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, } EXPORT_SYMBOL_GPL(async_xor_val); -static int __init async_xor_init(void) -{ - #ifdef CONFIG_DMA_ENGINE - /* To conserve stack space the input src_list (array of page pointers) - * is reused to hold the array of dma addresses passed to the driver. - * This conversion is only possible when dma_addr_t is less than the - * the size of a pointer. HIGHMEM64G is known to violate this - * assumption. - */ - BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *)); - #endif - - return 0; -} - -static void __exit async_xor_exit(void) -{ - do { } while (0); -} - -module_init(async_xor_init); -module_exit(async_xor_exit); - MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api"); MODULE_LICENSE("GPL"); diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 3b3c01b6f1e..912a51b5cbd 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -4,7 +4,7 @@ menuconfig DMADEVICES bool "DMA Engine support" - depends on !HIGHMEM64G && HAS_DMA + depends on HAS_DMA help DMA engines can do asynchronous data transfers without involving the host CPU. Currently, this framework can be -- cgit v1.2.3 From ad283ea4a3ce82cda2efe33163748a397b31b1eb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 29 Aug 2009 19:09:26 -0700 Subject: async_tx: add sum check flags Replace the flat zero_sum_result with a collection of flags to contain the P (xor) zero-sum result, and the soon to be utilized Q (raid6 reed solomon syndrome) zero-sum result. Use the SUM_CHECK_ namespace instead of DMA_ since these flags will be used on non-dma-zero-sum enabled platforms. Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- arch/arm/include/asm/hardware/iop3xx-adma.h | 5 +++-- arch/arm/mach-iop13xx/include/mach/adma.h | 12 +++++++----- crypto/async_tx/async_xor.c | 4 ++-- drivers/md/raid5.c | 2 +- drivers/md/raid5.h | 5 +++-- include/linux/async_tx.h | 2 +- include/linux/dmaengine.h | 21 ++++++++++++++++++++- 7 files changed, 37 insertions(+), 14 deletions(-) (limited to 'crypto') diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h index 83e6ba338e2..26eefea0231 100644 --- a/arch/arm/include/asm/hardware/iop3xx-adma.h +++ b/arch/arm/include/asm/hardware/iop3xx-adma.h @@ -756,13 +756,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, hw_desc->src[0] = val; } -static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +static inline enum sum_check_flags +iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) { struct iop3xx_desc_aau *hw_desc = desc->hw_desc; struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); - return desc_ctrl.zero_result_err; + return desc_ctrl.zero_result_err << SUM_CHECK_P; } static inline void iop_chan_append(struct iop_adma_chan *chan) diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h index 5722e86f217..1cd31df8924 100644 --- a/arch/arm/mach-iop13xx/include/mach/adma.h +++ b/arch/arm/mach-iop13xx/include/mach/adma.h @@ -428,18 +428,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, hw_desc->block_fill_data = val; } -static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +static inline enum sum_check_flags +iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) { struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; + enum sum_check_flags flags; BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); - if (desc_ctrl.pq_xfer_en) - return byte_count.zero_result_err_q; - else - return byte_count.zero_result_err; + flags = byte_count.zero_result_err_q << SUM_CHECK_Q; + flags |= byte_count.zero_result_err << SUM_CHECK_P; + + return flags; } static inline void iop_chan_append(struct iop_adma_chan *chan) diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 1e96c4df706..78fb7780272 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -246,7 +246,7 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) */ struct dma_async_tx_descriptor * async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, u32 *result, + int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit) { struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, @@ -304,7 +304,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, async_tx_quiesce(&tx); - *result = page_is_zero(dest, offset, len) ? 0 : 1; + *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P; async_tx_sync_epilog(submit); submit->flags = flags_orig; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7727954cf72..1f2a266f3cf 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2590,7 +2590,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, * we are done. Otherwise update the mismatch count and repair * parity if !MD_RECOVERY_CHECK */ - if (sh->ops.zero_sum_result == 0) + if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0) /* parity is correct (on disc, * not in buffer any more) */ diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index e7baabffee8..75f2c6c4cf9 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -2,6 +2,7 @@ #define _RAID5_H #include +#include /* * @@ -215,8 +216,8 @@ struct stripe_head { * @target - STRIPE_OP_COMPUTE_BLK target */ struct stripe_operations { - int target; - u32 zero_sum_result; + int target; + enum sum_check_flags zero_sum_result; } ops; struct r5dev { struct bio req; diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 00cfb637ddf..3d21a251751 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -148,7 +148,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, struct dma_async_tx_descriptor * async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, u32 *result, + int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 6768727d00d..02447afceba 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -86,6 +86,25 @@ enum dma_ctrl_flags { DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), }; +/** + * enum sum_check_bits - bit position of pq_check_flags + */ +enum sum_check_bits { + SUM_CHECK_P = 0, + SUM_CHECK_Q = 1, +}; + +/** + * enum pq_check_flags - result of async_{xor,pq}_zero_sum operations + * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise + * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise + */ +enum sum_check_flags { + SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P), + SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q), +}; + + /** * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. * See linux/cpumask.h @@ -245,7 +264,7 @@ struct dma_device { unsigned int src_cnt, size_t len, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, - size_t len, u32 *result, unsigned long flags); + size_t len, enum sum_check_flags *result, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); -- cgit v1.2.3 From af1f951eb6ef27b01cbfb3f6c21b770af4368a6d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 29 Aug 2009 19:09:26 -0700 Subject: async_tx: kill needless module_{init|exit} If module_init and module_exit are nops then neither need to be defined. [ Impact: pure cleanup ] Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- crypto/async_tx/async_memcpy.c | 13 ------------- crypto/async_tx/async_memset.c | 13 ------------- crypto/async_tx/async_tx.c | 17 +++-------------- 3 files changed, 3 insertions(+), 40 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index 89e05556f3d..98e15bd0dcb 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -90,19 +90,6 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, } EXPORT_SYMBOL_GPL(async_memcpy); -static int __init async_memcpy_init(void) -{ - return 0; -} - -static void __exit async_memcpy_exit(void) -{ - do { } while (0); -} - -module_init(async_memcpy_init); -module_exit(async_memcpy_exit); - MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("asynchronous memcpy api"); MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index c14437238f4..b896a6e5f67 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -80,19 +80,6 @@ async_memset(struct page *dest, int val, unsigned int offset, size_t len, } EXPORT_SYMBOL_GPL(async_memset); -static int __init async_memset_init(void) -{ - return 0; -} - -static void __exit async_memset_exit(void) -{ - do { } while (0); -} - -module_init(async_memset_init); -module_exit(async_memset_exit); - MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("asynchronous memset api"); MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 802a5ce437d..6e37ad3f441 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -42,6 +42,9 @@ static void __exit async_tx_exit(void) async_dmaengine_put(); } +module_init(async_tx_init); +module_exit(async_tx_exit); + /** * __async_tx_find_channel - find a channel to carry out the operation or let * the transaction execute synchronously @@ -61,17 +64,6 @@ __async_tx_find_channel(struct async_submit_ctl *submit, return async_dma_find_channel(tx_type); } EXPORT_SYMBOL_GPL(__async_tx_find_channel); -#else -static int __init async_tx_init(void) -{ - printk(KERN_INFO "async_tx: api initialized (sync-only)\n"); - return 0; -} - -static void __exit async_tx_exit(void) -{ - do { } while (0); -} #endif @@ -298,9 +290,6 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx) } EXPORT_SYMBOL_GPL(async_tx_quiesce); -module_init(async_tx_init); -module_exit(async_tx_exit); - MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 95475e57113c66aac7583925736ed2e2d58c990d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:19:02 -0700 Subject: async_tx: remove walk of tx->parent chain in dma_wait_for_async_tx We currently walk the parent chain when waiting for a given tx to complete however this walk may race with the driver cleanup routine. The routines in async_raid6_recov.c may fall back to the synchronous path at any point so we need to be prepared to call async_tx_quiesce() (which calls dma_wait_for_async_tx). To remove the ->parent walk we guarantee that every time a dependency is attached ->issue_pending() is invoked, then we can simply poll the initial descriptor until completion. This also allows for a lighter weight 'issue pending' implementation as there is no longer a requirement to iterate through all the channels' ->issue_pending() routines as long as operations have been submitted in an ordered chain. async_tx_issue_pending() is added for this case. Signed-off-by: Dan Williams --- crypto/async_tx/async_tx.c | 13 +++++++------ drivers/dma/dmaengine.c | 45 ++++++++++----------------------------------- include/linux/async_tx.h | 23 +++++++++++++++++++++++ 3 files changed, 40 insertions(+), 41 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 6e37ad3f441..60615fedcf5 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -77,8 +77,8 @@ static void async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, struct dma_async_tx_descriptor *tx) { - struct dma_chan *chan; - struct dma_device *device; + struct dma_chan *chan = depend_tx->chan; + struct dma_device *device = chan->device; struct dma_async_tx_descriptor *intr_tx = (void *) ~0; /* first check to see if we can still append to depend_tx */ @@ -90,11 +90,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, } spin_unlock_bh(&depend_tx->lock); - if (!intr_tx) + /* attached dependency, flush the parent channel */ + if (!intr_tx) { + device->device_issue_pending(chan); return; - - chan = depend_tx->chan; - device = chan->device; + } /* see if we can schedule an interrupt * otherwise poll for completion @@ -128,6 +128,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, intr_tx->tx_submit(intr_tx); async_tx_ack(intr_tx); } + device->device_issue_pending(chan); } else { if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) panic("%s: DMA_ERROR waiting for depend_tx\n", diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 6781e8f3c06..e002e0e0d05 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -934,49 +934,24 @@ EXPORT_SYMBOL(dma_async_tx_descriptor_init); /* dma_wait_for_async_tx - spin wait for a transaction to complete * @tx: in-flight transaction to wait on - * - * This routine assumes that tx was obtained from a call to async_memcpy, - * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped - * and submitted). Walking the parent chain is only meant to cover for DMA - * drivers that do not implement the DMA_INTERRUPT capability and may race with - * the driver's descriptor cleanup routine. */ enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) { - enum dma_status status; - struct dma_async_tx_descriptor *iter; - struct dma_async_tx_descriptor *parent; + unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); if (!tx) return DMA_SUCCESS; - WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for" - " %s\n", __func__, dma_chan_name(tx->chan)); - - /* poll through the dependency chain, return when tx is complete */ - do { - iter = tx; - - /* find the root of the unsubmitted dependency chain */ - do { - parent = iter->parent; - if (!parent) - break; - else - iter = parent; - } while (parent); - - /* there is a small window for ->parent == NULL and - * ->cookie == -EBUSY - */ - while (iter->cookie == -EBUSY) - cpu_relax(); - - status = dma_sync_wait(iter->chan, iter->cookie); - } while (status == DMA_IN_PROGRESS || (iter != tx)); - - return status; + while (tx->cookie == -EBUSY) { + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { + pr_err("%s timeout waiting for descriptor submission\n", + __func__); + return DMA_ERROR; + } + cpu_relax(); + } + return dma_sync_wait(tx->chan, tx->cookie); } EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 3d21a251751..12a2efcbd56 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -83,6 +83,24 @@ struct async_submit_ctl { #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all + +/** + * async_tx_issue_pending - send pending descriptor to the hardware channel + * @tx: descriptor handle to retrieve hardware context + * + * Note: any dependent operations will have already been issued by + * async_tx_channel_switch, or (in the case of no channel switch) will + * be already pending on this channel. + */ +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + if (likely(tx)) { + struct dma_chan *chan = tx->chan; + struct dma_device *dma = chan->device; + + dma->device_issue_pending(chan); + } +} #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #include #else @@ -98,6 +116,11 @@ static inline void async_tx_issue_pending_all(void) do { } while (0); } +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + do { } while (0); +} + static inline struct dma_chan * async_tx_find_channel(struct async_submit_ctl *submit, enum dma_transaction_type tx_type, struct page **dst, -- cgit v1.2.3 From b2f46fd8ef3dff2ab30f31126833f78b7480283a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:36 -0700 Subject: async_tx: add support for asynchronous GF multiplication [ Based on an original patch by Yuri Tikhonov ] This adds support for doing asynchronous GF multiplication by adding two additional functions to the async_tx API: async_gen_syndrome() does simultaneous XOR and Galois field multiplication of sources. async_syndrome_val() validates the given source buffers against known P and Q values. When a request is made to run async_pq against more than the hardware maximum number of supported sources we need to reuse the previous generated P and Q values as sources into the next operation. Care must be taken to remove Q from P' and P from Q'. For example to perform a 5 source pq op with hardware that only supports 4 sources at a time the following approach is taken: p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08})) p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10})) p' = p + q + q + src4 = p + src4 q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10}*src4 Note: 4 is the minimum acceptable maxpq otherwise we punt to synchronous-software path. The DMA_PREP_CONTINUE flag indicates to the driver to reuse p and q as sources (in the above manner) and fill the remaining slots up to maxpq with the new sources/coefficients. Note1: Some devices have native support for P+Q continuation and can skip this extra work. Devices with this capability can advertise it with dma_set_maxpq. It is up to each driver how to handle the DMA_PREP_CONTINUE flag. Note2: The api supports disabling the generation of P when generating Q, this is ignored by the synchronous path but is implemented by some dma devices to save unnecessary writes. In this case the continuation algorithm is simplified to only reuse Q as a source. Cc: H. Peter Anvin Cc: David Woodhouse Signed-off-by: Yuri Tikhonov Signed-off-by: Ilya Yanok Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 3 + arch/arm/mach-iop13xx/setup.c | 2 +- crypto/async_tx/Kconfig | 4 + crypto/async_tx/Makefile | 1 + crypto/async_tx/async_pq.c | 388 ++++++++++++++++++++++++++++++++++ crypto/async_tx/async_xor.c | 2 +- drivers/dma/dmaengine.c | 4 + drivers/dma/iop-adma.c | 2 +- include/linux/async_tx.h | 9 + include/linux/dmaengine.h | 87 +++++++- 10 files changed, 493 insertions(+), 9 deletions(-) create mode 100644 crypto/async_tx/async_pq.c (limited to 'crypto') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 6b15e488c0e..0e48e054d69 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -64,6 +64,9 @@ xor - xor a series of source buffers and write the result to a xor_val - xor a series of source buffers and set a flag if the result is zero. The implementation attempts to prevent writes to memory +pq - generate the p+q (raid6 syndrome) from a series of source buffers +pq_val - validate that a p and or q buffer are in sync with a given series of + sources 3.3 Descriptor management: The return value is non-NULL and points to a 'descriptor' when the operation diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index 9800228b71d..2e7ca0d75f8 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -506,7 +506,7 @@ void __init iop13xx_platform_init(void) dma_cap_set(DMA_MEMSET, plat_data->cap_mask); dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); - dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask); + dma_cap_set(DMA_PQ, plat_data->cap_mask); dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask); dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask); break; diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index d8fb3914598..cb6d7314f19 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -14,3 +14,7 @@ config ASYNC_MEMSET tristate select ASYNC_CORE +config ASYNC_PQ + tristate + select ASYNC_CORE + diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 27baa7d52fb..1b992658825 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o +obj-$(CONFIG_ASYNC_PQ) += async_pq.o diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c new file mode 100644 index 00000000000..108b21efb49 --- /dev/null +++ b/crypto/async_tx/async_pq.c @@ -0,0 +1,388 @@ +/* + * Copyright(c) 2007 Yuri Tikhonov + * Copyright(c) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#include +#include +#include +#include +#include + +/** + * scribble - space to hold throwaway P buffer for synchronous gen_syndrome + */ +static struct page *scribble; + +static bool is_raid6_zero_block(struct page *p) +{ + return p == (void *) raid6_empty_zero_page; +} + +/* the struct page *blocks[] parameter passed to async_gen_syndrome() + * and async_syndrome_val() contains the 'P' destination address at + * blocks[disks-2] and the 'Q' destination address at blocks[disks-1] + * + * note: these are macros as they are used as lvalues + */ +#define P(b, d) (b[d-2]) +#define Q(b, d) (b[d-1]) + +/** + * do_async_gen_syndrome - asynchronously calculate P and/or Q + */ +static __async_inline struct dma_async_tx_descriptor * +do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, + const unsigned char *scfs, unsigned int offset, int disks, + size_t len, dma_addr_t *dma_src, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct dma_device *dma = chan->device; + enum dma_ctrl_flags dma_flags = 0; + enum async_tx_flags flags_orig = submit->flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + dma_async_tx_callback cb_param_orig = submit->cb_param; + int src_cnt = disks - 2; + unsigned char coefs[src_cnt]; + unsigned short pq_src_cnt; + dma_addr_t dma_dest[2]; + int src_off = 0; + int idx; + int i; + + /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */ + if (P(blocks, disks)) + dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset, + len, DMA_BIDIRECTIONAL); + else + dma_flags |= DMA_PREP_PQ_DISABLE_P; + if (Q(blocks, disks)) + dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset, + len, DMA_BIDIRECTIONAL); + else + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + + /* convert source addresses being careful to collapse 'empty' + * sources and update the coefficients accordingly + */ + for (i = 0, idx = 0; i < src_cnt; i++) { + if (is_raid6_zero_block(blocks[i])) + continue; + dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len, + DMA_TO_DEVICE); + coefs[idx] = scfs[i]; + idx++; + } + src_cnt = idx; + + while (src_cnt > 0) { + submit->flags = flags_orig; + pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags)); + /* if we are submitting additional pqs, leave the chain open, + * clear the callback parameters, and leave the destination + * buffers mapped + */ + if (src_cnt > pq_src_cnt) { + submit->flags &= ~ASYNC_TX_ACK; + dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP; + submit->cb_fn = NULL; + submit->cb_param = NULL; + } else { + dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP; + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; + if (cb_fn_orig) + dma_flags |= DMA_PREP_INTERRUPT; + } + + /* Since we have clobbered the src_list we are committed + * to doing this asynchronously. Drivers force forward + * progress in case they can not provide a descriptor + */ + for (;;) { + tx = dma->device_prep_dma_pq(chan, dma_dest, + &dma_src[src_off], + pq_src_cnt, + &coefs[src_off], len, + dma_flags); + if (likely(tx)) + break; + async_tx_quiesce(&submit->depend_tx); + dma_async_issue_pending(chan); + } + + async_tx_submit(chan, tx, submit); + submit->depend_tx = tx; + + /* drop completed sources */ + src_cnt -= pq_src_cnt; + src_off += pq_src_cnt; + + dma_flags |= DMA_PREP_CONTINUE; + } + + return tx; +} + +/** + * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome + */ +static void +do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, + size_t len, struct async_submit_ctl *submit) +{ + void **srcs; + int i; + + if (submit->scribble) + srcs = submit->scribble; + else + srcs = (void **) blocks; + + for (i = 0; i < disks; i++) { + if (is_raid6_zero_block(blocks[i])) { + BUG_ON(i > disks - 3); /* P or Q can't be zero */ + srcs[i] = blocks[i]; + } else + srcs[i] = page_address(blocks[i]) + offset; + } + raid6_call.gen_syndrome(disks, len, srcs); + async_tx_sync_epilog(submit); +} + +/** + * async_gen_syndrome - asynchronously calculate a raid6 syndrome + * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 + * @offset: common offset into each block (src and dest) to start transaction + * @disks: number of blocks (including missing P or Q, see below) + * @len: length of operation in bytes + * @submit: submission/completion modifiers + * + * General note: This routine assumes a field of GF(2^8) with a + * primitive polynomial of 0x11d and a generator of {02}. + * + * 'disks' note: callers can optionally omit either P or Q (but not + * both) from the calculation by setting blocks[disks-2] or + * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <= + * PAGE_SIZE as a temporary buffer of this size is used in the + * synchronous path. 'disks' always accounts for both destination + * buffers. + * + * 'blocks' note: if submit->scribble is NULL then the contents of + * 'blocks' may be overridden + */ +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, + size_t len, struct async_submit_ctl *submit) +{ + int src_cnt = disks - 2; + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &P(blocks, disks), 2, + blocks, src_cnt, len); + struct dma_device *device = chan ? chan->device : NULL; + dma_addr_t *dma_src = NULL; + + BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks))); + + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) blocks; + + if (dma_src && device && + (src_cnt <= dma_maxpq(device, 0) || + dma_maxpq(device, DMA_PREP_CONTINUE) > 0)) { + /* run the p+q asynchronously */ + pr_debug("%s: (async) disks: %d len: %zu\n", + __func__, disks, len); + return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset, + disks, len, dma_src, submit); + } + + /* run the pq synchronously */ + pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len); + + /* wait for any prerequisite operations */ + async_tx_quiesce(&submit->depend_tx); + + if (!P(blocks, disks)) { + P(blocks, disks) = scribble; + BUG_ON(len + offset > PAGE_SIZE); + } + if (!Q(blocks, disks)) { + Q(blocks, disks) = scribble; + BUG_ON(len + offset > PAGE_SIZE); + } + do_sync_gen_syndrome(blocks, offset, disks, len, submit); + + return NULL; +} +EXPORT_SYMBOL_GPL(async_gen_syndrome); + +/** + * async_syndrome_val - asynchronously validate a raid6 syndrome + * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 + * @offset: common offset into each block (src and dest) to start transaction + * @disks: number of blocks (including missing P or Q, see below) + * @len: length of operation in bytes + * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set + * @spare: temporary result buffer for the synchronous case + * @submit: submission / completion modifiers + * + * The same notes from async_gen_syndrome apply to the 'blocks', + * and 'disks' parameters of this routine. The synchronous path + * requires a temporary result buffer and submit->scribble to be + * specified. + */ +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int disks, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL, + NULL, 0, blocks, disks, + len); + struct dma_device *device = chan ? chan->device : NULL; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; + dma_addr_t *dma_src = NULL; + + BUG_ON(disks < 4); + + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) blocks; + + if (dma_src && device && disks <= dma_maxpq(device, 0)) { + struct device *dev = device->dev; + dma_addr_t *pq = &dma_src[disks-2]; + int i; + + pr_debug("%s: (async) disks: %d len: %zu\n", + __func__, disks, len); + if (!P(blocks, disks)) + dma_flags |= DMA_PREP_PQ_DISABLE_P; + if (!Q(blocks, disks)) + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + for (i = 0; i < disks; i++) + if (likely(blocks[i])) { + BUG_ON(is_raid6_zero_block(blocks[i])); + dma_src[i] = dma_map_page(dev, blocks[i], + offset, len, + DMA_TO_DEVICE); + } + + for (;;) { + tx = device->device_prep_dma_pq_val(chan, pq, dma_src, + disks - 2, + raid6_gfexp, + len, pqres, + dma_flags); + if (likely(tx)) + break; + async_tx_quiesce(&submit->depend_tx); + dma_async_issue_pending(chan); + } + async_tx_submit(chan, tx, submit); + + return tx; + } else { + struct page *p_src = P(blocks, disks); + struct page *q_src = Q(blocks, disks); + enum async_tx_flags flags_orig = submit->flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + void *scribble = submit->scribble; + void *cb_param_orig = submit->cb_param; + void *p, *q, *s; + + pr_debug("%s: (sync) disks: %d len: %zu\n", + __func__, disks, len); + + /* caller must provide a temporary result buffer and + * allow the input parameters to be preserved + */ + BUG_ON(!spare || !scribble); + + /* wait for any prerequisite operations */ + async_tx_quiesce(&submit->depend_tx); + + /* recompute p and/or q into the temporary buffer and then + * check to see the result matches the current value + */ + tx = NULL; + *pqres = 0; + if (p_src) { + init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL, + NULL, NULL, scribble); + tx = async_xor(spare, blocks, offset, disks-2, len, submit); + async_tx_quiesce(&tx); + p = page_address(p_src) + offset; + s = page_address(spare) + offset; + *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P; + } + + if (q_src) { + P(blocks, disks) = NULL; + Q(blocks, disks) = spare; + init_async_submit(submit, 0, NULL, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, offset, disks, len, submit); + async_tx_quiesce(&tx); + q = page_address(q_src) + offset; + s = page_address(spare) + offset; + *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q; + } + + /* restore P, Q and submit */ + P(blocks, disks) = p_src; + Q(blocks, disks) = q_src; + + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; + submit->flags = flags_orig; + async_tx_sync_epilog(submit); + + return NULL; + } +} +EXPORT_SYMBOL_GPL(async_syndrome_val); + +static int __init async_pq_init(void) +{ + scribble = alloc_page(GFP_KERNEL); + + if (scribble) + return 0; + + pr_err("%s: failed to allocate required spare page\n", __func__); + + return -ENOMEM; +} + +static void __exit async_pq_exit(void) +{ + put_page(scribble); +} + +module_init(async_pq_init); +module_exit(async_pq_exit); + +MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation"); +MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 78fb7780272..56b5f98da46 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -62,7 +62,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, while (src_cnt) { submit->flags = flags_orig; dma_flags = 0; - xor_src_cnt = min(src_cnt, dma->max_xor); + xor_src_cnt = min(src_cnt, (int)dma->max_xor); /* if we are submitting additional xors, leave the chain open, * clear the callback parameters, and leave the destination * buffer mapped diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index e002e0e0d05..cd5673d3043 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -646,6 +646,10 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_xor); BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) && !device->device_prep_dma_xor_val); + BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) && + !device->device_prep_dma_pq); + BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && + !device->device_prep_dma_pq_val); BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 6ff79a67269..4496bc60666 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -1257,7 +1257,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " "( %s%s%s%s%s%s%s%s%s%s)\n", - dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", + dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 12a2efcbd56..e6ce5f004f9 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -185,5 +185,14 @@ async_memset(struct page *dest, int val, unsigned int offset, struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit); + void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 02447afceba..ce010cd991d 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -52,7 +52,7 @@ enum dma_status { enum dma_transaction_type { DMA_MEMCPY, DMA_XOR, - DMA_PQ_XOR, + DMA_PQ, DMA_DUAL_XOR, DMA_PQ_UPDATE, DMA_XOR_VAL, @@ -70,20 +70,28 @@ enum dma_transaction_type { /** * enum dma_ctrl_flags - DMA flags to augment operation preparation, - * control completion, and communicate status. + * control completion, and communicate status. * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of - * this transaction + * this transaction * @DMA_CTRL_ACK - the descriptor cannot be reused until the client - * acknowledges receipt, i.e. has has a chance to establish any - * dependency chains + * acknowledges receipt, i.e. has has a chance to establish any dependency + * chains * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) + * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q + * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P + * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as + * sources that were the result of a previous operation, in the case of a PQ + * operation it continues the calculation with new sources */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), DMA_CTRL_ACK = (1 << 1), DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2), DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), + DMA_PREP_PQ_DISABLE_P = (1 << 4), + DMA_PREP_PQ_DISABLE_Q = (1 << 5), + DMA_PREP_CONTINUE = (1 << 6), }; /** @@ -226,6 +234,7 @@ struct dma_async_tx_descriptor { * @global_node: list_head for global dma_device_list * @cap_mask: one or more dma_capability flags * @max_xor: maximum number of xor sources, 0 if no capability + * @max_pq: maximum number of PQ sources and PQ-continue capability * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the @@ -234,6 +243,8 @@ struct dma_async_tx_descriptor { * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation * @device_prep_dma_xor_val: prepares a xor validation operation + * @device_prep_dma_pq: prepares a pq operation + * @device_prep_dma_pq_val: prepares a pqzero_sum operation * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation @@ -248,7 +259,9 @@ struct dma_device { struct list_head channels; struct list_head global_node; dma_cap_mask_t cap_mask; - int max_xor; + unsigned short max_xor; + unsigned short max_pq; + #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; struct device *dev; @@ -265,6 +278,14 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, enum sum_check_flags *result, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq)( + struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)( + struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); @@ -283,6 +304,60 @@ struct dma_device { void (*device_issue_pending)(struct dma_chan *chan); }; +static inline void +dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) +{ + dma->max_pq = maxpq; + if (has_pq_continue) + dma->max_pq |= DMA_HAS_PQ_CONTINUE; +} + +static inline bool dmaf_continue(enum dma_ctrl_flags flags) +{ + return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE; +} + +static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags) +{ + enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P; + + return (flags & mask) == mask; +} + +static inline bool dma_dev_has_pq_continue(struct dma_device *dma) +{ + return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; +} + +static unsigned short dma_dev_to_maxpq(struct dma_device *dma) +{ + return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; +} + +/* dma_maxpq - reduce maxpq in the face of continued operations + * @dma - dma device with PQ capability + * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set + * + * When an engine does not support native continuation we need 3 extra + * source slots to reuse P and Q with the following coefficients: + * 1/ {00} * P : remove P from Q', but use it as a source for P' + * 2/ {01} * Q : use Q to continue Q' calculation + * 3/ {00} * Q : subtract Q from P' to cancel (2) + * + * In the case where P is disabled we only need 1 extra source: + * 1/ {01} * Q : use Q to continue Q' calculation + */ +static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags) +{ + if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags)) + return dma_dev_to_maxpq(dma); + else if (dmaf_p_disabled_continue(flags)) + return dma_dev_to_maxpq(dma) - 1; + else if (dmaf_continue(flags)) + return dma_dev_to_maxpq(dma) - 3; + BUG(); +} + /* --- public DMA engine API --- */ #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3 From 0a82a6239beecc95db6e05fe43ee62d16b381d38 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:37 -0700 Subject: async_tx: add support for asynchronous RAID6 recovery operations async_raid6_2data_recov() recovers two data disk failures async_raid6_datap_recov() recovers a data disk and the P disk These routines are a port of the synchronous versions found in drivers/md/raid6recov.c. The primary difference is breaking out the xor operations into separate calls to async_xor. Two helper routines are introduced to perform scalar multiplication where needed. async_sum_product() multiplies two sources by scalar coefficients and then sums (xor) the result. async_mult() simply multiplies a single source by a scalar. This implemention also includes, in contrast to the original synchronous-only code, special case handling for the 4-disk and 5-disk array cases. In these situations the default N-disk algorithm will present 0-source or 1-source operations to dma devices. To cover for dma devices where the minimum source count is 2 we implement 4-disk and 5-disk handling in the recovery code. [ Impact: asynchronous raid6 recovery routines for 2data and datap cases ] Cc: Yuri Tikhonov Cc: Ilya Yanok Cc: H. Peter Anvin Cc: David Woodhouse Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 4 + crypto/async_tx/Kconfig | 5 + crypto/async_tx/Makefile | 1 + crypto/async_tx/async_raid6_recov.c | 448 ++++++++++++++++++++++++++++++++++ include/linux/async_tx.h | 8 + 5 files changed, 466 insertions(+) create mode 100644 crypto/async_tx/async_raid6_recov.c (limited to 'crypto') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 0e48e054d69..ba046b8fa92 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -67,6 +67,10 @@ xor_val - xor a series of source buffers and set a flag if the pq - generate the p+q (raid6 syndrome) from a series of source buffers pq_val - validate that a p and or q buffer are in sync with a given series of sources +datap - (raid6_datap_recov) recover a raid6 data block and the p block + from the given sources +2data - (raid6_2data_recov) recover 2 raid6 data blocks from the given + sources 3.3 Descriptor management: The return value is non-NULL and points to a 'descriptor' when the operation diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index cb6d7314f19..e5aeb2b79e6 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -18,3 +18,8 @@ config ASYNC_PQ tristate select ASYNC_CORE +config ASYNC_RAID6_RECOV + tristate + select ASYNC_CORE + select ASYNC_PQ + diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 1b992658825..9a1a76811b8 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o obj-$(CONFIG_ASYNC_PQ) += async_pq.o +obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c new file mode 100644 index 00000000000..0c14d48c989 --- /dev/null +++ b/crypto/async_tx/async_raid6_recov.c @@ -0,0 +1,448 @@ +/* + * Asynchronous RAID-6 recovery calculations ASYNC_TX API. + * Copyright(c) 2009 Intel Corporation + * + * based on raid6recov.c: + * Copyright 2002 H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#include +#include +#include +#include +#include + +static struct dma_async_tx_descriptor * +async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, + size_t len, struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &dest, 1, srcs, 2, len); + struct dma_device *dma = chan ? chan->device : NULL; + const u8 *amul, *bmul; + u8 ax, bx; + u8 *a, *b, *c; + + if (dma) { + dma_addr_t dma_dest[2]; + dma_addr_t dma_src[2]; + struct device *dev = dma->dev; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); + dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef, + len, dma_flags); + if (tx) { + async_tx_submit(chan, tx, submit); + return tx; + } + } + + /* run the operation synchronously */ + async_tx_quiesce(&submit->depend_tx); + amul = raid6_gfmul[coef[0]]; + bmul = raid6_gfmul[coef[1]]; + a = page_address(srcs[0]); + b = page_address(srcs[1]); + c = page_address(dest); + + while (len--) { + ax = amul[*a++]; + bx = bmul[*b++]; + *c++ = ax ^ bx; + } + + return NULL; +} + +static struct dma_async_tx_descriptor * +async_mult(struct page *dest, struct page *src, u8 coef, size_t len, + struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &dest, 1, &src, 1, len); + struct dma_device *dma = chan ? chan->device : NULL; + const u8 *qmul; /* Q multiplier table */ + u8 *d, *s; + + if (dma) { + dma_addr_t dma_dest[2]; + dma_addr_t dma_src[1]; + struct device *dev = dma->dev; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, + len, dma_flags); + if (tx) { + async_tx_submit(chan, tx, submit); + return tx; + } + } + + /* no channel available, or failed to allocate a descriptor, so + * perform the operation synchronously + */ + async_tx_quiesce(&submit->depend_tx); + qmul = raid6_gfmul[coef]; + d = page_address(dest); + s = page_address(src); + + while (len--) + *d++ = qmul[*s++]; + + return NULL; +} + +static struct dma_async_tx_descriptor * +__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *a, *b; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + + p = blocks[4-2]; + q = blocks[4-1]; + + a = blocks[faila]; + b = blocks[failb]; + + /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */ + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = p; + srcs[1] = q; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_sum_product(b, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = p; + srcs[1] = b; + init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(a, srcs, 0, 2, bytes, submit); + + return tx; + +} + +static struct dma_async_tx_descriptor * +__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *g, *dp, *dq; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + int uninitialized_var(good); + int i; + + for (i = 0; i < 3; i++) { + if (i == faila || i == failb) + continue; + else { + good = i; + break; + } + } + BUG_ON(i >= 3); + + p = blocks[5-2]; + q = blocks[5-1]; + g = blocks[good]; + + /* Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for delta p and + * delta q + */ + dp = blocks[faila]; + dq = blocks[failb]; + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_memcpy(dp, g, 0, 0, bytes, submit); + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + + /* compute P + Pxy */ + srcs[0] = dp; + srcs[1] = p; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + /* compute Q + Qxy */ + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = dp; + srcs[1] = dq; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_sum_product(dq, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = dp; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + return tx; +} + +static struct dma_async_tx_descriptor * +__2data_recov_n(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *dp, *dq; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + + p = blocks[disks-2]; + q = blocks[disks-1]; + + /* Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = blocks[faila]; + blocks[faila] = (void *)raid6_empty_zero_page; + blocks[disks-2] = dp; + dq = blocks[failb]; + blocks[failb] = (void *)raid6_empty_zero_page; + blocks[disks-1] = dq; + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + + /* Restore pointer table */ + blocks[faila] = dp; + blocks[failb] = dq; + blocks[disks-2] = p; + blocks[disks-1] = q; + + /* compute P + Pxy */ + srcs[0] = dp; + srcs[1] = p; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + /* compute Q + Qxy */ + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = dp; + srcs[1] = dq; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_sum_product(dq, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = dp; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + return tx; +} + +/** + * async_raid6_2data_recov - asynchronously calculate two missing data blocks + * @disks: number of disks in the RAID-6 array + * @bytes: block size + * @faila: first failed drive index + * @failb: second failed drive index + * @blocks: array of source pointers where the last two entries are p and q + * @submit: submission/completion modifiers + */ +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) +{ + BUG_ON(faila == failb); + if (failb < faila) + swap(faila, failb); + + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); + + /* we need to preserve the contents of 'blocks' for the async + * case, so punt to synchronous if a scribble buffer is not available + */ + if (!submit->scribble) { + void **ptrs = (void **) blocks; + int i; + + async_tx_quiesce(&submit->depend_tx); + for (i = 0; i < disks; i++) + ptrs[i] = page_address(blocks[i]); + + raid6_2data_recov(disks, bytes, faila, failb, ptrs); + + async_tx_sync_epilog(submit); + + return NULL; + } + + switch (disks) { + case 4: + /* dma devices do not uniformly understand a zero source pq + * operation (in contrast to the synchronous case), so + * explicitly handle the 4 disk special case + */ + return __2data_recov_4(bytes, faila, failb, blocks, submit); + case 5: + /* dma devices do not uniformly understand a single + * source pq operation (in contrast to the synchronous + * case), so explicitly handle the 5 disk special case + */ + return __2data_recov_5(bytes, faila, failb, blocks, submit); + default: + return __2data_recov_n(disks, bytes, faila, failb, blocks, submit); + } +} +EXPORT_SYMBOL_GPL(async_raid6_2data_recov); + +/** + * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block + * @disks: number of disks in the RAID-6 array + * @bytes: block size + * @faila: failed drive index + * @blocks: array of source pointers where the last two entries are p and q + * @submit: submission/completion modifiers + */ +struct dma_async_tx_descriptor * +async_raid6_datap_recov(int disks, size_t bytes, int faila, + struct page **blocks, struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *dq; + u8 coef; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + struct page *srcs[2]; + + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); + + /* we need to preserve the contents of 'blocks' for the async + * case, so punt to synchronous if a scribble buffer is not available + */ + if (!scribble) { + void **ptrs = (void **) blocks; + int i; + + async_tx_quiesce(&submit->depend_tx); + for (i = 0; i < disks; i++) + ptrs[i] = page_address(blocks[i]); + + raid6_datap_recov(disks, bytes, faila, ptrs); + + async_tx_sync_epilog(submit); + + return NULL; + } + + p = blocks[disks-2]; + q = blocks[disks-1]; + + /* Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = blocks[faila]; + blocks[faila] = (void *)raid6_empty_zero_page; + blocks[disks-1] = dq; + + /* in the 4 disk case we only need to perform a single source + * multiplication + */ + if (disks == 4) { + int good = faila == 0 ? 1 : 0; + struct page *g = blocks[good]; + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_memcpy(p, g, 0, 0, bytes, submit); + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + } else { + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + } + + /* Restore pointer table */ + blocks[faila] = dq; + blocks[disks-1] = q; + + /* calculate g^{-faila} */ + coef = raid6_gfinv[raid6_gfexp[faila]]; + + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_mult(dq, dq, coef, bytes, submit); + + srcs[0] = p; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(p, srcs, 0, 2, bytes, submit); + + return tx; +} +EXPORT_SYMBOL_GPL(async_raid6_datap_recov); + +MODULE_AUTHOR("Dan Williams "); +MODULE_DESCRIPTION("asynchronous RAID-6 recovery api"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index e6ce5f004f9..866e61c4e2e 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -194,5 +194,13 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, size_t len, enum sum_check_flags *pqres, struct page *spare, struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb, + struct page **ptrs, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_raid6_datap_recov(int src_num, size_t bytes, int faila, + struct page **ptrs, struct async_submit_ctl *submit); + void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ -- cgit v1.2.3 From cb3c82992f62f838e6476a0bff12909158007fc6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:37 -0700 Subject: async_tx: raid6 recovery self test Port drivers/md/raid6test/test.c to use the async raid6 recovery routines. This is meant as a unit test for raid6 acceleration drivers. In addition to the 16-drive test case this implements tests for the 4-disk and 5-disk special cases (dma devices can not generically handle less than 2 sources), and adds a test for the D+Q case. Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- crypto/async_tx/Makefile | 1 + crypto/async_tx/raid6test.c | 241 ++++++++++++++++++++++++++++++++++++++++++++ drivers/md/Kconfig | 13 +++ 3 files changed, 255 insertions(+) create mode 100644 crypto/async_tx/raid6test.c (limited to 'crypto') diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 9a1a76811b8..d1e0e6f72bc 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o obj-$(CONFIG_ASYNC_PQ) += async_pq.o obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o +obj-$(CONFIG_ASYNC_RAID6_TEST) += raid6test.o diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c new file mode 100644 index 00000000000..98c83ca96c8 --- /dev/null +++ b/crypto/async_tx/raid6test.c @@ -0,0 +1,241 @@ +/* + * asynchronous raid6 recovery self test + * Copyright (c) 2009, Intel Corporation. + * + * based on drivers/md/raid6test/test.c: + * Copyright 2002-2007 H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#include +#include + +#undef pr +#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args) + +#define NDISKS 16 /* Including P and Q */ + +static struct page *dataptrs[NDISKS]; +static struct page *data[NDISKS+3]; +static struct page *spare; +static struct page *recovi; +static struct page *recovj; + +static void callback(void *param) +{ + struct completion *cmp = param; + + complete(cmp); +} + +static void makedata(int disks) +{ + int i, j; + + for (i = 0; i < disks; i++) { + for (j = 0; j < PAGE_SIZE/sizeof(u32); j += sizeof(u32)) { + u32 *p = page_address(data[i]) + j; + + *p = random32(); + } + + dataptrs[i] = data[i]; + } +} + +static char disk_type(int d, int disks) +{ + if (d == disks - 2) + return 'P'; + else if (d == disks - 1) + return 'Q'; + else + return 'D'; +} + +/* Recover two failed blocks. */ +static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs) +{ + struct async_submit_ctl submit; + addr_conv_t addr_conv[disks]; + struct completion cmp; + struct dma_async_tx_descriptor *tx = NULL; + enum sum_check_flags result = ~0; + + if (faila > failb) + swap(faila, failb); + + if (failb == disks-1) { + if (faila == disks-2) { + /* P+Q failure. Just rebuild the syndrome. */ + init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); + tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); + } else { + struct page *blocks[disks]; + struct page *dest; + int count = 0; + int i; + + /* data+Q failure. Reconstruct data from P, + * then rebuild syndrome + */ + for (i = disks; i-- ; ) { + if (i == faila || i == failb) + continue; + blocks[count++] = ptrs[i]; + } + dest = ptrs[faila]; + init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, + NULL, NULL, addr_conv); + tx = async_xor(dest, blocks, 0, count, bytes, &submit); + + init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv); + tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); + } + } else { + if (failb == disks-2) { + /* data+P failure. */ + init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); + tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit); + } else { + /* data+data failure. */ + init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); + tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit); + } + } + init_completion(&cmp); + init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv); + tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit); + async_tx_issue_pending(tx); + + if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) + pr("%s: timeout! (faila: %d failb: %d disks: %d)\n", + __func__, faila, failb, disks); + + if (result != 0) + pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n", + __func__, faila, failb, result); +} + +static int test_disks(int i, int j, int disks) +{ + int erra, errb; + + memset(page_address(recovi), 0xf0, PAGE_SIZE); + memset(page_address(recovj), 0xba, PAGE_SIZE); + + dataptrs[i] = recovi; + dataptrs[j] = recovj; + + raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs); + + erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE); + errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE); + + pr("%s(%d, %d): faila=%3d(%c) failb=%3d(%c) %s\n", + __func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks), + (!erra && !errb) ? "OK" : !erra ? "ERRB" : !errb ? "ERRA" : "ERRAB"); + + dataptrs[i] = data[i]; + dataptrs[j] = data[j]; + + return erra || errb; +} + +static int test(int disks, int *tests) +{ + addr_conv_t addr_conv[disks]; + struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; + struct completion cmp; + int err = 0; + int i, j; + + recovi = data[disks]; + recovj = data[disks+1]; + spare = data[disks+2]; + + makedata(disks); + + /* Nuke syndromes */ + memset(page_address(data[disks-2]), 0xee, PAGE_SIZE); + memset(page_address(data[disks-1]), 0xee, PAGE_SIZE); + + /* Generate assumed good syndrome */ + init_completion(&cmp); + init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv); + tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit); + async_tx_issue_pending(tx); + + if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) { + pr("error: initial gen_syndrome(%d) timed out\n", disks); + return 1; + } + + pr("testing the %d-disk case...\n", disks); + for (i = 0; i < disks-1; i++) + for (j = i+1; j < disks; j++) { + (*tests)++; + err += test_disks(i, j, disks); + } + + return err; +} + + +static int raid6_test(void) +{ + int err = 0; + int tests = 0; + int i; + + for (i = 0; i < NDISKS+3; i++) { + data[i] = alloc_page(GFP_KERNEL); + if (!data[i]) { + while (i--) + put_page(data[i]); + return -ENOMEM; + } + } + + /* the 4-disk and 5-disk cases are special for the recovery code */ + if (NDISKS > 4) + err += test(4, &tests); + if (NDISKS > 5) + err += test(5, &tests); + err += test(NDISKS, &tests); + + pr("\n"); + pr("complete (%d tests, %d failure%s)\n", + tests, err, err == 1 ? "" : "s"); + + for (i = 0; i < NDISKS+3; i++) + put_page(data[i]); + + return 0; +} + +static void raid6_test_exit(void) +{ +} + +/* when compiled-in wait for drivers to load first (assumes dma drivers + * are also compliled-in) + */ +late_initcall(raid6_test); +module_exit(raid6_test_exit); +MODULE_AUTHOR("Dan Williams "); +MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests"); +MODULE_LICENSE("GPL"); diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 36e0675be9f..41b3ae25b81 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -155,6 +155,19 @@ config MD_RAID456 config MD_RAID6_PQ tristate +config ASYNC_RAID6_TEST + tristate "Self test for hardware accelerated raid6 recovery" + depends on MD_RAID6_PQ + select ASYNC_RAID6_RECOV + ---help--- + This is a one-shot self test that permutes through the + recovery of all the possible two disk failure scenarios for a + N-disk array. Recovery is performed with the asynchronous + raid6 recovery routines, and will optionally use an offload + engine if one is available. + + If unsure, say N. + config MD_MULTIPATH tristate "Multipath I/O support" depends on BLK_DEV_MD -- cgit v1.2.3 From 0403e3827788d878163f9ef0541b748b0f88ca5d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:50 -0700 Subject: dmaengine: add fence support Some engines optimize operation by reading ahead in the descriptor chain such that descriptor2 may start execution before descriptor1 completes. If descriptor2 depends on the result from descriptor1 then a fence is required (on descriptor2) to disable this optimization. The async_tx api could implicitly identify dependencies via the 'depend_tx' parameter, but that would constrain cases where the dependency chain only specifies a completion order rather than a data dependency. So, provide an ASYNC_TX_FENCE to explicitly identify data dependencies. Signed-off-by: Dan Williams --- crypto/async_tx/async_memcpy.c | 7 ++++-- crypto/async_tx/async_memset.c | 7 ++++-- crypto/async_tx/async_pq.c | 5 ++++ crypto/async_tx/async_raid6_recov.c | 47 +++++++++++++++++++++---------------- crypto/async_tx/async_xor.c | 11 ++++++--- drivers/md/raid5.c | 37 ++++++++++++++++++----------- include/linux/async_tx.h | 3 +++ include/linux/dmaengine.h | 3 +++ 8 files changed, 79 insertions(+), 41 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index 98e15bd0dcb..b38cbb3fd52 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -52,9 +52,12 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, if (device) { dma_addr_t dma_dest, dma_src; - unsigned long dma_prep_flags; + unsigned long dma_prep_flags = 0; - dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; + if (submit->cb_fn) + dma_prep_flags |= DMA_PREP_INTERRUPT; + if (submit->flags & ASYNC_TX_FENCE) + dma_prep_flags |= DMA_PREP_FENCE; dma_dest = dma_map_page(device->dev, dest, dest_offset, len, DMA_FROM_DEVICE); diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index b896a6e5f67..a374784e332 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -49,9 +49,12 @@ async_memset(struct page *dest, int val, unsigned int offset, size_t len, if (device) { dma_addr_t dma_dest; - unsigned long dma_prep_flags; + unsigned long dma_prep_flags = 0; - dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; + if (submit->cb_fn) + dma_prep_flags |= DMA_PREP_INTERRUPT; + if (submit->flags & ASYNC_TX_FENCE) + dma_prep_flags |= DMA_PREP_FENCE; dma_dest = dma_map_page(device->dev, dest, offset, len, DMA_FROM_DEVICE); diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 108b21efb49..a25e290c39f 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -101,6 +101,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, */ if (src_cnt > pq_src_cnt) { submit->flags &= ~ASYNC_TX_ACK; + submit->flags |= ASYNC_TX_FENCE; dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP; submit->cb_fn = NULL; submit->cb_param = NULL; @@ -111,6 +112,8 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, if (cb_fn_orig) dma_flags |= DMA_PREP_INTERRUPT; } + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; /* Since we have clobbered the src_list we are committed * to doing this asynchronously. Drivers force forward @@ -282,6 +285,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, dma_flags |= DMA_PREP_PQ_DISABLE_P; if (!Q(blocks, disks)) dma_flags |= DMA_PREP_PQ_DISABLE_Q; + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; for (i = 0; i < disks; i++) if (likely(blocks[i])) { BUG_ON(is_raid6_zero_block(blocks[i])); diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index 0c14d48c989..822a42d1006 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c @@ -44,6 +44,8 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, struct dma_async_tx_descriptor *tx; enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); @@ -89,6 +91,8 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len, struct dma_async_tx_descriptor *tx; enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, @@ -138,7 +142,7 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, srcs[1] = q; coef[0] = raid6_gfexi[failb-faila]; coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); tx = async_sum_product(b, srcs, coef, bytes, submit); /* Dy = P+Pxy+Dx */ @@ -188,23 +192,23 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, dp = blocks[faila]; dq = blocks[failb]; - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); tx = async_memcpy(dp, g, 0, 0, bytes, submit); - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); /* compute P + Pxy */ srcs[0] = dp; srcs[1] = p; - init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, - scribble); + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); tx = async_xor(dp, srcs, 0, 2, bytes, submit); /* compute Q + Qxy */ srcs[0] = dq; srcs[1] = q; - init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, - scribble); + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); tx = async_xor(dq, srcs, 0, 2, bytes, submit); /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ @@ -212,7 +216,7 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, srcs[1] = dq; coef[0] = raid6_gfexi[failb-faila]; coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); tx = async_sum_product(dq, srcs, coef, bytes, submit); /* Dy = P+Pxy+Dx */ @@ -252,7 +256,7 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb, blocks[failb] = (void *)raid6_empty_zero_page; blocks[disks-1] = dq; - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); /* Restore pointer table */ @@ -264,15 +268,15 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb, /* compute P + Pxy */ srcs[0] = dp; srcs[1] = p; - init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, - scribble); + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); tx = async_xor(dp, srcs, 0, 2, bytes, submit); /* compute Q + Qxy */ srcs[0] = dq; srcs[1] = q; - init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, - scribble); + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); tx = async_xor(dq, srcs, 0, 2, bytes, submit); /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ @@ -280,7 +284,7 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb, srcs[1] = dq; coef[0] = raid6_gfexi[failb-faila]; coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); tx = async_sum_product(dq, srcs, coef, bytes, submit); /* Dy = P+Pxy+Dx */ @@ -407,13 +411,16 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, int good = faila == 0 ? 1 : 0; struct page *g = blocks[good]; - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, + scribble); tx = async_memcpy(p, g, 0, 0, bytes, submit); - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, + scribble); tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); } else { - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, + scribble); tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); } @@ -426,11 +433,11 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, srcs[0] = dq; srcs[1] = q; - init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, - scribble); + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); tx = async_xor(dq, srcs, 0, 2, bytes, submit); - init_async_submit(submit, 0, tx, NULL, NULL, scribble); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); tx = async_mult(dq, dq, coef, bytes, submit); srcs[0] = p; diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 56b5f98da46..db279872ef3 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -69,6 +69,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, */ if (src_cnt > xor_src_cnt) { submit->flags &= ~ASYNC_TX_ACK; + submit->flags |= ASYNC_TX_FENCE; dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; submit->cb_fn = NULL; submit->cb_param = NULL; @@ -78,7 +79,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, } if (submit->cb_fn) dma_flags |= DMA_PREP_INTERRUPT; - + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; /* Since we have clobbered the src_list we are committed * to doing this asynchronously. Drivers force forward progress * in case they can not provide a descriptor @@ -264,12 +266,15 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, dma_src = (dma_addr_t *) src_list; if (dma_src && device && src_cnt <= device->max_xor) { - unsigned long dma_prep_flags; + unsigned long dma_prep_flags = 0; int i; pr_debug("%s: (async) len: %zu\n", __func__, len); - dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; + if (submit->cb_fn) + dma_prep_flags |= DMA_PREP_INTERRUPT; + if (submit->flags & ASYNC_TX_FENCE) + dma_prep_flags |= DMA_PREP_FENCE; for (i = 0; i < src_cnt; i++) dma_src[i] = dma_map_page(device->dev, src_list[i], offset, len, DMA_TO_DEVICE); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0a5cf217121..54ef8d75541 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -502,13 +502,17 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, int i; int page_offset; struct async_submit_ctl submit; + enum async_tx_flags flags = 0; if (bio->bi_sector >= sector) page_offset = (signed)(bio->bi_sector - sector) * 512; else page_offset = (signed)(sector - bio->bi_sector) * -512; - init_async_submit(&submit, 0, tx, NULL, NULL, NULL); + if (frombio) + flags |= ASYNC_TX_FENCE; + init_async_submit(&submit, flags, tx, NULL, NULL, NULL); + bio_for_each_segment(bvl, bio, i) { int len = bio_iovec_idx(bio, i)->bv_len; int clen; @@ -685,7 +689,7 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) atomic_inc(&sh->count); - init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, + init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, ops_complete_compute, sh, to_addr_conv(sh, percpu)); if (unlikely(count == 1)) tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); @@ -763,7 +767,8 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) count = set_syndrome_sources(blocks, sh); blocks[count] = NULL; /* regenerating p is not necessary */ BUG_ON(blocks[count+1] != dest); /* q should already be set */ - init_async_submit(&submit, 0, NULL, ops_complete_compute, sh, + init_async_submit(&submit, ASYNC_TX_FENCE, NULL, + ops_complete_compute, sh, to_addr_conv(sh, percpu)); tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); } else { @@ -775,8 +780,8 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) blocks[count++] = sh->dev[i].page; } - init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, - ops_complete_compute, sh, + init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, + NULL, ops_complete_compute, sh, to_addr_conv(sh, percpu)); tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit); } @@ -837,8 +842,9 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) /* Q disk is one of the missing disks */ if (faila == syndrome_disks) { /* Missing P+Q, just recompute */ - init_async_submit(&submit, 0, NULL, ops_complete_compute, - sh, to_addr_conv(sh, percpu)); + init_async_submit(&submit, ASYNC_TX_FENCE, NULL, + ops_complete_compute, sh, + to_addr_conv(sh, percpu)); return async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); } else { @@ -859,21 +865,24 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) blocks[count++] = sh->dev[i].page; } dest = sh->dev[data_target].page; - init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, - NULL, NULL, to_addr_conv(sh, percpu)); + init_async_submit(&submit, + ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, + NULL, NULL, NULL, + to_addr_conv(sh, percpu)); tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit); count = set_syndrome_sources(blocks, sh); - init_async_submit(&submit, 0, tx, ops_complete_compute, - sh, to_addr_conv(sh, percpu)); + init_async_submit(&submit, ASYNC_TX_FENCE, tx, + ops_complete_compute, sh, + to_addr_conv(sh, percpu)); return async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); } } - init_async_submit(&submit, 0, NULL, ops_complete_compute, sh, - to_addr_conv(sh, percpu)); + init_async_submit(&submit, ASYNC_TX_FENCE, NULL, ops_complete_compute, + sh, to_addr_conv(sh, percpu)); if (failb == syndrome_disks) { /* We're missing D+P. */ return async_raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, @@ -916,7 +925,7 @@ ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu, xor_srcs[count++] = dev->page; } - init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx, + init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, ops_complete_prexor, sh, to_addr_conv(sh, percpu)); tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 866e61c4e2e..a1c486a88e8 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -58,11 +58,14 @@ struct dma_chan_ref { * array. * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a * dependency chain + * @ASYNC_TX_FENCE: specify that the next operation in the dependency + * chain uses this operation's result as an input */ enum async_tx_flags { ASYNC_TX_XOR_ZERO_DST = (1 << 0), ASYNC_TX_XOR_DROP_DST = (1 << 1), ASYNC_TX_ACK = (1 << 2), + ASYNC_TX_FENCE = (1 << 3), }; /** diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 1012f1abcb5..4d6c1c925fd 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -87,6 +87,8 @@ enum dma_transaction_type { * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as * sources that were the result of a previous operation, in the case of a PQ * operation it continues the calculation with new sources + * @DMA_PREP_FENCE - tell the driver that subsequent operations depend + * on the result of this operation */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), @@ -98,6 +100,7 @@ enum dma_ctrl_flags { DMA_PREP_PQ_DISABLE_P = (1 << 6), DMA_PREP_PQ_DISABLE_Q = (1 << 7), DMA_PREP_CONTINUE = (1 << 8), + DMA_PREP_FENCE = (1 << 9), }; /** -- cgit v1.2.3 From 138f4c359d23d2ec38d18bd70dd9613ae515fe93 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:51 -0700 Subject: dmaengine, async_tx: add a "no channel switch" allocator Channel switching is problematic for some dmaengine drivers as the architecture precludes separating the ->prep from ->submit. In these cases the driver can select ASYNC_TX_DISABLE_CHANNEL_SWITCH to modify the async_tx allocator to only return channels that support all of the required asynchronous operations. For example MD_RAID456=y selects support for asynchronous xor, xor validate, pq, pq validate, and memcpy. When ASYNC_TX_DISABLE_CHANNEL_SWITCH=y any channel with all these capabilities is marked DMA_ASYNC_TX allowing async_tx_find_channel() to quickly locate compatible channels with the guarantee that dependency chains will remain on one channel. When ASYNC_TX_DISABLE_CHANNEL_SWITCH=n async_tx_find_channel() may select channels that lead to operation chains that need to cross channel boundaries using the async_tx channel switch capability. Signed-off-by: Dan Williams --- crypto/async_tx/async_tx.c | 4 ++++ drivers/dma/Kconfig | 4 ++++ drivers/dma/dmaengine.c | 40 ++++++++++++++++++++++++++++++++++++++++ include/linux/dmaengine.h | 10 +++++++++- 4 files changed, 57 insertions(+), 1 deletion(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 60615fedcf5..f9cdf04fe7c 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -81,6 +81,10 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, struct dma_device *device = chan->device; struct dma_async_tx_descriptor *intr_tx = (void *) ~0; + #ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH + BUG(); + #endif + /* first check to see if we can still append to depend_tx */ spin_lock_bh(&depend_tx->lock); if (depend_tx->parent && depend_tx->chan == tx->chan) { diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 912a51b5cbd..ddcd9793b25 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -17,11 +17,15 @@ if DMADEVICES comment "DMA Devices" +config ASYNC_TX_DISABLE_CHANNEL_SWITCH + bool + config INTEL_IOATDMA tristate "Intel I/OAT DMA support" depends on PCI && X86 select DMA_ENGINE select DCA + select ASYNC_TX_DISABLE_CHANNEL_SWITCH help Enable support for the Intel(R) I/OAT DMA engine present in recent Intel Xeon chipsets. diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 96598479eec..d5bc628d207 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -608,6 +608,40 @@ void dmaengine_put(void) } EXPORT_SYMBOL(dmaengine_put); +static bool device_has_all_tx_types(struct dma_device *device) +{ + /* A device that satisfies this test has channels that will never cause + * an async_tx channel switch event as all possible operation types can + * be handled. + */ + #ifdef CONFIG_ASYNC_TX_DMA + if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE) + if (!dma_has_cap(DMA_MEMCPY, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE) + if (!dma_has_cap(DMA_MEMSET, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE) + if (!dma_has_cap(DMA_XOR, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE) + if (!dma_has_cap(DMA_PQ, device->cap_mask)) + return false; + #endif + + return true; +} + static int get_dma_id(struct dma_device *device) { int rc; @@ -665,6 +699,12 @@ int dma_async_device_register(struct dma_device *device) BUG_ON(!device->device_issue_pending); BUG_ON(!device->dev); + /* note: this only matters in the + * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case + */ + if (device_has_all_tx_types(device)) + dma_cap_set(DMA_ASYNC_TX, device->cap_mask); + idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL); if (!idr_ref) return -ENOMEM; diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 4d6c1c925fd..86853ed7970 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -48,6 +48,9 @@ enum dma_status { /** * enum dma_transaction_type - DMA transaction types/indexes + * + * Note: The DMA_ASYNC_TX capability is not to be set by drivers. It is + * automatically set as dma devices are registered. */ enum dma_transaction_type { DMA_MEMCPY, @@ -61,6 +64,7 @@ enum dma_transaction_type { DMA_MEMCPY_CRC32C, DMA_INTERRUPT, DMA_PRIVATE, + DMA_ASYNC_TX, DMA_SLAVE, }; @@ -396,7 +400,11 @@ static inline void net_dmaengine_put(void) #ifdef CONFIG_ASYNC_TX_DMA #define async_dmaengine_get() dmaengine_get() #define async_dmaengine_put() dmaengine_put() +#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH +#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX) +#else #define async_dma_find_channel(type) dma_find_channel(type) +#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */ #else static inline void async_dmaengine_get(void) { @@ -409,7 +417,7 @@ async_dma_find_channel(enum dma_transaction_type type) { return NULL; } -#endif +#endif /* CONFIG_ASYNC_TX_DMA */ dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); -- cgit v1.2.3 From 83544ae9f3991bfc7d5e0fe9a3008cd05a8d57b7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:53 -0700 Subject: dmaengine, async_tx: support alignment checks Some engines have transfer size and address alignment restrictions. Add a per-operation alignment property to struct dma_device that the async routines and dmatest can use to check alignment capabilities. Signed-off-by: Dan Williams --- crypto/async_tx/async_memcpy.c | 2 +- crypto/async_tx/async_memset.c | 2 +- crypto/async_tx/async_pq.c | 6 ++++-- crypto/async_tx/async_xor.c | 5 +++-- drivers/dma/dmatest.c | 14 ++++++++++++++ include/linux/dmaengine.h | 44 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 67 insertions(+), 6 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index b38cbb3fd52..0ec1fb69d4e 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -50,7 +50,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; - if (device) { + if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) { dma_addr_t dma_dest, dma_src; unsigned long dma_prep_flags = 0; diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index a374784e332..58e4a8752ae 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -47,7 +47,7 @@ async_memset(struct page *dest, int val, unsigned int offset, size_t len, struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; - if (device) { + if (device && is_dma_fill_aligned(device, offset, 0, len)) { dma_addr_t dma_dest; unsigned long dma_prep_flags = 0; diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index a25e290c39f..b88db6d1dc6 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -211,7 +211,8 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, if (dma_src && device && (src_cnt <= dma_maxpq(device, 0) || - dma_maxpq(device, DMA_PREP_CONTINUE) > 0)) { + dma_maxpq(device, DMA_PREP_CONTINUE) > 0) && + is_dma_pq_aligned(device, offset, 0, len)) { /* run the p+q asynchronously */ pr_debug("%s: (async) disks: %d len: %zu\n", __func__, disks, len); @@ -274,7 +275,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, else if (sizeof(dma_addr_t) <= sizeof(struct page *)) dma_src = (dma_addr_t *) blocks; - if (dma_src && device && disks <= dma_maxpq(device, 0)) { + if (dma_src && device && disks <= dma_maxpq(device, 0) && + is_dma_pq_aligned(device, offset, 0, len)) { struct device *dev = device->dev; dma_addr_t *pq = &dma_src[disks-2]; int i; diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index db279872ef3..b459a9034aa 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -193,7 +193,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, else if (sizeof(dma_addr_t) <= sizeof(struct page *)) dma_src = (dma_addr_t *) src_list; - if (dma_src && chan) { + if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) { /* run the xor asynchronously */ pr_debug("%s (async): len: %zu\n", __func__, len); @@ -265,7 +265,8 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, else if (sizeof(dma_addr_t) <= sizeof(struct page *)) dma_src = (dma_addr_t *) src_list; - if (dma_src && device && src_cnt <= device->max_xor) { + if (dma_src && device && src_cnt <= device->max_xor && + is_dma_xor_aligned(device, offset, 0, len)) { unsigned long dma_prep_flags = 0; int i; diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 58e49e41c7a..a3722a7384b 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -288,6 +288,7 @@ static int dmatest_func(void *data) dma_addr_t dma_dsts[dst_cnt]; struct completion cmp; unsigned long tmo = msecs_to_jiffies(3000); + u8 align = 0; total_tests++; @@ -295,6 +296,18 @@ static int dmatest_func(void *data) src_off = dmatest_random() % (test_buf_size - len + 1); dst_off = dmatest_random() % (test_buf_size - len + 1); + /* honor alignment restrictions */ + if (thread->type == DMA_MEMCPY) + align = dev->copy_align; + else if (thread->type == DMA_XOR) + align = dev->xor_align; + else if (thread->type == DMA_PQ) + align = dev->pq_align; + + len = (len >> align) << align; + src_off = (src_off >> align) << align; + dst_off = (dst_off >> align) << align; + dmatest_init_srcs(thread->srcs, src_off, len); dmatest_init_dsts(thread->dsts, dst_off, len); @@ -311,6 +324,7 @@ static int dmatest_func(void *data) DMA_BIDIRECTIONAL); } + if (thread->type == DMA_MEMCPY) tx = dev->device_prep_dma_memcpy(chan, dma_dsts[0] + dst_off, diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index db23fd583f9..835b9c7bf1c 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -245,6 +245,10 @@ struct dma_async_tx_descriptor { * @cap_mask: one or more dma_capability flags * @max_xor: maximum number of xor sources, 0 if no capability * @max_pq: maximum number of PQ sources and PQ-continue capability + * @copy_align: alignment shift for memcpy operations + * @xor_align: alignment shift for xor operations + * @pq_align: alignment shift for pq operations + * @fill_align: alignment shift for memset operations * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the @@ -271,6 +275,10 @@ struct dma_device { dma_cap_mask_t cap_mask; unsigned short max_xor; unsigned short max_pq; + u8 copy_align; + u8 xor_align; + u8 pq_align; + u8 fill_align; #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; @@ -314,6 +322,42 @@ struct dma_device { void (*device_issue_pending)(struct dma_chan *chan); }; +static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len) +{ + size_t mask; + + if (!align) + return true; + mask = (1 << align) - 1; + if (mask & (off1 | off2 | len)) + return false; + return true; +} + +static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->copy_align, off1, off2, len); +} + +static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->xor_align, off1, off2, len); +} + +static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->pq_align, off1, off2, len); +} + +static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->fill_align, off1, off2, len); +} + static inline void dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) { -- cgit v1.2.3 From 1b6df6930994d5d027375b07ac9da63644eb5758 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 16 Sep 2009 21:03:29 -0700 Subject: raid6test: fix stack overflow Testing on x86_64 with NDISKS=255 yields: do_IRQ: modprobe near stack overflow (cur:ffff88007d19c000,sp:ffff88007d19c128) ...and eventually general protection fault: 0000 [#1] Moving the scribble buffers off the stack allows the test to complete successfully. Signed-off-by: Dan Williams --- crypto/async_tx/raid6test.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c index 98c83ca96c8..3ec27c7e62e 100644 --- a/crypto/async_tx/raid6test.c +++ b/crypto/async_tx/raid6test.c @@ -28,6 +28,7 @@ #define NDISKS 16 /* Including P and Q */ static struct page *dataptrs[NDISKS]; +static addr_conv_t addr_conv[NDISKS]; static struct page *data[NDISKS+3]; static struct page *spare; static struct page *recovi; @@ -69,7 +70,6 @@ static char disk_type(int d, int disks) static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs) { struct async_submit_ctl submit; - addr_conv_t addr_conv[disks]; struct completion cmp; struct dma_async_tx_descriptor *tx = NULL; enum sum_check_flags result = ~0; @@ -156,7 +156,6 @@ static int test_disks(int i, int j, int disks) static int test(int disks, int *tests) { - addr_conv_t addr_conv[disks]; struct dma_async_tx_descriptor *tx; struct async_submit_ctl submit; struct completion cmp; -- cgit v1.2.3 From 1f6672d44c1ae7408b43c06170ec34eb0a0e9b9f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 21 Sep 2009 10:47:40 -0700 Subject: async_tx/raid6: add missing dma_unmap calls to the async fail case If we are unable to offload async_mult() or async_sum_product(), then unmap the buffers before falling through to the synchronous path. Signed-off-by: Dan Williams --- crypto/async_tx/async_raid6_recov.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'crypto') diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index 822a42d1006..6d73dde4786 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c @@ -55,6 +55,13 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, async_tx_submit(chan, tx, submit); return tx; } + + /* could not get a descriptor, unmap and fall through to + * the synchronous path + */ + dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); + dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE); } /* run the operation synchronously */ @@ -101,6 +108,12 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len, async_tx_submit(chan, tx, submit); return tx; } + + /* could not get a descriptor, unmap and fall through to + * the synchronous path + */ + dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); } /* no channel available, or failed to allocate a descriptor, so -- cgit v1.2.3 From d43c36dc6b357fa1806800f18aa30123c747a6d1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 7 Oct 2009 17:09:06 +0400 Subject: headers: remove sched.h from interrupt.h After m68k's task_thread_info() doesn't refer to current, it's possible to remove sched.h from interrupt.h and not break m68k! Many thanks to Heiko Carstens for allowing this. Signed-off-by: Alexey Dobriyan --- arch/arm/kernel/time.c | 1 + arch/arm/mach-integrator/pci_v3.c | 1 + arch/arm/plat-s3c24xx/adc.c | 1 + arch/blackfin/kernel/time.c | 1 + arch/m32r/kernel/smp.c | 1 + arch/um/drivers/line.c | 1 + arch/um/drivers/port_kern.c | 1 + arch/um/kernel/irq.c | 1 + arch/x86/kernel/cpu/mcheck/mce_intel.c | 1 + arch/x86/kernel/pci-gart_64.c | 1 + arch/x86/kernel/reboot.c | 1 + arch/xtensa/kernel/time.c | 1 + crypto/aead.c | 1 + drivers/char/applicom.c | 1 + drivers/char/epca.c | 1 + drivers/char/generic_serial.c | 1 + drivers/char/istallion.c | 1 + drivers/char/nozomi.c | 1 + drivers/char/pty.c | 1 + drivers/char/rio/riocmd.c | 1 + drivers/char/rio/rioctrl.c | 1 + drivers/char/rio/riotty.c | 1 + drivers/char/ser_a2232.c | 1 + drivers/char/stallion.c | 1 + drivers/char/tlclk.c | 1 + drivers/hwmon/sht15.c | 1 + drivers/ieee1394/raw1394.c | 1 + drivers/ieee1394/video1394.c | 1 + drivers/infiniband/core/iwcm.c | 1 + drivers/infiniband/core/ucma.c | 1 + drivers/infiniband/hw/cxgb3/iwch_provider.c | 1 + drivers/infiniband/hw/cxgb3/iwch_qp.c | 1 + drivers/infiniband/hw/ipath/ipath_driver.c | 1 + drivers/infiniband/hw/ipath/ipath_iba7220.c | 1 + drivers/infiniband/hw/ipath/ipath_intr.c | 1 + drivers/infiniband/hw/ipath/ipath_qp.c | 1 + drivers/infiniband/hw/ipath/ipath_ruc.c | 1 + drivers/infiniband/hw/ipath/ipath_ud.c | 1 + drivers/infiniband/hw/ipath/ipath_user_pages.c | 1 + drivers/infiniband/hw/ipath/ipath_user_sdma.c | 1 + drivers/infiniband/hw/ipath/ipath_verbs_mcast.c | 1 + drivers/input/keyboard/hilkbd.c | 1 + drivers/input/keyboard/sunkbd.c | 1 + drivers/input/serio/libps2.c | 1 + drivers/input/serio/serio_raw.c | 1 + drivers/input/serio/serport.c | 1 + drivers/isdn/capi/kcapi.c | 1 + drivers/isdn/hisax/arcofi.c | 1 + drivers/isdn/hisax/hfc_2bds0.c | 1 + drivers/isdn/hisax/hfc_pci.c | 1 + drivers/isdn/hysdn/hysdn_procconf.c | 1 + drivers/isdn/hysdn/hysdn_proclog.c | 1 + drivers/isdn/pcbit/drv.c | 1 + drivers/isdn/pcbit/layer2.c | 1 + drivers/isdn/sc/init.c | 1 + drivers/lguest/interrupts_and_traps.c | 1 + drivers/media/dvb/dvb-core/dvb_net.c | 1 + drivers/media/video/meye.c | 1 + drivers/media/video/videobuf-core.c | 1 + drivers/media/video/videobuf-dma-sg.c | 1 + drivers/message/fusion/mptlan.c | 1 + drivers/mfd/ucb1x00-core.c | 1 + drivers/misc/hpilo.c | 1 + drivers/misc/ibmasm/command.c | 1 + drivers/misc/ibmasm/event.c | 1 + drivers/misc/ibmasm/r_heartbeat.c | 1 + drivers/misc/phantom.c | 1 + drivers/mtd/devices/m25p80.c | 1 + drivers/mtd/devices/sst25l.c | 1 + drivers/net/bonding/bond_sysfs.c | 1 + drivers/net/depca.c | 1 + drivers/net/e100.c | 1 + drivers/net/eql.c | 1 + drivers/net/ethoc.c | 1 + drivers/net/ewrk3.c | 1 + drivers/net/forcedeth.c | 1 + drivers/net/hamachi.c | 1 + drivers/net/hamradio/baycom_epp.c | 1 + drivers/net/hamradio/baycom_ser_fdx.c | 1 + drivers/net/hamradio/baycom_ser_hdx.c | 1 + drivers/net/hamradio/hdlcdrv.c | 1 + drivers/net/hp100.c | 1 + drivers/net/igb/igb_ethtool.c | 1 + drivers/net/irda/toim3232-sir.c | 1 + drivers/net/ns83820.c | 1 + drivers/net/pcnet32.c | 1 + drivers/net/sb1000.c | 1 + drivers/net/sis900.c | 1 + drivers/net/skfp/skfddi.c | 1 + drivers/net/skge.c | 1 + drivers/net/slip.c | 1 + drivers/net/sungem.c | 1 + drivers/net/tokenring/ibmtr.c | 1 + drivers/net/typhoon.c | 1 + drivers/net/wan/cosa.c | 1 + drivers/net/wan/cycx_x25.c | 1 + drivers/net/wan/dscc4.c | 1 + drivers/net/wan/farsync.c | 1 + drivers/net/wireless/b43/pio.c | 1 + drivers/net/wireless/b43legacy/main.c | 1 + drivers/net/wireless/b43legacy/phy.c | 1 + drivers/net/wireless/hostap/hostap_info.c | 1 + drivers/net/wireless/hostap/hostap_ioctl.c | 1 + drivers/net/wireless/ipw2x00/ipw2200.c | 1 + drivers/net/wireless/iwlwifi/iwl-3945.c | 1 + drivers/net/wireless/iwlwifi/iwl-4965.c | 1 + drivers/net/wireless/iwlwifi/iwl-5000.c | 1 + drivers/net/wireless/iwlwifi/iwl-agn.c | 1 + drivers/net/wireless/iwlwifi/iwl-core.c | 1 + drivers/net/wireless/iwlwifi/iwl-hcmd.c | 1 + drivers/net/wireless/iwlwifi/iwl-tx.c | 1 + drivers/net/wireless/iwlwifi/iwl3945-base.c | 1 + drivers/net/wireless/iwmc3200wifi/cfg80211.c | 1 + drivers/net/wireless/iwmc3200wifi/commands.c | 1 + drivers/net/wireless/iwmc3200wifi/main.c | 1 + drivers/net/wireless/iwmc3200wifi/rx.c | 1 + drivers/net/wireless/libertas/cmd.c | 1 + drivers/net/wireless/libertas/tx.c | 1 + drivers/net/wireless/prism54/isl_ioctl.c | 1 + drivers/net/wireless/prism54/islpci_dev.c | 1 + drivers/net/wireless/prism54/islpci_mgt.c | 1 + drivers/net/wireless/rt2x00/rt2x00debug.c | 1 + drivers/pci/pcie/aer/aerdrv.c | 1 + drivers/rtc/interface.c | 1 + drivers/rtc/rtc-dev.c | 1 + drivers/uio/uio.c | 1 + drivers/uwb/whc-rc.c | 1 + fs/file.c | 1 + include/linux/interrupt.h | 2 +- include/linux/mmc/host.h | 1 + kernel/irq/handle.c | 1 + kernel/mutex-debug.c | 1 + kernel/time/timekeeping.c | 1 + lib/debugobjects.c | 1 + lib/fault-inject.c | 1 + mm/vmalloc.c | 1 + net/irda/ircomm/ircomm_tty_attach.c | 1 + net/irda/irlan/irlan_common.c | 1 + net/irda/irlan/irlan_eth.c | 1 + net/irda/irnet/irnet_irda.c | 1 + net/irda/irnet/irnet_ppp.c | 1 + net/mac80211/rc80211_pid_debugfs.c | 1 + net/netfilter/nf_conntrack_core.c | 1 + net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 + net/wireless/core.c | 1 + 145 files changed, 145 insertions(+), 1 deletion(-) (limited to 'crypto') diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 4cdc4a0bd02..d38cdf2c827 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c index 901cc205015..148d25fc636 100644 --- a/arch/arm/mach-integrator/pci_v3.c +++ b/arch/arm/mach-integrator/pci_v3.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/plat-s3c24xx/adc.c b/arch/arm/plat-s3c24xx/adc.c index 11117a7ba91..4d36b784fb8 100644 --- a/arch/arm/plat-s3c24xx/adc.c +++ b/arch/arm/plat-s3c24xx/adc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c index e5069fe6861..bd3b53da295 100644 --- a/arch/blackfin/kernel/time.c +++ b/arch/blackfin/kernel/time.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c index 1b7598e6f6e..8a88f1f0a3e 100644 --- a/arch/m32r/kernel/smp.c +++ b/arch/m32r/kernel/smp.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 14a102e877d..cf8a97f3451 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -5,6 +5,7 @@ #include "linux/irqreturn.h" #include "linux/kd.h" +#include "linux/sched.h" #include "chan_kern.h" #include "irq_kern.h" #include "irq_user.h" diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c index 19930081d3d..4ebc8a34738 100644 --- a/arch/um/drivers/port_kern.c +++ b/arch/um/drivers/port_kern.c @@ -7,6 +7,7 @@ #include "linux/interrupt.h" #include "linux/list.h" #include "linux/mutex.h" +#include "linux/workqueue.h" #include "asm/atomic.h" #include "init.h" #include "irq_kern.h" diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 454cdb43e35..039270b9b73 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -10,6 +10,7 @@ #include "linux/interrupt.h" #include "linux/kernel_stat.h" #include "linux/module.h" +#include "linux/sched.h" #include "linux/seq_file.h" #include "as-layout.h" #include "kern_util.h" diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 889f665fe93..7c785634af2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 98a827ee9ed..a7f1b64f86e 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 27349f92a6d..a1a3cdda06e 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 19085ff0484..19f7df30937 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/crypto/aead.c b/crypto/aead.c index d9aa733db16..0a55da70845 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c index 73a0765344b..fe2cb2f5db1 100644 --- a/drivers/char/applicom.c +++ b/drivers/char/applicom.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/epca.c b/drivers/char/epca.c index 9d589e3144d..dde5134713e 100644 --- a/drivers/char/epca.c +++ b/drivers/char/epca.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/generic_serial.c b/drivers/char/generic_serial.c index 9e4e569dc00..d400cbd280f 100644 --- a/drivers/char/generic_serial.c +++ b/drivers/char/generic_serial.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index ab2f3349c5c..402838f4083 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -19,6 +19,7 @@ /*****************************************************************************/ #include +#include #include #include #include diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c index ec58d8c387f..d3400b20444 100644 --- a/drivers/char/nozomi.c +++ b/drivers/char/nozomi.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/pty.c b/drivers/char/pty.c index e066c4fdf81..62f282e6763 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/rio/riocmd.c b/drivers/char/rio/riocmd.c index 01f2654d5a2..f121357e5af 100644 --- a/drivers/char/rio/riocmd.c +++ b/drivers/char/rio/riocmd.c @@ -32,6 +32,7 @@ */ #include +#include #include #include #include diff --git a/drivers/char/rio/rioctrl.c b/drivers/char/rio/rioctrl.c index 74339559f0b..780506326a7 100644 --- a/drivers/char/rio/rioctrl.c +++ b/drivers/char/rio/rioctrl.c @@ -31,6 +31,7 @@ */ #include +#include #include #include #include diff --git a/drivers/char/rio/riotty.c b/drivers/char/rio/riotty.c index 2fb49e89b32..47fab7c3307 100644 --- a/drivers/char/rio/riotty.c +++ b/drivers/char/rio/riotty.c @@ -33,6 +33,7 @@ #define __EXPLICIT_DEF_H__ #include +#include #include #include #include diff --git a/drivers/char/ser_a2232.c b/drivers/char/ser_a2232.c index 33a2b531802..9610861d1f5 100644 --- a/drivers/char/ser_a2232.c +++ b/drivers/char/ser_a2232.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c index 53e504f41b2..db6dcfa35ba 100644 --- a/drivers/char/stallion.c +++ b/drivers/char/stallion.c @@ -27,6 +27,7 @@ /*****************************************************************************/ #include +#include #include #include #include diff --git a/drivers/char/tlclk.c b/drivers/char/tlclk.c index 8f2284be68e..80ea6bcfffd 100644 --- a/drivers/char/tlclk.c +++ b/drivers/char/tlclk.c @@ -32,6 +32,7 @@ #include /* printk() */ #include /* everything... */ #include /* error codes */ +#include #include #include #include diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c index 303c02694c3..2da6fb2c325 100644 --- a/drivers/hwmon/sht15.c +++ b/drivers/hwmon/sht15.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c index 0bc3d78ce7b..8aa56ac07e2 100644 --- a/drivers/ieee1394/raw1394.c +++ b/drivers/ieee1394/raw1394.c @@ -29,6 +29,7 @@ #include #include +#include #include #include #include diff --git a/drivers/ieee1394/video1394.c b/drivers/ieee1394/video1394.c index d287ba79821..949064a0567 100644 --- a/drivers/ieee1394/video1394.c +++ b/drivers/ieee1394/video1394.c @@ -30,6 +30,7 @@ */ #include #include +#include #include #include #include diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 625fec5a741..0f89909abce 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 4346a24568f..bb96d3c4b0f 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 03cfaecc3bb..ed7175549eb 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 6e865347194..1cecf98829a 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -29,6 +29,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include #include "iwch_provider.h" #include "iwch.h" #include "iwch_cm.h" diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 04e88b60055..013d1380e77 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -31,6 +31,7 @@ * SOFTWARE. */ +#include #include #include #include diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c index b2a9d4c155d..a805402dd4a 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba7220.c +++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c @@ -37,6 +37,7 @@ #include #include +#include #include #include #include diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 6c21b4b5ec7..c0a03ac03ee 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -33,6 +33,7 @@ #include #include +#include #include "ipath_kernel.h" #include "ipath_verbs.h" diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 3a5a89b609c..cb2d3ef2ae1 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -32,6 +32,7 @@ */ #include +#include #include #include "ipath_verbs.h" diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 2296832f94d..1f95bbaf760 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -31,6 +31,7 @@ * SOFTWARE. */ +#include #include #include "ipath_verbs.h" diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 6076cb61bf6..7420715256a 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -31,6 +31,7 @@ * SOFTWARE. */ +#include #include #include "ipath_verbs.h" diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c index 855911e7396..82878e34862 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c @@ -33,6 +33,7 @@ #include #include +#include #include "ipath_kernel.h" diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c index 7bff4b9baa0..be78f6643c0 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c index d73e3223287..6923e1d986d 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -32,6 +32,7 @@ */ #include +#include #include "ipath_verbs.h" diff --git a/drivers/input/keyboard/hilkbd.c b/drivers/input/keyboard/hilkbd.c index e9d639ec283..5f72440b50c 100644 --- a/drivers/input/keyboard/hilkbd.c +++ b/drivers/input/keyboard/hilkbd.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_HP300 diff --git a/drivers/input/keyboard/sunkbd.c b/drivers/input/keyboard/sunkbd.c index 472b56639cd..a99a04b03ee 100644 --- a/drivers/input/keyboard/sunkbd.c +++ b/drivers/input/keyboard/sunkbd.c @@ -27,6 +27,7 @@ */ #include +#include #include #include #include diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c index 769ba65a585..f3876acc3e8 100644 --- a/drivers/input/serio/libps2.c +++ b/drivers/input/serio/libps2.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/drivers/input/serio/serio_raw.c b/drivers/input/serio/serio_raw.c index b03009bb746..27fdaaffbb4 100644 --- a/drivers/input/serio/serio_raw.c +++ b/drivers/input/serio/serio_raw.c @@ -9,6 +9,7 @@ * the Free Software Foundation. */ +#include #include #include #include diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c index b9694b6445d..6d345112bcb 100644 --- a/drivers/input/serio/serport.c +++ b/drivers/input/serio/serport.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index 57d26360f64..dc506ab99ca 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/isdn/hisax/arcofi.c b/drivers/isdn/hisax/arcofi.c index d30ce5b978c..85a8fd8dd0b 100644 --- a/drivers/isdn/hisax/arcofi.c +++ b/drivers/isdn/hisax/arcofi.c @@ -10,6 +10,7 @@ * */ +#include #include "hisax.h" #include "isdnl1.h" #include "isac.h" diff --git a/drivers/isdn/hisax/hfc_2bds0.c b/drivers/isdn/hisax/hfc_2bds0.c index 5c46a7130e0..8d22f50760e 100644 --- a/drivers/isdn/hisax/hfc_2bds0.c +++ b/drivers/isdn/hisax/hfc_2bds0.c @@ -11,6 +11,7 @@ */ #include +#include #include "hisax.h" #include "hfc_2bds0.h" #include "isdnl1.h" diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c index d110a77940a..10914731b30 100644 --- a/drivers/isdn/hisax/hfc_pci.c +++ b/drivers/isdn/hisax/hfc_pci.c @@ -20,6 +20,7 @@ #include "hfc_pci.h" #include "isdnl1.h" #include +#include #include static const char *hfcpci_revision = "$Revision: 1.48.2.4 $"; diff --git a/drivers/isdn/hysdn/hysdn_procconf.c b/drivers/isdn/hysdn/hysdn_procconf.c index 8f9f4912de3..90b35e1a4b7 100644 --- a/drivers/isdn/hysdn/hysdn_procconf.c +++ b/drivers/isdn/hysdn/hysdn_procconf.c @@ -11,6 +11,7 @@ * */ +#include #include #include #include diff --git a/drivers/isdn/hysdn/hysdn_proclog.c b/drivers/isdn/hysdn/hysdn_proclog.c index 8991d2c8ee4..8bcae28c440 100644 --- a/drivers/isdn/hysdn/hysdn_proclog.c +++ b/drivers/isdn/hysdn/hysdn_proclog.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "hysdn_defs.h" diff --git a/drivers/isdn/pcbit/drv.c b/drivers/isdn/pcbit/drv.c index 8c66bcb953a..123c1d6c43b 100644 --- a/drivers/isdn/pcbit/drv.c +++ b/drivers/isdn/pcbit/drv.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include diff --git a/drivers/isdn/pcbit/layer2.c b/drivers/isdn/pcbit/layer2.c index e075e8d2fce..30f0f45e313 100644 --- a/drivers/isdn/pcbit/layer2.c +++ b/drivers/isdn/pcbit/layer2.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c index dd0acd06750..5a0774880d5 100644 --- a/drivers/isdn/sc/init.c +++ b/drivers/isdn/sc/init.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "includes.h" #include "hardware.h" #include "card.h" diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 18648180db0..daaf8663164 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "lg.h" /* Allow Guests to use a non-128 (ie. non-Linux) syscall trap. */ diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c index 8c9ae0a3a27..0241a7c5c34 100644 --- a/drivers/media/dvb/dvb-core/dvb_net.c +++ b/drivers/media/dvb/dvb-core/dvb_net.c @@ -63,6 +63,7 @@ #include #include #include +#include #include "dvb_demux.h" #include "dvb_net.h" diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c index 4b1bc05a462..01e1eefcf1e 100644 --- a/drivers/media/video/meye.c +++ b/drivers/media/video/meye.c @@ -28,6 +28,7 @@ */ #include #include +#include #include #include #include diff --git a/drivers/media/video/videobuf-core.c b/drivers/media/video/videobuf-core.c index f1ccf98c0a6..8e93c6f25c8 100644 --- a/drivers/media/video/videobuf-core.c +++ b/drivers/media/video/videobuf-core.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c index 53cdd67cebe..032ebae0134 100644 --- a/drivers/media/video/videobuf-dma-sg.c +++ b/drivers/media/video/videobuf-dma-sg.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c index bc2ec2182c0..34f3f36f819 100644 --- a/drivers/message/fusion/mptlan.c +++ b/drivers/message/fusion/mptlan.c @@ -56,6 +56,7 @@ #include #include #include +#include #define my_VERSION MPT_LINUX_VERSION_COMMON #define MYNAM "mptlan" diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index fea9085fe52..60c3988f3cf 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -18,6 +18,7 @@ */ #include #include +#include #include #include #include diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c index 1ad27c6abcc..a92a3a742b4 100644 --- a/drivers/misc/hpilo.c +++ b/drivers/misc/hpilo.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/ibmasm/command.c b/drivers/misc/ibmasm/command.c index 276d3fb6809..e2031739aa2 100644 --- a/drivers/misc/ibmasm/command.c +++ b/drivers/misc/ibmasm/command.c @@ -22,6 +22,7 @@ * */ +#include #include "ibmasm.h" #include "lowlevel.h" diff --git a/drivers/misc/ibmasm/event.c b/drivers/misc/ibmasm/event.c index 68a0a5b9479..572d41ffc18 100644 --- a/drivers/misc/ibmasm/event.c +++ b/drivers/misc/ibmasm/event.c @@ -22,6 +22,7 @@ * */ +#include #include "ibmasm.h" #include "lowlevel.h" diff --git a/drivers/misc/ibmasm/r_heartbeat.c b/drivers/misc/ibmasm/r_heartbeat.c index bec9e2c44be..2de487ac788 100644 --- a/drivers/misc/ibmasm/r_heartbeat.c +++ b/drivers/misc/ibmasm/r_heartbeat.c @@ -20,6 +20,7 @@ * */ +#include #include "ibmasm.h" #include "dot_command.h" diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c index 90a95ce8dc3..04c27266f56 100644 --- a/drivers/misc/phantom.c +++ b/drivers/misc/phantom.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index 379c316f329..4c19269de91 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/drivers/mtd/devices/sst25l.c b/drivers/mtd/devices/sst25l.c index c2baf3353f8..0a11721f146 100644 --- a/drivers/mtd/devices/sst25l.c +++ b/drivers/mtd/devices/sst25l.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index ff449de6f3c..8762a27a2a1 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/depca.c b/drivers/net/depca.c index 9686c1fa28f..7a3bdac84ab 100644 --- a/drivers/net/depca.c +++ b/drivers/net/depca.c @@ -237,6 +237,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 679965c2bb8..5d2f48f0225 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -151,6 +151,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/eql.c b/drivers/net/eql.c index d4d9a3eda69..f5b96cadeb2 100644 --- a/drivers/net/eql.c +++ b/drivers/net/eql.c @@ -111,6 +111,7 @@ * Sorry, I had to rewrite most of this for 2.5.x -DaveM */ +#include #include #include #include diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c index 34d0c69e67f..96f5b2a2d2c 100644 --- a/drivers/net/ethoc.c +++ b/drivers/net/ethoc.c @@ -17,6 +17,7 @@ #include #include #include +#include #include static int buffer_size = 0x8000; /* 32 KBytes */ diff --git a/drivers/net/ewrk3.c b/drivers/net/ewrk3.c index b2a5ec8f372..dd4ba01fd92 100644 --- a/drivers/net/ewrk3.c +++ b/drivers/net/ewrk3.c @@ -145,6 +145,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 0a1c2bb27d4..e1da4666f20 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c index 1d5064a09ac..f7519a59494 100644 --- a/drivers/net/hamachi.c +++ b/drivers/net/hamachi.c @@ -145,6 +145,7 @@ static int tx_params[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; /* Time in jiffies before concluding the transmitter is hung. */ #define TX_TIMEOUT (5*HZ) +#include #include #include #include diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 7bcaf7c6624..e344c84c0ef 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c index aa4488e871b..ed60fd66427 100644 --- a/drivers/net/hamradio/baycom_ser_fdx.c +++ b/drivers/net/hamradio/baycom_ser_fdx.c @@ -71,6 +71,7 @@ /*****************************************************************************/ +#include #include #include #include diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c index 88c59359602..1686f6dcbbc 100644 --- a/drivers/net/hamradio/baycom_ser_hdx.c +++ b/drivers/net/hamradio/baycom_ser_hdx.c @@ -61,6 +61,7 @@ /*****************************************************************************/ +#include #include #include #include diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index 0013c409782..91c5790c958 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -42,6 +42,7 @@ /*****************************************************************************/ +#include #include #include #include diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c index a9a1a99f02d..dd866513806 100644 --- a/drivers/net/hp100.c +++ b/drivers/net/hp100.c @@ -98,6 +98,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/igb/igb_ethtool.c b/drivers/net/igb/igb_ethtool.c index d004c359244..deaea8fa103 100644 --- a/drivers/net/igb/igb_ethtool.c +++ b/drivers/net/igb/igb_ethtool.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "igb.h" diff --git a/drivers/net/irda/toim3232-sir.c b/drivers/net/irda/toim3232-sir.c index fcf287b749d..99e1ec02a01 100644 --- a/drivers/net/irda/toim3232-sir.c +++ b/drivers/net/irda/toim3232-sir.c @@ -120,6 +120,7 @@ #include #include #include +#include #include diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index c594e194647..57fd483dbb1 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -111,6 +111,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index 6d28b18e7e2..c1b3f09f452 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -31,6 +31,7 @@ static const char *const version = #include #include +#include #include #include #include diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c index ee366c5a8fa..c9c70ab0cce 100644 --- a/drivers/net/sb1000.c +++ b/drivers/net/sb1000.c @@ -36,6 +36,7 @@ static char version[] = "sb1000.c:v1.1.2 6/01/98 (fventuri@mediaone.net)\n"; #include #include +#include #include #include #include diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c index 97949d0a699..c072f7f36ac 100644 --- a/drivers/net/sis900.c +++ b/drivers/net/sis900.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c index 38a508b4aad..b27156eaf26 100644 --- a/drivers/net/skfp/skfddi.c +++ b/drivers/net/skfp/skfddi.c @@ -73,6 +73,7 @@ static const char * const boot_msg = /* Include files */ +#include #include #include #include diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 01f6811f132..8f5414348e8 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/slip.c b/drivers/net/slip.c index e17c535a577..fe3cebb984d 100644 --- a/drivers/net/slip.c +++ b/drivers/net/slip.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index 305ec3d783d..7019a0d1a82 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/tokenring/ibmtr.c b/drivers/net/tokenring/ibmtr.c index 525bbc5b9c9..36cb2423bcf 100644 --- a/drivers/net/tokenring/ibmtr.c +++ b/drivers/net/tokenring/ibmtr.c @@ -108,6 +108,7 @@ in the event that chatty debug messages are desired - jjs 12/30/98 */ #define IBMTR_DEBUG_MESSAGES 0 #include +#include #ifdef PCMCIA /* required for ibmtr_cs.c to build */ #undef MODULE /* yes, really */ diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c index d6d345229fe..5921f5bdd76 100644 --- a/drivers/net/typhoon.c +++ b/drivers/net/typhoon.c @@ -108,6 +108,7 @@ static const int multicast_filter_limit = 32; #include #include +#include #include #include #include diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 66360a2a14c..e2c33c06190 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -76,6 +76,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c index 2573c18b6aa..cd8cb95c5bd 100644 --- a/drivers/net/wan/cycx_x25.c +++ b/drivers/net/wan/cycx_x25.c @@ -84,6 +84,7 @@ #include /* printk(), and other useful stuff */ #include #include /* inline memset(), etc. */ +#include #include /* kmalloc(), kfree() */ #include /* offsetof(), etc. */ #include /* WAN router definitions */ diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index 81c8aec9df9..07d00b4cf48 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -81,6 +81,7 @@ */ #include +#include #include #include #include diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 3e90eb81618..beda387f2fc 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/b43/pio.c b/drivers/net/wireless/b43/pio.c index 9c1397996e0..5e87650b07f 100644 --- a/drivers/net/wireless/b43/pio.c +++ b/drivers/net/wireless/b43/pio.c @@ -30,6 +30,7 @@ #include "xmit.h" #include +#include static u16 generate_cookie(struct b43_pio_txqueue *q, diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 1d9223b3d4c..4b60148a5e6 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/b43legacy/phy.c b/drivers/net/wireless/b43legacy/phy.c index 11319ec2d64..aaf227203a9 100644 --- a/drivers/net/wireless/b43legacy/phy.c +++ b/drivers/net/wireless/b43legacy/phy.c @@ -31,6 +31,7 @@ #include #include +#include #include #include "b43legacy.h" diff --git a/drivers/net/wireless/hostap/hostap_info.c b/drivers/net/wireless/hostap/hostap_info.c index 6fa14a4e4b5..4dfb40a84c9 100644 --- a/drivers/net/wireless/hostap/hostap_info.c +++ b/drivers/net/wireless/hostap/hostap_info.c @@ -1,6 +1,7 @@ /* Host AP driver Info Frame processing (part of hostap.o module) */ #include +#include #include "hostap_wlan.h" #include "hostap.h" #include "hostap_ap.h" diff --git a/drivers/net/wireless/hostap/hostap_ioctl.c b/drivers/net/wireless/hostap/hostap_ioctl.c index 3f2bda881a4..9419cebca8a 100644 --- a/drivers/net/wireless/hostap/hostap_ioctl.c +++ b/drivers/net/wireless/hostap/hostap_ioctl.c @@ -1,6 +1,7 @@ /* ioctl() (mostly Linux Wireless Extensions) routines for Host AP driver */ #include +#include #include #include #include diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c index 8d58e6ed4e7..827824d45de 100644 --- a/drivers/net/wireless/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/ipw2x00/ipw2200.c @@ -30,6 +30,7 @@ ******************************************************************************/ +#include #include "ipw2200.h" diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c index e70c5b0af36..68136172b82 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c index a22a0501c19..6f703a04184 100644 --- a/drivers/net/wireless/iwlwifi/iwl-4965.c +++ b/drivers/net/wireless/iwlwifi/iwl-4965.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c index eb08f441100..d6bc0e05104 100644 --- a/drivers/net/wireless/iwlwifi/iwl-5000.c +++ b/drivers/net/wireless/iwlwifi/iwl-5000.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index cdc07c47745..313d3e5ee84 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 484d5c1a731..2dc92875545 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include "iwl-eeprom.h" diff --git a/drivers/net/wireless/iwlwifi/iwl-hcmd.c b/drivers/net/wireless/iwlwifi/iwl-hcmd.c index 532c8d6cd8d..a6856daf14c 100644 --- a/drivers/net/wireless/iwlwifi/iwl-hcmd.c +++ b/drivers/net/wireless/iwlwifi/iwl-hcmd.c @@ -28,6 +28,7 @@ #include #include +#include #include #include "iwl-dev.h" /* FIXME: remove */ diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index c1890754470..fb9bcfa6d94 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -28,6 +28,7 @@ *****************************************************************************/ #include +#include #include #include "iwl-eeprom.h" #include "iwl-dev.h" diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index c390dbd877e..aa49230422f 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/iwmc3200wifi/cfg80211.c b/drivers/net/wireless/iwmc3200wifi/cfg80211.c index a56a2b0ac99..f3c55658225 100644 --- a/drivers/net/wireless/iwmc3200wifi/cfg80211.c +++ b/drivers/net/wireless/iwmc3200wifi/cfg80211.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/wireless/iwmc3200wifi/commands.c b/drivers/net/wireless/iwmc3200wifi/commands.c index 23b52fa2605..84158b6d35d 100644 --- a/drivers/net/wireless/iwmc3200wifi/commands.c +++ b/drivers/net/wireless/iwmc3200wifi/commands.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "iwm.h" #include "bus.h" diff --git a/drivers/net/wireless/iwmc3200wifi/main.c b/drivers/net/wireless/iwmc3200wifi/main.c index d668e475632..222eb2cf1b3 100644 --- a/drivers/net/wireless/iwmc3200wifi/main.c +++ b/drivers/net/wireless/iwmc3200wifi/main.c @@ -38,6 +38,7 @@ #include #include +#include #include #include diff --git a/drivers/net/wireless/iwmc3200wifi/rx.c b/drivers/net/wireless/iwmc3200wifi/rx.c index 40dbcbc1659..771a301003c 100644 --- a/drivers/net/wireless/iwmc3200wifi/rx.c +++ b/drivers/net/wireless/iwmc3200wifi/rx.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c index 685098148e1..0a324dcd264 100644 --- a/drivers/net/wireless/libertas/cmd.c +++ b/drivers/net/wireless/libertas/cmd.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "host.h" #include "hostcmd.h" #include "decl.h" diff --git a/drivers/net/wireless/libertas/tx.c b/drivers/net/wireless/libertas/tx.c index 4c018f7a0a8..8c3766a6e8e 100644 --- a/drivers/net/wireless/libertas/tx.c +++ b/drivers/net/wireless/libertas/tx.c @@ -3,6 +3,7 @@ */ #include #include +#include #include "hostcmd.h" #include "radiotap.h" diff --git a/drivers/net/wireless/prism54/isl_ioctl.c b/drivers/net/wireless/prism54/isl_ioctl.c index 4c97c6ad6f5..bc08464d832 100644 --- a/drivers/net/wireless/prism54/isl_ioctl.c +++ b/drivers/net/wireless/prism54/isl_ioctl.c @@ -19,6 +19,7 @@ * */ +#include #include #include #include diff --git a/drivers/net/wireless/prism54/islpci_dev.c b/drivers/net/wireless/prism54/islpci_dev.c index e26d7b3ceab..2505be56ae3 100644 --- a/drivers/net/wireless/prism54/islpci_dev.c +++ b/drivers/net/wireless/prism54/islpci_dev.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/prism54/islpci_mgt.c b/drivers/net/wireless/prism54/islpci_mgt.c index f7c677e2094..69d2f882fd0 100644 --- a/drivers/net/wireless/prism54/islpci_mgt.c +++ b/drivers/net/wireless/prism54/islpci_mgt.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/wireless/rt2x00/rt2x00debug.c b/drivers/net/wireless/rt2x00/rt2x00debug.c index 7b3ee8c2eae..68bc9bb1dbf 100644 --- a/drivers/net/wireless/rt2x00/rt2x00debug.c +++ b/drivers/net/wireless/rt2x00/rt2x00debug.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "rt2x00.h" diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 2ce8f9ccc66..d49ecc94bd4 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 4cdb31a362c..a0c816238aa 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -12,6 +12,7 @@ */ #include +#include #include int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm) diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c index 8a11de9552c..62227cd5241 100644 --- a/drivers/rtc/rtc-dev.c +++ b/drivers/rtc/rtc-dev.c @@ -13,6 +13,7 @@ #include #include +#include #include "rtc-core.h" static dev_t rtc_devt; diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index a9d70704720..e941367dd28 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/uwb/whc-rc.c b/drivers/uwb/whc-rc.c index 1d9a6f54658..01950c62dc8 100644 --- a/drivers/uwb/whc-rc.c +++ b/drivers/uwb/whc-rc.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/file.c b/fs/file.c index f313314f996..87e129030ab 100644 --- a/fs/file.c +++ b/fs/file.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index b78cf819495..7ca72b74eec 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -610,6 +609,7 @@ extern void debug_poll_all_shared_irqs(void); static inline void debug_poll_all_shared_irqs(void) { } #endif +struct seq_file; int show_interrupts(struct seq_file *p, void *v); struct irq_desc; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 81bb4235859..eaf36364b7d 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -11,6 +11,7 @@ #define LINUX_MMC_HOST_H #include +#include #include diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index a81cf80554d..17c71bb565c 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index 50d022e5a56..ec815a960b5 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index fb0f46fa1ec..c3a4e2907ea 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 2755a3bd16a..eae56fddfa3 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -9,6 +9,7 @@ */ #include #include +#include #include #include #include diff --git a/lib/fault-inject.c b/lib/fault-inject.c index f97af55bdd9..7e65af70635 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 5e7aed0802b..0f551a4a44c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index eafc010907c..3c175402302 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -30,6 +30,7 @@ ********************************************************************/ #include +#include #include #include diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 62116829b81..315ead3cb92 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 7b6b631f647..d340110f5c0 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c index cf9a4b531a9..cccc2e93234 100644 --- a/net/irda/irnet/irnet_irda.c +++ b/net/irda/irnet/irnet_irda.c @@ -9,6 +9,7 @@ */ #include "irnet_irda.h" /* Private header */ +#include #include #include diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 68cbcb19cbd..7dea882dbb7 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -13,6 +13,7 @@ * 2) as a control channel (write commands, read events) */ +#include #include #include "irnet_ppp.h" /* Private header */ /* Please put other headers in irnet.h - Thanks */ diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c index a59043fbb0f..45667054a5f 100644 --- a/net/mac80211/rc80211_pid_debugfs.c +++ b/net/mac80211/rc80211_pid_debugfs.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7c9ec3dee96..ca6e68dcd8a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 0cf5e8c27a1..3fa5751af0e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include diff --git a/net/wireless/core.c b/net/wireless/core.c index 45b2be3274d..a595f712b5b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include "nl80211.h" -- cgit v1.2.3 From 5dd33c9a4c29015f6d87568d33521c98931a387e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 16 Oct 2009 16:40:25 +1100 Subject: md/async: don't pass a memory pointer as a page pointer. md/raid6 passes a list of 'struct page *' to the async_tx routines, which then either DMA map them for offload, or take the page_address for CPU based calculations. For RAID6 we sometime leave 'blanks' in the list of pages. For CPU based calcs, we want to treat theses as a page of zeros. For offloaded calculations, we simply don't pass a page to the hardware. Currently the 'blanks' are encoded as a pointer to raid6_empty_zero_page. This is a 4096 byte memory region, not a 'struct page'. This is mostly handled correctly but is rather ugly. So change the code to pass and expect a NULL pointer for the blanks. When taking page_address of a page, we need to check for a NULL and in that case use raid6_empty_zero_page. Signed-off-by: NeilBrown --- crypto/async_tx/async_pq.c | 15 ++++----------- crypto/async_tx/async_raid6_recov.c | 16 +++++++++++----- drivers/md/raid5.c | 4 ++-- 3 files changed, 17 insertions(+), 18 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index b88db6d1dc6..9ab1ce4af3c 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -30,11 +30,6 @@ */ static struct page *scribble; -static bool is_raid6_zero_block(struct page *p) -{ - return p == (void *) raid6_empty_zero_page; -} - /* the struct page *blocks[] parameter passed to async_gen_syndrome() * and async_syndrome_val() contains the 'P' destination address at * blocks[disks-2] and the 'Q' destination address at blocks[disks-1] @@ -83,7 +78,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, * sources and update the coefficients accordingly */ for (i = 0, idx = 0; i < src_cnt; i++) { - if (is_raid6_zero_block(blocks[i])) + if (blocks[i] == NULL) continue; dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len, DMA_TO_DEVICE); @@ -160,9 +155,9 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, srcs = (void **) blocks; for (i = 0; i < disks; i++) { - if (is_raid6_zero_block(blocks[i])) { + if (blocks[i] == NULL) { BUG_ON(i > disks - 3); /* P or Q can't be zero */ - srcs[i] = blocks[i]; + srcs[i] = (void*)raid6_empty_zero_page; } else srcs[i] = page_address(blocks[i]) + offset; } @@ -290,12 +285,10 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, if (submit->flags & ASYNC_TX_FENCE) dma_flags |= DMA_PREP_FENCE; for (i = 0; i < disks; i++) - if (likely(blocks[i])) { - BUG_ON(is_raid6_zero_block(blocks[i])); + if (likely(blocks[i])) dma_src[i] = dma_map_page(dev, blocks[i], offset, len, DMA_TO_DEVICE); - } for (;;) { tx = device->device_prep_dma_pq_val(chan, pq, dma_src, diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index 6d73dde4786..8e30b6ed078 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c @@ -263,10 +263,10 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb, * delta p and delta q */ dp = blocks[faila]; - blocks[faila] = (void *)raid6_empty_zero_page; + blocks[faila] = NULL; blocks[disks-2] = dp; dq = blocks[failb]; - blocks[failb] = (void *)raid6_empty_zero_page; + blocks[failb] = NULL; blocks[disks-1] = dq; init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); @@ -338,7 +338,10 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, async_tx_quiesce(&submit->depend_tx); for (i = 0; i < disks; i++) - ptrs[i] = page_address(blocks[i]); + if (blocks[i] == NULL) + ptrs[i] = (void*)raid6_empty_zero_page; + else + ptrs[i] = page_address(blocks[i]); raid6_2data_recov(disks, bytes, faila, failb, ptrs); @@ -398,7 +401,10 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, async_tx_quiesce(&submit->depend_tx); for (i = 0; i < disks; i++) - ptrs[i] = page_address(blocks[i]); + if (blocks[i] == NULL) + ptrs[i] = (void*)raid6_empty_zero_page; + else + ptrs[i] = page_address(blocks[i]); raid6_datap_recov(disks, bytes, faila, ptrs); @@ -414,7 +420,7 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = blocks[faila]; - blocks[faila] = (void *)raid6_empty_zero_page; + blocks[faila] = NULL; blocks[disks-1] = dq; /* in the 4 disk case we only need to perform a single source diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c4366c9373c..dcd9e659ed9 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -720,7 +720,7 @@ static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh) int i; for (i = 0; i < disks; i++) - srcs[i] = (void *)raid6_empty_zero_page; + srcs[i] = NULL; count = 0; i = d0_idx; @@ -816,7 +816,7 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) * slot number conversion for 'faila' and 'failb' */ for (i = 0; i < disks ; i++) - blocks[i] = (void *)raid6_empty_zero_page; + blocks[i] = NULL; count = 0; i = d0_idx; do { -- cgit v1.2.3 From b2141e6951ad56c8f65e70547baeabd5698e390a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 16 Oct 2009 16:40:34 +1100 Subject: raid6/async_tx: handle holes in block list in async_syndrome_val async_syndrome_val check the P and Q blocks used for RAID6 calculations. With DDF raid6, some of the data blocks might be NULL, so this needs to be handled in the same way that async_gen_syndrome handles it. As async_syndrome_val calls async_xor, also enhance async_xor to detect and skip NULL blocks in the list. Signed-off-by: NeilBrown --- crypto/async_tx/async_pq.c | 31 ++++++++++++++++++++++++------- crypto/async_tx/async_xor.c | 18 +++++++++++------- 2 files changed, 35 insertions(+), 14 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 9ab1ce4af3c..43b1436bd96 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -260,8 +260,10 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx; + unsigned char coefs[disks-2]; enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; dma_addr_t *dma_src = NULL; + int src_cnt = 0; BUG_ON(disks < 4); @@ -280,20 +282,35 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, __func__, disks, len); if (!P(blocks, disks)) dma_flags |= DMA_PREP_PQ_DISABLE_P; + else + pq[0] = dma_map_page(dev, P(blocks,disks), + offset, len, + DMA_TO_DEVICE); if (!Q(blocks, disks)) dma_flags |= DMA_PREP_PQ_DISABLE_Q; + else + pq[1] = dma_map_page(dev, Q(blocks,disks), + offset, len, + DMA_TO_DEVICE); + if (submit->flags & ASYNC_TX_FENCE) dma_flags |= DMA_PREP_FENCE; - for (i = 0; i < disks; i++) - if (likely(blocks[i])) - dma_src[i] = dma_map_page(dev, blocks[i], - offset, len, - DMA_TO_DEVICE); + for (i = 0; i < disks-2; i++) + if (likely(blocks[i])) { + dma_src[src_cnt] = dma_map_page(dev, blocks[i], + offset, len, + DMA_TO_DEVICE); + coefs[src_cnt] = raid6_gfexp[i]; + src_cnt++; + } + pq[1] = dma_map_page(dev, Q(blocks,disks), + offset, len, + DMA_TO_DEVICE); for (;;) { tx = device->device_prep_dma_pq_val(chan, pq, dma_src, - disks - 2, - raid6_gfexp, + src_cnt, + coefs, len, pqres, dma_flags); if (likely(tx)) diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index b459a9034aa..79182dcb91b 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -44,20 +44,23 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, void *cb_param_orig = submit->cb_param; enum async_tx_flags flags_orig = submit->flags; enum dma_ctrl_flags dma_flags; - int xor_src_cnt; + int xor_src_cnt = 0; dma_addr_t dma_dest; /* map the dest bidrectional in case it is re-used as a source */ dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_BIDIRECTIONAL); for (i = 0; i < src_cnt; i++) { /* only map the dest once */ + if (!src_list[i]) + continue; if (unlikely(src_list[i] == dest)) { - dma_src[i] = dma_dest; + dma_src[xor_src_cnt++] = dma_dest; continue; } - dma_src[i] = dma_map_page(dma->dev, src_list[i], offset, - len, DMA_TO_DEVICE); + dma_src[xor_src_cnt++] = dma_map_page(dma->dev, src_list[i], offset, + len, DMA_TO_DEVICE); } + src_cnt = xor_src_cnt; while (src_cnt) { submit->flags = flags_orig; @@ -123,7 +126,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, struct async_submit_ctl *submit) { int i; - int xor_src_cnt; + int xor_src_cnt = 0; int src_off = 0; void *dest_buf; void **srcs; @@ -135,8 +138,9 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, /* convert to buffer pointers */ for (i = 0; i < src_cnt; i++) - srcs[i] = page_address(src_list[i]) + offset; - + if (src_list[i]) + srcs[xor_src_cnt++] = page_address(src_list[i]) + offset; + src_cnt = xor_src_cnt; /* set destination address */ dest_buf = page_address(dest) + offset; -- cgit v1.2.3 From 5676470f06f783aebf545c8f17ca772911022068 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 19 Oct 2009 18:09:32 -0700 Subject: async_pq: kill a stray dma_map() call and other cleanups - update the kernel doc for async_syndrome to indicate what NULL in the source list means - whitespace fixups Signed-off-by: Dan Williams --- crypto/async_tx/async_pq.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 43b1436bd96..60476560e0b 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -181,10 +181,14 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <= * PAGE_SIZE as a temporary buffer of this size is used in the * synchronous path. 'disks' always accounts for both destination - * buffers. + * buffers. If any source buffers (blocks[i] where i < disks - 2) are + * set to NULL those buffers will be replaced with the raid6_zero_page + * in the synchronous path and omitted in the hardware-asynchronous + * path. * * 'blocks' note: if submit->scribble is NULL then the contents of - * 'blocks' may be overridden + * 'blocks' may be overwritten to perform address conversions + * (dma_map_page() or page_address()). */ struct dma_async_tx_descriptor * async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, @@ -283,13 +287,13 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, if (!P(blocks, disks)) dma_flags |= DMA_PREP_PQ_DISABLE_P; else - pq[0] = dma_map_page(dev, P(blocks,disks), + pq[0] = dma_map_page(dev, P(blocks, disks), offset, len, DMA_TO_DEVICE); if (!Q(blocks, disks)) dma_flags |= DMA_PREP_PQ_DISABLE_Q; else - pq[1] = dma_map_page(dev, Q(blocks,disks), + pq[1] = dma_map_page(dev, Q(blocks, disks), offset, len, DMA_TO_DEVICE); @@ -303,9 +307,6 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, coefs[src_cnt] = raid6_gfexp[i]; src_cnt++; } - pq[1] = dma_map_page(dev, Q(blocks,disks), - offset, len, - DMA_TO_DEVICE); for (;;) { tx = device->device_prep_dma_pq_val(chan, pq, dma_src, -- cgit v1.2.3 From 030b07720be0f3bfada12ff6bfa3c61a91212f32 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 19 Oct 2009 18:09:32 -0700 Subject: async_pq: rename scribble page The global scribble page is used as a temporary destination buffer when disabling the P or Q result is requested. The local scribble buffer contains memory for performing address conversions. Rename the global variable to avoid confusion. Signed-off-by: Dan Williams --- crypto/async_tx/async_pq.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 60476560e0b..6b5cc4fba59 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -26,9 +26,10 @@ #include /** - * scribble - space to hold throwaway P buffer for synchronous gen_syndrome + * pq_scribble_page - space to hold throwaway P or Q buffer for + * synchronous gen_syndrome */ -static struct page *scribble; +static struct page *pq_scribble_page; /* the struct page *blocks[] parameter passed to async_gen_syndrome() * and async_syndrome_val() contains the 'P' destination address at @@ -226,11 +227,11 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, async_tx_quiesce(&submit->depend_tx); if (!P(blocks, disks)) { - P(blocks, disks) = scribble; + P(blocks, disks) = pq_scribble_page; BUG_ON(len + offset > PAGE_SIZE); } if (!Q(blocks, disks)) { - Q(blocks, disks) = scribble; + Q(blocks, disks) = pq_scribble_page; BUG_ON(len + offset > PAGE_SIZE); } do_sync_gen_syndrome(blocks, offset, disks, len, submit); @@ -384,9 +385,9 @@ EXPORT_SYMBOL_GPL(async_syndrome_val); static int __init async_pq_init(void) { - scribble = alloc_page(GFP_KERNEL); + pq_scribble_page = alloc_page(GFP_KERNEL); - if (scribble) + if (pq_scribble_page) return 0; pr_err("%s: failed to allocate required spare page\n", __func__); @@ -396,7 +397,7 @@ static int __init async_pq_init(void) static void __exit async_pq_exit(void) { - put_page(scribble); + put_page(pq_scribble_page); } module_init(async_pq_init); -- cgit v1.2.3 From da17bf4306fd3a52e938b121df82a7baa10eb282 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 19 Oct 2009 14:05:12 -0700 Subject: async_tx: fix asynchronous raid6 recovery for ddf layouts The raid6 recovery code currently requires special handling of the 4-disk and 5-disk recovery scenarios for the native layout. Quoting from commit 0a82a623: In these situations the default N-disk algorithm will present 0-source or 1-source operations to dma devices. To cover for dma devices where the minimum source count is 2 we implement 4-disk and 5-disk handling in the recovery code. The ddf layout presents disks=6 and disks=7 to the recovery code in these situations. Instead of looking at the number of disks count the number of non-zero sources in the list and call the special case code when the number of non-failed sources is 0 or 1. [neilb@suse.de: replace 'ddf' flag with counting good sources] Signed-off-by: Dan Williams --- crypto/async_tx/async_raid6_recov.c | 86 ++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 30 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index 8e30b6ed078..943f2abac9b 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c @@ -131,8 +131,8 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len, } static struct dma_async_tx_descriptor * -__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, - struct async_submit_ctl *submit) +__2data_recov_4(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) { struct dma_async_tx_descriptor *tx = NULL; struct page *p, *q, *a, *b; @@ -143,8 +143,8 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, void *cb_param = submit->cb_param; void *scribble = submit->scribble; - p = blocks[4-2]; - q = blocks[4-1]; + p = blocks[disks-2]; + q = blocks[disks-1]; a = blocks[faila]; b = blocks[failb]; @@ -170,8 +170,8 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, } static struct dma_async_tx_descriptor * -__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, - struct async_submit_ctl *submit) +__2data_recov_5(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) { struct dma_async_tx_descriptor *tx = NULL; struct page *p, *q, *g, *dp, *dq; @@ -181,21 +181,22 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, dma_async_tx_callback cb_fn = submit->cb_fn; void *cb_param = submit->cb_param; void *scribble = submit->scribble; - int uninitialized_var(good); - int i; + int good_srcs, good, i; - for (i = 0; i < 3; i++) { + good_srcs = 0; + good = -1; + for (i = 0; i < disks-2; i++) { + if (blocks[i] == NULL) + continue; if (i == faila || i == failb) continue; - else { - good = i; - break; - } + good = i; + good_srcs++; } - BUG_ON(i >= 3); + BUG_ON(good_srcs > 1); - p = blocks[5-2]; - q = blocks[5-1]; + p = blocks[disks-2]; + q = blocks[disks-1]; g = blocks[good]; /* Compute syndrome with zero for the missing data pages @@ -323,6 +324,8 @@ struct dma_async_tx_descriptor * async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, struct page **blocks, struct async_submit_ctl *submit) { + int non_zero_srcs, i; + BUG_ON(faila == failb); if (failb < faila) swap(faila, failb); @@ -334,12 +337,11 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, */ if (!submit->scribble) { void **ptrs = (void **) blocks; - int i; async_tx_quiesce(&submit->depend_tx); for (i = 0; i < disks; i++) if (blocks[i] == NULL) - ptrs[i] = (void*)raid6_empty_zero_page; + ptrs[i] = (void *) raid6_empty_zero_page; else ptrs[i] = page_address(blocks[i]); @@ -350,19 +352,30 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, return NULL; } - switch (disks) { - case 4: + non_zero_srcs = 0; + for (i = 0; i < disks-2 && non_zero_srcs < 4; i++) + if (blocks[i]) + non_zero_srcs++; + switch (non_zero_srcs) { + case 0: + case 1: + /* There must be at least 2 sources - the failed devices. */ + BUG(); + + case 2: /* dma devices do not uniformly understand a zero source pq * operation (in contrast to the synchronous case), so - * explicitly handle the 4 disk special case + * explicitly handle the special case of a 4 disk array with + * both data disks missing. */ - return __2data_recov_4(bytes, faila, failb, blocks, submit); - case 5: + return __2data_recov_4(disks, bytes, faila, failb, blocks, submit); + case 3: /* dma devices do not uniformly understand a single * source pq operation (in contrast to the synchronous - * case), so explicitly handle the 5 disk special case + * case), so explicitly handle the special case of a 5 disk + * array with 2 of 3 data disks missing. */ - return __2data_recov_5(bytes, faila, failb, blocks, submit); + return __2data_recov_5(disks, bytes, faila, failb, blocks, submit); default: return __2data_recov_n(disks, bytes, faila, failb, blocks, submit); } @@ -388,6 +401,7 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, dma_async_tx_callback cb_fn = submit->cb_fn; void *cb_param = submit->cb_param; void *scribble = submit->scribble; + int good_srcs, good, i; struct page *srcs[2]; pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); @@ -397,7 +411,6 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, */ if (!scribble) { void **ptrs = (void **) blocks; - int i; async_tx_quiesce(&submit->depend_tx); for (i = 0; i < disks; i++) @@ -413,6 +426,20 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, return NULL; } + good_srcs = 0; + good = -1; + for (i = 0; i < disks-2; i++) { + if (i == faila) + continue; + if (blocks[i]) { + good = i; + good_srcs++; + if (good_srcs > 1) + break; + } + } + BUG_ON(good_srcs == 0); + p = blocks[disks-2]; q = blocks[disks-1]; @@ -423,11 +450,10 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, blocks[faila] = NULL; blocks[disks-1] = dq; - /* in the 4 disk case we only need to perform a single source - * multiplication + /* in the 4-disk case we only need to perform a single source + * multiplication with the one good data block. */ - if (disks == 4) { - int good = faila == 0 ? 1 : 0; + if (good_srcs == 1) { struct page *g = blocks[good]; init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, -- cgit v1.2.3 From 62c5593aea4b71d61dc0f37fea96c556c158a042 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 16 Nov 2009 21:52:22 +0800 Subject: crypto: gcm - fix another complete call in complete fuction The flow of the complete function (xxx_done) in gcm.c is as follow: void complete(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; if (!err) { err = async_next_step(); if (err == -EINPROGRESS || err == -EBUSY) return; } complete_for_next_step(areq, err); } But *areq may be destroyed in async_next_step(), this makes complete_for_next_step() can not work properly. To fix this, one of following methods is used for each complete function. - Add a __complete() for each complete(), which accept struct aead_request *req instead of areq, so avoid using areq after it is destroyed. - Expand complete_for_next_step(). The fixing method is based on the idea of Herbert Xu. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- crypto/gcm.c | 107 ++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 73 insertions(+), 34 deletions(-) (limited to 'crypto') diff --git a/crypto/gcm.c b/crypto/gcm.c index 5fc3292483e..c6547130624 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -40,7 +40,7 @@ struct crypto_rfc4106_ctx { struct crypto_gcm_ghash_ctx { unsigned int cryptlen; struct scatterlist *src; - crypto_completion_t complete; + void (*complete)(struct aead_request *req, int err); }; struct crypto_gcm_req_priv_ctx { @@ -267,23 +267,26 @@ static int gcm_hash_final(struct aead_request *req, return crypto_ahash_final(ahreq); } -static void gcm_hash_final_done(struct crypto_async_request *areq, - int err) +static void __gcm_hash_final_done(struct aead_request *req, int err) { - struct aead_request *req = areq->data; struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; if (!err) crypto_xor(pctx->auth_tag, pctx->iauth_tag, 16); - gctx->complete(areq, err); + gctx->complete(req, err); } -static void gcm_hash_len_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_final_done(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; + + __gcm_hash_final_done(req, err); +} + +static void __gcm_hash_len_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); if (!err) { @@ -292,13 +295,18 @@ static void gcm_hash_len_done(struct crypto_async_request *areq, return; } - gcm_hash_final_done(areq, err); + __gcm_hash_final_done(req, err); } -static void gcm_hash_crypt_remain_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_len_done(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; + + __gcm_hash_len_done(req, err); +} + +static void __gcm_hash_crypt_remain_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); if (!err) { @@ -307,13 +315,19 @@ static void gcm_hash_crypt_remain_done(struct crypto_async_request *areq, return; } - gcm_hash_len_done(areq, err); + __gcm_hash_len_done(req, err); } -static void gcm_hash_crypt_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_crypt_remain_done(struct crypto_async_request *areq, + int err) { struct aead_request *req = areq->data; + + __gcm_hash_crypt_remain_done(req, err); +} + +static void __gcm_hash_crypt_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; unsigned int remain; @@ -327,13 +341,18 @@ static void gcm_hash_crypt_done(struct crypto_async_request *areq, return; } - gcm_hash_crypt_remain_done(areq, err); + __gcm_hash_crypt_remain_done(req, err); } -static void gcm_hash_assoc_remain_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_crypt_done(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; + + __gcm_hash_crypt_done(req, err); +} + +static void __gcm_hash_assoc_remain_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; crypto_completion_t complete; @@ -350,15 +369,21 @@ static void gcm_hash_assoc_remain_done(struct crypto_async_request *areq, } if (remain) - gcm_hash_crypt_done(areq, err); + __gcm_hash_crypt_done(req, err); else - gcm_hash_crypt_remain_done(areq, err); + __gcm_hash_crypt_remain_done(req, err); } -static void gcm_hash_assoc_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_assoc_remain_done(struct crypto_async_request *areq, + int err) { struct aead_request *req = areq->data; + + __gcm_hash_assoc_remain_done(req, err); +} + +static void __gcm_hash_assoc_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); unsigned int remain; @@ -371,13 +396,18 @@ static void gcm_hash_assoc_done(struct crypto_async_request *areq, return; } - gcm_hash_assoc_remain_done(areq, err); + __gcm_hash_assoc_remain_done(req, err); } -static void gcm_hash_init_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_assoc_done(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; + + __gcm_hash_assoc_done(req, err); +} + +static void __gcm_hash_init_done(struct aead_request *req, int err) +{ struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); crypto_completion_t complete; unsigned int remain = 0; @@ -393,9 +423,16 @@ static void gcm_hash_init_done(struct crypto_async_request *areq, } if (remain) - gcm_hash_assoc_done(areq, err); + __gcm_hash_assoc_done(req, err); else - gcm_hash_assoc_remain_done(areq, err); + __gcm_hash_assoc_remain_done(req, err); +} + +static void gcm_hash_init_done(struct crypto_async_request *areq, int err) +{ + struct aead_request *req = areq->data; + + __gcm_hash_init_done(req, err); } static int gcm_hash(struct aead_request *req, @@ -457,10 +494,8 @@ static void gcm_enc_copy_hash(struct aead_request *req, crypto_aead_authsize(aead), 1); } -static void gcm_enc_hash_done(struct crypto_async_request *areq, - int err) +static void gcm_enc_hash_done(struct aead_request *req, int err) { - struct aead_request *req = areq->data; struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); if (!err) @@ -469,8 +504,7 @@ static void gcm_enc_hash_done(struct crypto_async_request *areq, aead_request_complete(req, err); } -static void gcm_encrypt_done(struct crypto_async_request *areq, - int err) +static void gcm_encrypt_done(struct crypto_async_request *areq, int err) { struct aead_request *req = areq->data; struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); @@ -479,9 +513,13 @@ static void gcm_encrypt_done(struct crypto_async_request *areq, err = gcm_hash(req, pctx); if (err == -EINPROGRESS || err == -EBUSY) return; + else if (!err) { + crypto_xor(pctx->auth_tag, pctx->iauth_tag, 16); + gcm_enc_copy_hash(req, pctx); + } } - gcm_enc_hash_done(areq, err); + aead_request_complete(req, err); } static int crypto_gcm_encrypt(struct aead_request *req) @@ -538,9 +576,8 @@ static void gcm_decrypt_done(struct crypto_async_request *areq, int err) aead_request_complete(req, err); } -static void gcm_dec_hash_done(struct crypto_async_request *areq, int err) +static void gcm_dec_hash_done(struct aead_request *req, int err) { - struct aead_request *req = areq->data; struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); struct ablkcipher_request *abreq = &pctx->u.abreq; struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; @@ -552,9 +589,11 @@ static void gcm_dec_hash_done(struct crypto_async_request *areq, int err) err = crypto_ablkcipher_decrypt(abreq); if (err == -EINPROGRESS || err == -EBUSY) return; + else if (!err) + err = crypto_gcm_verify(req, pctx); } - gcm_decrypt_done(areq, err); + aead_request_complete(req, err); } static int crypto_gcm_decrypt(struct aead_request *req) -- cgit v1.2.3 From 7b3cc2b1fc2066391e498f3387204908c4eced21 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Nov 2009 17:10:37 -0700 Subject: async_tx: build-time toggling of async_{syndrome,xor}_val dma support ioat3.2 does not support asynchronous error notifications which makes the driver experience latencies when non-zero pq validate results are expected. Provide a mechanism for turning off async_xor_val and async_syndrome_val via Kconfig. This approach is generally useful for any driver that specifies ASYNC_TX_DISABLE_CHANNEL_SWITCH and would like to force the async_tx api to fall back to the synchronous path for certain operations. Signed-off-by: Dan Williams --- crypto/async_tx/Kconfig | 5 +++++ crypto/async_tx/async_pq.c | 14 +++++++++++--- crypto/async_tx/async_xor.c | 15 ++++++++++++--- drivers/dma/Kconfig | 2 ++ drivers/dma/dmaengine.c | 6 ++++++ drivers/dma/ioat/dma_v3.c | 10 ++++++++++ 6 files changed, 46 insertions(+), 6 deletions(-) (limited to 'crypto') diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index e5aeb2b79e6..e28e276ac61 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -23,3 +23,8 @@ config ASYNC_RAID6_RECOV select ASYNC_CORE select ASYNC_PQ +config ASYNC_TX_DISABLE_PQ_VAL_DMA + bool + +config ASYNC_TX_DISABLE_XOR_VAL_DMA + bool diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 6b5cc4fba59..ec87f53d505 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -240,6 +240,16 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, } EXPORT_SYMBOL_GPL(async_gen_syndrome); +static inline struct dma_chan * +pq_val_chan(struct async_submit_ctl *submit, struct page **blocks, int disks, size_t len) +{ + #ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA + return NULL; + #endif + return async_tx_find_channel(submit, DMA_PQ_VAL, NULL, 0, blocks, + disks, len); +} + /** * async_syndrome_val - asynchronously validate a raid6 syndrome * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 @@ -260,9 +270,7 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, size_t len, enum sum_check_flags *pqres, struct page *spare, struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL, - NULL, 0, blocks, disks, - len); + struct dma_chan *chan = pq_val_chan(submit, blocks, disks, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx; unsigned char coefs[disks-2]; diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 79182dcb91b..079ae8ca590 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -234,6 +234,17 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) memcmp(a, a + 4, len - 4) == 0); } +static inline struct dma_chan * +xor_val_chan(struct async_submit_ctl *submit, struct page *dest, + struct page **src_list, int src_cnt, size_t len) +{ + #ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA + return NULL; + #endif + return async_tx_find_channel(submit, DMA_XOR_VAL, &dest, 1, src_list, + src_cnt, len); +} + /** * async_xor_val - attempt a xor parity check with a dma engine. * @dest: destination page used if the xor is performed synchronously @@ -255,9 +266,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, - &dest, 1, src_list, - src_cnt, len); + struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; dma_addr_t *dma_src = NULL; diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 5903a88351b..b401dadad4a 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -26,6 +26,8 @@ config INTEL_IOATDMA select DMA_ENGINE select DCA select ASYNC_TX_DISABLE_CHANNEL_SWITCH + select ASYNC_TX_DISABLE_PQ_VAL_DMA + select ASYNC_TX_DISABLE_XOR_VAL_DMA help Enable support for the Intel(R) I/OAT DMA engine present in recent Intel Xeon chipsets. diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index b6442f09d0f..8f99354082c 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -632,16 +632,22 @@ static bool device_has_all_tx_types(struct dma_device *device) #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE) if (!dma_has_cap(DMA_XOR, device->cap_mask)) return false; + + #ifndef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA if (!dma_has_cap(DMA_XOR_VAL, device->cap_mask)) return false; #endif + #endif #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE) if (!dma_has_cap(DMA_PQ, device->cap_mask)) return false; + + #ifndef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA if (!dma_has_cap(DMA_PQ_VAL, device->cap_mask)) return false; #endif + #endif return true; } diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 252cf2d3f1d..189788f6351 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -1206,6 +1206,16 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) device->timer_fn = ioat2_timer_event; } + #ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA + dma_cap_clear(DMA_PQ_VAL, dma->cap_mask); + dma->device_prep_dma_pq_val = NULL; + #endif + + #ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA + dma_cap_clear(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = NULL; + #endif + /* -= IOAT ver.3 workarounds =- */ /* Write CHANERRMSK_INT with 3E07h to mask out the errors * that can cause stability issues for IOAT ver.3 -- cgit v1.2.3