7 files changed, 70 insertions, 61 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index b04f98df94ea..835def11419d 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (C) 2003 Christophe Saout <christophe@saout.de>
  * Copyright (C) 2004 Clemens Fruhwirth <clemens@endorphin.org>
- * Copyright (C) 2006-2007 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved.
  *
  * This file is released under the GPL.
  */
@@ -93,6 +93,8 @@ struct crypt_config {
 
 	struct workqueue_struct *io_queue;
 	struct workqueue_struct *crypt_queue;
+	wait_queue_head_t writeq;
+
 	/*
 	 * crypto related data
 	 */
@@ -331,14 +333,7 @@ static void crypt_convert_init(struct crypt_config *cc,
 	ctx->idx_out = bio_out ? bio_out->bi_idx : 0;
 	ctx->sector = sector + cc->iv_offset;
 	init_completion(&ctx->restart);
-	/*
-	 * Crypto operation can be asynchronous,
-	 * ctx->pending is increased after request submission.
-	 * We need to ensure that we don't call the crypt finish
-	 * operation before pending got incremented
-	 * (dependent on crypt submission return code).
-	 */
-	atomic_set(&ctx->pending, 2);
+	atomic_set(&ctx->pending, 1);
 }
 
 static int crypt_convert_block(struct crypt_config *cc,
@@ -411,43 +406,42 @@ static void crypt_alloc_req(struct crypt_config *cc,
 static int crypt_convert(struct crypt_config *cc,
 			 struct convert_context *ctx)
 {
-	int r = 0;
+	int r;
 
 	while(ctx->idx_in < ctx->bio_in->bi_vcnt &&
 	      ctx->idx_out < ctx->bio_out->bi_vcnt) {
 
 		crypt_alloc_req(cc, ctx);
 
+		atomic_inc(&ctx->pending);
+
 		r = crypt_convert_block(cc, ctx, cc->req);
 
 		switch (r) {
+		/* async */
 		case -EBUSY:
 			wait_for_completion(&ctx->restart);
 			INIT_COMPLETION(ctx->restart);
 			/* fall through*/
 		case -EINPROGRESS:
-			atomic_inc(&ctx->pending);
 			cc->req = NULL;
-			r = 0;
-			/* fall through*/
+			ctx->sector++;
+			continue;
+
+		/* sync */
 		case 0:
+			atomic_dec(&ctx->pending);
 			ctx->sector++;
 			continue;
-		}
 
-		break;
+		/* error */
+		default:
+			atomic_dec(&ctx->pending);
+			return r;
+		}
 	}
 
-	/*
-	 * If there are pending crypto operation run async
-	 * code. Otherwise process return code synchronously.
-	 * The step of 2 ensures that async finish doesn't
-	 * call crypto finish too early.
-	 */
-	if (atomic_sub_return(2, &ctx->pending))
-		return -EINPROGRESS;
-
-	return r;
+	return 0;
 }
 
 static void dm_crypt_bio_destructor(struct bio *bio)
@@ -624,8 +618,10 @@ static void kcryptd_io_read(struct dm_crypt_io *io)
 static void kcryptd_io_write(struct dm_crypt_io *io)
 {
 	struct bio *clone = io->ctx.bio_out;
+	struct crypt_config *cc = io->target->private;
 
 	generic_make_request(clone);
+	wake_up(&cc->writeq);
 }
 
 static void kcryptd_io(struct work_struct *work)
@@ -698,7 +694,8 @@ static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io)
 
 		r = crypt_convert(cc, &io->ctx);
 
-		if (r != -EINPROGRESS) {
+		if (atomic_dec_and_test(&io->ctx.pending)) {
+			/* processed, no running async crypto  */
 			kcryptd_crypt_write_io_submit(io, r, 0);
 			if (unlikely(r < 0))
 				return;
@@ -706,8 +703,12 @@ static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io)
 			atomic_inc(&io->pending);
 
 		/* out of memory -> run queues */
-		if (unlikely(remaining))
+		if (unlikely(remaining)) {
+			/* wait for async crypto then reinitialize pending */
+			wait_event(cc->writeq, !atomic_read(&io->ctx.pending));
+			atomic_set(&io->ctx.pending, 1);
 			congestion_wait(WRITE, HZ/100);
+		}
 	}
 }
 
@@ -746,7 +747,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
 
 	r = crypt_convert(cc, &io->ctx);
 
-	if (r != -EINPROGRESS)
+	if (atomic_dec_and_test(&io->ctx.pending))
 		kcryptd_crypt_read_done(io, r);
 
 	crypt_dec_pending(io);
@@ -1047,6 +1048,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_crypt_queue;
 	}
 
+	init_waitqueue_head(&cc->writeq);
 	ti->private = cc;
 	return 0;
 
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index b8e342fe7586..8f25f628ef16 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -114,7 +114,7 @@ static void dec_count(struct io *io, unsigned int region, int error)
 			wake_up_process(io->sleeper);
 
 		else {
-			int r = io->error;
+			unsigned long r = io->error;
 			io_notify_fn fn = io->callback;
 			void *context = io->context;
 
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 51605870f898..762cb086bb7f 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -753,7 +753,7 @@ out:
  * are in the no-sync state.  We have to recover these by
  * recopying from the default mirror to all the others.
  *---------------------------------------------------------------*/
-static void recovery_complete(int read_err, unsigned int write_err,
+static void recovery_complete(int read_err, unsigned long write_err,
 			      void *context)
 {
 	struct region *reg = (struct region *)context;
@@ -767,7 +767,7 @@ static void recovery_complete(int read_err, unsigned int write_err,
 	}
 
 	if (write_err) {
-		DMERR_LIMIT("Write error during recovery (error = 0x%x)",
+		DMERR_LIMIT("Write error during recovery (error = 0x%lx)",
 			    write_err);
 		/*
 		 * Bits correspond to devices (excluding default mirror).
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index ae24eab8cd81..4dc8a43c034b 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -804,7 +804,7 @@ static void commit_callback(void *context, int success)
  * Called when the copy I/O has finished.  kcopyd actually runs
  * this code so don't block.
  */
-static void copy_callback(int read_err, unsigned int write_err, void *context)
+static void copy_callback(int read_err, unsigned long write_err, void *context)
 {
 	struct dm_snap_pending_exception *pe = context;
 	struct dm_snapshot *s = pe->snap;
diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c
index f3831f31223e..e76b52ade690 100644
--- a/drivers/md/kcopyd.c
+++ b/drivers/md/kcopyd.c
@@ -169,7 +169,7 @@ struct kcopyd_job {
 	 * Error state of the job.
 	 */
 	int read_err;
-	unsigned int write_err;
+	unsigned long write_err;
 
 	/*
 	 * Either READ or WRITE
@@ -293,7 +293,7 @@ static int run_complete_job(struct kcopyd_job *job)
 {
 	void *context = job->context;
 	int read_err = job->read_err;
-	unsigned int write_err = job->write_err;
+	unsigned long write_err = job->write_err;
 	kcopyd_notify_fn fn = job->fn;
 	struct kcopyd_client *kc = job->kc;
 
@@ -396,7 +396,7 @@ static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *))
 		if (r < 0) {
 			/* error this rogue job */
 			if (job->rw == WRITE)
-				job->write_err = (unsigned int) -1;
+				job->write_err = (unsigned long) -1L;
 			else
 				job->read_err = 1;
 			push(&_complete_jobs, job);
@@ -448,8 +448,8 @@ static void dispatch_job(struct kcopyd_job *job)
 }
 
 #define SUB_JOB_SIZE 128
-static void segment_complete(int read_err,
-			     unsigned int write_err, void *context)
+static void segment_complete(int read_err, unsigned long write_err,
+			     void *context)
 {
 	/* FIXME: tidy this function */
 	sector_t progress = 0;
diff --git a/drivers/md/kcopyd.h b/drivers/md/kcopyd.h
index 4621ea055c0e..4845f2a0c676 100644
--- a/drivers/md/kcopyd.h
+++ b/drivers/md/kcopyd.h
@@ -32,8 +32,8 @@ void kcopyd_client_destroy(struct kcopyd_client *kc);
  * read_err is a boolean,
  * write_err is a bitset, with 1 bit for each destination region
  */
-typedef void (*kcopyd_notify_fn)(int read_err,
-				 unsigned int write_err, void *context);
+typedef void (*kcopyd_notify_fn)(int read_err, unsigned long write_err,
+				 void *context);
 
 int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
 		unsigned int num_dests, struct io_region *dests,
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c574cf5efb5c..b162b839a662 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2348,25 +2348,15 @@ static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
 static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 				struct stripe_head_state *s, int disks)
 {
+	int canceled_check = 0;
+
 	set_bit(STRIPE_HANDLE, &sh->state);
-	/* Take one of the following actions:
-	 * 1/ start a check parity operation if (uptodate == disks)
-	 * 2/ finish a check parity operation and act on the result
-	 * 3/ skip to the writeback section if we previously
-	 *    initiated a recovery operation
-	 */
-	if (s->failed == 0 &&
-	    !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
-		if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
-			BUG_ON(s->uptodate != disks);
-			clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
-			sh->ops.count++;
-			s->uptodate--;
-		} else if (
-		       test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
-			clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
-			clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
 
+	/* complete a check operation */
+	if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
+	    clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
+	    clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
+		if (s->failed == 0) {
 			if (sh->ops.zero_sum_result == 0)
 				/* parity is correct (on disc,
 				 * not in buffer any more)
@@ -2391,7 +2381,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 					s->uptodate++;
 				}
 			}
-		}
+		} else
+			canceled_check = 1; /* STRIPE_INSYNC is not set */
 	}
 
 	/* check if we can clear a parity disk reconstruct */
@@ -2404,12 +2395,28 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 		clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
 	}
 
+	/* start a new check operation if there are no failures, the stripe is
+	 * not insync, and a repair is not in flight
+	 */
+	if (s->failed == 0 &&
+	    !test_bit(STRIPE_INSYNC, &sh->state) &&
+	    !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
+		if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
+			BUG_ON(s->uptodate != disks);
+			clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
+			sh->ops.count++;
+			s->uptodate--;
+		}
+	}
+
 	/* Wait for check parity and compute block operations to complete
-	 * before write-back
+	 * before write-back.  If a failure occurred while the check operation
+	 * was in flight we need to cycle this stripe through handle_stripe
+	 * since the parity block may not be uptodate
 	 */
-	if (!test_bit(STRIPE_INSYNC, &sh->state) &&
-		!test_bit(STRIPE_OP_CHECK, &sh->ops.pending) &&
-		!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
+	if (!canceled_check && !test_bit(STRIPE_INSYNC, &sh->state) &&
+	    !test_bit(STRIPE_OP_CHECK, &sh->ops.pending) &&
+	    !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
 		struct r5dev *dev;
 		/* either failed parity check, or recovery is happening */
 		if (s->failed == 0)