1 files changed, 139 insertions, 67 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 08f806379b07..3c31f7f8aa65 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2901,91 +2901,163 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
 				  struct stripe_head_state *s,
 				  struct r6_state *r6s, int disks)
 {
-	int update_p = 0, update_q = 0;
-	struct r5dev *dev;
 	int pd_idx = sh->pd_idx;
 	int qd_idx = sh->qd_idx;
-	unsigned long cpu;
-	struct page *tmp_page;
+	struct r5dev *dev;
 
 	set_bit(STRIPE_HANDLE, &sh->state);
 
 	BUG_ON(s->failed > 2);
-	BUG_ON(s->uptodate < disks);
+
 	/* Want to check and possibly repair P and Q.
 	 * However there could be one 'failed' device, in which
 	 * case we can only check one of them, possibly using the
 	 * other to generate missing data
 	 */
-	cpu = get_cpu();
-	tmp_page = per_cpu_ptr(conf->percpu, cpu)->spare_page;
-	if (s->failed == r6s->q_failed) {
-		/* The only possible failed device holds 'Q', so it
-		 * makes sense to check P (If anything else were failed,
-		 * we would have used P to recreate it).
-		 */
-		compute_block_1(sh, pd_idx, 1);
-		if (!page_is_zero(sh->dev[pd_idx].page)) {
-			compute_block_1(sh, pd_idx, 0);
-			update_p = 1;
+
+	switch (sh->check_state) {
+	case check_state_idle:
+		/* start a new check operation if there are < 2 failures */
+		if (s->failed == r6s->q_failed) {
+			/* The only possible failed device holds Q, so it
+			 * makes sense to check P (If anything else were failed,
+			 * we would have used P to recreate it).
+			 */
+			sh->check_state = check_state_run;
 		}
-	}
-	if (!r6s->q_failed && s->failed < 2) {
-		/* q is not failed, and we didn't use it to generate
-		 * anything, so it makes sense to check it
-		 */
-		memcpy(page_address(tmp_page),
-		       page_address(sh->dev[qd_idx].page),
-		       STRIPE_SIZE);
-		compute_parity6(sh, UPDATE_PARITY);
-		if (memcmp(page_address(tmp_page),
-			   page_address(sh->dev[qd_idx].page),
-			   STRIPE_SIZE) != 0) {
-			clear_bit(STRIPE_INSYNC, &sh->state);
-			update_q = 1;
+		if (!r6s->q_failed && s->failed < 2) {
+			/* Q is not failed, and we didn't use it to generate
+			 * anything, so it makes sense to check it
+			 */
+			if (sh->check_state == check_state_run)
+				sh->check_state = check_state_run_pq;
+			else
+				sh->check_state = check_state_run_q;
 		}
-	}
-	put_cpu();
 
-	if (update_p || update_q) {
-		conf->mddev->resync_mismatches += STRIPE_SECTORS;
-		if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
-			/* don't try to repair!! */
-			update_p = update_q = 0;
-	}
+		/* discard potentially stale zero_sum_result */
+		sh->ops.zero_sum_result = 0;
 
-	/* now write out any block on a failed drive,
-	 * or P or Q if they need it
-	 */
+		if (sh->check_state == check_state_run) {
+			/* async_xor_zero_sum destroys the contents of P */
+			clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+			s->uptodate--;
+		}
+		if (sh->check_state >= check_state_run &&
+		    sh->check_state <= check_state_run_pq) {
+			/* async_syndrome_zero_sum preserves P and Q, so
+			 * no need to mark them !uptodate here
+			 */
+			set_bit(STRIPE_OP_CHECK, &s->ops_request);
+			break;
+		}
 
-	if (s->failed == 2) {
-		dev = &sh->dev[r6s->failed_num[1]];
-		s->locked++;
-		set_bit(R5_LOCKED, &dev->flags);
-		set_bit(R5_Wantwrite, &dev->flags);
-	}
-	if (s->failed >= 1) {
-		dev = &sh->dev[r6s->failed_num[0]];
-		s->locked++;
-		set_bit(R5_LOCKED, &dev->flags);
-		set_bit(R5_Wantwrite, &dev->flags);
-	}
+		/* we have 2-disk failure */
+		BUG_ON(s->failed != 2);
+		/* fall through */
+	case check_state_compute_result:
+		sh->check_state = check_state_idle;
 
-	if (update_p) {
-		dev = &sh->dev[pd_idx];
-		s->locked++;
-		set_bit(R5_LOCKED, &dev->flags);
-		set_bit(R5_Wantwrite, &dev->flags);
-	}
-	if (update_q) {
-		dev = &sh->dev[qd_idx];
-		s->locked++;
-		set_bit(R5_LOCKED, &dev->flags);
-		set_bit(R5_Wantwrite, &dev->flags);
-	}
-	clear_bit(STRIPE_DEGRADED, &sh->state);
+		/* check that a write has not made the stripe insync */
+		if (test_bit(STRIPE_INSYNC, &sh->state))
+			break;
 
-	set_bit(STRIPE_INSYNC, &sh->state);
+		/* now write out any block on a failed drive,
+		 * or P or Q if they were recomputed
+		 */
+		BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
+		if (s->failed == 2) {
+			dev = &sh->dev[r6s->failed_num[1]];
+			s->locked++;
+			set_bit(R5_LOCKED, &dev->flags);
+			set_bit(R5_Wantwrite, &dev->flags);
+		}
+		if (s->failed >= 1) {
+			dev = &sh->dev[r6s->failed_num[0]];
+			s->locked++;
+			set_bit(R5_LOCKED, &dev->flags);
+			set_bit(R5_Wantwrite, &dev->flags);
+		}
+		if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
+			dev = &sh->dev[pd_idx];
+			s->locked++;
+			set_bit(R5_LOCKED, &dev->flags);
+			set_bit(R5_Wantwrite, &dev->flags);
+		}
+		if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
+			dev = &sh->dev[qd_idx];
+			s->locked++;
+			set_bit(R5_LOCKED, &dev->flags);
+			set_bit(R5_Wantwrite, &dev->flags);
+		}
+		clear_bit(STRIPE_DEGRADED, &sh->state);
+
+		set_bit(STRIPE_INSYNC, &sh->state);
+		break;
+	case check_state_run:
+	case check_state_run_q:
+	case check_state_run_pq:
+		break; /* we will be called again upon completion */
+	case check_state_check_result:
+		sh->check_state = check_state_idle;
+
+		/* handle a successful check operation, if parity is correct
+		 * we are done.  Otherwise update the mismatch count and repair
+		 * parity if !MD_RECOVERY_CHECK
+		 */
+		if (sh->ops.zero_sum_result == 0) {
+			/* both parities are correct */
+			if (!s->failed)
+				set_bit(STRIPE_INSYNC, &sh->state);
+			else {
+				/* in contrast to the raid5 case we can validate
+				 * parity, but still have a failure to write
+				 * back
+				 */
+				sh->check_state = check_state_compute_result;
+				/* Returning at this point means that we may go
+				 * off and bring p and/or q uptodate again so
+				 * we make sure to check zero_sum_result again
+				 * to verify if p or q need writeback
+				 */
+			}
+		} else {
+			conf->mddev->resync_mismatches += STRIPE_SECTORS;
+			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+				/* don't try to repair!! */
+				set_bit(STRIPE_INSYNC, &sh->state);
+			else {
+				int *target = &sh->ops.target;
+
+				sh->ops.target = -1;
+				sh->ops.target2 = -1;
+				sh->check_state = check_state_compute_run;
+				set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+				set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+				if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
+					set_bit(R5_Wantcompute,
+						&sh->dev[pd_idx].flags);
+					*target = pd_idx;
+					target = &sh->ops.target2;
+					s->uptodate++;
+				}
+				if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
+					set_bit(R5_Wantcompute,
+						&sh->dev[qd_idx].flags);
+					*target = qd_idx;
+					s->uptodate++;
+				}
+			}
+		}
+		break;
+	case check_state_compute_run:
+		break;
+	default:
+		printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
+		       __func__, sh->check_state,
+		       (unsigned long long) sh->sector);
+		BUG();
+	}
 }
 
 static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,