aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
authorSong Liu <songliubraving@fb.com>2017-01-11 13:39:14 -0800
committerShaohua Li <shli@fb.com>2017-02-13 09:17:51 -0800
commit03b047f45c29dff02f913a0234ca0cc1ca51966f (patch)
tree4fff6abd3504a643c238a8255029b4b6ce73ab61 /drivers/md/raid5.c
parentEXPORT_SYMBOL radix_tree_replace_slot (diff)
downloadlinux-dev-03b047f45c29dff02f913a0234ca0cc1ca51966f.tar.xz
linux-dev-03b047f45c29dff02f913a0234ca0cc1ca51966f.zip
md/r5cache: enable chunk_aligned_read with write back cache
Chunk aligned read significantly reduces CPU usage of raid456. However, it is not safe to fully bypass the write back cache. This patch enables chunk aligned read with write back cache. For chunk aligned read, we track stripes in the write back cache at a bigger granularity, "big_stripe". Each chunk may contain more than one stripe (for example, a 256kB chunk contains 64 4kB pages, so this chunk contains 64 stripes). For chunk_aligned_read, these stripes are grouped into one big_stripe, so we only need one lookup for the whole chunk. For each big_stripe, struct big_stripe_info tracks how many stripes of this big_stripe are in the write back cache. These counters are tracked in a radix tree (big_stripe_tree). r5c_tree_index() is used to calculate keys for the radix tree. chunk_aligned_read() calls r5c_big_stripe_cached() to look up the big_stripe of each chunk in the tree. If this big_stripe is in the tree, chunk_aligned_read() aborts. This lookup is protected by rcu_read_lock(). It is necessary to remember whether a stripe is counted in big_stripe_tree. Instead of adding a new flag, we reuse existing flags: STRIPE_R5C_PARTIAL_STRIPE and STRIPE_R5C_FULL_STRIPE. If either of these two flags is set, the stripe is counted in big_stripe_tree. This requires moving set_bit(STRIPE_R5C_PARTIAL_STRIPE) to r5c_try_caching_write(); and moving clear_bit of STRIPE_R5C_PARTIAL_STRIPE and STRIPE_R5C_FULL_STRIPE to r5c_finish_stripe_write_out(). Signed-off-by: Song Liu <songliubraving@fb.com> Reviewed-by: NeilBrown <neilb@suse.com> Signed-off-by: Shaohua Li <shli@fb.com>
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--drivers/md/raid5.c20
1 files changed, 13 insertions, 7 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9d744a8961d1..b62f671a93ab 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -281,13 +281,13 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
atomic_dec(&conf->r5c_cached_partial_stripes);
list_add_tail(&sh->lru, &conf->r5c_full_stripe_list);
r5c_check_cached_full_stripe(conf);
- } else {
- /* partial stripe */
- if (!test_and_set_bit(STRIPE_R5C_PARTIAL_STRIPE,
- &sh->state))
- atomic_inc(&conf->r5c_cached_partial_stripes);
+ } else
+ /*
+ * STRIPE_R5C_PARTIAL_STRIPE is set in
+ * r5c_try_caching_write(). No need to
+ * set it again.
+ */
list_add_tail(&sh->lru, &conf->r5c_partial_stripe_list);
- }
}
}
}
@@ -5062,6 +5062,13 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
rdev->recovery_offset >= end_sector)))
rdev = NULL;
}
+
+ if (r5c_big_stripe_cached(conf, align_bi->bi_iter.bi_sector)) {
+ rcu_read_unlock();
+ bio_put(align_bi);
+ return 0;
+ }
+
if (rdev) {
sector_t first_bad;
int bad_sectors;
@@ -5418,7 +5425,6 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
* data on failed drives.
*/
if (rw == READ && mddev->degraded == 0 &&
- !r5c_is_writeback(conf->log) &&
mddev->reshape_position == MaxSector) {
bi = chunk_aligned_read(mddev, bi);
if (!bi)