From ed2e063f92c44c891ccd883e289dde6ca870edcc Mon Sep 17 00:00:00 2001
From: Yu Kuai
Date: Fri, 16 Sep 2022 19:34:24 +0800
Subject: md/raid10: factor out code from wait_barrier() to stop_waiting_barrier()

Currently the nasty condition in wait_barrier() is hard to read. This
patch factors out the condition into a function. There are no
functional changes.

Signed-off-by: Yu Kuai
Acked-by: Paul Menzel
Reviewed-by: Logan Gunthorpe
Acked-by: Guoqing Jiang
Signed-off-by: Song Liu
---
 drivers/md/raid10.c | 50 ++++++++++++++++++++++++++++----------------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 077c7cdefcd4..310a6132304f 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -957,41 +957,47 @@ static void lower_barrier(struct r10conf *conf)
 	wake_up(&conf->wait_barrier);
 }
 
+static bool stop_waiting_barrier(struct r10conf *conf)
+{
+	struct bio_list *bio_list = current->bio_list;
+
+	/* barrier is dropped */
+	if (!conf->barrier)
+		return true;
+
+	/*
+	 * If there are already pending requests (preventing the barrier from
+	 * rising completely), and the pre-process bio queue isn't empty, then
+	 * don't wait, as we need to empty that queue to get the nr_pending
+	 * count down.
+	 */
+	if (atomic_read(&conf->nr_pending) && bio_list &&
+	    (!bio_list_empty(&bio_list[0]) || !bio_list_empty(&bio_list[1])))
+		return true;
+
+	/* move on if recovery thread is blocked by us */
+	if (conf->mddev->thread->tsk == current &&
+	    test_bit(MD_RECOVERY_RUNNING, &conf->mddev->recovery) &&
+	    conf->nr_queued > 0)
+		return true;
+
+	return false;
+}
+
 static bool wait_barrier(struct r10conf *conf, bool nowait)
 {
 	bool ret = true;
 
 	spin_lock_irq(&conf->resync_lock);
 	if (conf->barrier) {
-		struct bio_list *bio_list = current->bio_list;
 		conf->nr_waiting++;
-		/* Wait for the barrier to drop.
-		 * However if there are already pending
-		 * requests (preventing the barrier from
-		 * rising completely), and the
-		 * pre-process bio queue isn't empty,
-		 * then don't wait, as we need to empty
-		 * that queue to get the nr_pending
-		 * count down.
-		 */
 		/* Return false when nowait flag is set */
 		if (nowait) {
 			ret = false;
 		} else {
 			raid10_log(conf->mddev, "wait barrier");
 			wait_event_lock_irq(conf->wait_barrier,
-					    !conf->barrier ||
-					    (atomic_read(&conf->nr_pending) &&
-					     bio_list &&
-					     (!bio_list_empty(&bio_list[0]) ||
-					      !bio_list_empty(&bio_list[1]))) ||
-					    /* move on if recovery thread is
-					     * blocked by us
-					     */
-					    (conf->mddev->thread->tsk == current &&
-					     test_bit(MD_RECOVERY_RUNNING,
						      &conf->mddev->recovery) &&
-					     conf->nr_queued > 0),
+					    stop_waiting_barrier(conf),
 					    conf->resync_lock);
 		}
 		conf->nr_waiting--;
-- 
cgit v1.2.3-59-g8ed1b
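A note on the refactoring above: wait_event_lock_irq() re-evaluates its
condition with the lock held each time the task wakes up, so the extracted
stop_waiting_barrier() runs under 'resync_lock' exactly as the inline
condition did. Roughly, the macro behaves like the sketch below (a
simplified model, not the real expansion, which also handles waitqueue
registration and task state):

    while (!stop_waiting_barrier(conf)) {
            /* drop the lock so wakers can make progress */
            spin_unlock_irq(&conf->resync_lock);
            schedule();     /* sleep until wake_up(&conf->wait_barrier) */
            spin_lock_irq(&conf->resync_lock);
    }

Since only the condition moved, behaviour is unchanged; the helper simply
names the three reasons a waiter may proceed.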
From 0de57e541bb4207c0602eca271c6531c305e9c5d Mon Sep 17 00:00:00 2001
From: Yu Kuai
Date: Fri, 16 Sep 2022 19:34:25 +0800
Subject: md/raid10: don't modify 'nr_waiting' in wait_barrier() for the case nowait

For the nowait case in wait_barrier(), there is no point in increasing
nr_waiting and then decreasing it again immediately.

Signed-off-by: Yu Kuai
Reviewed-by: Logan Gunthorpe
Acked-by: Guoqing Jiang
Signed-off-by: Song Liu
---
 drivers/md/raid10.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 310a6132304f..834a34274976 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -990,17 +990,17 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
 
 	spin_lock_irq(&conf->resync_lock);
 	if (conf->barrier) {
-		conf->nr_waiting++;
 		/* Return false when nowait flag is set */
 		if (nowait) {
 			ret = false;
 		} else {
+			conf->nr_waiting++;
 			raid10_log(conf->mddev, "wait barrier");
 			wait_event_lock_irq(conf->wait_barrier,
 					    stop_waiting_barrier(conf),
 					    conf->resync_lock);
+			conf->nr_waiting--;
 		}
-		conf->nr_waiting--;
 		if (!conf->nr_waiting)
 			wake_up(&conf->wait_barrier);
 	}
-- 
cgit v1.2.3-59-g8ed1b

From 0c0be98bbe67662a7d2bf8381106bfca0e31ed72 Mon Sep 17 00:00:00 2001
From: Yu Kuai
Date: Fri, 16 Sep 2022 19:34:26 +0800
Subject: md/raid10: prevent unnecessary calls to wake_up() in fast path

Currently, wake_up() is called unconditionally in fast paths such as
raid10_make_request(), which causes lock contention under high
concurrency:

  raid10_make_request
   wake_up
    __wake_up_common_lock
     spin_lock_irqsave

Improve performance by only calling wake_up() if the waitqueue is not
empty in allow_barrier() and raid10_make_request().

Signed-off-by: Yu Kuai
Reviewed-by: Logan Gunthorpe
Acked-by: Guoqing Jiang
Signed-off-by: Song Liu
---
 drivers/md/raid10.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 834a34274976..461c8a79fb99 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -274,6 +274,12 @@ static void put_buf(struct r10bio *r10_bio)
 	lower_barrier(conf);
 }
 
+static void wake_up_barrier(struct r10conf *conf)
+{
+	if (wq_has_sleeper(&conf->wait_barrier))
+		wake_up(&conf->wait_barrier);
+}
+
 static void reschedule_retry(struct r10bio *r10_bio)
 {
 	unsigned long flags;
@@ -1015,7 +1021,7 @@ static void allow_barrier(struct r10conf *conf)
 {
 	if ((atomic_dec_and_test(&conf->nr_pending)) ||
 			(conf->array_freeze_pending))
-		wake_up(&conf->wait_barrier);
+		wake_up_barrier(conf);
 }
 
 static void freeze_array(struct r10conf *conf, int extra)
@@ -1891,7 +1897,7 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
 	__make_request(mddev, bio, sectors);
 
 	/* In case raid10d snuck in to freeze_array */
-	wake_up(&conf->wait_barrier);
+	wake_up_barrier(conf);
 	return true;
 }
-- 
cgit v1.2.3-59-g8ed1b
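Why the wq_has_sleeper() test above is safe rather than a racy shortcut:
unlike a bare waitqueue_active() check, wq_has_sleeper() issues a full
memory barrier before inspecting the waitqueue, which pairs with the
ordering implied when a waiter enqueues itself. A sketch of the argument,
written as a comment on the helper the patch adds:

    static void wake_up_barrier(struct r10conf *conf)
    {
            /*
             * Waiter:                        Waker:
             *   enqueue on wait_barrier        update nr_pending/barrier
             *   smp_mb()                       smp_mb() (wq_has_sleeper())
             *   re-check wait condition        check waitqueue non-empty
             *
             * With a full barrier on both sides, at least one side sees
             * the other's store, so a sleeper is never missed; and when
             * the waitqueue is empty, wake_up() would have been a no-op
             * anyway, so skipping it only avoids the waitqueue lock.
             */
            if (wq_has_sleeper(&conf->wait_barrier))
                    wake_up(&conf->wait_barrier);
    }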
From 4f350284a7306b3dff676caeafd3faf1b5c068d5 Mon Sep 17 00:00:00 2001
From: Yu Kuai
Date: Fri, 16 Sep 2022 19:34:27 +0800
Subject: md/raid10: fix improper BUG_ON() in raise_barrier()

'conf->barrier' is protected by 'conf->resync_lock'; reading
'conf->barrier' without holding the lock is wrong.

Signed-off-by: Yu Kuai
Reviewed-by: Logan Gunthorpe
Acked-by: Guoqing Jiang
Signed-off-by: Song Liu
---
 drivers/md/raid10.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 461c8a79fb99..2970a73d9f5c 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -936,8 +936,8 @@ static void flush_pending_writes(struct r10conf *conf)
 
 static void raise_barrier(struct r10conf *conf, int force)
 {
-	BUG_ON(force && !conf->barrier);
 	spin_lock_irq(&conf->resync_lock);
+	BUG_ON(force && !conf->barrier);
 
 	/* Wait until no block IO is waiting (unless 'force') */
 	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-- 
cgit v1.2.3-59-g8ed1b
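To see what the one-line move above fixes: before the patch, the BUG_ON()
read 'conf->barrier' while another CPU could be modifying it under
'resync_lock', so the assertion could test a value that no longer holds by
the time the lock is taken. A hypothetical interleaving, for illustration
only:

    /*
     * CPU0: raise_barrier(conf, 1)        CPU1: lower_barrier(conf)
     *
     * BUG_ON(force && !conf->barrier);
     *         reads barrier == 1, passes
     *                                      spin_lock_irqsave(...);
     *                                      conf->barrier--;     now 0
     *                                      spin_unlock_irqrestore(...);
     * spin_lock_irq(&conf->resync_lock);
     *         proceeds, although the condition it asserted is stale
     */

Moving the BUG_ON() after spin_lock_irq() makes the assertion and the code
that relies on it observe one consistent value of 'conf->barrier'.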
From b9b083f9044abf89f3391fbc196ddece68ac9dba Mon Sep 17 00:00:00 2001
From: Yu Kuai
Date: Fri, 16 Sep 2022 19:34:28 +0800
Subject: md/raid10: convert resync_lock to use seqlock

Currently, wait_barrier() holds 'resync_lock' to read 'conf->barrier',
and IO can't be dispatched until 'barrier' is dropped. Since holding the
'barrier' is not common, convert 'resync_lock' to a seqlock so that
holding the lock can be avoided in the fast path.

Signed-off-by: Yu Kuai
Reviewed-and-Tested-by: Logan Gunthorpe
Signed-off-by: Song Liu
---
 drivers/md/raid10.c | 87 +++++++++++++++++++++++++++++++++++------------------
 drivers/md/raid10.h |  2 +-
 2 files changed, 59 insertions(+), 30 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 2970a73d9f5c..58c711912875 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
 
 #include "raid1-10.c"
 
+#define NULL_CMD
+#define cmd_before(conf, cmd) \
+	do { \
+		write_sequnlock_irq(&(conf)->resync_lock); \
+		cmd; \
+	} while (0)
+#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
+
+#define wait_event_barrier_cmd(conf, cond, cmd) \
+	wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
+		       cmd_after(conf))
+
+#define wait_event_barrier(conf, cond) \
+	wait_event_barrier_cmd(conf, cond, NULL_CMD)
+
 /*
  * for resync bio, r10bio pointer can be retrieved from the per-bio
  * 'struct resync_pages'.
@@ -936,30 +951,29 @@ static void flush_pending_writes(struct r10conf *conf)
 
 static void raise_barrier(struct r10conf *conf, int force)
 {
-	spin_lock_irq(&conf->resync_lock);
+	write_seqlock_irq(&conf->resync_lock);
 	BUG_ON(force && !conf->barrier);
 
 	/* Wait until no block IO is waiting (unless 'force') */
-	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-			    conf->resync_lock);
+	wait_event_barrier(conf, force || !conf->nr_waiting);
 
 	/* block any new IO from starting */
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);
 
 	/* Now wait for all pending IO to complete */
-	wait_event_lock_irq(conf->wait_barrier,
-			    !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock);
+	wait_event_barrier(conf, !atomic_read(&conf->nr_pending) &&
+				 conf->barrier < RESYNC_DEPTH);
 
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static void lower_barrier(struct r10conf *conf)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	conf->barrier--;
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+
+	write_seqlock_irqsave(&conf->resync_lock, flags);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
+	write_sequnlock_irqrestore(&conf->resync_lock, flags);
 	wake_up(&conf->wait_barrier);
 }
 
@@ -990,11 +1004,31 @@ static bool stop_waiting_barrier(struct r10conf *conf)
 	return false;
 }
 
+static bool wait_barrier_nolock(struct r10conf *conf)
+{
+	unsigned int seq = read_seqbegin(&conf->resync_lock);
+
+	if (READ_ONCE(conf->barrier))
+		return false;
+
+	atomic_inc(&conf->nr_pending);
+	if (!read_seqretry(&conf->resync_lock, seq))
+		return true;
+
+	if (atomic_dec_and_test(&conf->nr_pending))
+		wake_up_barrier(conf);
+
+	return false;
+}
+
 static bool wait_barrier(struct r10conf *conf, bool nowait)
 {
 	bool ret = true;
 
-	spin_lock_irq(&conf->resync_lock);
+	if (wait_barrier_nolock(conf))
+		return true;
+
+	write_seqlock_irq(&conf->resync_lock);
 	if (conf->barrier) {
 		/* Return false when nowait flag is set */
 		if (nowait) {
@@ -1002,9 +1036,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
 		} else {
 			conf->nr_waiting++;
 			raid10_log(conf->mddev, "wait barrier");
-			wait_event_lock_irq(conf->wait_barrier,
-					    stop_waiting_barrier(conf),
-					    conf->resync_lock);
+			wait_event_barrier(conf, stop_waiting_barrier(conf));
 			conf->nr_waiting--;
 		}
 		if (!conf->nr_waiting)
@@ -1013,7 +1045,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
 	/* Only increment nr_pending when we wait */
 	if (ret)
 		atomic_inc(&conf->nr_pending);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 
 	return ret;
 }
@@ -1038,27 +1070,24 @@ static void freeze_array(struct r10conf *conf, int extra)
 	 * must match the number of pending IOs (nr_pending) before
 	 * we continue.
 	 */
-	spin_lock_irq(&conf->resync_lock);
+	write_seqlock_irq(&conf->resync_lock);
 	conf->array_freeze_pending++;
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);
 	conf->nr_waiting++;
-	wait_event_lock_irq_cmd(conf->wait_barrier,
-				atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
-				conf->resync_lock,
-				flush_pending_writes(conf));
-
+	wait_event_barrier_cmd(conf, atomic_read(&conf->nr_pending) ==
+			conf->nr_queued + extra, flush_pending_writes(conf));
 	conf->array_freeze_pending--;
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static void unfreeze_array(struct r10conf *conf)
 {
 	/* reverse the effect of the freeze */
-	spin_lock_irq(&conf->resync_lock);
-	conf->barrier--;
+	write_seqlock_irq(&conf->resync_lock);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
 	conf->nr_waiting--;
 	wake_up(&conf->wait_barrier);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static sector_t choose_data_offset(struct r10bio *r10_bio,
@@ -4045,7 +4074,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 	INIT_LIST_HEAD(&conf->retry_list);
 	INIT_LIST_HEAD(&conf->bio_end_io_list);
 
-	spin_lock_init(&conf->resync_lock);
+	seqlock_init(&conf->resync_lock);
 	init_waitqueue_head(&conf->wait_barrier);
 	atomic_set(&conf->nr_pending, 0);
@@ -4364,7 +4393,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
 			rdev->new_raid_disk = rdev->raid_disk * 2;
 			rdev->sectors = size;
 		}
-		conf->barrier = 1;
+		WRITE_ONCE(conf->barrier, 1);
 	}
 
 	return conf;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 5c0804d8bb1f..8c072ce0bc54 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -76,7 +76,7 @@ struct r10conf {
 	/* queue pending writes and submit them on unplug */
 	struct bio_list		pending_bio_list;
 
-	spinlock_t		resync_lock;
+	seqlock_t		resync_lock;
 	atomic_t		nr_pending;
 	int			nr_waiting;
 	int			nr_queued;
-- 
cgit v1.2.3-59-g8ed1b
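For readers new to seqlocks: the fast path added in wait_barrier_nolock()
is the standard read_seqbegin()/read_seqretry() pattern, with the twist
that the read section has a side effect (the nr_pending increment) which
must be rolled back if a writer raced with it. The plain pattern, assuming
shared state guarded by a seqlock_t, looks like this:

    unsigned int seq;

    do {
            seq = read_seqbegin(&conf->resync_lock);
            /* read shared state here; no side effects allowed */
    } while (read_seqretry(&conf->resync_lock, seq));

wait_barrier_nolock() cannot simply retry in a loop, because the increment
is immediately visible to raise_barrier(); instead it attempts the read
section once and undoes the increment when read_seqretry() reports
interference, falling back to the locked slow path. The WRITE_ONCE() and
READ_ONCE() pairs on 'conf->barrier' exist because the fast path now reads
that field without holding 'resync_lock'.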