From b88efd43f900d608560211a18a38d450f8192948 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Fri, 9 Sep 2016 19:26:19 -0400
Subject: dm mpath: delay the requeue of blk-mq requests while all paths down

Return DM_MAPIO_DELAY_REQUEUE from .clone_and_map_rq. Also, return
false from .busy, if all paths are down, so that blk-mq requests get
mapped via .clone_and_map_rq -- which results in
DM_MAPIO_DELAY_REQUEUE being returned to dm-rq.

This change allows for a noticeable reduction in cpu utilization
(reduced kworker load) while all paths are down, e.g.:

system CPU idleness (as measured by fio's --idle-prof=system):
before: system: 86.58%
after: system: 98.60%

Signed-off-by: Mike Snitzer
Reviewed-by: Hannes Reinecke
---
 drivers/md/dm-mpath.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index f69715bf0575..f31fa1364abc 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -550,9 +550,9 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
 	pgpath = choose_pgpath(m, nr_bytes);
 
 	if (!pgpath) {
-		if (!must_push_back_rq(m))
-			r = -EIO;	/* Failed */
-		return r;
+		if (must_push_back_rq(m))
+			return DM_MAPIO_DELAY_REQUEUE;
+		return -EIO;	/* Failed */
 	} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
 		   test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
 		pg_init_all_paths(m);
@@ -1992,11 +1992,14 @@ static int multipath_busy(struct dm_target *ti)
 	struct priority_group *pg, *next_pg;
 	struct pgpath *pgpath;
 
-	/* pg_init in progress or no paths available */
-	if (atomic_read(&m->pg_init_in_progress) ||
-	    (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)))
+	/* pg_init in progress */
+	if (atomic_read(&m->pg_init_in_progress))
 		return true;
 
+	/* no paths available, for blk-mq: rely on IO mapping to delay requeue */
+	if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
+		return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
+
 	/* Guess which priority_group will be used at next mapping time */
 	pg = lockless_dereference(m->current_pg);
 	next_pg = lockless_dereference(m->next_pg);
-- 
cgit v1.2.3-59-g8ed1b