diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bio.c | 12 | ||||
-rw-r--r-- | block/blk-core.c | 30 | ||||
-rw-r--r-- | block/blk-mq-sched.c | 181 | ||||
-rw-r--r-- | block/blk-mq-sched.h | 25 | ||||
-rw-r--r-- | block/blk-mq-tag.c | 3 | ||||
-rw-r--r-- | block/blk-mq.c | 123 | ||||
-rw-r--r-- | block/blk-mq.h | 2 | ||||
-rw-r--r-- | block/blk-stat.c | 4 | ||||
-rw-r--r-- | block/blk-sysfs.c | 2 | ||||
-rw-r--r-- | block/elevator.c | 126 |
10 files changed, 324 insertions, 184 deletions
diff --git a/block/bio.c b/block/bio.c index 5eec5e08417f..e75878f8b14a 100644 --- a/block/bio.c +++ b/block/bio.c @@ -376,10 +376,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs) bio_list_init(&punt); bio_list_init(&nopunt); - while ((bio = bio_list_pop(current->bio_list))) + while ((bio = bio_list_pop(¤t->bio_list[0]))) bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); + current->bio_list[0] = nopunt; - *current->bio_list = nopunt; + bio_list_init(&nopunt); + while ((bio = bio_list_pop(¤t->bio_list[1]))) + bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); + current->bio_list[1] = nopunt; spin_lock(&bs->rescue_lock); bio_list_merge(&bs->rescue_list, &punt); @@ -466,7 +470,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) * we retry with the original gfp_flags. */ - if (current->bio_list && !bio_list_empty(current->bio_list)) + if (current->bio_list && + (!bio_list_empty(¤t->bio_list[0]) || + !bio_list_empty(¤t->bio_list[1]))) gfp_mask &= ~__GFP_DIRECT_RECLAIM; p = mempool_alloc(bs->bio_pool, gfp_mask); diff --git a/block/blk-core.c b/block/blk-core.c index 0eeb99ef654f..d772c221cc17 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1973,7 +1973,14 @@ end_io: */ blk_qc_t generic_make_request(struct bio *bio) { - struct bio_list bio_list_on_stack; + /* + * bio_list_on_stack[0] contains bios submitted by the current + * make_request_fn. + * bio_list_on_stack[1] contains bios that were submitted before + * the current make_request_fn, but that haven't been processed + * yet. + */ + struct bio_list bio_list_on_stack[2]; blk_qc_t ret = BLK_QC_T_NONE; if (!generic_make_request_checks(bio)) @@ -1990,7 +1997,7 @@ blk_qc_t generic_make_request(struct bio *bio) * should be added at the tail */ if (current->bio_list) { - bio_list_add(current->bio_list, bio); + bio_list_add(¤t->bio_list[0], bio); goto out; } @@ -2009,18 +2016,17 @@ blk_qc_t generic_make_request(struct bio *bio) * bio_list, and call into ->make_request() again. */ BUG_ON(bio->bi_next); - bio_list_init(&bio_list_on_stack); - current->bio_list = &bio_list_on_stack; + bio_list_init(&bio_list_on_stack[0]); + current->bio_list = bio_list_on_stack; do { struct request_queue *q = bdev_get_queue(bio->bi_bdev); if (likely(blk_queue_enter(q, false) == 0)) { - struct bio_list hold; struct bio_list lower, same; /* Create a fresh bio_list for all subordinate requests */ - hold = bio_list_on_stack; - bio_list_init(&bio_list_on_stack); + bio_list_on_stack[1] = bio_list_on_stack[0]; + bio_list_init(&bio_list_on_stack[0]); ret = q->make_request_fn(q, bio); blk_queue_exit(q); @@ -2030,19 +2036,19 @@ blk_qc_t generic_make_request(struct bio *bio) */ bio_list_init(&lower); bio_list_init(&same); - while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL) + while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) if (q == bdev_get_queue(bio->bi_bdev)) bio_list_add(&same, bio); else bio_list_add(&lower, bio); /* now assemble so we handle the lowest level first */ - bio_list_merge(&bio_list_on_stack, &lower); - bio_list_merge(&bio_list_on_stack, &same); - bio_list_merge(&bio_list_on_stack, &hold); + bio_list_merge(&bio_list_on_stack[0], &lower); + bio_list_merge(&bio_list_on_stack[0], &same); + bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); } else { bio_io_error(bio); } - bio = bio_list_pop(current->bio_list); + bio = bio_list_pop(&bio_list_on_stack[0]); } while (bio); current->bio_list = NULL; /* deactivate */ diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 09af8ff18719..c974a1bbf4cb 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -171,7 +171,8 @@ void blk_mq_sched_put_request(struct request *rq) void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) { - struct elevator_queue *e = hctx->queue->elevator; + struct request_queue *q = hctx->queue; + struct elevator_queue *e = q->elevator; const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request; bool did_work = false; LIST_HEAD(rq_list); @@ -203,10 +204,10 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) */ if (!list_empty(&rq_list)) { blk_mq_sched_mark_restart_hctx(hctx); - did_work = blk_mq_dispatch_rq_list(hctx, &rq_list); + did_work = blk_mq_dispatch_rq_list(q, &rq_list); } else if (!has_sched_dispatch) { blk_mq_flush_busy_ctxs(hctx, &rq_list); - blk_mq_dispatch_rq_list(hctx, &rq_list); + blk_mq_dispatch_rq_list(q, &rq_list); } /* @@ -222,7 +223,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) if (!rq) break; list_add(&rq->queuelist, &rq_list); - } while (blk_mq_dispatch_rq_list(hctx, &rq_list)); + } while (blk_mq_dispatch_rq_list(q, &rq_list)); } } @@ -317,25 +318,68 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, return true; } -static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) +static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) { if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) { clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - if (blk_mq_hctx_has_pending(hctx)) + if (blk_mq_hctx_has_pending(hctx)) { blk_mq_run_hw_queue(hctx, true); + return true; + } } + return false; } -void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx) -{ - struct request_queue *q = hctx->queue; - unsigned int i; +/** + * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list + * @pos: loop cursor. + * @skip: the list element that will not be examined. Iteration starts at + * @skip->next. + * @head: head of the list to examine. This list must have at least one + * element, namely @skip. + * @member: name of the list_head structure within typeof(*pos). + */ +#define list_for_each_entry_rcu_rr(pos, skip, head, member) \ + for ((pos) = (skip); \ + (pos = (pos)->member.next != (head) ? list_entry_rcu( \ + (pos)->member.next, typeof(*pos), member) : \ + list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \ + (pos) != (skip); ) - if (test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) { - if (test_and_clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) { - queue_for_each_hw_ctx(q, hctx, i) - blk_mq_sched_restart_hctx(hctx); +/* + * Called after a driver tag has been freed to check whether a hctx needs to + * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware + * queues in a round-robin fashion if the tag set of @hctx is shared with other + * hardware queues. + */ +void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx) +{ + struct blk_mq_tags *const tags = hctx->tags; + struct blk_mq_tag_set *const set = hctx->queue->tag_set; + struct request_queue *const queue = hctx->queue, *q; + struct blk_mq_hw_ctx *hctx2; + unsigned int i, j; + + if (set->flags & BLK_MQ_F_TAG_SHARED) { + rcu_read_lock(); + list_for_each_entry_rcu_rr(q, queue, &set->tag_list, + tag_set_list) { + queue_for_each_hw_ctx(q, hctx2, i) + if (hctx2->tags == tags && + blk_mq_sched_restart_hctx(hctx2)) + goto done; + } + j = hctx->queue_num + 1; + for (i = 0; i < queue->nr_hw_queues; i++, j++) { + if (j == queue->nr_hw_queues) + j = 0; + hctx2 = queue->queue_hw_ctx[j]; + if (hctx2->tags == tags && + blk_mq_sched_restart_hctx(hctx2)) + break; } +done: + rcu_read_unlock(); } else { blk_mq_sched_restart_hctx(hctx); } @@ -431,11 +475,67 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, } } -int blk_mq_sched_setup(struct request_queue *q) +static int blk_mq_sched_alloc_tags(struct request_queue *q, + struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx) +{ + struct blk_mq_tag_set *set = q->tag_set; + int ret; + + hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests, + set->reserved_tags); + if (!hctx->sched_tags) + return -ENOMEM; + + ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests); + if (ret) + blk_mq_sched_free_tags(set, hctx, hctx_idx); + + return ret; +} + +static void blk_mq_sched_tags_teardown(struct request_queue *q) { struct blk_mq_tag_set *set = q->tag_set; struct blk_mq_hw_ctx *hctx; - int ret, i; + int i; + + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_sched_free_tags(set, hctx, i); +} + +int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx) +{ + struct elevator_queue *e = q->elevator; + + if (!e) + return 0; + + return blk_mq_sched_alloc_tags(q, hctx, hctx_idx); +} + +void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx) +{ + struct elevator_queue *e = q->elevator; + + if (!e) + return; + + blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx); +} + +int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) +{ + struct blk_mq_hw_ctx *hctx; + unsigned int i; + int ret; + + if (!e) { + q->elevator = NULL; + return 0; + } /* * Default to 256, since we don't split into sync/async like the @@ -443,49 +543,30 @@ int blk_mq_sched_setup(struct request_queue *q) */ q->nr_requests = 2 * BLKDEV_MAX_RQ; - /* - * We're switching to using an IO scheduler, so setup the hctx - * scheduler tags and switch the request map from the regular - * tags to scheduler tags. First allocate what we need, so we - * can safely fail and fallback, if needed. - */ - ret = 0; queue_for_each_hw_ctx(q, hctx, i) { - hctx->sched_tags = blk_mq_alloc_rq_map(set, i, - q->nr_requests, set->reserved_tags); - if (!hctx->sched_tags) { - ret = -ENOMEM; - break; - } - ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests); + ret = blk_mq_sched_alloc_tags(q, hctx, i); if (ret) - break; + goto err; } - /* - * If we failed, free what we did allocate - */ - if (ret) { - queue_for_each_hw_ctx(q, hctx, i) { - if (!hctx->sched_tags) - continue; - blk_mq_sched_free_tags(set, hctx, i); - } - - return ret; - } + ret = e->ops.mq.init_sched(q, e); + if (ret) + goto err; return 0; + +err: + blk_mq_sched_tags_teardown(q); + q->elevator = NULL; + return ret; } -void blk_mq_sched_teardown(struct request_queue *q) +void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) { - struct blk_mq_tag_set *set = q->tag_set; - struct blk_mq_hw_ctx *hctx; - int i; - - queue_for_each_hw_ctx(q, hctx, i) - blk_mq_sched_free_tags(set, hctx, i); + if (e->type->ops.mq.exit_sched) + e->type->ops.mq.exit_sched(e); + blk_mq_sched_tags_teardown(q); + q->elevator = NULL; } int blk_mq_sched_init(struct request_queue *q) diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index a75b16b123f7..3a9e6e40558b 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -19,7 +19,7 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, struct request **merged_request); bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio); bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq); -void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx); +void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx); void blk_mq_sched_insert_request(struct request *rq, bool at_head, bool run_queue, bool async, bool can_block); @@ -32,8 +32,13 @@ void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx, struct list_head *rq_list, struct request *(*get_rq)(struct blk_mq_hw_ctx *)); -int blk_mq_sched_setup(struct request_queue *q); -void blk_mq_sched_teardown(struct request_queue *q); +int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e); +void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e); + +int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx); +void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx); int blk_mq_sched_init(struct request_queue *q); @@ -131,20 +136,6 @@ static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); } -/* - * Mark a hardware queue and the request queue it belongs to as needing a - * restart. - */ -static inline void blk_mq_sched_mark_restart_queue(struct blk_mq_hw_ctx *hctx) -{ - struct request_queue *q = hctx->queue; - - if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) - set_bit(QUEUE_FLAG_RESTART, &q->queue_flags); -} - static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx) { return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index e48bc2c72615..9d97bfc4d465 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -295,6 +295,9 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set) for (i = 0; i < set->nr_hw_queues; i++) { struct blk_mq_tags *tags = set->tags[i]; + if (!tags) + continue; + for (j = 0; j < tags->nr_tags; j++) { if (!tags->static_rqs[j]) continue; diff --git a/block/blk-mq.c b/block/blk-mq.c index 159187a28d66..c7836a1ded97 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -321,7 +321,6 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data); - blk_mq_put_ctx(alloc_data.ctx); blk_queue_exit(q); if (!rq) @@ -349,7 +348,7 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); if (sched_tag != -1) blk_mq_sched_completed_request(hctx, rq); - blk_mq_sched_restart_queues(hctx); + blk_mq_sched_restart(hctx); blk_queue_exit(q); } @@ -697,17 +696,8 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, { struct blk_mq_timeout_data *data = priv; - if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { - /* - * If a request wasn't started before the queue was - * marked dying, kill it here or it'll go unnoticed. - */ - if (unlikely(blk_queue_dying(rq->q))) { - rq->errors = -EIO; - blk_mq_end_request(rq, rq->errors); - } + if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) return; - } if (time_after_eq(jiffies, rq->deadline)) { if (!blk_mark_rq_complete(rq)) @@ -855,12 +845,8 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, .flags = wait ? 0 : BLK_MQ_REQ_NOWAIT, }; - if (rq->tag != -1) { -done: - if (hctx) - *hctx = data.hctx; - return true; - } + if (rq->tag != -1) + goto done; if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag)) data.flags |= BLK_MQ_REQ_RESERVED; @@ -872,10 +858,12 @@ done: atomic_inc(&data.hctx->nr_active); } data.hctx->tags->rqs[rq->tag] = rq; - goto done; } - return false; +done: + if (hctx) + *hctx = data.hctx; + return rq->tag != -1; } static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, @@ -972,13 +960,16 @@ static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx) return true; } -bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) +bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) { - struct request_queue *q = hctx->queue; + struct blk_mq_hw_ctx *hctx; struct request *rq; LIST_HEAD(driver_list); struct list_head *dptr; - int queued, ret = BLK_MQ_RQ_QUEUE_OK; + int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK; + + if (list_empty(list)) + return false; /* * Start off with dptr being NULL, so we start the first request @@ -989,8 +980,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) /* * Now process all the entries, sending them to the driver. */ - queued = 0; - while (!list_empty(list)) { + errors = queued = 0; + do { struct blk_mq_queue_data bd; rq = list_first_entry(list, struct request, queuelist); @@ -1046,6 +1037,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) default: pr_err("blk-mq: bad return on queue: %d\n", ret); case BLK_MQ_RQ_QUEUE_ERROR: + errors++; rq->errors = -EIO; blk_mq_end_request(rq, rq->errors); break; @@ -1060,7 +1052,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) */ if (!dptr && list->next != list->prev) dptr = &driver_list; - } + } while (!list_empty(list)); hctx->dispatched[queued_to_index(queued)]++; @@ -1097,7 +1089,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) blk_mq_run_hw_queue(hctx, true); } - return queued != 0; + return (queued + errors) != 0; } static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) @@ -1143,7 +1135,8 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) return hctx->next_cpu; } -void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) +static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, + unsigned long msecs) { if (unlikely(blk_mq_hctx_stopped(hctx) || !blk_mq_hw_queue_mapped(hctx))) @@ -1160,7 +1153,24 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) put_cpu(); } - kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work); + if (msecs == 0) + kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), + &hctx->run_work); + else + kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), + &hctx->delayed_run_work, + msecs_to_jiffies(msecs)); +} + +void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) +{ + __blk_mq_delay_run_hw_queue(hctx, true, msecs); +} +EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); + +void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) +{ + __blk_mq_delay_run_hw_queue(hctx, async, 0); } void blk_mq_run_hw_queues(struct request_queue *q, bool async) @@ -1263,6 +1273,15 @@ static void blk_mq_run_work_fn(struct work_struct *work) __blk_mq_run_hw_queue(hctx); } +static void blk_mq_delayed_run_work_fn(struct work_struct *work) +{ + struct blk_mq_hw_ctx *hctx; + + hctx = container_of(work, struct blk_mq_hw_ctx, delayed_run_work.work); + + __blk_mq_run_hw_queue(hctx); +} + static void blk_mq_delay_work_fn(struct work_struct *work) { struct blk_mq_hw_ctx *hctx; @@ -1434,7 +1453,8 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true); } -static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) +static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie, + bool may_sleep) { struct request_queue *q = rq->q; struct blk_mq_queue_data bd = { @@ -1475,7 +1495,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) } insert: - blk_mq_sched_insert_request(rq, false, true, true, false); + blk_mq_sched_insert_request(rq, false, true, false, may_sleep); } /* @@ -1569,11 +1589,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) { rcu_read_lock(); - blk_mq_try_issue_directly(old_rq, &cookie); + blk_mq_try_issue_directly(old_rq, &cookie, false); rcu_read_unlock(); } else { srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu); - blk_mq_try_issue_directly(old_rq, &cookie); + blk_mq_try_issue_directly(old_rq, &cookie, true); srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx); } goto done; @@ -1931,6 +1951,8 @@ static void blk_mq_exit_hctx(struct request_queue *q, hctx->fq->flush_rq, hctx_idx, flush_start_tag + hctx_idx); + blk_mq_sched_exit_hctx(q, hctx, hctx_idx); + if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); @@ -1967,6 +1989,7 @@ static int blk_mq_init_hctx(struct request_queue *q, node = hctx->numa_node = set->numa_node; INIT_WORK(&hctx->run_work, blk_mq_run_work_fn); + INIT_DELAYED_WORK(&hctx->delayed_run_work, blk_mq_delayed_run_work_fn); INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn); spin_lock_init(&hctx->lock); INIT_LIST_HEAD(&hctx->dispatch); @@ -1997,9 +2020,12 @@ static int blk_mq_init_hctx(struct request_queue *q, set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) goto free_bitmap; + if (blk_mq_sched_init_hctx(q, hctx, hctx_idx)) + goto exit_hctx; + hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size); if (!hctx->fq) - goto exit_hctx; + goto sched_exit_hctx; if (set->ops->init_request && set->ops->init_request(set->driver_data, @@ -2014,6 +2040,8 @@ static int blk_mq_init_hctx(struct request_queue *q, free_fq: kfree(hctx->fq); + sched_exit_hctx: + blk_mq_sched_exit_hctx(q, hctx, hctx_idx); exit_hctx: if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); @@ -2240,8 +2268,6 @@ void blk_mq_release(struct request_queue *q) struct blk_mq_hw_ctx *hctx; unsigned int i; - blk_mq_sched_teardown(q); - /* hctx kobj stays in hctx */ queue_for_each_hw_ctx(q, hctx, i) { if (!hctx) @@ -2572,6 +2598,14 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) return 0; } +static int blk_mq_update_queue_map(struct blk_mq_tag_set *set) +{ + if (set->ops->map_queues) + return set->ops->map_queues(set); + else + return blk_mq_map_queues(set); +} + /* * Alloc a tag set to be associated with one or more request queues. * May fail with EINVAL for various error conditions. May adjust the @@ -2626,10 +2660,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (!set->mq_map) goto out_free_tags; - if (set->ops->map_queues) - ret = set->ops->map_queues(set); - else - ret = blk_mq_map_queues(set); + ret = blk_mq_update_queue_map(set); if (ret) goto out_free_mq_map; @@ -2721,6 +2752,7 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) blk_mq_freeze_queue(q); set->nr_hw_queues = nr_hw_queues; + blk_mq_update_queue_map(set); list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_realloc_hw_ctxs(set, q); @@ -2896,8 +2928,17 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie) hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; if (!blk_qc_t_is_internal(cookie)) rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie)); - else + else { rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie)); + /* + * With scheduling, if the request has completed, we'll + * get a NULL return here, as we clear the sched tag when + * that happens. The request still remains valid, like always, + * so we should be safe with just the NULL check. + */ + if (!rq) + return false; + } return __blk_mq_poll(hctx, rq); } diff --git a/block/blk-mq.h b/block/blk-mq.h index b79f9a7d8cf6..660a17e1d033 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -31,7 +31,7 @@ void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); -bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *); +bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *); void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx); bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, diff --git a/block/blk-stat.c b/block/blk-stat.c index 9b43efb8933f..186fcb981e9b 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -30,11 +30,11 @@ static void blk_stat_flush_batch(struct blk_rq_stat *stat) static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src) { + blk_stat_flush_batch(src); + if (!src->nr_samples) return; - blk_stat_flush_batch(src); - dst->min = min(dst->min, src->min); dst->max = max(dst->max, src->max); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index c44b321335f3..37f0b3ad635e 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -816,7 +816,7 @@ static void blk_release_queue(struct kobject *kobj) if (q->elevator) { ioc_clear_queue(q); - elevator_exit(q->elevator); + elevator_exit(q, q->elevator); } blk_exit_rl(&q->root_rl); diff --git a/block/elevator.c b/block/elevator.c index 01139f549b5b..4d9084a14c10 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -242,26 +242,21 @@ int elevator_init(struct request_queue *q, char *name) } } - if (e->uses_mq) { - err = blk_mq_sched_setup(q); - if (!err) - err = e->ops.mq.init_sched(q, e); - } else + if (e->uses_mq) + err = blk_mq_init_sched(q, e); + else err = e->ops.sq.elevator_init_fn(q, e); - if (err) { - if (e->uses_mq) - blk_mq_sched_teardown(q); + if (err) elevator_put(e); - } return err; } EXPORT_SYMBOL(elevator_init); -void elevator_exit(struct elevator_queue *e) +void elevator_exit(struct request_queue *q, struct elevator_queue *e) { mutex_lock(&e->sysfs_lock); if (e->uses_mq && e->type->ops.mq.exit_sched) - e->type->ops.mq.exit_sched(e); + blk_mq_exit_sched(q, e); else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn) e->type->ops.sq.elevator_exit_fn(e); mutex_unlock(&e->sysfs_lock); @@ -946,6 +941,45 @@ void elv_unregister(struct elevator_type *e) } EXPORT_SYMBOL_GPL(elv_unregister); +static int elevator_switch_mq(struct request_queue *q, + struct elevator_type *new_e) +{ + int ret; + + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); + + if (q->elevator) { + if (q->elevator->registered) + elv_unregister_queue(q); + ioc_clear_queue(q); + elevator_exit(q, q->elevator); + } + + ret = blk_mq_init_sched(q, new_e); + if (ret) + goto out; + + if (new_e) { + ret = elv_register_queue(q); + if (ret) { + elevator_exit(q, q->elevator); + goto out; + } + } + + if (new_e) + blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); + else + blk_add_trace_msg(q, "elv switch: none"); + +out: + blk_mq_unfreeze_queue(q); + blk_mq_start_stopped_hw_queues(q, true); + return ret; + +} + /* * switch to new_e io scheduler. be careful not to introduce deadlocks - * we don't free the old io scheduler, before we have allocated what we @@ -958,10 +992,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) bool old_registered = false; int err; - if (q->mq_ops) { - blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); - } + if (q->mq_ops) + return elevator_switch_mq(q, new_e); /* * Turn on BYPASS and drain all requests w/ elevator private data. @@ -973,11 +1005,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) if (old) { old_registered = old->registered; - if (old->uses_mq) - blk_mq_sched_teardown(q); - - if (!q->mq_ops) - blk_queue_bypass_start(q); + blk_queue_bypass_start(q); /* unregister and clear all auxiliary data of the old elevator */ if (old_registered) @@ -987,56 +1015,32 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) } /* allocate, init and register new elevator */ - if (new_e) { - if (new_e->uses_mq) { - err = blk_mq_sched_setup(q); - if (!err) - err = new_e->ops.mq.init_sched(q, new_e); - } else - err = new_e->ops.sq.elevator_init_fn(q, new_e); - if (err) - goto fail_init; + err = new_e->ops.sq.elevator_init_fn(q, new_e); + if (err) + goto fail_init; - err = elv_register_queue(q); - if (err) - goto fail_register; - } else - q->elevator = NULL; + err = elv_register_queue(q); + if (err) + goto fail_register; /* done, kill the old one and finish */ if (old) { - elevator_exit(old); - if (!q->mq_ops) - blk_queue_bypass_end(q); + elevator_exit(q, old); + blk_queue_bypass_end(q); } - if (q->mq_ops) { - blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); - } - - if (new_e) - blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); - else - blk_add_trace_msg(q, "elv switch: none"); + blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); return 0; fail_register: - if (q->mq_ops) - blk_mq_sched_teardown(q); - elevator_exit(q->elevator); + elevator_exit(q, q->elevator); fail_init: /* switch failed, restore and re-register old elevator */ if (old) { q->elevator = old; elv_register_queue(q); - if (!q->mq_ops) - blk_queue_bypass_end(q); - } - if (q->mq_ops) { - blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); + blk_queue_bypass_end(q); } return err; @@ -1094,12 +1098,20 @@ int elevator_change(struct request_queue *q, const char *name) } EXPORT_SYMBOL(elevator_change); +static inline bool elv_support_iosched(struct request_queue *q) +{ + if (q->mq_ops && q->tag_set && (q->tag_set->flags & + BLK_MQ_F_NO_SCHED)) + return false; + return true; +} + ssize_t elv_iosched_store(struct request_queue *q, const char *name, size_t count) { int ret; - if (!(q->mq_ops || q->request_fn)) + if (!(q->mq_ops || q->request_fn) || !elv_support_iosched(q)) return count; ret = __elevator_change(q, name); @@ -1131,7 +1143,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name) len += sprintf(name+len, "[%s] ", elv->elevator_name); continue; } - if (__e->uses_mq && q->mq_ops) + if (__e->uses_mq && q->mq_ops && elv_support_iosched(q)) len += sprintf(name+len, "%s ", __e->elevator_name); else if (!__e->uses_mq && !q->mq_ops) len += sprintf(name+len, "%s ", __e->elevator_name); |