about summary refs log tree commit diff stats
path: root/block/blk-mq.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r-- block/blk-mq.c 137
1 files changed, 88 insertions, 49 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0015a1892153..55bcee5dc032 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -105,7 +105,7 @@ static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
{
struct mq_inflight *mi = priv;
- if (rq->part == mi->part)
+ if (rq->part == mi->part && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
mi->inflight[rq_data_dir(rq)]++;
return true;
@@ -519,7 +519,7 @@ void blk_mq_free_request(struct request *rq)
ctx->rq_completed[rq_is_sync(rq)]++;
if (rq->rq_flags & RQF_MQ_INFLIGHT)
- atomic_dec(&hctx->nr_active);
+ __blk_mq_dec_active_requests(hctx);
if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
laptop_io_completion(q->backing_dev_info);
@@ -1096,19 +1096,20 @@ static inline unsigned int queued_to_index(unsigned int queued)
static bool __blk_mq_get_driver_tag(struct request *rq)
{
- struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
+ struct sbitmap_queue *bt = rq->mq_hctx->tags->bitmap_tags;
unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
int tag;
blk_mq_tag_busy(rq->mq_hctx);
if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
- bt = &rq->mq_hctx->tags->breserved_tags;
+ bt = rq->mq_hctx->tags->breserved_tags;
tag_offset = 0;
+ } else {
+ if (!hctx_may_queue(rq->mq_hctx, bt))
+ return false;
}
- if (!hctx_may_queue(rq->mq_hctx, bt))
- return false;
tag = __sbitmap_queue_get(bt);
if (tag == BLK_MQ_NO_TAG)
return false;
@@ -1124,10 +1125,10 @@ static bool blk_mq_get_driver_tag(struct request *rq)
if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq))
return false;
- if ((hctx->flags & BLK_MQ_F_TAG_SHARED) &&
+ if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) &&
!(rq->rq_flags & RQF_MQ_INFLIGHT)) {
rq->rq_flags |= RQF_MQ_INFLIGHT;
- atomic_inc(&hctx->nr_active);
+ __blk_mq_inc_active_requests(hctx);
}
hctx->tags->rqs[rq->tag] = rq;
return true;
@@ -1145,7 +1146,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
struct sbitmap_queue *sbq;
list_del_init(&wait->entry);
- sbq = &hctx->tags->bitmap_tags;
+ sbq = hctx->tags->bitmap_tags;
atomic_dec(&sbq->ws_active);
}
spin_unlock(&hctx->dispatch_wait_lock);
@@ -1163,12 +1164,12 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
- struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags;
+ struct sbitmap_queue *sbq = hctx->tags->bitmap_tags;
struct wait_queue_head *wq;
wait_queue_entry_t *wait;
bool ret;
- if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) {
+ if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
blk_mq_sched_mark_restart_hctx(hctx);
/*
@@ -1412,6 +1413,11 @@ out:
hctx->dispatched[queued_to_index(queued)]++;
+ /* If we didn't flush the entire list, we could have told the driver
+ * there was more coming, but that turned out to be a lie.
+ */
+ if ((!list_empty(list) || errors) && q->mq_ops->commit_rqs && queued)
+ q->mq_ops->commit_rqs(hctx);
/*
* Any items that need requeuing? Stuff them into hctx->dispatch,
* that is where we will continue on next queue run.
@@ -1420,24 +1426,25 @@ out:
bool needs_restart;
/* For non-shared tags, the RESTART check will suffice */
bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
- (hctx->flags & BLK_MQ_F_TAG_SHARED);
+ (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED);
bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET;
blk_mq_release_budgets(q, nr_budgets);
- /*
- * If we didn't flush the entire list, we could have told
- * the driver there was more coming, but that turned out to
- * be a lie.
- */
- if (q->mq_ops->commit_rqs && queued)
- q->mq_ops->commit_rqs(hctx);
-
spin_lock(&hctx->lock);
list_splice_tail_init(list, &hctx->dispatch);
spin_unlock(&hctx->lock);
/*
+ * Order adding requests to hctx->dispatch before checking the
+ * SCHED_RESTART flag. This smp_mb() pairs with the one in
+ * blk_mq_sched_restart(). It prevents the restart code path
+ * from missing the requests newly added to hctx->dispatch
+ * while SCHED_RESTART is observed here.
+ */
+ smp_mb();
+
+ /*
* If SCHED_RESTART was set by the caller of this function and
* it is no longer set that means that it was cleared by another
* thread and hence that a queue rerun is needed.
@@ -1657,7 +1664,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
EXPORT_SYMBOL(blk_mq_run_hw_queue);
/**
- * blk_mq_run_hw_queue - Run all hardware queues in a request queue.
+ * blk_mq_run_hw_queues - Run all hardware queues in a request queue.
* @q: Pointer to the request queue to run.
* @async: If we want to run the queue asynchronously.
*/
@@ -1797,7 +1804,7 @@ static void blk_mq_run_work_fn(struct work_struct *work)
/*
* If we are stopped, don't run the queue.
*/
- if (test_bit(BLK_MQ_S_STOPPED, &hctx->state))
+ if (blk_mq_hctx_stopped(hctx))
return;
__blk_mq_run_hw_queue(hctx);
@@ -1834,6 +1841,7 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
/**
* blk_mq_request_bypass_insert - Insert a request at dispatch list.
* @rq: Pointer to request to be inserted.
+ * @at_head: true if the request should be inserted at the head of the list.
* @run_queue: If we should run the hardware queue after inserting the request.
*
* Should only be used carefully, when the caller knows we want to
@@ -1929,13 +1937,18 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
unsigned int nr_segs)
{
+ int err;
+
if (bio->bi_opf & REQ_RAHEAD)
rq->cmd_flags |= REQ_FAILFAST_MASK;
rq->__sector = bio->bi_iter.bi_sector;
rq->write_hint = bio->bi_write_hint;
blk_rq_bio_prep(rq, bio, nr_segs);
- blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO);
+
+ /* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */
+ err = blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO);
+ WARN_ON_ONCE(err);
blk_account_io_start(rq);
}
@@ -2016,7 +2029,8 @@ insert:
if (bypass_insert)
return BLK_STS_RESOURCE;
- blk_mq_request_bypass_insert(rq, false, run_queue);
+ blk_mq_sched_insert_request(rq, false, run_queue, false);
+
return BLK_STS_OK;
}
@@ -2068,6 +2082,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list)
{
int queued = 0;
+ int errors = 0;
while (!list_empty(list)) {
blk_status_t ret;
@@ -2084,6 +2099,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
break;
}
blk_mq_end_request(rq, ret);
+ errors++;
} else
queued++;
}
@@ -2093,7 +2109,8 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
* the driver there was more coming, but that turned out to
* be a lie.
*/
- if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs && queued)
+ if ((!list_empty(list) || errors) &&
+ hctx->queue->mq_ops->commit_rqs && queued)
hctx->queue->mq_ops->commit_rqs(hctx);
}
@@ -2253,7 +2270,6 @@ queue_exit:
blk_queue_exit(q);
return BLK_QC_T_NONE;
}
-EXPORT_SYMBOL_GPL(blk_mq_submit_bio); /* only for request based dm */
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
unsigned int hctx_idx)
@@ -2285,20 +2301,21 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
}
}
-void blk_mq_free_rq_map(struct blk_mq_tags *tags)
+void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags)
{
kfree(tags->rqs);
tags->rqs = NULL;
kfree(tags->static_rqs);
tags->static_rqs = NULL;
- blk_mq_free_tags(tags);
+ blk_mq_free_tags(tags, flags);
}
struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
unsigned int hctx_idx,
unsigned int nr_tags,
- unsigned int reserved_tags)
+ unsigned int reserved_tags,
+ unsigned int flags)
{
struct blk_mq_tags *tags;
int node;
@@ -2307,8 +2324,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
if (node == NUMA_NO_NODE)
node = set->numa_node;
- tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
- BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
+ tags = blk_mq_init_tags(nr_tags, reserved_tags, node, flags);
if (!tags)
return NULL;
@@ -2316,7 +2332,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
node);
if (!tags->rqs) {
- blk_mq_free_tags(tags);
+ blk_mq_free_tags(tags, flags);
return NULL;
}
@@ -2325,7 +2341,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
node);
if (!tags->static_rqs) {
kfree(tags->rqs);
- blk_mq_free_tags(tags);
+ blk_mq_free_tags(tags, flags);
return NULL;
}
@@ -2649,6 +2665,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
goto free_hctx;
atomic_set(&hctx->nr_active, 0);
+ atomic_set(&hctx->elevator_queued, 0);
if (node == NUMA_NO_NODE)
node = set->numa_node;
hctx->numa_node = node;
@@ -2657,7 +2674,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
spin_lock_init(&hctx->lock);
INIT_LIST_HEAD(&hctx->dispatch);
hctx->queue = q;
- hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
+ hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED;
INIT_LIST_HEAD(&hctx->hctx_list);
@@ -2726,7 +2743,7 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
for (j = 0; j < set->nr_maps; j++) {
hctx = blk_mq_map_queue_type(q, j, i);
if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
- hctx->numa_node = local_memory_node(cpu_to_node(i));
+ hctx->numa_node = cpu_to_node(i);
}
}
}
@@ -2734,10 +2751,11 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set,
int hctx_idx)
{
+ unsigned int flags = set->flags;
int ret = 0;
set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx,
- set->queue_depth, set->reserved_tags);
+ set->queue_depth, set->reserved_tags, flags);
if (!set->tags[hctx_idx])
return false;
@@ -2746,7 +2764,7 @@ static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set,
if (!ret)
return true;
- blk_mq_free_rq_map(set->tags[hctx_idx]);
+ blk_mq_free_rq_map(set->tags[hctx_idx], flags);
set->tags[hctx_idx] = NULL;
return false;
}
@@ -2754,9 +2772,11 @@ static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set,
static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
unsigned int hctx_idx)
{
+ unsigned int flags = set->flags;
+
if (set->tags && set->tags[hctx_idx]) {
blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx);
- blk_mq_free_rq_map(set->tags[hctx_idx]);
+ blk_mq_free_rq_map(set->tags[hctx_idx], flags);
set->tags[hctx_idx] = NULL;
}
}
@@ -2874,14 +2894,14 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared)
queue_for_each_hw_ctx(q, hctx, i) {
if (shared)
- hctx->flags |= BLK_MQ_F_TAG_SHARED;
+ hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED;
else
- hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+ hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED;
}
}
-static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
- bool shared)
+static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set,
+ bool shared)
{
struct request_queue *q;
@@ -2902,9 +2922,9 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
list_del(&q->tag_set_list);
if (list_is_singular(&set->tag_list)) {
/* just transitioned to unshared */
- set->flags &= ~BLK_MQ_F_TAG_SHARED;
+ set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED;
/* update existing queue */
- blk_mq_update_tag_set_depth(set, false);
+ blk_mq_update_tag_set_shared(set, false);
}
mutex_unlock(&set->tag_list_lock);
INIT_LIST_HEAD(&q->tag_set_list);
@@ -2919,12 +2939,12 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
* Check to see if we're transitioning to shared (from 1 to 2 queues).
*/
if (!list_empty(&set->tag_list) &&
- !(set->flags & BLK_MQ_F_TAG_SHARED)) {
- set->flags |= BLK_MQ_F_TAG_SHARED;
+ !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
+ set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED;
/* update existing queue */
- blk_mq_update_tag_set_depth(set, true);
+ blk_mq_update_tag_set_shared(set, true);
}
- if (set->flags & BLK_MQ_F_TAG_SHARED)
+ if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
queue_set_hctx_shared(q, true);
list_add_tail(&q->tag_set_list, &set->tag_list);
@@ -3245,9 +3265,11 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
{
int i;
- for (i = 0; i < set->nr_hw_queues; i++)
+ for (i = 0; i < set->nr_hw_queues; i++) {
if (!__blk_mq_alloc_map_and_request(set, i))
goto out_unwind;
+ cond_resched();
+ }
return 0;
@@ -3427,11 +3449,23 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
if (ret)
goto out_free_mq_map;
+ if (blk_mq_is_sbitmap_shared(set->flags)) {
+ atomic_set(&set->active_queues_shared_sbitmap, 0);
+
+ if (blk_mq_init_shared_sbitmap(set, set->flags)) {
+ ret = -ENOMEM;
+ goto out_free_mq_rq_maps;
+ }
+ }
+
mutex_init(&set->tag_list_lock);
INIT_LIST_HEAD(&set->tag_list);
return 0;
+out_free_mq_rq_maps:
+ for (i = 0; i < set->nr_hw_queues; i++)
+ blk_mq_free_map_and_requests(set, i);
out_free_mq_map:
for (i = 0; i < set->nr_maps; i++) {
kfree(set->map[i].mq_map);
@@ -3450,6 +3484,9 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
for (i = 0; i < set->nr_hw_queues; i++)
blk_mq_free_map_and_requests(set, i);
+ if (blk_mq_is_sbitmap_shared(set->flags))
+ blk_mq_exit_shared_sbitmap(set);
+
for (j = 0; j < set->nr_maps; j++) {
kfree(set->map[j].mq_map);
set->map[j].mq_map = NULL;
@@ -3486,6 +3523,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
if (!hctx->sched_tags) {
ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr,
false);
+ if (!ret && blk_mq_is_sbitmap_shared(set->flags))
+ blk_mq_tag_resize_shared_sbitmap(set, nr);
} else {
ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
nr, true);