aboutsummaryrefslogtreecommitdiffstats
path: root/block/blk-mq.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--block/blk-mq.c192
1 files changed, 111 insertions, 81 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index fc60ed7e940e..08a6248d8536 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Block multiqueue core code
*
@@ -2062,7 +2063,7 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
list_del_init(&page->lru);
/*
* Remove kmemleak object previously allocated in
- * blk_mq_init_rq_map().
+ * blk_mq_alloc_rqs().
*/
kmemleak_free(page_address(page));
__free_pages(page, page->private);
@@ -2267,12 +2268,11 @@ static void blk_mq_exit_hctx(struct request_queue *q,
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
- if (hctx->flags & BLK_MQ_F_BLOCKING)
- cleanup_srcu_struct(hctx->srcu);
-
blk_mq_remove_cpuhp(hctx);
- blk_free_flush_queue(hctx->fq);
- sbitmap_free(&hctx->ctx_map);
+
+ spin_lock(&q->unused_hctx_lock);
+ list_add(&hctx->hctx_list, &q->unused_hctx_list);
+ spin_unlock(&q->unused_hctx_lock);
}
static void blk_mq_exit_hw_queues(struct request_queue *q,
@@ -2289,15 +2289,65 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
}
}
+static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
+{
+ int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
+
+ BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu),
+ __alignof__(struct blk_mq_hw_ctx)) !=
+ sizeof(struct blk_mq_hw_ctx));
+
+ if (tag_set->flags & BLK_MQ_F_BLOCKING)
+ hw_ctx_size += sizeof(struct srcu_struct);
+
+ return hw_ctx_size;
+}
+
static int blk_mq_init_hctx(struct request_queue *q,
struct blk_mq_tag_set *set,
struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
{
- int node;
+ hctx->queue_num = hctx_idx;
+
+ cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
+
+ hctx->tags = set->tags[hctx_idx];
+
+ if (set->ops->init_hctx &&
+ set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
+ goto unregister_cpu_notifier;
+
+ if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx,
+ hctx->numa_node))
+ goto exit_hctx;
+ return 0;
+
+ exit_hctx:
+ if (set->ops->exit_hctx)
+ set->ops->exit_hctx(hctx, hctx_idx);
+ unregister_cpu_notifier:
+ blk_mq_remove_cpuhp(hctx);
+ return -1;
+}
+
+static struct blk_mq_hw_ctx *
+blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
+ int node)
+{
+ struct blk_mq_hw_ctx *hctx;
+ gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY;
+
+ hctx = kzalloc_node(blk_mq_hw_ctx_size(set), gfp, node);
+ if (!hctx)
+ goto fail_alloc_hctx;
- node = hctx->numa_node;
+ if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node))
+ goto free_hctx;
+
+ atomic_set(&hctx->nr_active, 0);
if (node == NUMA_NO_NODE)
- node = hctx->numa_node = set->numa_node;
+ node = set->numa_node;
+ hctx->numa_node = node;
INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
spin_lock_init(&hctx->lock);
@@ -2305,58 +2355,47 @@ static int blk_mq_init_hctx(struct request_queue *q,
hctx->queue = q;
hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
- cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
-
- hctx->tags = set->tags[hctx_idx];
+ INIT_LIST_HEAD(&hctx->hctx_list);
/*
* Allocate space for all possible cpus to avoid allocation at
* runtime
*/
hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *),
- GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node);
+ gfp, node);
if (!hctx->ctxs)
- goto unregister_cpu_notifier;
+ goto free_cpumask;
if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8),
- GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node))
+ gfp, node))
goto free_ctxs;
-
hctx->nr_ctx = 0;
spin_lock_init(&hctx->dispatch_wait_lock);
init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
INIT_LIST_HEAD(&hctx->dispatch_wait.entry);
- if (set->ops->init_hctx &&
- set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
- goto free_bitmap;
-
hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size,
- GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
+ gfp);
if (!hctx->fq)
- goto exit_hctx;
-
- if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, node))
- goto free_fq;
+ goto free_bitmap;
if (hctx->flags & BLK_MQ_F_BLOCKING)
init_srcu_struct(hctx->srcu);
+ blk_mq_hctx_kobj_init(hctx);
- return 0;
+ return hctx;
- free_fq:
- blk_free_flush_queue(hctx->fq);
- exit_hctx:
- if (set->ops->exit_hctx)
- set->ops->exit_hctx(hctx, hctx_idx);
free_bitmap:
sbitmap_free(&hctx->ctx_map);
free_ctxs:
kfree(hctx->ctxs);
- unregister_cpu_notifier:
- blk_mq_remove_cpuhp(hctx);
- return -1;
+ free_cpumask:
+ free_cpumask_var(hctx->cpumask);
+ free_hctx:
+ kfree(hctx);
+ fail_alloc_hctx:
+ return NULL;
}
static void blk_mq_init_cpu_queues(struct request_queue *q,
@@ -2631,13 +2670,17 @@ static int blk_mq_alloc_ctxs(struct request_queue *q)
*/
void blk_mq_release(struct request_queue *q)
{
- struct blk_mq_hw_ctx *hctx;
- unsigned int i;
+ struct blk_mq_hw_ctx *hctx, *next;
+ int i;
- /* hctx kobj stays in hctx */
- queue_for_each_hw_ctx(q, hctx, i) {
- if (!hctx)
- continue;
+ cancel_delayed_work_sync(&q->requeue_work);
+
+ queue_for_each_hw_ctx(q, hctx, i)
+ WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list));
+
+ /* all hctx are in .unused_hctx_list now */
+ list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) {
+ list_del_init(&hctx->hctx_list);
kobject_put(&hctx->kobj);
}
@@ -2700,51 +2743,38 @@ struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
}
EXPORT_SYMBOL(blk_mq_init_sq_queue);
-static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
-{
- int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
-
- BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu),
- __alignof__(struct blk_mq_hw_ctx)) !=
- sizeof(struct blk_mq_hw_ctx));
-
- if (tag_set->flags & BLK_MQ_F_BLOCKING)
- hw_ctx_size += sizeof(struct srcu_struct);
-
- return hw_ctx_size;
-}
-
static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
struct blk_mq_tag_set *set, struct request_queue *q,
int hctx_idx, int node)
{
- struct blk_mq_hw_ctx *hctx;
-
- hctx = kzalloc_node(blk_mq_hw_ctx_size(set),
- GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
- node);
- if (!hctx)
- return NULL;
+ struct blk_mq_hw_ctx *hctx = NULL, *tmp;
- if (!zalloc_cpumask_var_node(&hctx->cpumask,
- GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
- node)) {
- kfree(hctx);
- return NULL;
+ /* reuse dead hctx first */
+ spin_lock(&q->unused_hctx_lock);
+ list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) {
+ if (tmp->numa_node == node) {
+ hctx = tmp;
+ break;
+ }
}
+ if (hctx)
+ list_del_init(&hctx->hctx_list);
+ spin_unlock(&q->unused_hctx_lock);
- atomic_set(&hctx->nr_active, 0);
- hctx->numa_node = node;
- hctx->queue_num = hctx_idx;
+ if (!hctx)
+ hctx = blk_mq_alloc_hctx(q, set, node);
+ if (!hctx)
+ goto fail;
- if (blk_mq_init_hctx(q, set, hctx, hctx_idx)) {
- free_cpumask_var(hctx->cpumask);
- kfree(hctx);
- return NULL;
- }
- blk_mq_hctx_kobj_init(hctx);
+ if (blk_mq_init_hctx(q, set, hctx, hctx_idx))
+ goto free_hctx;
return hctx;
+
+ free_hctx:
+ kobject_put(&hctx->kobj);
+ fail:
+ return NULL;
}
static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
@@ -2770,10 +2800,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
hctx = blk_mq_alloc_and_init_hctx(set, q, i, node);
if (hctx) {
- if (hctxs[i]) {
+ if (hctxs[i])
blk_mq_exit_hctx(q, set, hctxs[i], i);
- kobject_put(&hctxs[i]->kobj);
- }
hctxs[i] = hctx;
} else {
if (hctxs[i])
@@ -2804,9 +2832,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
if (hctx->tags)
blk_mq_free_map_and_requests(set, j);
blk_mq_exit_hctx(q, set, hctx, j);
- kobject_put(&hctx->kobj);
hctxs[j] = NULL;
-
}
}
mutex_unlock(&q->sysfs_lock);
@@ -2849,6 +2875,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
if (!q->queue_hw_ctx)
goto err_sys_init;
+ INIT_LIST_HEAD(&q->unused_hctx_list);
+ spin_lock_init(&q->unused_hctx_lock);
+
blk_mq_realloc_hw_ctxs(set, q);
if (!q->nr_hw_queues)
goto err_hctxs;
@@ -2905,7 +2934,8 @@ err_exit:
}
EXPORT_SYMBOL(blk_mq_init_allocated_queue);
-void blk_mq_free_queue(struct request_queue *q)
+/* tags can _not_ be used after returning from blk_mq_exit_queue */
+void blk_mq_exit_queue(struct request_queue *q)
{
struct blk_mq_tag_set *set = q->tag_set;