about summary refs log tree commit diff stats
path: root/drivers/block
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2022-11-05 09:02:28 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2022-11-05 09:02:28 -0700
commit: 4869f5750afdb10a0e9cfa0252fce33e53ab681e (patch)
tree: 82c3ba67f57163f1230ea85a4acd29af40c07344 /drivers/block
parent: Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux (diff)
parent: block: blk_add_rq_to_plug(): clear stale 'last' after flush (diff)
download: linux-dev-4869f5750afdb10a0e9cfa0252fce33e53ab681e.tar.xz
linux-dev-4869f5750afdb10a0e9cfa0252fce33e53ab681e.zip
Merge tag 'block-6.1-2022-11-05' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe:

 - Fixes for the ublk driver (Ming)

 - Fixes for error handling memory leaks (Chen Jun, Chen Zhongjin)

 - Explicitly clear the last request in a chain when the plug is
   flushed, as it may have already been issued (Al)

* tag 'block-6.1-2022-11-05' of git://git.kernel.dk/linux:
  block: blk_add_rq_to_plug(): clear stale 'last' after flush
  blk-mq: Fix kmemleak in blk_mq_init_allocated_queue
  block: Fix possible memory leak for rq_wb on add_disk failure
  ublk_drv: add ublk_queue_cmd() for cleanup
  ublk_drv: avoid to touch io_uring cmd in blk_mq io path
  ublk_drv: comment on ublk_driver entry of Kconfig
  ublk_drv: return flag of UBLK_F_URING_CMD_COMP_IN_TASK in case of module
Diffstat (limited to 'drivers/block')
-rw-r--r-- drivers/block/Kconfig 6
-rw-r--r-- drivers/block/ublk_drv.c 115
2 files changed, 74 insertions, 47 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index db1b4b202646..a41145d52de9 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -408,6 +408,12 @@ config BLK_DEV_UBLK
definition isn't finalized yet, and might change according to future
requirement, so mark it as experimental now.
+ Say Y if you want to get better performance because task_work_add()
+ can be used in IO path for replacing io_uring cmd, which will become
+ shared between IO tasks and ubq daemon, meantime task_work_add() can
+ handle batch more effectively, but task_work_add() isn't exported
+ for module, so ublk has to be built into the kernel.
+
source "drivers/block/rnbd/Kconfig"
endif # BLK_DEV
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 5afce6ffaadf..f96cb01e9604 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -57,11 +57,14 @@
#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
struct ublk_rq_data {
- struct callback_head work;
+ union {
+ struct callback_head work;
+ struct llist_node node;
+ };
};
struct ublk_uring_cmd_pdu {
- struct request *req;
+ struct ublk_queue *ubq;
};
/*
@@ -119,6 +122,8 @@ struct ublk_queue {
struct task_struct *ubq_daemon;
char *io_cmd_buf;
+ struct llist_head io_cmds;
+
unsigned long io_addr; /* mapped vm address */
unsigned int max_io_sz;
bool force_abort;
@@ -764,8 +769,12 @@ static inline void __ublk_rq_task_work(struct request *req)
static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
{
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+ struct ublk_queue *ubq = pdu->ubq;
+ struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
+ struct ublk_rq_data *data;
- __ublk_rq_task_work(pdu->req);
+ llist_for_each_entry(data, io_cmds, node)
+ __ublk_rq_task_work(blk_mq_rq_from_pdu(data));
}
static void ublk_rq_task_work_fn(struct callback_head *work)
@@ -777,6 +786,54 @@ static void ublk_rq_task_work_fn(struct callback_head *work)
__ublk_rq_task_work(req);
}
+static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq)
+{
+ struct ublk_io *io = &ubq->ios[rq->tag];
+
+ /*
+ * If the check pass, we know that this is a re-issued request aborted
+ * previously in monitor_work because the ubq_daemon(cmd's task) is
+ * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore
+ * because this ioucmd's io_uring context may be freed now if no inflight
+ * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work.
+ *
+ * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
+ * the tag). Then the request is re-started(allocating the tag) and we are here.
+ * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED
+ * guarantees that here is a re-issued request aborted previously.
+ */
+ if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) {
+ struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
+ struct ublk_rq_data *data;
+
+ llist_for_each_entry(data, io_cmds, node)
+ __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data));
+ } else {
+ struct io_uring_cmd *cmd = io->cmd;
+ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+ pdu->ubq = ubq;
+ io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+ }
+}
+
+static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq,
+ bool last)
+{
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
+
+ if (ublk_can_use_task_work(ubq)) {
+ enum task_work_notify_mode notify_mode = last ?
+ TWA_SIGNAL_NO_IPI : TWA_NONE;
+
+ if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode))
+ __ublk_abort_rq(ubq, rq);
+ } else {
+ if (llist_add(&data->node, &ubq->io_cmds))
+ ublk_submit_cmd(ubq, rq);
+ }
+}
+
static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -788,6 +845,7 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
res = ublk_setup_iod(ubq, rq);
if (unlikely(res != BLK_STS_OK))
return BLK_STS_IOERR;
+
/* With recovery feature enabled, force_abort is set in
* ublk_stop_dev() before calling del_gendisk(). We have to
* abort all requeued and new rqs here to let del_gendisk()
@@ -803,41 +861,11 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(bd->rq);
if (unlikely(ubq_daemon_is_dying(ubq))) {
- fail:
__ublk_abort_rq(ubq, rq);
return BLK_STS_OK;
}
- if (ublk_can_use_task_work(ubq)) {
- struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
- enum task_work_notify_mode notify_mode = bd->last ?
- TWA_SIGNAL_NO_IPI : TWA_NONE;
-
- if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode))
- goto fail;
- } else {
- struct ublk_io *io = &ubq->ios[rq->tag];
- struct io_uring_cmd *cmd = io->cmd;
- struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
-
- /*
- * If the check pass, we know that this is a re-issued request aborted
- * previously in monitor_work because the ubq_daemon(cmd's task) is
- * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore
- * because this ioucmd's io_uring context may be freed now if no inflight
- * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work.
- *
- * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
- * the tag). Then the request is re-started(allocating the tag) and we are here.
- * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED
- * guarantees that here is a re-issued request aborted previously.
- */
- if ((io->flags & UBLK_IO_FLAG_ABORTED))
- goto fail;
-
- pdu->req = rq;
- io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
- }
+ ublk_queue_cmd(ubq, rq, bd->last);
return BLK_STS_OK;
}
@@ -1164,22 +1192,12 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
}
static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
- int tag, struct io_uring_cmd *cmd)
+ int tag)
{
struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
- if (ublk_can_use_task_work(ubq)) {
- struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
-
- /* should not fail since we call it just in ubq->ubq_daemon */
- task_work_add(ubq->ubq_daemon, &data->work, TWA_SIGNAL_NO_IPI);
- } else {
- struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
-
- pdu->req = req;
- io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
- }
+ ublk_queue_cmd(ubq, req, true);
}
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
@@ -1267,7 +1285,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
io->addr = ub_cmd->addr;
io->cmd = cmd;
io->flags |= UBLK_IO_FLAG_ACTIVE;
- ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag, cmd);
+ ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag);
break;
default:
goto out;
@@ -1658,6 +1676,9 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
*/
ub->dev_info.flags &= UBLK_F_ALL;
+ if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK))
+ ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK;
+
/* We are not ready to support zero copy */
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;