From 6a02a61e81c231cc5c680c5dbf8665275147ac52 Mon Sep 17 00:00:00 2001
From: Bart Van Assche
Date: Fri, 12 Aug 2022 14:03:17 -0700
Subject: nvmet: fix a use-after-free

Fix the following use-after-free complaint triggered by blktests
nvme/004:

BUG: KASAN: user-memory-access in blk_mq_complete_request_remote+0xac/0x350
Read of size 4 at addr 0000607bd1835943 by task kworker/13:1/460
Workqueue: nvmet-wq nvme_loop_execute_work [nvme_loop]
Call Trace:
 show_stack+0x52/0x58
 dump_stack_lvl+0x49/0x5e
 print_report.cold+0x36/0x1e2
 kasan_report+0xb9/0xf0
 __asan_load4+0x6b/0x80
 blk_mq_complete_request_remote+0xac/0x350
 nvme_loop_queue_response+0x1df/0x275 [nvme_loop]
 __nvmet_req_complete+0x132/0x4f0 [nvmet]
 nvmet_req_complete+0x15/0x40 [nvmet]
 nvmet_execute_io_connect+0x18a/0x1f0 [nvmet]
 nvme_loop_execute_work+0x20/0x30 [nvme_loop]
 process_one_work+0x56e/0xa70
 worker_thread+0x2d1/0x640
 kthread+0x183/0x1c0
 ret_from_fork+0x1f/0x30

Cc: stable@vger.kernel.org
Fixes: a07b4970f464 ("nvmet: add a generic NVMe target")
Signed-off-by: Bart Van Assche
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/target/core.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index a1345790005f..7f4083cf953a 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -735,6 +735,8 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status)
 
 static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 {
+	struct nvmet_ns *ns = req->ns;
+
 	if (!req->sq->sqhd_disabled)
 		nvmet_update_sq_head(req);
 	req->cqe->sq_id = cpu_to_le16(req->sq->qid);
@@ -745,9 +747,9 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 
 	trace_nvmet_req_complete(req);
 
-	if (req->ns)
-		nvmet_put_namespace(req->ns);
 	req->ops->queue_response(req);
+	if (ns)
+		nvmet_put_namespace(ns);
 }
 
 void nvmet_req_complete(struct nvmet_req *req, u16 status)
--
cgit v1.2.3-59-g8ed1b

From 160f3549a907a50e51a8518678ba2dcf2541abea Mon Sep 17 00:00:00 2001
From: Sagi Grimberg
Date: Mon, 5 Sep 2022 13:54:17 +0300
Subject: nvme-tcp: fix UAF when detecting digest errors

We should also bail from the io_work loop when we set rd_enabled to
true, so we don't attempt to read data from the socket when the TCP
stream is already out-of-sync or corrupted.

Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver")
Reported-by: Daniel Wagner
Signed-off-by: Sagi Grimberg
Reviewed-by: Daniel Wagner
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 044da18c06f5..54b4e8a7fe42 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1229,7 +1229,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
 		else if (unlikely(result < 0))
 			return;
 
-		if (!pending)
+		if (!pending || !queue->rd_enabled)
 			return;
 	} while (!time_after(jiffies, deadline)); /* quota is exhausted */
 
--
cgit v1.2.3-59-g8ed1b
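The nvmet fix above follows a general rule worth spelling out: any field of an object that is still needed after an operation that can free that object must be cached in a local variable first. A minimal, self-contained sketch of the pattern (hypothetical types, not the kernel code):

#include <stdio.h>
#include <stdlib.h>

struct ns { int refcount; };
struct request { struct ns *ns; };

static void put_ns(struct ns *ns) { ns->refcount--; }

/* Stands in for req->ops->queue_response(): completing the request may
 * free it (in the real driver, possibly from another CPU). */
static void queue_response(struct request *req) { free(req); }

static void complete_request(struct request *req)
{
	struct ns *ns = req->ns;	/* cache before req can disappear */

	queue_response(req);		/* req must not be touched after this */
	if (ns)
		put_ns(ns);		/* safe: uses the cached pointer */
}

int main(void)
{
	struct ns ns = { .refcount = 1 };
	struct request *req = malloc(sizeof(*req));

	req->ns = &ns;
	complete_request(req);
	printf("ns refcount after completion: %d\n", ns.refcount);
	return 0;
}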
From 3770a42bb8ceb856877699257a43c0585a5d2996 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg
Date: Mon, 5 Sep 2022 18:07:06 +0300
Subject: nvme-tcp: fix regression that causes sporadic requests to time out

When we queue requests, we strive to batch as much as possible and also
signal the network stack that more data is about to be sent over a
socket with MSG_SENDPAGE_NOTLAST.

This flag looks at the pending requests queued as well as
queue->more_requests, which is derived from the block layer
last-in-batch indication.

We set more_requests=true when we flush the request directly from the
.queue_rq submission context (in nvme_tcp_send_all); however, this
wrongly assumes that no other requests may be queued during the
execution of nvme_tcp_send_all.

Due to this, a race condition may happen where:

1. request X is queued as !last-in-batch
2. request X submission context calls nvme_tcp_send_all directly
3. nvme_tcp_send_all is preempted and schedules to a different cpu
4. request Y is queued as last-in-batch
5. nvme_tcp_send_all context sends requests X+Y, but signals
   MSG_SENDPAGE_NOTLAST for both because queue->more_requests=true

==> neither request is pushed down to the wire, as the network stack is
waiting for more data, and both requests time out.

To fix this, we eliminate queue->more_requests and rely only on the
queue req_list and send_list being non-empty.

Fixes: 122e5b9f3d37 ("nvme-tcp: optimize network stack with setting msg flags according to batch size")
Reported-by: Jonathan Nicklin
Signed-off-by: Sagi Grimberg
Tested-by: Jonathan Nicklin
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/tcp.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 54b4e8a7fe42..d5871fd6f769 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -121,7 +121,6 @@ struct nvme_tcp_queue {
 	struct mutex		send_mutex;
 	struct llist_head	req_list;
 	struct list_head	send_list;
-	bool			more_requests;
 
 	/* recv state */
 	void			*pdu;
@@ -320,7 +319,7 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
 static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
 {
 	return !list_empty(&queue->send_list) ||
-		!llist_empty(&queue->req_list) || queue->more_requests;
+		!llist_empty(&queue->req_list);
 }
 
 static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
@@ -339,9 +338,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
 	 */
 	if (queue->io_cpu == raw_smp_processor_id() &&
 	    sync && empty && mutex_trylock(&queue->send_mutex)) {
-		queue->more_requests = !last;
 		nvme_tcp_send_all(queue);
-		queue->more_requests = false;
 		mutex_unlock(&queue->send_mutex);
 	}
--
cgit v1.2.3-59-g8ed1b
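The essence of the fix: the "more data is coming" answer must be computed from queue state at the moment of sending, rather than latched in a flag that a concurrent submitter can make stale. A self-contained sketch of that decision (hypothetical structure standing in for the driver's llist/list bookkeeping, not the driver code):

#include <stdbool.h>
#include <stddef.h>

struct sketch_queue {
	size_t req_list_len;	/* requests queued, not yet spliced */
	size_t send_list_len;	/* requests spliced for sending */
};

/*
 * Counterpart of nvme_tcp_queue_more() with the racy more_requests flag
 * removed: the hint that decides whether to set MSG_SENDPAGE_NOTLAST is
 * recomputed from live queue state on every send, so a request queued
 * concurrently by another context is always observed.
 */
static bool sketch_queue_more(const struct sketch_queue *q)
{
	return q->send_list_len > 0 || q->req_list_len > 0;
}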
From b7e97872a65e1d57b4451769610554c131f37a0a Mon Sep 17 00:00:00 2001
From: Dennis Maisenbacher
Date: Tue, 6 Sep 2022 09:39:28 +0200
Subject: nvmet: fix mar and mor off-by-one errors

Maximum Active Resources (MAR) and Maximum Open Resources (MOR) are 0's
based values, where a value of 0xffffffff indicates that there is no
limit. Decrement the values returned by bdev_max_open_zones and
bdev_max_active_zones, as the block layer helpers are not 0's based.
A 0 returned by the block layer helpers indicates no limit, thus convert
it to 0xffffffff (U32_MAX).

Fixes: aaf2e048af27 ("nvmet: add ZBD over ZNS backend support")
Suggested-by: Niklas Cassel
Signed-off-by: Dennis Maisenbacher
Reviewed-by: Chaitanya Kulkarni
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/target/zns.c | 17 +++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index c7ef69f29fe4..835bfda86fcf 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -100,6 +100,7 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
 	struct nvme_id_ns_zns *id_zns;
 	u64 zsze;
 	u16 status;
+	u32 mar, mor;
 
 	if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) {
 		req->error_loc = offsetof(struct nvme_identify, nsid);
@@ -130,8 +131,20 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
 	zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >>
 					req->ns->blksize_shift;
 	id_zns->lbafe[0].zsze = cpu_to_le64(zsze);
-	id_zns->mor = cpu_to_le32(bdev_max_open_zones(req->ns->bdev));
-	id_zns->mar = cpu_to_le32(bdev_max_active_zones(req->ns->bdev));
+
+	mor = bdev_max_open_zones(req->ns->bdev);
+	if (!mor)
+		mor = U32_MAX;
+	else
+		mor--;
+	id_zns->mor = cpu_to_le32(mor);
+
+	mar = bdev_max_active_zones(req->ns->bdev);
+	if (!mar)
+		mar = U32_MAX;
+	else
+		mar--;
+	id_zns->mar = cpu_to_le32(mar);
 
 done:
 	status = nvmet_copy_to_sgl(req, 0, id_zns, sizeof(*id_zns));
--
cgit v1.2.3-59-g8ed1b
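The conversion added above reads naturally as a small pure function: the block layer reports zone limits as natural counts where 0 means no limit, while the NVMe ZNS MAR/MOR fields are 0's based where 0xffffffff means no limit. A self-contained sketch, using a hypothetical helper name rather than the kernel's open-coded version:

#include <stdint.h>
#include <stdio.h>

/*
 * Hypothetical helper mirroring the logic above: convert a block layer
 * zone limit (a natural count, 0 == "no limit") into an NVMe ZNS
 * MAR/MOR field value (0's based, 0xffffffff == "no limit").
 */
static uint32_t blk_limit_to_nvme(uint32_t blk_limit)
{
	if (blk_limit == 0)
		return UINT32_MAX;	/* no limit */
	return blk_limit - 1;		/* 0's based: N zones -> N - 1 */
}

int main(void)
{
	/* A device allowing at most 14 open zones must report MOR = 13. */
	printf("max_open_zones=14 -> MOR=%u\n",
	       (unsigned int)blk_limit_to_nvme(14));
	printf("max_open_zones=0  -> MOR=0x%x\n",
	       (unsigned int)blk_limit_to_nvme(0));
	return 0;
}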
From 371a982cd2b01487295cd87abec82357e268457b Mon Sep 17 00:00:00 2001
From: Keith Busch
Date: Thu, 1 Sep 2022 08:30:39 -0700
Subject: nvme: requeue aen after firmware activation

The driver prevents async event work while handling a processing paused
event, but someone needs to restart it after the controller returns to a
live state.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=216400
Signed-off-by: Keith Busch
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/core.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2429b11eb9a8..70ebf27ad10e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4702,6 +4702,8 @@ static void nvme_fw_act_work(struct work_struct *work)
 	nvme_start_queues(ctrl);
 	/* read FW slot information to clear the AER */
 	nvme_get_fw_slot_info(ctrl);
+
+	queue_work(nvme_wq, &ctrl->async_event_work);
 }
 
 static u32 nvme_aer_type(u32 result)
@@ -4714,9 +4716,10 @@ static u32 nvme_aer_subtype(u32 result)
 	return (result & 0xff00) >> 8;
 }
 
-static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
+static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 {
 	u32 aer_notice_type = nvme_aer_subtype(result);
+	bool requeue = true;
 
 	trace_nvme_async_event(ctrl, aer_notice_type);
 
@@ -4733,6 +4736,7 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 		 */
 		if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) {
 			nvme_auth_stop(ctrl);
+			requeue = false;
 			queue_work(nvme_wq, &ctrl->fw_act_work);
 		}
 		break;
@@ -4749,6 +4753,7 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 	default:
 		dev_warn(ctrl->device, "async event result %08x\n", result);
 	}
+	return requeue;
 }
 
 static void nvme_handle_aer_persistent_error(struct nvme_ctrl *ctrl)
@@ -4764,13 +4769,14 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 	u32 result = le32_to_cpu(res->u32);
 	u32 aer_type = nvme_aer_type(result);
 	u32 aer_subtype = nvme_aer_subtype(result);
+	bool requeue = true;
 
 	if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
 		return;
 
 	switch (aer_type) {
 	case NVME_AER_NOTICE:
-		nvme_handle_aen_notice(ctrl, result);
+		requeue = nvme_handle_aen_notice(ctrl, result);
 		break;
 	case NVME_AER_ERROR:
 		/*
@@ -4791,7 +4797,9 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 	default:
 		break;
 	}
-	queue_work(nvme_wq, &ctrl->async_event_work);
+
+	if (requeue)
+		queue_work(nvme_wq, &ctrl->async_event_work);
 }
 EXPORT_SYMBOL_GPL(nvme_complete_async_event);
--
cgit v1.2.3-59-g8ed1b
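The shape of this fix is a common contract for self-rearming event handlers: exactly one party rearms the event, either the completion path or the deferred work it hands off to. A minimal sketch with hypothetical names, not the driver code:

#include <stdbool.h>
#include <stdio.h>

enum notice { NOTICE_NS_CHANGED, NOTICE_FW_ACT_STARTING };

static void queue_fw_act_work(void)
{
	/* Firmware activation runs here; once the controller is live
	 * again, this deferred work rearms the event exactly once. */
	puts("fw activation queued; AEN will be rearmed by fw_act work");
}

static void rearm_async_event(void)
{
	puts("AEN rearmed");
}

/* Returns whether the caller should rearm the async event itself. */
static bool handle_notice(enum notice n)
{
	if (n == NOTICE_FW_ACT_STARTING) {
		queue_fw_act_work();
		return false;	/* the fw_act work owns the rearm now */
	}
	return true;
}

static void complete_async_event(enum notice n)
{
	if (handle_notice(n))
		rearm_async_event();	/* common case: rearm immediately */
}

int main(void)
{
	complete_async_event(NOTICE_NS_CHANGED);
	complete_async_event(NOTICE_FW_ACT_STARTING);
	return 0;
}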