From 8b1c9b2025491d7c86255fb773b00ecf94b53acc Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Fri, 19 Mar 2021 14:50:28 -0600 Subject: scsi: ibmvfc: Fix potential race in ibmvfc_wait_for_ops() For various EH activities the ibmvfc driver uses ibmvfc_wait_for_ops() to wait for the completion of commands that match a given criteria be it cancel key, or specific LUN. With recent changes commands are completed outside the lock in bulk by removing them from the sent list and adding them to a private completion list. This introduces a potential race in ibmvfc_wait_for_ops() since the criteria for a command to be outstanding is no longer simply being on the sent list, but instead not being on the free list. Avoid this race by scanning the entire command event pool and checking that any matching command that ibmvfc needs to wait on is not already on the free list. Link: https://lore.kernel.org/r/20210319205029.312969-2-tyreld@linux.ibm.com Fixes: 1f4a4a19508d ("scsi: ibmvfc: Complete commands outside the host/queue lock") Reviewed-by: Brian King Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 6a92891ac488..94c07cc82563 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -2371,6 +2371,24 @@ static int ibmvfc_match_lun(struct ibmvfc_event *evt, void *device) return 0; } +/** + * ibmvfc_event_is_free - Check if event is free or not + * @evt: ibmvfc event struct + * + * Returns: + * true / false + **/ +static bool ibmvfc_event_is_free(struct ibmvfc_event *evt) +{ + struct ibmvfc_event *loop_evt; + + list_for_each_entry(loop_evt, &evt->queue->free, queue_list) + if (loop_evt == evt) + return true; + + return false; +} + /** * ibmvfc_wait_for_ops - Wait for ops to complete * @vhost: ibmvfc host struct @@ -2385,7 +2403,7 @@ static int ibmvfc_wait_for_ops(struct ibmvfc_host *vhost, void *device, { struct ibmvfc_event *evt; DECLARE_COMPLETION_ONSTACK(comp); - int wait; + int wait, i; unsigned long flags; signed long timeout = IBMVFC_ABORT_WAIT_TIMEOUT * HZ; @@ -2393,10 +2411,13 @@ static int ibmvfc_wait_for_ops(struct ibmvfc_host *vhost, void *device, do { wait = 0; spin_lock_irqsave(&vhost->crq.l_lock, flags); - list_for_each_entry(evt, &vhost->crq.sent, queue_list) { - if (match(evt, device)) { - evt->eh_comp = ∁ - wait++; + for (i = 0; i < vhost->crq.evt_pool.size; i++) { + evt = &vhost->crq.evt_pool.events[i]; + if (!ibmvfc_event_is_free(evt)) { + if (match(evt, device)) { + evt->eh_comp = ∁ + wait++; + } } } spin_unlock_irqrestore(&vhost->crq.l_lock, flags); @@ -2407,10 +2428,13 @@ static int ibmvfc_wait_for_ops(struct ibmvfc_host *vhost, void *device, if (!timeout) { wait = 0; spin_lock_irqsave(&vhost->crq.l_lock, flags); - list_for_each_entry(evt, &vhost->crq.sent, queue_list) { - if (match(evt, device)) { - evt->eh_comp = NULL; - wait++; + for (i = 0; i < vhost->crq.evt_pool.size; i++) { + evt = &vhost->crq.evt_pool.events[i]; + if (!ibmvfc_event_is_free(evt)) { + if (match(evt, device)) { + evt->eh_comp = NULL; + wait++; + } } } spin_unlock_irqrestore(&vhost->crq.l_lock, flags); -- cgit v1.2.3-59-g8ed1b From 62fc2661482b6beccfab8a5987419e96a9499fb4 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Fri, 19 Mar 2021 14:50:29 -0600 Subject: scsi: ibmvfc: Make ibmvfc_wait_for_ops() MQ aware During MQ enablement of the ibmvfc driver ibmvfc_wait_for_ops() was missed. This function is responsible for waiting on commands to complete that match a certain criteria such as LUN or cancel key. The implementation as is only scans the CRQ for events ignoring any sub-queues and as a result will exit successfully without doing anything when operating in MQ channelized mode. Check the MQ and channel use flags to determine which queues are applicable, and scan each queue accordingly. Note in MQ mode SCSI commands are only issued down sub-queues and the CRQ is only used for driver specific management commands. As such the CRQ events are ignored when operating in MQ mode with channels. Link: https://lore.kernel.org/r/20210319205029.312969-3-tyreld@linux.ibm.com Fixes: 9000cb998bcf ("scsi: ibmvfc: Enable MQ and set reasonable defaults") Reviewed-by: Brian King Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 51 ++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 94c07cc82563..bb64e3247a6c 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -2403,41 +2403,58 @@ static int ibmvfc_wait_for_ops(struct ibmvfc_host *vhost, void *device, { struct ibmvfc_event *evt; DECLARE_COMPLETION_ONSTACK(comp); - int wait, i; + int wait, i, q_index, q_size; unsigned long flags; signed long timeout = IBMVFC_ABORT_WAIT_TIMEOUT * HZ; + struct ibmvfc_queue *queues; ENTER; + if (vhost->mq_enabled && vhost->using_channels) { + queues = vhost->scsi_scrqs.scrqs; + q_size = vhost->scsi_scrqs.active_queues; + } else { + queues = &vhost->crq; + q_size = 1; + } + do { wait = 0; - spin_lock_irqsave(&vhost->crq.l_lock, flags); - for (i = 0; i < vhost->crq.evt_pool.size; i++) { - evt = &vhost->crq.evt_pool.events[i]; - if (!ibmvfc_event_is_free(evt)) { - if (match(evt, device)) { - evt->eh_comp = ∁ - wait++; + spin_lock_irqsave(vhost->host->host_lock, flags); + for (q_index = 0; q_index < q_size; q_index++) { + spin_lock(&queues[q_index].l_lock); + for (i = 0; i < queues[q_index].evt_pool.size; i++) { + evt = &queues[q_index].evt_pool.events[i]; + if (!ibmvfc_event_is_free(evt)) { + if (match(evt, device)) { + evt->eh_comp = ∁ + wait++; + } } } + spin_unlock(&queues[q_index].l_lock); } - spin_unlock_irqrestore(&vhost->crq.l_lock, flags); + spin_unlock_irqrestore(vhost->host->host_lock, flags); if (wait) { timeout = wait_for_completion_timeout(&comp, timeout); if (!timeout) { wait = 0; - spin_lock_irqsave(&vhost->crq.l_lock, flags); - for (i = 0; i < vhost->crq.evt_pool.size; i++) { - evt = &vhost->crq.evt_pool.events[i]; - if (!ibmvfc_event_is_free(evt)) { - if (match(evt, device)) { - evt->eh_comp = NULL; - wait++; + spin_lock_irqsave(vhost->host->host_lock, flags); + for (q_index = 0; q_index < q_size; q_index++) { + spin_lock(&queues[q_index].l_lock); + for (i = 0; i < queues[q_index].evt_pool.size; i++) { + evt = &queues[q_index].evt_pool.events[i]; + if (!ibmvfc_event_is_free(evt)) { + if (match(evt, device)) { + evt->eh_comp = NULL; + wait++; + } } } + spin_unlock(&queues[q_index].l_lock); } - spin_unlock_irqrestore(&vhost->crq.l_lock, flags); + spin_unlock_irqrestore(vhost->host->host_lock, flags); if (wait) dev_err(vhost->dev, "Timed out waiting for aborted commands\n"); LEAVE; -- cgit v1.2.3-59-g8ed1b From 39c0c8553bfb5a3d108aa47f1256076d507605e3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 20 Mar 2021 16:23:53 -0700 Subject: scsi: Revert "qla2xxx: Make sure that aborted commands are freed" Calling vha->hw->tgt.tgt_ops->free_cmd() from qlt_xmit_response() is wrong since the command for which a response is sent must remain valid until the SCSI target core calls .release_cmd(). It has been observed that the following scenario triggers a kernel crash: - qlt_xmit_response() calls qlt_check_reserve_free_req() - qlt_check_reserve_free_req() returns -EAGAIN - qlt_xmit_response() calls vha->hw->tgt.tgt_ops->free_cmd(cmd) - transport_handle_queue_full() tries to retransmit the response Fix this crash by reverting the patch that introduced it. Link: https://lore.kernel.org/r/20210320232359.941-2-bvanassche@acm.org Fixes: 0dcec41acb85 ("scsi: qla2xxx: Make sure that aborted commands are freed") Cc: Quinn Tran Cc: Mike Christie Reviewed-by: Daniel Wagner Reviewed-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.c | 13 +++++-------- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 4 ---- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index c48daf52725d..480e7d2dcf3e 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3222,8 +3222,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, if (!qpair->fw_started || (cmd->reset_count != qpair->chip_reset) || (cmd->sess && cmd->sess->deleted)) { cmd->state = QLA_TGT_STATE_PROCESSED; - res = 0; - goto free; + return 0; } ql_dbg_qp(ql_dbg_tgt, qpair, 0xe018, @@ -3234,8 +3233,9 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, res = qlt_pre_xmit_response(cmd, &prm, xmit_type, scsi_status, &full_req_cnt); - if (unlikely(res != 0)) - goto free; + if (unlikely(res != 0)) { + return res; + } spin_lock_irqsave(qpair->qp_lock_ptr, flags); @@ -3255,8 +3255,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, vha->flags.online, qla2x00_reset_active(vha), cmd->reset_count, qpair->chip_reset); spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); - res = 0; - goto free; + return 0; } /* Does F/W have an IOCBs for this request */ @@ -3359,8 +3358,6 @@ out_unmap_unlock: qlt_unmap_sg(vha, cmd); spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); -free: - vha->hw->tgt.tgt_ops->free_cmd(cmd); return res; } EXPORT_SYMBOL(qlt_xmit_response); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index b55fc768a2a7..8b4890cdd4ca 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -644,7 +644,6 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - struct scsi_qla_host *vha = cmd->vha; if (cmd->aborted) { /* Cmd can loop during Q-full. tcm_qla2xxx_aborted_task @@ -657,7 +656,6 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) cmd->se_cmd.transport_state, cmd->se_cmd.t_state, cmd->se_cmd.se_cmd_flags); - vha->hw->tgt.tgt_ops->free_cmd(cmd); return 0; } @@ -685,7 +683,6 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - struct scsi_qla_host *vha = cmd->vha; int xmit_type = QLA_TGT_XMIT_STATUS; if (cmd->aborted) { @@ -699,7 +696,6 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd) cmd, kref_read(&cmd->se_cmd.cmd_kref), cmd->se_cmd.transport_state, cmd->se_cmd.t_state, cmd->se_cmd.se_cmd_flags); - vha->hw->tgt.tgt_ops->free_cmd(cmd); return 0; } cmd->bufflen = se_cmd->data_length; -- cgit v1.2.3-59-g8ed1b From f69953837ca5d98aa983a138dc0b90a411e9c763 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sun, 7 Mar 2021 19:30:24 -0800 Subject: scsi: qedi: Fix error return code of qedi_alloc_global_queues() When kzalloc() returns NULL to qedi->global_queues[i], no error return code of qedi_alloc_global_queues() is assigned. To fix this bug, status is assigned with -ENOMEM in this case. Link: https://lore.kernel.org/r/20210308033024.27147-1-baijiaju1990@gmail.com Fixes: ace7f46ba5fd ("scsi: qedi: Add QLogic FastLinQ offload iSCSI driver framework.") Reported-by: TOTE Robot Acked-by: Manish Rangankar Signed-off-by: Jia-Ju Bai Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 47ad64b06623..69c5b5ee2169 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1675,6 +1675,7 @@ static int qedi_alloc_global_queues(struct qedi_ctx *qedi) if (!qedi->global_queues[i]) { QEDI_ERR(&qedi->dbg_ctx, "Unable to allocation global queue %d.\n", i); + status = -ENOMEM; goto mem_alloc_failure; } -- cgit v1.2.3-59-g8ed1b From 3401ecf7fc1b9458a19d42c0e26a228f18ac7dda Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sun, 7 Mar 2021 19:52:41 -0800 Subject: scsi: mpt3sas: Fix error return code of mpt3sas_base_attach() When kzalloc() returns NULL, no error return code of mpt3sas_base_attach() is assigned. To fix this bug, r is assigned with -ENOMEM in this case. Link: https://lore.kernel.org/r/20210308035241.3288-1-baijiaju1990@gmail.com Fixes: c696f7b83ede ("scsi: mpt3sas: Implement device_remove_in_progress check in IOCTL path") Reported-by: TOTE Robot Signed-off-by: Jia-Ju Bai Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index ac066f86bb14..ac0eef975f17 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -7806,14 +7806,18 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) ioc->pend_os_device_add_sz++; ioc->pend_os_device_add = kzalloc(ioc->pend_os_device_add_sz, GFP_KERNEL); - if (!ioc->pend_os_device_add) + if (!ioc->pend_os_device_add) { + r = -ENOMEM; goto out_free_resources; + } ioc->device_remove_in_progress_sz = ioc->pend_os_device_add_sz; ioc->device_remove_in_progress = kzalloc(ioc->device_remove_in_progress_sz, GFP_KERNEL); - if (!ioc->device_remove_in_progress) + if (!ioc->device_remove_in_progress) { + r = -ENOMEM; goto out_free_resources; + } ioc->fwfault_debug = mpt3sas_fwfault_debug; -- cgit v1.2.3-59-g8ed1b From 077ce028b8e0684d5ee7da573bd835b14b591546 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Tue, 23 Mar 2021 22:24:30 +0100 Subject: scsi: target: pscsi: Avoid OOM in pscsi_map_sg() pscsi_map_sg() uses the variable nr_pages as a hint for bio_kmalloc() how many vector elements to allocate. If nr_pages is < BIO_MAX_PAGES, it will be reset to 0 after successful allocation of the bio. If bio_add_pc_page() fails later for whatever reason, pscsi_map_sg() tries to allocate another bio, passing nr_vecs = 0. This causes bio_add_pc_page() to fail immediately in the next call. pci_map_sg() continues to allocate zero-length bios until memory is exhausted and the kernel crashes with OOM. This can be easily observed by exporting a SATA DVD drive via pscsi. The target crashes as soon as the client tries to access the DVD LUN. In the case I analyzed, bio_add_pc_page() would fail because the DVD device's max_sectors_kb (128) was exceeded. Avoid this by simply not resetting nr_pages to 0 after allocating the bio. This way, the client receives an I/O error when it tries to send requests exceeding the devices max_sectors_kb, and eventually gets it right. The client must still limit max_sectors_kb e.g. by an udev rule if (like in my case) the driver doesn't report valid block limits, otherwise it encounters I/O errors. Link: https://lore.kernel.org/r/20210323212431.15306-1-mwilck@suse.com Reviewed-by: Christoph Hellwig Reviewed-by: Lee Duncan Signed-off-by: Martin Wilck Signed-off-by: Martin K. Petersen --- drivers/target/target_core_pscsi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 3cbc074992bc..6fa9fddc2e1e 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -882,7 +882,6 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, if (!bio) { new_bio: nr_vecs = bio_max_segs(nr_pages); - nr_pages -= nr_vecs; /* * Calls bio_kmalloc() and sets bio->bi_end_io() */ -- cgit v1.2.3-59-g8ed1b From 36fa766faa0c822c860e636fe82b1affcd022974 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Tue, 23 Mar 2021 22:24:31 +0100 Subject: scsi: target: pscsi: Clean up after failure in pscsi_map_sg() If pscsi_map_sg() fails, make sure to drop references to already allocated bios. Link: https://lore.kernel.org/r/20210323212431.15306-2-mwilck@suse.com Reviewed-by: Christoph Hellwig Reviewed-by: Lee Duncan Signed-off-by: Martin Wilck Signed-off-by: Martin K. Petersen --- drivers/target/target_core_pscsi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 6fa9fddc2e1e..9ee797b8cb7e 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -938,6 +938,14 @@ new_bio: return 0; fail: + if (bio) + bio_put(bio); + while (req->bio) { + bio = req->bio; + req->bio = bio->bi_next; + bio_put(bio); + } + req->biotail = NULL; return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } -- cgit v1.2.3-59-g8ed1b