From ea045fd344cb15c164e9ffc8b8cffb6883df8475 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Mon, 7 Nov 2022 13:02:29 +0900 Subject: ata: libata-scsi: fix SYNCHRONIZE CACHE (16) command failure SAT SCSI/ATA Translation specification requires SCSI SYNCHRONIZE CACHE (10) and (16) commands both shall be translated to ATA flush command. Also, ZBC Zoned Block Commands specification mandates SYNCHRONIZE CACHE (16) command support. However, libata translates only SYNCHRONIZE CACHE (10). This results in SYNCHRONIZE CACHE (16) command failures on SATA drives and then libata translation does not conform to ZBC. To avoid the failure, add support for SYNCHRONIZE CACHE (16). Signed-off-by: Shin'ichiro Kawasaki Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal --- drivers/ata/libata-scsi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index e2ebb0b065e2..61cd4e90e4e7 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3264,6 +3264,7 @@ static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf) case REPORT_LUNS: case REQUEST_SENSE: case SYNCHRONIZE_CACHE: + case SYNCHRONIZE_CACHE_16: case REZERO_UNIT: case SEEK_6: case SEEK_10: @@ -3922,6 +3923,7 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd) return ata_scsi_write_same_xlat; case SYNCHRONIZE_CACHE: + case SYNCHRONIZE_CACHE_16: if (ata_try_flush_cache(dev)) return ata_scsi_flush_xlat; break; @@ -4145,6 +4147,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd) * turning this into a no-op. */ case SYNCHRONIZE_CACHE: + case SYNCHRONIZE_CACHE_16: fallthrough; /* no-op's, complete with success */ -- cgit v1.2.3-59-g8ed1b From 8c76310740807ade5ecdab5888f70ecb6d35732e Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 8 Nov 2022 21:40:01 +0800 Subject: ata: libata-transport: fix double ata_host_put() in ata_tport_add() In the error path in ata_tport_add(), when calling put_device(), ata_tport_release() is called, it will put the refcount of 'ap->host'. And then ata_host_put() is called again, the refcount is decreased to 0, ata_host_release() is called, all ports are freed and set to null. When unbinding the device after failure, ata_host_stop() is called to release the resources, it leads a null-ptr-deref(), because all the ports all freed and null. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 CPU: 7 PID: 18671 Comm: modprobe Kdump: loaded Tainted: G E 6.1.0-rc3+ #8 pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : ata_host_stop+0x3c/0x84 [libata] lr : release_nodes+0x64/0xd0 Call trace: ata_host_stop+0x3c/0x84 [libata] release_nodes+0x64/0xd0 devres_release_all+0xbc/0x1b0 device_unbind_cleanup+0x20/0x70 really_probe+0x158/0x320 __driver_probe_device+0x84/0x120 driver_probe_device+0x44/0x120 __driver_attach+0xb4/0x220 bus_for_each_dev+0x78/0xdc driver_attach+0x2c/0x40 bus_add_driver+0x184/0x240 driver_register+0x80/0x13c __pci_register_driver+0x4c/0x60 ahci_pci_driver_init+0x30/0x1000 [ahci] Fix this by removing redundant ata_host_put() in the error path. Fixes: 2623c7a5f279 ("libata: add refcounting to ata_host") Signed-off-by: Yang Yingliang Signed-off-by: Damien Le Moal --- drivers/ata/libata-transport.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index a7e9a75410a3..105da3ec5eaa 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -317,7 +317,6 @@ int ata_tport_add(struct device *parent, tport_err: transport_destroy_device(dev); put_device(dev); - ata_host_put(ap->host); return error; } -- cgit v1.2.3-59-g8ed1b From 3613dbe3909dcc637fe6be00e4dc43b4aa0470ee Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 8 Nov 2022 21:40:02 +0800 Subject: ata: libata-transport: fix error handling in ata_tport_add() In ata_tport_add(), the return value of transport_add_device() is not checked. As a result, it causes null-ptr-deref while removing the module, because transport_remove_device() is called to remove the device that was not added. Unable to handle kernel NULL pointer dereference at virtual address 00000000000000d0 CPU: 12 PID: 13605 Comm: rmmod Kdump: loaded Tainted: G W 6.1.0-rc3+ #8 pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : device_del+0x48/0x39c lr : device_del+0x44/0x39c Call trace: device_del+0x48/0x39c attribute_container_class_device_del+0x28/0x40 transport_remove_classdev+0x60/0x7c attribute_container_device_trigger+0x118/0x120 transport_remove_device+0x20/0x30 ata_tport_delete+0x34/0x60 [libata] ata_port_detach+0x148/0x1b0 [libata] ata_pci_remove_one+0x50/0x80 [libata] ahci_remove_one+0x4c/0x8c [ahci] Fix this by checking and handling return value of transport_add_device() in ata_tport_add(). Fixes: d9027470b886 ("[libata] Add ATA transport class") Signed-off-by: Yang Yingliang Signed-off-by: Damien Le Moal --- drivers/ata/libata-transport.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index 105da3ec5eaa..ef53bdfbcbb2 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -301,7 +301,9 @@ int ata_tport_add(struct device *parent, pm_runtime_enable(dev); pm_runtime_forbid(dev); - transport_add_device(dev); + error = transport_add_device(dev); + if (error) + goto tport_transport_add_err; transport_configure_device(dev); error = ata_tlink_add(&ap->link); @@ -312,6 +314,7 @@ int ata_tport_add(struct device *parent, tport_link_err: transport_remove_device(dev); + tport_transport_add_err: device_del(dev); tport_err: -- cgit v1.2.3-59-g8ed1b From cf0816f6322c5c37ee52655f928e91ecf32da103 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 8 Nov 2022 21:40:03 +0800 Subject: ata: libata-transport: fix error handling in ata_tlink_add() In ata_tlink_add(), the return value of transport_add_device() is not checked. As a result, it causes null-ptr-deref while removing the module, because transport_remove_device() is called to remove the device that was not added. Unable to handle kernel NULL pointer dereference at virtual address 00000000000000d0 CPU: 33 PID: 13850 Comm: rmmod Kdump: loaded Tainted: G W 6.1.0-rc3+ #12 pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : device_del+0x48/0x39c lr : device_del+0x44/0x39c Call trace: device_del+0x48/0x39c attribute_container_class_device_del+0x28/0x40 transport_remove_classdev+0x60/0x7c attribute_container_device_trigger+0x118/0x120 transport_remove_device+0x20/0x30 ata_tlink_delete+0x88/0xb0 [libata] ata_tport_delete+0x2c/0x60 [libata] ata_port_detach+0x148/0x1b0 [libata] ata_pci_remove_one+0x50/0x80 [libata] ahci_remove_one+0x4c/0x8c [ahci] Fix this by checking and handling return value of transport_add_device() in ata_tlink_add(). Fixes: d9027470b886 ("[libata] Add ATA transport class") Signed-off-by: Yang Yingliang Signed-off-by: Damien Le Moal --- drivers/ata/libata-transport.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index ef53bdfbcbb2..aac9336e8153 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -458,7 +458,9 @@ int ata_tlink_add(struct ata_link *link) goto tlink_err; } - transport_add_device(dev); + error = transport_add_device(dev); + if (error) + goto tlink_transport_err; transport_configure_device(dev); ata_for_each_dev(ata_dev, link, ALL) { @@ -473,6 +475,7 @@ int ata_tlink_add(struct ata_link *link) ata_tdev_delete(ata_dev); } transport_remove_device(dev); + tlink_transport_err: device_del(dev); tlink_err: transport_destroy_device(dev); -- cgit v1.2.3-59-g8ed1b From 1ff36351309e3eadcff297480baf4785e726de9b Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 8 Nov 2022 21:40:04 +0800 Subject: ata: libata-transport: fix error handling in ata_tdev_add() In ata_tdev_add(), the return value of transport_add_device() is not checked. As a result, it causes null-ptr-deref while removing the module, because transport_remove_device() is called to remove the device that was not added. Unable to handle kernel NULL pointer dereference at virtual address 00000000000000d0 CPU: 13 PID: 13603 Comm: rmmod Kdump: loaded Tainted: G W 6.1.0-rc3+ #36 pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : device_del+0x48/0x3a0 lr : device_del+0x44/0x3a0 Call trace: device_del+0x48/0x3a0 attribute_container_class_device_del+0x28/0x40 transport_remove_classdev+0x60/0x7c attribute_container_device_trigger+0x118/0x120 transport_remove_device+0x20/0x30 ata_tdev_delete+0x24/0x50 [libata] ata_tlink_delete+0x40/0xa0 [libata] ata_tport_delete+0x2c/0x60 [libata] ata_port_detach+0x148/0x1b0 [libata] ata_pci_remove_one+0x50/0x80 [libata] ahci_remove_one+0x4c/0x8c [ahci] Fix this by checking and handling return value of transport_add_device() in ata_tdev_add(). In the error path, device_del() is called to delete the device which was added earlier in this function, and ata_tdev_free() is called to free ata_dev. Fixes: d9027470b886 ("[libata] Add ATA transport class") Signed-off-by: Yang Yingliang Signed-off-by: Damien Le Moal --- drivers/ata/libata-transport.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index aac9336e8153..e4fb9d1b9b39 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -713,7 +713,13 @@ static int ata_tdev_add(struct ata_device *ata_dev) return error; } - transport_add_device(dev); + error = transport_add_device(dev); + if (error) { + device_del(dev); + ata_tdev_free(ata_dev); + return error; + } + transport_configure_device(dev); return 0; } -- cgit v1.2.3-59-g8ed1b From e20e81a24a4d58744a29715aac2f795cd1651955 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 9 Nov 2022 00:15:34 +0100 Subject: ata: libata-core: do not issue non-internal commands once EH is pending While the ATA specification states that a device should return command aborted for all commands queued after the device has entered error state, since ATA only keeps the sense data for the latest command (in non-NCQ case), we really don't want to send block layer commands to the device after it has entered error state. (Only ATA EH commands should be sent, to read the sense data etc.) Currently, scsi_queue_rq() will check if scsi_host_in_recovery() (state is SHOST_RECOVERY), and if so, it will _not_ issue a command via: scsi_dispatch_cmd() -> host->hostt->queuecommand() (ata_scsi_queuecmd()) -> __ata_scsi_queuecmd() -> ata_scsi_translate() -> ata_qc_issue() Before commit e494f6a72839 ("[SCSI] improved eh timeout handler"), when receiving a TFES error IRQ, the call chain looked like this: ahci_error_intr() -> ata_port_abort() -> ata_do_link_abort() -> ata_qc_complete() -> ata_qc_schedule_eh() -> blk_abort_request() -> blk_rq_timed_out() -> q->rq_timed_out_fn() (scsi_times_out()) -> scsi_eh_scmd_add() -> scsi_host_set_state(shost, SHOST_RECOVERY) Which meant that as soon as an error IRQ was serviced, SHOST_RECOVERY would be set. However, after commit e494f6a72839 ("[SCSI] improved eh timeout handler"), scsi_times_out() will instead call scsi_abort_command() which will queue delayed work, and the worker function scmd_eh_abort_handler() will call scsi_eh_scmd_add(), which calls scsi_host_set_state(shost, SHOST_RECOVERY). So now, after the TFES error IRQ has been serviced, we need to wait for the SCSI workqueue to run its work before SHOST_RECOVERY gets set. It is worth noting that, even before commit e494f6a72839 ("[SCSI] improved eh timeout handler"), we could receive an error IRQ from the time when scsi_queue_rq() checks scsi_host_in_recovery(), to the time when ata_scsi_queuecmd() is actually called. In order to handle both the delayed setting of SHOST_RECOVERY and the window where we can receive an error IRQ, add a check against ATA_PFLAG_EH_PENDING (which gets set when servicing the error IRQ), inside ata_scsi_queuecmd() itself, while holding the ap->lock. (Since the ap->lock is held while servicing IRQs.) Fixes: e494f6a72839 ("[SCSI] improved eh timeout handler") Signed-off-by: Niklas Cassel Tested-by: John Garry Signed-off-by: Damien Le Moal --- drivers/ata/libata-scsi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 61cd4e90e4e7..06a3d95ed8f9 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3964,9 +3964,19 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd) int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev) { + struct ata_port *ap = dev->link->ap; u8 scsi_op = scmd->cmnd[0]; ata_xlat_func_t xlat_func; + /* + * scsi_queue_rq() will defer commands if scsi_host_in_recovery(). + * However, this check is done without holding the ap->lock (a libata + * specific lock), so we can have received an error irq since then, + * therefore we must check if EH is pending, while holding ap->lock. + */ + if (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) + return SCSI_MLQUEUE_DEVICE_BUSY; + if (unlikely(!scmd->cmd_len)) goto bad_cdb_len; -- cgit v1.2.3-59-g8ed1b