author     Peter Wang <peter.wang@mediatek.com>             2023-09-27 11:35:57 +0800
committer  Martin K. Petersen <martin.petersen@oracle.com>  2023-10-09 21:17:55 -0400
commit     971237b900c38f50e7865289a2aecb77dc7f09f3
tree       94e67a4afacef13f8be3aabe3a6097990df2ba95
parent     Merge patch series "ibmvfc: fixes and generic prep work for NVMeoF support"
scsi: ufs: core: WLUN send SSU timeout recovery
When an SSU (START STOP UNIT) command sent by runtime PM times out, the
SCSI core invokes eh_host_reset_handler, in this case
ufshcd_eh_host_reset_handler(), which then gets stuck waiting for
flush_work(&hba->eh_work). However, ufshcd_err_handler() itself hangs
waiting for runtime PM resume to complete, so neither side can make
progress. Do link recovery only in this case to break the cycle.

The following I/O hang stack dump was observed:

kworker/4:0     D
 <ffffffd7d31f6fb4> __switch_to+0x180/0x344
 <ffffffd7d31f779c> __schedule+0x5ec/0xa14
 <ffffffd7d31f7c3c> schedule+0x78/0xe0
 <ffffffd7d31fefbc> schedule_timeout+0xb0/0x15c
 <ffffffd7d31f8120> io_schedule_timeout+0x48/0x70
 <ffffffd7d31f8e40> do_wait_for_common+0x108/0x19c
 <ffffffd7d31f837c> wait_for_completion_io_timeout+0x50/0x78
 <ffffffd7d2876bc0> blk_execute_rq+0x1b8/0x218
 <ffffffd7d2b4297c> scsi_execute_cmd+0x148/0x238
 <ffffffd7d2da7358> ufshcd_set_dev_pwr_mode+0xe8/0x244
 <ffffffd7d2da7e40> __ufshcd_wl_resume+0x1e0/0x45c
 <ffffffd7d2da7b28> ufshcd_wl_runtime_resume+0x3c/0x174
 <ffffffd7d2b4f290> scsi_runtime_resume+0x7c/0xc8
 <ffffffd7d2ae1d48> __rpm_callback+0xa0/0x410
 <ffffffd7d2ae0128> rpm_resume+0x43c/0x67c
 <ffffffd7d2ae1e98> __rpm_callback+0x1f0/0x410
 <ffffffd7d2ae014c> rpm_resume+0x460/0x67c
 <ffffffd7d2ae1450> pm_runtime_work+0xa4/0xac
 <ffffffd7d22e39ac> process_one_work+0x208/0x598
 <ffffffd7d22e3fc0> worker_thread+0x228/0x438
 <ffffffd7d22eb038> kthread+0x104/0x1d4
 <ffffffd7d22171a0> ret_from_fork+0x10/0x20

scsi_eh_0       D
 <ffffffd7d31f6fb4> __switch_to+0x180/0x344
 <ffffffd7d31f779c> __schedule+0x5ec/0xa14
 <ffffffd7d31f7c3c> schedule+0x78/0xe0
 <ffffffd7d31fef50> schedule_timeout+0x44/0x15c
 <ffffffd7d31f8e40> do_wait_for_common+0x108/0x19c
 <ffffffd7d31f8234> wait_for_completion+0x48/0x64
 <ffffffd7d22deb88> __flush_work+0x260/0x2d0
 <ffffffd7d22de918> flush_work+0x10/0x20
 <ffffffd7d2da4728> ufshcd_eh_host_reset_handler+0x88/0xcc
 <ffffffd7d2b41da4> scsi_try_host_reset+0x48/0xe0
 <ffffffd7d2b410fc> scsi_eh_ready_devs+0x934/0xa40
 <ffffffd7d2b41618> scsi_error_handler+0x168/0x374
 <ffffffd7d22eb038> kthread+0x104/0x1d4
 <ffffffd7d22171a0> ret_from_fork+0x10/0x20

kworker/u16:5   D
 <ffffffd7d31f6fb4> __switch_to+0x180/0x344
 <ffffffd7d31f779c> __schedule+0x5ec/0xa14
 <ffffffd7d31f7c3c> schedule+0x78/0xe0
 <ffffffd7d2adfe00> rpm_resume+0x114/0x67c
 <ffffffd7d2adfca8> __pm_runtime_resume+0x70/0xb4
 <ffffffd7d2d9cf48> ufshcd_err_handler+0x1a0/0xe68
 <ffffffd7d22e39ac> process_one_work+0x208/0x598
 <ffffffd7d22e3fc0> worker_thread+0x228/0x438
 <ffffffd7d22eb038> kthread+0x104/0x1d4
 <ffffffd7d22171a0> ret_from_fork+0x10/0x20

Signed-off-by: Peter Wang <peter.wang@mediatek.com>
Link: https://lore.kernel.org/r/20230927033557.13801-1-peter.wang@mediatek.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Stanley Chu <stanley.chu@mediatek.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
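To make the circular wait easier to see, here is a minimal userspace
sketch of the same deadlock shape and of the escape hatch the patch adds.
It is illustrative only: the names (pm_op_in_progress, link_recovery,
err_handler_work) loosely mirror the driver, and pthreads stand in for
the kernel's workqueues and runtime PM.

/* deadlock_sketch.c - userspace analogue of the SSU-timeout deadlock.
 * Build with: cc -pthread deadlock_sketch.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool pm_op_in_progress;	/* set while "runtime resume" runs */
static bool eh_work_done;	/* set when the error-handler work ends */

/* Lightweight recovery that does not depend on the PM state machine. */
static int link_recovery(void)
{
	puts("link_recovery: resetting the link directly");
	return 0;	/* 0 == success */
}

/* Stand-in for ufshcd_err_handler(): must wait for resume to finish. */
static void *err_handler_work(void *arg)
{
	pthread_mutex_lock(&lock);
	while (pm_op_in_progress)	/* waits forever if resume itself */
		pthread_cond_wait(&cond, &lock);	/* waits on us */
	eh_work_done = true;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* Stand-in for ufshcd_eh_host_reset_handler(). */
static int eh_host_reset_handler(void)
{
	pthread_mutex_lock(&lock);
	if (pm_op_in_progress) {	/* the fix: detect PM context and */
		pthread_mutex_unlock(&lock);	/* skip the flush below */
		return link_recovery();
	}
	while (!eh_work_done)	/* flush_work(&hba->eh_work) analogue */
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
	return 0;
}

int main(void)
{
	pthread_t worker;

	pm_op_in_progress = true;	/* resume sent SSU; it timed out */
	pthread_create(&worker, NULL, err_handler_work, NULL);

	/* Without the pm_op_in_progress branch this call would block on
	 * eh_work_done, while the worker blocks on pm_op_in_progress,
	 * which only the (already stuck) resume path would clear.
	 */
	printf("host reset: %d\n", eh_host_reset_handler());

	pthread_mutex_lock(&lock);	/* let resume "complete" so the */
	pm_op_in_progress = false;	/* worker can exit and be joined */
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	pthread_join(worker, NULL);
	return 0;
}

The design point mirrors the patch: when the reset handler can tell it
was entered from a PM operation, it must not wait on work that itself
waits on PM; a direct, self-contained recovery path is the only safe
choice.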
-rw-r--r--	drivers/ufs/core/ufshcd.c	13
1 file changed, 13 insertions(+), 0 deletions(-)
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 0f08234def3f..1839e188a2c5 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -7708,6 +7708,19 @@ static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
 	hba = shost_priv(cmd->device->host);
 
+	/*
+	 * If runtime PM sent SSU and got a timeout, scsi_error_handler is
+	 * stuck in this function waiting for flush_work(&hba->eh_work). And
+	 * ufshcd_err_handler(eh_work) is stuck waiting for runtime PM. Do
+	 * ufshcd_link_recovery instead of eh_work to prevent deadlock.
+	 */
+	if (hba->pm_op_in_progress) {
+		if (ufshcd_link_recovery(hba))
+			err = FAILED;
+
+		return err;
+	}
+
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	hba->force_reset = true;
 	ufshcd_schedule_eh_work(hba);
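For context, the hba->pm_op_in_progress flag tested above is set by the
UFS core around WLUN suspend/resume. The snippet below is an abridged
paraphrase of the resume path (not verbatim driver code; error handling
and most of the setup are omitted) showing where the flag brackets the
SSU submission:

/* Abridged paraphrase of __ufshcd_wl_resume() in drivers/ufs/core/ufshcd.c;
 * illustrative only, not the actual function body.
 */
static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
{
	int ret;

	hba->pm_op_in_progress = true;	/* visible to the reset handler */

	/*
	 * ... bring the link and device up, then send SSU. If the SSU
	 * times out here, the SCSI core enters
	 * ufshcd_eh_host_reset_handler() with the flag still set, which
	 * now short-circuits to ufshcd_link_recovery().
	 */
	ret = ufshcd_set_dev_pwr_mode(hba, UFS_ACTIVE_PWR_MODE);

	hba->pm_op_in_progress = false;
	return ret;
}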