From 4b1068f5d74b6cc92319bd7eba40809b1222e73f Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Thu, 19 Mar 2020 01:38:10 -0700 Subject: scsi: qedi: Add MFW error recovery process This patch adds the mfw error recovery process in the qedi driver. The process includes a partial/customized driver unload and load to reset context by preserving active iSCSI session kernel state. Link: https://lore.kernel.org/r/20200319083811.19499-2-mrangankar@marvell.com Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi.h | 1 + drivers/scsi/qedi/qedi_main.c | 87 ++++++++++++++++++++++++++++++++----------- 2 files changed, 66 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h index 9513fd320ffd..812e368cf100 100644 --- a/drivers/scsi/qedi/qedi.h +++ b/drivers/scsi/qedi/qedi.h @@ -331,6 +331,7 @@ struct qedi_ctx { u16 ll2_mtu; struct workqueue_struct *dpc_wq; + struct delayed_work recovery_work; spinlock_t task_idx_lock; /* To protect gbl context */ s32 last_tidx_alloc; diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index acb930b8c6a6..cfa705aa16a6 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -58,6 +58,7 @@ static struct qedi_cmd *qedi_get_cmd_from_tid(struct qedi_ctx *qedi, u32 tid); static void qedi_reset_uio_rings(struct qedi_uio_dev *udev); static void qedi_ll2_free_skbs(struct qedi_ctx *qedi); static struct nvm_iscsi_block *qedi_get_nvram_block(struct qedi_ctx *qedi); +static void qedi_recovery_handler(struct work_struct *work); static int qedi_iscsi_event_cb(void *context, u8 fw_event_code, void *fw_handle) { @@ -1113,6 +1114,20 @@ exit_get_data: return; } +static void qedi_schedule_recovery_handler(void *dev) +{ + struct qedi_ctx *qedi = dev; + + QEDI_ERR(&qedi->dbg_ctx, "Recovery handler scheduled.\n"); + + if (test_and_set_bit(QEDI_IN_RECOVERY, &qedi->flags)) + return; + + atomic_set(&qedi->link_state, QEDI_LINK_DOWN); + + schedule_delayed_work(&qedi->recovery_work, 0); +} + static void qedi_link_update(void *dev, struct qed_link_output *link) { struct qedi_ctx *qedi = (struct qedi_ctx *)dev; @@ -1130,6 +1145,7 @@ static void qedi_link_update(void *dev, struct qed_link_output *link) static struct qed_iscsi_cb_ops qedi_cb_ops = { { .link_update = qedi_link_update, + .schedule_recovery_handler = qedi_schedule_recovery_handler, .get_protocol_tlv_data = qedi_get_protocol_tlv_data, .get_generic_tlv_data = qedi_get_generic_tlv_data, } @@ -2328,16 +2344,18 @@ static void __qedi_remove(struct pci_dev *pdev, int mode) struct qedi_ctx *qedi = pci_get_drvdata(pdev); int rval; - if (qedi->tmf_thread) { - flush_workqueue(qedi->tmf_thread); - destroy_workqueue(qedi->tmf_thread); - qedi->tmf_thread = NULL; - } + if (mode == QEDI_MODE_NORMAL) { + if (qedi->tmf_thread) { + flush_workqueue(qedi->tmf_thread); + destroy_workqueue(qedi->tmf_thread); + qedi->tmf_thread = NULL; + } - if (qedi->offload_thread) { - flush_workqueue(qedi->offload_thread); - destroy_workqueue(qedi->offload_thread); - qedi->offload_thread = NULL; + if (qedi->offload_thread) { + flush_workqueue(qedi->offload_thread); + destroy_workqueue(qedi->offload_thread); + qedi->offload_thread = NULL; + } } #ifdef CONFIG_DEBUG_FS @@ -2353,8 +2371,7 @@ static void __qedi_remove(struct pci_dev *pdev, int mode) qedi_ops->ll2->stop(qedi->cdev); } - if (mode == QEDI_MODE_NORMAL) - qedi_free_iscsi_pf_param(qedi); + qedi_free_iscsi_pf_param(qedi); rval = qedi_ops->common->update_drv_state(qedi->cdev, false); if (rval) @@ -2373,9 +2390,6 @@ static void __qedi_remove(struct pci_dev *pdev, int mode) qedi_free_uio(qedi->udev); qedi_free_itt(qedi); - iscsi_host_remove(qedi->shost); - iscsi_host_free(qedi->shost); - if (qedi->ll2_recv_thread) { kthread_stop(qedi->ll2_recv_thread); qedi->ll2_recv_thread = NULL; @@ -2384,6 +2398,9 @@ static void __qedi_remove(struct pci_dev *pdev, int mode) if (qedi->boot_kset) iscsi_boot_destroy_kset(qedi->boot_kset); + + iscsi_host_remove(qedi->shost); + iscsi_host_free(qedi->shost); } } @@ -2435,14 +2452,12 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) qedi->dev_info.common.num_hwfns, qedi_ops->common->get_affin_hwfn_idx(qedi->cdev)); - if (mode != QEDI_MODE_RECOVERY) { - rc = qedi_set_iscsi_pf_param(qedi); - if (rc) { - rc = -ENOMEM; - QEDI_ERR(&qedi->dbg_ctx, - "Set iSCSI pf param fail\n"); - goto free_host; - } + rc = qedi_set_iscsi_pf_param(qedi); + if (rc) { + rc = -ENOMEM; + QEDI_ERR(&qedi->dbg_ctx, + "Set iSCSI pf param fail\n"); + goto free_host; } qedi_ops->common->update_pf_params(qedi->cdev, &qedi->pf_params); @@ -2633,6 +2648,8 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) goto free_cid_que; } + INIT_DELAYED_WORK(&qedi->recovery_work, qedi_recovery_handler); + /* F/w needs 1st task context memory entry for performance */ set_bit(QEDI_RESERVE_TASK_ID, qedi->task_idx_map); atomic_set(&qedi->num_offloads, 0); @@ -2673,6 +2690,32 @@ exit_probe: return rc; } +static void qedi_mark_conn_recovery(struct iscsi_cls_session *cls_session) +{ + struct iscsi_session *session = cls_session->dd_data; + struct iscsi_conn *conn = session->leadconn; + struct qedi_conn *qedi_conn = conn->dd_data; + + iscsi_conn_failure(qedi_conn->cls_conn->dd_data, ISCSI_ERR_CONN_FAILED); +} + +static void qedi_recovery_handler(struct work_struct *work) +{ + struct qedi_ctx *qedi = + container_of(work, struct qedi_ctx, recovery_work.work); + + iscsi_host_for_each_session(qedi->shost, qedi_mark_conn_recovery); + + /* Call common_ops->recovery_prolog to allow the MFW to quiesce + * any PCI transactions. + */ + qedi_ops->common->recovery_prolog(qedi->cdev); + + __qedi_remove(qedi->pdev, QEDI_MODE_RECOVERY); + __qedi_probe(qedi->pdev, QEDI_MODE_RECOVERY); + clear_bit(QEDI_IN_RECOVERY, &qedi->flags); +} + static int qedi_probe(struct pci_dev *pdev, const struct pci_device_id *id) { return __qedi_probe(pdev, QEDI_MODE_NORMAL); -- cgit v1.2.3-59-g8ed1b