aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers/net/ethernet/qlogic/qede/qede_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/qlogic/qede/qede_main.c')
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c232
1 files changed, 225 insertions, 7 deletions
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 34fa3917eb33..b2d154258b07 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -29,6 +29,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include <linux/crash_dump.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/version.h>
@@ -60,6 +61,7 @@
#include <net/ip6_checksum.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
+#include <linux/aer.h>
#include "qede.h"
#include "qede_ptp.h"
@@ -124,6 +126,8 @@ static const struct pci_device_id qede_pci_tbl[] = {
MODULE_DEVICE_TABLE(pci, qede_pci_tbl);
static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id);
+static pci_ers_result_t
+qede_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state);
#define TX_TIMEOUT (5 * HZ)
@@ -135,10 +139,12 @@ static void qede_shutdown(struct pci_dev *pdev);
static void qede_link_update(void *dev, struct qed_link_output *link);
static void qede_schedule_recovery_handler(void *dev);
static void qede_recovery_handler(struct qede_dev *edev);
+static void qede_schedule_hw_err_handler(void *dev,
+ enum qed_hw_err_type err_type);
static void qede_get_eth_tlv_data(void *edev, void *data);
static void qede_get_generic_tlv_data(void *edev,
struct qed_generic_tlvs *data);
-
+static void qede_generic_hw_err_handler(struct qede_dev *edev);
#ifdef CONFIG_QED_SRIOV
static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
__be16 vlan_proto)
@@ -203,6 +209,10 @@ static int qede_sriov_configure(struct pci_dev *pdev, int num_vfs_param)
}
#endif
+static const struct pci_error_handlers qede_err_handler = {
+ .error_detected = qede_io_error_detected,
+};
+
static struct pci_driver qede_pci_driver = {
.name = "qede",
.id_table = qede_pci_tbl,
@@ -212,6 +222,7 @@ static struct pci_driver qede_pci_driver = {
#ifdef CONFIG_QED_SRIOV
.sriov_configure = qede_sriov_configure,
#endif
+ .err_handler = &qede_err_handler,
};
static struct qed_eth_cb_ops qede_ll_ops = {
@@ -221,6 +232,7 @@ static struct qed_eth_cb_ops qede_ll_ops = {
#endif
.link_update = qede_link_update,
.schedule_recovery_handler = qede_schedule_recovery_handler,
+ .schedule_hw_err_handler = qede_schedule_hw_err_handler,
.get_generic_tlv_data = qede_get_generic_tlv_data,
.get_protocol_tlv_data = qede_get_eth_tlv_data,
},
@@ -527,6 +539,51 @@ static int qede_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return 0;
}
+static void qede_tx_log_print(struct qede_dev *edev, struct qede_tx_queue *txq)
+{
+ DP_NOTICE(edev,
+ "Txq[%d]: FW cons [host] %04x, SW cons %04x, SW prod %04x [Jiffies %lu]\n",
+ txq->index, le16_to_cpu(*txq->hw_cons_ptr),
+ qed_chain_get_cons_idx(&txq->tx_pbl),
+ qed_chain_get_prod_idx(&txq->tx_pbl),
+ jiffies);
+}
+
+static void qede_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+ struct qede_dev *edev = netdev_priv(dev);
+ struct qede_tx_queue *txq;
+ int cos;
+
+ netif_carrier_off(dev);
+ DP_NOTICE(edev, "TX timeout on queue %u!\n", txqueue);
+
+ if (!(edev->fp_array[txqueue].type & QEDE_FASTPATH_TX))
+ return;
+
+ for_each_cos_in_txq(edev, cos) {
+ txq = &edev->fp_array[txqueue].txq[cos];
+
+ if (qed_chain_get_cons_idx(&txq->tx_pbl) !=
+ qed_chain_get_prod_idx(&txq->tx_pbl))
+ qede_tx_log_print(edev, txq);
+ }
+
+ if (IS_VF(edev))
+ return;
+
+ if (test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) ||
+ edev->state == QEDE_STATE_RECOVERY) {
+ DP_INFO(edev,
+ "Avoid handling a Tx timeout while another HW error is being handled\n");
+ return;
+ }
+
+ set_bit(QEDE_ERR_GET_DBG_INFO, &edev->err_flags);
+ set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
+ schedule_delayed_work(&edev->sp_task, 0);
+}
+
static int qede_setup_tc(struct net_device *ndev, u8 num_tc)
{
struct qede_dev *edev = netdev_priv(ndev);
@@ -614,6 +671,7 @@ static const struct net_device_ops qede_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = qede_change_mtu,
.ndo_do_ioctl = qede_ioctl,
+ .ndo_tx_timeout = qede_tx_timeout,
#ifdef CONFIG_QED_SRIOV
.ndo_set_vf_mac = qede_set_vf_mac,
.ndo_set_vf_vlan = qede_set_vf_vlan,
@@ -707,8 +765,14 @@ static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev,
edev->dp_module = dp_module;
edev->dp_level = dp_level;
edev->ops = qed_ops;
- edev->q_num_rx_buffers = NUM_RX_BDS_DEF;
- edev->q_num_tx_buffers = NUM_TX_BDS_DEF;
+
+ if (is_kdump_kernel()) {
+ edev->q_num_rx_buffers = NUM_RX_BDS_KDUMP_MIN;
+ edev->q_num_tx_buffers = NUM_TX_BDS_KDUMP_MIN;
+ } else {
+ edev->q_num_rx_buffers = NUM_RX_BDS_DEF;
+ edev->q_num_tx_buffers = NUM_TX_BDS_DEF;
+ }
DP_INFO(edev, "Allocated netdev with %d tx queues and %d rx queues\n",
info->num_queues, info->num_queues);
@@ -974,7 +1038,8 @@ static void qede_sp_task(struct work_struct *work)
/* SRIOV must be disabled outside the lock to avoid a deadlock.
* The recovery of the active VFs is currently not supported.
*/
- qede_sriov_configure(edev->pdev, 0);
+ if (pci_num_vf(edev->pdev))
+ qede_sriov_configure(edev->pdev, 0);
#endif
qede_lock(edev);
qede_recovery_handler(edev);
@@ -993,7 +1058,20 @@ static void qede_sp_task(struct work_struct *work)
qede_process_arfs_filters(edev, false);
}
#endif
+ if (test_and_clear_bit(QEDE_SP_HW_ERR, &edev->sp_flags))
+ qede_generic_hw_err_handler(edev);
__qede_unlock(edev);
+
+ if (test_and_clear_bit(QEDE_SP_AER, &edev->sp_flags)) {
+#ifdef CONFIG_QED_SRIOV
+ /* SRIOV must be disabled outside the lock to avoid a deadlock.
+ * The recovery of the active VFs is currently not supported.
+ */
+ if (pci_num_vf(edev->pdev))
+ qede_sriov_configure(edev->pdev, 0);
+#endif
+ edev->ops->common->recovery_process(edev->cdev);
+ }
}
static void qede_update_pf_params(struct qed_dev *cdev)
@@ -1187,7 +1265,7 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id)
case QEDE_PRIVATE_VF:
if (debug & QED_LOG_VERBOSE_MASK)
dev_err(&pdev->dev, "Probing a VF\n");
- is_vf = true;
+ is_vf = is_kdump_kernel() ? false : true;
break;
default:
if (debug & QED_LOG_VERBOSE_MASK)
@@ -1398,7 +1476,7 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
if (rxq->rx_buf_size + size > PAGE_SIZE)
rxq->rx_buf_size = PAGE_SIZE - size;
- /* Segment size to spilt a page in multiple equal parts ,
+ /* Segment size to split a page in multiple equal parts,
* unless XDP is used in which case we'd use the entire page.
*/
if (!edev->xdp_prog) {
@@ -1694,7 +1772,7 @@ static void qede_init_fp(struct qede_dev *edev)
txq->ndev_txq_id = ndev_tx_id;
if (edev->dev_info.is_legacy)
- txq->is_legacy = 1;
+ txq->is_legacy = true;
txq->dev = &edev->pdev->dev;
}
@@ -2482,6 +2560,100 @@ err:
qede_recovery_failed(edev);
}
+static void qede_atomic_hw_err_handler(struct qede_dev *edev)
+{
+ struct qed_dev *cdev = edev->cdev;
+
+ DP_NOTICE(edev,
+ "Generic non-sleepable HW error handling started - err_flags 0x%lx\n",
+ edev->err_flags);
+
+ /* Get a call trace of the flow that led to the error */
+ WARN_ON(test_bit(QEDE_ERR_WARN, &edev->err_flags));
+
+ /* Prevent HW attentions from being reasserted */
+ if (test_bit(QEDE_ERR_ATTN_CLR_EN, &edev->err_flags))
+ edev->ops->common->attn_clr_enable(cdev, true);
+
+ DP_NOTICE(edev, "Generic non-sleepable HW error handling is done\n");
+}
+
+static void qede_generic_hw_err_handler(struct qede_dev *edev)
+{
+ struct qed_dev *cdev = edev->cdev;
+
+ DP_NOTICE(edev,
+ "Generic sleepable HW error handling started - err_flags 0x%lx\n",
+ edev->err_flags);
+
+ /* Trigger a recovery process.
+ * This is placed in the sleep requiring section just to make
+ * sure it is the last one, and that all the other operations
+ * were completed.
+ */
+ if (test_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags))
+ edev->ops->common->recovery_process(cdev);
+
+ clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
+
+ DP_NOTICE(edev, "Generic sleepable HW error handling is done\n");
+}
+
+static void qede_set_hw_err_flags(struct qede_dev *edev,
+ enum qed_hw_err_type err_type)
+{
+ unsigned long err_flags = 0;
+
+ switch (err_type) {
+ case QED_HW_ERR_DMAE_FAIL:
+ set_bit(QEDE_ERR_WARN, &err_flags);
+ fallthrough;
+ case QED_HW_ERR_MFW_RESP_FAIL:
+ case QED_HW_ERR_HW_ATTN:
+ case QED_HW_ERR_RAMROD_FAIL:
+ case QED_HW_ERR_FW_ASSERT:
+ set_bit(QEDE_ERR_ATTN_CLR_EN, &err_flags);
+ set_bit(QEDE_ERR_GET_DBG_INFO, &err_flags);
+ break;
+
+ default:
+ DP_NOTICE(edev, "Unexpected HW error [%d]\n", err_type);
+ break;
+ }
+
+ edev->err_flags |= err_flags;
+}
+
+static void qede_schedule_hw_err_handler(void *dev,
+ enum qed_hw_err_type err_type)
+{
+ struct qede_dev *edev = dev;
+
+ /* Fan failure cannot be masked by handling of another HW error or by a
+ * concurrent recovery process.
+ */
+ if ((test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) ||
+ edev->state == QEDE_STATE_RECOVERY) &&
+ err_type != QED_HW_ERR_FAN_FAIL) {
+ DP_INFO(edev,
+ "Avoid scheduling an error handling while another HW error is being handled\n");
+ return;
+ }
+
+ if (err_type >= QED_HW_ERR_LAST) {
+ DP_NOTICE(edev, "Unknown HW error [%d]\n", err_type);
+ clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
+ return;
+ }
+
+ qede_set_hw_err_flags(edev, err_type);
+ qede_atomic_hw_err_handler(edev);
+ set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
+ schedule_delayed_work(&edev->sp_task, 0);
+
+ DP_INFO(edev, "Scheduled a error handler [err_type %d]\n", err_type);
+}
+
static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
{
struct netdev_queue *netdev_txq;
@@ -2579,3 +2751,49 @@ static void qede_get_eth_tlv_data(void *dev, void *data)
etlv->num_txqs_full_set = true;
etlv->num_rxqs_full_set = true;
}
+
+/**
+ * qede_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t
+qede_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct qede_dev *edev = netdev_priv(dev);
+
+ if (!edev)
+ return PCI_ERS_RESULT_NONE;
+
+ DP_NOTICE(edev, "IO error detected [%d]\n", state);
+
+ __qede_lock(edev);
+ if (edev->state == QEDE_STATE_RECOVERY) {
+ DP_NOTICE(edev, "Device already in the recovery state\n");
+ __qede_unlock(edev);
+ return PCI_ERS_RESULT_NONE;
+ }
+
+ /* PF handles the recovery of its VFs */
+ if (IS_VF(edev)) {
+ DP_VERBOSE(edev, QED_MSG_IOV,
+ "VF recovery is handled by its PF\n");
+ __qede_unlock(edev);
+ return PCI_ERS_RESULT_RECOVERED;
+ }
+
+ /* Close OS Tx */
+ netif_tx_disable(edev->ndev);
+ netif_carrier_off(edev->ndev);
+
+ set_bit(QEDE_SP_AER, &edev->sp_flags);
+ schedule_delayed_work(&edev->sp_task, 0);
+
+ __qede_unlock(edev);
+
+ return PCI_ERS_RESULT_CAN_RECOVER;
+}