aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-05-14 13:25:47 -0700
committerDavid S. Miller <davem@davemloft.net>2020-05-14 13:25:47 -0700
commit86b6ba171d4bcce72ffbdc33b42b0b4c0e6d99d0 (patch)
tree34e134e5883863ee99868245e5a64af5d41582ef
parentMerge branch 'net-hns3-add-some-cleanups-for-next' (diff)
parentnet: qed: fix bad formatting (diff)
downloadwireguard-linux-86b6ba171d4bcce72ffbdc33b42b0b4c0e6d99d0.tar.xz
wireguard-linux-86b6ba171d4bcce72ffbdc33b42b0b4c0e6d99d0.zip
Merge branch 'net-qed-qede-critical-hw-error-handling'
Igor Russkikh says: ==================== net: qed/qede: critical hw error handling FastLinQ devices as a complex systems may observe various hardware level error conditions, both severe and recoverable. Driver is able to detect and report this, but so far it only did trace/dmesg based reporting. Here we implement an extended hw error detection, service task handler captures a dump for the later analysis. I also resubmit a patch from Denis Bolotin on tx timeout handler, addressing David's comment regarding recovery procedure as an extra reaction on this event. v2: Removing the patch with ethtool dump and udev magic. Its quite isolated, I'm working on devlink based logic for this separately. v1: https://patchwork.ozlabs.org/project/netdev/cover/cover.1588758463.git.irusskikh@marvell.com/ ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed.h16
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_debug.c26
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev.c4
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_hsi.h49
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_hw.c42
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_hw.h15
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_int.c40
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_int.h11
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_main.c34
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.c253
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.h28
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_spq.c16
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede.h14
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_ethtool.c24
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c147
-rw-r--r--include/linux/qed/qed_if.h26
16 files changed, 699 insertions, 46 deletions
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index fa41bf08a589..66ed39d6f357 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -740,12 +740,6 @@ struct qed_dbg_feature {
u32 dumped_dwords;
};
-struct qed_dbg_params {
- struct qed_dbg_feature features[DBG_FEATURE_NUM];
- u8 engine_for_debug;
- bool print_data;
-};
-
struct qed_dev {
u32 dp_module;
u8 dp_level;
@@ -844,6 +838,9 @@ struct qed_dev {
/* Recovery */
bool recov_in_prog;
+ /* Indicates whether should prevent attentions from being reasserted */
+ bool attn_clr_en;
+
/* LLH info */
u8 ppfid_bitmap;
struct qed_llh_info *p_llh_info;
@@ -872,17 +869,18 @@ struct qed_dev {
} protocol_ops;
void *ops_cookie;
- struct qed_dbg_params dbg_params;
-
#ifdef CONFIG_QED_LL2
struct qed_cb_ll2_info *ll2;
u8 ll2_mac_address[ETH_ALEN];
#endif
struct qed_dbg_feature dbg_features[DBG_FEATURE_NUM];
+ u8 engine_for_debug;
bool disable_ilt_dump;
DECLARE_HASHTABLE(connections, 10);
const struct firmware *firmware;
+ bool print_dbg_data;
+
u32 rdma_max_sge;
u32 rdma_max_inline;
u32 rdma_max_srq_sge;
@@ -1020,6 +1018,8 @@ u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
u32 input_len, u8 *input_buf,
u32 max_size, u8 *unzip_buf);
void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
+void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
+ enum qed_hw_err_type err_type);
void qed_get_protocol_stats(struct qed_dev *cdev,
enum qed_mcp_protocol_type type,
union qed_mcp_protocol_stats *stats);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index f4eebaabb6d0..57a0dab88431 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -7453,7 +7453,7 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn,
enum qed_dbg_features feature_idx)
{
struct qed_dbg_feature *feature =
- &p_hwfn->cdev->dbg_params.features[feature_idx];
+ &p_hwfn->cdev->dbg_features[feature_idx];
u32 text_size_bytes, null_char_pos, i;
enum dbg_status rc;
char *text_buf;
@@ -7502,7 +7502,7 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn,
text_buf[i] = '\n';
/* Dump printable feature to log */
- if (p_hwfn->cdev->dbg_params.print_data)
+ if (p_hwfn->cdev->print_dbg_data)
qed_dbg_print_feature(text_buf, text_size_bytes);
/* Free the old dump_buf and point the dump_buf to the newly allocagted
@@ -7523,7 +7523,7 @@ static enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn,
enum qed_dbg_features feature_idx)
{
struct qed_dbg_feature *feature =
- &p_hwfn->cdev->dbg_params.features[feature_idx];
+ &p_hwfn->cdev->dbg_features[feature_idx];
u32 buf_size_dwords;
enum dbg_status rc;
@@ -7648,7 +7648,7 @@ static int qed_dbg_nvm_image(struct qed_dev *cdev, void *buffer,
enum qed_nvm_images image_id)
{
struct qed_hwfn *p_hwfn =
- &cdev->hwfns[cdev->dbg_params.engine_for_debug];
+ &cdev->hwfns[cdev->engine_for_debug];
u32 len_rounded, i;
__be32 val;
int rc;
@@ -7780,7 +7780,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
{
u8 cur_engine, omit_engine = 0, org_engine;
struct qed_hwfn *p_hwfn =
- &cdev->hwfns[cdev->dbg_params.engine_for_debug];
+ &cdev->hwfns[cdev->engine_for_debug];
struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
int grc_params[MAX_DBG_GRC_PARAMS], i;
u32 offset = 0, feature_size;
@@ -8000,7 +8000,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
int qed_dbg_all_data_size(struct qed_dev *cdev)
{
struct qed_hwfn *p_hwfn =
- &cdev->hwfns[cdev->dbg_params.engine_for_debug];
+ &cdev->hwfns[cdev->engine_for_debug];
u32 regs_len = 0, image_len = 0, ilt_len = 0, total_ilt_len = 0;
u8 cur_engine, org_engine;
@@ -8059,9 +8059,9 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
enum qed_dbg_features feature, u32 *num_dumped_bytes)
{
struct qed_hwfn *p_hwfn =
- &cdev->hwfns[cdev->dbg_params.engine_for_debug];
+ &cdev->hwfns[cdev->engine_for_debug];
struct qed_dbg_feature *qed_feature =
- &cdev->dbg_params.features[feature];
+ &cdev->dbg_features[feature];
enum dbg_status dbg_rc;
struct qed_ptt *p_ptt;
int rc = 0;
@@ -8084,7 +8084,7 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
DP_VERBOSE(cdev, QED_MSG_DEBUG,
"copying debugfs feature to external buffer\n");
memcpy(buffer, qed_feature->dump_buf, qed_feature->buf_size);
- *num_dumped_bytes = cdev->dbg_params.features[feature].dumped_dwords *
+ *num_dumped_bytes = cdev->dbg_features[feature].dumped_dwords *
4;
out:
@@ -8095,7 +8095,7 @@ out:
int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature)
{
struct qed_hwfn *p_hwfn =
- &cdev->hwfns[cdev->dbg_params.engine_for_debug];
+ &cdev->hwfns[cdev->engine_for_debug];
struct qed_dbg_feature *qed_feature = &cdev->dbg_features[feature];
struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
u32 buf_size_dwords;
@@ -8120,14 +8120,14 @@ int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature)
u8 qed_get_debug_engine(struct qed_dev *cdev)
{
- return cdev->dbg_params.engine_for_debug;
+ return cdev->engine_for_debug;
}
void qed_set_debug_engine(struct qed_dev *cdev, int engine_number)
{
DP_VERBOSE(cdev, QED_MSG_DEBUG, "set debug engine to %d\n",
engine_number);
- cdev->dbg_params.engine_for_debug = engine_number;
+ cdev->engine_for_debug = engine_number;
}
void qed_dbg_pf_init(struct qed_dev *cdev)
@@ -8146,7 +8146,7 @@ void qed_dbg_pf_init(struct qed_dev *cdev)
}
/* Set the hwfn to be 0 as default */
- cdev->dbg_params.engine_for_debug = 0;
+ cdev->engine_for_debug = 0;
}
void qed_dbg_pf_exit(struct qed_dev *cdev)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 7119a18af19e..6e857468e993 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -3085,7 +3085,9 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
rc = qed_final_cleanup(p_hwfn, p_hwfn->p_main_ptt,
p_hwfn->rel_pf_id, false);
if (rc) {
- DP_NOTICE(p_hwfn, "Final cleanup failed\n");
+ qed_hw_err_notify(p_hwfn, p_hwfn->p_main_ptt,
+ QED_HW_ERR_RAMROD_FAIL,
+ "Final cleanup failed\n");
goto load_err;
}
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 4597015b8bff..f00460d00cab 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -12400,6 +12400,13 @@ struct load_rsp_stc {
#define LOAD_RSP_FLAGS0_DRV_EXISTS (0x1 << 0)
};
+struct mdump_retain_data_stc {
+ u32 valid;
+ u32 epoch;
+ u32 pf;
+ u32 status;
+};
+
union drv_union_data {
u32 ver_str[MCP_DRV_VER_STR_SIZE_DWORD];
struct mcp_mac wol_mac;
@@ -12488,10 +12495,14 @@ struct public_drv_mb {
#define DRV_MSG_CODE_BIST_TEST 0x001e0000
#define DRV_MSG_CODE_SET_LED_MODE 0x00200000
#define DRV_MSG_CODE_RESOURCE_CMD 0x00230000
+/* Send crash dump commands with param[3:0] - opcode */
+#define DRV_MSG_CODE_MDUMP_CMD 0x00250000
#define DRV_MSG_CODE_GET_TLV_DONE 0x002f0000
#define DRV_MSG_CODE_GET_ENGINE_CONFIG 0x00370000
#define DRV_MSG_CODE_GET_PPFID_BITMAP 0x43000000
+#define DRV_MSG_CODE_DEBUG_DATA_SEND 0xc0040000
+
#define RESOURCE_CMD_REQ_RESC_MASK 0x0000001F
#define RESOURCE_CMD_REQ_RESC_SHIFT 0
#define RESOURCE_CMD_REQ_OPCODE_MASK 0x000000E0
@@ -12517,6 +12528,21 @@ struct public_drv_mb {
#define RESOURCE_DUMP 0
+/* DRV_MSG_CODE_MDUMP_CMD parameters */
+#define MDUMP_DRV_PARAM_OPCODE_MASK 0x0000000f
+#define DRV_MSG_CODE_MDUMP_ACK 0x01
+#define DRV_MSG_CODE_MDUMP_SET_VALUES 0x02
+#define DRV_MSG_CODE_MDUMP_TRIGGER 0x03
+#define DRV_MSG_CODE_MDUMP_GET_CONFIG 0x04
+#define DRV_MSG_CODE_MDUMP_SET_ENABLE 0x05
+#define DRV_MSG_CODE_MDUMP_CLEAR_LOGS 0x06
+#define DRV_MSG_CODE_MDUMP_GET_RETAIN 0x07
+#define DRV_MSG_CODE_MDUMP_CLR_RETAIN 0x08
+
+#define DRV_MSG_CODE_HW_DUMP_TRIGGER 0x0a
+#define DRV_MSG_CODE_MDUMP_GEN_MDUMP2 0x0b
+#define DRV_MSG_CODE_MDUMP_FREE_MDUMP2 0x0c
+
#define DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL 0x002b0000
#define DRV_MSG_CODE_OS_WOL 0x002e0000
@@ -12626,6 +12652,17 @@ struct public_drv_mb {
#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE 0x00000002
#define DRV_MB_PARAM_FEATURE_SUPPORT_FUNC_VLINK 0x00010000
+/* DRV_MSG_CODE_DEBUG_DATA_SEND parameters */
+#define DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE_OFFSET 0
+#define DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE_MASK 0xFF
+
+/* Driver attributes params */
+#define DRV_MB_PARAM_ATTRIBUTE_KEY_OFFSET 0
+#define DRV_MB_PARAM_ATTRIBUTE_KEY_MASK 0x00FFFFFF
+#define DRV_MB_PARAM_ATTRIBUTE_CMD_OFFSET 24
+#define DRV_MB_PARAM_ATTRIBUTE_CMD_MASK 0xFF000000
+
+#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_OFFSET 0
#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_SHIFT 0
#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_MASK 0x0000FFFF
#define DRV_MB_PARAM_NVM_CFG_OPTION_ALL_SHIFT 16
@@ -12678,6 +12715,14 @@ struct public_drv_mb {
#define FW_MSG_CODE_DRV_CFG_PF_VFS_MSIX_DONE 0x00870000
#define FW_MSG_SEQ_NUMBER_MASK 0x0000ffff
+#define FW_MSG_CODE_DEBUG_DATA_SEND_INV_ARG 0xb0070000
+#define FW_MSG_CODE_DEBUG_DATA_SEND_BUF_FULL 0xb0080000
+#define FW_MSG_CODE_DEBUG_DATA_SEND_NO_BUF 0xb0090000
+#define FW_MSG_CODE_DEBUG_NOT_ENABLED 0xb00a0000
+#define FW_MSG_CODE_DEBUG_DATA_SEND_OK 0xb00b0000
+
+#define FW_MSG_CODE_MDUMP_INVALID_CMD 0x00030000
+
u32 fw_mb_param;
#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK 0xFFFF0000
#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT 16
@@ -12742,9 +12787,9 @@ enum MFW_DRV_MSG_TYPE {
MFW_DRV_MSG_GET_FCOE_STATS,
MFW_DRV_MSG_GET_ISCSI_STATS,
MFW_DRV_MSG_GET_RDMA_STATS,
- MFW_DRV_MSG_BW_UPDATE10,
+ MFW_DRV_MSG_FAILURE_DETECTED,
MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE,
- MFW_DRV_MSG_BW_UPDATE11,
+ MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED,
MFW_DRV_MSG_RESERVED,
MFW_DRV_MSG_GET_TLV_REQ,
MFW_DRV_MSG_OEM_CFG_UPDATE,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 4ab8cfaf63d1..5fa251489536 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -762,9 +762,10 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
dst_type,
length_cur);
if (qed_status) {
- DP_NOTICE(p_hwfn,
- "qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n",
- qed_status, src_addr, dst_addr, length_cur);
+ qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_DMAE_FAIL,
+ "qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n",
+ qed_status, src_addr,
+ dst_addr, length_cur);
break;
}
}
@@ -837,6 +838,41 @@ int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
return rc;
}
+void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ enum qed_hw_err_type err_type, char *fmt, ...)
+{
+ char buf[QED_HW_ERR_MAX_STR_SIZE];
+ va_list vl;
+ int len;
+
+ if (fmt) {
+ va_start(vl, fmt);
+ len = vsnprintf(buf, QED_HW_ERR_MAX_STR_SIZE, fmt, vl);
+ va_end(vl);
+
+ if (len > QED_HW_ERR_MAX_STR_SIZE - 1)
+ len = QED_HW_ERR_MAX_STR_SIZE - 1;
+
+ DP_NOTICE(p_hwfn, "%s", buf);
+ }
+
+ /* Fan failure cannot be masked by handling of another HW error */
+ if (p_hwfn->cdev->recov_in_prog &&
+ err_type != QED_HW_ERR_FAN_FAIL) {
+ DP_VERBOSE(p_hwfn,
+ NETIF_MSG_DRV,
+ "Recovery is in progress. Avoid notifying about HW error %d.\n",
+ err_type);
+ return;
+ }
+
+ qed_hw_error_occurred(p_hwfn, err_type);
+
+ if (fmt)
+ qed_mcp_send_raw_debug_data(p_hwfn, p_ptt, buf, len);
+}
+
int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, const char *phase)
{
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.h b/drivers/net/ethernet/qlogic/qed/qed_hw.h
index 505e94db939d..f5b109b04b66 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.h
@@ -315,4 +315,19 @@ int qed_init_fw_data(struct qed_dev *cdev,
int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, const char *phase);
+#define QED_HW_ERR_MAX_STR_SIZE 256
+
+/**
+ * @brief qed_hw_err_notify - Notify upper layer driver and management FW
+ * about a HW error.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param err_type
+ * @param fmt - debug data buffer to send to the MFW
+ * @param ... - buffer format args
+ */
+void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ enum qed_hw_err_type err_type, char *fmt, ...);
#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index 9f5113639eaf..b7b974f0ef21 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -96,6 +96,7 @@ struct aeu_invert_reg_bit {
#define ATTENTION_BB(value) (value << ATTENTION_BB_SHIFT)
#define ATTENTION_BB_DIFFERENT BIT(23)
+#define ATTENTION_CLEAR_ENABLE BIT(28)
unsigned int flags;
/* Callback to call if attention will be triggered */
@@ -363,6 +364,21 @@ static int qed_pglueb_rbc_attn_cb(struct qed_hwfn *p_hwfn)
return qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_dpc_ptt);
}
+static int qed_fw_assertion(struct qed_hwfn *p_hwfn)
+{
+ qed_hw_err_notify(p_hwfn, p_hwfn->p_dpc_ptt, QED_HW_ERR_FW_ASSERT,
+ "FW assertion!\n");
+
+ return -EINVAL;
+}
+
+static int qed_general_attention_35(struct qed_hwfn *p_hwfn)
+{
+ DP_INFO(p_hwfn, "General attention 35!\n");
+
+ return 0;
+}
+
#define QED_DORQ_ATTENTION_REASON_MASK (0xfffff)
#define QED_DORQ_ATTENTION_OPAQUE_MASK (0xffff)
#define QED_DORQ_ATTENTION_OPAQUE_SHIFT (0x0)
@@ -605,13 +621,15 @@ static struct aeu_invert_reg aeu_descs[NUM_ATTN_REGS] = {
{
{ /* After Invert 4 */
- {"General Attention 32", ATTENTION_SINGLE,
- NULL, MAX_BLOCK_ID},
+ {"General Attention 32", ATTENTION_SINGLE |
+ ATTENTION_CLEAR_ENABLE, qed_fw_assertion,
+ MAX_BLOCK_ID},
{"General Attention %d",
(2 << ATTENTION_LENGTH_SHIFT) |
(33 << ATTENTION_OFFSET_SHIFT), NULL, MAX_BLOCK_ID},
- {"General Attention 35", ATTENTION_SINGLE,
- NULL, MAX_BLOCK_ID},
+ {"General Attention 35", ATTENTION_SINGLE |
+ ATTENTION_CLEAR_ENABLE, qed_general_attention_35,
+ MAX_BLOCK_ID},
{"NWS Parity",
ATTENTION_PAR | ATTENTION_BB_DIFFERENT |
ATTENTION_BB(AEU_INVERT_REG_SPECIAL_CNIG_0),
@@ -927,9 +945,12 @@ qed_int_deassertion_aeu_bit(struct qed_hwfn *p_hwfn,
qed_int_attn_print(p_hwfn, p_aeu->block_index,
ATTN_TYPE_INTERRUPT, !b_fatal);
-
- /* If the attention is benign, no need to prevent it */
- if (!rc)
+ /* Reach assertion if attention is fatal */
+ if (b_fatal)
+ qed_hw_err_notify(p_hwfn, p_hwfn->p_dpc_ptt, QED_HW_ERR_HW_ATTN,
+ "`%s': Fatal attention\n",
+ p_bit_name);
+ else /* If the attention is benign, no need to prevent it */
goto out;
/* Prevent this Attention from being asserted in the future */
@@ -2349,6 +2370,11 @@ void qed_int_disable_post_isr_release(struct qed_dev *cdev)
cdev->hwfns[i].b_int_requested = false;
}
+void qed_int_attn_clr_enable(struct qed_dev *cdev, bool clr_enable)
+{
+ cdev->attn_clr_en = clr_enable;
+}
+
int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
u8 timer_res, u16 sb_id, bool tx)
{
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h
index 9ad568d93ae6..e09db3386367 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.h
@@ -191,6 +191,17 @@ void qed_int_get_num_sbs(struct qed_hwfn *p_hwfn,
void qed_int_disable_post_isr_release(struct qed_dev *cdev);
/**
+ * @brief qed_int_attn_clr_enable - sets whether the general behavior is
+ * preventing attentions from being reasserted, or following the
+ * attributes of the specific attention.
+ *
+ * @param cdev
+ * @param clr_enable
+ *
+ */
+void qed_int_attn_clr_enable(struct qed_dev *cdev, bool clr_enable);
+
+/**
* @brief - Doorbell Recovery handler.
* Run doorbell recovery in case of PF overflow (and flush DORQ if
* needed).
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 38a1d26ca9db..83e798d4eebb 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -2468,6 +2468,39 @@ void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn)
ops->schedule_recovery_handler(cookie);
}
+char *qed_hw_err_type_descr[] = {
+ [QED_HW_ERR_FAN_FAIL] = "Fan Failure",
+ [QED_HW_ERR_MFW_RESP_FAIL] = "MFW Response Failure",
+ [QED_HW_ERR_HW_ATTN] = "HW Attention",
+ [QED_HW_ERR_DMAE_FAIL] = "DMAE Failure",
+ [QED_HW_ERR_RAMROD_FAIL] = "Ramrod Failure",
+ [QED_HW_ERR_FW_ASSERT] = "FW Assertion",
+ [QED_HW_ERR_LAST] = "Unknown",
+};
+
+void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
+ enum qed_hw_err_type err_type)
+{
+ struct qed_common_cb_ops *ops = p_hwfn->cdev->protocol_ops.common;
+ void *cookie = p_hwfn->cdev->ops_cookie;
+ char *err_str;
+
+ if (err_type > QED_HW_ERR_LAST)
+ err_type = QED_HW_ERR_LAST;
+ err_str = qed_hw_err_type_descr[err_type];
+
+ DP_NOTICE(p_hwfn, "HW error occurred [%s]\n", err_str);
+
+ /* Call the HW error handler of the protocol driver.
+ * If it is not available - perform a minimal handling of preventing
+ * HW attentions from being reasserted.
+ */
+ if (ops && ops->schedule_hw_err_handler)
+ ops->schedule_hw_err_handler(cookie, err_type);
+ else
+ qed_int_attn_clr_enable(p_hwfn->cdev, true);
+}
+
static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
void *handle)
{
@@ -2689,6 +2722,7 @@ const struct qed_common_ops qed_common_ops_pass = {
.set_led = &qed_set_led,
.recovery_process = &qed_recovery_process,
.recovery_prolog = &qed_recovery_prolog,
+ .attn_clr_enable = &qed_int_attn_clr_enable,
.update_drv_state = &qed_update_drv_state,
.update_mac = &qed_update_mac,
.update_mtu = &qed_update_mtu,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 280527cc0578..9624616806e7 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -575,6 +575,8 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
if (!QED_MB_FLAGS_IS_SET(p_mb_params, AVOID_BLOCK))
qed_mcp_cmd_set_blocking(p_hwfn, true);
+ qed_hw_err_notify(p_hwfn, p_ptt,
+ QED_HW_ERR_MFW_RESP_FAIL, NULL);
return -EAGAIN;
}
@@ -1704,6 +1706,127 @@ static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
&resp, &param);
}
+static void qed_mcp_handle_fan_failure(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt)
+{
+ /* A single notification should be sent to upper driver in CMT mode */
+ if (p_hwfn != QED_LEADING_HWFN(p_hwfn->cdev))
+ return;
+
+ qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_FAN_FAIL,
+ "Fan failure was detected on the network interface card and it's going to be shut down.\n");
+}
+
+struct qed_mdump_cmd_params {
+ u32 cmd;
+ void *p_data_src;
+ u8 data_src_size;
+ void *p_data_dst;
+ u8 data_dst_size;
+ u32 mcp_resp;
+};
+
+static int
+qed_mcp_mdump_cmd(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_mdump_cmd_params *p_mdump_cmd_params)
+{
+ struct qed_mcp_mb_params mb_params;
+ int rc;
+
+ memset(&mb_params, 0, sizeof(mb_params));
+ mb_params.cmd = DRV_MSG_CODE_MDUMP_CMD;
+ mb_params.param = p_mdump_cmd_params->cmd;
+ mb_params.p_data_src = p_mdump_cmd_params->p_data_src;
+ mb_params.data_src_size = p_mdump_cmd_params->data_src_size;
+ mb_params.p_data_dst = p_mdump_cmd_params->p_data_dst;
+ mb_params.data_dst_size = p_mdump_cmd_params->data_dst_size;
+ rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+ if (rc)
+ return rc;
+
+ p_mdump_cmd_params->mcp_resp = mb_params.mcp_resp;
+
+ if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_MDUMP_INVALID_CMD) {
+ DP_INFO(p_hwfn,
+ "The mdump sub command is unsupported by the MFW [mdump_cmd 0x%x]\n",
+ p_mdump_cmd_params->cmd);
+ rc = -EOPNOTSUPP;
+ } else if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+ DP_INFO(p_hwfn,
+ "The mdump command is not supported by the MFW\n");
+ rc = -EOPNOTSUPP;
+ }
+
+ return rc;
+}
+
+static int qed_mcp_mdump_ack(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+ struct qed_mdump_cmd_params mdump_cmd_params;
+
+ memset(&mdump_cmd_params, 0, sizeof(mdump_cmd_params));
+ mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_ACK;
+
+ return qed_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
+}
+
+int
+qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct mdump_retain_data_stc *p_mdump_retain)
+{
+ struct qed_mdump_cmd_params mdump_cmd_params;
+ int rc;
+
+ memset(&mdump_cmd_params, 0, sizeof(mdump_cmd_params));
+ mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_GET_RETAIN;
+ mdump_cmd_params.p_data_dst = p_mdump_retain;
+ mdump_cmd_params.data_dst_size = sizeof(*p_mdump_retain);
+
+ rc = qed_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
+ if (rc)
+ return rc;
+
+ if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
+ DP_INFO(p_hwfn,
+ "Failed to get the mdump retained data [mcp_resp 0x%x]\n",
+ mdump_cmd_params.mcp_resp);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void qed_mcp_handle_critical_error(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt)
+{
+ struct mdump_retain_data_stc mdump_retain;
+ int rc;
+
+ /* In CMT mode - no need for more than a single acknowledgment to the
+ * MFW, and no more than a single notification to the upper driver.
+ */
+ if (p_hwfn != QED_LEADING_HWFN(p_hwfn->cdev))
+ return;
+
+ rc = qed_mcp_mdump_get_retain(p_hwfn, p_ptt, &mdump_retain);
+ if (rc == 0 && mdump_retain.valid)
+ DP_NOTICE(p_hwfn,
+ "The MFW notified that a critical error occurred in the device [epoch 0x%08x, pf 0x%x, status 0x%08x]\n",
+ mdump_retain.epoch,
+ mdump_retain.pf, mdump_retain.status);
+ else
+ DP_NOTICE(p_hwfn,
+ "The MFW notified that a critical error occurred in the device\n");
+
+ DP_NOTICE(p_hwfn,
+ "Acknowledging the notification to not allow the MFW crash dump [driver debug data collection is preferable]\n");
+ qed_mcp_mdump_ack(p_hwfn, p_ptt);
+
+ qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_HW_ATTN, NULL);
+}
+
void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
struct public_func shmem_info;
@@ -1850,6 +1973,12 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
case MFW_DRV_MSG_S_TAG_UPDATE:
qed_mcp_update_stag(p_hwfn, p_ptt);
break;
+ case MFW_DRV_MSG_FAILURE_DETECTED:
+ qed_mcp_handle_fan_failure(p_hwfn, p_ptt);
+ break;
+ case MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED:
+ qed_mcp_handle_critical_error(p_hwfn, p_ptt);
+ break;
case MFW_DRV_MSG_GET_TLV_REQ:
qed_mfw_tlv_req(p_hwfn);
break;
@@ -3819,3 +3948,127 @@ int qed_mcp_nvm_set_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
DRV_MSG_CODE_SET_NVM_CFG_OPTION,
mb_param, &resp, &param, len, (u32 *)p_buf);
}
+
+#define QED_MCP_DBG_DATA_MAX_SIZE MCP_DRV_NVM_BUF_LEN
+#define QED_MCP_DBG_DATA_MAX_HEADER_SIZE sizeof(u32)
+#define QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE \
+ (QED_MCP_DBG_DATA_MAX_SIZE - QED_MCP_DBG_DATA_MAX_HEADER_SIZE)
+
+static int
+__qed_mcp_send_debug_data(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u8 *p_buf, u8 size)
+{
+ struct qed_mcp_mb_params mb_params;
+ int rc;
+
+ if (size > QED_MCP_DBG_DATA_MAX_SIZE) {
+ DP_ERR(p_hwfn,
+ "Debug data size is %d while it should not exceed %d\n",
+ size, QED_MCP_DBG_DATA_MAX_SIZE);
+ return -EINVAL;
+ }
+
+ memset(&mb_params, 0, sizeof(mb_params));
+ mb_params.cmd = DRV_MSG_CODE_DEBUG_DATA_SEND;
+ SET_MFW_FIELD(mb_params.param, DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE, size);
+ mb_params.p_data_src = p_buf;
+ mb_params.data_src_size = size;
+ rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+ if (rc)
+ return rc;
+
+ if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+ DP_INFO(p_hwfn,
+ "The DEBUG_DATA_SEND command is unsupported by the MFW\n");
+ return -EOPNOTSUPP;
+ } else if (mb_params.mcp_resp == (u32)FW_MSG_CODE_DEBUG_NOT_ENABLED) {
+ DP_INFO(p_hwfn, "The DEBUG_DATA_SEND command is not enabled\n");
+ return -EBUSY;
+ } else if (mb_params.mcp_resp != (u32)FW_MSG_CODE_DEBUG_DATA_SEND_OK) {
+ DP_NOTICE(p_hwfn,
+ "Failed to send debug data to the MFW [resp 0x%08x]\n",
+ mb_params.mcp_resp);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+enum qed_mcp_dbg_data_type {
+ QED_MCP_DBG_DATA_TYPE_RAW,
+};
+
+/* Header format: [31:28] PFID, [27:20] flags, [19:12] type, [11:0] S/N */
+#define QED_MCP_DBG_DATA_HDR_SN_OFFSET 0
+#define QED_MCP_DBG_DATA_HDR_SN_MASK 0x00000fff
+#define QED_MCP_DBG_DATA_HDR_TYPE_OFFSET 12
+#define QED_MCP_DBG_DATA_HDR_TYPE_MASK 0x000ff000
+#define QED_MCP_DBG_DATA_HDR_FLAGS_OFFSET 20
+#define QED_MCP_DBG_DATA_HDR_FLAGS_MASK 0x0ff00000
+#define QED_MCP_DBG_DATA_HDR_PF_OFFSET 28
+#define QED_MCP_DBG_DATA_HDR_PF_MASK 0xf0000000
+
+#define QED_MCP_DBG_DATA_HDR_FLAGS_FIRST 0x1
+#define QED_MCP_DBG_DATA_HDR_FLAGS_LAST 0x2
+
+static int
+qed_mcp_send_debug_data(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ enum qed_mcp_dbg_data_type type, u8 *p_buf, u32 size)
+{
+ u8 raw_data[QED_MCP_DBG_DATA_MAX_SIZE], *p_tmp_buf = p_buf;
+ u32 tmp_size = size, *p_header, *p_payload;
+ u8 flags = 0;
+ u16 seq;
+ int rc;
+
+ p_header = (u32 *)raw_data;
+ p_payload = (u32 *)(raw_data + QED_MCP_DBG_DATA_MAX_HEADER_SIZE);
+
+ seq = (u16)atomic_inc_return(&p_hwfn->mcp_info->dbg_data_seq);
+
+ /* First chunk is marked as 'first' */
+ flags |= QED_MCP_DBG_DATA_HDR_FLAGS_FIRST;
+
+ *p_header = 0;
+ SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_SN, seq);
+ SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_TYPE, type);
+ SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS, flags);
+ SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_PF, p_hwfn->abs_pf_id);
+
+ while (tmp_size > QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE) {
+ memcpy(p_payload, p_tmp_buf, QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE);
+ rc = __qed_mcp_send_debug_data(p_hwfn, p_ptt, raw_data,
+ QED_MCP_DBG_DATA_MAX_SIZE);
+ if (rc)
+ return rc;
+
+ /* Clear the 'first' marking after sending the first chunk */
+ if (p_tmp_buf == p_buf) {
+ flags &= ~QED_MCP_DBG_DATA_HDR_FLAGS_FIRST;
+ SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS,
+ flags);
+ }
+
+ p_tmp_buf += QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE;
+ tmp_size -= QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE;
+ }
+
+ /* Last chunk is marked as 'last' */
+ flags |= QED_MCP_DBG_DATA_HDR_FLAGS_LAST;
+ SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS, flags);
+ memcpy(p_payload, p_tmp_buf, tmp_size);
+
+ /* Casting the left size to u8 is ok since at this point it is <= 32 */
+ return __qed_mcp_send_debug_data(p_hwfn, p_ptt, raw_data,
+ (u8)(QED_MCP_DBG_DATA_MAX_HEADER_SIZE +
+ tmp_size));
+}
+
+int
+qed_mcp_send_raw_debug_data(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u8 *p_buf, u32 size)
+{
+ return qed_mcp_send_debug_data(p_hwfn, p_ptt,
+ QED_MCP_DBG_DATA_TYPE_RAW, p_buf, size);
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 9c4c2763de8d..5750b4c5ef63 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -685,6 +685,18 @@ int qed_mcp_bist_nvm_get_image_att(struct qed_hwfn *p_hwfn,
*/
int qed_mfw_process_tlv_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+/**
+ * @brief Send raw debug data to the MFW
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_buf - raw debug data buffer
+ * @param size - buffer size
+ */
+int
+qed_mcp_send_raw_debug_data(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u8 *p_buf, u32 size);
+
/* Using hwfn number (and not pf_num) is required since in CMT mode,
* same pf_num may be used by two different hwfn
* TODO - this shouldn't really be in .h file, but until all fields
@@ -731,6 +743,9 @@ struct qed_mcp_info {
/* Capabilties negotiated with the MFW */
u32 capabilities;
+
+ /* S/N for debug data mailbox commands */
+ atomic_t dbg_data_seq;
};
struct qed_mcp_mb_params {
@@ -1001,6 +1016,19 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn,
int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, u32 mask_parities);
+/* @brief - Gets the mdump retained data from the MFW.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_mdump_retain
+ *
+ * @param return 0 upon success.
+ */
+int
+qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct mdump_retain_data_stc *p_mdump_retain);
+
/**
* @brief - Sets the MFW's max value for the given resource
*
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index f5f3c03b9dd2..790c28d696a0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -160,12 +160,16 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn,
return 0;
}
err:
- DP_NOTICE(p_hwfn,
- "Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n",
- le32_to_cpu(p_ent->elem.hdr.cid),
- p_ent->elem.hdr.cmd_id,
- p_ent->elem.hdr.protocol_id,
- le16_to_cpu(p_ent->elem.hdr.echo));
+ p_ptt = qed_ptt_acquire(p_hwfn);
+ if (!p_ptt)
+ return -EBUSY;
+ qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_RAMROD_FAIL,
+ "Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n",
+ le32_to_cpu(p_ent->elem.hdr.cid),
+ p_ent->elem.hdr.cmd_id,
+ p_ent->elem.hdr.protocol_id,
+ le16_to_cpu(p_ent->elem.hdr.echo));
+ qed_ptt_release(p_hwfn, p_ptt);
return -EBUSY;
}
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index f6f0b51620ab..8857da1208d7 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -278,6 +278,14 @@ struct qede_dev {
struct qede_rdma_dev rdma_info;
struct bpf_prog *xdp_prog;
+
+ unsigned long err_flags;
+#define QEDE_ERR_IS_HANDLED 31
+#define QEDE_ERR_ATTN_CLR_EN 0
+#define QEDE_ERR_GET_DBG_INFO 1
+#define QEDE_ERR_IS_RECOVERABLE 2
+#define QEDE_ERR_WARN 3
+
struct qede_dump_info dump_info;
};
@@ -485,12 +493,15 @@ struct qede_fastpath {
#define QEDE_SP_RECOVERY 0
#define QEDE_SP_RX_MODE 1
+#define QEDE_SP_RSVD1 2
+#define QEDE_SP_RSVD2 3
+#define QEDE_SP_HW_ERR 4
+#define QEDE_SP_ARFS_CONFIG 5
#define QEDE_SP_AER 7
#ifdef CONFIG_RFS_ACCEL
int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
-#define QEDE_SP_ARFS_CONFIG 4
#define QEDE_SP_TASK_POLL_DELAY (5 * HZ)
#endif
@@ -522,7 +533,6 @@ u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb,
netdev_features_t qede_features_check(struct sk_buff *skb,
struct net_device *dev,
netdev_features_t features);
-void qede_tx_log_print(struct qede_dev *edev, struct qede_fastpath *fp);
int qede_alloc_rx_buffer(struct qede_rx_queue *rxq, bool allow_lazy);
int qede_free_tx_pkt(struct qede_dev *edev,
struct qede_tx_queue *txq, int *len);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 812c7766e096..24cc68391ac4 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -190,12 +190,14 @@ static const struct {
enum {
QEDE_PRI_FLAG_CMT,
QEDE_PRI_FLAG_SMART_AN_SUPPORT, /* MFW supports SmartAN */
+ QEDE_PRI_FLAG_RECOVER_ON_ERROR,
QEDE_PRI_FLAG_LEN,
};
static const char qede_private_arr[QEDE_PRI_FLAG_LEN][ETH_GSTRING_LEN] = {
"Coupled-Function",
"SmartAN capable",
+ "Recover on error",
};
enum qede_ethtool_tests {
@@ -417,9 +419,30 @@ static u32 qede_get_priv_flags(struct net_device *dev)
if (edev->dev_info.common.smart_an)
flags |= BIT(QEDE_PRI_FLAG_SMART_AN_SUPPORT);
+ if (edev->err_flags & BIT(QEDE_ERR_IS_RECOVERABLE))
+ flags |= BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR);
+
return flags;
}
+static int qede_set_priv_flags(struct net_device *dev, u32 flags)
+{
+ struct qede_dev *edev = netdev_priv(dev);
+ u32 cflags = qede_get_priv_flags(dev);
+ u32 dflags = flags ^ cflags;
+
+ /* can only change RECOVER_ON_ERROR flag */
+ if (dflags & ~BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR))
+ return -EINVAL;
+
+ if (flags & BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR))
+ set_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags);
+ else
+ clear_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags);
+
+ return 0;
+}
+
struct qede_link_mode_mapping {
u32 qed_link_mode;
u32 ethtool_link_mode;
@@ -2098,6 +2121,7 @@ static const struct ethtool_ops qede_ethtool_ops = {
.set_phys_id = qede_set_phys_id,
.get_ethtool_stats = qede_get_ethtool_stats,
.get_priv_flags = qede_get_priv_flags,
+ .set_priv_flags = qede_set_priv_flags,
.get_sset_count = qede_get_sset_count,
.get_rxnfc = qede_get_rxnfc,
.set_rxnfc = qede_set_rxnfc,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 300405369c37..f50d9a9b76be 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -139,10 +139,12 @@ static void qede_shutdown(struct pci_dev *pdev);
static void qede_link_update(void *dev, struct qed_link_output *link);
static void qede_schedule_recovery_handler(void *dev);
static void qede_recovery_handler(struct qede_dev *edev);
+static void qede_schedule_hw_err_handler(void *dev,
+ enum qed_hw_err_type err_type);
static void qede_get_eth_tlv_data(void *edev, void *data);
static void qede_get_generic_tlv_data(void *edev,
struct qed_generic_tlvs *data);
-
+static void qede_generic_hw_err_handler(struct qede_dev *edev);
#ifdef CONFIG_QED_SRIOV
static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
__be16 vlan_proto)
@@ -230,6 +232,7 @@ static struct qed_eth_cb_ops qede_ll_ops = {
#endif
.link_update = qede_link_update,
.schedule_recovery_handler = qede_schedule_recovery_handler,
+ .schedule_hw_err_handler = qede_schedule_hw_err_handler,
.get_generic_tlv_data = qede_get_generic_tlv_data,
.get_protocol_tlv_data = qede_get_eth_tlv_data,
},
@@ -536,6 +539,51 @@ static int qede_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return 0;
}
+static void qede_tx_log_print(struct qede_dev *edev, struct qede_tx_queue *txq)
+{
+ DP_NOTICE(edev,
+ "Txq[%d]: FW cons [host] %04x, SW cons %04x, SW prod %04x [Jiffies %lu]\n",
+ txq->index, le16_to_cpu(*txq->hw_cons_ptr),
+ qed_chain_get_cons_idx(&txq->tx_pbl),
+ qed_chain_get_prod_idx(&txq->tx_pbl),
+ jiffies);
+}
+
+static void qede_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+ struct qede_dev *edev = netdev_priv(dev);
+ struct qede_tx_queue *txq;
+ int cos;
+
+ netif_carrier_off(dev);
+ DP_NOTICE(edev, "TX timeout on queue %u!\n", txqueue);
+
+ if (!(edev->fp_array[txqueue].type & QEDE_FASTPATH_TX))
+ return;
+
+ for_each_cos_in_txq(edev, cos) {
+ txq = &edev->fp_array[txqueue].txq[cos];
+
+ if (qed_chain_get_cons_idx(&txq->tx_pbl) !=
+ qed_chain_get_prod_idx(&txq->tx_pbl))
+ qede_tx_log_print(edev, txq);
+ }
+
+ if (IS_VF(edev))
+ return;
+
+ if (test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) ||
+ edev->state == QEDE_STATE_RECOVERY) {
+ DP_INFO(edev,
+ "Avoid handling a Tx timeout while another HW error is being handled\n");
+ return;
+ }
+
+ set_bit(QEDE_ERR_GET_DBG_INFO, &edev->err_flags);
+ set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
+ schedule_delayed_work(&edev->sp_task, 0);
+}
+
static int qede_setup_tc(struct net_device *ndev, u8 num_tc)
{
struct qede_dev *edev = netdev_priv(ndev);
@@ -623,6 +671,7 @@ static const struct net_device_ops qede_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = qede_change_mtu,
.ndo_do_ioctl = qede_ioctl,
+ .ndo_tx_timeout = qede_tx_timeout,
#ifdef CONFIG_QED_SRIOV
.ndo_set_vf_mac = qede_set_vf_mac,
.ndo_set_vf_vlan = qede_set_vf_vlan,
@@ -1009,6 +1058,8 @@ static void qede_sp_task(struct work_struct *work)
qede_process_arfs_filters(edev, false);
}
#endif
+ if (test_and_clear_bit(QEDE_SP_HW_ERR, &edev->sp_flags))
+ qede_generic_hw_err_handler(edev);
__qede_unlock(edev);
if (test_and_clear_bit(QEDE_SP_AER, &edev->sp_flags)) {
@@ -2509,6 +2560,100 @@ err:
qede_recovery_failed(edev);
}
+static void qede_atomic_hw_err_handler(struct qede_dev *edev)
+{
+ struct qed_dev *cdev = edev->cdev;
+
+ DP_NOTICE(edev,
+ "Generic non-sleepable HW error handling started - err_flags 0x%lx\n",
+ edev->err_flags);
+
+ /* Get a call trace of the flow that led to the error */
+ WARN_ON(test_bit(QEDE_ERR_WARN, &edev->err_flags));
+
+ /* Prevent HW attentions from being reasserted */
+ if (test_bit(QEDE_ERR_ATTN_CLR_EN, &edev->err_flags))
+ edev->ops->common->attn_clr_enable(cdev, true);
+
+ DP_NOTICE(edev, "Generic non-sleepable HW error handling is done\n");
+}
+
+static void qede_generic_hw_err_handler(struct qede_dev *edev)
+{
+ struct qed_dev *cdev = edev->cdev;
+
+ DP_NOTICE(edev,
+ "Generic sleepable HW error handling started - err_flags 0x%lx\n",
+ edev->err_flags);
+
+ /* Trigger a recovery process.
+ * This is placed in the sleep requiring section just to make
+ * sure it is the last one, and that all the other operations
+ * were completed.
+ */
+ if (test_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags))
+ edev->ops->common->recovery_process(cdev);
+
+ clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
+
+ DP_NOTICE(edev, "Generic sleepable HW error handling is done\n");
+}
+
+static void qede_set_hw_err_flags(struct qede_dev *edev,
+ enum qed_hw_err_type err_type)
+{
+ unsigned long err_flags = 0;
+
+ switch (err_type) {
+ case QED_HW_ERR_DMAE_FAIL:
+ set_bit(QEDE_ERR_WARN, &err_flags);
+ fallthrough;
+ case QED_HW_ERR_MFW_RESP_FAIL:
+ case QED_HW_ERR_HW_ATTN:
+ case QED_HW_ERR_RAMROD_FAIL:
+ case QED_HW_ERR_FW_ASSERT:
+ set_bit(QEDE_ERR_ATTN_CLR_EN, &err_flags);
+ set_bit(QEDE_ERR_GET_DBG_INFO, &err_flags);
+ break;
+
+ default:
+ DP_NOTICE(edev, "Unexpected HW error [%d]\n", err_type);
+ break;
+ }
+
+ edev->err_flags |= err_flags;
+}
+
+static void qede_schedule_hw_err_handler(void *dev,
+ enum qed_hw_err_type err_type)
+{
+ struct qede_dev *edev = dev;
+
+ /* Fan failure cannot be masked by handling of another HW error or by a
+ * concurrent recovery process.
+ */
+ if ((test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) ||
+ edev->state == QEDE_STATE_RECOVERY) &&
+ err_type != QED_HW_ERR_FAN_FAIL) {
+ DP_INFO(edev,
+ "Avoid scheduling an error handling while another HW error is being handled\n");
+ return;
+ }
+
+ if (err_type >= QED_HW_ERR_LAST) {
+ DP_NOTICE(edev, "Unknown HW error [%d]\n", err_type);
+ clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
+ return;
+ }
+
+ qede_set_hw_err_flags(edev, err_type);
+ qede_atomic_hw_err_handler(edev);
+ set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
+ schedule_delayed_work(&edev->sp_task, 0);
+
+ DP_INFO(edev, "Scheduled a error handler [err_type %d]\n", err_type);
+}
+
static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
{
struct netdev_queue *netdev_txq;
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 8f29e0d8a7b3..48325d7790f8 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -607,6 +607,16 @@ struct qed_sb_info {
struct qed_dev *cdev;
};
+enum qed_hw_err_type {
+ QED_HW_ERR_FAN_FAIL,
+ QED_HW_ERR_MFW_RESP_FAIL,
+ QED_HW_ERR_HW_ATTN,
+ QED_HW_ERR_DMAE_FAIL,
+ QED_HW_ERR_RAMROD_FAIL,
+ QED_HW_ERR_FW_ASSERT,
+ QED_HW_ERR_LAST,
+};
+
enum qed_dev_type {
QED_DEV_TYPE_BB,
QED_DEV_TYPE_AH,
@@ -811,10 +821,11 @@ enum qed_nvm_flash_cmd {
struct qed_common_cb_ops {
void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc);
- void (*link_update)(void *dev,
- struct qed_link_output *link);
+ void (*link_update)(void *dev, struct qed_link_output *link);
void (*schedule_recovery_handler)(void *dev);
- void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
+ void (*schedule_hw_err_handler)(void *dev,
+ enum qed_hw_err_type err_type);
+ void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data);
void (*get_protocol_tlv_data)(void *dev, void *data);
};
@@ -1034,6 +1045,15 @@ struct qed_common_ops {
*/
int (*set_led)(struct qed_dev *cdev,
enum qed_led_mode mode);
+
+/**
+ * @brief attn_clr_enable - Prevent attentions from being reasserted
+ *
+ * @param cdev
+ * @param clr_enable
+ */
+ void (*attn_clr_enable)(struct qed_dev *cdev, bool clr_enable);
+
/**
* @brief db_recovery_add - add doorbell information to the doorbell
* recovery mechanism.