aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-06-14 19:44:29 -0700
committerDavid S. Miller <davem@davemloft.net>2019-06-14 19:44:29 -0700
commitb4a6d9abeb545d79cc76f150f31c3cebc3472287 (patch)
tree375f99afcea33585667d62375efca67e4d3bb1d1 /include
parentipv4: Support multipath hashing on inner IP pkts for GRE tunnel (diff)
parentDocumentation: net: mlx5: Devlink health documentation (diff)
downloadlinux-dev-b4a6d9abeb545d79cc76f150f31c3cebc3472287.tar.xz
linux-dev-b4a6d9abeb545d79cc76f150f31c3cebc3472287.zip
Merge tag 'mlx5-updates-2019-06-13' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says: ==================== mlx5-updates-2019-06-13 Mlx5 devlink health fw reporters and sw reset support This series provides mlx5 firmware reset support and firmware devlink health reporters. 1) Add initial mlx5 kernel documentation and include devlink health reporters 2) Add CR-Space access and FW Crdump snapshot support via devlink region_snapshot 3) Issue software reset upon FW asserts 4) Add fw and fw_fatal devlink health reporters to follow fw error indications with dump and recover procedures, and enable the user to trigger this functionality. 4.1) fw reporter: The fw reporter implements diagnose and dump callbacks. It follows symptoms of fw error such as fw syndrome by triggering fw core dump and storing it and any other fw trace into the dump buffer. The fw reporter diagnose command can be triggered any time by the user to check current fw status. 4.2) fw_fatal reporter: The fw_fatal reporter implements dump and recover callbacks. It follows fatal error indications with a CR-space dump and recover flow. The CR-space dump uses the vsc interface, which is valid even if the FW command interface is not functional, which is the case in most FW fatal errors. The CR-space dump is stored as a memory region snapshot to ease read by address. The recover function runs the recover flow, which reloads the driver and triggers fw reset if needed. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/mlx5/device.h10
-rw-r--r--include/linux/mlx5/driver.h13
2 files changed, 19 insertions, 4 deletions
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 5e760067ac41..35ed38c2ae6c 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -510,6 +510,10 @@ struct mlx5_cmd_layout {
u8 status_own;
};
+enum mlx5_fatal_assert_bit_offsets {
+ MLX5_RFR_OFFSET = 31,
+};
+
struct health_buffer {
__be32 assert_var[5];
__be32 rsvd0[3];
@@ -518,12 +522,16 @@ struct health_buffer {
__be32 rsvd1[2];
__be32 fw_ver;
__be32 hw_id;
- __be32 rsvd2;
+ __be32 rfr;
u8 irisc_index;
u8 synd;
__be16 ext_synd;
};
+enum mlx5_initializing_bit_offsets {
+ MLX5_FW_RESET_SUPPORTED_OFFSET = 30,
+};
+
enum mlx5_cmd_addr_l_sz_offset {
MLX5_NIC_IFC_OFFSET = 8,
};
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 3a810bf043fe..25847beabd3f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -53,6 +53,7 @@
#include <linux/mlx5/eq.h>
#include <linux/timecounter.h>
#include <linux/ptp_clock_kernel.h>
+#include <net/devlink.h>
enum {
MLX5_BOARD_ID_LEN = 64,
@@ -434,13 +435,18 @@ struct mlx5_core_health {
struct timer_list timer;
u32 prev;
int miss_counter;
- bool sick;
+ u8 synd;
+ u32 fatal_error;
+ u32 crdump_size;
/* wq spinlock to synchronize draining */
spinlock_t wq_lock;
struct workqueue_struct *wq;
unsigned long flags;
- struct work_struct work;
+ struct work_struct fatal_report_work;
+ struct work_struct report_work;
struct delayed_work recover_work;
+ struct devlink_health_reporter *fw_reporter;
+ struct devlink_health_reporter *fw_fatal_reporter;
};
struct mlx5_qp_table {
@@ -581,6 +587,7 @@ struct mlx5_priv {
};
enum mlx5_device_state {
+ MLX5_DEVICE_STATE_UNINITIALIZED,
MLX5_DEVICE_STATE_UP,
MLX5_DEVICE_STATE_INTERNAL_ERROR,
};
@@ -693,6 +700,7 @@ struct mlx5_core_dev {
struct mlx5_clock clock;
struct mlx5_ib_clock_info *clock_info;
struct mlx5_fw_tracer *tracer;
+ u32 vsc_addr;
};
struct mlx5_db {
@@ -904,7 +912,6 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev);
void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health);
void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
-void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
struct mlx5_frag_buf *buf, int node);
int mlx5_buf_alloc(struct mlx5_core_dev *dev,