From cb464ba53c0cb497dcb4a3daaf4fad4b75291863 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 11 Oct 2021 13:14:28 +0300 Subject: net/mlx5: Extend health buffer dump Enhance health buffer to include: - assert_var5: expose the 6'th assert variable. - time: error's time-stamp in seconds (epoch time). - rfr: Recovery Flow Requiered. When set, indicates that the error cannot be recovered without flow involving reset. - severity: error's severity value, ranging from emergency to debug. Expose them in the health buffer dump (dmesg and devlink fw reporter). Health buffer in dmesg: mlx5_core 0000:08:00.0: print_health_info:425:(pid 912): Health issue observed, firmware internal error, severity(3) ERROR: mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[0] 0x08040700 mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[1] 0x00000000 mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[2] 0x00000000 mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[3] 0x00000000 mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[4] 0x00000000 mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[5] 0x00000000 mlx5_core 0000:08:00.0: print_health_info:432:(pid 912): assert_exit_ptr 0x00aaf800 mlx5_core 0000:08:00.0: print_health_info:434:(pid 912): assert_callra 0x00aaf70c mlx5_core 0000:08:00.0: print_health_info:436:(pid 912): fw_ver 16.32.492 mlx5_core 0000:08:00.0: print_health_info:437:(pid 912): time 1634819758 mlx5_core 0000:08:00.0: print_health_info:438:(pid 912): hw_id 0x0000020d mlx5_core 0000:08:00.0: print_health_info:439:(pid 912): rfr 0 mlx5_core 0000:08:00.0: print_health_info:440:(pid 912): severity 3 (ERROR) mlx5_core 0000:08:00.0: print_health_info:441:(pid 912): irisc_index 9 mlx5_core 0000:08:00.0: print_health_info:442:(pid 912): synd 0x1: firmware internal error mlx5_core 0000:08:00.0: print_health_info:444:(pid 912): ext_synd 0x802b mlx5_core 0000:08:00.0: print_health_info:445:(pid 912): raw fw_ver 0x102001ec Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 14 ++++++++------ include/linux/mlx5/mlx5_ifc.h | 10 ++++++++-- 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 347167c18802..f8a0bbb42c3b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -541,19 +541,21 @@ struct mlx5_cmd_layout { u8 status_own; }; -enum mlx5_fatal_assert_bit_offsets { - MLX5_RFR_OFFSET = 31, +enum mlx5_rfr_severity_bit_offsets { + MLX5_RFR_BIT_OFFSET = 0x7, }; struct health_buffer { - __be32 assert_var[5]; - __be32 rsvd0[3]; + __be32 assert_var[6]; + __be32 rsvd0[2]; __be32 assert_exit_ptr; __be32 assert_callra; - __be32 rsvd1[2]; + __be32 rsvd1[1]; + __be32 time; __be32 fw_ver; __be32 hw_id; - __be32 rfr; + u8 rfr_severity; + u8 rsvd2[3]; u8 irisc_index; u8 synd; __be16 ext_synd; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 09e43019d877..6d292b5b8992 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4149,13 +4149,19 @@ struct mlx5_ifc_health_buffer_bits { u8 assert_callra[0x20]; - u8 reserved_at_140[0x40]; + u8 reserved_at_140[0x20]; + + u8 time[0x20]; u8 fw_version[0x20]; u8 hw_id[0x20]; - u8 reserved_at_1c0[0x20]; + u8 rfr[0x1]; + u8 reserved_at_1c1[0x3]; + u8 valid[0x1]; + u8 severity[0x3]; + u8 reserved_at_1c8[0x18]; u8 irisc_index[0x8]; u8 synd[0x8]; -- cgit v1.2.3-59-g8ed1b