aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/include/linux/mlx5/mlx5_ifc.h
diff options
context:
space:
mode:
authorAdham Faris <afaris@nvidia.com>2023-08-07 11:05:07 -0700
committerJakub Kicinski <kuba@kernel.org>2023-08-09 15:52:16 -0700
commit1f507e80c700e31e358bf4213dc7e4dd614c7c72 (patch)
treedd86c06009b74686a924bc683990f47ac9bfbe8c /include/linux/mlx5/mlx5_ifc.h
parentnet/mlx5: Expose port.c/mlx5_query_module_num() function (diff)
downloadwireguard-linux-1f507e80c700e31e358bf4213dc7e4dd614c7c72.tar.xz
wireguard-linux-1f507e80c700e31e358bf4213dc7e4dd614c7c72.zip
net/mlx5: Expose NIC temperature via hardware monitoring kernel API
Expose NIC temperature by implementing hwmon kernel API, which turns current thermal zone kernel API to redundant. For each one of the supported and exposed thermal diode sensors, expose the following attributes: 1) Input temperature. 2) Highest temperature. 3) Temperature label: Depends on the firmware capability, if firmware doesn't support sensors naming, the fallback naming convention would be: "sensorX", where X is the HW spec (MTMP register) sensor index. 4) Temperature critical max value: refers to the high threshold of Warning Event. Will be exposed as `tempY_crit` hwmon attribute (RO attribute). For example for ConnectX5 HCA's this temperature value will be 105 Celsius, 10 degrees lower than the HW shutdown temperature). 5) Temperature reset history: resets highest temperature. For example, for dualport ConnectX5 NIC with a single IC thermal diode sensor will have 2 hwmon directories (one for each PCI function) under "/sys/class/hwmon/hwmon[X,Y]". Listing one of the directories above (hwmonX/Y) generates the corresponding output below: $ grep -H -d skip . /sys/class/hwmon/hwmon0/* Output ======================================================================= /sys/class/hwmon/hwmon0/name:mlx5 /sys/class/hwmon/hwmon0/temp1_crit:105000 /sys/class/hwmon/hwmon0/temp1_highest:48000 /sys/class/hwmon/hwmon0/temp1_input:46000 /sys/class/hwmon/hwmon0/temp1_label:asic grep: /sys/class/hwmon/hwmon0/temp1_reset_history: Permission denied In addition, displaying the sensors data via lm_sensors generates the corresponding output below: $ sensors Output ======================================================================= mlx5-pci-0800 Adapter: PCI adapter asic: +46.0°C (crit = +105.0°C, highest = +48.0°C) mlx5-pci-0801 Adapter: PCI adapter asic: +46.0°C (crit = +105.0°C, highest = +48.0°C) CC: Jean Delvare <jdelvare@suse.com> Signed-off-by: Adham Faris <afaris@nvidia.com> Reviewed-by: Tariq Toukan <tariqt@nvidia.com> Reviewed-by: Gal Pressman <gal@nvidia.com> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com> Acked-by: Guenter Roeck <linux@roeck-us.net> Reviewed-by: Simon Horman <horms@kernel.org> Link: https://lore.kernel.org/r/20230807180507.22984-3-saeed@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include/linux/mlx5/mlx5_ifc.h')
-rw-r--r--include/linux/mlx5/mlx5_ifc.h14
1 files changed, 13 insertions, 1 deletions
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index b3ad6b9852ec..87fd6f9ed82c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -10196,7 +10196,9 @@ struct mlx5_ifc_mcam_access_reg_bits {
u8 mrtc[0x1];
u8 regs_44_to_32[0xd];
- u8 regs_31_to_0[0x20];
+ u8 regs_31_to_10[0x16];
+ u8 mtmp[0x1];
+ u8 regs_8_to_0[0x9];
};
struct mlx5_ifc_mcam_access_reg_bits1 {
@@ -10949,6 +10951,15 @@ struct mlx5_ifc_mrtc_reg_bits {
u8 time_l[0x20];
};
+struct mlx5_ifc_mtcap_reg_bits {
+ u8 reserved_at_0[0x19];
+ u8 sensor_count[0x7];
+
+ u8 reserved_at_20[0x20];
+
+ u8 sensor_map[0x40];
+};
+
struct mlx5_ifc_mtmp_reg_bits {
u8 reserved_at_0[0x14];
u8 sensor_index[0xc];
@@ -11036,6 +11047,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
struct mlx5_ifc_mfrl_reg_bits mfrl_reg;
struct mlx5_ifc_mtutc_reg_bits mtutc_reg;
struct mlx5_ifc_mrtc_reg_bits mrtc_reg;
+ struct mlx5_ifc_mtcap_reg_bits mtcap_reg;
struct mlx5_ifc_mtmp_reg_bits mtmp_reg;
u8 reserved_at_0[0x60e0];
};