aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox')
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/eq.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c34
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c118
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c115
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c139
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ecpf.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ecpf.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h288
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c108
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.h118
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c304
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c335
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c95
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c151
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h208
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c231
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c192
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c223
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c111
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c267
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c93
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h97
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c448
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dim.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c66
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c875
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c335
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c139
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c143
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c145
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c117
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c54
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c507
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c239
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h114
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c796
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c277
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/events.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h75
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c100
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c237
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c569
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c40
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c72
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c157
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c316
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c116
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h26
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mr.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c334
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/rdma.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c52
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/mlxfw.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c57
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Kconfig2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/cmd.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c57
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.h30
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h22
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_env.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c143
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_thermal.c248
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/i2c.c76
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/minimal.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.c50
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci_hw.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h524
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c596
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h36
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c84
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c1111
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h186
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c346
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/switchx2.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/trap.h6
133 files changed, 11544 insertions, 2704 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index a5be27772b8e..c790a5fcea73 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -1013,8 +1013,6 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
dma_list[i] = t;
eq->page_list[i].map = t;
-
- memset(eq->page_list[i].buf, 0, PAGE_SIZE);
}
eq->eqn = mlx4_bitmap_alloc(&priv->eq_table.bitmap);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 2391e3cfb56b..37fef8cd25e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -34,6 +34,7 @@ config MLX5_CORE_EN
depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
depends on IPV6=y || IPV6=n || MLX5_CORE=m
select PAGE_POOL
+ select DIMLIB
default n
---help---
Ethernet support in Mellanox Technologies ConnectX-4 NIC.
@@ -96,26 +97,60 @@ config MLX5_CORE_IPOIB
---help---
MLX5 IPoIB offloads & acceleration support.
+config MLX5_FPGA_IPSEC
+ bool "Mellanox Technologies IPsec Innova support"
+ depends on MLX5_CORE
+ depends on MLX5_FPGA
+ default n
+ help
+ Build IPsec support for the Innova family of network cards by Mellanox
+ Technologies. Innova network cards are comprised of a ConnectX chip
+ and an FPGA chip on one board. If you select this option, the
+ mlx5_core driver will include the Innova FPGA core and allow building
+ sandbox-specific client drivers.
+
config MLX5_EN_IPSEC
bool "IPSec XFRM cryptography-offload accelaration"
- depends on MLX5_ACCEL
depends on MLX5_CORE_EN
depends on XFRM_OFFLOAD
depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+ depends on MLX5_FPGA_IPSEC
default n
- ---help---
+ help
Build support for IPsec cryptography-offload accelaration in the NIC.
Note: Support for hardware with this capability needs to be selected
for this option to become available.
-config MLX5_EN_TLS
- bool "TLS cryptography-offload accelaration"
+config MLX5_FPGA_TLS
+ bool "Mellanox Technologies TLS Innova support"
+ depends on TLS_DEVICE
+ depends on TLS=y || MLX5_CORE=m
+ depends on MLX5_FPGA
+ default n
+ help
+ Build TLS support for the Innova family of network cards by Mellanox
+ Technologies. Innova network cards are comprised of a ConnectX chip
+ and an FPGA chip on one board. If you select this option, the
+ mlx5_core driver will include the Innova FPGA core and allow building
+ sandbox-specific client drivers.
+
+config MLX5_TLS
+ bool "Mellanox Technologies TLS Connect-X support"
depends on MLX5_CORE_EN
depends on TLS_DEVICE
depends on TLS=y || MLX5_CORE=m
- depends on MLX5_ACCEL
+ select MLX5_ACCEL
default n
- ---help---
- Build support for TLS cryptography-offload accelaration in the NIC.
- Note: Support for hardware with this capability needs to be selected
- for this option to become available.
+ help
+ Build TLS support for the Connect-X family of network cards by Mellanox
+ Technologies.
+
+config MLX5_EN_TLS
+ bool "TLS cryptography-offload accelaration"
+ depends on MLX5_CORE_EN
+ depends on MLX5_FPGA_TLS || MLX5_TLS
+ default y
+ help
+ Build support for TLS cryptography-offload accelaration in the NIC.
+ Note: Support for hardware with this capability needs to be selected
+ for this option to become available.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 243368dc23db..57d2cc666fe3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -13,9 +13,10 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
#
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
- transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
+ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
- lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o
+ lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \
+ diag/fw_tracer.o diag/crdump.o devlink.o
#
# Netdev basic
@@ -23,7 +24,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o \
- en/params.o
+ en/params.o en/xsk/umem.o en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o
#
# Netdev extra
@@ -31,12 +32,15 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \
+ lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
+ en/tc_tun_geneve.o
#
# Core extra
#
-mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o rdma.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
+ ecpf.o rdma.o
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
@@ -49,12 +53,14 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib
#
# Accelerations & FPGA
#
-mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o accel/tls.o
+mlx5_core-$(CONFIG_MLX5_FPGA_IPSEC) += fpga/ipsec.o
+mlx5_core-$(CONFIG_MLX5_FPGA_TLS) += fpga/tls.o
+mlx5_core-$(CONFIG_MLX5_ACCEL) += lib/crypto.o accel/tls.o accel/ipsec.o
-mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
- fpga/ipsec.o fpga/tls.o
+mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o
mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
en_accel/ipsec_stats.o
-mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o
+mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \
+ en_accel/ktls.o en_accel/ktls_tx.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
index 9f1b1939716a..eddc34e4a762 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -31,6 +31,8 @@
*
*/
+#ifdef CONFIG_MLX5_FPGA_IPSEC
+
#include <linux/mlx5/device.h>
#include "accel/ipsec.h"
@@ -74,6 +76,11 @@ int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
return mlx5_fpga_ipsec_init(mdev);
}
+void mlx5_accel_ipsec_build_fs_cmds(void)
+{
+ mlx5_fpga_ipsec_build_fs_cmds();
+}
+
void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
{
mlx5_fpga_ipsec_cleanup(mdev);
@@ -107,3 +114,5 @@ int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
return mlx5_fpga_esp_modify_xfrm(xfrm, attrs);
}
EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
index 024dbd22a89b..530e428d46ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -37,7 +37,7 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/accel.h>
-#ifdef CONFIG_MLX5_ACCEL
+#ifdef CONFIG_MLX5_FPGA_IPSEC
#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
MLX5_ACCEL_IPSEC_CAP_DEVICE)
@@ -54,6 +54,7 @@ void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
void mlx5_accel_esp_free_hw_context(void *context);
int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
+void mlx5_accel_ipsec_build_fs_cmds(void);
void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
#else
@@ -79,6 +80,10 @@ static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
return 0;
}
+static inline void mlx5_accel_ipsec_build_fs_cmds(void)
+{
+}
+
static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
{
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
index da7bd26368f9..cab708af3422 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
@@ -35,6 +35,9 @@
#include "accel/tls.h"
#include "mlx5_core.h"
+#include "lib/mlx5.h"
+
+#ifdef CONFIG_MLX5_FPGA_TLS
#include "fpga/tls.h"
int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
@@ -61,7 +64,8 @@ int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
{
- return mlx5_fpga_is_tls_device(mdev);
+ return mlx5_fpga_is_tls_device(mdev) ||
+ mlx5_accel_is_ktls_device(mdev);
}
u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev)
@@ -78,3 +82,42 @@ void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev)
{
mlx5_fpga_tls_cleanup(mdev);
}
+#endif
+
+#ifdef CONFIG_MLX5_TLS
+int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info,
+ u32 *p_key_id)
+{
+ u32 sz_bytes;
+ void *key;
+
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info =
+ (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+
+ key = info->key;
+ sz_bytes = sizeof(info->key);
+ break;
+ }
+ case TLS_CIPHER_AES_GCM_256: {
+ struct tls12_crypto_info_aes_gcm_256 *info =
+ (struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
+
+ key = info->key;
+ sz_bytes = sizeof(info->key);
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+
+ return mlx5_create_encryption_key(mdev, key, sz_bytes, p_key_id);
+}
+
+void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id)
+{
+ mlx5_destroy_encryption_key(mdev, key_id);
+}
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
index def4093ebfae..d787bc0a4155 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
@@ -37,7 +37,49 @@
#include <linux/mlx5/driver.h>
#include <linux/tls.h>
-#ifdef CONFIG_MLX5_ACCEL
+#ifdef CONFIG_MLX5_TLS
+int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info,
+ u32 *p_key_id);
+void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id);
+
+static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev)
+{
+ if (!MLX5_CAP_GEN(mdev, tls))
+ return false;
+
+ if (!MLX5_CAP_GEN(mdev, log_max_dek))
+ return false;
+
+ return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
+}
+
+static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info)
+{
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ if (crypto_info->version == TLS_1_2_VERSION)
+ return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
+ break;
+ }
+
+ return false;
+}
+#else
+static inline int
+mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info,
+ u32 *p_key_id) { return -ENOTSUPP; }
+static inline void
+mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) {}
+
+static inline bool
+mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; }
+static inline bool
+mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info) { return false; }
+#endif
enum {
MLX5_ACCEL_TLS_TX = BIT(0),
@@ -60,6 +102,7 @@ struct mlx5_ifc_tls_flow_bits {
u8 reserved_at_2[0x1e];
};
+#ifdef CONFIG_MLX5_FPGA_TLS
int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
struct tls_crypto_info *crypto_info,
u32 start_offload_tcp_sn, u32 *p_swid,
@@ -84,11 +127,13 @@ static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
bool direction_sx) { }
static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle,
u32 seq, u64 rcd_sn) { return 0; }
-static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; }
+static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
+{
+ return mlx5_accel_is_ktls_device(mdev);
+}
static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; }
static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; }
static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { }
-
#endif
#endif /* __MLX5_ACCEL_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index d2ab8cd8ad9f..8cdd7e66f8df 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -316,7 +316,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
case MLX5_CMD_OP_DEALLOC_MEMIC:
case MLX5_CMD_OP_PAGE_FAULT_RESUME:
- case MLX5_CMD_OP_QUERY_HOST_PARAMS:
+ case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
return MLX5_CMD_STAT_OK;
case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -441,6 +441,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_CREATE_UCTX:
+ case MLX5_CMD_OP_DESTROY_UCTX:
+ case MLX5_CMD_OP_CREATE_UMEM:
+ case MLX5_CMD_OP_DESTROY_UMEM:
case MLX5_CMD_OP_ALLOC_MEMIC:
*status = MLX5_DRIVER_STATUS_ABORTED;
*synd = MLX5_DRIVER_SYND;
@@ -628,7 +632,11 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
MLX5_COMMAND_STR_CASE(ALLOC_MEMIC);
MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC);
- MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS);
+ MLX5_COMMAND_STR_CASE(QUERY_ESW_FUNCTIONS);
+ MLX5_COMMAND_STR_CASE(CREATE_UCTX);
+ MLX5_COMMAND_STR_CASE(DESTROY_UCTX);
+ MLX5_COMMAND_STR_CASE(CREATE_UMEM);
+ MLX5_COMMAND_STR_CASE(DESTROY_UMEM);
default: return "unknown command opcode";
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 713a17ee3751..818edc63e428 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -58,7 +58,7 @@ void mlx5_cq_tasklet_cb(unsigned long data)
list_for_each_entry_safe(mcq, temp, &ctx->process_list,
tasklet_ctx.list) {
list_del_init(&mcq->tasklet_ctx.list);
- mcq->tasklet_ctx.comp(mcq);
+ mcq->tasklet_ctx.comp(mcq, NULL);
mlx5_cq_put(mcq);
if (time_after(jiffies, end))
break;
@@ -68,7 +68,8 @@ void mlx5_cq_tasklet_cb(unsigned long data)
tasklet_schedule(&ctx->task);
}
-static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
+static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
+ struct mlx5_eqe *eqe)
{
unsigned long flags;
struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv;
@@ -87,11 +88,10 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
}
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
- u32 *in, int inlen)
+ u32 *in, int inlen, u32 *out, int outlen)
{
int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
- u32 out[MLX5_ST_SZ_DW(create_cq_out)];
u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
struct mlx5_eq_comp *eq;
int err;
@@ -100,9 +100,9 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
if (IS_ERR(eq))
return PTR_ERR(eq);
- memset(out, 0, sizeof(out));
+ memset(out, 0, outlen);
MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
- err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
if (err)
return err;
@@ -158,13 +158,8 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
int err;
- err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq);
- if (err)
- return err;
-
- err = mlx5_eq_del_cq(&cq->eq->core, cq);
- if (err)
- return err;
+ mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq);
+ mlx5_eq_del_cq(&cq->eq->core, cq);
MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index ebc046fa97d3..5bb6a26ea267 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -248,11 +248,32 @@ void mlx5_unregister_interface(struct mlx5_interface *intf)
}
EXPORT_SYMBOL(mlx5_unregister_interface);
+/* Must be called with intf_mutex held */
+static bool mlx5_has_added_dev_by_protocol(struct mlx5_core_dev *mdev, int protocol)
+{
+ struct mlx5_device_context *dev_ctx;
+ struct mlx5_interface *intf;
+ bool found = false;
+
+ list_for_each_entry(intf, &intf_list, list) {
+ if (intf->protocol == protocol) {
+ dev_ctx = mlx5_get_device(intf, &mdev->priv);
+ if (dev_ctx && test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+ found = true;
+ break;
+ }
+ }
+
+ return found;
+}
+
void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
{
mutex_lock(&mlx5_intf_mutex);
- mlx5_remove_dev_by_protocol(mdev, protocol);
- mlx5_add_dev_by_protocol(mdev, protocol);
+ if (mlx5_has_added_dev_by_protocol(mdev, protocol)) {
+ mlx5_remove_dev_by_protocol(mdev, protocol);
+ mlx5_add_dev_by_protocol(mdev, protocol);
+ }
mutex_unlock(&mlx5_intf_mutex);
}
@@ -290,13 +311,20 @@ static u32 mlx5_gen_pci_id(struct mlx5_core_dev *dev)
/* Must be called with intf_mutex held */
struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
{
- u32 pci_id = mlx5_gen_pci_id(dev);
struct mlx5_core_dev *res = NULL;
struct mlx5_core_dev *tmp_dev;
struct mlx5_priv *priv;
+ u32 pci_id;
+ if (!mlx5_core_is_pf(dev))
+ return NULL;
+
+ pci_id = mlx5_gen_pci_id(dev);
list_for_each_entry(priv, &mlx5_dev_list, dev_list) {
tmp_dev = container_of(priv, struct mlx5_core_dev, priv);
+ if (!mlx5_core_is_pf(tmp_dev))
+ continue;
+
if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) {
res = tmp_dev;
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
new file mode 100644
index 000000000000..a400f4430c28
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include <devlink.h>
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+
+static int mlx5_devlink_flash_update(struct devlink *devlink,
+ const char *file_name,
+ const char *component,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ const struct firmware *fw;
+ int err;
+
+ if (component)
+ return -EOPNOTSUPP;
+
+ err = request_firmware_direct(&fw, file_name, &dev->pdev->dev);
+ if (err)
+ return err;
+
+ return mlx5_firmware_flash(dev, fw, extack);
+}
+
+static u8 mlx5_fw_ver_major(u32 version)
+{
+ return (version >> 24) & 0xff;
+}
+
+static u8 mlx5_fw_ver_minor(u32 version)
+{
+ return (version >> 16) & 0xff;
+}
+
+static u16 mlx5_fw_ver_subminor(u32 version)
+{
+ return version & 0xffff;
+}
+
+#define DEVLINK_FW_STRING_LEN 32
+
+static int
+mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ char version_str[DEVLINK_FW_STRING_LEN];
+ u32 running_fw, stored_fw;
+ int err;
+
+ err = devlink_info_driver_name_put(req, DRIVER_NAME);
+ if (err)
+ return err;
+
+ err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id);
+ if (err)
+ return err;
+
+ err = mlx5_fw_version_query(dev, &running_fw, &stored_fw);
+ if (err)
+ return err;
+
+ snprintf(version_str, sizeof(version_str), "%d.%d.%04d",
+ mlx5_fw_ver_major(running_fw), mlx5_fw_ver_minor(running_fw),
+ mlx5_fw_ver_subminor(running_fw));
+ err = devlink_info_version_running_put(req, "fw.version", version_str);
+ if (err)
+ return err;
+
+ /* no pending version, return running (stored) version */
+ if (stored_fw == 0)
+ stored_fw = running_fw;
+
+ snprintf(version_str, sizeof(version_str), "%d.%d.%04d",
+ mlx5_fw_ver_major(stored_fw), mlx5_fw_ver_minor(stored_fw),
+ mlx5_fw_ver_subminor(stored_fw));
+ err = devlink_info_version_stored_put(req, "fw.version", version_str);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static const struct devlink_ops mlx5_devlink_ops = {
+#ifdef CONFIG_MLX5_ESWITCH
+ .eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
+ .eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
+ .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set,
+ .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get,
+ .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set,
+ .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
+#endif
+ .flash_update = mlx5_devlink_flash_update,
+ .info_get = mlx5_devlink_info_get,
+};
+
+struct devlink *mlx5_devlink_alloc(void)
+{
+ return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev));
+}
+
+void mlx5_devlink_free(struct devlink *devlink)
+{
+ devlink_free(devlink);
+}
+
+int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
+{
+ return devlink_register(devlink, dev);
+}
+
+void mlx5_devlink_unregister(struct devlink *devlink)
+{
+ devlink_unregister(devlink);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
new file mode 100644
index 000000000000..d0ba03774ddf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019, Mellanox Technologies */
+
+#ifndef __MLX5_DEVLINK_H__
+#define __MLX5_DEVLINK_H__
+
+#include <net/devlink.h>
+
+struct devlink *mlx5_devlink_alloc(void);
+void mlx5_devlink_free(struct devlink *devlink);
+int mlx5_devlink_register(struct devlink *devlink, struct device *dev);
+void mlx5_devlink_unregister(struct devlink *devlink);
+
+#endif /* __MLX5_DEVLINK_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
new file mode 100644
index 000000000000..28d02749d3c4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "lib/pci_vsc.h"
+#include "lib/mlx5.h"
+
+#define BAD_ACCESS 0xBADACCE5
+#define MLX5_PROTECTED_CR_SCAN_CRSPACE 0x7
+
+static bool mlx5_crdump_enabled(struct mlx5_core_dev *dev)
+{
+ return !!dev->priv.health.crdump_size;
+}
+
+static int mlx5_crdump_fill(struct mlx5_core_dev *dev, u32 *cr_data)
+{
+ u32 crdump_size = dev->priv.health.crdump_size;
+ int i, ret;
+
+ for (i = 0; i < (crdump_size / 4); i++)
+ cr_data[i] = BAD_ACCESS;
+
+ ret = mlx5_vsc_gw_read_block_fast(dev, cr_data, crdump_size);
+ if (ret <= 0) {
+ if (ret == 0)
+ return -EIO;
+ return ret;
+ }
+
+ if (crdump_size != ret) {
+ mlx5_core_warn(dev, "failed to read full dump, read %d out of %u\n",
+ ret, crdump_size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data)
+{
+ int ret;
+
+ if (!mlx5_crdump_enabled(dev))
+ return -ENODEV;
+
+ ret = mlx5_vsc_gw_lock(dev);
+ if (ret) {
+ mlx5_core_warn(dev, "crdump: failed to lock vsc gw err %d\n",
+ ret);
+ return ret;
+ }
+ /* Verify no other PF is running cr-dump or sw reset */
+ ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET,
+ MLX5_VSC_LOCK);
+ if (ret) {
+ mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");
+ goto unlock_gw;
+ }
+
+ ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL);
+ if (ret)
+ goto unlock_sem;
+
+ ret = mlx5_crdump_fill(dev, cr_data);
+
+unlock_sem:
+ mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, MLX5_VSC_UNLOCK);
+unlock_gw:
+ mlx5_vsc_gw_unlock(dev);
+ return ret;
+}
+
+int mlx5_crdump_enable(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ u32 space_size;
+ int ret;
+
+ if (!mlx5_core_is_pf(dev) || !mlx5_vsc_accessible(dev) ||
+ mlx5_crdump_enabled(dev))
+ return 0;
+
+ ret = mlx5_vsc_gw_lock(dev);
+ if (ret)
+ return ret;
+
+ /* Check if space is supported and get space size */
+ ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE,
+ &space_size);
+ if (ret) {
+ /* Unlock and mask error since space is not supported */
+ mlx5_vsc_gw_unlock(dev);
+ return 0;
+ }
+
+ if (!space_size) {
+ mlx5_core_warn(dev, "Invalid Crspace size, zero\n");
+ mlx5_vsc_gw_unlock(dev);
+ return -EINVAL;
+ }
+
+ ret = mlx5_vsc_gw_unlock(dev);
+ if (ret)
+ return ret;
+
+ priv->health.crdump_size = space_size;
+ return 0;
+}
+
+void mlx5_crdump_disable(struct mlx5_core_dev *dev)
+{
+ dev->priv.health.crdump_size = 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
index a4cf123e3f17..ddf1b87f1bc0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -187,6 +187,7 @@ TRACE_EVENT(mlx5_fs_set_fte,
__field(u32, index)
__field(u32, action)
__field(u32, flow_tag)
+ __field(u32, flow_source)
__field(u8, mask_enable)
__field(int, new_fte)
__array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
@@ -204,7 +205,8 @@ TRACE_EVENT(mlx5_fs_set_fte,
__entry->index = fte->index;
__entry->action = fte->action.action;
__entry->mask_enable = __entry->fg->mask.match_criteria_enable;
- __entry->flow_tag = fte->action.flow_tag;
+ __entry->flow_tag = fte->flow_context.flow_tag;
+ __entry->flow_source = fte->flow_context.flow_source;
memcpy(__entry->mask_outer,
MLX5_ADDR_OF(fte_match_param,
&__entry->fg->mask.match_criteria,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index 6999f4486e9e..8a4930c8bf62 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -243,6 +243,19 @@ free_strings_db:
return -ENOMEM;
}
+static void
+mlx5_fw_tracer_init_saved_traces_array(struct mlx5_fw_tracer *tracer)
+{
+ tracer->st_arr.saved_traces_index = 0;
+ mutex_init(&tracer->st_arr.lock);
+}
+
+static void
+mlx5_fw_tracer_clean_saved_traces_array(struct mlx5_fw_tracer *tracer)
+{
+ mutex_destroy(&tracer->st_arr.lock);
+}
+
static void mlx5_tracer_read_strings_db(struct work_struct *work)
{
struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer,
@@ -522,6 +535,24 @@ static void mlx5_fw_tracer_clean_ready_list(struct mlx5_fw_tracer *tracer)
list_del(&str_frmt->list);
}
+static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer,
+ u64 timestamp, bool lost,
+ u8 event_id, char *msg)
+{
+ struct mlx5_fw_trace_data *trace_data;
+
+ mutex_lock(&tracer->st_arr.lock);
+ trace_data = &tracer->st_arr.straces[tracer->st_arr.saved_traces_index];
+ trace_data->timestamp = timestamp;
+ trace_data->lost = lost;
+ trace_data->event_id = event_id;
+ strncpy(trace_data->msg, msg, TRACE_STR_MSG);
+
+ tracer->st_arr.saved_traces_index =
+ (tracer->st_arr.saved_traces_index + 1) & (SAVED_TRACES_NUM - 1);
+ mutex_unlock(&tracer->st_arr.lock);
+}
+
static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
struct mlx5_core_dev *dev,
u64 trace_timestamp)
@@ -540,6 +571,9 @@ static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost,
str_frmt->event_id, tmp);
+ mlx5_fw_tracer_save_trace(dev->tracer, trace_timestamp,
+ str_frmt->lost, str_frmt->event_id, tmp);
+
/* remove it from hash */
mlx5_tracer_clean_message(str_frmt);
}
@@ -786,6 +820,109 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
mlx5_fw_tracer_start(tracer);
}
+static int mlx5_fw_tracer_set_core_dump_reg(struct mlx5_core_dev *dev,
+ u32 *in, int size_in)
+{
+ u32 out[MLX5_ST_SZ_DW(core_dump_reg)] = {};
+
+ if (!MLX5_CAP_DEBUG(dev, core_dump_general) &&
+ !MLX5_CAP_DEBUG(dev, core_dump_qp))
+ return -EOPNOTSUPP;
+
+ return mlx5_core_access_reg(dev, in, size_in, out, sizeof(out),
+ MLX5_REG_CORE_DUMP, 0, 1);
+}
+
+int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_tracer *tracer = dev->tracer;
+ u32 in[MLX5_ST_SZ_DW(core_dump_reg)] = {};
+ int err;
+
+ if (!MLX5_CAP_DEBUG(dev, core_dump_general) || !tracer)
+ return -EOPNOTSUPP;
+ if (!tracer->owner)
+ return -EPERM;
+
+ MLX5_SET(core_dump_reg, in, core_dump_type, 0x0);
+
+ err = mlx5_fw_tracer_set_core_dump_reg(dev, in, sizeof(in));
+ if (err)
+ return err;
+ queue_work(tracer->work_queue, &tracer->handle_traces_work);
+ flush_workqueue(tracer->work_queue);
+ return 0;
+}
+
+static int
+mlx5_devlink_fmsg_fill_trace(struct devlink_fmsg *fmsg,
+ struct mlx5_fw_trace_data *trace_data)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "timestamp", trace_data->timestamp);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_bool_pair_put(fmsg, "lost", trace_data->lost);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "event_id", trace_data->event_id);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_pair_put(fmsg, "msg", trace_data->msg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+ return 0;
+}
+
+int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5_fw_trace_data *straces = tracer->st_arr.straces;
+ u32 index, start_index, end_index;
+ u32 saved_traces_index;
+ int err;
+
+ if (!straces[0].timestamp)
+ return -ENOMSG;
+
+ mutex_lock(&tracer->st_arr.lock);
+ saved_traces_index = tracer->st_arr.saved_traces_index;
+ if (straces[saved_traces_index].timestamp)
+ start_index = saved_traces_index;
+ else
+ start_index = 0;
+ end_index = (saved_traces_index - 1) & (SAVED_TRACES_NUM - 1);
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "dump fw traces");
+ if (err)
+ goto unlock;
+ index = start_index;
+ while (index != end_index) {
+ err = mlx5_devlink_fmsg_fill_trace(fmsg, &straces[index]);
+ if (err)
+ goto unlock;
+
+ index = (index + 1) & (SAVED_TRACES_NUM - 1);
+ }
+
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+unlock:
+ mutex_unlock(&tracer->st_arr.lock);
+ return err;
+}
+
/* Create software resources (Buffers, etc ..) */
struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
{
@@ -833,6 +970,7 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
goto free_log_buf;
}
+ mlx5_fw_tracer_init_saved_traces_array(tracer);
mlx5_core_dbg(dev, "FWTracer: Tracer created\n");
return tracer;
@@ -917,6 +1055,7 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
cancel_work_sync(&tracer->read_fw_strings_work);
mlx5_fw_tracer_clean_ready_list(tracer);
mlx5_fw_tracer_clean_print_hash(tracer);
+ mlx5_fw_tracer_clean_saved_traces_array(tracer);
mlx5_fw_tracer_free_strings_db(tracer);
mlx5_fw_tracer_destroy_log_buf(tracer);
flush_workqueue(tracer->work_queue);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
index a8b8747f2b61..40601fba80ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -46,6 +46,9 @@
#define TRACER_BLOCK_SIZE_BYTE 256
#define TRACES_PER_BLOCK 32
+#define TRACE_STR_MSG 256
+#define SAVED_TRACES_NUM 8192
+
#define TRACER_MAX_PARAMS 7
#define MESSAGE_HASH_BITS 6
#define MESSAGE_HASH_SIZE BIT(MESSAGE_HASH_BITS)
@@ -53,6 +56,13 @@
#define MASK_52_7 (0x1FFFFFFFFFFF80)
#define MASK_6_0 (0x7F)
+struct mlx5_fw_trace_data {
+ u64 timestamp;
+ bool lost;
+ u8 event_id;
+ char msg[TRACE_STR_MSG];
+};
+
struct mlx5_fw_tracer {
struct mlx5_core_dev *dev;
struct mlx5_nb nb;
@@ -83,6 +93,13 @@ struct mlx5_fw_tracer {
u32 consumer_index;
} buff;
+ /* Saved Traces Array */
+ struct {
+ struct mlx5_fw_trace_data straces[SAVED_TRACES_NUM];
+ u32 saved_traces_index;
+ struct mutex lock; /* Protect st_arr access */
+ } st_arr;
+
u64 last_timestamp;
struct work_struct handle_traces_work;
struct hlist_head hash[MESSAGE_HASH_SIZE];
@@ -171,5 +188,8 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
+int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev);
+int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer,
+ struct devlink_fmsg *fmsg);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
index 0ccd6d40baf7..d2228e37450f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@@ -83,30 +83,3 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev)
mlx5_peer_pf_cleanup(dev);
}
-
-static int mlx5_query_host_params_context(struct mlx5_core_dev *dev,
- u32 *out, int outlen)
-{
- u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {};
-
- MLX5_SET(query_host_params_in, in, opcode,
- MLX5_CMD_OP_QUERY_HOST_PARAMS);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
-}
-
-int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf)
-{
- u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {};
- int err;
-
- err = mlx5_query_host_params_context(dev, out, sizeof(out));
- if (err)
- return err;
-
- *num_vf = MLX5_GET(query_host_params_out, out,
- host_params_context.host_num_of_vfs);
- mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf);
-
- return 0;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
index 346372df218f..d3d7a00a02ac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
@@ -16,7 +16,6 @@ enum {
bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev);
int mlx5_ec_init(struct mlx5_core_dev *dev);
void mlx5_ec_cleanup(struct mlx5_core_dev *dev);
-int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf);
#else /* CONFIG_MLX5_ESWITCH */
@@ -24,9 +23,6 @@ static inline bool
mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; }
static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; }
static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {}
-static inline int
-mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf)
-{ return -EOPNOTSUPP; }
#endif /* CONFIG_MLX5_ESWITCH */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 3a183d690e23..79d93d6c7d7a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -48,7 +48,7 @@
#include <linux/rhashtable.h>
#include <net/switchdev.h>
#include <net/xdp.h>
-#include <linux/net_dim.h>
+#include <linux/dim.h>
#include <linux/bits.h>
#include "wq.h"
#include "mlx5_core.h"
@@ -137,6 +137,7 @@ struct page_pool;
#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1)
#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
#define MLX5E_TX_CQ_POLL_BUDGET 128
+#define MLX5E_TX_XSK_POLL_BUDGET 64
#define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */
#define MLX5E_UMR_WQE_INLINE_SZ \
@@ -155,6 +156,11 @@ do { \
##__VA_ARGS__); \
} while (0)
+enum mlx5e_rq_group {
+ MLX5E_RQ_GROUP_REGULAR,
+ MLX5E_RQ_GROUP_XSK,
+ MLX5E_NUM_RQ_GROUPS /* Keep last. */
+};
static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
{
@@ -179,7 +185,8 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
/* Use this function to get max num channels after netdev was created */
static inline int mlx5e_get_netdev_max_channels(struct net_device *netdev)
{
- return min_t(unsigned int, netdev->num_rx_queues,
+ return min_t(unsigned int,
+ netdev->num_rx_queues / MLX5E_NUM_RQ_GROUPS,
netdev->num_tx_queues);
}
@@ -202,7 +209,10 @@ struct mlx5e_umr_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_umr_ctrl_seg uctrl;
struct mlx5_mkey_seg mkc;
- struct mlx5_mtt inline_mtts[0];
+ union {
+ struct mlx5_mtt inline_mtts[0];
+ u8 tls_static_params_ctx[0];
+ };
};
extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
@@ -238,9 +248,9 @@ struct mlx5e_params {
u16 num_channels;
u8 num_tc;
bool rx_cqe_compress_def;
- struct net_dim_cq_moder rx_cq_moderation;
- struct net_dim_cq_moder tx_cq_moderation;
bool tunneled_offload_en;
+ struct dim_cq_moder rx_cq_moderation;
+ struct dim_cq_moder tx_cq_moderation;
bool lro_en;
u8 tx_min_inline_mode;
bool vlan_strip_disable;
@@ -250,6 +260,7 @@ struct mlx5e_params {
u32 lro_timeout;
u32 pflags;
struct bpf_prog *xdp_prog;
+ struct mlx5e_xsk *xsk;
unsigned int sw_mtu;
int hard_mtu;
};
@@ -294,6 +305,7 @@ enum {
MLX5E_RQ_STATE_ENABLED,
MLX5E_RQ_STATE_AM,
MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
+ MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
};
struct mlx5e_cq {
@@ -325,6 +337,9 @@ struct mlx5e_tx_wqe_info {
u32 num_bytes;
u8 num_wqebbs;
u8 num_dma;
+#ifdef CONFIG_MLX5_EN_TLS
+ skb_frag_t *resync_dump_frag;
+#endif
};
enum mlx5e_dma_map_type {
@@ -348,6 +363,13 @@ enum {
struct mlx5e_sq_wqe_info {
u8 opcode;
+
+ /* Auxiliary data for different opcodes. */
+ union {
+ struct {
+ struct mlx5e_rq *rq;
+ } umr;
+ };
};
struct mlx5e_txqsq {
@@ -356,7 +378,7 @@ struct mlx5e_txqsq {
/* dirtied @completion */
u16 cc;
u32 dma_fifo_cc;
- struct net_dim dim; /* Adaptive Moderation */
+ struct dim dim; /* Adaptive Moderation */
/* dirtied @xmit */
u16 pc ____cacheline_aligned_in_smp;
@@ -375,6 +397,7 @@ struct mlx5e_txqsq {
void __iomem *uar_map;
struct netdev_queue *txq;
u32 sqn;
+ u16 stop_room;
u8 min_inline_mode;
struct device *pdev;
__be32 mkey_be;
@@ -385,20 +408,62 @@ struct mlx5e_txqsq {
/* control path */
struct mlx5_wq_ctrl wq_ctrl;
struct mlx5e_channel *channel;
+ int ch_ix;
int txq_ix;
u32 rate_limit;
struct work_struct recover_work;
} ____cacheline_aligned_in_smp;
struct mlx5e_dma_info {
- struct page *page;
- dma_addr_t addr;
+ dma_addr_t addr;
+ union {
+ struct page *page;
+ struct {
+ u64 handle;
+ void *data;
+ } xsk;
+ };
+};
+
+/* XDP packets can be transmitted in different ways. On completion, we need to
+ * distinguish between them to clean up things in a proper way.
+ */
+enum mlx5e_xdp_xmit_mode {
+ /* An xdp_frame was transmitted due to either XDP_REDIRECT from another
+ * device or XDP_TX from an XSK RQ. The frame has to be unmapped and
+ * returned.
+ */
+ MLX5E_XDP_XMIT_MODE_FRAME,
+
+ /* The xdp_frame was created in place as a result of XDP_TX from a
+ * regular RQ. No DMA remapping happened, and the page belongs to us.
+ */
+ MLX5E_XDP_XMIT_MODE_PAGE,
+
+ /* No xdp_frame was created at all, the transmit happened from a UMEM
+ * page. The UMEM Completion Ring producer pointer has to be increased.
+ */
+ MLX5E_XDP_XMIT_MODE_XSK,
};
struct mlx5e_xdp_info {
- struct xdp_frame *xdpf;
- dma_addr_t dma_addr;
- struct mlx5e_dma_info di;
+ enum mlx5e_xdp_xmit_mode mode;
+ union {
+ struct {
+ struct xdp_frame *xdpf;
+ dma_addr_t dma_addr;
+ } frame;
+ struct {
+ struct mlx5e_rq *rq;
+ struct mlx5e_dma_info di;
+ } page;
+ };
+};
+
+struct mlx5e_xdp_xmit_data {
+ dma_addr_t dma_addr;
+ void *data;
+ u32 len;
};
struct mlx5e_xdp_info_fifo {
@@ -424,8 +489,12 @@ struct mlx5e_xdp_mpwqe {
};
struct mlx5e_xdpsq;
-typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*,
- struct mlx5e_xdp_info*);
+typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
+typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
+ struct mlx5e_xdp_xmit_data *,
+ struct mlx5e_xdp_info *,
+ int);
+
struct mlx5e_xdpsq {
/* data path */
@@ -442,8 +511,10 @@ struct mlx5e_xdpsq {
struct mlx5e_cq cq;
/* read only */
+ struct xdp_umem *umem;
struct mlx5_wq_cyc wq;
struct mlx5e_xdpsq_stats *stats;
+ mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check;
mlx5e_fp_xmit_xdp_frame xmit_xdp_frame;
struct {
struct mlx5e_xdp_wqe_info *wqe_info;
@@ -486,12 +557,6 @@ struct mlx5e_icosq {
struct mlx5e_channel *channel;
} ____cacheline_aligned_in_smp;
-static inline bool
-mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
-{
- return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
-}
-
struct mlx5e_wqe_frag_info {
struct mlx5e_dma_info *di;
u32 offset;
@@ -570,9 +635,11 @@ struct mlx5e_rq {
u8 log_stride_sz;
u8 umr_in_progress;
u8 umr_last_bulk;
+ u8 umr_completed;
} mpwqe;
};
struct {
+ u16 umem_headroom;
u16 headroom;
u8 map_dir; /* dma map direction */
} buff;
@@ -595,14 +662,18 @@ struct mlx5e_rq {
int ix;
unsigned int hw_mtu;
- struct net_dim dim; /* Dynamic Interrupt Moderation */
+ struct dim dim; /* Dynamic Interrupt Moderation */
/* XDP */
struct bpf_prog *xdp_prog;
- struct mlx5e_xdpsq xdpsq;
+ struct mlx5e_xdpsq *xdpsq;
DECLARE_BITMAP(flags, 8);
struct page_pool *page_pool;
+ /* AF_XDP zero-copy */
+ struct zero_copy_allocator zca;
+ struct xdp_umem *umem;
+
/* control */
struct mlx5_wq_ctrl wq_ctrl;
__be32 mkey_be;
@@ -615,9 +686,15 @@ struct mlx5e_rq {
struct xdp_rxq_info xdp_rxq;
} ____cacheline_aligned_in_smp;
+enum mlx5e_channel_state {
+ MLX5E_CHANNEL_STATE_XSK,
+ MLX5E_CHANNEL_NUM_STATES
+};
+
struct mlx5e_channel {
/* data path */
struct mlx5e_rq rq;
+ struct mlx5e_xdpsq rq_xdpsq;
struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC];
struct mlx5e_icosq icosq; /* internal control operations */
bool xdp;
@@ -630,6 +707,13 @@ struct mlx5e_channel {
/* XDP_REDIRECT */
struct mlx5e_xdpsq xdpsq;
+ /* AF_XDP zero-copy */
+ struct mlx5e_rq xskrq;
+ struct mlx5e_xdpsq xsksq;
+ struct mlx5e_icosq xskicosq;
+ /* xskicosq can be accessed from any CPU - the spinlock protects it. */
+ spinlock_t xskicosq_lock;
+
/* data path - accessed per napi poll */
struct irq_desc *irq_desc;
struct mlx5e_ch_stats *stats;
@@ -638,6 +722,7 @@ struct mlx5e_channel {
struct mlx5e_priv *priv;
struct mlx5_core_dev *mdev;
struct hwtstamp_config *tstamp;
+ DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
int ix;
int cpu;
cpumask_var_t xps_cpumask;
@@ -653,14 +738,17 @@ struct mlx5e_channel_stats {
struct mlx5e_ch_stats ch;
struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
struct mlx5e_rq_stats rq;
+ struct mlx5e_rq_stats xskrq;
struct mlx5e_xdpsq_stats rq_xdpsq;
struct mlx5e_xdpsq_stats xdpsq;
+ struct mlx5e_xdpsq_stats xsksq;
} ____cacheline_aligned_in_smp;
enum {
MLX5E_STATE_OPENED,
MLX5E_STATE_DESTROYING,
MLX5E_STATE_XDP_TX_ENABLED,
+ MLX5E_STATE_XDP_OPEN,
};
struct mlx5e_rqt {
@@ -693,6 +781,17 @@ struct mlx5e_modify_sq_param {
int rl_index;
};
+struct mlx5e_xsk {
+ /* UMEMs are stored separately from channels, because we don't want to
+ * lose them when channels are recreated. The kernel also stores UMEMs,
+ * but it doesn't distinguish between zero-copy and non-zero-copy UMEMs,
+ * so rely on our mechanism.
+ */
+ struct xdp_umem **umems;
+ u16 refcnt;
+ bool ever_used;
+};
+
struct mlx5e_priv {
/* priv data path fields - start */
struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
@@ -713,6 +812,7 @@ struct mlx5e_priv {
struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS];
+ struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS];
struct mlx5e_rss_params rss_params;
u32 tx_rates[MLX5E_MAX_NUM_SQS];
@@ -749,6 +849,7 @@ struct mlx5e_priv {
struct mlx5e_tls *tls;
#endif
struct devlink_health_reporter *tx_reporter;
+ struct mlx5e_xsk xsk;
};
struct mlx5e_profile {
@@ -762,6 +863,7 @@ struct mlx5e_profile {
void (*cleanup_tx)(struct mlx5e_priv *priv);
void (*enable)(struct mlx5e_priv *priv);
void (*disable)(struct mlx5e_priv *priv);
+ int (*update_rx)(struct mlx5e_priv *priv);
void (*update_stats)(struct mlx5e_priv *priv);
void (*update_carrier)(struct mlx5e_priv *priv);
struct {
@@ -780,7 +882,7 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more);
void mlx5e_trigger_irq(struct mlx5e_icosq *sq);
-void mlx5e_completion_event(struct mlx5_core_cq *mcq);
+void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe);
void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
int mlx5e_napi_poll(struct napi_struct *napi, int budget);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
@@ -792,11 +894,13 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info);
-void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
- bool recycle);
+void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info,
+ bool recycle);
void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
+void mlx5e_poll_ico_cq(struct mlx5e_cq *cq);
bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq);
void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
@@ -852,6 +956,30 @@ void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen);
struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt);
+struct mlx5e_xsk_param;
+
+struct mlx5e_rq_param;
+int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
+ struct xdp_umem *umem, struct mlx5e_rq *rq);
+int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
+void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
+void mlx5e_close_rq(struct mlx5e_rq *rq);
+
+struct mlx5e_sq_param;
+int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct mlx5e_icosq *sq);
+void mlx5e_close_icosq(struct mlx5e_icosq *sq);
+int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct xdp_umem *umem,
+ struct mlx5e_xdpsq *sq, bool is_redirect);
+void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq);
+
+struct mlx5e_cq_param;
+int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder,
+ struct mlx5e_cq_param *param, struct mlx5e_cq *cq);
+void mlx5e_close_cq(struct mlx5e_cq *cq);
+
int mlx5e_open_locked(struct net_device *netdev);
int mlx5e_close_locked(struct net_device *netdev);
@@ -897,102 +1025,6 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
MLX5_CAP_ETH(mdev, swp_csum) && MLX5_CAP_ETH(mdev, swp_lso);
}
-struct mlx5e_swp_spec {
- __be16 l3_proto;
- u8 l4_proto;
- u8 is_tun;
- __be16 tun_l3_proto;
- u8 tun_l4_proto;
-};
-
-static inline void
-mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
- struct mlx5e_swp_spec *swp_spec)
-{
- /* SWP offsets are in 2-bytes words */
- eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
- if (swp_spec->l3_proto == htons(ETH_P_IPV6))
- eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
- if (swp_spec->l4_proto) {
- eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2;
- if (swp_spec->l4_proto == IPPROTO_UDP)
- eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP;
- }
-
- if (swp_spec->is_tun) {
- eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
- if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6))
- eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
- } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */
- eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
- if (swp_spec->l3_proto == htons(ETH_P_IPV6))
- eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
- }
- switch (swp_spec->tun_l4_proto) {
- case IPPROTO_UDP:
- eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
- /* fall through */
- case IPPROTO_TCP:
- eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
- break;
- }
-}
-
-static inline void mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq,
- struct mlx5e_tx_wqe **wqe,
- u16 *pi)
-{
- struct mlx5_wq_cyc *wq = &sq->wq;
-
- *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- *wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
- memset(*wqe, 0, sizeof(**wqe));
-}
-
-static inline
-struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
-{
- u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc);
- struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
- struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
-
- memset(cseg, 0, sizeof(*cseg));
-
- cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
- cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01);
-
- (*pc)++;
-
- return wqe;
-}
-
-static inline
-void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc,
- void __iomem *uar_map,
- struct mlx5_wqe_ctrl_seg *ctrl)
-{
- ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
- /* ensure wqe is visible to device before updating doorbell record */
- dma_wmb();
-
- *wq->db = cpu_to_be32(pc);
-
- /* ensure doorbell record is visible to device before ringing the
- * doorbell
- */
- wmb();
-
- mlx5_write64((__be32 *)ctrl, uar_map);
-}
-
-static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
-{
- struct mlx5_core_cq *mcq;
-
- mcq = &cq->mcq;
- mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc);
-}
-
extern const struct ethtool_ops mlx5e_ethtool_ops;
#ifdef CONFIG_MLX5_CORE_EN_DCB
extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
@@ -1022,17 +1054,17 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv);
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv);
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv);
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
+void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
-int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
- u32 underlay_qpn, u32 *tisn);
+int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
int mlx5e_create_tises(struct mlx5e_priv *priv);
+int mlx5e_update_nic_rx(struct mlx5e_priv *priv);
void mlx5e_update_carrier(struct mlx5e_priv *priv);
int mlx5e_close(struct net_device *netdev);
int mlx5e_open(struct net_device *netdev);
@@ -1074,8 +1106,6 @@ u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv);
u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv);
int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
struct ethtool_ts_info *info);
-int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
- struct ethtool_flash *flash);
void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
struct ethtool_pauseparam *pauseparam);
int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
@@ -1096,6 +1126,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv);
void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_xsk *xsk,
struct mlx5e_rss_params *rss_params,
struct mlx5e_params *params,
u16 max_channels, u16 mtu);
@@ -1112,6 +1143,7 @@ void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
netdev_features_t mlx5e_features_check(struct sk_buff *skb,
struct net_device *netdev,
netdev_features_t features);
+int mlx5e_set_features(struct net_device *netdev, netdev_features_t features);
#ifdef CONFIG_MLX5_ESWITCH
int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac);
int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index d3744bffbae3..79301d116667 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -3,65 +3,102 @@
#include "en/params.h"
-u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params)
+static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- u16 linear_rq_headroom = params->xdp_prog ?
- XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
- u32 frag_sz;
+ return params->xdp_prog || xsk;
+}
+
+u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u16 headroom = NET_IP_ALIGN;
+
+ if (mlx5e_rx_is_xdp(params, xsk)) {
+ headroom += XDP_PACKET_HEADROOM;
+ if (xsk)
+ headroom += xsk->headroom;
+ } else {
+ headroom += MLX5_RX_HEADROOM;
+ }
+
+ return headroom;
+}
+
+u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+ u32 frag_sz = linear_rq_headroom + hw_mtu;
- linear_rq_headroom += NET_IP_ALIGN;
+ /* AF_XDP doesn't build SKBs in place. */
+ if (!xsk)
+ frag_sz = MLX5_SKB_FRAG_SZ(frag_sz);
- frag_sz = MLX5_SKB_FRAG_SZ(linear_rq_headroom + hw_mtu);
+ /* XDP in mlx5e doesn't support multiple packets per page. */
+ if (mlx5e_rx_is_xdp(params, xsk))
+ frag_sz = max_t(u32, frag_sz, PAGE_SIZE);
- if (params->xdp_prog && frag_sz < PAGE_SIZE)
- frag_sz = PAGE_SIZE;
+ /* Even if we can go with a smaller fragment size, we must not put
+ * multiple packets into a single frame.
+ */
+ if (xsk)
+ frag_sz = max_t(u32, frag_sz, xsk->chunk_size);
return frag_sz;
}
-u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params)
+u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+ u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk);
return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
}
-bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params)
+bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+ /* AF_XDP allocates SKBs on XDP_PASS - ensure they don't occupy more
+ * than one page. For this, check both with and without xsk.
+ */
+ u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk),
+ mlx5e_rx_get_linear_frag_sz(params, NULL));
- return !params->lro_en && frag_sz <= PAGE_SIZE;
+ return !params->lro_en && linear_frag_sz <= PAGE_SIZE;
}
#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \
MLX5_MPWQE_LOG_STRIDE_SZ_BASE)
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+ u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk);
s8 signed_log_num_strides_param;
u8 log_num_strides;
- if (!mlx5e_rx_is_linear_skb(params))
+ if (!mlx5e_rx_is_linear_skb(params, xsk))
return false;
- if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
+ if (order_base_2(linear_frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
return false;
if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
return true;
- log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz);
+ log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
signed_log_num_strides_param =
(s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
return signed_log_num_strides_param >= 0;
}
-u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params);
+ u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params, xsk);
/* Numbers are unsigned, don't subtract to avoid underflow. */
if (params->log_rq_mtu_frames <
@@ -72,33 +109,30 @@ u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
}
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
- return order_base_2(mlx5e_rx_get_linear_frag_sz(params));
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk));
return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
}
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
return MLX5_MPWRQ_LOG_WQE_SZ -
- mlx5e_mpwqe_get_log_stride_size(mdev, params);
+ mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
}
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u16 linear_rq_headroom = params->xdp_prog ?
- XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
- bool is_linear_skb;
-
- linear_rq_headroom += NET_IP_ALIGN;
-
- is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
- mlx5e_rx_is_linear_skb(params) :
- mlx5e_rx_mpwqe_is_linear_skb(mdev, params);
+ bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
+ mlx5e_rx_is_linear_skb(params, xsk) :
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
- return is_linear_skb ? linear_rq_headroom : 0;
+ return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index b106a0236f36..bd882b5ee9a7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -6,17 +6,119 @@
#include "en.h"
-u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params);
-u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params);
-bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params);
+struct mlx5e_xsk_param {
+ u16 headroom;
+ u16 chunk_size;
+};
+
+struct mlx5e_rq_param {
+ u32 rqc[MLX5_ST_SZ_DW(rqc)];
+ struct mlx5_wq_param wq;
+ struct mlx5e_rq_frags_info frags_info;
+};
+
+struct mlx5e_sq_param {
+ u32 sqc[MLX5_ST_SZ_DW(sqc)];
+ struct mlx5_wq_param wq;
+ bool is_mpw;
+};
+
+struct mlx5e_cq_param {
+ u32 cqc[MLX5_ST_SZ_DW(cqc)];
+ struct mlx5_wq_param wq;
+ u16 eq_ix;
+ u8 cq_period_mode;
+};
+
+struct mlx5e_channel_param {
+ struct mlx5e_rq_param rq;
+ struct mlx5e_sq_param sq;
+ struct mlx5e_sq_param xdp_sq;
+ struct mlx5e_sq_param icosq;
+ struct mlx5e_cq_param rx_cq;
+ struct mlx5e_cq_param tx_cq;
+ struct mlx5e_cq_param icosq_cq;
+};
+
+static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params,
+ u16 qid,
+ enum mlx5e_rq_group group,
+ u16 *ix)
+{
+ int nch = params->num_channels;
+ int ch = qid - nch * group;
+
+ if (ch < 0 || ch >= nch)
+ return false;
+
+ *ix = ch;
+ return true;
+}
+
+static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params,
+ u16 qid,
+ u16 *ix,
+ enum mlx5e_rq_group *group)
+{
+ u16 nch = params->num_channels;
+
+ *ix = qid % nch;
+ *group = qid / nch;
+}
+
+static inline bool mlx5e_qid_validate(struct mlx5e_params *params, u64 qid)
+{
+ return qid < params->num_channels * MLX5E_NUM_RQ_GROUPS;
+}
+
+/* Parameter calculations */
+
+u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
-u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+
+/* Build queue parameters */
+
+void mlx5e_build_rq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_param *param);
+void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
+ struct mlx5e_sq_param *param);
+void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_cq_param *param);
+void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_cq_param *param);
+void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_cq_param *param);
+void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param);
+void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param);
#endif /* __MLX5_EN_PARAMS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 476dd97f7f2f..f3d98748b211 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -142,22 +142,20 @@ static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq)
{
struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
u32 eqe_count;
- int ret;
netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
eq->core.eqn, eq->core.cons_index, eq->core.irqn);
eqe_count = mlx5_eq_poll_irq_disabled(eq);
- ret = eqe_count ? false : true;
if (!eqe_count) {
clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
- return ret;
+ return -EIO;
}
netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n",
eqe_count, eq->core.eqn);
sq->channel->stats->eq_rearm++;
- return ret;
+ return 0;
}
int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq)
@@ -264,13 +262,13 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
if (err)
- break;
+ goto unlock;
err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq->sqn,
state,
netif_xmit_stopped(sq->txq));
if (err)
- break;
+ goto unlock;
}
err = devlink_fmsg_arr_pair_nest_end(fmsg);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index fe5d4d7f15ed..a6a52806be45 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -3,37 +3,53 @@
#include <net/vxlan.h>
#include <net/gre.h>
-#include "lib/vxlan.h"
+#include <net/geneve.h>
#include "en/tc_tun.h"
+#include "en_tc.h"
+
+struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
+{
+ if (netif_is_vxlan(tunnel_dev))
+ return &vxlan_tunnel;
+ else if (netif_is_geneve(tunnel_dev))
+ return &geneve_tunnel;
+ else if (netif_is_gretap(tunnel_dev) ||
+ netif_is_ip6gretap(tunnel_dev))
+ return &gre_tunnel;
+ else
+ return NULL;
+}
static int get_route_and_out_devs(struct mlx5e_priv *priv,
struct net_device *dev,
struct net_device **route_dev,
struct net_device **out_dev)
{
+ struct net_device *uplink_dev, *uplink_upper, *real_dev;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct net_device *uplink_dev, *uplink_upper;
bool dst_is_lag_dev;
+ real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev;
uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
uplink_upper = netdev_master_upper_dev_get(uplink_dev);
dst_is_lag_dev = (uplink_upper &&
netif_is_lag_master(uplink_upper) &&
- dev == uplink_upper &&
+ real_dev == uplink_upper &&
mlx5_lag_is_sriov(priv->mdev));
/* if the egress device isn't on the same HW e-switch or
* it's a LAG device, use the uplink
*/
- if (!netdev_port_same_parent_id(priv->netdev, dev) ||
+ if (!netdev_port_same_parent_id(priv->netdev, real_dev) ||
dst_is_lag_dev) {
- *route_dev = uplink_dev;
- *out_dev = *route_dev;
+ *route_dev = dev;
+ *out_dev = uplink_dev;
} else {
*route_dev = dev;
if (is_vlan_dev(*route_dev))
*out_dev = uplink_dev;
- else if (mlx5e_eswitch_rep(dev))
+ else if (mlx5e_eswitch_rep(dev) &&
+ mlx5e_is_valid_eswitch_fwd_dev(priv, dev))
*out_dev = *route_dev;
else
return -EOPNOTSUPP;
@@ -141,63 +157,15 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
return 0;
}
-static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key)
-{
- __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
- struct udphdr *udp = (struct udphdr *)(buf);
- struct vxlanhdr *vxh = (struct vxlanhdr *)
- ((char *)udp + sizeof(struct udphdr));
-
- udp->dest = tun_key->tp_dst;
- vxh->vx_flags = VXLAN_HF_VNI;
- vxh->vx_vni = vxlan_vni_field(tun_id);
-
- return 0;
-}
-
-static int mlx5e_gen_gre_header(char buf[], struct ip_tunnel_key *tun_key)
-{
- __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
- int hdr_len;
- struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf);
-
- /* the HW does not calculate GRE csum or sequences */
- if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ))
- return -EOPNOTSUPP;
-
- greh->protocol = htons(ETH_P_TEB);
-
- /* GRE key */
- hdr_len = gre_calc_hlen(tun_key->tun_flags);
- greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags);
- if (tun_key->tun_flags & TUNNEL_KEY) {
- __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
-
- *ptr = tun_id;
- }
-
- return 0;
-}
-
static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
struct mlx5e_encap_entry *e)
{
- int err = 0;
- struct ip_tunnel_key *key = &e->tun_info.key;
-
- if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
- *ip_proto = IPPROTO_UDP;
- err = mlx5e_gen_vxlan_header(buf, key);
- } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
- *ip_proto = IPPROTO_GRE;
- err = mlx5e_gen_gre_header(buf, key);
- } else {
- pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n"
- , e->tunnel_type);
- err = -EOPNOTSUPP;
+ if (!e->tunnel) {
+ pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n");
+ return -EOPNOTSUPP;
}
- return err;
+ return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e);
}
static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev,
@@ -229,7 +197,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e)
{
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
- struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
struct net_device *out_dev, *route_dev;
struct neighbour *n = NULL;
struct flowi4 fl4 = {};
@@ -253,7 +221,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
ipv4_encap_size =
(is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
sizeof(struct iphdr) +
- e->tunnel_hlen;
+ e->tunnel->calc_hlen(e);
if (max_encap_size < ipv4_encap_size) {
mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
@@ -345,7 +313,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e)
{
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
- struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
struct net_device *out_dev, *route_dev;
struct neighbour *n = NULL;
struct flowi6 fl6 = {};
@@ -369,7 +337,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
ipv6_encap_size =
(is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
sizeof(struct ipv6hdr) +
- e->tunnel_hlen;
+ e->tunnel->calc_hlen(e);
if (max_encap_size < ipv6_encap_size) {
mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
@@ -455,27 +423,12 @@ out:
return err;
}
-int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev)
-{
- if (netif_is_vxlan(tunnel_dev))
- return MLX5E_TC_TUNNEL_TYPE_VXLAN;
- else if (netif_is_gretap(tunnel_dev) ||
- netif_is_ip6gretap(tunnel_dev))
- return MLX5E_TC_TUNNEL_TYPE_GRETAP;
- else
- return MLX5E_TC_TUNNEL_TYPE_UNKNOWN;
-}
-
bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
struct net_device *netdev)
{
- int tunnel_type = mlx5e_tc_tun_get_type(netdev);
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev);
- if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN &&
- MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
- return true;
- else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP &&
- MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap))
+ if (tunnel && tunnel->can_offload(priv))
return true;
else
return false;
@@ -486,71 +439,87 @@ int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
struct mlx5e_encap_entry *e,
struct netlink_ext_ack *extack)
{
- e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev);
-
- if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
- int dst_port = be16_to_cpu(e->tun_info.key.tp_dst);
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev);
- if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) {
- NL_SET_ERR_MSG_MOD(extack,
- "vxlan udp dport was not registered with the HW");
- netdev_warn(priv->netdev,
- "%d isn't an offloaded vxlan udp dport\n",
- dst_port);
- return -EOPNOTSUPP;
- }
- e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
- e->tunnel_hlen = VXLAN_HLEN;
- } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
- e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE;
- e->tunnel_hlen = gre_calc_hlen(e->tun_info.key.tun_flags);
- } else {
+ if (!tunnel) {
e->reformat_type = -1;
- e->tunnel_hlen = -1;
return -EOPNOTSUPP;
}
- return 0;
+
+ return tunnel->init_encap_attr(tunnel_dev, priv, e, extack);
+}
+
+int mlx5e_tc_tun_parse(struct net_device *filter_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v, u8 *match_level)
+{
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
+ int err = 0;
+
+ if (!tunnel) {
+ netdev_warn(priv->netdev,
+ "decapsulation offload is not supported for %s net device\n",
+ mlx5e_netdev_kind(filter_dev));
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ *match_level = tunnel->match_level;
+
+ if (tunnel->parse_udp_ports) {
+ err = tunnel->parse_udp_ports(priv, spec, f,
+ headers_c, headers_v);
+ if (err)
+ goto out;
+ }
+
+ if (tunnel->parse_tunnel) {
+ err = tunnel->parse_tunnel(priv, spec, f,
+ headers_c, headers_v);
+ if (err)
+ goto out;
+ }
+
+out:
+ return err;
}
-static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f,
- void *headers_c,
- void *headers_v)
+int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
{
- struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct netlink_ext_ack *extack = f->common.extack;
- void *misc_c = MLX5_ADDR_OF(fte_match_param,
- spec->match_criteria,
- misc_parameters);
- void *misc_v = MLX5_ADDR_OF(fte_match_param,
- spec->match_value,
- misc_parameters);
struct flow_match_ports enc_ports;
- flow_rule_match_enc_ports(rule, &enc_ports);
-
/* Full udp dst port must be given */
- if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) ||
- memchr_inv(&enc_ports.mask->dst, 0xff, sizeof(enc_ports.mask->dst))) {
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
NL_SET_ERR_MSG_MOD(extack,
- "VXLAN decap filter must include enc_dst_port condition");
+ "UDP tunnel decap filter must include enc_dst_port condition");
netdev_warn(priv->netdev,
- "VXLAN decap filter must include enc_dst_port condition\n");
+ "UDP tunnel decap filter must include enc_dst_port condition\n");
return -EOPNOTSUPP;
}
- /* udp dst port must be knonwn as a VXLAN port */
- if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(enc_ports.key->dst))) {
+ flow_rule_match_enc_ports(rule, &enc_ports);
+
+ if (memchr_inv(&enc_ports.mask->dst, 0xff,
+ sizeof(enc_ports.mask->dst))) {
NL_SET_ERR_MSG_MOD(extack,
- "Matched UDP port is not registered as a VXLAN port");
+ "UDP tunnel decap filter must match enc_dst_port fully");
netdev_warn(priv->netdev,
- "UDP port %d is not registered as a VXLAN port\n",
- be16_to_cpu(enc_ports.key->dst));
+ "UDP tunnel decap filter must match enc_dst_port fully\n");
return -EOPNOTSUPP;
}
- /* dst UDP port is valid here */
+ /* match on UDP protocol and dst port number */
+
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
@@ -559,92 +528,15 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
ntohs(enc_ports.key->dst));
+ /* UDP src port on outer header is generated by HW,
+ * so it is probably a bad idea to request matching it.
+ * Nonetheless, it is allowed.
+ */
+
MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
ntohs(enc_ports.mask->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
ntohs(enc_ports.key->src));
- /* match on VNI */
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
- struct flow_match_enc_keyid enc_keyid;
-
- flow_rule_match_enc_keyid(rule, &enc_keyid);
-
- MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
- be32_to_cpu(enc_keyid.mask->keyid));
- MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
- be32_to_cpu(enc_keyid.key->keyid));
- }
return 0;
}
-
-static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f,
- void *outer_headers_c,
- void *outer_headers_v)
-{
- void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
- void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
- struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
-
- if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) {
- NL_SET_ERR_MSG_MOD(f->common.extack,
- "GRE HW offloading is not supported");
- netdev_warn(priv->netdev, "GRE HW offloading is not supported\n");
- return -EOPNOTSUPP;
- }
-
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
- ip_protocol, IPPROTO_GRE);
-
- /* gre protocol*/
- MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol);
- MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB);
-
- /* gre key */
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
- struct flow_match_enc_keyid enc_keyid;
-
- flow_rule_match_enc_keyid(rule, &enc_keyid);
- MLX5_SET(fte_match_set_misc, misc_c,
- gre_key.key, be32_to_cpu(enc_keyid.mask->keyid));
- MLX5_SET(fte_match_set_misc, misc_v,
- gre_key.key, be32_to_cpu(enc_keyid.key->keyid));
- }
-
- return 0;
-}
-
-int mlx5e_tc_tun_parse(struct net_device *filter_dev,
- struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f,
- void *headers_c,
- void *headers_v, u8 *match_level)
-{
- int tunnel_type;
- int err = 0;
-
- tunnel_type = mlx5e_tc_tun_get_type(filter_dev);
- if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
- *match_level = MLX5_MATCH_L4;
- err = mlx5e_tc_tun_parse_vxlan(priv, spec, f,
- headers_c, headers_v);
- } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
- *match_level = MLX5_MATCH_L3;
- err = mlx5e_tc_tun_parse_gretap(priv, spec, f,
- headers_c, headers_v);
- } else {
- netdev_warn(priv->netdev,
- "decapsulation offload is not supported for %s (kind: \"%s\")\n",
- netdev_name(filter_dev),
- mlx5e_netdev_kind(filter_dev));
-
- return -EOPNOTSUPP;
- }
- return err;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
index b63f15de899d..c362b9225dc2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -14,9 +14,41 @@
enum {
MLX5E_TC_TUNNEL_TYPE_UNKNOWN,
MLX5E_TC_TUNNEL_TYPE_VXLAN,
- MLX5E_TC_TUNNEL_TYPE_GRETAP
+ MLX5E_TC_TUNNEL_TYPE_GENEVE,
+ MLX5E_TC_TUNNEL_TYPE_GRETAP,
};
+struct mlx5e_tc_tunnel {
+ int tunnel_type;
+ enum mlx5_flow_match_level match_level;
+
+ bool (*can_offload)(struct mlx5e_priv *priv);
+ int (*calc_hlen)(struct mlx5e_encap_entry *e);
+ int (*init_encap_attr)(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack);
+ int (*generate_ip_tun_hdr)(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e);
+ int (*parse_udp_ports)(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v);
+ int (*parse_tunnel)(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v);
+};
+
+extern struct mlx5e_tc_tunnel vxlan_tunnel;
+extern struct mlx5e_tc_tunnel geneve_tunnel;
+extern struct mlx5e_tc_tunnel gre_tunnel;
+
+struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev);
+
int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e,
@@ -30,15 +62,20 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e);
-int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev);
bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
struct net_device *netdev);
int mlx5e_tc_tun_parse(struct net_device *filter_dev,
struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f,
+ struct flow_cls_offload *f,
void *headers_c,
void *headers_v, u8 *match_level);
+int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v);
+
#endif //__MLX5_EN_TC_TUNNEL_H__
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
new file mode 100644
index 000000000000..951ea26d96bc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/geneve.h>
+#include "lib/geneve.h"
+#include "en/tc_tun.h"
+
+#define MLX5E_GENEVE_VER 0
+
+static bool mlx5e_tc_tun_can_offload_geneve(struct mlx5e_priv *priv)
+{
+ return !!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_GENEVE);
+}
+
+static int mlx5e_tc_tun_calc_hlen_geneve(struct mlx5e_encap_entry *e)
+{
+ return sizeof(struct udphdr) +
+ sizeof(struct genevehdr) +
+ e->tun_info->options_len;
+}
+
+static int mlx5e_tc_tun_check_udp_dport_geneve(struct mlx5e_priv *priv,
+ struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_ports enc_ports;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS))
+ return -EOPNOTSUPP;
+
+ flow_rule_match_enc_ports(rule, &enc_ports);
+
+ /* Currently we support only default GENEVE
+ * port, so udp dst port must match.
+ */
+ if (be16_to_cpu(enc_ports.key->dst) != GENEVE_UDP_PORT) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matched UDP dst port is not registered as a GENEVE port");
+ netdev_warn(priv->netdev,
+ "UDP port %d is not registered as a GENEVE port\n",
+ be16_to_cpu(enc_ports.key->dst));
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_udp_ports_geneve(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ int err;
+
+ err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+ if (err)
+ return err;
+
+ return mlx5e_tc_tun_check_udp_dport_geneve(priv, f);
+}
+
+static int mlx5e_tc_tun_init_encap_attr_geneve(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel = &geneve_tunnel;
+
+ /* Reformat type for GENEVE encap is similar to VXLAN:
+ * in both cases the HW adds in the same place a
+ * defined encapsulation header that the SW provides.
+ */
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
+ return 0;
+}
+
+static void mlx5e_tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
+{
+#ifdef __BIG_ENDIAN
+ vni[0] = (__force __u8)(tun_id >> 16);
+ vni[1] = (__force __u8)(tun_id >> 8);
+ vni[2] = (__force __u8)tun_id;
+#else
+ vni[0] = (__force __u8)((__force u64)tun_id >> 40);
+ vni[1] = (__force __u8)((__force u64)tun_id >> 48);
+ vni[2] = (__force __u8)((__force u64)tun_id >> 56);
+#endif
+}
+
+static int mlx5e_gen_ip_tunnel_header_geneve(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ const struct ip_tunnel_info *tun_info = e->tun_info;
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct genevehdr *geneveh;
+
+ geneveh = (struct genevehdr *)((char *)udp + sizeof(struct udphdr));
+
+ *ip_proto = IPPROTO_UDP;
+
+ udp->dest = tun_info->key.tp_dst;
+
+ memset(geneveh, 0, sizeof(*geneveh));
+ geneveh->ver = MLX5E_GENEVE_VER;
+ geneveh->opt_len = tun_info->options_len / 4;
+ geneveh->oam = !!(tun_info->key.tun_flags & TUNNEL_OAM);
+ geneveh->critical = !!(tun_info->key.tun_flags & TUNNEL_CRIT_OPT);
+ mlx5e_tunnel_id_to_vni(tun_info->key.tun_id, geneveh->vni);
+ geneveh->proto_type = htons(ETH_P_TEB);
+
+ if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+ if (!geneveh->opt_len)
+ return -EOPNOTSUPP;
+ ip_tunnel_info_opts_get(geneveh->options, tun_info);
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve_vni(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_enc_keyid enc_keyid;
+ void *misc_c, *misc_v;
+
+ misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return 0;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+ if (!enc_keyid.mask->keyid)
+ return 0;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_vni)) {
+ NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE VNI is not supported");
+ netdev_warn(priv->netdev, "Matching on GENEVE VNI is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(fte_match_set_misc, misc_c, geneve_vni, be32_to_cpu(enc_keyid.mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_vni, be32_to_cpu(enc_keyid.key->keyid));
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f)
+{
+ u8 max_tlv_option_data_len = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_option_data_len);
+ u8 max_tlv_options = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_options);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ void *misc_c, *misc_v, *misc_3_c, *misc_3_v;
+ struct geneve_opt *option_key, *option_mask;
+ __be32 opt_data_key = 0, opt_data_mask = 0;
+ struct flow_match_enc_opts enc_opts;
+ int res = 0;
+
+ misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ misc_3_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_3);
+ misc_3_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS))
+ return 0;
+
+ flow_rule_match_enc_opts(rule, &enc_opts);
+
+ if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.geneve_tlv_option_0_data)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options is not supported");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* make sure that we're talking about GENEVE options */
+
+ if (enc_opts.key->dst_opt_type != TUNNEL_GENEVE_OPT) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: option type is not GENEVE");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: option type is not GENEVE\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (enc_opts.mask->len &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.outer_geneve_opt_len)) {
+ NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options len is not supported");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options len is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* max_geneve_tlv_option_data_len comes in multiples of 4 bytes, and it
+ * doesn't include the TLV option header. 'geneve_opt_len' is a total
+ * len of all the options, including the headers, also multiples of 4
+ * bytes. Len that comes from the dissector is in bytes.
+ */
+
+ if ((enc_opts.key->len / 4) > ((max_tlv_option_data_len + 1) * max_tlv_options)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: unsupported options len");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: unsupported options len (len=%d)\n",
+ enc_opts.key->len);
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(fte_match_set_misc, misc_c, geneve_opt_len, enc_opts.mask->len / 4);
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_opt_len, enc_opts.key->len / 4);
+
+ /* we support matching on one option only, so just get it */
+ option_key = (struct geneve_opt *)&enc_opts.key->data[0];
+ option_mask = (struct geneve_opt *)&enc_opts.mask->data[0];
+
+ if (option_key->length > max_tlv_option_data_len) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: unsupported option len");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: unsupported option len (key=%d, mask=%d)\n",
+ option_key->length, option_mask->length);
+ return -EOPNOTSUPP;
+ }
+
+ /* data can't be all 0 - fail to offload such rule */
+ if (!memchr_inv(option_key->opt_data, 0, option_key->length * 4)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: can't match on 0 data field");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: can't match on 0 data field\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* add new GENEVE TLV options object */
+ res = mlx5_geneve_tlv_option_add(priv->mdev->geneve, option_key);
+ if (res) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: failed creating TLV opt object");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: failed creating TLV opt object (class:type:len = 0x%x:0x%x:%d)\n",
+ be16_to_cpu(option_key->opt_class),
+ option_key->type, option_key->length);
+ return res;
+ }
+
+ /* In general, after creating the object, need to query it
+ * in order to check which option data to set in misc3.
+ * But we support only geneve_tlv_option_0_data, so no
+ * point querying at this stage.
+ */
+
+ memcpy(&opt_data_key, option_key->opt_data, option_key->length * 4);
+ memcpy(&opt_data_mask, option_mask->opt_data, option_mask->length * 4);
+ MLX5_SET(fte_match_set_misc3, misc_3_v,
+ geneve_tlv_option_0_data, be32_to_cpu(opt_data_key));
+ MLX5_SET(fte_match_set_misc3, misc_3_c,
+ geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask));
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve_params(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f)
+{
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ struct netlink_ext_ack *extack = f->common.extack;
+
+ /* match on OAM - packets with OAM bit on should NOT be offloaded */
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_oam)) {
+ NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE OAM is not supported");
+ netdev_warn(priv->netdev, "Matching on GENEVE OAM is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_oam);
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_oam, 0);
+
+ /* Match on GENEVE protocol. We support only Transparent Eth Bridge. */
+
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.outer_geneve_protocol_type)) {
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_protocol_type);
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ETH_P_TEB);
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ int err;
+
+ err = mlx5e_tc_tun_parse_geneve_params(priv, spec, f);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_tun_parse_geneve_vni(priv, spec, f);
+ if (err)
+ return err;
+
+ return mlx5e_tc_tun_parse_geneve_options(priv, spec, f);
+}
+
+struct mlx5e_tc_tunnel geneve_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GENEVE,
+ .match_level = MLX5_MATCH_L4,
+ .can_offload = mlx5e_tc_tun_can_offload_geneve,
+ .calc_hlen = mlx5e_tc_tun_calc_hlen_geneve,
+ .init_encap_attr = mlx5e_tc_tun_init_encap_attr_geneve,
+ .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_geneve,
+ .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_geneve,
+ .parse_tunnel = mlx5e_tc_tun_parse_geneve,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c
new file mode 100644
index 000000000000..58b13192df23
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/gre.h>
+#include "en/tc_tun.h"
+
+static bool mlx5e_tc_tun_can_offload_gretap(struct mlx5e_priv *priv)
+{
+ return !!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap);
+}
+
+static int mlx5e_tc_tun_calc_hlen_gretap(struct mlx5e_encap_entry *e)
+{
+ return gre_calc_hlen(e->tun_info->key.tun_flags);
+}
+
+static int mlx5e_tc_tun_init_encap_attr_gretap(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel = &gre_tunnel;
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE;
+ return 0;
+}
+
+static int mlx5e_gen_ip_tunnel_header_gretap(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf);
+ __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+ int hdr_len;
+
+ *ip_proto = IPPROTO_GRE;
+
+ /* the HW does not calculate GRE csum or sequences */
+ if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ))
+ return -EOPNOTSUPP;
+
+ greh->protocol = htons(ETH_P_TEB);
+
+ /* GRE key */
+ hdr_len = mlx5e_tc_tun_calc_hlen_gretap(e);
+ greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags);
+ if (tun_key->tun_flags & TUNNEL_KEY) {
+ __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+ *ptr = tun_id;
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE);
+
+ /* gre protocol */
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol);
+ MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB);
+
+ /* gre key */
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_match_enc_keyid enc_keyid;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+ MLX5_SET(fte_match_set_misc, misc_c,
+ gre_key.key, be32_to_cpu(enc_keyid.mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v,
+ gre_key.key, be32_to_cpu(enc_keyid.key->keyid));
+ }
+
+ return 0;
+}
+
+struct mlx5e_tc_tunnel gre_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GRETAP,
+ .match_level = MLX5_MATCH_L3,
+ .can_offload = mlx5e_tc_tun_can_offload_gretap,
+ .calc_hlen = mlx5e_tc_tun_calc_hlen_gretap,
+ .init_encap_attr = mlx5e_tc_tun_init_encap_attr_gretap,
+ .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_gretap,
+ .parse_udp_ports = NULL,
+ .parse_tunnel = mlx5e_tc_tun_parse_gretap,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
new file mode 100644
index 000000000000..37b176801bcc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/vxlan.h>
+#include "lib/vxlan.h"
+#include "en/tc_tun.h"
+
+static bool mlx5e_tc_tun_can_offload_vxlan(struct mlx5e_priv *priv)
+{
+ return !!MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap);
+}
+
+static int mlx5e_tc_tun_calc_hlen_vxlan(struct mlx5e_encap_entry *e)
+{
+ return VXLAN_HLEN;
+}
+
+static int mlx5e_tc_tun_check_udp_dport_vxlan(struct mlx5e_priv *priv,
+ struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_ports enc_ports;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS))
+ return -EOPNOTSUPP;
+
+ flow_rule_match_enc_ports(rule, &enc_ports);
+
+ /* check the UDP destination port validity */
+
+ if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan,
+ be16_to_cpu(enc_ports.key->dst))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matched UDP dst port is not registered as a VXLAN port");
+ netdev_warn(priv->netdev,
+ "UDP port %d is not registered as a VXLAN port\n",
+ be16_to_cpu(enc_ports.key->dst));
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_udp_ports_vxlan(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ int err = 0;
+
+ err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+ if (err)
+ return err;
+
+ return mlx5e_tc_tun_check_udp_dport_vxlan(priv, f);
+}
+
+static int mlx5e_tc_tun_init_encap_attr_vxlan(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ int dst_port = be16_to_cpu(e->tun_info->key.tp_dst);
+
+ e->tunnel = &vxlan_tunnel;
+
+ if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "vxlan udp dport was not registered with the HW");
+ netdev_warn(priv->netdev,
+ "%d isn't an offloaded vxlan udp dport\n",
+ dst_port);
+ return -EOPNOTSUPP;
+ }
+
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
+ return 0;
+}
+
+static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct vxlanhdr *vxh;
+
+ vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
+ *ip_proto = IPPROTO_UDP;
+
+ udp->dest = tun_key->tp_dst;
+ vxh->vx_flags = VXLAN_HF_VNI;
+ vxh->vx_vni = vxlan_vni_field(tun_id);
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_enc_keyid enc_keyid;
+ void *misc_c, *misc_v;
+
+ misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return 0;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+ if (!enc_keyid.mask->keyid)
+ return 0;
+
+ /* match on VNI is required */
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.outer_vxlan_vni)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on VXLAN VNI is not supported");
+ netdev_warn(priv->netdev,
+ "Matching on VXLAN VNI is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
+ be32_to_cpu(enc_keyid.mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
+ be32_to_cpu(enc_keyid.key->keyid));
+
+ return 0;
+}
+
+struct mlx5e_tc_tunnel vxlan_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_VXLAN,
+ .match_level = MLX5_MATCH_L4,
+ .can_offload = mlx5e_tc_tun_can_offload_vxlan,
+ .calc_hlen = mlx5e_tc_tun_calc_hlen_vxlan,
+ .init_encap_attr = mlx5e_tc_tun_init_encap_attr_vxlan,
+ .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan,
+ .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan,
+ .parse_tunnel = mlx5e_tc_tun_parse_vxlan,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
new file mode 100644
index 000000000000..ddfe19adb3d9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TXRX_H___
+#define __MLX5_EN_TXRX_H___
+
+#include "en.h"
+
+#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS
+#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
+ MLX5E_SQ_NOPS_ROOM)
+
+#ifndef CONFIG_MLX5_EN_TLS
+#define MLX5E_SQ_TLS_ROOM (0)
+#else
+/* TLS offload requires additional stop_room for:
+ * - a resync SKB.
+ * kTLS offload requires additional stop_room for:
+ * - static params WQE,
+ * - progress params WQE, and
+ * - resync DUMP per frag.
+ */
+#define MLX5E_SQ_TLS_ROOM \
+ (MLX5_SEND_WQE_MAX_WQEBBS + \
+ MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + \
+ MAX_SKB_FRAGS * MLX5E_KTLS_MAX_DUMP_WQEBBS)
+#endif
+
+#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
+
+static inline bool
+mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
+{
+ return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
+}
+
+static inline void *
+mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, size_t size, u16 *pi)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ void *wqe;
+
+ *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
+ memset(wqe, 0, size);
+
+ return wqe;
+}
+
+static inline struct mlx5e_tx_wqe *
+mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc);
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+
+ memset(cseg, 0, sizeof(*cseg));
+
+ cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
+ cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01);
+
+ (*pc)++;
+
+ return wqe;
+}
+
+static inline struct mlx5e_tx_wqe *
+mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc);
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+
+ memset(cseg, 0, sizeof(*cseg));
+
+ cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
+ cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01);
+ cseg->fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL;
+
+ (*pc)++;
+
+ return wqe;
+}
+
+static inline void
+mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
+ u16 pi, u16 nnops)
+{
+ struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
+
+ edge_wi = wi + nnops;
+
+ /* fill sq frag edge with nops to avoid wqe wrapping two pages */
+ for (; wi < edge_wi; wi++) {
+ wi->skb = NULL;
+ wi->num_wqebbs = 1;
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+ sq->stats->nop += nnops;
+}
+
+static inline void
+mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
+ struct mlx5_wqe_ctrl_seg *ctrl)
+{
+ ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ /* ensure wqe is visible to device before updating doorbell record */
+ dma_wmb();
+
+ *wq->db = cpu_to_be32(pc);
+
+ /* ensure doorbell record is visible to device before ringing the
+ * doorbell
+ */
+ wmb();
+
+ mlx5_write64((__be32 *)ctrl, uar_map);
+}
+
+static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5e_tx_wqe *wqe)
+{
+ return !!wqe->ctrl.tisn;
+}
+
+static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
+{
+ struct mlx5_core_cq *mcq;
+
+ mcq = &cq->mcq;
+ mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc);
+}
+
+static inline struct mlx5e_sq_dma *
+mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i)
+{
+ return &sq->db.dma_fifo[i & sq->dma_fifo_mask];
+}
+
+static inline void
+mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
+ enum mlx5e_dma_map_type map_type)
+{
+ struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++);
+
+ dma->addr = addr;
+ dma->size = size;
+ dma->type = map_type;
+}
+
+static inline void
+mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
+{
+ switch (dma->type) {
+ case MLX5E_DMA_MAP_SINGLE:
+ dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ case MLX5E_DMA_MAP_PAGE:
+ dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ default:
+ WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
+ }
+}
+
+/* SW parser related functions */
+
+struct mlx5e_swp_spec {
+ __be16 l3_proto;
+ u8 l4_proto;
+ u8 is_tun;
+ __be16 tun_l3_proto;
+ u8 tun_l4_proto;
+};
+
+static inline void
+mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
+ struct mlx5e_swp_spec *swp_spec)
+{
+ /* SWP offsets are in 2-bytes words */
+ eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+ if (swp_spec->l3_proto == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+ if (swp_spec->l4_proto) {
+ eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2;
+ if (swp_spec->l4_proto == IPPROTO_UDP)
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP;
+ }
+
+ if (swp_spec->is_tun) {
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+ if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */
+ eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
+ if (swp_spec->l3_proto == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ }
+ switch (swp_spec->tun_l4_proto) {
+ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+ /* fall through */
+ case IPPROTO_TCP:
+ eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+ break;
+ }
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index eb8ef78e5626..b0b982cf69bb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -31,11 +31,13 @@
*/
#include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
#include "en/xdp.h"
+#include "en/params.h"
-int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
{
- int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM;
+ int hr = mlx5e_get_linear_rq_headroom(params, xsk);
/* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
* The condition checked in mlx5e_rx_is_linear_skb is:
@@ -54,25 +56,70 @@ int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
}
static inline bool
-mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
- struct xdp_buff *xdp)
+mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *di, struct xdp_buff *xdp)
{
+ struct mlx5e_xdp_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
+ struct xdp_frame *xdpf;
+ dma_addr_t dma_addr;
- xdpi.xdpf = convert_to_xdp_frame(xdp);
- if (unlikely(!xdpi.xdpf))
+ xdpf = convert_to_xdp_frame(xdp);
+ if (unlikely(!xdpf))
return false;
- xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf);
- dma_sync_single_for_device(sq->pdev, xdpi.dma_addr,
- xdpi.xdpf->len, PCI_DMA_TODEVICE);
- xdpi.di = *di;
- return sq->xmit_xdp_frame(sq, &xdpi);
+ xdptxd.data = xdpf->data;
+ xdptxd.len = xdpf->len;
+
+ if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) {
+ /* The xdp_buff was in the UMEM and was copied into a newly
+ * allocated page. The UMEM page was returned via the ZCA, and
+ * this new page has to be mapped at this point and has to be
+ * unmapped and returned via xdp_return_frame on completion.
+ */
+
+ /* Prevent double recycling of the UMEM page. Even in case this
+ * function returns false, the xdp_buff shouldn't be recycled,
+ * as it was already done in xdp_convert_zc_to_xdp_frame.
+ */
+ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
+
+ dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(sq->pdev, dma_addr)) {
+ xdp_return_frame(xdpf);
+ return false;
+ }
+
+ xdptxd.dma_addr = dma_addr;
+ xdpi.frame.xdpf = xdpf;
+ xdpi.frame.dma_addr = dma_addr;
+ } else {
+ /* Driver assumes that convert_to_xdp_frame returns an xdp_frame
+ * that points to the same memory region as the original
+ * xdp_buff. It allows to map the memory only once and to use
+ * the DMA_BIDIRECTIONAL mode.
+ */
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
+
+ dma_addr = di->addr + (xdpf->data - (void *)xdpf);
+ dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len,
+ DMA_TO_DEVICE);
+
+ xdptxd.dma_addr = dma_addr;
+ xdpi.page.rq = rq;
+ xdpi.page.di = *di;
+ }
+
+ return sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0);
}
/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- void *va, u16 *rx_headroom, u32 *len)
+ void *va, u16 *rx_headroom, u32 *len, bool xsk)
{
struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
struct xdp_buff xdp;
@@ -86,16 +133,20 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + *len;
xdp.data_hard_start = va;
+ if (xsk)
+ xdp.handle = di->xsk.handle;
xdp.rxq = &rq->xdp_rxq;
act = bpf_prog_run_xdp(prog, &xdp);
+ if (xsk)
+ xdp.handle += xdp.data - xdp.data_hard_start;
switch (act) {
case XDP_PASS:
*rx_headroom = xdp.data - xdp.data_hard_start;
*len = xdp.data_end - xdp.data;
return false;
case XDP_TX:
- if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp)))
+ if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp)))
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
return true;
@@ -106,7 +157,8 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
- mlx5e_page_dma_unmap(rq, di);
+ if (!xsk)
+ mlx5e_page_dma_unmap(rq, di);
rq->stats->xdp_redirect++;
return true;
default:
@@ -160,7 +212,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
stats->mpwqe++;
}
-static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
+void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
@@ -183,32 +235,55 @@ static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
session->wqe = NULL; /* Close session */
}
+enum {
+ MLX5E_XDP_CHECK_OK = 1,
+ MLX5E_XDP_CHECK_START_MPWQE = 2,
+};
+
+static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
+{
+ if (unlikely(!sq->mpwqe.wqe)) {
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
+ MLX5_SEND_WQE_MAX_WQEBBS))) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ sq->stats->full++;
+ return -EBUSY;
+ }
+
+ return MLX5E_XDP_CHECK_START_MPWQE;
+ }
+
+ return MLX5E_XDP_CHECK_OK;
+}
+
static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
- struct mlx5e_xdp_info *xdpi)
+ struct mlx5e_xdp_xmit_data *xdptxd,
+ struct mlx5e_xdp_info *xdpi,
+ int check_result)
{
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
- struct xdp_frame *xdpf = xdpi->xdpf;
-
- if (unlikely(sq->hw_mtu < xdpf->len)) {
+ if (unlikely(xdptxd->len > sq->hw_mtu)) {
stats->err++;
return false;
}
- if (unlikely(!session->wqe)) {
- if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
- MLX5_SEND_WQE_MAX_WQEBBS))) {
- /* SQ is full, ring doorbell */
- mlx5e_xmit_xdp_doorbell(sq);
- stats->full++;
- return false;
- }
+ if (!check_result)
+ check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq);
+ if (unlikely(check_result < 0))
+ return false;
+ if (check_result == MLX5E_XDP_CHECK_START_MPWQE) {
+ /* Start the session when nothing can fail, so it's guaranteed
+ * that if there is an active session, it has at least one dseg,
+ * and it's safe to complete it at any time.
+ */
mlx5e_xdp_mpwqe_session_start(sq);
}
- mlx5e_xdp_mpwqe_add_dseg(sq, xdpi, stats);
+ mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
if (unlikely(session->complete ||
session->ds_count == session->max_ds_count))
@@ -219,7 +294,22 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
return true;
}
-static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
+static int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+{
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ sq->stats->full++;
+ return -EBUSY;
+ }
+
+ return MLX5E_XDP_CHECK_OK;
+}
+
+static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_xmit_data *xdptxd,
+ struct mlx5e_xdp_info *xdpi,
+ int check_result)
{
struct mlx5_wq_cyc *wq = &sq->wq;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
@@ -229,9 +319,8 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *
struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
struct mlx5_wqe_data_seg *dseg = wqe->data;
- struct xdp_frame *xdpf = xdpi->xdpf;
- dma_addr_t dma_addr = xdpi->dma_addr;
- unsigned int dma_len = xdpf->len;
+ dma_addr_t dma_addr = xdptxd->dma_addr;
+ u32 dma_len = xdptxd->len;
struct mlx5e_xdpsq_stats *stats = sq->stats;
@@ -242,18 +331,16 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *
return false;
}
- if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
- /* SQ is full, ring doorbell */
- mlx5e_xmit_xdp_doorbell(sq);
- stats->full++;
+ if (!check_result)
+ check_result = mlx5e_xmit_xdp_frame_check(sq);
+ if (unlikely(check_result < 0))
return false;
- }
cseg->fm_ce_se = 0;
/* copy the inline part if required */
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
- memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE);
+ memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
dma_len -= MLX5E_XDP_MIN_INLINE;
dma_addr += MLX5E_XDP_MIN_INLINE;
@@ -277,7 +364,7 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *
static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_wqe_info *wi,
- struct mlx5e_rq *rq,
+ u32 *xsk_frames,
bool recycle)
{
struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
@@ -286,22 +373,32 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
for (i = 0; i < wi->num_pkts; i++) {
struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
- if (rq) {
- /* XDP_TX */
- mlx5e_page_release(rq, &xdpi.di, recycle);
- } else {
- /* XDP_REDIRECT */
- dma_unmap_single(sq->pdev, xdpi.dma_addr,
- xdpi.xdpf->len, DMA_TO_DEVICE);
- xdp_return_frame(xdpi.xdpf);
+ switch (xdpi.mode) {
+ case MLX5E_XDP_XMIT_MODE_FRAME:
+ /* XDP_TX from the XSK RQ and XDP_REDIRECT */
+ dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
+ xdpi.frame.xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame(xdpi.frame.xdpf);
+ break;
+ case MLX5E_XDP_XMIT_MODE_PAGE:
+ /* XDP_TX from the regular RQ */
+ mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle);
+ break;
+ case MLX5E_XDP_XMIT_MODE_XSK:
+ /* AF_XDP send */
+ (*xsk_frames)++;
+ break;
+ default:
+ WARN_ON_ONCE(true);
}
}
}
-bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
struct mlx5e_xdpsq *sq;
struct mlx5_cqe64 *cqe;
+ u32 xsk_frames = 0;
u16 sqcc;
int i;
@@ -343,10 +440,13 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
sqcc += wi->num_wqebbs;
- mlx5e_free_xdpsq_desc(sq, wi, rq, true);
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true);
} while (!last_wqe);
} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+ if (xsk_frames)
+ xsk_umem_complete_tx(sq->umem, xsk_frames);
+
sq->stats->cqes += i;
mlx5_cqwq_update_db_record(&cq->wq);
@@ -358,8 +458,10 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
-void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
{
+ u32 xsk_frames = 0;
+
while (sq->cc != sq->pc) {
struct mlx5e_xdp_wqe_info *wi;
u16 ci;
@@ -369,8 +471,11 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
sq->cc += wi->num_wqebbs;
- mlx5e_free_xdpsq_desc(sq, wi, rq, false);
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false);
}
+
+ if (xsk_frames)
+ xsk_umem_complete_tx(sq->umem, xsk_frames);
}
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
@@ -398,21 +503,27 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
+ struct mlx5e_xdp_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
- xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len,
- DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) {
+ xdptxd.data = xdpf->data;
+ xdptxd.len = xdpf->len;
+ xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
+ xdptxd.len, DMA_TO_DEVICE);
+
+ if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) {
xdp_return_frame_rx_napi(xdpf);
drops++;
continue;
}
- xdpi.xdpf = xdpf;
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
+ xdpi.frame.xdpf = xdpf;
+ xdpi.frame.dma_addr = xdptxd.dma_addr;
- if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) {
- dma_unmap_single(sq->pdev, xdpi.dma_addr,
- xdpf->len, DMA_TO_DEVICE);
+ if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0))) {
+ dma_unmap_single(sq->pdev, xdptxd.dma_addr,
+ xdptxd.len, DMA_TO_DEVICE);
xdp_return_frame_rx_napi(xdpf);
drops++;
}
@@ -429,7 +540,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
{
- struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
+ struct mlx5e_xdpsq *xdpsq = rq->xdpsq;
if (xdpsq->mpwqe.wqe)
mlx5e_xdp_mpwqe_complete(xdpsq);
@@ -444,6 +555,8 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
+ sq->xmit_xdp_frame_check = is_mpw ?
+ mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check;
sq->xmit_xdp_frame = is_mpw ?
mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 8b537a4b0840..b90923932668 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -33,17 +33,20 @@
#define __MLX5_EN_XDP_H__
#include "en.h"
+#include "en/txrx.h"
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
-int mlx5e_xdp_max_mtu(struct mlx5e_params *params);
+struct mlx5e_xsk_param;
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- void *va, u16 *rx_headroom, u32 *len);
-bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq);
-void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq);
+ void *va, u16 *rx_headroom, u32 *len, bool xsk);
+void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw);
void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
@@ -66,6 +69,21 @@ static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
}
+static inline void mlx5e_xdp_set_open(struct mlx5e_priv *priv)
+{
+ set_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+}
+
+static inline void mlx5e_xdp_set_closed(struct mlx5e_priv *priv)
+{
+ clear_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+}
+
+static inline bool mlx5e_xdp_is_open(struct mlx5e_priv *priv)
+{
+ return test_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+}
+
static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
{
if (sq->doorbell_cseg) {
@@ -97,15 +115,14 @@ static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq)
}
static inline void
-mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi,
+mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_xmit_data *xdptxd,
struct mlx5e_xdpsq_stats *stats)
{
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
- dma_addr_t dma_addr = xdpi->dma_addr;
- struct xdp_frame *xdpf = xdpi->xdpf;
struct mlx5_wqe_data_seg *dseg =
(struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
- u16 dma_len = xdpf->len;
+ u32 dma_len = xdptxd->len;
session->pkt_count++;
@@ -124,7 +141,7 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi,
}
inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG);
- memcpy(inline_dseg->data, xdpf->data, dma_len);
+ memcpy(inline_dseg->data, xdptxd->data, dma_len);
session->ds_count += ds_cnt;
stats->inlnw++;
@@ -132,7 +149,7 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi,
}
no_inline:
- dseg->addr = cpu_to_be64(dma_addr);
+ dseg->addr = cpu_to_be64(xdptxd->dma_addr);
dseg->byte_count = cpu_to_be32(dma_len);
dseg->lkey = sq->mkey_be;
session->ds_count++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile
new file mode 100644
index 000000000000..5ee42991900a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/../..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
new file mode 100644
index 000000000000..6a55573ec8f2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "rx.h"
+#include "en/xdp.h"
+#include <net/xdp_sock.h>
+
+/* RX data path */
+
+bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count)
+{
+ /* Check in advance that we have enough frames, instead of allocating
+ * one-by-one, failing and moving frames to the Reuse Ring.
+ */
+ return xsk_umem_has_addrs_rq(rq->umem, count);
+}
+
+int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ struct xdp_umem *umem = rq->umem;
+ u64 handle;
+
+ if (!xsk_umem_peek_addr_rq(umem, &handle))
+ return -ENOMEM;
+
+ dma_info->xsk.handle = handle + rq->buff.umem_headroom;
+ dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle);
+
+ /* No need to add headroom to the DMA address. In striding RQ case, we
+ * just provide pages for UMR, and headroom is counted at the setup
+ * stage when creating a WQE. In non-striding RQ case, headroom is
+ * accounted in mlx5e_alloc_rx_wqe.
+ */
+ dma_info->addr = xdp_umem_get_dma(umem, handle);
+
+ xsk_umem_discard_addr_rq(umem);
+
+ dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ return 0;
+}
+
+static inline void mlx5e_xsk_recycle_frame(struct mlx5e_rq *rq, u64 handle)
+{
+ xsk_umem_fq_reuse(rq->umem, handle & rq->umem->chunk_mask);
+}
+
+/* XSKRQ uses pages from UMEM, they must not be released. They are returned to
+ * the userspace if possible, and if not, this function is called to reuse them
+ * in the driver.
+ */
+void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ mlx5e_xsk_recycle_frame(rq, dma_info->xsk.handle);
+}
+
+/* Return a frame back to the hardware to fill in again. It is used by XDP when
+ * the XDP program returns XDP_TX or XDP_REDIRECT not to an XSKMAP.
+ */
+void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
+{
+ struct mlx5e_rq *rq = container_of(zca, struct mlx5e_rq, zca);
+
+ mlx5e_xsk_recycle_frame(rq, handle);
+}
+
+static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data,
+ u32 cqe_bcnt)
+{
+ struct sk_buff *skb;
+
+ skb = napi_alloc_skb(rq->cq.napi, cqe_bcnt);
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ skb_put_data(skb, data, cqe_bcnt);
+
+ return skb;
+}
+
+struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
+ struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt,
+ u32 head_offset,
+ u32 page_idx)
+{
+ struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
+ u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
+ u32 cqe_bcnt32 = cqe_bcnt;
+ void *va, *data;
+ u32 frag_size;
+ bool consumed;
+
+ /* Check packet size. Note LRO doesn't use linear SKB */
+ if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+ rq->stats->oversize_pkts_sw_drop++;
+ return NULL;
+ }
+
+ /* head_offset is not used in this function, because di->xsk.data and
+ * di->addr point directly to the necessary place. Furthermore, in the
+ * current implementation, one page = one packet = one frame, so
+ * head_offset should always be 0.
+ */
+ WARN_ON_ONCE(head_offset);
+
+ va = di->xsk.data;
+ data = va + rx_headroom;
+ frag_size = rq->buff.headroom + cqe_bcnt32;
+
+ dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
+ prefetch(data);
+
+ rcu_read_lock();
+ consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, true);
+ rcu_read_unlock();
+
+ /* Possible flows:
+ * - XDP_REDIRECT to XSKMAP:
+ * The page is owned by the userspace from now.
+ * - XDP_TX and other XDP_REDIRECTs:
+ * The page was returned by ZCA and recycled.
+ * - XDP_DROP:
+ * Recycle the page.
+ * - XDP_PASS:
+ * Allocate an SKB, copy the data and recycle the page.
+ *
+ * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its
+ * size is the same as the Driver RX Ring's size, and pages for WQEs are
+ * allocated first from the Reuse Ring, so it has enough space.
+ */
+
+ if (likely(consumed)) {
+ if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
+ __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+ return NULL; /* page/packet was consumed by XDP */
+ }
+
+ /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
+ * frame. On SKB allocation failure, NULL is returned.
+ */
+ return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt32);
+}
+
+struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt)
+{
+ struct mlx5e_dma_info *di = wi->di;
+ u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
+ void *va, *data;
+ bool consumed;
+ u32 frag_size;
+
+ /* wi->offset is not used in this function, because di->xsk.data and
+ * di->addr point directly to the necessary place. Furthermore, in the
+ * current implementation, one page = one packet = one frame, so
+ * wi->offset should always be 0.
+ */
+ WARN_ON_ONCE(wi->offset);
+
+ va = di->xsk.data;
+ data = va + rx_headroom;
+ frag_size = rq->buff.headroom + cqe_bcnt;
+
+ dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
+ prefetch(data);
+
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
+ rq->stats->wqe_err++;
+ return NULL;
+ }
+
+ rcu_read_lock();
+ consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, true);
+ rcu_read_unlock();
+
+ if (likely(consumed))
+ return NULL; /* page/packet was consumed by XDP */
+
+ /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
+ * will be handled by mlx5e_put_rx_frag.
+ * On SKB allocation failure, NULL is returned.
+ */
+ return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
new file mode 100644
index 000000000000..307b923a1361
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_RX_H__
+#define __MLX5_EN_XSK_RX_H__
+
+#include "en.h"
+
+/* RX data path */
+
+bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count);
+int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info);
+void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info);
+void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
+struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
+ struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt,
+ u32 head_offset,
+ u32 page_idx);
+struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt);
+
+#endif /* __MLX5_EN_XSK_RX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
new file mode 100644
index 000000000000..aaffa6f68dc0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "setup.h"
+#include "en/params.h"
+
+bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5_core_dev *mdev)
+{
+ /* AF_XDP doesn't support frames larger than PAGE_SIZE, and the current
+ * mlx5e XDP implementation doesn't support multiple packets per page.
+ */
+ if (xsk->chunk_size != PAGE_SIZE)
+ return false;
+
+ /* Current MTU and XSK headroom don't allow packets to fit the frames. */
+ if (mlx5e_rx_get_linear_frag_sz(params, xsk) > xsk->chunk_size)
+ return false;
+
+ /* frag_sz is different for regular and XSK RQs, so ensure that linear
+ * SKB mode is possible.
+ */
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ return mlx5e_rx_is_linear_skb(params, xsk);
+ }
+}
+
+static void mlx5e_build_xskicosq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(priv, param);
+
+ MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+}
+
+static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_channel_param *cparam)
+{
+ const u8 xskicosq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+
+ mlx5e_build_rq_param(priv, params, xsk, &cparam->rq);
+ mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
+ mlx5e_build_xskicosq_param(priv, xskicosq_size, &cparam->icosq);
+ mlx5e_build_rx_cq_param(priv, params, xsk, &cparam->rx_cq);
+ mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq);
+ mlx5e_build_ico_cq_param(priv, xskicosq_size, &cparam->icosq_cq);
+}
+
+int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
+ struct mlx5e_channel *c)
+{
+ struct mlx5e_channel_param cparam = {};
+ struct dim_cq_moder icocq_moder = {};
+ int err;
+
+ if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev))
+ return -EINVAL;
+
+ mlx5e_build_xsk_cparam(priv, params, xsk, &cparam);
+
+ err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq);
+ if (unlikely(err))
+ return err;
+
+ err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq);
+ if (unlikely(err))
+ goto err_close_rx_cq;
+
+ err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq);
+ if (unlikely(err))
+ goto err_close_rq;
+
+ /* Create a separate SQ, so that when the UMEM is disabled, we could
+ * close this SQ safely and stop receiving CQEs. In other case, e.g., if
+ * the XDPSQ was used instead, we might run into trouble when the UMEM
+ * is disabled and then reenabled, but the SQ continues receiving CQEs
+ * from the old UMEM.
+ */
+ err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true);
+ if (unlikely(err))
+ goto err_close_tx_cq;
+
+ err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq);
+ if (unlikely(err))
+ goto err_close_sq;
+
+ /* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be
+ * triggered and NAPI to be called on the correct CPU.
+ */
+ err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq);
+ if (unlikely(err))
+ goto err_close_icocq;
+
+ spin_lock_init(&c->xskicosq_lock);
+
+ set_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+
+ return 0;
+
+err_close_icocq:
+ mlx5e_close_cq(&c->xskicosq.cq);
+
+err_close_sq:
+ mlx5e_close_xdpsq(&c->xsksq);
+
+err_close_tx_cq:
+ mlx5e_close_cq(&c->xsksq.cq);
+
+err_close_rq:
+ mlx5e_close_rq(&c->xskrq);
+
+err_close_rx_cq:
+ mlx5e_close_cq(&c->xskrq.cq);
+
+ return err;
+}
+
+void mlx5e_close_xsk(struct mlx5e_channel *c)
+{
+ clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+ napi_synchronize(&c->napi);
+
+ mlx5e_close_rq(&c->xskrq);
+ mlx5e_close_cq(&c->xskrq.cq);
+ mlx5e_close_icosq(&c->xskicosq);
+ mlx5e_close_cq(&c->xskicosq.cq);
+ mlx5e_close_xdpsq(&c->xsksq);
+ mlx5e_close_cq(&c->xsksq.cq);
+}
+
+void mlx5e_activate_xsk(struct mlx5e_channel *c)
+{
+ set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+ /* TX queue is created active. */
+ mlx5e_trigger_irq(&c->xskicosq);
+}
+
+void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
+{
+ mlx5e_deactivate_rq(&c->xskrq);
+ /* TX queue is disabled on close. */
+}
+
+static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn)
+{
+ struct mlx5e_redirect_rqt_param direct_rrp = {
+ .is_rss = false,
+ {
+ .rqn = rqn,
+ },
+ };
+
+ u32 rqtn = priv->xsk_tir[ix].rqt.rqtn;
+
+ return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
+}
+
+int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c)
+{
+ return mlx5e_redirect_xsk_rqt(priv, c->ix, c->xskrq.rqn);
+}
+
+int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix)
+{
+ return mlx5e_redirect_xsk_rqt(priv, ix, priv->drop_rq.rqn);
+}
+
+int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+ int err, i;
+
+ if (!priv->xsk.refcnt)
+ return 0;
+
+ for (i = 0; i < chs->num; i++) {
+ struct mlx5e_channel *c = chs->c[i];
+
+ if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ continue;
+
+ err = mlx5e_xsk_redirect_rqt_to_channel(priv, c);
+ if (unlikely(err))
+ goto err_stop;
+ }
+
+ return 0;
+
+err_stop:
+ for (i--; i >= 0; i--) {
+ if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
+ continue;
+
+ mlx5e_xsk_redirect_rqt_to_drop(priv, i);
+ }
+
+ return err;
+}
+
+void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+ int i;
+
+ if (!priv->xsk.refcnt)
+ return;
+
+ for (i = 0; i < chs->num; i++) {
+ if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
+ continue;
+
+ mlx5e_xsk_redirect_rqt_to_drop(priv, i);
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
new file mode 100644
index 000000000000..0dd11b81c046
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_SETUP_H__
+#define __MLX5_EN_XSK_SETUP_H__
+
+#include "en.h"
+
+struct mlx5e_xsk_param;
+
+bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5_core_dev *mdev);
+int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
+ struct mlx5e_channel *c);
+void mlx5e_close_xsk(struct mlx5e_channel *c);
+void mlx5e_activate_xsk(struct mlx5e_channel *c);
+void mlx5e_deactivate_xsk(struct mlx5e_channel *c);
+int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c);
+int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix);
+int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+
+#endif /* __MLX5_EN_XSK_SETUP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
new file mode 100644
index 000000000000..35e188cf4ea4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "tx.h"
+#include "umem.h"
+#include "en/xdp.h"
+#include "en/params.h"
+#include <net/xdp_sock.h>
+
+int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_channel *c;
+ u16 ix;
+
+ if (unlikely(!mlx5e_xdp_is_open(priv)))
+ return -ENETDOWN;
+
+ if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
+ return -EINVAL;
+
+ c = priv->channels.c[ix];
+
+ if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)))
+ return -ENXIO;
+
+ if (!napi_if_scheduled_mark_missed(&c->napi)) {
+ spin_lock(&c->xskicosq_lock);
+ mlx5e_trigger_irq(&c->xskicosq);
+ spin_unlock(&c->xskicosq_lock);
+ }
+
+ return 0;
+}
+
+/* When TX fails (because of the size of the packet), we need to get completions
+ * in order, so post a NOP to get a CQE. Since AF_XDP doesn't distinguish
+ * between successful TX and errors, handling in mlx5e_poll_xdpsq_cq is the
+ * same.
+ */
+static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_info *xdpi)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
+ struct mlx5e_tx_wqe *nopwqe;
+
+ wi->num_wqebbs = 1;
+ wi->num_pkts = 1;
+
+ nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
+ sq->doorbell_cseg = &nopwqe->ctrl;
+}
+
+bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
+{
+ struct xdp_umem *umem = sq->umem;
+ struct mlx5e_xdp_info xdpi;
+ struct mlx5e_xdp_xmit_data xdptxd;
+ bool work_done = true;
+ bool flush = false;
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK;
+
+ for (; budget; budget--) {
+ int check_result = sq->xmit_xdp_frame_check(sq);
+ struct xdp_desc desc;
+
+ if (unlikely(check_result < 0)) {
+ work_done = false;
+ break;
+ }
+
+ if (!xsk_umem_consume_tx(umem, &desc)) {
+ /* TX will get stuck until something wakes it up by
+ * triggering NAPI. Currently it's expected that the
+ * application calls sendto() if there are consumed, but
+ * not completed frames.
+ */
+ break;
+ }
+
+ xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr);
+ xdptxd.data = xdp_umem_get_data(umem, desc.addr);
+ xdptxd.len = desc.len;
+
+ dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr,
+ xdptxd.len, DMA_BIDIRECTIONAL);
+
+ if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, check_result))) {
+ if (sq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(sq);
+
+ mlx5e_xsk_tx_post_err(sq, &xdpi);
+ }
+
+ flush = true;
+ }
+
+ if (flush) {
+ if (sq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(sq);
+ mlx5e_xmit_xdp_doorbell(sq);
+
+ xsk_umem_consume_tx_done(umem);
+ }
+
+ return !(budget && work_done);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
new file mode 100644
index 000000000000..7add18bf78d8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_TX_H__
+#define __MLX5_EN_XSK_TX_H__
+
+#include "en.h"
+
+/* TX data path */
+
+int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid);
+
+bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget);
+
+#endif /* __MLX5_EN_XSK_TX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
new file mode 100644
index 000000000000..4baaa5788320
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <net/xdp_sock.h>
+#include "umem.h"
+#include "setup.h"
+#include "en/params.h"
+
+static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv,
+ struct xdp_umem *umem)
+{
+ struct device *dev = priv->mdev->device;
+ u32 i;
+
+ for (i = 0; i < umem->npgs; i++) {
+ dma_addr_t dma = dma_map_page(dev, umem->pgs[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ if (unlikely(dma_mapping_error(dev, dma)))
+ goto err_unmap;
+ umem->pages[i].dma = dma;
+ }
+
+ return 0;
+
+err_unmap:
+ while (i--) {
+ dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ umem->pages[i].dma = 0;
+ }
+
+ return -ENOMEM;
+}
+
+static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv,
+ struct xdp_umem *umem)
+{
+ struct device *dev = priv->mdev->device;
+ u32 i;
+
+ for (i = 0; i < umem->npgs; i++) {
+ dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ umem->pages[i].dma = 0;
+ }
+}
+
+static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk)
+{
+ if (!xsk->umems) {
+ xsk->umems = kcalloc(MLX5E_MAX_NUM_CHANNELS,
+ sizeof(*xsk->umems), GFP_KERNEL);
+ if (unlikely(!xsk->umems))
+ return -ENOMEM;
+ }
+
+ xsk->refcnt++;
+ xsk->ever_used = true;
+
+ return 0;
+}
+
+static void mlx5e_xsk_put_umems(struct mlx5e_xsk *xsk)
+{
+ if (!--xsk->refcnt) {
+ kfree(xsk->umems);
+ xsk->umems = NULL;
+ }
+}
+
+static int mlx5e_xsk_add_umem(struct mlx5e_xsk *xsk, struct xdp_umem *umem, u16 ix)
+{
+ int err;
+
+ err = mlx5e_xsk_get_umems(xsk);
+ if (unlikely(err))
+ return err;
+
+ xsk->umems[ix] = umem;
+ return 0;
+}
+
+static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix)
+{
+ xsk->umems[ix] = NULL;
+
+ mlx5e_xsk_put_umems(xsk);
+}
+
+static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem)
+{
+ return umem->headroom <= 0xffff && umem->chunk_size_nohr <= 0xffff;
+}
+
+void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk)
+{
+ xsk->headroom = umem->headroom;
+ xsk->chunk_size = umem->chunk_size_nohr + umem->headroom;
+}
+
+static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
+ struct xdp_umem *umem, u16 ix)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_xsk_param xsk;
+ struct mlx5e_channel *c;
+ int err;
+
+ if (unlikely(mlx5e_xsk_get_umem(&priv->channels.params, &priv->xsk, ix)))
+ return -EBUSY;
+
+ if (unlikely(!mlx5e_xsk_is_umem_sane(umem)))
+ return -EINVAL;
+
+ err = mlx5e_xsk_map_umem(priv, umem);
+ if (unlikely(err))
+ return err;
+
+ err = mlx5e_xsk_add_umem(&priv->xsk, umem, ix);
+ if (unlikely(err))
+ goto err_unmap_umem;
+
+ mlx5e_build_xsk_param(umem, &xsk);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ /* XSK objects will be created on open. */
+ goto validate_closed;
+ }
+
+ if (!params->xdp_prog) {
+ /* XSK objects will be created when an XDP program is set,
+ * and the channels are reopened.
+ */
+ goto validate_closed;
+ }
+
+ c = priv->channels.c[ix];
+
+ err = mlx5e_open_xsk(priv, params, &xsk, umem, c);
+ if (unlikely(err))
+ goto err_remove_umem;
+
+ mlx5e_activate_xsk(c);
+
+ /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide
+ * any Fill Ring entries at the setup stage.
+ */
+
+ err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]);
+ if (unlikely(err))
+ goto err_deactivate;
+
+ return 0;
+
+err_deactivate:
+ mlx5e_deactivate_xsk(c);
+ mlx5e_close_xsk(c);
+
+err_remove_umem:
+ mlx5e_xsk_remove_umem(&priv->xsk, ix);
+
+err_unmap_umem:
+ mlx5e_xsk_unmap_umem(priv, umem);
+
+ return err;
+
+validate_closed:
+ /* Check the configuration in advance, rather than fail at a later stage
+ * (in mlx5e_xdp_set or on open) and end up with no channels.
+ */
+ if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) {
+ err = -EINVAL;
+ goto err_remove_umem;
+ }
+
+ return 0;
+}
+
+static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
+{
+ struct xdp_umem *umem = mlx5e_xsk_get_umem(&priv->channels.params,
+ &priv->xsk, ix);
+ struct mlx5e_channel *c;
+
+ if (unlikely(!umem))
+ return -EINVAL;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto remove_umem;
+
+ /* XSK RQ and SQ are only created if XDP program is set. */
+ if (!priv->channels.params.xdp_prog)
+ goto remove_umem;
+
+ c = priv->channels.c[ix];
+ mlx5e_xsk_redirect_rqt_to_drop(priv, ix);
+ mlx5e_deactivate_xsk(c);
+ mlx5e_close_xsk(c);
+
+remove_umem:
+ mlx5e_xsk_remove_umem(&priv->xsk, ix);
+ mlx5e_xsk_unmap_umem(priv, umem);
+
+ return 0;
+}
+
+static int mlx5e_xsk_enable_umem(struct mlx5e_priv *priv, struct xdp_umem *umem,
+ u16 ix)
+{
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_xsk_enable_locked(priv, umem, ix);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix)
+{
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_xsk_disable_locked(priv, ix);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_params *params = &priv->channels.params;
+ u16 ix;
+
+ if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
+ return -EINVAL;
+
+ return umem ? mlx5e_xsk_enable_umem(priv, umem, ix) :
+ mlx5e_xsk_disable_umem(priv, ix);
+}
+
+int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries)
+{
+ struct xdp_umem_fq_reuse *reuseq;
+
+ reuseq = xsk_reuseq_prepare(nentries);
+ if (unlikely(!reuseq))
+ return -ENOMEM;
+ xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
+
+ return 0;
+}
+
+u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk)
+{
+ u16 res = xsk->refcnt ? params->num_channels : 0;
+
+ while (res) {
+ if (mlx5e_xsk_get_umem(params, xsk, res - 1))
+ break;
+ --res;
+ }
+
+ return res;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h
new file mode 100644
index 000000000000..25b4cbe58b54
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_UMEM_H__
+#define __MLX5_EN_XSK_UMEM_H__
+
+#include "en.h"
+
+static inline struct xdp_umem *mlx5e_xsk_get_umem(struct mlx5e_params *params,
+ struct mlx5e_xsk *xsk, u16 ix)
+{
+ if (!xsk || !xsk->umems)
+ return NULL;
+
+ if (unlikely(ix >= params->num_channels))
+ return NULL;
+
+ return xsk->umems[ix];
+}
+
+struct mlx5e_xsk_param;
+void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk);
+
+/* .ndo_bpf callback. */
+int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid);
+
+int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries);
+
+u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk);
+
+#endif /* __MLX5_EN_XSK_UMEM_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index 6da7c88742dc..3022463f2284 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -39,6 +39,7 @@
#include "en_accel/ipsec_rxtx.h"
#include "en_accel/tls_rxtx.h"
#include "en.h"
+#include "en/txrx.h"
#if IS_ENABLED(CONFIG_GENEVE)
static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index ca47c0540904..db84500b024f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -39,6 +39,7 @@
#include <linux/skbuff.h>
#include <net/xfrm.h>
#include "en.h"
+#include "en/txrx.h"
struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
struct sk_buff *skb, u32 *cqe_bcnt);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
new file mode 100644
index 000000000000..d2ff74d52720
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "en.h"
+#include "en_accel/ktls.h"
+
+static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
+ void *tisc;
+
+ tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ MLX5_SET(tisc, tisc, tls_en, 1);
+
+ return mlx5e_create_tis(mdev, in, tisn);
+}
+
+static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk,
+ enum tls_offload_ctx_dir direction,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_ktls_offload_context_tx *tx_priv;
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ if (WARN_ON(direction != TLS_OFFLOAD_CTX_DIR_TX))
+ return -EINVAL;
+
+ if (WARN_ON(!mlx5e_ktls_type_check(mdev, crypto_info)))
+ return -EOPNOTSUPP;
+
+ tx_priv = kvzalloc(sizeof(*tx_priv), GFP_KERNEL);
+ if (!tx_priv)
+ return -ENOMEM;
+
+ tx_priv->expected_seq = start_offload_tcp_sn;
+ tx_priv->crypto_info = crypto_info;
+ mlx5e_set_ktls_tx_priv_ctx(tls_ctx, tx_priv);
+
+ /* tc and underlay_qpn values are not in use for tls tis */
+ err = mlx5e_ktls_create_tis(mdev, &tx_priv->tisn);
+ if (err)
+ goto create_tis_fail;
+
+ err = mlx5_ktls_create_key(mdev, crypto_info, &tx_priv->key_id);
+ if (err)
+ goto encryption_key_create_fail;
+
+ mlx5e_ktls_tx_offload_set_pending(tx_priv);
+
+ return 0;
+
+encryption_key_create_fail:
+ mlx5e_destroy_tis(priv->mdev, tx_priv->tisn);
+create_tis_fail:
+ kvfree(tx_priv);
+ return err;
+}
+
+static void mlx5e_ktls_del(struct net_device *netdev,
+ struct tls_context *tls_ctx,
+ enum tls_offload_ctx_dir direction)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_ktls_offload_context_tx *tx_priv =
+ mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
+
+ mlx5_ktls_destroy_key(priv->mdev, tx_priv->key_id);
+ mlx5e_destroy_tis(priv->mdev, tx_priv->tisn);
+ kvfree(tx_priv);
+}
+
+static const struct tlsdev_ops mlx5e_ktls_ops = {
+ .tls_dev_add = mlx5e_ktls_add,
+ .tls_dev_del = mlx5e_ktls_del,
+};
+
+void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+
+ if (!mlx5_accel_is_ktls_device(priv->mdev))
+ return;
+
+ netdev->hw_features |= NETIF_F_HW_TLS_TX;
+ netdev->features |= NETIF_F_HW_TLS_TX;
+
+ netdev->tlsdev_ops = &mlx5e_ktls_ops;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
new file mode 100644
index 000000000000..407da83474ef
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5E_KTLS_H__
+#define __MLX5E_KTLS_H__
+
+#include "en.h"
+
+#ifdef CONFIG_MLX5_EN_TLS
+#include <net/tls.h>
+#include "accel/tls.h"
+
+#define MLX5E_KTLS_STATIC_UMR_WQE_SZ \
+ (sizeof(struct mlx5e_umr_wqe) + MLX5_ST_SZ_BYTES(tls_static_params))
+#define MLX5E_KTLS_STATIC_WQEBBS \
+ (DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_BB))
+
+#define MLX5E_KTLS_PROGRESS_WQE_SZ \
+ (sizeof(struct mlx5e_tx_wqe) + MLX5_ST_SZ_BYTES(tls_progress_params))
+#define MLX5E_KTLS_PROGRESS_WQEBBS \
+ (DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_BB))
+#define MLX5E_KTLS_MAX_DUMP_WQEBBS 2
+
+enum {
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_OFFLOAD = 1,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION = 2,
+};
+
+enum {
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START = 0,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 1,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 2,
+};
+
+struct mlx5e_ktls_offload_context_tx {
+ struct tls_offload_context_tx *tx_ctx;
+ struct tls_crypto_info *crypto_info;
+ u32 expected_seq;
+ u32 tisn;
+ u32 key_id;
+ bool ctx_post_pending;
+};
+
+struct mlx5e_ktls_offload_context_tx_shadow {
+ struct tls_offload_context_tx tx_ctx;
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+};
+
+static inline void
+mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx,
+ struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+ struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx);
+ struct mlx5e_ktls_offload_context_tx_shadow *shadow;
+
+ BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX);
+
+ shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx;
+
+ shadow->priv_tx = priv_tx;
+ priv_tx->tx_ctx = tx_ctx;
+}
+
+static inline struct mlx5e_ktls_offload_context_tx *
+mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx)
+{
+ struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx);
+ struct mlx5e_ktls_offload_context_tx_shadow *shadow;
+
+ BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX);
+
+ shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx;
+
+ return shadow->priv_tx;
+}
+
+void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
+void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx);
+
+struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_tx_wqe **wqe, u16 *pi);
+void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ struct mlx5e_sq_dma *dma);
+
+#else
+
+static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
+{
+}
+
+#endif
+
+#endif /* __MLX5E_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
new file mode 100644
index 000000000000..ea032f54197e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include <linux/tls.h>
+#include "en.h"
+#include "en/txrx.h"
+#include "en_accel/ktls.h"
+
+enum {
+ MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2 = 0x2,
+};
+
+enum {
+ MLX5E_ENCRYPTION_STANDARD_TLS = 0x1,
+};
+
+#define EXTRACT_INFO_FIELDS do { \
+ salt = info->salt; \
+ rec_seq = info->rec_seq; \
+ salt_sz = sizeof(info->salt); \
+ rec_seq_sz = sizeof(info->rec_seq); \
+} while (0)
+
+static void
+fill_static_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+ struct tls_crypto_info *crypto_info = priv_tx->crypto_info;
+ struct tls12_crypto_info_aes_gcm_128 *info;
+ char *initial_rn, *gcm_iv;
+ u16 salt_sz, rec_seq_sz;
+ char *salt, *rec_seq;
+ u8 tls_version;
+
+ if (WARN_ON(crypto_info->cipher_type != TLS_CIPHER_AES_GCM_128))
+ return;
+
+ info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+ EXTRACT_INFO_FIELDS;
+
+ gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv);
+ initial_rn = MLX5_ADDR_OF(tls_static_params, ctx, initial_record_number);
+
+ memcpy(gcm_iv, salt, salt_sz);
+ memcpy(initial_rn, rec_seq, rec_seq_sz);
+
+ tls_version = MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2;
+
+ MLX5_SET(tls_static_params, ctx, tls_version, tls_version);
+ MLX5_SET(tls_static_params, ctx, const_1, 1);
+ MLX5_SET(tls_static_params, ctx, const_2, 2);
+ MLX5_SET(tls_static_params, ctx, encryption_standard,
+ MLX5E_ENCRYPTION_STANDARD_TLS);
+ MLX5_SET(tls_static_params, ctx, dek_index, priv_tx->key_id);
+}
+
+static void
+build_static_params(struct mlx5e_umr_wqe *wqe, u16 pc, u32 sqn,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ bool fence)
+{
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
+
+#define STATIC_PARAMS_DS_CNT \
+ DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_DS)
+
+ cseg->opmod_idx_opcode = cpu_to_be32((pc << 8) | MLX5_OPCODE_UMR |
+ (MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS << 24));
+ cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+ STATIC_PARAMS_DS_CNT);
+ cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
+ cseg->imm = cpu_to_be32(priv_tx->tisn);
+
+ ucseg->flags = MLX5_UMR_INLINE;
+ ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16);
+
+ fill_static_params_ctx(wqe->tls_static_params_ctx, priv_tx);
+}
+
+static void
+fill_progress_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+ MLX5_SET(tls_progress_params, ctx, pd, priv_tx->tisn);
+ MLX5_SET(tls_progress_params, ctx, record_tracker_state,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START);
+ MLX5_SET(tls_progress_params, ctx, auth_state,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD);
+}
+
+static void
+build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ bool fence)
+{
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+
+#define PROGRESS_PARAMS_DS_CNT \
+ DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_DS)
+
+ cseg->opmod_idx_opcode =
+ cpu_to_be32((pc << 8) | MLX5_OPCODE_SET_PSV |
+ (MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS << 24));
+ cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+ PROGRESS_PARAMS_DS_CNT);
+ cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
+
+ fill_progress_params_ctx(wqe->data, priv_tx);
+}
+
+static void tx_fill_wi(struct mlx5e_txqsq *sq,
+ u16 pi, u8 num_wqebbs,
+ skb_frag_t *resync_dump_frag)
+{
+ struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi];
+
+ wi->skb = NULL;
+ wi->num_wqebbs = num_wqebbs;
+ wi->resync_dump_frag = resync_dump_frag;
+}
+
+void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+ priv_tx->ctx_post_pending = true;
+}
+
+static bool
+mlx5e_ktls_tx_offload_test_and_clear_pending(struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+ bool ret = priv_tx->ctx_post_pending;
+
+ priv_tx->ctx_post_pending = false;
+
+ return ret;
+}
+
+static void
+post_static_params(struct mlx5e_txqsq *sq,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ bool fence)
+{
+ struct mlx5e_umr_wqe *umr_wqe;
+ u16 pi;
+
+ umr_wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_STATIC_UMR_WQE_SZ, &pi);
+ build_static_params(umr_wqe, sq->pc, sq->sqn, priv_tx, fence);
+ tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, NULL);
+ sq->pc += MLX5E_KTLS_STATIC_WQEBBS;
+}
+
+static void
+post_progress_params(struct mlx5e_txqsq *sq,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ bool fence)
+{
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_PROGRESS_WQE_SZ, &pi);
+ build_progress_params(wqe, sq->pc, sq->sqn, priv_tx, fence);
+ tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, NULL);
+ sq->pc += MLX5E_KTLS_PROGRESS_WQEBBS;
+}
+
+static void
+mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ bool skip_static_post, bool fence_first_post)
+{
+ bool progress_fence = skip_static_post || !fence_first_post;
+
+ if (!skip_static_post)
+ post_static_params(sq, priv_tx, fence_first_post);
+
+ post_progress_params(sq, priv_tx, progress_fence);
+}
+
+struct tx_sync_info {
+ u64 rcd_sn;
+ s32 sync_len;
+ int nr_frags;
+ skb_frag_t *frags[MAX_SKB_FRAGS];
+};
+
+static bool tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
+ u32 tcp_seq, struct tx_sync_info *info)
+{
+ struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx;
+ struct tls_record_info *record;
+ int remaining, i = 0;
+ unsigned long flags;
+ bool ret = true;
+
+ spin_lock_irqsave(&tx_ctx->lock, flags);
+ record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn);
+
+ if (unlikely(!record)) {
+ ret = false;
+ goto out;
+ }
+
+ if (unlikely(tcp_seq < tls_record_start_seq(record))) {
+ if (!tls_record_is_start_marker(record))
+ ret = false;
+ goto out;
+ }
+
+ info->sync_len = tcp_seq - tls_record_start_seq(record);
+ remaining = info->sync_len;
+ while (remaining > 0) {
+ skb_frag_t *frag = &record->frags[i];
+
+ __skb_frag_ref(frag);
+ remaining -= skb_frag_size(frag);
+ info->frags[i++] = frag;
+ }
+ /* reduce the part which will be sent with the original SKB */
+ if (remaining < 0)
+ skb_frag_size_add(info->frags[i - 1], remaining);
+ info->nr_frags = i;
+out:
+ spin_unlock_irqrestore(&tx_ctx->lock, flags);
+ return ret;
+}
+
+static void
+tx_post_resync_params(struct mlx5e_txqsq *sq,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ u64 rcd_sn)
+{
+ struct tls_crypto_info *crypto_info = priv_tx->crypto_info;
+ struct tls12_crypto_info_aes_gcm_128 *info;
+ __be64 rn_be = cpu_to_be64(rcd_sn);
+ bool skip_static_post;
+ u16 rec_seq_sz;
+ char *rec_seq;
+
+ if (WARN_ON(crypto_info->cipher_type != TLS_CIPHER_AES_GCM_128))
+ return;
+
+ info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+ rec_seq = info->rec_seq;
+ rec_seq_sz = sizeof(info->rec_seq);
+
+ skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz);
+ if (!skip_static_post)
+ memcpy(rec_seq, &rn_be, rec_seq_sz);
+
+ mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, skip_static_post, true);
+}
+
+static int
+tx_post_resync_dump(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ skb_frag_t *frag, u32 tisn, bool first)
+{
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_wqe_eth_seg *eseg;
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5e_tx_wqe *wqe;
+ dma_addr_t dma_addr = 0;
+ u16 ds_cnt, ds_cnt_inl;
+ u8 num_wqebbs;
+ u16 pi, ihs;
+ int fsz;
+
+ ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
+ ihs = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb));
+ ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS);
+ ds_cnt += ds_cnt_inl;
+ ds_cnt += 1; /* one frag */
+
+ wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
+
+ num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+
+ cseg = &wqe->ctrl;
+ eseg = &wqe->eth;
+ dseg = wqe->data;
+
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ cseg->imm = cpu_to_be32(tisn);
+ cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
+
+ eseg->inline_hdr.sz = cpu_to_be16(ihs);
+ memcpy(eseg->inline_hdr.start, skb->data, ihs);
+ dseg += ds_cnt_inl;
+
+ fsz = skb_frag_size(frag);
+ dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
+ return -ENOMEM;
+
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->lkey = sq->mkey_be;
+ dseg->byte_count = cpu_to_be32(fsz);
+ mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
+
+ tx_fill_wi(sq, pi, num_wqebbs, frag);
+ sq->pc += num_wqebbs;
+
+ WARN(num_wqebbs > MLX5E_KTLS_MAX_DUMP_WQEBBS,
+ "unexpected DUMP num_wqebbs, %d > %d",
+ num_wqebbs, MLX5E_KTLS_MAX_DUMP_WQEBBS);
+
+ return 0;
+}
+
+void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ struct mlx5e_sq_dma *dma)
+{
+ struct mlx5e_sq_stats *stats = sq->stats;
+
+ mlx5e_tx_dma_unmap(sq->pdev, dma);
+ __skb_frag_unref(wi->resync_dump_frag);
+ stats->tls_dump_packets++;
+ stats->tls_dump_bytes += wi->num_bytes;
+}
+
+static void tx_post_fence_nop(struct mlx5e_txqsq *sq)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+ tx_fill_wi(sq, pi, 1, NULL);
+
+ mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc);
+}
+
+static struct sk_buff *
+mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ u32 seq)
+{
+ struct mlx5e_sq_stats *stats = sq->stats;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct tx_sync_info info = {};
+ u16 contig_wqebbs_room, pi;
+ u8 num_wqebbs;
+ int i;
+
+ if (!tx_sync_info_get(priv_tx, seq, &info)) {
+ /* We might get here if a retransmission reaches the driver
+ * after the relevant record is acked.
+ * It should be safe to drop the packet in this case
+ */
+ stats->tls_drop_no_sync_data++;
+ goto err_out;
+ }
+
+ if (unlikely(info.sync_len < 0)) {
+ u32 payload;
+ int headln;
+
+ headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ payload = skb->len - headln;
+ if (likely(payload <= -info.sync_len))
+ return skb;
+
+ stats->tls_drop_bypass_req++;
+ goto err_out;
+ }
+
+ stats->tls_ooo++;
+
+ num_wqebbs = MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS +
+ (info.nr_frags ? info.nr_frags * MLX5E_KTLS_MAX_DUMP_WQEBBS : 1);
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs_room < num_wqebbs))
+ mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+
+ tx_post_resync_params(sq, priv_tx, info.rcd_sn);
+
+ for (i = 0; i < info.nr_frags; i++)
+ if (tx_post_resync_dump(sq, skb, info.frags[i],
+ priv_tx->tisn, !i))
+ goto err_out;
+
+ /* If no dump WQE was sent, we need to have a fence NOP WQE before the
+ * actual data xmit.
+ */
+ if (!info.nr_frags)
+ tx_post_fence_nop(sq);
+
+ return skb;
+
+err_out:
+ dev_kfree_skb_any(skb);
+ return NULL;
+}
+
+struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_tx_wqe **wqe, u16 *pi)
+{
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+ struct mlx5e_sq_stats *stats = sq->stats;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct tls_context *tls_ctx;
+ int datalen;
+ u32 seq;
+
+ if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
+ goto out;
+
+ datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
+ if (!datalen)
+ goto out;
+
+ tls_ctx = tls_get_ctx(skb->sk);
+ if (unlikely(tls_ctx->netdev != netdev))
+ goto err_out;
+
+ priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
+
+ if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) {
+ mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false);
+ *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
+ stats->tls_ctx++;
+ }
+
+ seq = ntohl(tcp_hdr(skb)->seq);
+ if (unlikely(priv_tx->expected_seq != seq)) {
+ skb = mlx5e_ktls_tx_handle_ooo(priv_tx, sq, skb, seq);
+ if (unlikely(!skb))
+ goto out;
+ *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
+ }
+
+ priv_tx->expected_seq = seq + datalen;
+
+ cseg = &(*wqe)->ctrl;
+ cseg->imm = cpu_to_be32(priv_tx->tisn);
+
+ stats->tls_encrypted_packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
+ stats->tls_encrypted_bytes += datalen;
+
+out:
+ return skb;
+
+err_out:
+ dev_kfree_skb_any(skb);
+ return NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
index e88340e196f7..fba561ffe1d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
@@ -160,25 +160,31 @@ static void mlx5e_tls_del(struct net_device *netdev,
direction == TLS_OFFLOAD_CTX_DIR_TX);
}
-static void mlx5e_tls_resync_rx(struct net_device *netdev, struct sock *sk,
- u32 seq, u64 rcd_sn)
+static int mlx5e_tls_resync(struct net_device *netdev, struct sock *sk,
+ u32 seq, u8 *rcd_sn_data,
+ enum tls_offload_ctx_dir direction)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_tls_offload_context_rx *rx_ctx;
+ u64 rcd_sn = *(u64 *)rcd_sn_data;
+ if (WARN_ON_ONCE(direction != TLS_OFFLOAD_CTX_DIR_RX))
+ return -EINVAL;
rx_ctx = mlx5e_get_tls_rx_context(tls_ctx);
netdev_info(netdev, "resyncing seq %d rcd %lld\n", seq,
be64_to_cpu(rcd_sn));
mlx5_accel_tls_resync_rx(priv->mdev, rx_ctx->handle, seq, rcd_sn);
atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_reply);
+
+ return 0;
}
static const struct tlsdev_ops mlx5e_tls_ops = {
.tls_dev_add = mlx5e_tls_add,
.tls_dev_del = mlx5e_tls_del,
- .tls_dev_resync_rx = mlx5e_tls_resync_rx,
+ .tls_dev_resync = mlx5e_tls_resync,
};
void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
@@ -186,6 +192,11 @@ void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
struct net_device *netdev = priv->netdev;
u32 caps;
+ if (mlx5_accel_is_ktls_device(priv->mdev)) {
+ mlx5e_ktls_build_netdev(priv);
+ return;
+ }
+
if (!mlx5_accel_is_tls_device(priv->mdev))
return;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
index 3f5d72163b56..9015f3f7792d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
@@ -33,8 +33,10 @@
#ifndef __MLX5E_TLS_H__
#define __MLX5E_TLS_H__
-#ifdef CONFIG_MLX5_EN_TLS
+#include "accel/tls.h"
+#include "en_accel/ktls.h"
+#ifdef CONFIG_MLX5_EN_TLS
#include <net/tls.h>
#include "en.h"
@@ -94,7 +96,12 @@ int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data);
#else
-static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) { }
+static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
+{
+ if (mlx5_accel_is_ktls_device(priv->mdev))
+ mlx5e_ktls_build_netdev(priv);
+}
+
static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { }
static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index 439bf5953885..71384ad1a443 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -248,7 +248,7 @@ mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
cpu_to_be64(info.rcd_sn));
mlx5e_sq_xmit(sq, nskb, *wqe, *pi, true);
- mlx5e_sq_fetch_wqe(sq, wqe, pi);
+ *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
return skb;
err_out:
@@ -269,6 +269,11 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
int datalen;
u32 skb_seq;
+ if (MLX5_CAP_GEN(sq->channel->mdev, tls)) {
+ skb = mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi);
+ goto out;
+ }
+
if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
index 311667ec71b8..90bc1f2384c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
@@ -38,6 +38,7 @@
#include <linux/skbuff.h>
#include "en.h"
+#include "en/txrx.h"
struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
struct mlx5e_txqsq *sq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 554672edf8c3..8dd31b5c740c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -680,7 +680,7 @@ static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev,
memset(perm_addr, 0xff, MAX_ADDR_LEN);
- mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr);
+ mlx5_query_mac_address(priv->mdev, perm_addr);
}
static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
index d67adf70a97b..ca9cfbf57d8f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
@@ -30,22 +30,22 @@
* SOFTWARE.
*/
-#include <linux/net_dim.h>
+#include <linux/dim.h>
#include "en.h"
static void
-mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder,
+mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder,
struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq)
{
mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts);
- dim->state = NET_DIM_START_MEASURE;
+ dim->state = DIM_START_MEASURE;
}
void mlx5e_rx_dim_work(struct work_struct *work)
{
- struct net_dim *dim = container_of(work, struct net_dim, work);
+ struct dim *dim = container_of(work, struct dim, work);
struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim);
- struct net_dim_cq_moder cur_moder =
+ struct dim_cq_moder cur_moder =
net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq);
@@ -53,9 +53,9 @@ void mlx5e_rx_dim_work(struct work_struct *work)
void mlx5e_tx_dim_work(struct work_struct *work)
{
- struct net_dim *dim = container_of(work, struct net_dim, work);
+ struct dim *dim = container_of(work, struct dim, work);
struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim);
- struct net_dim_cq_moder cur_moder =
+ struct dim_cq_moder cur_moder =
net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index dd764e0471f2..126ec4181286 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -32,6 +32,7 @@
#include "en.h"
#include "en/port.h"
+#include "en/xsk/umem.h"
#include "lib/clock.h"
void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
@@ -46,7 +47,7 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
"%d.%d.%04d (%.16s)",
fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev),
mdev->board_id);
- strlcpy(drvinfo->bus_info, pci_name(mdev->pdev),
+ strlcpy(drvinfo->bus_info, dev_name(mdev->device),
sizeof(drvinfo->bus_info));
}
@@ -388,8 +389,17 @@ static int mlx5e_set_ringparam(struct net_device *dev,
void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
struct ethtool_channels *ch)
{
+ mutex_lock(&priv->state_lock);
+
ch->max_combined = mlx5e_get_netdev_max_channels(priv->netdev);
ch->combined_count = priv->channels.params.num_channels;
+ if (priv->xsk.refcnt) {
+ /* The upper half are XSK queues. */
+ ch->max_combined *= 2;
+ ch->combined_count *= 2;
+ }
+
+ mutex_unlock(&priv->state_lock);
}
static void mlx5e_get_channels(struct net_device *dev,
@@ -403,6 +413,7 @@ static void mlx5e_get_channels(struct net_device *dev,
int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
struct ethtool_channels *ch)
{
+ struct mlx5e_params *cur_params = &priv->channels.params;
unsigned int count = ch->combined_count;
struct mlx5e_channels new_channels = {};
bool arfs_enabled;
@@ -414,16 +425,26 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
return -EINVAL;
}
- if (priv->channels.params.num_channels == count)
+ if (cur_params->num_channels == count)
return 0;
mutex_lock(&priv->state_lock);
+ /* Don't allow changing the number of channels if there is an active
+ * XSK, because the numeration of the XSK and regular RQs will change.
+ */
+ if (priv->xsk.refcnt) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: AF_XDP is active, cannot change the number of channels\n",
+ __func__);
+ goto out;
+ }
+
new_channels.params = priv->channels.params;
new_channels.params.num_channels = count;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- priv->channels.params = new_channels.params;
+ *cur_params = new_channels.params;
if (!netif_is_rxfh_configured(priv->netdev))
mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
MLX5E_INDIR_RQT_SIZE, count);
@@ -466,7 +487,7 @@ static int mlx5e_set_channels(struct net_device *dev,
int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal)
{
- struct net_dim_cq_moder *rx_moder, *tx_moder;
+ struct dim_cq_moder *rx_moder, *tx_moder;
if (!MLX5_CAP_GEN(priv->mdev, cq_moderation))
return -EOPNOTSUPP;
@@ -521,7 +542,7 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc
int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal)
{
- struct net_dim_cq_moder *rx_moder, *tx_moder;
+ struct dim_cq_moder *rx_moder, *tx_moder;
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_channels new_channels = {};
int err = 0;
@@ -1867,40 +1888,6 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev)
return priv->channels.params.pflags;
}
-int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
- struct ethtool_flash *flash)
-{
- struct mlx5_core_dev *mdev = priv->mdev;
- struct net_device *dev = priv->netdev;
- const struct firmware *fw;
- int err;
-
- if (flash->region != ETHTOOL_FLASH_ALL_REGIONS)
- return -EOPNOTSUPP;
-
- err = request_firmware_direct(&fw, flash->data, &dev->dev);
- if (err)
- return err;
-
- dev_hold(dev);
- rtnl_unlock();
-
- err = mlx5_firmware_flash(mdev, fw);
- release_firmware(fw);
-
- rtnl_lock();
- dev_put(dev);
- return err;
-}
-
-static int mlx5e_flash_device(struct net_device *dev,
- struct ethtool_flash *flash)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
-
- return mlx5e_ethtool_flash_device(priv, flash);
-}
-
#ifndef CONFIG_MLX5_EN_RXNFC
/* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS
* otherwise this function will be defined from en_fs_ethtool.c
@@ -1939,7 +1926,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
#ifdef CONFIG_MLX5_EN_RXNFC
.set_rxnfc = mlx5e_set_rxnfc,
#endif
- .flash_device = mlx5e_flash_device,
.get_tunable = mlx5e_get_tunable,
.set_tunable = mlx5e_set_tunable,
.get_pauseparam = mlx5e_get_pauseparam,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 4421c10f58ae..ea3a490b569a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -32,6 +32,8 @@
#include <linux/mlx5/fs.h>
#include "en.h"
+#include "en/params.h"
+#include "en/xsk/umem.h"
struct mlx5e_ethtool_rule {
struct list_head list;
@@ -414,6 +416,14 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
} else {
+ struct mlx5e_params *params = &priv->channels.params;
+ enum mlx5e_rq_group group;
+ struct mlx5e_tir *tir;
+ u16 ix;
+
+ mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group);
+ tir = group == MLX5E_RQ_GROUP_XSK ? priv->xsk_tir : priv->direct_tir;
+
dst = kzalloc(sizeof(*dst), GFP_KERNEL);
if (!dst) {
err = -ENOMEM;
@@ -421,12 +431,12 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
}
dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn;
+ dst->tir_num = tir[ix].tirn;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
}
spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
- flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ spec->flow_context.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -600,9 +610,9 @@ static int validate_flow(struct mlx5e_priv *priv,
if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES)
return -ENOSPC;
- if (fs->ring_cookie >= priv->channels.params.num_channels &&
- fs->ring_cookie != RX_CLS_FLOW_DISC)
- return -EINVAL;
+ if (fs->ring_cookie != RX_CLS_FLOW_DISC)
+ if (!mlx5e_qid_validate(&priv->channels.params, fs->ring_cookie))
+ return -EINVAL;
switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
case ETHER_FLOW:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 457cc39423f2..47eea6b3a1c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -38,8 +38,10 @@
#include <linux/bpf.h>
#include <linux/if_bridge.h>
#include <net/page_pool.h>
+#include <net/xdp_sock.h>
#include "eswitch.h"
#include "en.h"
+#include "en/txrx.h"
#include "en_tc.h"
#include "en_rep.h"
#include "en_accel/ipsec.h"
@@ -56,35 +58,11 @@
#include "en/monitor_stats.h"
#include "en/reporter.h"
#include "en/params.h"
+#include "en/xsk/umem.h"
+#include "en/xsk/setup.h"
+#include "en/xsk/rx.h"
+#include "en/xsk/tx.h"
-struct mlx5e_rq_param {
- u32 rqc[MLX5_ST_SZ_DW(rqc)];
- struct mlx5_wq_param wq;
- struct mlx5e_rq_frags_info frags_info;
-};
-
-struct mlx5e_sq_param {
- u32 sqc[MLX5_ST_SZ_DW(sqc)];
- struct mlx5_wq_param wq;
- bool is_mpw;
-};
-
-struct mlx5e_cq_param {
- u32 cqc[MLX5_ST_SZ_DW(cqc)];
- struct mlx5_wq_param wq;
- u16 eq_ix;
- u8 cq_period_mode;
-};
-
-struct mlx5e_channel_param {
- struct mlx5e_rq_param rq;
- struct mlx5e_sq_param sq;
- struct mlx5e_sq_param xdp_sq;
- struct mlx5e_sq_param icosq;
- struct mlx5e_cq_param rx_cq;
- struct mlx5e_cq_param tx_cq;
- struct mlx5e_cq_param icosq_cq;
-};
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
@@ -114,18 +92,31 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
- BIT(mlx5e_mpwqe_get_log_rq_size(params)) :
+ BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) :
BIT(params->log_rq_mtu_frames),
- BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)),
+ BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
}
bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
- return mlx5e_check_fragmented_striding_rq_cap(mdev) &&
- !MLX5_IPSEC_DEV(mdev) &&
- !(params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params));
+ if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
+ return false;
+
+ if (MLX5_IPSEC_DEV(mdev))
+ return false;
+
+ if (params->xdp_prog) {
+ /* XSK params are not considered here. If striding RQ is in use,
+ * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will
+ * be called with the known XSK params.
+ */
+ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
+ return false;
+ }
+
+ return true;
}
void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
@@ -394,6 +385,8 @@ static void mlx5e_free_di_list(struct mlx5e_rq *rq)
static int mlx5e_alloc_rq(struct mlx5e_channel *c,
struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct xdp_umem *umem,
struct mlx5e_rq_param *rqp,
struct mlx5e_rq *rq)
{
@@ -401,6 +394,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
struct mlx5_core_dev *mdev = c->mdev;
void *rqc = rqp->rqc;
void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ u32 num_xsk_frames = 0;
+ u32 rq_xdp_ix;
u32 pool_size;
int wq_sz;
int err;
@@ -417,7 +412,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
rq->ix = c->ix;
rq->mdev = mdev;
rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- rq->stats = &c->priv->channel_stats[c->ix].rq;
+ rq->xdpsq = &c->rq_xdpsq;
+ rq->umem = umem;
+
+ if (rq->umem)
+ rq->stats = &c->priv->channel_stats[c->ix].xskrq;
+ else
+ rq->stats = &c->priv->channel_stats[c->ix].rq;
rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL;
if (IS_ERR(rq->xdp_prog)) {
@@ -426,12 +427,16 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
goto err_rq_wq_destroy;
}
- err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix);
+ rq_xdp_ix = rq->ix;
+ if (xsk)
+ rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK;
+ err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix);
if (err < 0)
goto err_rq_wq_destroy;
rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
- rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params);
+ rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
+ rq->buff.umem_headroom = xsk ? xsk->headroom : 0;
pool_size = 1 << params->log_rq_mtu_frames;
switch (rq->wq_type) {
@@ -445,7 +450,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
- pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params);
+ if (xsk)
+ num_xsk_frames = wq_sz <<
+ mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+
+ pool_size = MLX5_MPWRQ_PAGES_PER_WQE <<
+ mlx5e_mpwqe_get_log_rq_size(params, xsk);
rq->post_wqes = mlx5e_post_rx_mpwqes;
rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
@@ -464,12 +474,15 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
goto err_rq_wq_destroy;
}
- rq->mpwqe.skb_from_cqe_mpwrq =
- mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ?
- mlx5e_skb_from_cqe_mpwrq_linear :
- mlx5e_skb_from_cqe_mpwrq_nonlinear;
- rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params);
- rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params));
+ rq->mpwqe.skb_from_cqe_mpwrq = xsk ?
+ mlx5e_xsk_skb_from_cqe_mpwrq_linear :
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ?
+ mlx5e_skb_from_cqe_mpwrq_linear :
+ mlx5e_skb_from_cqe_mpwrq_nonlinear;
+
+ rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ rq->mpwqe.num_strides =
+ BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
err = mlx5e_create_rq_umr_mkey(mdev, rq);
if (err)
@@ -490,6 +503,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
+ if (xsk)
+ num_xsk_frames = wq_sz << rq->wqe.info.log_num_frags;
+
rq->wqe.info = rqp->frags_info;
rq->wqe.frags =
kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
@@ -503,6 +519,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
err = mlx5e_init_di_list(rq, wq_sz, c->cpu);
if (err)
goto err_free;
+
rq->post_wqes = mlx5e_post_rx_wqes;
rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
@@ -518,33 +535,49 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
goto err_free;
}
- rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(params) ?
- mlx5e_skb_from_cqe_linear :
- mlx5e_skb_from_cqe_nonlinear;
+ rq->wqe.skb_from_cqe = xsk ?
+ mlx5e_xsk_skb_from_cqe_linear :
+ mlx5e_rx_is_linear_skb(params, NULL) ?
+ mlx5e_skb_from_cqe_linear :
+ mlx5e_skb_from_cqe_nonlinear;
rq->mkey_be = c->mkey_be;
}
- /* Create a page_pool and register it with rxq */
- pp_params.order = 0;
- pp_params.flags = 0; /* No-internal DMA mapping in page_pool */
- pp_params.pool_size = pool_size;
- pp_params.nid = cpu_to_node(c->cpu);
- pp_params.dev = c->pdev;
- pp_params.dma_dir = rq->buff.map_dir;
-
- /* page_pool can be used even when there is no rq->xdp_prog,
- * given page_pool does not handle DMA mapping there is no
- * required state to clear. And page_pool gracefully handle
- * elevated refcnt.
- */
- rq->page_pool = page_pool_create(&pp_params);
- if (IS_ERR(rq->page_pool)) {
- err = PTR_ERR(rq->page_pool);
- rq->page_pool = NULL;
- goto err_free;
+ if (xsk) {
+ err = mlx5e_xsk_resize_reuseq(umem, num_xsk_frames);
+ if (unlikely(err)) {
+ mlx5_core_err(mdev, "Unable to allocate the Reuse Ring for %u frames\n",
+ num_xsk_frames);
+ goto err_free;
+ }
+
+ rq->zca.free = mlx5e_xsk_zca_free;
+ err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+ MEM_TYPE_ZERO_COPY,
+ &rq->zca);
+ } else {
+ /* Create a page_pool and register it with rxq */
+ pp_params.order = 0;
+ pp_params.flags = 0; /* No-internal DMA mapping in page_pool */
+ pp_params.pool_size = pool_size;
+ pp_params.nid = cpu_to_node(c->cpu);
+ pp_params.dev = c->pdev;
+ pp_params.dma_dir = rq->buff.map_dir;
+
+ /* page_pool can be used even when there is no rq->xdp_prog,
+ * given page_pool does not handle DMA mapping there is no
+ * required state to clear. And page_pool gracefully handle
+ * elevated refcnt.
+ */
+ rq->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(rq->page_pool)) {
+ err = PTR_ERR(rq->page_pool);
+ rq->page_pool = NULL;
+ goto err_free;
+ }
+ err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+ MEM_TYPE_PAGE_POOL, rq->page_pool);
}
- err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
- MEM_TYPE_PAGE_POOL, rq->page_pool);
if (err)
goto err_free;
@@ -584,11 +617,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
switch (params->rx_cq_moderation.cq_period_mode) {
case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
- rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
+ rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE;
break;
case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
default:
- rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}
rq->page_cache.head = 0;
@@ -611,8 +644,7 @@ err_rq_wq_destroy:
if (rq->xdp_prog)
bpf_prog_put(rq->xdp_prog);
xdp_rxq_info_unreg(&rq->xdp_rxq);
- if (rq->page_pool)
- page_pool_destroy(rq->page_pool);
+ page_pool_destroy(rq->page_pool);
mlx5_wq_destroy(&rq->wq_ctrl);
return err;
@@ -625,10 +657,6 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
if (rq->xdp_prog)
bpf_prog_put(rq->xdp_prog);
- xdp_rxq_info_unreg(&rq->xdp_rxq);
- if (rq->page_pool)
- page_pool_destroy(rq->page_pool);
-
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
kvfree(rq->mpwqe.info);
@@ -643,8 +671,15 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i];
- mlx5e_page_release(rq, dma_info, false);
+ /* With AF_XDP, page_cache is not used, so this loop is not
+ * entered, and it's safe to call mlx5e_page_release_dynamic
+ * directly.
+ */
+ mlx5e_page_release_dynamic(rq, dma_info, false);
}
+
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+ page_pool_destroy(rq->page_pool);
mlx5_wq_destroy(&rq->wq_ctrl);
}
@@ -778,7 +813,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
mlx5_core_destroy_rq(rq->mdev, rq->rqn);
}
-static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
+int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
{
unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time);
struct mlx5e_channel *c = rq->channel;
@@ -836,14 +871,13 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
}
-static int mlx5e_open_rq(struct mlx5e_channel *c,
- struct mlx5e_params *params,
- struct mlx5e_rq_param *param,
- struct mlx5e_rq *rq)
+int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
+ struct xdp_umem *umem, struct mlx5e_rq *rq)
{
int err;
- err = mlx5e_alloc_rq(c, params, param, rq);
+ err = mlx5e_alloc_rq(c, params, xsk, umem, param, rq);
if (err)
return err;
@@ -855,6 +889,9 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
if (err)
goto err_destroy_rq;
+ if (MLX5_CAP_ETH(c->mdev, cqe_checksum_full))
+ __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &c->rq.state);
+
if (params->rx_dim_enabled)
__set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
@@ -881,13 +918,13 @@ static void mlx5e_activate_rq(struct mlx5e_rq *rq)
mlx5e_trigger_irq(&rq->channel->icosq);
}
-static void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
+void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
{
clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
}
-static void mlx5e_close_rq(struct mlx5e_rq *rq)
+void mlx5e_close_rq(struct mlx5e_rq *rq)
{
cancel_work_sync(&rq->dim.work);
mlx5e_destroy_rq(rq);
@@ -940,6 +977,7 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
struct mlx5e_params *params,
+ struct xdp_umem *umem,
struct mlx5e_sq_param *param,
struct mlx5e_xdpsq *sq,
bool is_redirect)
@@ -955,9 +993,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- sq->stats = is_redirect ?
- &c->priv->channel_stats[c->ix].xdpsq :
- &c->priv->channel_stats[c->ix].rq_xdpsq;
+ sq->umem = umem;
+
+ sq->stats = sq->umem ?
+ &c->priv->channel_stats[c->ix].xsksq :
+ is_redirect ?
+ &c->priv->channel_stats[c->ix].xdpsq :
+ &c->priv->channel_stats[c->ix].rq_xdpsq;
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1082,15 +1124,19 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
sq->clock = &mdev->clock;
sq->mkey_be = c->mkey_be;
sq->channel = c;
+ sq->ch_ix = c->ix;
sq->txq_ix = txq_ix;
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->stats = &c->priv->channel_stats[c->ix].sq[tc];
+ sq->stop_room = MLX5E_SQ_STOP_ROOM;
INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
if (MLX5_IPSEC_DEV(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
- if (mlx5_accel_is_tls_device(c->priv->mdev))
+ if (mlx5_accel_is_tls_device(c->priv->mdev)) {
set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
+ sq->stop_room += MLX5E_SQ_TLS_ROOM;
+ }
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1336,10 +1382,8 @@ static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
mlx5e_tx_reporter_err_cqe(sq);
}
-static int mlx5e_open_icosq(struct mlx5e_channel *c,
- struct mlx5e_params *params,
- struct mlx5e_sq_param *param,
- struct mlx5e_icosq *sq)
+int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct mlx5e_icosq *sq)
{
struct mlx5e_create_sq_param csp = {};
int err;
@@ -1365,7 +1409,7 @@ err_free_icosq:
return err;
}
-static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+void mlx5e_close_icosq(struct mlx5e_icosq *sq)
{
struct mlx5e_channel *c = sq->channel;
@@ -1376,16 +1420,14 @@ static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
mlx5e_free_icosq(sq);
}
-static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
- struct mlx5e_params *params,
- struct mlx5e_sq_param *param,
- struct mlx5e_xdpsq *sq,
- bool is_redirect)
+int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct xdp_umem *umem,
+ struct mlx5e_xdpsq *sq, bool is_redirect)
{
struct mlx5e_create_sq_param csp = {};
int err;
- err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect);
+ err = mlx5e_alloc_xdpsq(c, params, umem, param, sq, is_redirect);
if (err)
return err;
@@ -1439,7 +1481,7 @@ err_free_xdpsq:
return err;
}
-static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
+void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
{
struct mlx5e_channel *c = sq->channel;
@@ -1447,7 +1489,7 @@ static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
napi_synchronize(&c->napi);
mlx5e_destroy_sq(c->mdev, sq->sqn);
- mlx5e_free_xdpsq_descs(sq, rq);
+ mlx5e_free_xdpsq_descs(sq);
mlx5e_free_xdpsq(sq);
}
@@ -1517,6 +1559,7 @@ static void mlx5e_free_cq(struct mlx5e_cq *cq)
static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
{
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
struct mlx5_core_dev *mdev = cq->mdev;
struct mlx5_core_cq *mcq = &cq->mcq;
@@ -1551,7 +1594,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
MLX5_ADAPTER_PAGE_SHIFT);
MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
- err = mlx5_core_create_cq(mdev, mcq, in, inlen);
+ err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
kvfree(in);
@@ -1568,10 +1611,8 @@ static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
}
-static int mlx5e_open_cq(struct mlx5e_channel *c,
- struct net_dim_cq_moder moder,
- struct mlx5e_cq_param *param,
- struct mlx5e_cq *cq)
+int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder,
+ struct mlx5e_cq_param *param, struct mlx5e_cq *cq)
{
struct mlx5_core_dev *mdev = c->mdev;
int err;
@@ -1594,7 +1635,7 @@ err_free_cq:
return err;
}
-static void mlx5e_close_cq(struct mlx5e_cq *cq)
+void mlx5e_close_cq(struct mlx5e_cq *cq)
{
mlx5e_destroy_cq(cq);
mlx5e_free_cq(cq);
@@ -1768,49 +1809,16 @@ static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c)
free_cpumask_var(c->xps_cpumask);
}
-static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
- struct mlx5e_params *params,
- struct mlx5e_channel_param *cparam,
- struct mlx5e_channel **cp)
+static int mlx5e_open_queues(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam)
{
- int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
- struct net_dim_cq_moder icocq_moder = {0, 0};
- struct net_device *netdev = priv->netdev;
- struct mlx5e_channel *c;
- unsigned int irq;
+ struct dim_cq_moder icocq_moder = {0, 0};
int err;
- int eqn;
-
- err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq);
- if (err)
- return err;
-
- c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
- if (!c)
- return -ENOMEM;
-
- c->priv = priv;
- c->mdev = priv->mdev;
- c->tstamp = &priv->tstamp;
- c->ix = ix;
- c->cpu = cpu;
- c->pdev = priv->mdev->device;
- c->netdev = priv->netdev;
- c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
- c->num_tc = params->num_tc;
- c->xdp = !!params->xdp_prog;
- c->stats = &priv->channel_stats[ix].ch;
- c->irq_desc = irq_to_desc(irq);
-
- err = mlx5e_alloc_xps_cpumask(c, params);
- if (err)
- goto err_free_channel;
-
- netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq);
if (err)
- goto err_napi_del;
+ return err;
err = mlx5e_open_tx_cqs(c, params, cparam);
if (err)
@@ -1826,7 +1834,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
/* XDP SQ CQ params are same as normal TXQ sq CQ params */
err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation,
- &cparam->tx_cq, &c->rq.xdpsq.cq) : 0;
+ &cparam->tx_cq, &c->rq_xdpsq.cq) : 0;
if (err)
goto err_close_rx_cq;
@@ -1840,20 +1848,21 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
if (err)
goto err_close_icosq;
- err = c->xdp ? mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq, false) : 0;
- if (err)
- goto err_close_sqs;
+ if (c->xdp) {
+ err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL,
+ &c->rq_xdpsq, false);
+ if (err)
+ goto err_close_sqs;
+ }
- err = mlx5e_open_rq(c, params, &cparam->rq, &c->rq);
+ err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq);
if (err)
goto err_close_xdp_sq;
- err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->xdpsq, true);
+ err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true);
if (err)
goto err_close_rq;
- *cp = c;
-
return 0;
err_close_rq:
@@ -1861,7 +1870,7 @@ err_close_rq:
err_close_xdp_sq:
if (c->xdp)
- mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq);
+ mlx5e_close_xdpsq(&c->rq_xdpsq);
err_close_sqs:
mlx5e_close_sqs(c);
@@ -1871,8 +1880,9 @@ err_close_icosq:
err_disable_napi:
napi_disable(&c->napi);
+
if (c->xdp)
- mlx5e_close_cq(&c->rq.xdpsq.cq);
+ mlx5e_close_cq(&c->rq_xdpsq.cq);
err_close_rx_cq:
mlx5e_close_cq(&c->rq.cq);
@@ -1886,6 +1896,85 @@ err_close_tx_cqs:
err_close_icosq_cq:
mlx5e_close_cq(&c->icosq.cq);
+ return err;
+}
+
+static void mlx5e_close_queues(struct mlx5e_channel *c)
+{
+ mlx5e_close_xdpsq(&c->xdpsq);
+ mlx5e_close_rq(&c->rq);
+ if (c->xdp)
+ mlx5e_close_xdpsq(&c->rq_xdpsq);
+ mlx5e_close_sqs(c);
+ mlx5e_close_icosq(&c->icosq);
+ napi_disable(&c->napi);
+ if (c->xdp)
+ mlx5e_close_cq(&c->rq_xdpsq.cq);
+ mlx5e_close_cq(&c->rq.cq);
+ mlx5e_close_cq(&c->xdpsq.cq);
+ mlx5e_close_tx_cqs(c);
+ mlx5e_close_cq(&c->icosq.cq);
+}
+
+static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam,
+ struct xdp_umem *umem,
+ struct mlx5e_channel **cp)
+{
+ int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
+ struct net_device *netdev = priv->netdev;
+ struct mlx5e_xsk_param xsk;
+ struct mlx5e_channel *c;
+ unsigned int irq;
+ int err;
+ int eqn;
+
+ err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq);
+ if (err)
+ return err;
+
+ c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
+ if (!c)
+ return -ENOMEM;
+
+ c->priv = priv;
+ c->mdev = priv->mdev;
+ c->tstamp = &priv->tstamp;
+ c->ix = ix;
+ c->cpu = cpu;
+ c->pdev = priv->mdev->device;
+ c->netdev = priv->netdev;
+ c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
+ c->num_tc = params->num_tc;
+ c->xdp = !!params->xdp_prog;
+ c->stats = &priv->channel_stats[ix].ch;
+ c->irq_desc = irq_to_desc(irq);
+
+ err = mlx5e_alloc_xps_cpumask(c, params);
+ if (err)
+ goto err_free_channel;
+
+ netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
+
+ err = mlx5e_open_queues(c, params, cparam);
+ if (unlikely(err))
+ goto err_napi_del;
+
+ if (umem) {
+ mlx5e_build_xsk_param(umem, &xsk);
+ err = mlx5e_open_xsk(priv, params, &xsk, umem, c);
+ if (unlikely(err))
+ goto err_close_queues;
+ }
+
+ *cp = c;
+
+ return 0;
+
+err_close_queues:
+ mlx5e_close_queues(c);
+
err_napi_del:
netif_napi_del(&c->napi);
mlx5e_free_xps_cpumask(c);
@@ -1904,12 +1993,18 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
mlx5e_activate_txqsq(&c->sq[tc]);
mlx5e_activate_rq(&c->rq);
netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix);
+
+ if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ mlx5e_activate_xsk(c);
}
static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
{
int tc;
+ if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ mlx5e_deactivate_xsk(c);
+
mlx5e_deactivate_rq(&c->rq);
for (tc = 0; tc < c->num_tc; tc++)
mlx5e_deactivate_txqsq(&c->sq[tc]);
@@ -1917,19 +2012,9 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
static void mlx5e_close_channel(struct mlx5e_channel *c)
{
- mlx5e_close_xdpsq(&c->xdpsq, NULL);
- mlx5e_close_rq(&c->rq);
- if (c->xdp)
- mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq);
- mlx5e_close_sqs(c);
- mlx5e_close_icosq(&c->icosq);
- napi_disable(&c->napi);
- if (c->xdp)
- mlx5e_close_cq(&c->rq.xdpsq.cq);
- mlx5e_close_cq(&c->rq.cq);
- mlx5e_close_cq(&c->xdpsq.cq);
- mlx5e_close_tx_cqs(c);
- mlx5e_close_cq(&c->icosq.cq);
+ if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ mlx5e_close_xsk(c);
+ mlx5e_close_queues(c);
netif_napi_del(&c->napi);
mlx5e_free_xps_cpumask(c);
@@ -1940,6 +2025,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c)
static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
struct mlx5e_rq_frags_info *info)
{
u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
@@ -1952,10 +2038,10 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
byte_count += MLX5E_METADATA_ETHER_LEN;
#endif
- if (mlx5e_rx_is_linear_skb(params)) {
+ if (mlx5e_rx_is_linear_skb(params, xsk)) {
int frag_stride;
- frag_stride = mlx5e_rx_get_linear_frag_sz(params);
+ frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk);
frag_stride = roundup_pow_of_two(frag_stride);
info->arr[0].frag_size = byte_count;
@@ -2013,9 +2099,10 @@ static u8 mlx5e_get_rq_log_wq_sz(void *rqc)
return MLX5_GET(wq, wq, log_wq_sz);
}
-static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_rq_param *param)
+void mlx5e_build_rq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_param *param)
{
struct mlx5_core_dev *mdev = priv->mdev;
void *rqc = param->rqc;
@@ -2025,16 +2112,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
MLX5_SET(wq, wq, log_wqe_num_of_strides,
- mlx5e_mpwqe_get_log_num_strides(mdev, params) -
+ mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) -
MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
MLX5_SET(wq, wq, log_wqe_stride_size,
- mlx5e_mpwqe_get_log_stride_size(mdev, params) -
+ mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) -
MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
- MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params));
+ MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk));
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
- mlx5e_build_rq_frags_info(mdev, params, &param->frags_info);
+ mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
ndsegs = param->frags_info.num_frags;
}
@@ -2065,8 +2152,8 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
param->wq.buf_numa_node = dev_to_node(mdev->device);
}
-static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
- struct mlx5e_sq_param *param)
+void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
+ struct mlx5e_sq_param *param)
{
void *sqc = param->sqc;
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
@@ -2102,9 +2189,10 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
}
-static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_cq_param *param)
+void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_cq_param *param)
{
struct mlx5_core_dev *mdev = priv->mdev;
void *cqc = param->cqc;
@@ -2112,8 +2200,8 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- log_cq_size = mlx5e_mpwqe_get_log_rq_size(params) +
- mlx5e_mpwqe_get_log_num_strides(mdev, params);
+ log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
+ mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
log_cq_size = params->log_rq_mtu_frames;
@@ -2129,9 +2217,9 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
}
-static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_cq_param *param)
+void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_cq_param *param)
{
void *cqc = param->cqc;
@@ -2141,9 +2229,9 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
}
-static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
- u8 log_wq_size,
- struct mlx5e_cq_param *param)
+void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_cq_param *param)
{
void *cqc = param->cqc;
@@ -2151,12 +2239,12 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
mlx5e_build_common_cq_param(priv, param);
- param->cq_period_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}
-static void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
- u8 log_wq_size,
- struct mlx5e_sq_param *param)
+void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param)
{
void *sqc = param->sqc;
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
@@ -2167,9 +2255,9 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
}
-static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_sq_param *param)
+void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param)
{
void *sqc = param->sqc;
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
@@ -2197,14 +2285,14 @@ static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
{
u8 icosq_log_wq_sz;
- mlx5e_build_rq_param(priv, params, &cparam->rq);
+ mlx5e_build_rq_param(priv, params, NULL, &cparam->rq);
icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq);
mlx5e_build_sq_param(priv, params, &cparam->sq);
mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq);
- mlx5e_build_rx_cq_param(priv, params, &cparam->rx_cq);
+ mlx5e_build_rx_cq_param(priv, params, NULL, &cparam->rx_cq);
mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq);
mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq);
}
@@ -2225,7 +2313,12 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
mlx5e_build_channel_param(priv, &chs->params, cparam);
for (i = 0; i < chs->num; i++) {
- err = mlx5e_open_channel(priv, i, &chs->params, cparam, &chs->c[i]);
+ struct xdp_umem *umem = NULL;
+
+ if (chs->params.xdp_prog)
+ umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, i);
+
+ err = mlx5e_open_channel(priv, i, &chs->params, cparam, umem, &chs->c[i]);
if (err)
goto err_close_channels;
}
@@ -2267,6 +2360,10 @@ static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT;
err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout);
+
+ /* Don't wait on the XSK RQ, because the newer xdpsock sample
+ * doesn't provide any Fill Ring entries at the setup stage.
+ */
}
return err ? -ETIMEDOUT : 0;
@@ -2339,35 +2436,35 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv)
return err;
}
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv)
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
{
- struct mlx5e_rqt *rqt;
+ const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
int err;
int ix;
- for (ix = 0; ix < mlx5e_get_netdev_max_channels(priv->netdev); ix++) {
- rqt = &priv->direct_tir[ix].rqt;
- err = mlx5e_create_rqt(priv, 1 /*size */, rqt);
- if (err)
+ for (ix = 0; ix < max_nch; ix++) {
+ err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt);
+ if (unlikely(err))
goto err_destroy_rqts;
}
return 0;
err_destroy_rqts:
- mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err);
+ mlx5_core_warn(priv->mdev, "create rqts failed, %d\n", err);
for (ix--; ix >= 0; ix--)
- mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt);
+ mlx5e_destroy_rqt(priv, &tirs[ix].rqt);
return err;
}
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv)
+void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
{
+ const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
int i;
- for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++)
- mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
+ for (i = 0; i < max_nch; i++)
+ mlx5e_destroy_rqt(priv, &tirs[i].rqt);
}
static int mlx5e_rx_hash_fn(int hfunc)
@@ -2787,11 +2884,12 @@ static void mlx5e_build_tx2sq_maps(struct mlx5e_priv *priv)
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
{
int num_txqs = priv->channels.num * priv->channels.params.num_tc;
+ int num_rxqs = priv->channels.num * MLX5E_NUM_RQ_GROUPS;
struct net_device *netdev = priv->netdev;
mlx5e_netdev_set_tcs(netdev);
netif_set_real_num_tx_queues(netdev, num_txqs);
- netif_set_real_num_rx_queues(netdev, priv->channels.num);
+ netif_set_real_num_rx_queues(netdev, num_rxqs);
mlx5e_build_tx2sq_maps(priv);
mlx5e_activate_channels(&priv->channels);
@@ -2803,10 +2901,14 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
mlx5e_wait_channels_min_rx_wqes(&priv->channels);
mlx5e_redirect_rqts_to_channels(priv, &priv->channels);
+
+ mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels);
}
void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
{
+ mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels);
+
mlx5e_redirect_rqts_to_drop(priv);
if (mlx5e_is_vport_rep(priv))
@@ -2846,7 +2948,7 @@ static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
if (hw_modify)
hw_modify(priv);
- mlx5e_refresh_tirs(priv, false);
+ priv->profile->update_rx(priv);
mlx5e_activate_priv_channels(priv);
/* return carrier back if needed */
@@ -2885,15 +2987,18 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
int mlx5e_open_locked(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ bool is_xdp = priv->channels.params.xdp_prog;
int err;
set_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (is_xdp)
+ mlx5e_xdp_set_open(priv);
err = mlx5e_open_channels(priv, &priv->channels);
if (err)
goto err_clear_state_opened_flag;
- mlx5e_refresh_tirs(priv, false);
+ priv->profile->update_rx(priv);
mlx5e_activate_priv_channels(priv);
if (priv->profile->update_carrier)
priv->profile->update_carrier(priv);
@@ -2902,6 +3007,8 @@ int mlx5e_open_locked(struct net_device *netdev)
return 0;
err_clear_state_opened_flag:
+ if (is_xdp)
+ mlx5e_xdp_set_closed(priv);
clear_bit(MLX5E_STATE_OPENED, &priv->state);
return err;
}
@@ -2933,6 +3040,8 @@ int mlx5e_close_locked(struct net_device *netdev)
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
return 0;
+ if (priv->channels.params.xdp_prog)
+ mlx5e_xdp_set_closed(priv);
clear_bit(MLX5E_STATE_OPENED, &priv->state);
netif_carrier_off(priv->netdev);
@@ -3044,20 +3153,19 @@ void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
mlx5e_free_cq(&drop_rq->cq);
}
-int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
- u32 underlay_qpn, u32 *tisn)
+int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn)
{
- u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
- MLX5_SET(tisc, tisc, prio, tc << 1);
- MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn);
MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn);
+ if (MLX5_GET(tisc, tisc, tls_en))
+ MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn);
+
if (mlx5_lag_is_lacp_owner(mdev))
MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
- return mlx5_core_create_tis(mdev, in, sizeof(in), tisn);
+ return mlx5_core_create_tis(mdev, in, MLX5_ST_SZ_BYTES(create_tis_in), tisn);
}
void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn)
@@ -3071,7 +3179,14 @@ int mlx5e_create_tises(struct mlx5e_priv *priv)
int tc;
for (tc = 0; tc < priv->profile->max_tc; tc++) {
- err = mlx5e_create_tis(priv->mdev, tc, 0, &priv->tisn[tc]);
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
+ void *tisc;
+
+ tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ MLX5_SET(tisc, tisc, prio, tc << 1);
+
+ err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[tc]);
if (err)
goto err_close_tises;
}
@@ -3189,13 +3304,13 @@ err_destroy_inner_tirs:
return err;
}
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv)
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
{
- int nch = mlx5e_get_netdev_max_channels(priv->netdev);
+ const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
struct mlx5e_tir *tir;
void *tirc;
int inlen;
- int err;
+ int err = 0;
u32 *in;
int ix;
@@ -3204,25 +3319,24 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv)
if (!in)
return -ENOMEM;
- for (ix = 0; ix < nch; ix++) {
+ for (ix = 0; ix < max_nch; ix++) {
memset(in, 0, inlen);
- tir = &priv->direct_tir[ix];
+ tir = &tirs[ix];
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
- mlx5e_build_direct_tir_ctx(priv, priv->direct_tir[ix].rqt.rqtn, tirc);
+ mlx5e_build_direct_tir_ctx(priv, tir->rqt.rqtn, tirc);
err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
- if (err)
+ if (unlikely(err))
goto err_destroy_ch_tirs;
}
- kvfree(in);
-
- return 0;
+ goto out;
err_destroy_ch_tirs:
- mlx5_core_warn(priv->mdev, "create direct tirs failed, %d\n", err);
+ mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err);
for (ix--; ix >= 0; ix--)
- mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]);
+ mlx5e_destroy_tir(priv->mdev, &tirs[ix]);
+out:
kvfree(in);
return err;
@@ -3242,13 +3356,13 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
}
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv)
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
{
- int nch = mlx5e_get_netdev_max_channels(priv->netdev);
+ const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
int i;
- for (i = 0; i < nch; i++)
- mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]);
+ for (i = 0; i < max_nch; i++)
+ mlx5e_destroy_tir(priv->mdev, &tirs[i]);
}
static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
@@ -3279,10 +3393,9 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
return 0;
}
-static int mlx5e_setup_tc_mqprio(struct net_device *netdev,
+static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
struct tc_mqprio_qopt *mqprio)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_channels new_channels = {};
u8 tc = mqprio->num_tc;
int err = 0;
@@ -3315,17 +3428,17 @@ out:
#ifdef CONFIG_MLX5_ESWITCH
static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *cls_flower,
+ struct flow_cls_offload *cls_flower,
int flags)
{
switch (cls_flower->command) {
- case TC_CLSFLOWER_REPLACE:
+ case FLOW_CLS_REPLACE:
return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
flags);
- case TC_CLSFLOWER_DESTROY:
+ case FLOW_CLS_DESTROY:
return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
flags);
- case TC_CLSFLOWER_STATS:
+ case FLOW_CLS_STATS:
return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
flags);
default:
@@ -3346,39 +3459,25 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
return -EOPNOTSUPP;
}
}
-
-static int mlx5e_setup_tc_block(struct net_device *dev,
- struct tc_block_offload *f)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
-
- if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
- return -EOPNOTSUPP;
-
- switch (f->command) {
- case TC_BLOCK_BIND:
- return tcf_block_cb_register(f->block, mlx5e_setup_tc_block_cb,
- priv, priv, f->extack);
- case TC_BLOCK_UNBIND:
- tcf_block_cb_unregister(f->block, mlx5e_setup_tc_block_cb,
- priv);
- return 0;
- default:
- return -EOPNOTSUPP;
- }
-}
#endif
+static LIST_HEAD(mlx5e_block_cb_list);
+
static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
switch (type) {
#ifdef CONFIG_MLX5_ESWITCH
case TC_SETUP_BLOCK:
- return mlx5e_setup_tc_block(dev, type_data);
+ return flow_block_cb_setup_simple(type_data,
+ &mlx5e_block_cb_list,
+ mlx5e_setup_tc_block_cb,
+ priv, priv, true);
#endif
case TC_SETUP_QDISC_MQPRIO:
- return mlx5e_setup_tc_mqprio(dev, type_data);
+ return mlx5e_setup_tc_mqprio(priv, type_data);
default:
return -EOPNOTSUPP;
}
@@ -3390,11 +3489,12 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) {
struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i];
+ struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
int j;
- s->rx_packets += rq_stats->packets;
- s->rx_bytes += rq_stats->bytes;
+ s->rx_packets += rq_stats->packets + xskrq_stats->packets;
+ s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes;
for (j = 0; j < priv->max_opened_tc; j++) {
struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
@@ -3493,6 +3593,13 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
mutex_lock(&priv->state_lock);
+ if (enable && priv->xsk.refcnt) {
+ netdev_warn(netdev, "LRO is incompatible with AF_XDP (%hu XSKs are active)\n",
+ priv->xsk.refcnt);
+ err = -EINVAL;
+ goto out;
+ }
+
old_params = &priv->channels.params;
if (enable && !MLX5E_GET_PFLAG(old_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
netdev_warn(netdev, "can't set LRO with legacy RQ\n");
@@ -3506,8 +3613,8 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
new_channels.params.lro_en = enable;
if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
- if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params) ==
- mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params))
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params, NULL) ==
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL))
reset = false;
}
@@ -3635,8 +3742,7 @@ static int mlx5e_handle_feature(struct net_device *netdev,
return 0;
}
-static int mlx5e_set_features(struct net_device *netdev,
- netdev_features_t features)
+int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
{
netdev_features_t oper_features = netdev->features;
int err = 0;
@@ -3687,11 +3793,54 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
}
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+ features &= ~NETIF_F_RXHASH;
+ if (netdev->features & NETIF_F_RXHASH)
+ netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
+ }
+
mutex_unlock(&priv->state_lock);
return features;
}
+static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
+ struct mlx5e_channels *chs,
+ struct mlx5e_params *new_params,
+ struct mlx5_core_dev *mdev)
+{
+ u16 ix;
+
+ for (ix = 0; ix < chs->params.num_channels; ix++) {
+ struct xdp_umem *umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, ix);
+ struct mlx5e_xsk_param xsk;
+
+ if (!umem)
+ continue;
+
+ mlx5e_build_xsk_param(umem, &xsk);
+
+ if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) {
+ u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk);
+ int max_mtu_frame, max_mtu_page, max_mtu;
+
+ /* Two criteria must be met:
+ * 1. HW MTU + all headrooms <= XSK frame size.
+ * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE.
+ */
+ max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr);
+ max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk);
+ max_mtu = min(max_mtu_frame, max_mtu_page);
+
+ netdev_err(netdev, "MTU %d is too big for an XSK running on channel %hu. Try MTU <= %d\n",
+ new_params->sw_mtu, ix, max_mtu);
+ return false;
+ }
+ }
+
+ return true;
+}
+
int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
change_hw_mtu_cb set_mtu_cb)
{
@@ -3712,18 +3861,31 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
new_channels.params.sw_mtu = new_mtu;
if (params->xdp_prog &&
- !mlx5e_rx_is_linear_skb(&new_channels.params)) {
+ !mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) {
netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
- new_mtu, mlx5e_xdp_max_mtu(params));
+ new_mtu, mlx5e_xdp_max_mtu(params, NULL));
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (priv->xsk.refcnt &&
+ !mlx5e_xsk_validate_mtu(netdev, &priv->channels,
+ &new_channels.params, priv->mdev)) {
err = -EINVAL;
goto out;
}
if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
- bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, &new_channels.params);
- u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params);
- u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params);
+ bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev,
+ &new_channels.params,
+ NULL);
+ u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL);
+ u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params, NULL);
+ /* If XSK is active, XSK RQs are linear. */
+ is_linear |= priv->xsk.refcnt;
+
+ /* Always reset in linear mode - hw_mtu is used in data path. */
reset = reset && (is_linear || (ppw_old != ppw_new));
}
@@ -3812,6 +3974,9 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
memcpy(&priv->tstamp, &config, sizeof(config));
mutex_unlock(&priv->state_lock);
+ /* might need to fix some features */
+ netdev_update_features(priv->netdev);
+
return copy_to_user(ifr->ifr_data, &config,
sizeof(config)) ? -EFAULT : 0;
}
@@ -4153,16 +4318,29 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
new_channels.params = priv->channels.params;
new_channels.params.xdp_prog = prog;
- if (!mlx5e_rx_is_linear_skb(&new_channels.params)) {
+ /* No XSK params: AF_XDP can't be enabled yet at the point of setting
+ * the XDP program.
+ */
+ if (!mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) {
netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
new_channels.params.sw_mtu,
- mlx5e_xdp_max_mtu(&new_channels.params));
+ mlx5e_xdp_max_mtu(&new_channels.params, NULL));
return -EINVAL;
}
return 0;
}
+static int mlx5e_xdp_update_state(struct mlx5e_priv *priv)
+{
+ if (priv->channels.params.xdp_prog)
+ mlx5e_xdp_set_open(priv);
+ else
+ mlx5e_xdp_set_closed(priv);
+
+ return 0;
+}
+
static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4183,8 +4361,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
/* no need for full reset when exchanging programs */
reset = (!priv->channels.params.xdp_prog || !prog);
- if (was_opened && reset)
- mlx5e_close_locked(netdev);
if (was_opened && !reset) {
/* num_channels is invariant here, so we can take the
* batched reference right upfront.
@@ -4196,20 +4372,31 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
}
}
- /* exchange programs, extra prog reference we got from caller
- * as long as we don't fail from this point onwards.
- */
- old_prog = xchg(&priv->channels.params.xdp_prog, prog);
+ if (was_opened && reset) {
+ struct mlx5e_channels new_channels = {};
+
+ new_channels.params = priv->channels.params;
+ new_channels.params.xdp_prog = prog;
+ mlx5e_set_rq_type(priv->mdev, &new_channels.params);
+ old_prog = priv->channels.params.xdp_prog;
+
+ err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_xdp_update_state);
+ if (err)
+ goto unlock;
+ } else {
+ /* exchange programs, extra prog reference we got from caller
+ * as long as we don't fail from this point onwards.
+ */
+ old_prog = xchg(&priv->channels.params.xdp_prog, prog);
+ }
+
if (old_prog)
bpf_prog_put(old_prog);
- if (reset) /* change RQ type according to priv->xdp_prog */
+ if (!was_opened && reset) /* change RQ type according to priv->xdp_prog */
mlx5e_set_rq_type(priv->mdev, &priv->channels.params);
- if (was_opened && reset)
- err = mlx5e_open_locked(netdev);
-
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
+ if (!was_opened || reset)
goto unlock;
/* exchanging programs w/o reset, we update ref counts on behalf
@@ -4217,19 +4404,29 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
*/
for (i = 0; i < priv->channels.num; i++) {
struct mlx5e_channel *c = priv->channels.c[i];
+ bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
+ if (xsk_open)
+ clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
napi_synchronize(&c->napi);
/* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */
old_prog = xchg(&c->rq.xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ if (xsk_open) {
+ old_prog = xchg(&c->xskrq.xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ }
set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
+ if (xsk_open)
+ set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
/* napi_schedule in case we have missed anything */
napi_schedule(&c->napi);
-
- if (old_prog)
- bpf_prog_put(old_prog);
}
unlock:
@@ -4260,6 +4457,9 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
case XDP_QUERY_PROG:
xdp->prog_id = mlx5e_xdp_query(dev);
return 0;
+ case XDP_SETUP_XSK_UMEM:
+ return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem,
+ xdp->xsk.queue_id);
default:
return -EINVAL;
}
@@ -4342,6 +4542,7 @@ const struct net_device_ops mlx5e_netdev_ops = {
.ndo_tx_timeout = mlx5e_tx_timeout,
.ndo_bpf = mlx5e_xdp,
.ndo_xdp_xmit = mlx5e_xdp_xmit,
+ .ndo_xsk_async_xmit = mlx5e_xsk_async_xmit,
#ifdef CONFIG_MLX5_EN_ARFS
.ndo_rx_flow_steer = mlx5e_rx_flow_steer,
#endif
@@ -4411,9 +4612,9 @@ static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
}
-static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
+static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
{
- struct net_dim_cq_moder moder;
+ struct dim_cq_moder moder;
moder.cq_period_mode = cq_period_mode;
moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
@@ -4424,9 +4625,9 @@ static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
return moder;
}
-static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
+static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
{
- struct net_dim_cq_moder moder;
+ struct dim_cq_moder moder;
moder.cq_period_mode = cq_period_mode;
moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
@@ -4440,8 +4641,8 @@ static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
{
return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
- NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE :
- NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+ DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}
void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
@@ -4493,11 +4694,13 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
* - Striding RQ configuration is not possible/supported.
* - Slow PCI heuristic.
* - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
+ *
+ * No XSK params: checking the availability of striding RQ in general.
*/
if (!slow_pci_heuristic(mdev) &&
mlx5e_striding_rq_possible(mdev, params) &&
- (mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ||
- !mlx5e_rx_is_linear_skb(params)))
+ (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ||
+ !mlx5e_rx_is_linear_skb(params, NULL)))
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
mlx5e_set_rq_type(mdev, params);
mlx5e_init_rq_type_params(mdev, params);
@@ -4519,6 +4722,7 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
}
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_xsk *xsk,
struct mlx5e_rss_params *rss_params,
struct mlx5e_params *params,
u16 max_channels, u16 mtu)
@@ -4554,9 +4758,11 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
/* HW LRO */
/* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */
- if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
- if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
+ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ /* No XSK params: checking the availability of striding RQ in general. */
+ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
params->lro_en = !slow_pci_heuristic(mdev);
+ }
params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
/* CQ moderation params */
@@ -4575,13 +4781,16 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
mlx5e_build_rss_params(rss_params, params->num_channels);
params->tunneled_offload_en =
mlx5e_tunnel_inner_ft_supported(mdev);
+
+ /* AF_XDP */
+ params->xsk = xsk;
}
static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr);
+ mlx5_query_mac_address(priv->mdev, netdev->dev_addr);
if (is_zero_ether_addr(netdev->dev_addr) &&
!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) {
eth_hw_addr_random(netdev);
@@ -4610,14 +4819,18 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->ethtool_ops = &mlx5e_ethtool_ops;
netdev->vlan_features |= NETIF_F_SG;
- netdev->vlan_features |= NETIF_F_IP_CSUM;
- netdev->vlan_features |= NETIF_F_IPV6_CSUM;
+ netdev->vlan_features |= NETIF_F_HW_CSUM;
netdev->vlan_features |= NETIF_F_GRO;
netdev->vlan_features |= NETIF_F_TSO;
netdev->vlan_features |= NETIF_F_TSO6;
netdev->vlan_features |= NETIF_F_RXCSUM;
netdev->vlan_features |= NETIF_F_RXHASH;
+ netdev->mpls_features |= NETIF_F_SG;
+ netdev->mpls_features |= NETIF_F_HW_CSUM;
+ netdev->mpls_features |= NETIF_F_TSO;
+ netdev->mpls_features |= NETIF_F_TSO6;
+
netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX;
netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX;
@@ -4633,8 +4846,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) ||
MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
- netdev->hw_enc_features |= NETIF_F_IP_CSUM;
- netdev->hw_enc_features |= NETIF_F_IPV6_CSUM;
+ netdev->hw_enc_features |= NETIF_F_HW_CSUM;
netdev->hw_enc_features |= NETIF_F_TSO;
netdev->hw_enc_features |= NETIF_F_TSO6;
netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
@@ -4680,6 +4892,10 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
if (!priv->channels.params.scatter_fcs_en)
netdev->features &= ~NETIF_F_RXFCS;
+ /* prefere CQE compression over rxhash */
+ if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
+ netdev->features &= ~NETIF_F_RXHASH;
+
#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
if (FT_CAP(flow_modify_en) &&
FT_CAP(modify_root) &&
@@ -4743,7 +4959,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
if (err)
return err;
- mlx5e_build_nic_params(mdev, rss, &priv->channels.params,
+ mlx5e_build_nic_params(mdev, &priv->xsk, rss, &priv->channels.params,
mlx5e_get_netdev_max_channels(netdev),
netdev->mtu);
@@ -4785,7 +5001,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv);
+ err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
if (err)
goto err_destroy_indirect_rqts;
@@ -4793,14 +5009,22 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
if (err)
goto err_destroy_direct_rqts;
- err = mlx5e_create_direct_tirs(priv);
+ err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
if (err)
goto err_destroy_indirect_tirs;
+ err = mlx5e_create_direct_rqts(priv, priv->xsk_tir);
+ if (unlikely(err))
+ goto err_destroy_direct_tirs;
+
+ err = mlx5e_create_direct_tirs(priv, priv->xsk_tir);
+ if (unlikely(err))
+ goto err_destroy_xsk_rqts;
+
err = mlx5e_create_flow_steering(priv);
if (err) {
mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
- goto err_destroy_direct_tirs;
+ goto err_destroy_xsk_tirs;
}
err = mlx5e_tc_nic_init(priv);
@@ -4811,12 +5035,16 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
err_destroy_flow_steering:
mlx5e_destroy_flow_steering(priv);
+err_destroy_xsk_tirs:
+ mlx5e_destroy_direct_tirs(priv, priv->xsk_tir);
+err_destroy_xsk_rqts:
+ mlx5e_destroy_direct_rqts(priv, priv->xsk_tir);
err_destroy_direct_tirs:
- mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
err_destroy_indirect_tirs:
mlx5e_destroy_indirect_tirs(priv, true);
err_destroy_direct_rqts:
- mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
err_destroy_indirect_rqts:
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
err_close_drop_rq:
@@ -4830,9 +5058,11 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
{
mlx5e_tc_nic_cleanup(priv);
mlx5e_destroy_flow_steering(priv);
- mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_direct_tirs(priv, priv->xsk_tir);
+ mlx5e_destroy_direct_rqts(priv, priv->xsk_tir);
+ mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
mlx5e_destroy_indirect_tirs(priv, true);
- mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
mlx5e_close_drop_rq(&priv->drop_rq);
mlx5e_destroy_q_counters(priv);
@@ -4914,6 +5144,11 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
mlx5_lag_remove(mdev);
}
+int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
+{
+ return mlx5e_refresh_tirs(priv, false);
+}
+
static const struct mlx5e_profile mlx5e_nic_profile = {
.init = mlx5e_nic_init,
.cleanup = mlx5e_nic_cleanup,
@@ -4923,6 +5158,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
.cleanup_tx = mlx5e_cleanup_nic_tx,
.enable = mlx5e_nic_enable,
.disable = mlx5e_nic_disable,
+ .update_rx = mlx5e_update_nic_rx,
.update_stats = mlx5e_update_ndo_stats,
.update_carrier = mlx5e_update_carrier,
.rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe,
@@ -4982,7 +5218,7 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
nch * profile->max_tc,
- nch);
+ nch * MLX5E_NUM_RQ_GROUPS);
if (!netdev) {
mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
return NULL;
@@ -5095,6 +5331,11 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv)
struct mlx5e_priv *priv = vpriv;
struct net_device *netdev = priv->netdev;
+#ifdef CONFIG_MLX5_ESWITCH
+ if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev)
+ return;
+#endif
+
if (!netif_device_present(netdev))
return;
@@ -5115,7 +5356,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
#ifdef CONFIG_MLX5_ESWITCH
if (MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+ mlx5_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) {
mlx5e_rep_register_vport_reps(mdev);
return mdev;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 5283e16c69e4..7f747cb1a4f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -37,6 +37,7 @@
#include <net/act_api.h>
#include <net/netevent.h>
#include <net/arp.h>
+#include <net/devlink.h>
#include "eswitch.h"
#include "en.h"
@@ -128,7 +129,7 @@ static void mlx5e_rep_get_strings(struct net_device *dev,
}
}
-static void mlx5e_vf_rep_update_hw_counters(struct mlx5e_priv *priv)
+static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -166,17 +167,6 @@ static void mlx5e_uplink_rep_update_hw_counters(struct mlx5e_priv *priv)
vport_stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
}
-static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_eswitch_rep *rep = rpriv->rep;
-
- if (rep->vport == MLX5_VPORT_UPLINK)
- mlx5e_uplink_rep_update_hw_counters(priv);
- else
- mlx5e_vf_rep_update_hw_counters(priv);
-}
-
static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
{
struct mlx5e_sw_stats *s = &priv->stats.sw;
@@ -203,7 +193,7 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
mutex_lock(&priv->state_lock);
mlx5e_rep_update_sw_counters(priv);
- mlx5e_rep_update_hw_counters(priv);
+ priv->profile->update_stats(priv);
mutex_unlock(&priv->state_lock);
for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++)
@@ -363,7 +353,7 @@ static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev,
return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings);
}
-static const struct ethtool_ops mlx5e_vf_rep_ethtool_ops = {
+static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
.get_drvinfo = mlx5e_rep_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_strings = mlx5e_rep_get_strings,
@@ -402,30 +392,19 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = {
static int mlx5e_rep_get_port_parent_id(struct net_device *dev,
struct netdev_phys_item_id *ppid)
{
- struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct net_device *uplink_upper = NULL;
- struct mlx5e_priv *uplink_priv = NULL;
- struct net_device *uplink_dev;
-
- if (esw->mode == SRIOV_NONE)
- return -EOPNOTSUPP;
+ struct mlx5_eswitch *esw;
+ struct mlx5e_priv *priv;
+ u64 parent_id;
- uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
- if (uplink_dev) {
- uplink_upper = netdev_master_upper_dev_get(uplink_dev);
- uplink_priv = netdev_priv(uplink_dev);
- }
+ priv = netdev_priv(dev);
+ esw = priv->mdev->priv.eswitch;
- ppid->id_len = ETH_ALEN;
- if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) {
- ether_addr_copy(ppid->id, uplink_upper->dev_addr);
- } else {
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_eswitch_rep *rep = rpriv->rep;
+ if (esw->mode == MLX5_ESWITCH_NONE)
+ return -EOPNOTSUPP;
- ether_addr_copy(ppid->id, rep->hw_id);
- }
+ parent_id = mlx5_query_nic_system_image_guid(priv->mdev);
+ ppid->id_len = sizeof(parent_id);
+ memcpy(ppid->id, &parent_id, sizeof(parent_id));
return 0;
}
@@ -436,7 +415,7 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
struct mlx5e_rep_sq *rep_sq, *tmp;
struct mlx5e_rep_priv *rpriv;
- if (esw->mode != SRIOV_OFFLOADS)
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
return;
rpriv = mlx5e_rep_to_rep_priv(rep);
@@ -457,7 +436,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
int err;
int i;
- if (esw->mode != SRIOV_OFFLOADS)
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
return 0;
rpriv = mlx5e_rep_to_rep_priv(rep);
@@ -677,7 +656,7 @@ static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv)
static int
mlx5e_rep_indr_offload(struct net_device *netdev,
- struct tc_cls_flower_offload *flower,
+ struct flow_cls_offload *flower,
struct mlx5e_rep_indr_block_priv *indr_priv)
{
struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
@@ -685,13 +664,13 @@ mlx5e_rep_indr_offload(struct net_device *netdev,
int err = 0;
switch (flower->command) {
- case TC_CLSFLOWER_REPLACE:
+ case FLOW_CLS_REPLACE:
err = mlx5e_configure_flower(netdev, priv, flower, flags);
break;
- case TC_CLSFLOWER_DESTROY:
+ case FLOW_CLS_DESTROY:
err = mlx5e_delete_flower(netdev, priv, flower, flags);
break;
- case TC_CLSFLOWER_STATS:
+ case FLOW_CLS_STATS:
err = mlx5e_stats_flower(netdev, priv, flower, flags);
break;
default:
@@ -714,23 +693,39 @@ static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type,
}
}
+static void mlx5e_rep_indr_tc_block_unbind(void *cb_priv)
+{
+ struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;
+
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+}
+
+static LIST_HEAD(mlx5e_block_cb_list);
+
static int
mlx5e_rep_indr_setup_tc_block(struct net_device *netdev,
struct mlx5e_rep_priv *rpriv,
- struct tc_block_offload *f)
+ struct flow_block_offload *f)
{
struct mlx5e_rep_indr_block_priv *indr_priv;
- int err = 0;
+ struct flow_block_cb *block_cb;
- if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
return -EOPNOTSUPP;
+ f->driver_block_list = &mlx5e_block_cb_list;
+
switch (f->command) {
- case TC_BLOCK_BIND:
+ case FLOW_BLOCK_BIND:
indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
if (indr_priv)
return -EEXIST;
+ if (flow_block_cb_is_busy(mlx5e_rep_indr_setup_block_cb,
+ indr_priv, &mlx5e_block_cb_list))
+ return -EBUSY;
+
indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
if (!indr_priv)
return -ENOMEM;
@@ -740,26 +735,31 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev,
list_add(&indr_priv->list,
&rpriv->uplink_priv.tc_indr_block_priv_list);
- err = tcf_block_cb_register(f->block,
- mlx5e_rep_indr_setup_block_cb,
- indr_priv, indr_priv, f->extack);
- if (err) {
+ block_cb = flow_block_cb_alloc(mlx5e_rep_indr_setup_block_cb,
+ indr_priv, indr_priv,
+ mlx5e_rep_indr_tc_block_unbind);
+ if (IS_ERR(block_cb)) {
list_del(&indr_priv->list);
kfree(indr_priv);
+ return PTR_ERR(block_cb);
}
+ flow_block_cb_add(block_cb, f);
+ list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);
- return err;
- case TC_BLOCK_UNBIND:
+ return 0;
+ case FLOW_BLOCK_UNBIND:
indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
if (!indr_priv)
return -ENOENT;
- tcf_block_cb_unregister(f->block,
- mlx5e_rep_indr_setup_block_cb,
- indr_priv);
- list_del(&indr_priv->list);
- kfree(indr_priv);
+ block_cb = flow_block_cb_lookup(f->block,
+ mlx5e_rep_indr_setup_block_cb,
+ indr_priv);
+ if (!block_cb)
+ return -ENOENT;
+ flow_block_cb_remove(block_cb, f);
+ list_del(&block_cb->driver_list);
return 0;
default:
return -EOPNOTSUPP;
@@ -813,7 +813,7 @@ static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
- !is_vlan_dev(netdev))
+ !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev))
return NOTIFY_OK;
switch (event) {
@@ -1101,7 +1101,7 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
}
-static int mlx5e_vf_rep_open(struct net_device *dev)
+static int mlx5e_rep_open(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -1124,7 +1124,7 @@ unlock:
return err;
}
-static int mlx5e_vf_rep_close(struct net_device *dev)
+static int mlx5e_rep_close(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -1141,42 +1141,18 @@ static int mlx5e_vf_rep_close(struct net_device *dev)
return ret;
}
-static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
- char *buf, size_t len)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_eswitch_rep *rep = rpriv->rep;
- unsigned int fn;
- int ret;
-
- fn = PCI_FUNC(priv->mdev->pdev->devfn);
- if (fn >= MLX5_MAX_PORTS)
- return -EOPNOTSUPP;
-
- if (rep->vport == MLX5_VPORT_UPLINK)
- ret = snprintf(buf, len, "p%d", fn);
- else
- ret = snprintf(buf, len, "pf%dvf%d", fn, rep->vport - 1);
-
- if (ret >= len)
- return -EOPNOTSUPP;
-
- return 0;
-}
-
static int
mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *cls_flower, int flags)
+ struct flow_cls_offload *cls_flower, int flags)
{
switch (cls_flower->command) {
- case TC_CLSFLOWER_REPLACE:
+ case FLOW_CLS_REPLACE:
return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
flags);
- case TC_CLSFLOWER_DESTROY:
+ case FLOW_CLS_DESTROY:
return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
flags);
- case TC_CLSFLOWER_STATS:
+ case FLOW_CLS_STATS:
return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
flags);
default:
@@ -1198,32 +1174,19 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
}
}
-static int mlx5e_rep_setup_tc_block(struct net_device *dev,
- struct tc_block_offload *f)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
-
- if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
- return -EOPNOTSUPP;
-
- switch (f->command) {
- case TC_BLOCK_BIND:
- return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb,
- priv, priv, f->extack);
- case TC_BLOCK_UNBIND:
- tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv);
- return 0;
- default:
- return -EOPNOTSUPP;
- }
-}
+static LIST_HEAD(mlx5e_rep_block_cb_list);
static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
switch (type) {
case TC_SETUP_BLOCK:
- return mlx5e_rep_setup_tc_block(dev, type_data);
+ return flow_block_cb_setup_simple(type_data,
+ &mlx5e_rep_block_cb_list,
+ mlx5e_rep_setup_tc_cb,
+ priv, priv, true);
default:
return -EOPNOTSUPP;
}
@@ -1276,7 +1239,7 @@ static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev
}
static void
-mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -1285,7 +1248,7 @@ mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
memcpy(stats, &priv->stats.vf_vport, sizeof(*stats));
}
-static int mlx5e_vf_rep_change_mtu(struct net_device *netdev, int new_mtu)
+static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu)
{
return mlx5e_change_mtu(netdev, new_mtu, NULL);
}
@@ -1318,17 +1281,24 @@ static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan
return 0;
}
-static const struct net_device_ops mlx5e_netdev_ops_vf_rep = {
- .ndo_open = mlx5e_vf_rep_open,
- .ndo_stop = mlx5e_vf_rep_close,
+static struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ return &rpriv->dl_port;
+}
+
+static const struct net_device_ops mlx5e_netdev_ops_rep = {
+ .ndo_open = mlx5e_rep_open,
+ .ndo_stop = mlx5e_rep_close,
.ndo_start_xmit = mlx5e_xmit,
- .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name,
.ndo_setup_tc = mlx5e_rep_setup_tc,
- .ndo_get_stats64 = mlx5e_vf_rep_get_stats,
+ .ndo_get_devlink_port = mlx5e_get_devlink_port,
+ .ndo_get_stats64 = mlx5e_rep_get_stats,
.ndo_has_offload_stats = mlx5e_rep_has_offload_stats,
.ndo_get_offload_stats = mlx5e_rep_get_offload_stats,
- .ndo_change_mtu = mlx5e_vf_rep_change_mtu,
- .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id,
+ .ndo_change_mtu = mlx5e_rep_change_mtu,
};
static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = {
@@ -1336,8 +1306,8 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = {
.ndo_stop = mlx5e_close,
.ndo_start_xmit = mlx5e_xmit,
.ndo_set_mac_address = mlx5e_uplink_rep_set_mac,
- .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name,
.ndo_setup_tc = mlx5e_rep_setup_tc,
+ .ndo_get_devlink_port = mlx5e_get_devlink_port,
.ndo_get_stats64 = mlx5e_get_stats,
.ndo_has_offload_stats = mlx5e_rep_has_offload_stats,
.ndo_get_offload_stats = mlx5e_rep_get_offload_stats,
@@ -1350,12 +1320,12 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = {
.ndo_get_vf_config = mlx5e_get_vf_config,
.ndo_get_vf_stats = mlx5e_get_vf_stats,
.ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan,
- .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id,
+ .ndo_set_features = mlx5e_set_features,
};
bool mlx5e_eswitch_rep(struct net_device *netdev)
{
- if (netdev->netdev_ops == &mlx5e_netdev_ops_vf_rep ||
+ if (netdev->netdev_ops == &mlx5e_netdev_ops_rep ||
netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep)
return true;
@@ -1411,24 +1381,23 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
SET_NETDEV_DEV(netdev, mdev->device);
netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep;
/* we want a persistent mac for the uplink rep */
- mlx5_query_nic_vport_mac_address(mdev, 0, netdev->dev_addr);
+ mlx5_query_mac_address(mdev, netdev->dev_addr);
netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops;
#ifdef CONFIG_MLX5_CORE_EN_DCB
if (MLX5_CAP_GEN(mdev, qos))
netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
#endif
} else {
- netdev->netdev_ops = &mlx5e_netdev_ops_vf_rep;
+ netdev->netdev_ops = &mlx5e_netdev_ops_rep;
eth_hw_addr_random(netdev);
- netdev->ethtool_ops = &mlx5e_vf_rep_ethtool_ops;
+ netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
}
netdev->watchdog_timeo = 15 * HZ;
+ netdev->features |= NETIF_F_NETNS_LOCAL;
- netdev->features |= NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
- netdev->hw_features |= NETIF_F_HW_TC;
-
+ netdev->hw_features |= NETIF_F_HW_TC;
netdev->hw_features |= NETIF_F_SG;
netdev->hw_features |= NETIF_F_IP_CSUM;
netdev->hw_features |= NETIF_F_IPV6_CSUM;
@@ -1437,7 +1406,9 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
netdev->hw_features |= NETIF_F_TSO6;
netdev->hw_features |= NETIF_F_RXCSUM;
- if (rep->vport != MLX5_VPORT_UPLINK)
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+ else
netdev->features |= NETIF_F_VLAN_CHALLENGED;
netdev->features |= netdev->hw_features;
@@ -1528,7 +1499,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv);
+ err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
if (err)
goto err_destroy_indirect_rqts;
@@ -1536,7 +1507,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
if (err)
goto err_destroy_direct_rqts;
- err = mlx5e_create_direct_tirs(priv);
+ err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
if (err)
goto err_destroy_indirect_tirs;
@@ -1553,11 +1524,11 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
err_destroy_ttc_table:
mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
err_destroy_direct_tirs:
- mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
err_destroy_indirect_tirs:
mlx5e_destroy_indirect_tirs(priv, false);
err_destroy_direct_rqts:
- mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
err_destroy_indirect_rqts:
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
err_close_drop_rq:
@@ -1571,9 +1542,9 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
mlx5_del_flow_rules(rpriv->vport_rx_rule);
mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
- mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
mlx5e_destroy_indirect_tirs(priv, false);
- mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
mlx5e_close_drop_rq(&priv->drop_rq);
}
@@ -1640,11 +1611,16 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
}
}
-static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv)
+static void mlx5e_rep_enable(struct mlx5e_priv *priv)
{
mlx5e_set_netdev_mtu_boundaries(priv);
}
+static int mlx5e_update_rep_rx(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data)
{
struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
@@ -1712,15 +1688,16 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
mlx5_lag_remove(mdev);
}
-static const struct mlx5e_profile mlx5e_vf_rep_profile = {
+static const struct mlx5e_profile mlx5e_rep_profile = {
.init = mlx5e_init_rep,
.cleanup = mlx5e_cleanup_rep,
.init_rx = mlx5e_init_rep_rx,
.cleanup_rx = mlx5e_cleanup_rep_rx,
.init_tx = mlx5e_init_rep_tx,
.cleanup_tx = mlx5e_cleanup_rep_tx,
- .enable = mlx5e_vf_rep_enable,
- .update_stats = mlx5e_vf_rep_update_hw_counters,
+ .enable = mlx5e_rep_enable,
+ .update_rx = mlx5e_update_rep_rx,
+ .update_stats = mlx5e_rep_update_hw_counters,
.rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
.max_tc = 1,
@@ -1735,6 +1712,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
.cleanup_tx = mlx5e_cleanup_rep_tx,
.enable = mlx5e_uplink_rep_enable,
.disable = mlx5e_uplink_rep_disable,
+ .update_rx = mlx5e_update_rep_rx,
.update_stats = mlx5e_uplink_rep_update_hw_counters,
.update_carrier = mlx5e_update_carrier,
.rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
@@ -1742,6 +1720,55 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
.max_tc = MLX5E_MAX_NUM_TC,
};
+static bool
+is_devlink_port_supported(const struct mlx5_core_dev *dev,
+ const struct mlx5e_rep_priv *rpriv)
+{
+ return rpriv->rep->vport == MLX5_VPORT_UPLINK ||
+ rpriv->rep->vport == MLX5_VPORT_PF ||
+ mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport);
+}
+
+static int register_devlink_port(struct mlx5_core_dev *dev,
+ struct mlx5e_rep_priv *rpriv)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct netdev_phys_item_id ppid = {};
+ int ret;
+
+ if (!is_devlink_port_supported(dev, rpriv))
+ return 0;
+
+ ret = mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid);
+ if (ret)
+ return ret;
+
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ devlink_port_attrs_set(&rpriv->dl_port,
+ DEVLINK_PORT_FLAVOUR_PHYSICAL,
+ PCI_FUNC(dev->pdev->devfn), false, 0,
+ &ppid.id[0], ppid.id_len);
+ else if (rep->vport == MLX5_VPORT_PF)
+ devlink_port_attrs_pci_pf_set(&rpriv->dl_port,
+ &ppid.id[0], ppid.id_len,
+ dev->pdev->devfn);
+ else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport))
+ devlink_port_attrs_pci_vf_set(&rpriv->dl_port,
+ &ppid.id[0], ppid.id_len,
+ dev->pdev->devfn,
+ rep->vport - 1);
+
+ return devlink_port_register(devlink, &rpriv->dl_port, rep->vport);
+}
+
+static void unregister_devlink_port(struct mlx5_core_dev *dev,
+ struct mlx5e_rep_priv *rpriv)
+{
+ if (is_devlink_port_supported(dev, rpriv))
+ devlink_port_unregister(&rpriv->dl_port);
+}
+
/* e-Switch vport representors */
static int
mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
@@ -1759,7 +1786,8 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
rpriv->rep = rep;
nch = mlx5e_get_max_num_channels(dev);
- profile = (rep->vport == MLX5_VPORT_UPLINK) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile;
+ profile = (rep->vport == MLX5_VPORT_UPLINK) ?
+ &mlx5e_uplink_rep_profile : &mlx5e_rep_profile;
netdev = mlx5e_create_netdev(dev, profile, nch, rpriv);
if (!netdev) {
pr_warn("Failed to create representor netdev for vport %d\n",
@@ -1769,7 +1797,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
}
rpriv->netdev = netdev;
- rep->rep_if[REP_ETH].priv = rpriv;
+ rep->rep_data[REP_ETH].priv = rpriv;
INIT_LIST_HEAD(&rpriv->vport_sqs_list);
if (rep->vport == MLX5_VPORT_UPLINK) {
@@ -1792,15 +1820,27 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
goto err_detach_netdev;
}
+ err = register_devlink_port(dev, rpriv);
+ if (err) {
+ esw_warn(dev, "Failed to register devlink port %d\n",
+ rep->vport);
+ goto err_neigh_cleanup;
+ }
+
err = register_netdev(netdev);
if (err) {
pr_warn("Failed to register representor netdev for vport %d\n",
rep->vport);
- goto err_neigh_cleanup;
+ goto err_devlink_cleanup;
}
+ if (is_devlink_port_supported(dev, rpriv))
+ devlink_port_type_eth_set(&rpriv->dl_port, netdev);
return 0;
+err_devlink_cleanup:
+ unregister_devlink_port(dev, rpriv);
+
err_neigh_cleanup:
mlx5e_rep_neigh_cleanup(rpriv);
@@ -1823,9 +1863,13 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *dev = priv->mdev;
void *ppriv = priv->ppriv;
+ if (is_devlink_port_supported(dev, rpriv))
+ devlink_port_type_clear(&rpriv->dl_port);
unregister_netdev(netdev);
+ unregister_devlink_port(dev, rpriv);
mlx5e_rep_neigh_cleanup(rpriv);
mlx5e_detach_netdev(priv);
if (rep->vport == MLX5_VPORT_UPLINK)
@@ -1843,16 +1887,17 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
return rpriv->netdev;
}
+static const struct mlx5_eswitch_rep_ops rep_ops = {
+ .load = mlx5e_vport_rep_load,
+ .unload = mlx5e_vport_rep_unload,
+ .get_proto_dev = mlx5e_vport_rep_get_proto_dev
+};
+
void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev)
{
struct mlx5_eswitch *esw = mdev->priv.eswitch;
- struct mlx5_eswitch_rep_if rep_if = {};
-
- rep_if.load = mlx5e_vport_rep_load;
- rep_if.unload = mlx5e_vport_rep_unload;
- rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
- mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH);
+ mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_ETH);
}
void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 83b573b1abac..c56e6ee4350c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -86,12 +86,13 @@ struct mlx5e_rep_priv {
struct mlx5_flow_handle *vport_rx_rule;
struct list_head vport_sqs_list;
struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */
+ struct devlink_port dl_port;
};
static inline
struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep)
{
- return (struct mlx5e_rep_priv *)rep->rep_if[REP_ETH].priv;
+ return rep->rep_data[REP_ETH].priv;
}
struct mlx5e_neigh {
@@ -150,13 +151,12 @@ struct mlx5e_encap_entry {
struct hlist_node encap_hlist;
struct list_head flows;
u32 encap_id;
- struct ip_tunnel_info tun_info;
+ const struct ip_tunnel_info *tun_info;
unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
struct net_device *out_dev;
struct net_device *route_dev;
- int tunnel_type;
- int tunnel_hlen;
+ struct mlx5e_tc_tunnel *tunnel;
int reformat_type;
u8 flags;
char *encap_header;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 13133e7f088e..ac6e586d403d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -34,6 +34,7 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
+#include <linux/indirect_call_wrapper.h>
#include <net/ip6_checksum.h>
#include <net/page_pool.h>
#include <net/inet_ecn.h>
@@ -46,6 +47,7 @@
#include "en_accel/tls_rxtx.h"
#include "lib/clock.h"
#include "en/xdp.h"
+#include "en/xsk/rx.h"
static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
{
@@ -234,8 +236,8 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
return true;
}
-static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
+static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
{
if (mlx5e_rx_cache_get(rq, dma_info))
return 0;
@@ -247,7 +249,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0,
PAGE_SIZE, rq->buff.map_dir);
if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
- put_page(dma_info->page);
+ page_pool_recycle_direct(rq->page_pool, dma_info->page);
dma_info->page = NULL;
return -ENOMEM;
}
@@ -255,13 +257,23 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
return 0;
}
+static inline int mlx5e_page_alloc(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ if (rq->umem)
+ return mlx5e_xsk_page_alloc_umem(rq, dma_info);
+ else
+ return mlx5e_page_alloc_pool(rq, dma_info);
+}
+
void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info)
{
dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir);
}
-void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
- bool recycle)
+void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info,
+ bool recycle)
{
if (likely(recycle)) {
if (mlx5e_rx_cache_put(rq, dma_info))
@@ -271,10 +283,25 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
page_pool_recycle_direct(rq->page_pool, dma_info->page);
} else {
mlx5e_page_dma_unmap(rq, dma_info);
+ page_pool_release_page(rq->page_pool, dma_info->page);
put_page(dma_info->page);
}
}
+static inline void mlx5e_page_release(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info,
+ bool recycle)
+{
+ if (rq->umem)
+ /* The `recycle` parameter is ignored, and the page is always
+ * put into the Reuse Ring, because there is no way to return
+ * the page to the userspace when the interface goes down.
+ */
+ mlx5e_xsk_page_release(rq, dma_info);
+ else
+ mlx5e_page_release_dynamic(rq, dma_info, recycle);
+}
+
static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
struct mlx5e_wqe_frag_info *frag)
{
@@ -286,7 +313,7 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
* offset) should just use the new one without replenishing again
* by themselves.
*/
- err = mlx5e_page_alloc_mapped(rq, frag->di);
+ err = mlx5e_page_alloc(rq, frag->di);
return err;
}
@@ -352,6 +379,13 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
int err;
int i;
+ if (rq->umem) {
+ int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags;
+
+ if (unlikely(!mlx5e_xsk_pages_enough_umem(rq, pages_desired)))
+ return -ENOMEM;
+ }
+
for (i = 0; i < wqe_bulk; i++) {
struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i);
@@ -399,11 +433,17 @@ mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
static void
mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
{
- const bool no_xdp_xmit =
- bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
+ bool no_xdp_xmit;
struct mlx5e_dma_info *dma_info = wi->umr.dma_info;
int i;
+ /* A common case for AF_XDP. */
+ if (bitmap_full(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE))
+ return;
+
+ no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap,
+ MLX5_MPWRQ_PAGES_PER_WQE);
+
for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++)
if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
mlx5e_page_release(rq, &dma_info[i], recycle);
@@ -425,11 +465,6 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
mlx5_wq_ll_update_db_record(wq);
}
-static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
-{
- return mlx5_wq_cyc_get_ctr_wrap_cnt(&sq->wq, sq->pc);
-}
-
static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
struct mlx5_wq_cyc *wq,
u16 pi, u16 nnops)
@@ -457,6 +492,12 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
int err;
int i;
+ if (rq->umem &&
+ unlikely(!mlx5e_xsk_pages_enough_umem(rq, MLX5_MPWRQ_PAGES_PER_WQE))) {
+ err = -ENOMEM;
+ goto err;
+ }
+
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
@@ -465,12 +506,10 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
}
umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
- if (unlikely(mlx5e_icosq_wrap_cnt(sq) < 2))
- memcpy(umr_wqe, &rq->mpwqe.umr_wqe,
- offsetof(struct mlx5e_umr_wqe, inline_mtts));
+ memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts));
for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
- err = mlx5e_page_alloc_mapped(rq, dma_info);
+ err = mlx5e_page_alloc(rq, dma_info);
if (unlikely(err))
goto err_unmap;
umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
@@ -485,6 +524,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset);
sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
+ sq->db.ico_wqe[pi].umr.rq = rq;
sq->pc += MLX5E_UMR_WQEBBS;
sq->doorbell_cseg = &umr_wqe->ctrl;
@@ -496,6 +536,8 @@ err_unmap:
dma_info--;
mlx5e_page_release(rq, dma_info, true);
}
+
+err:
rq->stats->buff_alloc_err++;
return err;
@@ -542,11 +584,10 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
return !!err;
}
-static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
+void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
{
struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
struct mlx5_cqe64 *cqe;
- u8 completed_umr = 0;
u16 sqcc;
int i;
@@ -587,7 +628,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
if (likely(wi->opcode == MLX5_OPCODE_UMR)) {
sqcc += MLX5E_UMR_WQEBBS;
- completed_umr++;
+ wi->umr.rq->mpwqe.umr_completed++;
} else if (likely(wi->opcode == MLX5_OPCODE_NOP)) {
sqcc++;
} else {
@@ -603,24 +644,25 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
sq->cc = sqcc;
mlx5_cqwq_update_db_record(&cq->wq);
-
- if (likely(completed_umr)) {
- mlx5e_post_rx_mpwqe(rq, completed_umr);
- rq->mpwqe.umr_in_progress -= completed_umr;
- }
}
bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
{
struct mlx5e_icosq *sq = &rq->channel->icosq;
struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
+ u8 umr_completed = rq->mpwqe.umr_completed;
+ int alloc_err = 0;
u8 missing, i;
u16 head;
if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
return false;
- mlx5e_poll_ico_cq(&sq->cq, rq);
+ if (umr_completed) {
+ mlx5e_post_rx_mpwqe(rq, umr_completed);
+ rq->mpwqe.umr_in_progress -= umr_completed;
+ rq->mpwqe.umr_completed = 0;
+ }
missing = mlx5_wq_ll_missing(wq) - rq->mpwqe.umr_in_progress;
@@ -634,7 +676,9 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
head = rq->mpwqe.actual_wq_head;
i = missing;
do {
- if (unlikely(mlx5e_alloc_rx_mpwqe(rq, head)))
+ alloc_err = mlx5e_alloc_rx_mpwqe(rq, head);
+
+ if (unlikely(alloc_err))
break;
head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
} while (--i);
@@ -648,6 +692,12 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk;
rq->mpwqe.actual_wq_head = head;
+ /* If XSK Fill Ring doesn't have enough frames, busy poll by
+ * rescheduling the NAPI poll.
+ */
+ if (unlikely(alloc_err == -ENOMEM && rq->umem))
+ return true;
+
return false;
}
@@ -873,8 +923,14 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP))
goto csum_unnecessary;
+ stats->csum_complete++;
skb->ip_summed = CHECKSUM_COMPLETE;
skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
+
+ if (test_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state))
+ return; /* CQE csum covers all received bytes */
+
+ /* csum might need some fixups ...*/
if (network_depth > ETH_HLEN)
/* CQE csum is calculated from the IP header and does
* not cover VLAN headers (if present). This will add
@@ -885,7 +941,6 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
skb->csum);
mlx5e_skb_padding_csum(skb, network_depth, proto, stats);
- stats->csum_complete++;
return;
}
@@ -1016,7 +1071,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
}
rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt);
+ consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, false);
rcu_read_unlock();
if (consumed)
return NULL; /* page/packet was consumed by XDP */
@@ -1092,7 +1147,10 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
wi = get_frag(rq, ci);
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
- skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+ skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
+ mlx5e_skb_from_cqe_linear,
+ mlx5e_skb_from_cqe_nonlinear,
+ rq, cqe, wi, cqe_bcnt);
if (!skb) {
/* probably for XDP */
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
@@ -1230,7 +1288,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
prefetch(data);
rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32);
+ consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, false);
rcu_read_unlock();
if (consumed) {
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
@@ -1279,8 +1337,10 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
- skb = rq->mpwqe.skb_from_cqe_mpwrq(rq, wi, cqe_bcnt, head_offset,
- page_idx);
+ skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq,
+ mlx5e_skb_from_cqe_mpwrq_linear,
+ mlx5e_skb_from_cqe_mpwrq_nonlinear,
+ rq, wi, cqe_bcnt, head_offset, page_idx);
if (!skb)
goto mpwrq_cqe_out;
@@ -1327,7 +1387,8 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
mlx5_cqwq_pop(cqwq);
- rq->handle_rx_cqe(rq, cqe);
+ INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+ mlx5e_handle_rx_cqe, rq, cqe);
} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
out:
@@ -1437,7 +1498,10 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
wi = get_frag(rq, ci);
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
- skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+ skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
+ mlx5e_skb_from_cqe_linear,
+ mlx5e_skb_from_cqe_nonlinear,
+ rq, cqe, wi, cqe_bcnt);
if (!skb)
goto wq_free_wqe;
@@ -1469,7 +1533,10 @@ void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
wi = get_frag(rq, ci);
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
- skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+ skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
+ mlx5e_skb_from_cqe_linear,
+ mlx5e_skb_from_cqe_nonlinear,
+ rq, cqe, wi, cqe_bcnt);
if (unlikely(!skb)) {
/* a DROP, save the page-reuse checks */
mlx5e_free_rx_wqe(rq, wi, true);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 4382ef85488c..840ec945ccba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -64,7 +64,7 @@ static int mlx5e_test_health_info(struct mlx5e_priv *priv)
{
struct mlx5_core_health *health = &priv->mdev->priv.health;
- return health->sick ? 1 : 0;
+ return health->fatal_error ? 1 : 0;
}
static int mlx5e_test_link_state(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 483d321d2151..539b4d3656da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -48,8 +48,15 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) },
#ifdef CONFIG_MLX5_EN_TLS
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_no_sync_data) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_bypass_req) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) },
#endif
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
@@ -104,7 +111,33 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_aff_change) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_force_irq) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_complete) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary_inner) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_none) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_ecn_mark) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_removed_vlan_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_redirect) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_wqe_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_cqes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_strides) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_oversize_pkts_sw_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_buff_alloc_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_blks) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_pkts) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_congst_umr) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_arfs_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_xmit) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_mpwqe) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_inlnw) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_cqes) },
};
#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc)
@@ -144,6 +177,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
&priv->channel_stats[i];
struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq;
struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats->rq_xdpsq;
+ struct mlx5e_xdpsq_stats *xsksq_stats = &channel_stats->xsksq;
+ struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
struct mlx5e_ch_stats *ch_stats = &channel_stats->ch;
int j;
@@ -186,6 +221,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->ch_poll += ch_stats->poll;
s->ch_arm += ch_stats->arm;
s->ch_aff_change += ch_stats->aff_change;
+ s->ch_force_irq += ch_stats->force_irq;
s->ch_eq_rearm += ch_stats->eq_rearm;
/* xdp redirect */
s->tx_xdp_xmit += xdpsq_red_stats->xmit;
@@ -194,6 +230,32 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->tx_xdp_full += xdpsq_red_stats->full;
s->tx_xdp_err += xdpsq_red_stats->err;
s->tx_xdp_cqes += xdpsq_red_stats->cqes;
+ /* AF_XDP zero-copy */
+ s->rx_xsk_packets += xskrq_stats->packets;
+ s->rx_xsk_bytes += xskrq_stats->bytes;
+ s->rx_xsk_csum_complete += xskrq_stats->csum_complete;
+ s->rx_xsk_csum_unnecessary += xskrq_stats->csum_unnecessary;
+ s->rx_xsk_csum_unnecessary_inner += xskrq_stats->csum_unnecessary_inner;
+ s->rx_xsk_csum_none += xskrq_stats->csum_none;
+ s->rx_xsk_ecn_mark += xskrq_stats->ecn_mark;
+ s->rx_xsk_removed_vlan_packets += xskrq_stats->removed_vlan_packets;
+ s->rx_xsk_xdp_drop += xskrq_stats->xdp_drop;
+ s->rx_xsk_xdp_redirect += xskrq_stats->xdp_redirect;
+ s->rx_xsk_wqe_err += xskrq_stats->wqe_err;
+ s->rx_xsk_mpwqe_filler_cqes += xskrq_stats->mpwqe_filler_cqes;
+ s->rx_xsk_mpwqe_filler_strides += xskrq_stats->mpwqe_filler_strides;
+ s->rx_xsk_oversize_pkts_sw_drop += xskrq_stats->oversize_pkts_sw_drop;
+ s->rx_xsk_buff_alloc_err += xskrq_stats->buff_alloc_err;
+ s->rx_xsk_cqe_compress_blks += xskrq_stats->cqe_compress_blks;
+ s->rx_xsk_cqe_compress_pkts += xskrq_stats->cqe_compress_pkts;
+ s->rx_xsk_congst_umr += xskrq_stats->congst_umr;
+ s->rx_xsk_arfs_err += xskrq_stats->arfs_err;
+ s->tx_xsk_xmit += xsksq_stats->xmit;
+ s->tx_xsk_mpwqe += xsksq_stats->mpwqe;
+ s->tx_xsk_inlnw += xsksq_stats->inlnw;
+ s->tx_xsk_full += xsksq_stats->full;
+ s->tx_xsk_err += xsksq_stats->err;
+ s->tx_xsk_cqes += xsksq_stats->cqes;
for (j = 0; j < priv->max_opened_tc; j++) {
struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
@@ -216,8 +278,15 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->tx_csum_none += sq_stats->csum_none;
s->tx_csum_partial += sq_stats->csum_partial;
#ifdef CONFIG_MLX5_EN_TLS
- s->tx_tls_ooo += sq_stats->tls_ooo;
- s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes;
+ s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets;
+ s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes;
+ s->tx_tls_ctx += sq_stats->tls_ctx;
+ s->tx_tls_ooo += sq_stats->tls_ooo;
+ s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes;
+ s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data;
+ s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req;
+ s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes;
+ s->tx_tls_dump_packets += sq_stats->tls_dump_packets;
#endif
s->tx_cqes += sq_stats->cqes;
}
@@ -1238,6 +1307,16 @@ static const struct counter_desc sq_stats_desc[] = {
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+#ifdef CONFIG_MLX5_EN_TLS
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) },
+#endif
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
@@ -1266,11 +1345,43 @@ static const struct counter_desc xdpsq_stats_desc[] = {
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
};
+static const struct counter_desc xskrq_stats_desc[] = {
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, packets) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, bytes) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_complete) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_none) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, ecn_mark) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_drop) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_redirect) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, wqe_err) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, congst_umr) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, arfs_err) },
+};
+
+static const struct counter_desc xsksq_stats_desc[] = {
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) },
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) },
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, full) },
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, err) },
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
+};
+
static const struct counter_desc ch_stats_desc[] = {
{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, events) },
{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, poll) },
{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, arm) },
{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, aff_change) },
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, force_irq) },
{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) },
};
@@ -1278,6 +1389,8 @@ static const struct counter_desc ch_stats_desc[] = {
#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc)
#define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc)
#define NUM_RQ_XDPSQ_STATS ARRAY_SIZE(rq_xdpsq_stats_desc)
+#define NUM_XSKRQ_STATS ARRAY_SIZE(xskrq_stats_desc)
+#define NUM_XSKSQ_STATS ARRAY_SIZE(xsksq_stats_desc)
#define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc)
static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv)
@@ -1288,13 +1401,16 @@ static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv)
(NUM_CH_STATS * max_nch) +
(NUM_SQ_STATS * max_nch * priv->max_opened_tc) +
(NUM_RQ_XDPSQ_STATS * max_nch) +
- (NUM_XDPSQ_STATS * max_nch);
+ (NUM_XDPSQ_STATS * max_nch) +
+ (NUM_XSKRQ_STATS * max_nch * priv->xsk.ever_used) +
+ (NUM_XSKSQ_STATS * max_nch * priv->xsk.ever_used);
}
static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
int idx)
{
int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
+ bool is_xsk = priv->xsk.ever_used;
int i, j, tc;
for (i = 0; i < max_nch; i++)
@@ -1306,6 +1422,9 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
for (j = 0; j < NUM_RQ_STATS; j++)
sprintf(data + (idx++) * ETH_GSTRING_LEN,
rq_stats_desc[j].format, i);
+ for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ xskrq_stats_desc[j].format, i);
for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
sprintf(data + (idx++) * ETH_GSTRING_LEN,
rq_xdpsq_stats_desc[j].format, i);
@@ -1318,10 +1437,14 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
sq_stats_desc[j].format,
priv->channel_tc2txq[i][tc]);
- for (i = 0; i < max_nch; i++)
+ for (i = 0; i < max_nch; i++) {
+ for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ xsksq_stats_desc[j].format, i);
for (j = 0; j < NUM_XDPSQ_STATS; j++)
sprintf(data + (idx++) * ETH_GSTRING_LEN,
xdpsq_stats_desc[j].format, i);
+ }
return idx;
}
@@ -1330,6 +1453,7 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data,
int idx)
{
int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
+ bool is_xsk = priv->xsk.ever_used;
int i, j, tc;
for (i = 0; i < max_nch; i++)
@@ -1343,6 +1467,10 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data,
data[idx++] =
MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq,
rq_stats_desc, j);
+ for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xskrq,
+ xskrq_stats_desc, j);
for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
data[idx++] =
MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq,
@@ -1356,11 +1484,16 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data,
MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc],
sq_stats_desc, j);
- for (i = 0; i < max_nch; i++)
+ for (i = 0; i < max_nch; i++) {
+ for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xsksq,
+ xsksq_stats_desc, j);
for (j = 0; j < NUM_XDPSQ_STATS; j++)
data[idx++] =
MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq,
xdpsq_stats_desc, j);
+ }
return idx;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index cdddcc46971b..76ac111e14d0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -46,6 +46,8 @@
#define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld)
#define MLX5E_DECLARE_XDPSQ_STAT(type, fld) "tx%d_xdp_"#fld, offsetof(type, fld)
#define MLX5E_DECLARE_RQ_XDPSQ_STAT(type, fld) "rx%d_xdp_tx_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_XSKRQ_STAT(type, fld) "rx%d_xsk_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_XSKSQ_STAT(type, fld) "tx%d_xsk_"#fld, offsetof(type, fld)
#define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld)
struct counter_desc {
@@ -116,12 +118,46 @@ struct mlx5e_sw_stats {
u64 ch_poll;
u64 ch_arm;
u64 ch_aff_change;
+ u64 ch_force_irq;
u64 ch_eq_rearm;
#ifdef CONFIG_MLX5_EN_TLS
+ u64 tx_tls_encrypted_packets;
+ u64 tx_tls_encrypted_bytes;
+ u64 tx_tls_ctx;
u64 tx_tls_ooo;
u64 tx_tls_resync_bytes;
+ u64 tx_tls_drop_no_sync_data;
+ u64 tx_tls_drop_bypass_req;
+ u64 tx_tls_dump_packets;
+ u64 tx_tls_dump_bytes;
#endif
+
+ u64 rx_xsk_packets;
+ u64 rx_xsk_bytes;
+ u64 rx_xsk_csum_complete;
+ u64 rx_xsk_csum_unnecessary;
+ u64 rx_xsk_csum_unnecessary_inner;
+ u64 rx_xsk_csum_none;
+ u64 rx_xsk_ecn_mark;
+ u64 rx_xsk_removed_vlan_packets;
+ u64 rx_xsk_xdp_drop;
+ u64 rx_xsk_xdp_redirect;
+ u64 rx_xsk_wqe_err;
+ u64 rx_xsk_mpwqe_filler_cqes;
+ u64 rx_xsk_mpwqe_filler_strides;
+ u64 rx_xsk_oversize_pkts_sw_drop;
+ u64 rx_xsk_buff_alloc_err;
+ u64 rx_xsk_cqe_compress_blks;
+ u64 rx_xsk_cqe_compress_pkts;
+ u64 rx_xsk_congst_umr;
+ u64 rx_xsk_arfs_err;
+ u64 tx_xsk_xmit;
+ u64 tx_xsk_mpwqe;
+ u64 tx_xsk_inlnw;
+ u64 tx_xsk_full;
+ u64 tx_xsk_err;
+ u64 tx_xsk_cqes;
};
struct mlx5e_qcounter_stats {
@@ -227,8 +263,15 @@ struct mlx5e_sq_stats {
u64 added_vlan_packets;
u64 nop;
#ifdef CONFIG_MLX5_EN_TLS
+ u64 tls_encrypted_packets;
+ u64 tls_encrypted_bytes;
+ u64 tls_ctx;
u64 tls_ooo;
u64 tls_resync_bytes;
+ u64 tls_drop_no_sync_data;
+ u64 tls_drop_bypass_req;
+ u64 tls_dump_packets;
+ u64 tls_dump_bytes;
#endif
/* less likely accessed in data path */
u64 csum_none;
@@ -256,6 +299,7 @@ struct mlx5e_ch_stats {
u64 poll;
u64 arm;
u64 aff_change;
+ u64 force_irq;
u64 eq_rearm;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 31cd02f11499..cc096f6011d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -53,6 +53,7 @@
#include "en/port.h"
#include "en/tc_tun.h"
#include "lib/devcom.h"
+#include "lib/geneve.h"
struct mlx5_nic_flow_attr {
u32 action;
@@ -126,7 +127,7 @@ struct mlx5e_tc_flow {
};
struct mlx5e_tc_flow_parse_attr {
- struct ip_tunnel_info tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
+ const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
struct net_device *filter_dev;
struct mlx5_flow_spec spec;
int num_mod_hdr_actions;
@@ -716,19 +717,22 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
+ struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
struct mlx5_nic_flow_attr *attr = flow->nic_attr;
struct mlx5_core_dev *dev = priv->mdev;
struct mlx5_flow_destination dest[2] = {};
struct mlx5_flow_act flow_act = {
.action = attr->action,
- .flow_tag = attr->flow_tag,
.reformat_id = 0,
- .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND,
+ .flags = FLOW_ACT_NO_APPEND,
};
struct mlx5_fc *counter = NULL;
bool table_created = false;
int err, dest_ix = 0;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+ flow_context->flow_tag = attr->flow_tag;
+
if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
if (err) {
@@ -799,7 +803,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
}
if (attr->match_level != MLX5_MATCH_NONE)
- parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
&flow_act, dest, dest_ix);
@@ -1063,6 +1067,19 @@ err_max_prio_chain:
return err;
}
+static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec;
+ void *headers_v = MLX5_ADDR_OF(fte_match_param,
+ spec->match_value,
+ misc_parameters_3);
+ u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
+ headers_v,
+ geneve_tlv_option_0_data);
+
+ return !!geneve_tlv_opt_0_data;
+}
+
static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
@@ -1084,6 +1101,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
}
+ if (mlx5_flow_has_geneve_opt(flow))
+ mlx5_geneve_tlv_option_del(priv->mdev->geneve);
+
mlx5_eswitch_del_vlan_action(esw, attr);
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
@@ -1330,7 +1350,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
static int parse_tunnel_attr(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f,
+ struct flow_cls_offload *f,
struct net_device *filter_dev, u8 *match_level)
{
struct netlink_ext_ack *extack = f->common.extack;
@@ -1338,8 +1358,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
outer_headers);
void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers);
- struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
- struct flow_match_control enc_control;
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
int err;
err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
@@ -1350,9 +1369,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
return err;
}
- flow_rule_match_enc_control(rule, &enc_control);
-
- if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
struct flow_match_ipv4_addrs match;
flow_rule_match_enc_ipv4_addrs(rule, &match);
@@ -1372,7 +1389,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
- } else if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
struct flow_match_ipv6_addrs match;
flow_rule_match_enc_ipv6_addrs(rule, &match);
@@ -1461,7 +1478,7 @@ static void *get_match_headers_value(u32 flags,
static int __parse_cls_flower(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f,
+ struct flow_cls_offload *f,
struct net_device *filter_dev,
u8 *match_level, u8 *tunnel_match_level)
{
@@ -1474,7 +1491,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
misc_parameters);
void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
misc_parameters);
- struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct flow_dissector *dissector = rule->match.dissector;
u16 addr_type = 0;
u8 ip_proto = 0;
@@ -1482,7 +1499,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
*match_level = MLX5_MATCH_NONE;
if (dissector->used_keys &
- ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ ~(BIT(FLOW_DISSECTOR_KEY_META) |
+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
BIT(FLOW_DISSECTOR_KEY_BASIC) |
BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
BIT(FLOW_DISSECTOR_KEY_VLAN) |
@@ -1497,29 +1515,17 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
BIT(FLOW_DISSECTOR_KEY_TCP) |
BIT(FLOW_DISSECTOR_KEY_IP) |
- BIT(FLOW_DISSECTOR_KEY_ENC_IP))) {
+ BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
dissector->used_keys);
return -EOPNOTSUPP;
}
- if ((flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
- flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
- flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
- flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
- struct flow_match_control match;
-
- flow_rule_match_enc_control(rule, &match);
- switch (match.key->addr_type) {
- case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
- case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
- if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level))
- return -EOPNOTSUPP;
- break;
- default:
+ if (mlx5e_get_tc_tun(filter_dev)) {
+ if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level))
return -EOPNOTSUPP;
- }
/* In decap flow, header pointers should point to the inner
* headers, outer header were already set by parse_tunnel_attr
@@ -1822,7 +1828,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
static int parse_cls_flower(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f,
+ struct flow_cls_offload *f,
struct net_device *filter_dev)
{
struct netlink_ext_ack *extack = f->common.extack;
@@ -2581,21 +2587,21 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
}
struct encap_key {
- struct ip_tunnel_key *ip_tun_key;
- int tunnel_type;
+ const struct ip_tunnel_key *ip_tun_key;
+ struct mlx5e_tc_tunnel *tc_tunnel;
};
static inline int cmp_encap_info(struct encap_key *a,
struct encap_key *b)
{
return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
- a->tunnel_type != b->tunnel_type;
+ a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
}
static inline int hash_encap_info(struct encap_key *key)
{
return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
- key->tunnel_type);
+ key->tc_tunnel->tunnel_type);
}
@@ -2625,7 +2631,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_info *tun_info;
struct encap_key key, e_key;
struct mlx5e_encap_entry *e;
unsigned short family;
@@ -2634,17 +2640,21 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
int err = 0;
parse_attr = attr->parse_attr;
- tun_info = &parse_attr->tun_info[out_index];
+ tun_info = parse_attr->tun_info[out_index];
family = ip_tunnel_info_af(tun_info);
key.ip_tun_key = &tun_info->key;
- key.tunnel_type = mlx5e_tc_tun_get_type(mirred_dev);
+ key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
+ if (!key.tc_tunnel) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
+ return -EOPNOTSUPP;
+ }
hash_key = hash_encap_info(&key);
hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
encap_hlist, hash_key) {
- e_key.ip_tun_key = &e->tun_info.key;
- e_key.tunnel_type = e->tunnel_type;
+ e_key.ip_tun_key = &e->tun_info->key;
+ e_key.tc_tunnel = e->tunnel;
if (!cmp_encap_info(&e_key, &key)) {
found = true;
break;
@@ -2659,7 +2669,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
if (!e)
return -ENOMEM;
- e->tun_info = *tun_info;
+ e->tun_info = tun_info;
err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
if (err)
goto out_err;
@@ -2793,6 +2803,16 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv,
return err;
}
+bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
+ struct net_device *out_dev)
+{
+ if (is_merged_eswitch_dev(priv, out_dev))
+ return true;
+
+ return mlx5e_eswitch_rep(out_dev) &&
+ same_hw_devs(priv, netdev_priv(out_dev));
+}
+
static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
struct flow_action *flow_action,
struct mlx5e_tc_flow *flow,
@@ -2812,9 +2832,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
if (!flow_action_has_entries(flow_action))
return -EINVAL;
- attr->in_rep = rpriv->rep;
- attr->in_mdev = priv->mdev;
-
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
case FLOW_ACTION_DROP:
@@ -2861,9 +2878,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
- if (netdev_port_same_parent_id(priv->netdev,
- out_dev) ||
- is_merged_eswitch_dev(priv, out_dev)) {
+ if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev);
@@ -2880,6 +2895,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
if (err)
return err;
}
+
if (is_vlan_dev(parse_attr->filter_dev)) {
err = add_vlan_pop_action(priv, attr,
&action);
@@ -2887,8 +2903,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
return err;
}
- if (!mlx5e_eswitch_rep(out_dev))
+ if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not on same switch HW, can't offload forwarding");
+ pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
+ priv->netdev->name, out_dev->name);
return -EOPNOTSUPP;
+ }
out_priv = netdev_priv(out_dev);
rpriv = out_priv->ppriv;
@@ -2898,7 +2919,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
} else if (encap) {
parse_attr->mirred_ifindex[attr->out_count] =
out_dev->ifindex;
- parse_attr->tun_info[attr->out_count] = *info;
+ parse_attr->tun_info[attr->out_count] = info;
encap = false;
attr->dests[attr->out_count].flags |=
MLX5_ESW_DEST_ENCAP;
@@ -3095,7 +3116,7 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
static int
mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
- struct tc_cls_flower_offload *f, u16 flow_flags,
+ struct flow_cls_offload *f, u16 flow_flags,
struct mlx5e_tc_flow_parse_attr **__parse_attr,
struct mlx5e_tc_flow **__flow)
{
@@ -3129,7 +3150,7 @@ static void
mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct tc_cls_flower_offload *f,
+ struct flow_cls_offload *f,
struct mlx5_eswitch_rep *in_rep,
struct mlx5_core_dev *in_mdev)
{
@@ -3151,13 +3172,13 @@ mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
static struct mlx5e_tc_flow *
__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f,
+ struct flow_cls_offload *f,
u16 flow_flags,
struct net_device *filter_dev,
struct mlx5_eswitch_rep *in_rep,
struct mlx5_core_dev *in_mdev)
{
- struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct netlink_ext_ack *extack = f->common.extack;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_tc_flow *flow;
@@ -3201,7 +3222,7 @@ out:
return ERR_PTR(err);
}
-static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f,
+static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
struct mlx5e_tc_flow *flow,
u16 flow_flags)
{
@@ -3253,7 +3274,7 @@ out:
static int
mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f,
+ struct flow_cls_offload *f,
u16 flow_flags,
struct net_device *filter_dev,
struct mlx5e_tc_flow **__flow)
@@ -3287,12 +3308,12 @@ out:
static int
mlx5e_add_nic_flow(struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f,
+ struct flow_cls_offload *f,
u16 flow_flags,
struct net_device *filter_dev,
struct mlx5e_tc_flow **__flow)
{
- struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct netlink_ext_ack *extack = f->common.extack;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_tc_flow *flow;
@@ -3338,7 +3359,7 @@ out:
static int
mlx5e_tc_add_flow(struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f,
+ struct flow_cls_offload *f,
int flags,
struct net_device *filter_dev,
struct mlx5e_tc_flow **flow)
@@ -3352,7 +3373,7 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv,
if (!tc_can_offload_extack(priv->netdev, f->common.extack))
return -EOPNOTSUPP;
- if (esw && esw->mode == SRIOV_OFFLOADS)
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
err = mlx5e_add_fdb_flow(priv, f, flow_flags,
filter_dev, flow);
else
@@ -3363,7 +3384,7 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv,
}
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f, int flags)
+ struct flow_cls_offload *f, int flags)
{
struct netlink_ext_ack *extack = f->common.extack;
struct rhashtable *tc_ht = get_tc_ht(priv, flags);
@@ -3410,7 +3431,7 @@ static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
}
int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f, int flags)
+ struct flow_cls_offload *f, int flags)
{
struct rhashtable *tc_ht = get_tc_ht(priv, flags);
struct mlx5e_tc_flow *flow;
@@ -3429,7 +3450,7 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
}
int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f, int flags)
+ struct flow_cls_offload *f, int flags)
{
struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
struct rhashtable *tc_ht = get_tc_ht(priv, flags);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index f62e81902d27..3ab39275ca7d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -54,12 +54,12 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht);
void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht);
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f, int flags);
+ struct flow_cls_offload *f, int flags);
int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f, int flags);
+ struct flow_cls_offload *f, int flags);
int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f, int flags);
+ struct flow_cls_offload *f, int flags);
struct mlx5e_encap_entry;
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
@@ -74,6 +74,9 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags);
void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
+bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
+ struct net_device *out_dev);
+
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 195a7d903cec..600e92cb629a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -35,55 +35,12 @@
#include <net/geneve.h>
#include <net/dsfield.h>
#include "en.h"
+#include "en/txrx.h"
#include "ipoib/ipoib.h"
#include "en_accel/en_accel.h"
+#include "en_accel/ktls.h"
#include "lib/clock.h"
-#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS
-
-#ifndef CONFIG_MLX5_EN_TLS
-#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
- MLX5E_SQ_NOPS_ROOM)
-#else
-/* TLS offload requires MLX5E_SQ_STOP_ROOM to have
- * enough room for a resync SKB, a normal SKB and a NOP
- */
-#define MLX5E_SQ_STOP_ROOM (2 * MLX5_SEND_WQE_MAX_WQEBBS +\
- MLX5E_SQ_NOPS_ROOM)
-#endif
-
-static inline void mlx5e_tx_dma_unmap(struct device *pdev,
- struct mlx5e_sq_dma *dma)
-{
- switch (dma->type) {
- case MLX5E_DMA_MAP_SINGLE:
- dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
- break;
- case MLX5E_DMA_MAP_PAGE:
- dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
- break;
- default:
- WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
- }
-}
-
-static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i)
-{
- return &sq->db.dma_fifo[i & sq->dma_fifo_mask];
-}
-
-static inline void mlx5e_dma_push(struct mlx5e_txqsq *sq,
- dma_addr_t addr,
- u32 size,
- enum mlx5e_dma_map_type map_type)
-{
- struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++);
-
- dma->addr = addr;
- dma->size = size;
- dma->type = map_type;
-}
-
static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
{
int i;
@@ -113,13 +70,13 @@ static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev)
{
- int channel_ix = netdev_pick_tx(dev, skb, NULL);
+ int txq_ix = netdev_pick_tx(dev, skb, NULL);
struct mlx5e_priv *priv = netdev_priv(dev);
u16 num_channels;
int up = 0;
if (!netdev_get_num_tc(dev))
- return channel_ix;
+ return txq_ix;
#ifdef CONFIG_MLX5_CORE_EN_DCB
if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP)
@@ -129,14 +86,14 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
if (skb_vlan_tag_present(skb))
up = skb_vlan_tag_get_prio(skb);
- /* channel_ix can be larger than num_channels since
+ /* txq_ix can be larger than num_channels since
* dev->num_real_tx_queues = num_channels * num_tc
*/
num_channels = priv->channels.params.num_channels;
- if (channel_ix >= num_channels)
- channel_ix = reciprocal_scale(channel_ix, num_channels);
+ if (txq_ix >= num_channels)
+ txq_ix = priv->txq2sq[txq_ix]->ch_ix;
- return priv->channel_tc2txq[channel_ix][up];
+ return priv->channel_tc2txq[txq_ix][up];
}
static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
@@ -277,23 +234,6 @@ dma_unmap_wqe_err:
return -ENOMEM;
}
-static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
- struct mlx5_wq_cyc *wq,
- u16 pi, u16 nnops)
-{
- struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-
- edge_wi = wi + nnops;
-
- /* fill sq frag edge with nops to avoid wqe wrapping two pages */
- for (; wi < edge_wi; wi++) {
- wi->skb = NULL;
- wi->num_wqebbs = 1;
- mlx5e_post_nop(wq, sq->sqn, &sq->pc);
- }
- sq->stats->nop += nnops;
-}
-
static inline void
mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma,
@@ -301,6 +241,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
bool xmit_more)
{
struct mlx5_wq_cyc *wq = &sq->wq;
+ bool send_doorbell;
wi->num_bytes = num_bytes;
wi->num_dma = num_dma;
@@ -310,23 +251,21 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
- netdev_tx_sent_queue(sq->txq, num_bytes);
-
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
sq->pc += wi->num_wqebbs;
- if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM))) {
+ if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, sq->stop_room))) {
netif_tx_stop_queue(sq->txq);
sq->stats->stopped++;
}
- if (!xmit_more || netif_xmit_stopped(sq->txq))
+ send_doorbell = __netdev_tx_sent_queue(sq->txq, num_bytes,
+ xmit_more);
+ if (send_doorbell)
mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
}
-#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
-
netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
{
@@ -353,9 +292,12 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
stats->packets += skb_shinfo(skb)->gso_segs;
} else {
+ u8 mode = mlx5e_transport_inline_tx_wqe(wqe) ?
+ MLX5_INLINE_MODE_TCP_UDP : sq->min_inline_mode;
+
opcode = MLX5_OPCODE_SEND;
mss = 0;
- ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb);
+ ihs = mlx5e_calc_min_inline(mode, skb);
num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
stats->packets++;
}
@@ -380,11 +322,17 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
#ifdef CONFIG_MLX5_EN_IPSEC
struct mlx5_wqe_eth_seg cur_eth = wqe->eth;
#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ struct mlx5_wqe_ctrl_seg cur_ctrl = wqe->ctrl;
+#endif
mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
- mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
+ wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
#ifdef CONFIG_MLX5_EN_IPSEC
wqe->eth = cur_eth;
#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ wqe->ctrl = cur_ctrl;
+#endif
}
/* fill wqe */
@@ -443,7 +391,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
u16 pi;
sq = priv->txq2sq[skb_get_queue_mapping(skb)];
- mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
+ wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
/* might send skbs and update wqe and pi */
skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi);
@@ -531,8 +479,16 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
wi = &sq->db.wqe_info[ci];
skb = wi->skb;
- if (unlikely(!skb)) { /* nop */
- sqcc++;
+ if (unlikely(!skb)) {
+#ifdef CONFIG_MLX5_EN_TLS
+ if (wi->resync_dump_frag) {
+ struct mlx5e_sq_dma *dma =
+ mlx5e_dma_get(sq, dma_fifo_cc++);
+
+ mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma);
+ }
+#endif
+ sqcc += wi->num_wqebbs;
continue;
}
@@ -574,8 +530,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
netdev_tx_completed_queue(sq->txq, npkts, nbytes);
if (netif_tx_queue_stopped(sq->txq) &&
- mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
- MLX5E_SQ_STOP_ROOM) &&
+ mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
netif_tx_wake_queue(sq->txq);
stats->wake++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index f9862bf75491..c50b6f0769c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -33,6 +33,7 @@
#include <linux/irq.h>
#include "en.h"
#include "en/xdp.h"
+#include "en/xsk/tx.h"
static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
{
@@ -48,26 +49,24 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
{
struct mlx5e_sq_stats *stats = sq->stats;
- struct net_dim_sample dim_sample;
+ struct dim_sample dim_sample;
if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state)))
return;
- net_dim_sample(sq->cq.event_ctr, stats->packets, stats->bytes,
- &dim_sample);
+ dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
net_dim(&sq->dim, dim_sample);
}
static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
{
struct mlx5e_rq_stats *stats = rq->stats;
- struct net_dim_sample dim_sample;
+ struct dim_sample dim_sample;
if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state)))
return;
- net_dim_sample(rq->cq.event_ctr, stats->packets, stats->bytes,
- &dim_sample);
+ dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
net_dim(&rq->dim, dim_sample);
}
@@ -87,7 +86,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
napi);
struct mlx5e_ch_stats *ch_stats = c->stats;
+ struct mlx5e_xdpsq *xsksq = &c->xsksq;
+ struct mlx5e_rq *xskrq = &c->xskrq;
struct mlx5e_rq *rq = &c->rq;
+ bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+ bool aff_change = false;
+ bool busy_xsk = false;
bool busy = false;
int work_done = 0;
int i;
@@ -97,22 +101,38 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
for (i = 0; i < c->num_tc; i++)
busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget);
- busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq, NULL);
+ busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
if (c->xdp)
- busy |= mlx5e_poll_xdpsq_cq(&rq->xdpsq.cq, rq);
+ busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq);
if (likely(budget)) { /* budget=0 means: don't poll rx rings */
- work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
+ if (xsk_open)
+ work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
+
+ if (likely(budget - work_done))
+ work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
+
busy |= work_done == budget;
}
- busy |= c->rq.post_wqes(rq);
+ mlx5e_poll_ico_cq(&c->icosq.cq);
+
+ busy |= rq->post_wqes(rq);
+ if (xsk_open) {
+ mlx5e_poll_ico_cq(&c->xskicosq.cq);
+ busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq);
+ busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET);
+ busy_xsk |= xskrq->post_wqes(xskrq);
+ }
+
+ busy |= busy_xsk;
if (busy) {
if (likely(mlx5e_channel_no_affinity_change(c)))
return budget;
ch_stats->aff_change++;
+ aff_change = true;
if (budget && work_done == budget)
work_done--;
}
@@ -133,10 +153,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
mlx5e_cq_arm(&c->icosq.cq);
mlx5e_cq_arm(&c->xdpsq.cq);
+ if (xsk_open) {
+ mlx5e_handle_rx_dim(xskrq);
+ mlx5e_cq_arm(&c->xskicosq.cq);
+ mlx5e_cq_arm(&xsksq->cq);
+ mlx5e_cq_arm(&xskrq->cq);
+ }
+
+ if (unlikely(aff_change && busy_xsk)) {
+ mlx5e_trigger_irq(&c->icosq);
+ ch_stats->force_irq++;
+ }
+
return work_done;
}
-void mlx5e_completion_event(struct mlx5_core_cq *mcq)
+void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 23883d1fa22f..41f25ea2e8d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -61,17 +61,21 @@ enum {
MLX5_EQ_DOORBEL_OFFSET = 0x40,
};
-struct mlx5_irq_info {
- cpumask_var_t mask;
- char name[MLX5_MAX_IRQ_NAME];
- void *context; /* dev_id provided to request_irq */
+/* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update
+ * the ci before we polled all the entries in the EQ. MLX5_NUM_SPARE_EQE is
+ * used to set the EQ size, budget must be smaller than the EQ size.
+ */
+enum {
+ MLX5_EQ_POLLING_BUDGET = 128,
};
+static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE);
+
struct mlx5_eq_table {
struct list_head comp_eqs_list;
- struct mlx5_eq pages_eq;
- struct mlx5_eq cmd_eq;
- struct mlx5_eq async_eq;
+ struct mlx5_eq_async pages_eq;
+ struct mlx5_eq_async cmd_eq;
+ struct mlx5_eq_async async_eq;
struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];
@@ -79,11 +83,8 @@ struct mlx5_eq_table {
struct mlx5_nb cq_err_nb;
struct mutex lock; /* sync async eqs creations */
- int num_comp_vectors;
- struct mlx5_irq_info *irq_info;
-#ifdef CONFIG_RFS_ACCEL
- struct cpu_rmap *rmap;
-#endif
+ int num_comp_eqs;
+ struct mlx5_irq_table *irq_table;
};
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
@@ -124,16 +125,24 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
return cq;
}
-static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
+static int mlx5_eq_comp_int(struct notifier_block *nb,
+ __always_unused unsigned long action,
+ __always_unused void *data)
{
- struct mlx5_eq_comp *eq_comp = eq_ptr;
- struct mlx5_eq *eq = eq_ptr;
+ struct mlx5_eq_comp *eq_comp =
+ container_of(nb, struct mlx5_eq_comp, irq_nb);
+ struct mlx5_eq *eq = &eq_comp->core;
struct mlx5_eqe *eqe;
- int set_ci = 0;
+ int num_eqes = 0;
u32 cqn = -1;
- while ((eqe = next_eqe_sw(eq))) {
+ eqe = next_eqe_sw(eq);
+ if (!eqe)
+ goto out;
+
+ do {
struct mlx5_core_cq *cq;
+
/* Make sure we read EQ entry contents after we've
* checked the ownership bit.
*/
@@ -144,33 +153,23 @@ static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
cq = mlx5_eq_cq_get(eq, cqn);
if (likely(cq)) {
++cq->arm_sn;
- cq->comp(cq);
+ cq->comp(cq, eqe);
mlx5_cq_put(cq);
} else {
mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
}
++eq->cons_index;
- ++set_ci;
- /* The HCA will think the queue has overflowed if we
- * don't tell it we've been processing events. We
- * create our EQs with MLX5_NUM_SPARE_EQE extra
- * entries, so we must update our consumer index at
- * least that often.
- */
- if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
- eq_update_ci(eq, 0);
- set_ci = 0;
- }
- }
+ } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
+out:
eq_update_ci(eq, 1);
if (cqn != -1)
tasklet_schedule(&eq_comp->tasklet_ctx.task);
- return IRQ_HANDLED;
+ return 0;
}
/* Some architectures don't latch interrupts when they are disabled, so using
@@ -184,25 +183,32 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
disable_irq(eq->core.irqn);
count_eqe = eq->core.cons_index;
- mlx5_eq_comp_int(eq->core.irqn, eq);
+ mlx5_eq_comp_int(&eq->irq_nb, 0, NULL);
count_eqe = eq->core.cons_index - count_eqe;
enable_irq(eq->core.irqn);
return count_eqe;
}
-static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
+static int mlx5_eq_async_int(struct notifier_block *nb,
+ unsigned long action, void *data)
{
- struct mlx5_eq *eq = eq_ptr;
+ struct mlx5_eq_async *eq_async =
+ container_of(nb, struct mlx5_eq_async, irq_nb);
+ struct mlx5_eq *eq = &eq_async->core;
struct mlx5_eq_table *eqt;
struct mlx5_core_dev *dev;
struct mlx5_eqe *eqe;
- int set_ci = 0;
+ int num_eqes = 0;
dev = eq->dev;
eqt = dev->priv.eq_table;
- while ((eqe = next_eqe_sw(eq))) {
+ eqe = next_eqe_sw(eq);
+ if (!eqe)
+ goto out;
+
+ do {
/*
* Make sure we read EQ entry contents after we've
* checked the ownership bit.
@@ -217,23 +223,13 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);
++eq->cons_index;
- ++set_ci;
- /* The HCA will think the queue has overflowed if we
- * don't tell it we've been processing events. We
- * create our EQs with MLX5_NUM_SPARE_EQE extra
- * entries, so we must update our consumer index at
- * least that often.
- */
- if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
- eq_update_ci(eq, 0);
- set_ci = 0;
- }
- }
+ } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
+out:
eq_update_ci(eq, 1);
- return IRQ_HANDLED;
+ return 0;
}
static void init_eq_buf(struct mlx5_eq *eq)
@@ -248,22 +244,19 @@ static void init_eq_buf(struct mlx5_eq *eq)
}
static int
-create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
+create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
struct mlx5_eq_param *param)
{
- struct mlx5_eq_table *eq_table = dev->priv.eq_table;
struct mlx5_cq_table *cq_table = &eq->cq_table;
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
struct mlx5_priv *priv = &dev->priv;
- u8 vecidx = param->index;
+ u8 vecidx = param->irq_index;
__be64 *pas;
void *eqc;
int inlen;
u32 *in;
int err;
-
- if (eq_table->irq_info[vecidx].context)
- return -EEXIST;
+ int i;
/* Init CQ table */
memset(cq_table, 0, sizeof(*cq_table));
@@ -291,10 +284,12 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
mlx5_fill_page_array(&eq->buf, pas);
MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
- if (!param->mask && MLX5_CAP_GEN(dev, log_max_uctx))
+ if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx))
MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID);
- MLX5_SET64(create_eq_in, in, event_bitmask, param->mask);
+ for (i = 0; i < 4; i++)
+ MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i,
+ param->mask[i]);
eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
@@ -307,34 +302,19 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
if (err)
goto err_in;
- snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
- name, pci_name(dev->pdev));
- eq_table->irq_info[vecidx].context = param->context;
-
eq->vecidx = vecidx;
eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
eq->irqn = pci_irq_vector(dev->pdev, vecidx);
eq->dev = dev;
eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
- err = request_irq(eq->irqn, param->handler, 0,
- eq_table->irq_info[vecidx].name, param->context);
- if (err)
- goto err_eq;
err = mlx5_debug_eq_add(dev, eq);
if (err)
- goto err_irq;
-
- /* EQs are created in ARMED state
- */
- eq_update_ci(eq, 1);
+ goto err_eq;
kvfree(in);
return 0;
-err_irq:
- free_irq(eq->irqn, eq);
-
err_eq:
mlx5_cmd_destroy_eq(dev, eq->eqn);
@@ -346,18 +326,48 @@ err_buf:
return err;
}
-static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+/**
+ * mlx5_eq_enable - Enable EQ for receiving EQEs
+ * @dev - Device which owns the eq
+ * @eq - EQ to enable
+ * @nb - notifier call block
+ * mlx5_eq_enable - must be called after EQ is created in device.
+ */
+int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ struct notifier_block *nb)
{
struct mlx5_eq_table *eq_table = dev->priv.eq_table;
- struct mlx5_irq_info *irq_info;
int err;
- irq_info = &eq_table->irq_info[eq->vecidx];
+ err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb);
+ if (!err)
+ eq_update_ci(eq, 1);
- mlx5_debug_eq_remove(dev, eq);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_eq_enable);
+
+/**
+ * mlx5_eq_disable - Enable EQ for receiving EQEs
+ * @dev - Device which owns the eq
+ * @eq - EQ to disable
+ * @nb - notifier call block
+ * mlx5_eq_disable - must be called before EQ is destroyed.
+ */
+void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ struct notifier_block *nb)
+{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+
+ mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb);
+}
+EXPORT_SYMBOL(mlx5_eq_disable);
+
+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ int err;
- free_irq(eq->irqn, irq_info->context);
- irq_info->context = NULL;
+ mlx5_debug_eq_remove(dev, eq);
err = mlx5_cmd_destroy_eq(dev, eq->eqn);
if (err)
@@ -382,7 +392,7 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
return err;
}
-int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
{
struct mlx5_cq_table *table = &eq->cq_table;
struct mlx5_core_cq *tmp;
@@ -392,16 +402,14 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
spin_unlock(&table->lock);
if (!tmp) {
- mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn);
- return -ENOENT;
- }
-
- if (tmp != cq) {
- mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn);
- return -EINVAL;
+ mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n",
+ eq->eqn, cq->cqn);
+ return;
}
- return 0;
+ if (tmp != cq)
+ mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n",
+ eq->eqn, cq->cqn);
}
int mlx5_eq_table_init(struct mlx5_core_dev *dev)
@@ -423,6 +431,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev)
for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
+ eq_table->irq_table = dev->priv.irq_table;
return 0;
kvfree_eq_table:
@@ -439,19 +448,20 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
/* Async EQs */
-static int create_async_eq(struct mlx5_core_dev *dev, const char *name,
+static int create_async_eq(struct mlx5_core_dev *dev,
struct mlx5_eq *eq, struct mlx5_eq_param *param)
{
struct mlx5_eq_table *eq_table = dev->priv.eq_table;
int err;
mutex_lock(&eq_table->lock);
- if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) {
- err = -ENOSPC;
+ /* Async EQs must share irq index 0 */
+ if (param->irq_index != 0) {
+ err = -EINVAL;
goto unlock;
}
- err = create_map_eq(dev, eq, name, param);
+ err = create_map_eq(dev, eq, param);
unlock:
mutex_unlock(&eq_table->lock);
return err;
@@ -480,7 +490,7 @@ static int cq_err_event_notifier(struct notifier_block *nb,
/* type == MLX5_EVENT_TYPE_CQ_ERROR */
eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
- eq = &eqt->async_eq;
+ eq = &eqt->async_eq.core;
eqe = data;
cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
@@ -493,14 +503,31 @@ static int cq_err_event_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
- cq->event(cq, type);
+ if (cq->event)
+ cq->event(cq, type);
mlx5_cq_put(cq);
return NOTIFY_OK;
}
-static u64 gather_async_events_mask(struct mlx5_core_dev *dev)
+static void gather_user_async_events(struct mlx5_core_dev *dev, u64 mask[4])
+{
+ __be64 *user_unaffiliated_events;
+ __be64 *user_affiliated_events;
+ int i;
+
+ user_affiliated_events =
+ MLX5_CAP_DEV_EVENT(dev, user_affiliated_events);
+ user_unaffiliated_events =
+ MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events);
+
+ for (i = 0; i < 4; i++)
+ mask[i] |= be64_to_cpu(user_affiliated_events[i] |
+ user_unaffiliated_events[i]);
+}
+
+static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4])
{
u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
@@ -533,10 +560,14 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev)
if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);
- if (mlx5_core_is_ecpf_esw_manager(dev))
- async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE);
+ if (mlx5_eswitch_is_funcs_handler(dev))
+ async_event_mask |=
+ (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED);
- return async_event_mask;
+ mask[0] = async_event_mask;
+
+ if (MLX5_CAP_GEN(dev, event_cap))
+ gather_user_async_events(dev, mask);
}
static int create_async_eqs(struct mlx5_core_dev *dev)
@@ -548,55 +579,76 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
mlx5_eq_notifier_register(dev, &table->cq_err_nb);
+ table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int;
param = (struct mlx5_eq_param) {
- .index = MLX5_EQ_CMD_IDX,
- .mask = 1ull << MLX5_EVENT_TYPE_CMD,
+ .irq_index = 0,
.nent = MLX5_NUM_CMD_EQE,
- .context = &table->cmd_eq,
- .handler = mlx5_eq_async_int,
};
- err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param);
+
+ param.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD;
+ err = create_async_eq(dev, &table->cmd_eq.core, &param);
if (err) {
mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
goto err0;
}
-
+ err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+ if (err) {
+ mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err);
+ goto err1;
+ }
mlx5_cmd_use_events(dev);
+ table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int;
param = (struct mlx5_eq_param) {
- .index = MLX5_EQ_ASYNC_IDX,
- .mask = gather_async_events_mask(dev),
+ .irq_index = 0,
.nent = MLX5_NUM_ASYNC_EQE,
- .context = &table->async_eq,
- .handler = mlx5_eq_async_int,
};
- err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param);
+
+ gather_async_events_mask(dev, param.mask);
+ err = create_async_eq(dev, &table->async_eq.core, &param);
if (err) {
mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
- goto err1;
+ goto err2;
+ }
+ err = mlx5_eq_enable(dev, &table->async_eq.core,
+ &table->async_eq.irq_nb);
+ if (err) {
+ mlx5_core_warn(dev, "failed to enable async EQ %d\n", err);
+ goto err3;
}
+ table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int;
param = (struct mlx5_eq_param) {
- .index = MLX5_EQ_PAGEREQ_IDX,
- .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST,
+ .irq_index = 0,
.nent = /* TODO: sriov max_vf + */ 1,
- .context = &table->pages_eq,
- .handler = mlx5_eq_async_int,
};
- err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param);
+
+ param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST;
+ err = create_async_eq(dev, &table->pages_eq.core, &param);
if (err) {
mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
- goto err2;
+ goto err4;
+ }
+ err = mlx5_eq_enable(dev, &table->pages_eq.core,
+ &table->pages_eq.irq_nb);
+ if (err) {
+ mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err);
+ goto err5;
}
return err;
+err5:
+ destroy_async_eq(dev, &table->pages_eq.core);
+err4:
+ mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
+err3:
+ destroy_async_eq(dev, &table->async_eq.core);
err2:
- destroy_async_eq(dev, &table->async_eq);
-
-err1:
mlx5_cmd_use_polling(dev);
- destroy_async_eq(dev, &table->cmd_eq);
+ mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+err1:
+ destroy_async_eq(dev, &table->cmd_eq.core);
err0:
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
return err;
@@ -607,19 +659,22 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
struct mlx5_eq_table *table = dev->priv.eq_table;
int err;
- err = destroy_async_eq(dev, &table->pages_eq);
+ mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb);
+ err = destroy_async_eq(dev, &table->pages_eq.core);
if (err)
mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
err);
- err = destroy_async_eq(dev, &table->async_eq);
+ mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
+ err = destroy_async_eq(dev, &table->async_eq.core);
if (err)
mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
err);
mlx5_cmd_use_polling(dev);
- err = destroy_async_eq(dev, &table->cmd_eq);
+ mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+ err = destroy_async_eq(dev, &table->cmd_eq.core);
if (err)
mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
err);
@@ -629,24 +684,24 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
{
- return &dev->priv.eq_table->async_eq;
+ return &dev->priv.eq_table->async_eq.core;
}
void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
{
- synchronize_irq(dev->priv.eq_table->async_eq.irqn);
+ synchronize_irq(dev->priv.eq_table->async_eq.core.irqn);
}
void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
{
- synchronize_irq(dev->priv.eq_table->cmd_eq.irqn);
+ synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn);
}
/* Generic EQ API for mlx5_core consumers
* Needed For RDMA ODP EQ for now
*/
struct mlx5_eq *
-mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
+mlx5_eq_create_generic(struct mlx5_core_dev *dev,
struct mlx5_eq_param *param)
{
struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
@@ -655,7 +710,7 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
if (!eq)
return ERR_PTR(-ENOMEM);
- err = create_async_eq(dev, name, eq, param);
+ err = create_async_eq(dev, eq, param);
if (err) {
kvfree(eq);
eq = ERR_PTR(err);
@@ -713,84 +768,14 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
}
EXPORT_SYMBOL(mlx5_eq_update_ci);
-/* Completion EQs */
-
-static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
- struct mlx5_priv *priv = &mdev->priv;
- int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
- int irq = pci_irq_vector(mdev->pdev, vecidx);
- struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
-
- if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) {
- mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
- return -ENOMEM;
- }
-
- cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
- irq_info->mask);
-
- if (IS_ENABLED(CONFIG_SMP) &&
- irq_set_affinity_hint(irq, irq_info->mask))
- mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
-
- return 0;
-}
-
-static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
- int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
- struct mlx5_priv *priv = &mdev->priv;
- int irq = pci_irq_vector(mdev->pdev, vecidx);
- struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
-
- irq_set_affinity_hint(irq, NULL);
- free_cpumask_var(irq_info->mask);
-}
-
-static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
-{
- int err;
- int i;
-
- for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) {
- err = set_comp_irq_affinity_hint(mdev, i);
- if (err)
- goto err_out;
- }
-
- return 0;
-
-err_out:
- for (i--; i >= 0; i--)
- clear_comp_irq_affinity_hint(mdev, i);
-
- return err;
-}
-
-static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
-{
- int i;
-
- for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++)
- clear_comp_irq_affinity_hint(mdev, i);
-}
-
static void destroy_comp_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
struct mlx5_eq_comp *eq, *n;
- clear_comp_irqs_affinity_hints(dev);
-
-#ifdef CONFIG_RFS_ACCEL
- if (table->rmap) {
- free_irq_cpu_rmap(table->rmap);
- table->rmap = NULL;
- }
-#endif
list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
list_del(&eq->list);
+ mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
if (destroy_unmap_eq(dev, &eq->core))
mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
eq->core.eqn);
@@ -802,23 +787,17 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
static int create_comp_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
- char name[MLX5_MAX_IRQ_NAME];
struct mlx5_eq_comp *eq;
- int ncomp_vec;
+ int ncomp_eqs;
int nent;
int err;
int i;
INIT_LIST_HEAD(&table->comp_eqs_list);
- ncomp_vec = table->num_comp_vectors;
+ ncomp_eqs = table->num_comp_eqs;
nent = MLX5_COMP_EQ_SIZE;
-#ifdef CONFIG_RFS_ACCEL
- table->rmap = alloc_irq_cpu_rmap(ncomp_vec);
- if (!table->rmap)
- return -ENOMEM;
-#endif
- for (i = 0; i < ncomp_vec; i++) {
- int vecidx = i + MLX5_EQ_VEC_COMP_BASE;
+ for (i = 0; i < ncomp_eqs; i++) {
+ int vecidx = i + MLX5_IRQ_VEC_COMP_BASE;
struct mlx5_eq_param param = {};
eq = kzalloc(sizeof(*eq), GFP_KERNEL);
@@ -833,33 +812,28 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
(unsigned long)&eq->tasklet_ctx);
-#ifdef CONFIG_RFS_ACCEL
- irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx));
-#endif
- snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
+ eq->irq_nb.notifier_call = mlx5_eq_comp_int;
param = (struct mlx5_eq_param) {
- .index = vecidx,
- .mask = 0,
+ .irq_index = vecidx,
.nent = nent,
- .context = &eq->core,
- .handler = mlx5_eq_comp_int
};
- err = create_map_eq(dev, &eq->core, name, &param);
+ err = create_map_eq(dev, &eq->core, &param);
+ if (err) {
+ kfree(eq);
+ goto clean;
+ }
+ err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
if (err) {
+ destroy_unmap_eq(dev, &eq->core);
kfree(eq);
goto clean;
}
+
mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
/* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
list_add_tail(&eq->list, &table->comp_eqs_list);
}
- err = set_comp_irq_affinity_hints(dev);
- if (err) {
- mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
- goto clean;
- }
-
return 0;
clean:
@@ -890,22 +864,24 @@ EXPORT_SYMBOL(mlx5_vector2eqn);
unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
{
- return dev->priv.eq_table->num_comp_vectors;
+ return dev->priv.eq_table->num_comp_eqs;
}
EXPORT_SYMBOL(mlx5_comp_vectors_count);
struct cpumask *
mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
{
- /* TODO: consider irq_get_affinity_mask(irq) */
- return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask;
+ int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE;
+
+ return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table,
+ vecidx);
}
EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
{
- return dev->priv.eq_table->rmap;
+ return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table);
}
#endif
@@ -926,82 +902,19 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
- int i, max_eqs;
-
- clear_comp_irqs_affinity_hints(dev);
-
-#ifdef CONFIG_RFS_ACCEL
- if (table->rmap) {
- free_irq_cpu_rmap(table->rmap);
- table->rmap = NULL;
- }
-#endif
mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
- max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
- for (i = max_eqs - 1; i >= 0; i--) {
- if (!table->irq_info[i].context)
- continue;
- free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context);
- table->irq_info[i].context = NULL;
- }
+ mlx5_irq_table_destroy(dev);
mutex_unlock(&table->lock);
- pci_free_irq_vectors(dev->pdev);
-}
-
-static int alloc_irq_vectors(struct mlx5_core_dev *dev)
-{
- struct mlx5_priv *priv = &dev->priv;
- struct mlx5_eq_table *table = priv->eq_table;
- int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
- MLX5_CAP_GEN(dev, max_num_eqs) :
- 1 << MLX5_CAP_GEN(dev, log_max_eq);
- int nvec;
- int err;
-
- nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
- MLX5_EQ_VEC_COMP_BASE;
- nvec = min_t(int, nvec, num_eqs);
- if (nvec <= MLX5_EQ_VEC_COMP_BASE)
- return -ENOMEM;
-
- table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL);
- if (!table->irq_info)
- return -ENOMEM;
-
- nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1,
- nvec, PCI_IRQ_MSIX);
- if (nvec < 0) {
- err = nvec;
- goto err_free_irq_info;
- }
-
- table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
-
- return 0;
-
-err_free_irq_info:
- kfree(table->irq_info);
- return err;
-}
-
-static void free_irq_vectors(struct mlx5_core_dev *dev)
-{
- struct mlx5_priv *priv = &dev->priv;
-
- pci_free_irq_vectors(dev->pdev);
- kfree(priv->eq_table->irq_info);
}
int mlx5_eq_table_create(struct mlx5_core_dev *dev)
{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
int err;
- err = alloc_irq_vectors(dev);
- if (err) {
- mlx5_core_err(dev, "alloc irq vectors failed\n");
- return err;
- }
+ eq_table->num_comp_eqs =
+ mlx5_irq_get_num_comp(eq_table->irq_table);
err = create_async_eqs(dev);
if (err) {
@@ -1019,7 +932,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
err_comp_eqs:
destroy_async_eqs(dev);
err_async_eqs:
- free_irq_vectors(dev);
return err;
}
@@ -1027,7 +939,6 @@ void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
{
destroy_comp_eqs(dev);
destroy_async_eqs(dev);
- free_irq_vectors(dev);
}
int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
@@ -1039,6 +950,7 @@ int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
}
+EXPORT_SYMBOL(mlx5_eq_notifier_register);
int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
{
@@ -1049,3 +961,4 @@ int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
}
+EXPORT_SYMBOL(mlx5_eq_notifier_unregister);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 6a921e24cd5e..1f3891fde2eb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -134,6 +134,30 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
}
+int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *in, int inlen)
+{
+ return modify_esw_vport_context_cmd(esw->dev, vport, in, inlen);
+}
+
+static int query_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
+ void *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
+
+ MLX5_SET(query_esw_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+ MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *out, int outlen)
+{
+ return query_esw_vport_context_cmd(esw->dev, vport, out, outlen);
+}
+
static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
u16 vlan, u8 qos, u8 set_flags)
{
@@ -473,7 +497,7 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
fdb_add:
/* SRIOV is enabled: Forward UC MAC to vport */
- if (esw->fdb_table.legacy.fdb && esw->mode == SRIOV_LEGACY)
+ if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY)
vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n",
@@ -873,7 +897,7 @@ static void esw_vport_change_handle_locked(struct mlx5_vport *vport)
struct mlx5_eswitch *esw = dev->priv.eswitch;
u8 mac[ETH_ALEN];
- mlx5_query_nic_vport_mac_address(dev, vport->vport, mac);
+ mlx5_query_nic_vport_mac_address(dev, vport->vport, true, mac);
esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n",
vport->vport, mac);
@@ -939,7 +963,7 @@ int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));
root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS,
- vport->vport);
+ mlx5_eswitch_vport_num_to_index(esw, vport->vport));
if (!root_ns) {
esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport);
return -EOPNOTSUPP;
@@ -1057,7 +1081,7 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));
root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
- vport->vport);
+ mlx5_eswitch_vport_num_to_index(esw, vport->vport));
if (!root_ns) {
esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport);
return -EOPNOTSUPP;
@@ -1168,6 +1192,8 @@ void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
vport->ingress.drop_rule = NULL;
vport->ingress.allow_rule = NULL;
+
+ esw_vport_del_ingress_acl_modify_metadata(esw, vport);
}
void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
@@ -1527,6 +1553,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
u16 vport_num = vport->vport;
+ int flags;
if (esw->manager_vport == vport_num)
return;
@@ -1544,11 +1571,13 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
vport->info.node_guid);
}
+ flags = (vport->info.vlan || vport->info.qos) ?
+ SET_VLAN_STRIP | SET_VLAN_INSERT : 0;
modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos,
- (vport->info.vlan || vport->info.qos));
+ flags);
/* Only legacy mode needs ACLs */
- if (esw->mode == SRIOV_LEGACY) {
+ if (esw->mode == MLX5_ESWITCH_LEGACY) {
esw_vport_ingress_config(esw, vport);
esw_vport_egress_config(esw, vport);
}
@@ -1600,7 +1629,7 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
/* Create steering drop counters for ingress and egress ACLs */
- if (vport_num && esw->mode == SRIOV_LEGACY)
+ if (vport_num && esw->mode == MLX5_ESWITCH_LEGACY)
esw_vport_create_drop_counters(vport);
/* Restore old vport configuration */
@@ -1654,7 +1683,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw,
vport->enabled_events = 0;
esw_vport_disable_qos(esw, vport);
if (esw->manager_vport != vport_num &&
- esw->mode == SRIOV_LEGACY) {
+ esw->mode == MLX5_ESWITCH_LEGACY) {
mlx5_modify_vport_admin_state(esw->dev,
MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
vport_num, 1,
@@ -1686,54 +1715,91 @@ static int eswitch_vport_event(struct notifier_block *nb,
return NOTIFY_OK;
}
+/**
+ * mlx5_esw_query_functions - Returns raw output about functions state
+ * @dev: Pointer to device to query
+ *
+ * mlx5_esw_query_functions() allocates and returns functions changed
+ * raw output memory pointer from device on success. Otherwise returns ERR_PTR.
+ * Caller must free the memory using kvfree() when valid pointer is returned.
+ */
+const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_esw_functions_out);
+ u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {};
+ u32 *out;
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(query_esw_functions_in, in, opcode,
+ MLX5_CMD_OP_QUERY_ESW_FUNCTIONS);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+ if (!err)
+ return out;
+
+ kvfree(out);
+ return ERR_PTR(err);
+}
+
+static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw)
+{
+ MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
+ mlx5_eq_notifier_register(esw->dev, &esw->nb);
+
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) {
+ MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler,
+ ESW_FUNCTIONS_CHANGED);
+ mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb);
+ }
+}
+
+static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw)
+{
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev))
+ mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb);
+
+ mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
+
+ flush_workqueue(esw->work_queue);
+}
+
/* Public E-Switch API */
#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
-int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
+int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode)
{
- int vf_nvports = 0, total_nvports = 0;
struct mlx5_vport *vport;
int err;
int i, enabled_events;
if (!ESW_ALLOWED(esw) ||
!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
- esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
+ esw_warn(esw->dev, "FDB is not supported, aborting ...\n");
return -EOPNOTSUPP;
}
if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support))
- esw_warn(esw->dev, "E-Switch ingress ACL is not supported by FW\n");
+ esw_warn(esw->dev, "ingress ACL is not supported by FW\n");
if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support))
- esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n");
-
- esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
-
- if (mode == SRIOV_OFFLOADS) {
- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports);
- if (err)
- return err;
- total_nvports = esw->total_vports;
- } else {
- vf_nvports = nvfs;
- total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev);
- }
- }
+ esw_warn(esw->dev, "engress ACL is not supported by FW\n");
esw->mode = mode;
mlx5_lag_update(esw->dev);
- if (mode == SRIOV_LEGACY) {
+ if (mode == MLX5_ESWITCH_LEGACY) {
err = esw_create_legacy_table(esw);
if (err)
goto abort;
} else {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- err = esw_offloads_init(esw, vf_nvports, total_nvports);
+ err = esw_offloads_init(esw);
}
if (err)
@@ -1743,11 +1809,8 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
if (err)
esw_warn(esw->dev, "Failed to create eswitch TSAR");
- /* Don't enable vport events when in SRIOV_OFFLOADS mode, since:
- * 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode
- * 2. FDB/Eswitch is programmed by user space tools
- */
- enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0;
+ enabled_events = (mode == MLX5_ESWITCH_LEGACY) ? SRIOV_VPORT_EVENTS :
+ UC_ADDR_CHANGE;
/* Enable PF vport */
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
@@ -1760,22 +1823,21 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
}
/* Enable VF vports */
- mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs)
+ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
esw_enable_vport(esw, vport, enabled_events);
- if (mode == SRIOV_LEGACY) {
- MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
- mlx5_eq_notifier_register(esw->dev, &esw->nb);
- }
+ mlx5_eswitch_event_handlers_register(esw);
+
+ esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n",
+ mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+ esw->esw_funcs.num_vfs, esw->enabled_vports);
- esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
- esw->enabled_vports);
return 0;
abort:
- esw->mode = SRIOV_NONE;
+ esw->mode = MLX5_ESWITCH_NONE;
- if (mode == SRIOV_OFFLOADS) {
+ if (mode == MLX5_ESWITCH_OFFLOADS) {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
}
@@ -1783,23 +1845,22 @@ abort:
return err;
}
-void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
{
struct esw_mc_addr *mc_promisc;
struct mlx5_vport *vport;
int old_mode;
int i;
- if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE)
+ if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE)
return;
- esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n",
- esw->enabled_vports, esw->mode);
+ esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n",
+ esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+ esw->esw_funcs.num_vfs, esw->enabled_vports);
mc_promisc = &esw->mc_promisc;
-
- if (esw->mode == SRIOV_LEGACY)
- mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
+ mlx5_eswitch_event_handlers_unregister(esw);
mlx5_esw_for_all_vports(esw, i, vport)
esw_disable_vport(esw, vport);
@@ -1809,17 +1870,17 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
esw_destroy_tsar(esw);
- if (esw->mode == SRIOV_LEGACY)
+ if (esw->mode == MLX5_ESWITCH_LEGACY)
esw_destroy_legacy_table(esw);
- else if (esw->mode == SRIOV_OFFLOADS)
+ else if (esw->mode == MLX5_ESWITCH_OFFLOADS)
esw_offloads_cleanup(esw);
old_mode = esw->mode;
- esw->mode = SRIOV_NONE;
+ esw->mode = MLX5_ESWITCH_NONE;
mlx5_lag_update(esw->dev);
- if (old_mode == SRIOV_OFFLOADS) {
+ if (old_mode == MLX5_ESWITCH_OFFLOADS) {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
}
@@ -1827,14 +1888,16 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
{
- int total_vports = MLX5_TOTAL_VPORTS(dev);
struct mlx5_eswitch *esw;
struct mlx5_vport *vport;
+ int total_vports;
int err, i;
if (!MLX5_VPORT_MANAGER(dev))
return 0;
+ total_vports = mlx5_eswitch_get_total_vports(dev);
+
esw_info(dev,
"Total vports %d, per vport: max uc(%d) max mc(%d)\n",
total_vports,
@@ -1847,6 +1910,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
esw->dev = dev;
esw->manager_vport = mlx5_eswitch_manager_vport(dev);
+ esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev);
esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
if (!esw->work_queue) {
@@ -1880,13 +1944,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
}
esw->enabled_vports = 0;
- esw->mode = SRIOV_NONE;
+ esw->mode = MLX5_ESWITCH_NONE;
esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
- if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
- MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
- esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
- else
- esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
dev->priv.eswitch = esw;
return 0;
@@ -1950,7 +2009,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
ether_addr_copy(evport->info.mac, mac);
evport->info.node_guid = node_guid;
- if (evport->enabled && esw->mode == SRIOV_LEGACY)
+ if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
err = esw_vport_ingress_config(esw, evport);
unlock:
@@ -2034,7 +2093,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
evport->info.vlan = vlan;
evport->info.qos = qos;
- if (evport->enabled && esw->mode == SRIOV_LEGACY) {
+ if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) {
err = esw_vport_ingress_config(esw, evport);
if (err)
goto unlock;
@@ -2076,7 +2135,7 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
mlx5_core_warn(esw->dev,
"Spoofchk in set while MAC is invalid, vport(%d)\n",
evport->vport);
- if (evport->enabled && esw->mode == SRIOV_LEGACY)
+ if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
err = esw_vport_ingress_config(esw, evport);
if (err)
evport->info.spoofchk = pschk;
@@ -2172,7 +2231,7 @@ int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
return -EPERM;
mutex_lock(&esw->state_lock);
- if (esw->mode != SRIOV_LEGACY) {
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
err = -EOPNOTSUPP;
goto out;
}
@@ -2195,7 +2254,7 @@ int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
return -EPERM;
mutex_lock(&esw->state_lock);
- if (esw->mode != SRIOV_LEGACY) {
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
err = -EOPNOTSUPP;
goto out;
}
@@ -2338,7 +2397,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
u64 bytes = 0;
int err = 0;
- if (!vport->enabled || esw->mode != SRIOV_LEGACY)
+ if (!vport->enabled || esw->mode != MLX5_ESWITCH_LEGACY)
return 0;
if (vport->egress.drop_counter)
@@ -2391,7 +2450,6 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
MLX5_SET(query_vport_counter_in, in, vport_number, vport->vport);
MLX5_SET(query_vport_counter_in, in, other_vport, 1);
- memset(out, 0, outlen);
err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen);
if (err)
goto free_out;
@@ -2448,16 +2506,27 @@ free_out:
u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
{
- return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE;
+ return ESW_ALLOWED(esw) ? esw->mode : MLX5_ESWITCH_NONE;
}
EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
+enum devlink_eswitch_encap_mode
+mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
+{
+ struct mlx5_eswitch *esw;
+
+ esw = dev->priv.eswitch;
+ return ESW_ALLOWED(esw) ? esw->offloads.encap :
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);
+
bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
{
- if ((dev0->priv.eswitch->mode == SRIOV_NONE &&
- dev1->priv.eswitch->mode == SRIOV_NONE) ||
- (dev0->priv.eswitch->mode == SRIOV_OFFLOADS &&
- dev1->priv.eswitch->mode == SRIOV_OFFLOADS))
+ if ((dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
+ dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE) ||
+ (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS &&
+ dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS))
return true;
return false;
@@ -2466,6 +2535,26 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
struct mlx5_core_dev *dev1)
{
- return (dev0->priv.eswitch->mode == SRIOV_OFFLOADS &&
- dev1->priv.eswitch->mode == SRIOV_OFFLOADS);
+ return (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS &&
+ dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS);
+}
+
+void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs)
+{
+ const u32 *out;
+
+ WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE);
+
+ if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ esw->esw_funcs.num_vfs = num_vfs;
+ return;
+ }
+
+ out = mlx5_esw_query_functions(esw->dev);
+ if (IS_ERR(out))
+ return;
+
+ esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_num_of_vfs);
+ kvfree(out);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index d043d6f9797d..a38e8a3c7c9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -68,6 +68,8 @@ struct vport_ingress {
struct mlx5_flow_group *allow_spoofchk_only_grp;
struct mlx5_flow_group *allow_untagged_only_grp;
struct mlx5_flow_group *drop_grp;
+ int modify_metadata_id;
+ struct mlx5_flow_handle *modify_metadata_rule;
struct mlx5_flow_handle *allow_rule;
struct mlx5_flow_handle *drop_rule;
struct mlx5_fc *drop_counter;
@@ -173,9 +175,12 @@ struct mlx5_esw_offload {
struct mutex peer_mutex;
DECLARE_HASHTABLE(encap_tbl, 8);
DECLARE_HASHTABLE(mod_hdr_tbl, 8);
+ DECLARE_HASHTABLE(termtbl_tbl, 8);
+ struct mutex termtbl_mutex; /* protects termtbl hash */
+ const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
u8 inline_mode;
u64 num_flows;
- u8 encap;
+ enum devlink_eswitch_encap_mode encap;
};
/* E-Switch MC FDB table hash node */
@@ -190,11 +195,15 @@ struct mlx5_host_work {
struct mlx5_eswitch *esw;
};
-struct mlx5_host_info {
+struct mlx5_esw_functions {
struct mlx5_nb nb;
u16 num_vfs;
};
+enum {
+ MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0),
+};
+
struct mlx5_eswitch {
struct mlx5_core_dev *dev;
struct mlx5_nb nb;
@@ -202,6 +211,7 @@ struct mlx5_eswitch {
struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
struct workqueue_struct *work_queue;
struct mlx5_vport *vports;
+ u32 flags;
int total_vports;
int enabled_vports;
/* Synchronize between vport change events
@@ -219,12 +229,12 @@ struct mlx5_eswitch {
int mode;
int nvports;
u16 manager_vport;
- struct mlx5_host_info host_info;
+ u16 first_host_vport;
+ struct mlx5_esw_functions esw_funcs;
};
void esw_offloads_cleanup(struct mlx5_eswitch *esw);
-int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
- int total_nvports);
+int esw_offloads_init(struct mlx5_eswitch *esw);
void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
int esw_offloads_init_reps(struct mlx5_eswitch *esw);
void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
@@ -239,12 +249,14 @@ void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
+void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
-int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
-void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
+int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode);
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
u16 vport, u8 mac[ETH_ALEN]);
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
@@ -266,8 +278,32 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
struct ifla_vf_stats *vf_stats);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
+int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *in, int inlen);
+int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *out, int outlen);
+
struct mlx5_flow_spec;
struct mlx5_esw_flow_attr;
+struct mlx5_termtbl_handle;
+
+bool
+mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_spec *spec);
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int num_dest);
+
+void
+mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw,
+ struct mlx5_termtbl_handle *tt);
struct mlx5_flow_handle *
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
@@ -338,6 +374,7 @@ struct mlx5_esw_flow_attr {
struct mlx5_eswitch_rep *rep;
struct mlx5_core_dev *mdev;
u32 encap_id;
+ struct mlx5_termtbl_handle *termtbl;
} dests[MLX5_MAX_FLOW_FWD_VPORTS];
u32 mod_hdr_id;
u8 match_level;
@@ -355,10 +392,12 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
struct netlink_ext_ack *extack);
int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
-int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
-int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode);
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode encap,
struct netlink_ext_ack *extack);
-int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode *encap);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
@@ -386,6 +425,8 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
struct mlx5_core_dev *dev1);
+const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev);
+
#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
#define esw_info(__dev, format, ...) \
@@ -404,6 +445,24 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev)
MLX5_VPORT_ECPF : MLX5_VPORT_PF;
}
+static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev)
+{
+ return mlx5_core_is_ecpf_esw_manager(dev) ?
+ MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF;
+}
+
+static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev)
+{
+ /* Ideally device should have the functions changed supported
+ * capability regardless of it being ECPF or PF wherever such
+ * event should be processed such as on eswitch manager device.
+ * However, some ECPF based device might not have this capability
+ * set. Hence OR for ECPF check to cover such device.
+ */
+ return MLX5_CAP_ESW(dev, esw_functions_changed) ||
+ mlx5_core_is_ecpf_esw_manager(dev);
+}
+
static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw)
{
/* Uplink always locate at the last element of the array.*/
@@ -488,16 +547,47 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
#define mlx5_esw_for_each_vf_vport_num_reverse(esw, vport, nvfs) \
for ((vport) = (nvfs); (vport) >= MLX5_VPORT_FIRST_VF; (vport)--)
+/* Includes host PF (vport 0) if it's not esw manager. */
+#define mlx5_esw_for_each_host_func_rep(esw, i, rep, nvfs) \
+ for ((i) = (esw)->first_host_vport; \
+ (rep) = &(esw)->offloads.vport_reps[i], \
+ (i) <= (nvfs); (i)++)
+
+#define mlx5_esw_for_each_host_func_rep_reverse(esw, i, rep, nvfs) \
+ for ((i) = (nvfs); \
+ (rep) = &(esw)->offloads.vport_reps[i], \
+ (i) >= (esw)->first_host_vport; (i)--)
+
+#define mlx5_esw_for_each_host_func_vport(esw, vport, nvfs) \
+ for ((vport) = (esw)->first_host_vport; \
+ (vport) <= (nvfs); (vport)++)
+
+#define mlx5_esw_for_each_host_func_vport_reverse(esw, vport, nvfs) \
+ for ((vport) = (nvfs); \
+ (vport) >= (esw)->first_host_vport; (vport)--)
+
struct mlx5_vport *__must_check
mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num);
+bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num);
+
+void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs);
+int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data);
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
-static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
-static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
+static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { return 0; }
+static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
+static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
+static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {}
#define FDB_MAX_CHAIN 1
#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 47b446d30f71..089ae4d48a82 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -41,7 +41,6 @@
#include "en.h"
#include "fs_core.h"
#include "lib/devcom.h"
-#include "ecpf.h"
#include "lib/eq.h"
/* There are two match-all miss flows, one for unicast dst mac and
@@ -89,6 +88,53 @@ u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw)
return 1;
}
+static void
+mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr)
+{
+ void *misc2;
+ void *misc;
+
+ /* Use metadata matching because vport is not represented by single
+ * VHCA in dual-port RoCE mode, and matching on source vport may fail.
+ */
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch,
+ attr->in_rep->vport));
+
+ misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc)))
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
+
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ }
+
+ if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) &&
+ attr->in_rep->vport == MLX5_VPORT_UPLINK)
+ spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+}
+
struct mlx5_flow_handle *
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
@@ -100,9 +146,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule;
struct mlx5_flow_table *fdb;
int j, i = 0;
- void *misc;
- if (esw->mode != SRIOV_OFFLOADS)
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
return ERR_PTR(-EOPNOTSUPP);
flow_act.action = attr->action;
@@ -160,21 +205,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
i++;
}
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
-
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+ mlx5_eswitch_set_rule_source_port(esw, spec, attr);
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET_TO_ONES(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id);
-
- spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
if (attr->tunnel_match_level != MLX5_MATCH_NONE)
spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
@@ -193,7 +225,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
goto err_esw_get;
}
- rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i);
+ if (mlx5_eswitch_termtbl_required(esw, &flow_act, spec))
+ rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr,
+ &flow_act, dest, i);
+ else
+ rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i);
if (IS_ERR(rule))
goto err_add_rule;
else
@@ -220,7 +256,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_table *fast_fdb;
struct mlx5_flow_table *fwd_fdb;
struct mlx5_flow_handle *rule;
- void *misc;
int i;
fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0);
@@ -252,25 +287,11 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
dest[i].ft = fwd_fdb,
i++;
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
-
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+ mlx5_eswitch_set_rule_source_port(esw, spec, attr);
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET_TO_ONES(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id);
-
- if (attr->match_level == MLX5_MATCH_NONE)
- spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
- else
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
- MLX5_MATCH_MISC_PARAMETERS;
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ if (attr->match_level != MLX5_MATCH_NONE)
+ spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
@@ -295,8 +316,16 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
bool fwd_rule)
{
bool split = (attr->split_count > 0);
+ int i;
mlx5_del_flow_rules(rule);
+
+ /* unref the term table */
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+ if (attr->dests[i].termtbl)
+ mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl);
+ }
+
esw->offloads.num_flows--;
if (fwd_rule) {
@@ -328,12 +357,11 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
{
struct mlx5_eswitch_rep *rep;
- int vf_vport, err = 0;
+ int i, err = 0;
esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
- for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) {
- rep = &esw->offloads.vport_reps[vf_vport];
- if (atomic_read(&rep->rep_if[REP_ETH].state) != REP_LOADED)
+ mlx5_esw_for_each_host_func_rep(esw, i, rep, esw->esw_funcs.num_vfs) {
+ if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
continue;
err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val);
@@ -559,23 +587,87 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
mlx5_del_flow_rules(rule);
}
-static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
+static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw)
+{
+ u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
+ u8 fdb_to_vport_reg_c_id;
+ int err;
+
+ err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
+ out, sizeof(out));
+ if (err)
+ return err;
+
+ fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
+ esw_vport_context.fdb_to_vport_reg_c_id);
+
+ fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
+
+ MLX5_SET(modify_esw_vport_context_in, in,
+ field_select.fdb_to_vport_reg_c_id, 1);
+
+ return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
+ in, sizeof(in));
+}
+
+static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw)
+{
+ u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
+ u8 fdb_to_vport_reg_c_id;
+ int err;
+
+ err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
+ out, sizeof(out));
+ if (err)
+ return err;
+
+ fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
+ esw_vport_context.fdb_to_vport_reg_c_id);
+
+ fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
+
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
+
+ MLX5_SET(modify_esw_vport_context_in, in,
+ field_select.fdb_to_vport_reg_c_id, 1);
+
+ return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
+ in, sizeof(in));
+}
+
+static void peer_miss_rules_setup(struct mlx5_eswitch *esw,
+ struct mlx5_core_dev *peer_dev,
struct mlx5_flow_spec *spec,
struct mlx5_flow_destination *dest)
{
- void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
+ void *misc;
- MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(peer_dev, vhca_id));
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
- spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id);
+ MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(peer_dev, vhca_id));
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+ }
dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest->vport.num = peer_dev->priv.eswitch->manager_vport;
@@ -583,6 +675,26 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
}
+static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch *peer_esw,
+ struct mlx5_flow_spec *spec,
+ u16 vport)
+{
+ void *misc;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(peer_esw,
+ vport));
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+ }
+}
+
static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
struct mlx5_core_dev *peer_dev)
{
@@ -600,7 +712,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
if (!spec)
return -ENOMEM;
- peer_miss_rules_setup(peer_dev, spec, &dest);
+ peer_miss_rules_setup(esw, peer_dev, spec, &dest);
flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL);
if (!flows) {
@@ -613,7 +725,9 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
misc_parameters);
if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF);
+ esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
+ spec, MLX5_VPORT_PF);
+
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow)) {
@@ -635,7 +749,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
}
mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) {
- MLX5_SET(fte_match_set_misc, misc, source_port, i);
+ esw_set_peer_miss_rule_source_port(esw,
+ peer_dev->priv.eswitch,
+ spec, i);
+
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow)) {
@@ -919,6 +1036,30 @@ static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw)
#define MAX_PF_SQ 256
#define MAX_SQ_NVPORTS 32
+static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
+ u32 *flow_group_in)
+{
+ void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in,
+ match_criteria);
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_0);
+ } else {
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_port);
+ }
+}
+
static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
@@ -993,7 +1134,6 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
}
/* create send-to-vport group */
- memset(flow_group_in, 0, inlen);
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
MLX5_MATCH_MISC_PARAMETERS);
@@ -1016,19 +1156,21 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
/* create peer esw miss group */
memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_MISC_PARAMETERS);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
- match_criteria);
+ esw_set_flow_group_source_port(esw, flow_group_in);
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in,
+ match_criteria);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria,
- misc_parameters.source_port);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria,
- misc_parameters.source_eswitch_owner_vhca_id);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ }
- MLX5_SET(create_flow_group_in, flow_group_in,
- source_eswitch_owner_vhca_id_valid, 1);
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
ix + esw->total_vports - 1);
@@ -1142,7 +1284,6 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports)
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_group *g;
u32 *flow_group_in;
- void *match_criteria, *misc;
int err = 0;
nvports = nvports + MLX5_ESW_MISS_FLOWS;
@@ -1151,13 +1292,7 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports)
return -ENOMEM;
/* create vport rx group */
- memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_MISC_PARAMETERS);
-
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
- misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ esw_set_flow_group_source_port(esw, flow_group_in);
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
@@ -1196,13 +1331,24 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
goto out;
}
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport));
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
- spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ }
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
@@ -1220,21 +1366,22 @@ out:
static int esw_offloads_start(struct mlx5_eswitch *esw,
struct netlink_ext_ack *extack)
{
- int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
+ int err, err1;
- if (esw->mode != SRIOV_LEGACY &&
+ if (esw->mode != MLX5_ESWITCH_LEGACY &&
!mlx5_core_is_ecpf_esw_manager(esw->dev)) {
NL_SET_ERR_MSG_MOD(extack,
"Can't set offloads mode, SRIOV legacy not enabled");
return -EINVAL;
}
- mlx5_eswitch_disable_sriov(esw);
- err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+ mlx5_eswitch_disable(esw);
+ mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs);
+ err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"Failed setting eswitch to offloads");
- err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
+ err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
if (err1) {
NL_SET_ERR_MSG_MOD(extack,
"Failed setting eswitch back to legacy");
@@ -1242,7 +1389,6 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
}
if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
if (mlx5_eswitch_inline_mode_get(esw,
- num_vfs,
&esw->offloads.inline_mode)) {
esw->offloads.inline_mode = MLX5_INLINE_MODE_L2;
NL_SET_ERR_MSG_MOD(extack,
@@ -1259,11 +1405,11 @@ void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
int esw_offloads_init_reps(struct mlx5_eswitch *esw)
{
- int total_vports = MLX5_TOTAL_VPORTS(esw->dev);
+ int total_vports = esw->total_vports;
struct mlx5_core_dev *dev = esw->dev;
struct mlx5_eswitch_rep *rep;
u8 hw_id[ETH_ALEN], rep_type;
- int vport;
+ int vport_index;
esw->offloads.vport_reps = kcalloc(total_vports,
sizeof(struct mlx5_eswitch_rep),
@@ -1271,14 +1417,15 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
if (!esw->offloads.vport_reps)
return -ENOMEM;
- mlx5_query_nic_vport_mac_address(dev, 0, hw_id);
+ mlx5_query_mac_address(dev, hw_id);
- mlx5_esw_for_all_reps(esw, vport, rep) {
- rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport);
+ mlx5_esw_for_all_reps(esw, vport_index, rep) {
+ rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index);
+ rep->vport_index = vport_index;
ether_addr_copy(rep->hw_id, hw_id);
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
- atomic_set(&rep->rep_if[rep_type].state,
+ atomic_set(&rep->rep_data[rep_type].state,
REP_UNREGISTERED);
}
@@ -1288,9 +1435,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
struct mlx5_eswitch_rep *rep, u8 rep_type)
{
- if (atomic_cmpxchg(&rep->rep_if[rep_type].state,
+ if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
REP_LOADED, REP_REGISTERED) == REP_LOADED)
- rep->rep_if[rep_type].unload(rep);
+ esw->offloads.rep_ops[rep_type]->unload(rep);
}
static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
@@ -1329,21 +1476,20 @@ static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports)
__unload_reps_vf_vport(esw, nvports, rep_type);
}
-static void __unload_reps_all_vport(struct mlx5_eswitch *esw, int nvports,
- u8 rep_type)
+static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
{
- __unload_reps_vf_vport(esw, nvports, rep_type);
+ __unload_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type);
/* Special vports must be the last to unload. */
__unload_reps_special_vport(esw, rep_type);
}
-static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw, int nvports)
+static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw)
{
u8 rep_type = NUM_REP_TYPES;
while (rep_type-- > 0)
- __unload_reps_all_vport(esw, nvports, rep_type);
+ __unload_reps_all_vport(esw, rep_type);
}
static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
@@ -1351,11 +1497,11 @@ static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
{
int err = 0;
- if (atomic_cmpxchg(&rep->rep_if[rep_type].state,
+ if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
REP_REGISTERED, REP_LOADED) == REP_REGISTERED) {
- err = rep->rep_if[rep_type].load(esw->dev, rep);
+ err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep);
if (err)
- atomic_set(&rep->rep_if[rep_type].state,
+ atomic_set(&rep->rep_data[rep_type].state,
REP_REGISTERED);
}
@@ -1419,6 +1565,26 @@ err_vf:
return err;
}
+static int __load_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
+{
+ int err;
+
+ /* Special vports must be loaded first, uplink rep creates mdev resource. */
+ err = __load_reps_special_vport(esw, rep_type);
+ if (err)
+ return err;
+
+ err = __load_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type);
+ if (err)
+ goto err_vfs;
+
+ return 0;
+
+err_vfs:
+ __unload_reps_special_vport(esw, rep_type);
+ return err;
+}
+
static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports)
{
u8 rep_type = 0;
@@ -1438,34 +1604,13 @@ err_reps:
return err;
}
-static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports,
- u8 rep_type)
-{
- int err;
-
- /* Special vports must be loaded first. */
- err = __load_reps_special_vport(esw, rep_type);
- if (err)
- return err;
-
- err = __load_reps_vf_vport(esw, nvports, rep_type);
- if (err)
- goto err_vfs;
-
- return 0;
-
-err_vfs:
- __unload_reps_special_vport(esw, rep_type);
- return err;
-}
-
-static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports)
+static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw)
{
u8 rep_type = 0;
int err;
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
- err = __load_reps_all_vport(esw, nvports, rep_type);
+ err = __load_reps_all_vport(esw, rep_type);
if (err)
goto err_reps;
}
@@ -1474,7 +1619,7 @@ static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports)
err_reps:
while (rep_type-- > 0)
- __unload_reps_all_vport(esw, nvports, rep_type);
+ __unload_reps_all_vport(esw, rep_type);
return err;
}
@@ -1510,6 +1655,10 @@ static int mlx5_esw_offloads_devcom_event(int event,
switch (event) {
case ESW_OFFLOADS_DEVCOM_PAIR:
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
+ mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
+ break;
+
err = mlx5_esw_offloads_pair(esw, peer_esw);
if (err)
goto err_out;
@@ -1578,32 +1727,16 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
- struct mlx5_core_dev *dev = esw->dev;
struct mlx5_flow_act flow_act = {0};
struct mlx5_flow_spec *spec;
int err = 0;
/* For prio tag mode, there is only 1 FTEs:
- * 1) Untagged packets - push prio tag VLAN, allow
+ * 1) Untagged packets - push prio tag VLAN and modify metadata if
+ * required, allow
* Unmatched traffic is allowed by default
*/
- if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
- return -EOPNOTSUPP;
-
- esw_vport_cleanup_ingress_rules(esw, vport);
-
- err = esw_vport_enable_ingress_acl(esw, vport);
- if (err) {
- mlx5_core_warn(esw->dev,
- "failed to enable prio tag ingress acl (%d) on vport[%d]\n",
- err, vport->vport);
- return err;
- }
-
- esw_debug(esw->dev,
- "vport[%d] configure ingress rules\n", vport->vport);
-
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec) {
err = -ENOMEM;
@@ -1619,6 +1752,12 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
flow_act.vlan[0].ethtype = ETH_P_8021Q;
flow_act.vlan[0].vid = 0;
flow_act.vlan[0].prio = 0;
+
+ if (vport->ingress.modify_metadata_rule) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.modify_id = vport->ingress.modify_metadata_id;
+ }
+
vport->ingress.allow_rule =
mlx5_add_flow_rules(vport->ingress.acl, spec,
&flow_act, NULL, 0);
@@ -1639,6 +1778,58 @@ out_no_mem:
return err;
}
+static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
+ static const struct mlx5_flow_spec spec = {};
+ struct mlx5_flow_act flow_act = {};
+ int err = 0;
+
+ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
+ MLX5_SET(set_action_in, action, data,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport));
+
+ err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ 1, action, &vport->ingress.modify_metadata_id);
+ if (err) {
+ esw_warn(esw->dev,
+ "failed to alloc modify header for vport %d ingress acl (%d)\n",
+ vport->vport, err);
+ return err;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ flow_act.modify_id = vport->ingress.modify_metadata_id;
+ vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl,
+ &spec, &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.modify_metadata_rule)) {
+ err = PTR_ERR(vport->ingress.modify_metadata_rule);
+ esw_warn(esw->dev,
+ "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.modify_metadata_rule = NULL;
+ goto out;
+ }
+
+out:
+ if (err)
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+ return err;
+}
+
+void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (vport->ingress.modify_metadata_rule) {
+ mlx5_del_flow_rules(vport->ingress.modify_metadata_rule);
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+
+ vport->ingress.modify_metadata_rule = NULL;
+ }
+}
+
static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
@@ -1646,6 +1837,9 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec;
int err = 0;
+ if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
+ return 0;
+
/* For prio tag mode, there is only 1 FTEs:
* 1) prio tag packets - pop the prio tag VLAN, allow
* Unmatched traffic is allowed by default
@@ -1699,27 +1893,98 @@ out_no_mem:
return err;
}
-static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports)
+static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
{
- struct mlx5_vport *vport = NULL;
- int i, j;
int err;
- mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) {
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ !MLX5_CAP_GEN(esw->dev, prio_tag_required))
+ return 0;
+
+ esw_vport_cleanup_ingress_rules(esw, vport);
+
+ err = esw_vport_enable_ingress_acl(esw, vport);
+ if (err) {
+ esw_warn(esw->dev,
+ "failed to enable ingress acl (%d) on vport[%d]\n",
+ err, vport->vport);
+ return err;
+ }
+
+ esw_debug(esw->dev,
+ "vport[%d] configure ingress rules\n", vport->vport);
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ err = esw_vport_add_ingress_acl_modify_metadata(esw, vport);
+ if (err)
+ goto out;
+ }
+
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+ mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
err = esw_vport_ingress_prio_tag_config(esw, vport);
if (err)
- goto err_ingress;
- err = esw_vport_egress_prio_tag_config(esw, vport);
+ goto out;
+ }
+
+out:
+ if (err)
+ esw_vport_disable_ingress_acl(esw, vport);
+ return err;
+}
+
+static bool
+esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
+{
+ if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl))
+ return false;
+
+ if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
+ MLX5_FDB_TO_VPORT_REG_C_0))
+ return false;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
+ return false;
+
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
+ mlx5_ecpf_vport_exists(esw->dev))
+ return false;
+
+ return true;
+}
+
+static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ int i, j;
+ int err;
+
+ if (esw_check_vport_match_metadata_supported(esw))
+ esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+
+ mlx5_esw_for_all_vports(esw, i, vport) {
+ err = esw_vport_ingress_common_config(esw, vport);
if (err)
- goto err_egress;
+ goto err_ingress;
+
+ if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
+ err = esw_vport_egress_prio_tag_config(esw, vport);
+ if (err)
+ goto err_egress;
+ }
}
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+ esw_info(esw->dev, "Use metadata reg_c as source vport to match\n");
+
return 0;
err_egress:
esw_vport_disable_ingress_acl(esw, vport);
err_ingress:
- mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) {
+ for (j = MLX5_VPORT_PF; j < i; j++) {
+ vport = &esw->vports[j];
esw_vport_disable_egress_acl(esw, vport);
esw_vport_disable_ingress_acl(esw, vport);
}
@@ -1727,40 +1992,46 @@ err_ingress:
return err;
}
-static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw)
+static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
int i;
- mlx5_esw_for_each_vf_vport(esw, i, vport, esw->dev->priv.sriov.num_vfs) {
+ mlx5_esw_for_all_vports(esw, i, vport) {
esw_vport_disable_egress_acl(esw, vport);
esw_vport_disable_ingress_acl(esw, vport);
}
+
+ esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
}
-static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int vf_nvports,
- int nvports)
+static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
{
+ int num_vfs = esw->esw_funcs.num_vfs;
+ int total_vports;
int err;
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev))
+ total_vports = esw->total_vports;
+ else
+ total_vports = num_vfs + MLX5_SPECIAL_VPORTS(esw->dev);
+
memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
- if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
- err = esw_prio_tag_acls_config(esw, vf_nvports);
- if (err)
- return err;
- }
-
- err = esw_create_offloads_fdb_tables(esw, nvports);
+ err = esw_create_offloads_acl_tables(esw);
if (err)
return err;
- err = esw_create_offloads_table(esw, nvports);
+ err = esw_create_offloads_fdb_tables(esw, total_vports);
+ if (err)
+ goto create_fdb_err;
+
+ err = esw_create_offloads_table(esw, total_vports);
if (err)
goto create_ft_err;
- err = esw_create_vport_rx_group(esw, nvports);
+ err = esw_create_vport_rx_group(esw, total_vports);
if (err)
goto create_fg_err;
@@ -1772,6 +2043,9 @@ create_fg_err:
create_ft_err:
esw_destroy_offloads_fdb_tables(esw);
+create_fdb_err:
+ esw_destroy_offloads_acl_tables(esw);
+
return err;
}
@@ -1780,88 +2054,111 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
esw_destroy_vport_rx_group(esw);
esw_destroy_offloads_table(esw);
esw_destroy_offloads_fdb_tables(esw);
- if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
- esw_prio_tag_acls_cleanup(esw);
+ esw_destroy_offloads_acl_tables(esw);
}
-static void esw_host_params_event_handler(struct work_struct *work)
+static void
+esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out)
{
- struct mlx5_host_work *host_work;
- struct mlx5_eswitch *esw;
- int err, num_vf = 0;
+ bool host_pf_disabled;
+ u16 new_num_vfs;
- host_work = container_of(work, struct mlx5_host_work, work);
- esw = host_work->esw;
+ new_num_vfs = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_num_of_vfs);
+ host_pf_disabled = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_pf_disabled);
- err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf);
- if (err || num_vf == esw->host_info.num_vfs)
- goto out;
+ if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled)
+ return;
/* Number of VFs can only change from "0 to x" or "x to 0". */
- if (esw->host_info.num_vfs > 0) {
- esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs);
+ if (esw->esw_funcs.num_vfs > 0) {
+ esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs);
} else {
- err = esw_offloads_load_vf_reps(esw, num_vf);
+ int err;
+ err = esw_offloads_load_vf_reps(esw, new_num_vfs);
if (err)
- goto out;
+ return;
}
+ esw->esw_funcs.num_vfs = new_num_vfs;
+}
+
+static void esw_functions_changed_event_handler(struct work_struct *work)
+{
+ struct mlx5_host_work *host_work;
+ struct mlx5_eswitch *esw;
+ const u32 *out;
- esw->host_info.num_vfs = num_vf;
+ host_work = container_of(work, struct mlx5_host_work, work);
+ esw = host_work->esw;
+ out = mlx5_esw_query_functions(esw->dev);
+ if (IS_ERR(out))
+ goto out;
+
+ esw_vfs_changed_event_handler(esw, out);
+ kvfree(out);
out:
kfree(host_work);
}
-static int esw_host_params_event(struct notifier_block *nb,
- unsigned long type, void *data)
+int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data)
{
+ struct mlx5_esw_functions *esw_funcs;
struct mlx5_host_work *host_work;
- struct mlx5_host_info *host_info;
struct mlx5_eswitch *esw;
host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC);
if (!host_work)
return NOTIFY_DONE;
- host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb);
- esw = container_of(host_info, struct mlx5_eswitch, host_info);
+ esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb);
+ esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs);
host_work->esw = esw;
- INIT_WORK(&host_work->work, esw_host_params_event_handler);
+ INIT_WORK(&host_work->work, esw_functions_changed_event_handler);
queue_work(esw->work_queue, &host_work->work);
return NOTIFY_OK;
}
-int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
- int total_nvports)
+int esw_offloads_init(struct mlx5_eswitch *esw)
{
int err;
- err = esw_offloads_steering_init(esw, vf_nvports, total_nvports);
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
+ MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap))
+ esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
+ else
+ esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+
+ err = esw_offloads_steering_init(esw);
if (err)
return err;
- err = esw_offloads_load_all_reps(esw, vf_nvports);
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ err = mlx5_eswitch_enable_passing_vport_metadata(esw);
+ if (err)
+ goto err_vport_metadata;
+ }
+
+ err = esw_offloads_load_all_reps(esw);
if (err)
goto err_reps;
esw_offloads_devcom_init(esw);
-
- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event,
- HOST_PARAMS_CHANGE);
- mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb);
- esw->host_info.num_vfs = vf_nvports;
- }
+ mutex_init(&esw->offloads.termtbl_mutex);
mlx5_rdma_enable_roce(esw->dev);
return 0;
err_reps:
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+ mlx5_eswitch_disable_passing_vport_metadata(esw);
+err_vport_metadata:
esw_offloads_steering_cleanup(esw);
return err;
}
@@ -1869,13 +2166,13 @@ err_reps:
static int esw_offloads_stop(struct mlx5_eswitch *esw,
struct netlink_ext_ack *extack)
{
- int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
+ int err, err1;
- mlx5_eswitch_disable_sriov(esw);
- err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
+ mlx5_eswitch_disable(esw);
+ err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
- err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+ err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
if (err1) {
NL_SET_ERR_MSG_MOD(extack,
"Failed setting eswitch back to offloads");
@@ -1887,30 +2184,23 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
void esw_offloads_cleanup(struct mlx5_eswitch *esw)
{
- u16 num_vfs;
-
- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb);
- flush_workqueue(esw->work_queue);
- num_vfs = esw->host_info.num_vfs;
- } else {
- num_vfs = esw->dev->priv.sriov.num_vfs;
- }
-
mlx5_rdma_disable_roce(esw->dev);
esw_offloads_devcom_cleanup(esw);
- esw_offloads_unload_all_reps(esw, num_vfs);
+ esw_offloads_unload_all_reps(esw);
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+ mlx5_eswitch_disable_passing_vport_metadata(esw);
esw_offloads_steering_cleanup(esw);
+ esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
}
static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
{
switch (mode) {
case DEVLINK_ESWITCH_MODE_LEGACY:
- *mlx5_mode = SRIOV_LEGACY;
+ *mlx5_mode = MLX5_ESWITCH_LEGACY;
break;
case DEVLINK_ESWITCH_MODE_SWITCHDEV:
- *mlx5_mode = SRIOV_OFFLOADS;
+ *mlx5_mode = MLX5_ESWITCH_OFFLOADS;
break;
default:
return -EINVAL;
@@ -1922,10 +2212,10 @@ static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
{
switch (mlx5_mode) {
- case SRIOV_LEGACY:
+ case MLX5_ESWITCH_LEGACY:
*mode = DEVLINK_ESWITCH_MODE_LEGACY;
break;
- case SRIOV_OFFLOADS:
+ case MLX5_ESWITCH_OFFLOADS:
*mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
break;
default:
@@ -1989,7 +2279,7 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink)
if(!MLX5_ESWITCH_MANAGER(dev))
return -EPERM;
- if (dev->priv.eswitch->mode == SRIOV_NONE &&
+ if (dev->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
!mlx5_core_is_ecpf_esw_manager(dev))
return -EOPNOTSUPP;
@@ -2040,7 +2330,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
struct mlx5_eswitch *esw = dev->priv.eswitch;
- int err, vport;
+ int err, vport, num_vport;
u8 mlx5_mode;
err = mlx5_devlink_eswitch_check(devlink);
@@ -2069,7 +2359,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
if (err)
goto out;
- for (vport = 1; vport < esw->enabled_vports; vport++) {
+ mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
@@ -2082,7 +2372,8 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
return 0;
revert_inline_mode:
- while (--vport > 0)
+ num_vport = --vport;
+ mlx5_esw_for_each_host_func_vport_reverse(esw, vport, num_vport)
mlx5_modify_nic_vport_min_inline(dev,
vport,
esw->offloads.inline_mode);
@@ -2103,7 +2394,7 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
}
-int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
{
u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
struct mlx5_core_dev *dev = esw->dev;
@@ -2112,7 +2403,7 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
if (!MLX5_CAP_GEN(dev, vport_group_manager))
return -EOPNOTSUPP;
- if (esw->mode == SRIOV_NONE)
+ if (esw->mode == MLX5_ESWITCH_NONE)
return -EOPNOTSUPP;
switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
@@ -2127,9 +2418,10 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
}
query_vports:
- for (vport = 1; vport <= nvfs; vport++) {
+ mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode);
+ mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
- if (vport > 1 && prev_mlx5_mode != mlx5_mode)
+ if (prev_mlx5_mode != mlx5_mode)
return -EINVAL;
prev_mlx5_mode = mlx5_mode;
}
@@ -2139,7 +2431,8 @@ out:
return 0;
}
-int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode encap,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
@@ -2158,7 +2451,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC)
return -EOPNOTSUPP;
- if (esw->mode == SRIOV_LEGACY) {
+ if (esw->mode == MLX5_ESWITCH_LEGACY) {
esw->offloads.encap = encap;
return 0;
}
@@ -2188,7 +2481,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
return err;
}
-int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode *encap)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
struct mlx5_eswitch *esw = dev->priv.eswitch;
@@ -2203,36 +2497,31 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
}
void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
- struct mlx5_eswitch_rep_if *__rep_if,
+ const struct mlx5_eswitch_rep_ops *ops,
u8 rep_type)
{
- struct mlx5_eswitch_rep_if *rep_if;
+ struct mlx5_eswitch_rep_data *rep_data;
struct mlx5_eswitch_rep *rep;
int i;
+ esw->offloads.rep_ops[rep_type] = ops;
mlx5_esw_for_all_reps(esw, i, rep) {
- rep_if = &rep->rep_if[rep_type];
- rep_if->load = __rep_if->load;
- rep_if->unload = __rep_if->unload;
- rep_if->get_proto_dev = __rep_if->get_proto_dev;
- rep_if->priv = __rep_if->priv;
-
- atomic_set(&rep_if->state, REP_REGISTERED);
+ rep_data = &rep->rep_data[rep_type];
+ atomic_set(&rep_data->state, REP_REGISTERED);
}
}
EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
{
- u16 max_vf = mlx5_core_max_vfs(esw->dev);
struct mlx5_eswitch_rep *rep;
int i;
- if (esw->mode == SRIOV_OFFLOADS)
- __unload_reps_all_vport(esw, max_vf, rep_type);
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS)
+ __unload_reps_all_vport(esw, rep_type);
mlx5_esw_for_all_reps(esw, i, rep)
- atomic_set(&rep->rep_if[rep_type].state, REP_UNREGISTERED);
+ atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
}
EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps);
@@ -2241,7 +2530,7 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
struct mlx5_eswitch_rep *rep;
rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
- return rep->rep_if[rep_type].priv;
+ return rep->rep_data[rep_type].priv;
}
void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
@@ -2252,9 +2541,9 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
rep = mlx5_eswitch_get_rep(esw, vport);
- if (atomic_read(&rep->rep_if[rep_type].state) == REP_LOADED &&
- rep->rep_if[rep_type].get_proto_dev)
- return rep->rep_if[rep_type].get_proto_dev(rep);
+ if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+ esw->offloads.rep_ops[rep_type]->get_proto_dev)
+ return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep);
return NULL;
}
EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
@@ -2271,3 +2560,22 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
return mlx5_eswitch_get_rep(esw, vport);
}
EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
+
+bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return vport_num >= MLX5_VPORT_FIRST_VF &&
+ vport_num <= esw->dev->priv.sriov.max_vfs;
+}
+
+bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
+{
+ return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA);
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
+
+u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
new file mode 100644
index 000000000000..1d55a324a17e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include <linux/mlx5/fs.h>
+#include "eswitch.h"
+
+struct mlx5_termtbl_handle {
+ struct hlist_node termtbl_hlist;
+
+ struct mlx5_flow_table *termtbl;
+ struct mlx5_flow_act flow_act;
+ struct mlx5_flow_destination dest;
+
+ struct mlx5_flow_handle *rule;
+ int ref_count;
+};
+
+static u32
+mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest)
+{
+ u32 hash;
+
+ hash = jhash_1word(flow_act->action, 0);
+ hash = jhash((const void *)&flow_act->vlan,
+ sizeof(flow_act->vlan), hash);
+ hash = jhash((const void *)&dest->vport.num,
+ sizeof(dest->vport.num), hash);
+ hash = jhash((const void *)&dest->vport.vhca_id,
+ sizeof(dest->vport.num), hash);
+ return hash;
+}
+
+static int
+mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1,
+ struct mlx5_flow_destination *dest1,
+ struct mlx5_flow_act *flow_act2,
+ struct mlx5_flow_destination *dest2)
+{
+ return flow_act1->action != flow_act2->action ||
+ dest1->vport.num != dest2->vport.num ||
+ dest1->vport.vhca_id != dest2->vport.vhca_id ||
+ memcmp(&flow_act1->vlan, &flow_act2->vlan,
+ sizeof(flow_act1->vlan));
+}
+
+static int
+mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
+ struct mlx5_termtbl_handle *tt,
+ struct mlx5_flow_act *flow_act)
+{
+ static const struct mlx5_flow_spec spec = {};
+ struct mlx5_flow_namespace *root_ns;
+ int prio, flags;
+ int err;
+
+ root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get FDB flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* As this is the terminating action then the termination table is the
+ * same prio as the slow path
+ */
+ prio = FDB_SLOW_PATH;
+ flags = MLX5_FLOW_TABLE_TERMINATION;
+ tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, prio, 1, 1,
+ 0, flags);
+ if (IS_ERR(tt->termtbl)) {
+ esw_warn(dev, "Failed to create termination table\n");
+ return -EOPNOTSUPP;
+ }
+
+ tt->rule = mlx5_add_flow_rules(tt->termtbl, &spec, flow_act,
+ &tt->dest, 1);
+
+ if (IS_ERR(tt->rule)) {
+ esw_warn(dev, "Failed to create termination table rule\n");
+ goto add_flow_err;
+ }
+ return 0;
+
+add_flow_err:
+ err = mlx5_destroy_flow_table(tt->termtbl);
+ if (err)
+ esw_warn(dev, "Failed to destroy termination table\n");
+
+ return -EOPNOTSUPP;
+}
+
+static struct mlx5_termtbl_handle *
+mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest)
+{
+ struct mlx5_termtbl_handle *tt;
+ bool found = false;
+ u32 hash_key;
+ int err;
+
+ mutex_lock(&esw->offloads.termtbl_mutex);
+
+ hash_key = mlx5_eswitch_termtbl_hash(flow_act, dest);
+ hash_for_each_possible(esw->offloads.termtbl_tbl, tt,
+ termtbl_hlist, hash_key) {
+ if (!mlx5_eswitch_termtbl_cmp(&tt->flow_act, &tt->dest,
+ flow_act, dest)) {
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ goto tt_add_ref;
+
+ tt = kzalloc(sizeof(*tt), GFP_KERNEL);
+ if (!tt) {
+ err = -ENOMEM;
+ goto tt_create_err;
+ }
+
+ tt->dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ tt->dest.vport.num = dest->vport.num;
+ tt->dest.vport.vhca_id = dest->vport.vhca_id;
+ memcpy(&tt->flow_act, flow_act, sizeof(*flow_act));
+
+ err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act);
+ if (err) {
+ esw_warn(esw->dev, "Failed to create termination table\n");
+ goto tt_create_err;
+ }
+ hash_add(esw->offloads.termtbl_tbl, &tt->termtbl_hlist, hash_key);
+tt_add_ref:
+ tt->ref_count++;
+ mutex_unlock(&esw->offloads.termtbl_mutex);
+ return tt;
+tt_create_err:
+ kfree(tt);
+ mutex_unlock(&esw->offloads.termtbl_mutex);
+ return ERR_PTR(err);
+}
+
+void
+mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw,
+ struct mlx5_termtbl_handle *tt)
+{
+ mutex_lock(&esw->offloads.termtbl_mutex);
+ if (--tt->ref_count == 0)
+ hash_del(&tt->termtbl_hlist);
+ mutex_unlock(&esw->offloads.termtbl_mutex);
+
+ if (!tt->ref_count) {
+ mlx5_del_flow_rules(tt->rule);
+ mlx5_destroy_flow_table(tt->termtbl);
+ kfree(tt);
+ }
+}
+
+static void
+mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src,
+ struct mlx5_flow_act *dst)
+{
+ if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))
+ return;
+
+ src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0]));
+ memset(&src->vlan[0], 0, sizeof(src->vlan[0]));
+
+ if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
+ return;
+
+ src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+ dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+ memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1]));
+ memset(&src->vlan[1], 0, sizeof(src->vlan[1]));
+}
+
+bool
+mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_spec *spec)
+{
+ u32 port_mask = MLX5_GET(fte_match_param, spec->match_criteria,
+ misc_parameters.source_port);
+ u32 port_value = MLX5_GET(fte_match_param, spec->match_value,
+ misc_parameters.source_port);
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table))
+ return false;
+
+ /* push vlan on RX */
+ return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) &&
+ ((port_mask & port_value) == MLX5_VPORT_UPLINK);
+}
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int num_dest)
+{
+ struct mlx5_flow_act term_tbl_act = {};
+ struct mlx5_flow_handle *rule = NULL;
+ bool term_table_created = false;
+ int num_vport_dests = 0;
+ int i, curr_dest;
+
+ mlx5_eswitch_termtbl_actions_move(flow_act, &term_tbl_act);
+ term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ for (i = 0; i < num_dest; i++) {
+ struct mlx5_termtbl_handle *tt;
+
+ /* only vport destinations can be terminated */
+ if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT)
+ continue;
+
+ /* get the terminating table for the action list */
+ tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act,
+ &dest[i]);
+ if (IS_ERR(tt)) {
+ esw_warn(esw->dev, "Failed to create termination table\n");
+ goto revert_changes;
+ }
+ attr->dests[num_vport_dests].termtbl = tt;
+ num_vport_dests++;
+
+ /* link the destination with the termination table */
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = tt->termtbl;
+ term_table_created = true;
+ }
+
+ /* at least one destination should reference a termination table */
+ if (!term_table_created)
+ goto revert_changes;
+
+ /* create the FTE */
+ rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest);
+ if (IS_ERR(rule))
+ goto revert_changes;
+
+ goto out;
+
+revert_changes:
+ /* revert the changes that were made to the original flow_act
+ * and fall-back to the original rule actions
+ */
+ mlx5_eswitch_termtbl_actions_move(&term_tbl_act, flow_act);
+
+ for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) {
+ struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl;
+
+ /* search for the destination associated with the
+ * current term table
+ */
+ for (i = 0; i < num_dest; i++) {
+ if (dest[i].ft != tt->termtbl)
+ continue;
+
+ memset(&dest[i], 0, sizeof(dest[i]));
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest[i].vport.num = tt->dest.vport.num;
+ dest[i].vport.vhca_id = tt->dest.vport.vhca_id;
+ mlx5_eswitch_termtbl_put(esw, tt);
+ break;
+ }
+ }
+ rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest);
+out:
+ return rule;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
index a81e8d2168d8..8bcf3426b9c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/events.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c
@@ -108,8 +108,8 @@ static const char *eqe_type_str(u8 type)
return "MLX5_EVENT_TYPE_STALL_EVENT";
case MLX5_EVENT_TYPE_CMD:
return "MLX5_EVENT_TYPE_CMD";
- case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE:
- return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE";
+ case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED:
+ return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
case MLX5_EVENT_TYPE_PAGE_REQUEST:
return "MLX5_EVENT_TYPE_PAGE_REQUEST";
case MLX5_EVENT_TYPE_PAGE_FAULT:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index ca2296a2f9ee..4c50efe4e7f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -414,7 +414,8 @@ static void mlx5_fpga_conn_cq_tasklet(unsigned long data)
mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET);
}
-static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq)
+static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq,
+ struct mlx5_eqe *eqe)
{
struct mlx5_fpga_conn *conn;
@@ -429,6 +430,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
struct mlx5_fpga_device *fdev = conn->fdev;
struct mlx5_core_dev *mdev = fdev->mdev;
u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
struct mlx5_wq_param wqp;
struct mlx5_cqe64 *cqe;
int inlen, err, eqn;
@@ -476,7 +478,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas);
- err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen);
+ err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen, out, sizeof(out));
kvfree(in);
if (err)
@@ -867,7 +869,7 @@ struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
conn->cb_arg = attr->cb_arg;
remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32);
- err = mlx5_query_nic_vport_mac_address(fdev->mdev, 0, remote_mac);
+ err = mlx5_query_mac_address(fdev->mdev, remote_mac);
if (err) {
mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err);
ret = ERR_PTR(err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 52c47d3dd5a5..c76da309506b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -636,7 +636,8 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
u8 match_criteria_enable,
const u32 *match_c,
const u32 *match_v,
- struct mlx5_flow_act *flow_act)
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_context *flow_context)
{
const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c,
outer_headers);
@@ -655,7 +656,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
(match_criteria_enable &
~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
(flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
- (flow_act->flags & FLOW_ACT_HAS_TAG))
+ (flow_context->flags & FLOW_CONTEXT_HAS_TAG))
return false;
return true;
@@ -767,7 +768,8 @@ mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev,
fg->mask.match_criteria_enable,
fg->mask.match_criteria,
fte->val,
- &fte->action))
+ &fte->action,
+ &fte->flow_context))
return ERR_PTR(-EINVAL);
else if (!mlx5_is_fpga_ipsec_rule(mdev,
fg->mask.match_criteria_enable,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
index 2b5e63b0d4d6..382985e65b48 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -37,8 +37,6 @@
#include "accel/ipsec.h"
#include "fs_cmd.h"
-#ifdef CONFIG_MLX5_FPGA
-
u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev);
int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
@@ -66,77 +64,4 @@ int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
const struct mlx5_flow_cmds *
mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
-#else
-
-static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
-{
- return 0;
-}
-
-static inline unsigned int
-mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
-{
- return 0;
-}
-
-static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev,
- u64 *counters)
-{
- return 0;
-}
-
-static inline void *
-mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *accel_xfrm,
- const __be32 saddr[4],
- const __be32 daddr[4],
- const __be32 spi, bool is_ipv6)
-{
- return NULL;
-}
-
-static inline void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
-{
-}
-
-static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
-{
- return 0;
-}
-
-static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
-{
-}
-
-static inline void mlx5_fpga_ipsec_build_fs_cmds(void)
-{
-}
-
-static inline struct mlx5_accel_esp_xfrm *
-mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 flags)
-{
- return ERR_PTR(-EOPNOTSUPP);
-}
-
-static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
-{
-}
-
-static inline int
-mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
- const struct mlx5_accel_esp_xfrm_attrs *attrs)
-{
- return -EOPNOTSUPP;
-}
-
-static inline const struct mlx5_flow_cmds *
-mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
-{
- return mlx5_fs_cmd_get_default(type);
-}
-
-#endif /* CONFIG_MLX5_FPGA */
-
#endif /* __MLX5_FPGA_SADB_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 013b1ca4a791..7ac1249eadc3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -147,6 +147,7 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
{
int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
int en_decap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+ int term = !!(ft->flags & MLX5_FLOW_TABLE_TERMINATION);
u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0};
struct mlx5_core_dev *dev = ns->dev;
@@ -167,6 +168,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
en_decap);
MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en,
en_encap);
+ MLX5_SET(create_flow_table_in, in, flow_table_context.termination_table,
+ term);
switch (ft->op_mod) {
case FS_FT_OP_MOD_NORMAL:
@@ -393,7 +396,11 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
MLX5_SET(flow_context, in_flow_context, group_id, group_id);
- MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
+ MLX5_SET(flow_context, in_flow_context, flow_tag,
+ fte->flow_context.flow_tag);
+ MLX5_SET(flow_context, in_flow_context, flow_source,
+ fte->flow_context.flow_source);
+
MLX5_SET(flow_context, in_flow_context, extended_destination,
extended_dest);
if (extended_dest) {
@@ -768,6 +775,10 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions);
table_type = FS_FT_NIC_TX;
break;
+ case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
+ max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions);
+ table_type = FS_FT_ESW_INGRESS_ACL;
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index d7ca7e82a832..3e99799bdb40 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -584,7 +584,7 @@ err_ida_remove:
}
static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
- u32 *match_value,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act)
{
struct mlx5_flow_steering *steering = get_steering(&ft->node);
@@ -594,9 +594,10 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
if (!fte)
return ERR_PTR(-ENOMEM);
- memcpy(fte->val, match_value, sizeof(fte->val));
+ memcpy(fte->val, &spec->match_value, sizeof(fte->val));
fte->node.type = FS_TYPE_FLOW_ENTRY;
fte->action = *flow_act;
+ fte->flow_context = spec->flow_context;
tree_init_node(&fte->node, NULL, del_sw_fte);
@@ -612,7 +613,7 @@ static void dealloc_flow_group(struct mlx5_flow_steering *steering,
static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering,
u8 match_criteria_enable,
- void *match_criteria,
+ const void *match_criteria,
int start_index,
int end_index)
{
@@ -642,7 +643,7 @@ static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steer
static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft,
u8 match_criteria_enable,
- void *match_criteria,
+ const void *match_criteria,
int start_index,
int end_index,
struct list_head *prev)
@@ -1285,7 +1286,7 @@ free_handle:
}
static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft,
- struct mlx5_flow_spec *spec)
+ const struct mlx5_flow_spec *spec)
{
struct list_head *prev = &ft->node.children;
struct mlx5_flow_group *fg;
@@ -1430,7 +1431,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
return false;
}
-static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act)
+static int check_conflicting_ftes(struct fs_fte *fte,
+ const struct mlx5_flow_context *flow_context,
+ const struct mlx5_flow_act *flow_act)
{
if (check_conflicting_actions(flow_act->action, fte->action.action)) {
mlx5_core_warn(get_dev(&fte->node),
@@ -1438,12 +1441,12 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act
return -EEXIST;
}
- if ((flow_act->flags & FLOW_ACT_HAS_TAG) &&
- fte->action.flow_tag != flow_act->flow_tag) {
+ if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) &&
+ fte->flow_context.flow_tag != flow_context->flow_tag) {
mlx5_core_warn(get_dev(&fte->node),
"FTE flow tag %u already exists with different flow tag %u\n",
- fte->action.flow_tag,
- flow_act->flow_tag);
+ fte->flow_context.flow_tag,
+ flow_context->flow_tag);
return -EEXIST;
}
@@ -1451,7 +1454,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act
}
static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
- u32 *match_value,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int dest_num,
@@ -1462,7 +1465,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
int i;
int ret;
- ret = check_conflicting_ftes(fte, flow_act);
+ ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act);
if (ret)
return ERR_PTR(ret);
@@ -1536,7 +1539,7 @@ static void free_match_list(struct match_list_head *head)
static int build_match_list(struct match_list_head *match_head,
struct mlx5_flow_table *ft,
- struct mlx5_flow_spec *spec)
+ const struct mlx5_flow_spec *spec)
{
struct rhlist_head *tmp, *list;
struct mlx5_flow_group *g;
@@ -1589,7 +1592,7 @@ static u64 matched_fgs_get_version(struct list_head *match_head)
static struct fs_fte *
lookup_fte_locked(struct mlx5_flow_group *g,
- u32 *match_value,
+ const u32 *match_value,
bool take_write)
{
struct fs_fte *fte_tmp;
@@ -1622,7 +1625,7 @@ out:
static struct mlx5_flow_handle *
try_add_to_existing_fg(struct mlx5_flow_table *ft,
struct list_head *match_head,
- struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int dest_num,
@@ -1637,7 +1640,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
u64 version;
int err;
- fte = alloc_fte(ft, spec->match_value, flow_act);
+ fte = alloc_fte(ft, spec, flow_act);
if (IS_ERR(fte))
return ERR_PTR(-ENOMEM);
@@ -1653,8 +1656,7 @@ search_again_locked:
fte_tmp = lookup_fte_locked(g, spec->match_value, take_write);
if (!fte_tmp)
continue;
- rule = add_rule_fg(g, spec->match_value,
- flow_act, dest, dest_num, fte_tmp);
+ rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp);
up_write_ref_node(&fte_tmp->node, false);
tree_put_node(&fte_tmp->node, false);
kmem_cache_free(steering->ftes_cache, fte);
@@ -1701,8 +1703,7 @@ skip_search:
nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
up_write_ref_node(&g->node, false);
- rule = add_rule_fg(g, spec->match_value,
- flow_act, dest, dest_num, fte);
+ rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
up_write_ref_node(&fte->node, false);
tree_put_node(&fte->node, false);
return rule;
@@ -1715,7 +1716,7 @@ out:
static struct mlx5_flow_handle *
_mlx5_add_flow_rules(struct mlx5_flow_table *ft,
- struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int dest_num)
@@ -1788,7 +1789,7 @@ search_again_locked:
if (err)
goto err_release_fg;
- fte = alloc_fte(ft, spec->match_value, flow_act);
+ fte = alloc_fte(ft, spec, flow_act);
if (IS_ERR(fte)) {
err = PTR_ERR(fte);
goto err_release_fg;
@@ -1802,8 +1803,7 @@ search_again_locked:
nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
up_write_ref_node(&g->node, false);
- rule = add_rule_fg(g, spec->match_value, flow_act, dest,
- dest_num, fte);
+ rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
up_write_ref_node(&fte->node, false);
tree_put_node(&fte->node, false);
tree_put_node(&g->node, false);
@@ -1823,7 +1823,7 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
struct mlx5_flow_handle *
mlx5_add_flow_rules(struct mlx5_flow_table *ft,
- struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int num_dest)
@@ -2092,7 +2092,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d
{
struct mlx5_flow_steering *steering = dev->priv.steering;
- if (!steering || vport >= MLX5_TOTAL_VPORTS(dev))
+ if (!steering || vport >= mlx5_eswitch_get_total_vports(dev))
return NULL;
switch (type) {
@@ -2284,7 +2284,7 @@ static struct mlx5_flow_root_namespace
cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
/* Create the root namespace */
- root_ns = kvzalloc(sizeof(*root_ns), GFP_KERNEL);
+ root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL);
if (!root_ns)
return NULL;
@@ -2423,10 +2423,11 @@ static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev)
if (!steering->esw_egress_root_ns)
return;
- for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++)
+ for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++)
cleanup_root_ns(steering->esw_egress_root_ns[i]);
kfree(steering->esw_egress_root_ns);
+ steering->esw_egress_root_ns = NULL;
}
static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev)
@@ -2437,10 +2438,11 @@ static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev)
if (!steering->esw_ingress_root_ns)
return;
- for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++)
+ for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++)
cleanup_root_ns(steering->esw_ingress_root_ns[i]);
kfree(steering->esw_ingress_root_ns);
+ steering->esw_ingress_root_ns = NULL;
}
void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
@@ -2474,11 +2476,7 @@ static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
/* Create single prio */
prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1);
- if (IS_ERR(prio)) {
- cleanup_root_ns(steering->sniffer_tx_root_ns);
- return PTR_ERR(prio);
- }
- return 0;
+ return PTR_ERR_OR_ZERO(prio);
}
static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
@@ -2491,11 +2489,7 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
/* Create single prio */
prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1);
- if (IS_ERR(prio)) {
- cleanup_root_ns(steering->sniffer_rx_root_ns);
- return PTR_ERR(prio);
- }
- return 0;
+ return PTR_ERR_OR_ZERO(prio);
}
static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering)
@@ -2511,11 +2505,7 @@ static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering)
/* Create single prio */
prio = fs_create_prio(&steering->rdma_rx_root_ns->ns, 0, 1);
- if (IS_ERR(prio)) {
- cleanup_root_ns(steering->rdma_rx_root_ns);
- return PTR_ERR(prio);
- }
- return 0;
+ return PTR_ERR_OR_ZERO(prio);
}
static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
{
@@ -2616,16 +2606,18 @@ static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vpo
static int init_egress_acls_root_ns(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
+ int total_vports = mlx5_eswitch_get_total_vports(dev);
int err;
int i;
- steering->esw_egress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev),
- sizeof(*steering->esw_egress_root_ns),
- GFP_KERNEL);
+ steering->esw_egress_root_ns =
+ kcalloc(total_vports,
+ sizeof(*steering->esw_egress_root_ns),
+ GFP_KERNEL);
if (!steering->esw_egress_root_ns)
return -ENOMEM;
- for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) {
+ for (i = 0; i < total_vports; i++) {
err = init_egress_acl_root_ns(steering, i);
if (err)
goto cleanup_root_ns;
@@ -2637,22 +2629,25 @@ cleanup_root_ns:
for (i--; i >= 0; i--)
cleanup_root_ns(steering->esw_egress_root_ns[i]);
kfree(steering->esw_egress_root_ns);
+ steering->esw_egress_root_ns = NULL;
return err;
}
static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
+ int total_vports = mlx5_eswitch_get_total_vports(dev);
int err;
int i;
- steering->esw_ingress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev),
- sizeof(*steering->esw_ingress_root_ns),
- GFP_KERNEL);
+ steering->esw_ingress_root_ns =
+ kcalloc(total_vports,
+ sizeof(*steering->esw_ingress_root_ns),
+ GFP_KERNEL);
if (!steering->esw_ingress_root_ns)
return -ENOMEM;
- for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) {
+ for (i = 0; i < total_vports; i++) {
err = init_ingress_acl_root_ns(steering, i);
if (err)
goto cleanup_root_ns;
@@ -2664,6 +2659,7 @@ cleanup_root_ns:
for (i--; i >= 0; i--)
cleanup_root_ns(steering->esw_ingress_root_ns[i]);
kfree(steering->esw_ingress_root_ns);
+ steering->esw_ingress_root_ns = NULL;
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index a08c3d09a50f..c48c382f926f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -170,6 +170,7 @@ struct fs_fte {
u32 val[MLX5_ST_SZ_DW_MATCH_PARAM];
u32 dests_size;
u32 index;
+ struct mlx5_flow_context flow_context;
struct mlx5_flow_act action;
enum fs_fte_status status;
struct mlx5_fc *counter;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index c6c28f56aa29..b3762123a69c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -102,13 +102,15 @@ static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev,
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
unsigned long next_id = (unsigned long)id + 1;
struct mlx5_fc *counter;
+ unsigned long tmp;
rcu_read_lock();
/* skip counters that are in idr, but not yet in counters list */
- while ((counter = idr_get_next_ul(&fc_stats->counters_idr,
- &next_id)) != NULL &&
- list_empty(&counter->list))
- next_id++;
+ idr_for_each_entry_continue_ul(&fc_stats->counters_idr,
+ counter, tmp, next_id) {
+ if (!list_empty(&counter->list))
+ break;
+ }
rcu_read_unlock();
return counter ? &counter->list : &fc_stats->counters;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 1ab6f7e3bec6..a19790dee7b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -37,6 +37,37 @@
#include "mlx5_core.h"
#include "../../mlxfw/mlxfw.h"
+enum {
+ MCQS_IDENTIFIER_BOOT_IMG = 0x1,
+ MCQS_IDENTIFIER_OEM_NVCONFIG = 0x4,
+ MCQS_IDENTIFIER_MLNX_NVCONFIG = 0x5,
+ MCQS_IDENTIFIER_CS_TOKEN = 0x6,
+ MCQS_IDENTIFIER_DBG_TOKEN = 0x7,
+ MCQS_IDENTIFIER_GEARBOX = 0xA,
+};
+
+enum {
+ MCQS_UPDATE_STATE_IDLE,
+ MCQS_UPDATE_STATE_IN_PROGRESS,
+ MCQS_UPDATE_STATE_APPLIED,
+ MCQS_UPDATE_STATE_ACTIVE,
+ MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET,
+ MCQS_UPDATE_STATE_FAILED,
+ MCQS_UPDATE_STATE_CANCELED,
+ MCQS_UPDATE_STATE_BUSY,
+};
+
+enum {
+ MCQI_INFO_TYPE_CAPABILITIES = 0x0,
+ MCQI_INFO_TYPE_VERSION = 0x1,
+ MCQI_INFO_TYPE_ACTIVATION_METHOD = 0x5,
+};
+
+enum {
+ MCQI_FW_RUNNING_VERSION = 0,
+ MCQI_FW_STORED_VERSION = 1,
+};
+
static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out,
int outlen)
{
@@ -202,6 +233,18 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
+ if (MLX5_CAP_GEN(dev, event_cap)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_EVENT);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, tls)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_TLS);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -392,33 +435,49 @@ static int mlx5_reg_mcda_set(struct mlx5_core_dev *dev,
}
static int mlx5_reg_mcqi_query(struct mlx5_core_dev *dev,
- u16 component_index,
- u32 *max_component_size,
- u8 *log_mcda_word_size,
- u16 *mcda_max_write_size)
+ u16 component_index, bool read_pending,
+ u8 info_type, u16 data_size, void *mcqi_data)
{
- u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_ST_SZ_DW(mcqi_cap)];
- int offset = MLX5_ST_SZ_DW(mcqi_reg);
- u32 in[MLX5_ST_SZ_DW(mcqi_reg)];
+ u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_UN_SZ_DW(mcqi_reg_data)] = {};
+ u32 in[MLX5_ST_SZ_DW(mcqi_reg)] = {};
+ void *data;
int err;
- memset(in, 0, sizeof(in));
- memset(out, 0, sizeof(out));
-
MLX5_SET(mcqi_reg, in, component_index, component_index);
- MLX5_SET(mcqi_reg, in, data_size, MLX5_ST_SZ_BYTES(mcqi_cap));
+ MLX5_SET(mcqi_reg, in, read_pending_component, read_pending);
+ MLX5_SET(mcqi_reg, in, info_type, info_type);
+ MLX5_SET(mcqi_reg, in, data_size, data_size);
err = mlx5_core_access_reg(dev, in, sizeof(in), out,
- sizeof(out), MLX5_REG_MCQI, 0, 0);
+ MLX5_ST_SZ_BYTES(mcqi_reg) + data_size,
+ MLX5_REG_MCQI, 0, 0);
if (err)
- goto out;
+ return err;
- *max_component_size = MLX5_GET(mcqi_cap, out + offset, max_component_size);
- *log_mcda_word_size = MLX5_GET(mcqi_cap, out + offset, log_mcda_word_size);
- *mcda_max_write_size = MLX5_GET(mcqi_cap, out + offset, mcda_max_write_size);
+ data = MLX5_ADDR_OF(mcqi_reg, out, data);
+ memcpy(mcqi_data, data, data_size);
-out:
- return err;
+ return 0;
+}
+
+static int mlx5_reg_mcqi_caps_query(struct mlx5_core_dev *dev, u16 component_index,
+ u32 *max_component_size, u8 *log_mcda_word_size,
+ u16 *mcda_max_write_size)
+{
+ u32 mcqi_reg[MLX5_ST_SZ_DW(mcqi_cap)] = {};
+ int err;
+
+ err = mlx5_reg_mcqi_query(dev, component_index, 0,
+ MCQI_INFO_TYPE_CAPABILITIES,
+ MLX5_ST_SZ_BYTES(mcqi_cap), mcqi_reg);
+ if (err)
+ return err;
+
+ *max_component_size = MLX5_GET(mcqi_cap, mcqi_reg, max_component_size);
+ *log_mcda_word_size = MLX5_GET(mcqi_cap, mcqi_reg, log_mcda_word_size);
+ *mcda_max_write_size = MLX5_GET(mcqi_cap, mcqi_reg, mcda_max_write_size);
+
+ return 0;
}
struct mlx5_mlxfw_dev {
@@ -434,8 +493,13 @@ static int mlx5_component_query(struct mlxfw_dev *mlxfw_dev,
container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
- return mlx5_reg_mcqi_query(dev, component_index, p_max_size,
- p_align_bits, p_max_write_size);
+ if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi)) {
+ mlx5_core_warn(dev, "caps query isn't supported by running FW\n");
+ return -EOPNOTSUPP;
+ }
+
+ return mlx5_reg_mcqi_caps_query(dev, component_index, p_max_size,
+ p_align_bits, p_max_write_size);
}
static int mlx5_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle)
@@ -552,7 +616,8 @@ static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = {
};
int mlx5_firmware_flash(struct mlx5_core_dev *dev,
- const struct firmware *firmware)
+ const struct firmware *firmware,
+ struct netlink_ext_ack *extack)
{
struct mlx5_mlxfw_dev mlx5_mlxfw_dev = {
.mlxfw_dev = {
@@ -571,5 +636,133 @@ int mlx5_firmware_flash(struct mlx5_core_dev *dev,
return -EOPNOTSUPP;
}
- return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, firmware);
+ return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev,
+ firmware, extack);
+}
+
+static int mlx5_reg_mcqi_version_query(struct mlx5_core_dev *dev,
+ u16 component_index, bool read_pending,
+ u32 *mcqi_version_out)
+{
+ return mlx5_reg_mcqi_query(dev, component_index, read_pending,
+ MCQI_INFO_TYPE_VERSION,
+ MLX5_ST_SZ_BYTES(mcqi_version),
+ mcqi_version_out);
+}
+
+static int mlx5_reg_mcqs_query(struct mlx5_core_dev *dev, u32 *out,
+ u16 component_index)
+{
+ u8 out_sz = MLX5_ST_SZ_BYTES(mcqs_reg);
+ u32 in[MLX5_ST_SZ_DW(mcqs_reg)] = {};
+ int err;
+
+ memset(out, 0, out_sz);
+
+ MLX5_SET(mcqs_reg, in, component_index, component_index);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ out_sz, MLX5_REG_MCQS, 0, 0);
+ return err;
+}
+
+/* scans component index sequentially, to find the boot img index */
+static int mlx5_get_boot_img_component_index(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(mcqs_reg)] = {};
+ u16 identifier, component_idx = 0;
+ bool quit;
+ int err;
+
+ do {
+ err = mlx5_reg_mcqs_query(dev, out, component_idx);
+ if (err)
+ return err;
+
+ identifier = MLX5_GET(mcqs_reg, out, identifier);
+ quit = !!MLX5_GET(mcqs_reg, out, last_index_flag);
+ quit |= identifier == MCQS_IDENTIFIER_BOOT_IMG;
+ } while (!quit && ++component_idx);
+
+ if (identifier != MCQS_IDENTIFIER_BOOT_IMG) {
+ mlx5_core_warn(dev, "mcqs: can't find boot_img component ix, last scanned idx %d\n",
+ component_idx);
+ return -EOPNOTSUPP;
+ }
+
+ return component_idx;
+}
+
+static int
+mlx5_fw_image_pending(struct mlx5_core_dev *dev,
+ int component_index,
+ bool *pending_version_exists)
+{
+ u32 out[MLX5_ST_SZ_DW(mcqs_reg)];
+ u8 component_update_state;
+ int err;
+
+ err = mlx5_reg_mcqs_query(dev, out, component_index);
+ if (err)
+ return err;
+
+ component_update_state = MLX5_GET(mcqs_reg, out, component_update_state);
+
+ if (component_update_state == MCQS_UPDATE_STATE_IDLE) {
+ *pending_version_exists = false;
+ } else if (component_update_state == MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET) {
+ *pending_version_exists = true;
+ } else {
+ mlx5_core_warn(dev,
+ "mcqs: can't read pending fw version while fw state is %d\n",
+ component_update_state);
+ return -ENODATA;
+ }
+ return 0;
+}
+
+int mlx5_fw_version_query(struct mlx5_core_dev *dev,
+ u32 *running_ver, u32 *pending_ver)
+{
+ u32 reg_mcqi_version[MLX5_ST_SZ_DW(mcqi_version)] = {};
+ bool pending_version_exists;
+ int component_index;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi) ||
+ !MLX5_CAP_MCAM_REG(dev, mcqs)) {
+ mlx5_core_warn(dev, "fw query isn't supported by the FW\n");
+ return -EOPNOTSUPP;
+ }
+
+ component_index = mlx5_get_boot_img_component_index(dev);
+ if (component_index < 0)
+ return component_index;
+
+ err = mlx5_reg_mcqi_version_query(dev, component_index,
+ MCQI_FW_RUNNING_VERSION,
+ reg_mcqi_version);
+ if (err)
+ return err;
+
+ *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version);
+
+ err = mlx5_fw_image_pending(dev, component_index, &pending_version_exists);
+ if (err)
+ return err;
+
+ if (!pending_version_exists) {
+ *pending_ver = 0;
+ return 0;
+ }
+
+ err = mlx5_reg_mcqi_version_query(dev, component_index,
+ MCQI_FW_STORED_VERSION,
+ reg_mcqi_version);
+ if (err)
+ return err;
+
+ *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version);
+
+ return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index a2656f4008d9..9314777d99e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -40,6 +40,8 @@
#include "mlx5_core.h"
#include "lib/eq.h"
#include "lib/mlx5.h"
+#include "lib/pci_vsc.h"
+#include "diag/fw_tracer.h"
enum {
MLX5_HEALTH_POLL_INTERVAL = 2 * HZ,
@@ -62,12 +64,20 @@ enum {
enum {
MLX5_DROP_NEW_HEALTH_WORK,
- MLX5_DROP_NEW_RECOVERY_WORK,
+};
+
+enum {
+ MLX5_SENSOR_NO_ERR = 0,
+ MLX5_SENSOR_PCI_COMM_ERR = 1,
+ MLX5_SENSOR_PCI_ERR = 2,
+ MLX5_SENSOR_NIC_DISABLED = 3,
+ MLX5_SENSOR_NIC_SW_RESET = 4,
+ MLX5_SENSOR_FW_SYND_RFR = 5,
};
u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
{
- return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3;
+ return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
}
void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
@@ -80,18 +90,105 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
&dev->iseg->cmdq_addr_l_sz);
}
-static int in_fatal(struct mlx5_core_dev *dev)
+static bool sensor_pci_not_working(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
struct health_buffer __iomem *h = health->health;
+ /* Offline PCI reads return 0xffffffff */
+ return (ioread32be(&h->fw_ver) == 0xffffffff);
+}
+
+static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+ u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET;
+ u8 synd = ioread8(&h->synd);
+
+ if (rfr && synd)
+ mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
+ return rfr && synd;
+}
+
+static u32 check_fatal_sensors(struct mlx5_core_dev *dev)
+{
+ if (sensor_pci_not_working(dev))
+ return MLX5_SENSOR_PCI_COMM_ERR;
+ if (pci_channel_offline(dev->pdev))
+ return MLX5_SENSOR_PCI_ERR;
if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
- return 1;
+ return MLX5_SENSOR_NIC_DISABLED;
+ if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET)
+ return MLX5_SENSOR_NIC_SW_RESET;
+ if (sensor_fw_synd_rfr(dev))
+ return MLX5_SENSOR_FW_SYND_RFR;
- if (ioread32be(&h->fw_ver) == 0xffffffff)
- return 1;
+ return MLX5_SENSOR_NO_ERR;
+}
- return 0;
+static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock)
+{
+ enum mlx5_vsc_state state;
+ int ret;
+
+ if (!mlx5_core_is_pf(dev))
+ return -EBUSY;
+
+ /* Try to lock GW access, this stage doesn't return
+ * EBUSY because locked GW does not mean that other PF
+ * already started the reset.
+ */
+ ret = mlx5_vsc_gw_lock(dev);
+ if (ret == -EBUSY)
+ return -EINVAL;
+ if (ret)
+ return ret;
+
+ state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK;
+ /* At this stage, if the return status == EBUSY, then we know
+ * for sure that another PF started the reset, so don't allow
+ * another reset.
+ */
+ ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state);
+ if (ret)
+ mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");
+
+ /* Unlock GW access */
+ mlx5_vsc_gw_unlock(dev);
+
+ return ret;
+}
+
+static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
+{
+ bool supported = (ioread32be(&dev->iseg->initializing) >>
+ MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
+ u32 fatal_error;
+
+ if (!supported)
+ return false;
+
+ /* The reset only needs to be issued by one PF. The health buffer is
+ * shared between all functions, and will be cleared during a reset.
+ * Check again to avoid a redundant 2nd reset. If the fatal erros was
+ * PCI related a reset won't help.
+ */
+ fatal_error = check_fatal_sensors(dev);
+ if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
+ fatal_error == MLX5_SENSOR_NIC_DISABLED ||
+ fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
+ mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.");
+ return false;
+ }
+
+ mlx5_core_warn(dev, "Issuing FW Reset\n");
+ /* Write the NIC interface field to initiate the reset, the command
+ * interface address also resides here, don't overwrite it.
+ */
+ mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET);
+
+ return true;
}
void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
@@ -99,14 +196,65 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
mutex_lock(&dev->intf_state_mutex);
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
goto unlock;
+ if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) {
+ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+ goto unlock;
+ }
- mlx5_core_err(dev, "start\n");
- if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) {
+ if (check_fatal_sensors(dev) || force) {
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
mlx5_cmd_flush(dev);
}
mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
+unlock:
+ mutex_unlock(&dev->intf_state_mutex);
+}
+
+#define MLX5_CRDUMP_WAIT_MS 60000
+#define MLX5_FW_RESET_WAIT_MS 1000
+void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
+{
+ unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS;
+ int lock = -EBUSY;
+
+ mutex_lock(&dev->intf_state_mutex);
+ if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto unlock;
+
+ mlx5_core_err(dev, "start\n");
+
+ if (check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) {
+ /* Get cr-dump and reset FW semaphore */
+ lock = lock_sem_sw_reset(dev, true);
+
+ if (lock == -EBUSY) {
+ delay_ms = MLX5_CRDUMP_WAIT_MS;
+ goto recover_from_sw_reset;
+ }
+ /* Execute SW reset */
+ reset_fw_if_needed(dev);
+ }
+
+recover_from_sw_reset:
+ /* Recover from SW reset */
+ end = jiffies + msecs_to_jiffies(delay_ms);
+ do {
+ if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+ break;
+
+ cond_resched();
+ } while (!time_after(jiffies, end));
+
+ if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
+ dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
+ mlx5_get_nic_state(dev), delay_ms);
+ }
+
+ /* Release FW semaphore if you are the lock owner */
+ if (!lock)
+ lock_sem_sw_reset(dev, false);
+
mlx5_core_err(dev, "end\n");
unlock:
@@ -129,6 +277,20 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
case MLX5_NIC_IFC_NO_DRAM_NIC:
mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
break;
+
+ case MLX5_NIC_IFC_SW_RESET:
+ /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases:
+ * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded
+ * and this is a VF), this is not recoverable by SW reset.
+ * Logging of this is handled elsewhere.
+ * 2. FW reset has been issued by another function, driver can
+ * be reloaded to recover after the mode switches to
+ * MLX5_NIC_IFC_DISABLED.
+ */
+ if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR)
+ mlx5_core_warn(dev, "NIC SW reset in progress\n");
+ break;
+
default:
mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n",
nic_interface);
@@ -137,52 +299,32 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
mlx5_disable_device(dev);
}
-static void health_recover(struct work_struct *work)
-{
- struct mlx5_core_health *health;
- struct delayed_work *dwork;
- struct mlx5_core_dev *dev;
- struct mlx5_priv *priv;
- u8 nic_state;
-
- dwork = container_of(work, struct delayed_work, work);
- health = container_of(dwork, struct mlx5_core_health, recover_work);
- priv = container_of(health, struct mlx5_priv, health);
- dev = container_of(priv, struct mlx5_core_dev, priv);
-
- nic_state = mlx5_get_nic_state(dev);
- if (nic_state == MLX5_NIC_IFC_INVALID) {
- mlx5_core_err(dev, "health recovery flow aborted since the nic state is invalid\n");
- return;
- }
-
- mlx5_core_err(dev, "starting health recovery flow\n");
- mlx5_recover_device(dev);
-}
-
/* How much time to wait until health resetting the driver (in msecs) */
-#define MLX5_RECOVERY_DELAY_MSECS 60000
-static void health_care(struct work_struct *work)
+#define MLX5_RECOVERY_WAIT_MSECS 60000
+static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
{
- unsigned long recover_delay = msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS);
- struct mlx5_core_health *health;
- struct mlx5_core_dev *dev;
- struct mlx5_priv *priv;
- unsigned long flags;
+ unsigned long end;
- health = container_of(work, struct mlx5_core_health, work);
- priv = container_of(health, struct mlx5_priv, health);
- dev = container_of(priv, struct mlx5_core_dev, priv);
mlx5_core_warn(dev, "handling bad device here\n");
mlx5_handle_bad_state(dev);
+ end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS);
+ while (sensor_pci_not_working(dev)) {
+ if (time_after(jiffies, end)) {
+ mlx5_core_err(dev,
+ "health recovery flow aborted, PCI reads still not working\n");
+ return -EIO;
+ }
+ msleep(100);
+ }
- spin_lock_irqsave(&health->wq_lock, flags);
- if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags))
- schedule_delayed_work(&health->recover_work, recover_delay);
- else
- mlx5_core_err(dev,
- "new health works are not permitted at this stage\n");
- spin_unlock_irqrestore(&health->wq_lock, flags);
+ mlx5_core_err(dev, "starting health recovery flow\n");
+ mlx5_recover_device(dev);
+ if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state) ||
+ check_fatal_sensors(dev)) {
+ mlx5_core_err(dev, "health recovery failed\n");
+ return -EIO;
+ }
+ return 0;
}
static const char *hsynd_str(u8 synd)
@@ -246,6 +388,282 @@ static void print_health_info(struct mlx5_core_dev *dev)
mlx5_core_err(dev, "raw fw_ver 0x%08x\n", fw);
}
+static int
+mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+ u8 synd;
+ int err;
+
+ synd = ioread8(&h->synd);
+ err = devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd);
+ if (err || !synd)
+ return err;
+ return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd));
+}
+
+struct mlx5_fw_reporter_ctx {
+ u8 err_synd;
+ int miss_counter;
+};
+
+static int
+mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg,
+ struct mlx5_fw_reporter_ctx *fw_reporter_ctx)
+{
+ int err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "syndrome",
+ fw_reporter_ctx->err_synd);
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter",
+ fw_reporter_ctx->miss_counter);
+ if (err)
+ return err;
+ return 0;
+}
+
+static int
+mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+ int err;
+ int i;
+
+ if (!ioread8(&h->synd))
+ return 0;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, "health buffer");
+ if (err)
+ return err;
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var");
+ if (err)
+ return err;
+
+ for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) {
+ err = devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i));
+ if (err)
+ return err;
+ }
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr",
+ ioread32be(&h->assert_exit_ptr));
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "assert_callra",
+ ioread32be(&h->assert_callra));
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id));
+ if (err)
+ return err;
+ err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index",
+ ioread8(&h->irisc_index));
+ if (err)
+ return err;
+ err = devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd));
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd",
+ ioread16be(&h->ext_synd));
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver",
+ ioread32be(&h->fw_ver));
+ if (err)
+ return err;
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+ return devlink_fmsg_pair_nest_end(fmsg);
+}
+
+static int
+mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg, void *priv_ctx)
+{
+ struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+ int err;
+
+ err = mlx5_fw_tracer_trigger_core_dump_general(dev);
+ if (err)
+ return err;
+
+ if (priv_ctx) {
+ struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
+
+ err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
+ if (err)
+ return err;
+ }
+
+ err = mlx5_fw_reporter_heath_buffer_data_put(dev, fmsg);
+ if (err)
+ return err;
+ return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg);
+}
+
+static void mlx5_fw_reporter_err_work(struct work_struct *work)
+{
+ struct mlx5_fw_reporter_ctx fw_reporter_ctx;
+ struct mlx5_core_health *health;
+
+ health = container_of(work, struct mlx5_core_health, report_work);
+
+ if (IS_ERR_OR_NULL(health->fw_reporter))
+ return;
+
+ fw_reporter_ctx.err_synd = health->synd;
+ fw_reporter_ctx.miss_counter = health->miss_counter;
+ if (fw_reporter_ctx.err_synd) {
+ devlink_health_report(health->fw_reporter,
+ "FW syndrom reported", &fw_reporter_ctx);
+ return;
+ }
+ if (fw_reporter_ctx.miss_counter)
+ devlink_health_report(health->fw_reporter,
+ "FW miss counter reported",
+ &fw_reporter_ctx);
+}
+
+static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
+ .name = "fw",
+ .diagnose = mlx5_fw_reporter_diagnose,
+ .dump = mlx5_fw_reporter_dump,
+};
+
+static int
+mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
+ void *priv_ctx)
+{
+ struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+
+ return mlx5_health_try_recover(dev);
+}
+
+#define MLX5_CR_DUMP_CHUNK_SIZE 256
+static int
+mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg, void *priv_ctx)
+{
+ struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+ u32 crdump_size = dev->priv.health.crdump_size;
+ u32 *cr_data;
+ u32 data_size;
+ u32 offset;
+ int err;
+
+ if (!mlx5_core_is_pf(dev))
+ return -EPERM;
+
+ cr_data = kvmalloc(crdump_size, GFP_KERNEL);
+ if (!cr_data)
+ return -ENOMEM;
+ err = mlx5_crdump_collect(dev, cr_data);
+ if (err)
+ return err;
+
+ if (priv_ctx) {
+ struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
+
+ err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
+ if (err)
+ goto free_data;
+ }
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "crdump_data");
+ if (err)
+ goto free_data;
+ for (offset = 0; offset < crdump_size; offset += data_size) {
+ if (crdump_size - offset < MLX5_CR_DUMP_CHUNK_SIZE)
+ data_size = crdump_size - offset;
+ else
+ data_size = MLX5_CR_DUMP_CHUNK_SIZE;
+ err = devlink_fmsg_binary_put(fmsg, cr_data, data_size);
+ if (err)
+ goto free_data;
+ }
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+
+free_data:
+ kvfree(cr_data);
+ return err;
+}
+
+static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
+{
+ struct mlx5_fw_reporter_ctx fw_reporter_ctx;
+ struct mlx5_core_health *health;
+ struct mlx5_core_dev *dev;
+ struct mlx5_priv *priv;
+
+ health = container_of(work, struct mlx5_core_health, fatal_report_work);
+ priv = container_of(health, struct mlx5_priv, health);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ mlx5_enter_error_state(dev, false);
+ if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
+ if (mlx5_health_try_recover(dev))
+ mlx5_core_err(dev, "health recovery failed\n");
+ return;
+ }
+ fw_reporter_ctx.err_synd = health->synd;
+ fw_reporter_ctx.miss_counter = health->miss_counter;
+ devlink_health_report(health->fw_fatal_reporter,
+ "FW fatal error reported", &fw_reporter_ctx);
+}
+
+static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
+ .name = "fw_fatal",
+ .recover = mlx5_fw_fatal_reporter_recover,
+ .dump = mlx5_fw_fatal_reporter_dump,
+};
+
+#define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000
+static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct devlink *devlink = priv_to_devlink(dev);
+
+ health->fw_reporter =
+ devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
+ 0, false, dev);
+ if (IS_ERR(health->fw_reporter))
+ mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
+ PTR_ERR(health->fw_reporter));
+
+ health->fw_fatal_reporter =
+ devlink_health_reporter_create(devlink,
+ &mlx5_fw_fatal_reporter_ops,
+ MLX5_REPORTER_FW_GRACEFUL_PERIOD,
+ true, dev);
+ if (IS_ERR(health->fw_fatal_reporter))
+ mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n",
+ PTR_ERR(health->fw_fatal_reporter));
+}
+
+static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ if (!IS_ERR_OR_NULL(health->fw_reporter))
+ devlink_health_reporter_destroy(health->fw_reporter);
+
+ if (!IS_ERR_OR_NULL(health->fw_fatal_reporter))
+ devlink_health_reporter_destroy(health->fw_fatal_reporter);
+}
+
static unsigned long get_next_poll_jiffies(void)
{
unsigned long next;
@@ -264,7 +682,7 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
spin_lock_irqsave(&health->wq_lock, flags);
if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
- queue_work(health->wq, &health->work);
+ queue_work(health->wq, &health->fatal_report_work);
else
mlx5_core_err(dev, "new health works are not permitted at this stage\n");
spin_unlock_irqrestore(&health->wq_lock, flags);
@@ -274,6 +692,9 @@ static void poll_health(struct timer_list *t)
{
struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+ u32 fatal_error;
+ u8 prev_synd;
u32 count;
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
@@ -289,10 +710,19 @@ static void poll_health(struct timer_list *t)
if (health->miss_counter == MAX_MISSES) {
mlx5_core_err(dev, "device's health compromised - reached miss count\n");
print_health_info(dev);
+ queue_work(health->wq, &health->report_work);
}
- if (in_fatal(dev) && !health->sick) {
- health->sick = true;
+ prev_synd = health->synd;
+ health->synd = ioread8(&h->synd);
+ if (health->synd && health->synd != prev_synd)
+ queue_work(health->wq, &health->report_work);
+
+ fatal_error = check_fatal_sensors(dev);
+
+ if (fatal_error && !health->fatal_error) {
+ mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
+ dev->priv.health.fatal_error = fatal_error;
print_health_info(dev);
mlx5_trigger_health_work(dev);
}
@@ -306,9 +736,8 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
struct mlx5_core_health *health = &dev->priv.health;
timer_setup(&health->timer, poll_health, 0);
- health->sick = 0;
+ health->fatal_error = MLX5_SENSOR_NO_ERR;
clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
- clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
health->health = &dev->iseg->health;
health->health_counter = &dev->iseg->health_counter;
@@ -324,7 +753,6 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
if (disable_health) {
spin_lock_irqsave(&health->wq_lock, flags);
set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
- set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
spin_unlock_irqrestore(&health->wq_lock, flags);
}
@@ -338,21 +766,9 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
spin_lock_irqsave(&health->wq_lock, flags);
set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
- set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
spin_unlock_irqrestore(&health->wq_lock, flags);
- cancel_delayed_work_sync(&health->recover_work);
- cancel_work_sync(&health->work);
-}
-
-void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
-{
- struct mlx5_core_health *health = &dev->priv.health;
- unsigned long flags;
-
- spin_lock_irqsave(&health->wq_lock, flags);
- set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
- spin_unlock_irqrestore(&health->wq_lock, flags);
- cancel_delayed_work_sync(&dev->priv.health.recover_work);
+ cancel_work_sync(&health->report_work);
+ cancel_work_sync(&health->fatal_report_work);
}
void mlx5_health_flush(struct mlx5_core_dev *dev)
@@ -367,6 +783,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev)
struct mlx5_core_health *health = &dev->priv.health;
destroy_workqueue(health->wq);
+ mlx5_fw_reporters_destroy(dev);
}
int mlx5_health_init(struct mlx5_core_dev *dev)
@@ -374,20 +791,26 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
struct mlx5_core_health *health;
char *name;
+ mlx5_fw_reporters_create(dev);
+
health = &dev->priv.health;
name = kmalloc(64, GFP_KERNEL);
if (!name)
- return -ENOMEM;
+ goto out_err;
strcpy(name, "mlx5_health");
strcat(name, dev_name(dev->device));
health->wq = create_singlethread_workqueue(name);
kfree(name);
if (!health->wq)
- return -ENOMEM;
+ goto out_err;
spin_lock_init(&health->wq_lock);
- INIT_WORK(&health->work, health_care);
- INIT_DELAYED_WORK(&health->recover_work, health_recover);
+ INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work);
+ INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work);
return 0;
+
+out_err:
+ mlx5_fw_reporters_destroy(dev);
+ return -ENOMEM;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
index 90cb50fe17fd..ebd81f6b556e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -122,14 +122,6 @@ static int mlx5i_get_ts_info(struct net_device *netdev,
return mlx5e_ethtool_get_ts_info(priv, info);
}
-static int mlx5i_flash_device(struct net_device *netdev,
- struct ethtool_flash *flash)
-{
- struct mlx5e_priv *priv = mlx5i_epriv(netdev);
-
- return mlx5e_ethtool_flash_device(priv, flash);
-}
-
enum mlx5_ptys_width {
MLX5_PTYS_WIDTH_1X = 1 << 0,
MLX5_PTYS_WIDTH_2X = 1 << 1,
@@ -241,7 +233,6 @@ const struct ethtool_ops mlx5i_ethtool_ops = {
.get_ethtool_stats = mlx5i_get_ethtool_stats,
.get_ringparam = mlx5i_get_ringparam,
.set_ringparam = mlx5i_set_ringparam,
- .flash_device = mlx5i_flash_device,
.get_channels = mlx5i_get_channels,
.set_channels = mlx5i_set_channels,
.get_coalesce = mlx5i_get_coalesce,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 9ca492b430d8..6bfaaab362dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -87,7 +87,7 @@ int mlx5i_init(struct mlx5_core_dev *mdev,
mlx5e_set_netdev_mtu_boundaries(priv);
netdev->mtu = netdev->max_mtu;
- mlx5e_build_nic_params(mdev, &priv->rss_params, &priv->channels.params,
+ mlx5e_build_nic_params(mdev, NULL, &priv->rss_params, &priv->channels.params,
mlx5e_get_netdev_max_channels(netdev),
netdev->mtu);
mlx5i_build_nic_params(mdev, &priv->channels.params);
@@ -258,6 +258,18 @@ void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *
mlx5_core_destroy_qp(mdev, qp);
}
+int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
+ void *tisc;
+
+ tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn);
+
+ return mlx5e_create_tis(mdev, in, tisn);
+}
+
static int mlx5i_init_tx(struct mlx5e_priv *priv)
{
struct mlx5i_priv *ipriv = priv->ppriv;
@@ -269,7 +281,7 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv)
return err;
}
- err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]);
+ err = mlx5i_create_tis(priv->mdev, ipriv->qp.qpn, &priv->tisn[0]);
if (err) {
mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
goto err_destroy_underlay_qp;
@@ -365,7 +377,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv);
+ err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
if (err)
goto err_destroy_indirect_rqts;
@@ -373,7 +385,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
if (err)
goto err_destroy_direct_rqts;
- err = mlx5e_create_direct_tirs(priv);
+ err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
if (err)
goto err_destroy_indirect_tirs;
@@ -384,11 +396,11 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
return 0;
err_destroy_direct_tirs:
- mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
err_destroy_indirect_tirs:
mlx5e_destroy_indirect_tirs(priv, true);
err_destroy_direct_rqts:
- mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
err_destroy_indirect_rqts:
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
err_close_drop_rq:
@@ -401,9 +413,9 @@ err_destroy_q_counters:
static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
{
mlx5i_destroy_flow_steering(priv);
- mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
mlx5e_destroy_indirect_tirs(priv, true);
- mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
mlx5e_close_drop_rq(&priv->drop_rq);
mlx5e_destroy_q_counters(priv);
@@ -418,6 +430,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = {
.cleanup_rx = mlx5i_cleanup_rx,
.enable = NULL, /* mlx5i_enable */
.disable = NULL, /* mlx5i_disable */
+ .update_rx = mlx5e_update_nic_rx,
.update_stats = NULL, /* mlx5i_update_stats */
.update_carrier = NULL, /* no HW update in IB link */
.rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
@@ -526,7 +539,7 @@ static int mlx5i_open(struct net_device *netdev)
if (err)
goto err_remove_fs_underlay_qp;
- mlx5e_refresh_tirs(epriv, false);
+ epriv->profile->update_rx(epriv);
mlx5e_activate_priv_channels(epriv);
mutex_unlock(&epriv->state_lock);
@@ -698,7 +711,9 @@ static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num,
prof->init(mdev, netdev, prof, ipriv);
- mlx5e_attach_netdev(epriv);
+ err = mlx5e_attach_netdev(epriv);
+ if (err)
+ goto detach;
netif_carrier_off(netdev);
/* set rdma_netdev func pointers */
@@ -714,6 +729,11 @@ static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num,
return 0;
+detach:
+ prof->cleanup(epriv);
+ if (ipriv->sub_interface)
+ return err;
+ mlx5e_destroy_mdev_resources(mdev);
destroy_ht:
mlx5i_pkey_qpn_ht_cleanup(netdev);
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index e19ba3fcd1b7..c87962cab921 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -59,6 +59,8 @@ struct mlx5i_priv {
char *mlx5e_priv[0];
};
+int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn);
+
/* Underlay QP create/destroy functions */
int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
index b491b8f5fd6b..6e56fa769d2e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -210,7 +210,7 @@ static int mlx5i_pkey_open(struct net_device *netdev)
goto err_unint_underlay_qp;
}
- err = mlx5e_create_tis(mdev, 0 /* tc */, ipriv->qp.qpn, &epriv->tisn[0]);
+ err = mlx5i_create_tis(mdev, ipriv->qp.qpn, &epriv->tisn[0]);
if (err) {
mlx5_core_warn(mdev, "create child tis failed, %d\n", err);
goto err_remove_rx_uderlay_qp;
@@ -221,7 +221,7 @@ static int mlx5i_pkey_open(struct net_device *netdev)
mlx5_core_warn(mdev, "opening child channels failed, %d\n", err);
goto err_clear_state_opened_flag;
}
- mlx5e_refresh_tirs(epriv, false);
+ epriv->profile->update_rx(epriv);
mlx5e_activate_priv_channels(epriv);
mutex_unlock(&epriv->state_lock);
@@ -350,6 +350,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
.cleanup_rx = mlx5i_pkey_cleanup_rx,
.enable = NULL,
.disable = NULL,
+ .update_rx = mlx5e_update_nic_rx,
.update_stats = NULL,
.rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
.rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 959605559858..c5ef2ff26465 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -305,8 +305,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
!mlx5_sriov_is_enabled(dev1);
#ifdef CONFIG_MLX5_ESWITCH
- roce_lag &= dev0->priv.eswitch->mode == SRIOV_NONE &&
- dev1->priv.eswitch->mode == SRIOV_NONE;
+ roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
+ dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif
if (roce_lag)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index 8212bfd05733..e69766393990 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2019 Mellanox Technologies. */
#include <linux/netdevice.h>
+#include <net/nexthop.h>
#include "lag.h"
#include "lag_mp.h"
#include "mlx5_core.h"
@@ -110,6 +111,8 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
struct fib_info *fi)
{
struct lag_mp *mp = &ldev->lag_mp;
+ struct fib_nh *fib_nh0, *fib_nh1;
+ unsigned int nhs;
/* Handle delete event */
if (event == FIB_EVENT_ENTRY_DEL) {
@@ -120,9 +123,11 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
}
/* Handle add/replace event */
- if (fi->fib_nhs == 1) {
+ nhs = fib_info_num_path(fi);
+ if (nhs == 1) {
if (__mlx5_lag_is_active(ldev)) {
- struct net_device *nh_dev = fi->fib_nh[0].fib_nh_dev;
+ struct fib_nh *nh = fib_info_nh(fi, 0);
+ struct net_device *nh_dev = nh->fib_nh_dev;
int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
mlx5_lag_set_port_affinity(ldev, ++i);
@@ -130,14 +135,16 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
return;
}
- if (fi->fib_nhs != 2)
+ if (nhs != 2)
return;
/* Verify next hops are ports of the same hca */
- if (!(fi->fib_nh[0].fib_nh_dev == ldev->pf[0].netdev &&
- fi->fib_nh[1].fib_nh_dev == ldev->pf[1].netdev) &&
- !(fi->fib_nh[0].fib_nh_dev == ldev->pf[1].netdev &&
- fi->fib_nh[1].fib_nh_dev == ldev->pf[0].netdev)) {
+ fib_nh0 = fib_info_nh(fi, 0);
+ fib_nh1 = fib_info_nh(fi, 1);
+ if (!(fib_nh0->fib_nh_dev == ldev->pf[0].netdev &&
+ fib_nh1->fib_nh_dev == ldev->pf[1].netdev) &&
+ !(fib_nh0->fib_nh_dev == ldev->pf[1].netdev &&
+ fib_nh1->fib_nh_dev == ldev->pf[0].netdev)) {
mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n");
return;
}
@@ -174,7 +181,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
mlx5_lag_set_port_affinity(ldev, i);
}
} else if (event == FIB_EVENT_NH_ADD &&
- fi->fib_nhs == 2) {
+ fib_info_num_path(fi) == 2) {
mlx5_lag_set_port_affinity(ldev, 0);
}
}
@@ -238,6 +245,7 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
struct mlx5_fib_event_work *fib_work;
struct fib_entry_notifier_info *fen_info;
struct fib_nh_notifier_info *fnh_info;
+ struct net_device *fib_dev;
struct fib_info *fi;
if (info->family != AF_INET)
@@ -254,8 +262,13 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
fen_info = container_of(info, struct fib_entry_notifier_info,
info);
fi = fen_info->fi;
- if (fi->fib_dev != ldev->pf[0].netdev &&
- fi->fib_dev != ldev->pf[1].netdev) {
+ if (fi->nh) {
+ NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
+ return notifier_from_errno(-EINVAL);
+ }
+ fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
+ if (fib_dev != ldev->pf[0].netdev &&
+ fib_dev != ldev->pf[1].netdev) {
return NOTIFY_DONE;
}
fib_work = mlx5_lag_init_fib_work(ldev, event);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
new file mode 100644
index 000000000000..ea9ee88491e5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "mlx5_core.h"
+
+int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
+ void *key, u32 sz_bytes,
+ u32 *p_key_id)
+{
+ u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ u32 sz_bits = sz_bytes * BITS_PER_BYTE;
+ u8 general_obj_key_size;
+ u64 general_obj_types;
+ void *obj, *key_p;
+ int err;
+
+ obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object);
+ key_p = MLX5_ADDR_OF(encryption_key_obj, obj, key);
+
+ general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+ if (!(general_obj_types &
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY))
+ return -EINVAL;
+
+ switch (sz_bits) {
+ case 128:
+ general_obj_key_size =
+ MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128;
+ break;
+ case 256:
+ general_obj_key_size =
+ MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ memcpy(key_p, key, sz_bytes);
+
+ MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size);
+ MLX5_SET(encryption_key_obj, obj, key_type,
+ MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK);
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY);
+ MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.pdn);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *p_key_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+ /* avoid leaking key on the stack */
+ memzero_explicit(in, sizeof(in));
+
+ return err;
+}
+
+void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, key_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index c0fb6d72b695..3dfab91ae5f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -7,7 +7,6 @@
#include <linux/mlx5/eq.h>
#include <linux/mlx5/cq.h>
-#define MLX5_MAX_IRQ_NAME (32)
#define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe))
struct mlx5_eq_tasklet {
@@ -36,8 +35,14 @@ struct mlx5_eq {
struct mlx5_rsc_debug *dbg;
};
+struct mlx5_eq_async {
+ struct mlx5_eq core;
+ struct notifier_block irq_nb;
+};
+
struct mlx5_eq_comp {
- struct mlx5_eq core; /* Must be first */
+ struct mlx5_eq core;
+ struct notifier_block irq_nb;
struct mlx5_eq_tasklet tasklet_ctx;
struct list_head list;
};
@@ -70,7 +75,7 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev);
void mlx5_eq_table_destroy(struct mlx5_core_dev *dev);
int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn);
struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev);
void mlx5_cq_tasklet_cb(unsigned long data);
@@ -92,7 +97,4 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev);
#endif
-int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
-int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
-
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c
new file mode 100644
index 000000000000..23361a9ae4fa
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/kernel.h>
+#include "mlx5_core.h"
+#include "geneve.h"
+
+struct mlx5_geneve {
+ struct mlx5_core_dev *mdev;
+ __be16 opt_class;
+ u8 opt_type;
+ u32 obj_id;
+ struct mutex sync_lock; /* protect GENEVE obj operations */
+ u32 refcount;
+};
+
+static int mlx5_geneve_tlv_option_create(struct mlx5_core_dev *mdev,
+ __be16 class,
+ u8 type,
+ u8 len)
+{
+ u32 in[MLX5_ST_SZ_DW(create_geneve_tlv_option_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+ u64 general_obj_types;
+ void *hdr, *opt;
+ u16 obj_id;
+ int err;
+
+ general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+ if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT))
+ return -EINVAL;
+
+ hdr = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, hdr);
+ opt = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, geneve_tlv_opt);
+
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT);
+
+ MLX5_SET(geneve_tlv_option, opt, option_class, be16_to_cpu(class));
+ MLX5_SET(geneve_tlv_option, opt, option_type, type);
+ MLX5_SET(geneve_tlv_option, opt, option_data_length, len);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ return obj_id;
+}
+
+static void mlx5_geneve_tlv_option_destroy(struct mlx5_core_dev *mdev, u16 obj_id)
+{
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt)
+{
+ int res = 0;
+
+ if (IS_ERR_OR_NULL(geneve))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&geneve->sync_lock);
+
+ if (geneve->refcount) {
+ if (geneve->opt_class == opt->opt_class &&
+ geneve->opt_type == opt->type) {
+ /* We already have TLV options obj allocated */
+ geneve->refcount++;
+ } else {
+ /* TLV options obj allocated, but its params
+ * do not match the new request.
+ * We support only one such object.
+ */
+ mlx5_core_warn(geneve->mdev,
+ "Won't create Geneve TLV opt object with class:type:len = 0x%x:0x%x:%d (another class:type already exists)\n",
+ be16_to_cpu(opt->opt_class),
+ opt->type,
+ opt->length);
+ res = -EOPNOTSUPP;
+ goto unlock;
+ }
+ } else {
+ /* We don't have any TLV options obj allocated */
+
+ res = mlx5_geneve_tlv_option_create(geneve->mdev,
+ opt->opt_class,
+ opt->type,
+ opt->length);
+ if (res < 0) {
+ mlx5_core_warn(geneve->mdev,
+ "Failed creating Geneve TLV opt object class:type:len = 0x%x:0x%x:%d (err=%d)\n",
+ be16_to_cpu(opt->opt_class),
+ opt->type, opt->length, res);
+ goto unlock;
+ }
+ geneve->opt_class = opt->opt_class;
+ geneve->opt_type = opt->type;
+ geneve->obj_id = res;
+ geneve->refcount++;
+ }
+
+unlock:
+ mutex_unlock(&geneve->sync_lock);
+ return res;
+}
+
+void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve)
+{
+ if (IS_ERR_OR_NULL(geneve))
+ return;
+
+ mutex_lock(&geneve->sync_lock);
+ if (--geneve->refcount == 0) {
+ /* We've just removed the last user of Geneve option.
+ * Now delete the object in FW.
+ */
+ mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id);
+
+ geneve->opt_class = 0;
+ geneve->opt_type = 0;
+ geneve->obj_id = 0;
+ }
+ mutex_unlock(&geneve->sync_lock);
+}
+
+struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_geneve *geneve =
+ kzalloc(sizeof(*geneve), GFP_KERNEL);
+
+ if (!geneve)
+ return ERR_PTR(-ENOMEM);
+ geneve->mdev = mdev;
+ mutex_init(&geneve->sync_lock);
+
+ return geneve;
+}
+
+void mlx5_geneve_destroy(struct mlx5_geneve *geneve)
+{
+ if (IS_ERR_OR_NULL(geneve))
+ return;
+
+ /* Lockless since we are unloading */
+ if (geneve->refcount)
+ mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id);
+
+ kfree(geneve);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h
new file mode 100644
index 000000000000..adee0cbba19c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_GENEVE_H__
+#define __MLX5_GENEVE_H__
+
+#include <net/geneve.h>
+#include <linux/mlx5/driver.h>
+
+struct mlx5_geneve;
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev);
+void mlx5_geneve_destroy(struct mlx5_geneve *geneve);
+
+int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt);
+void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve);
+
+#else /* CONFIG_MLX5_ESWITCH */
+
+static inline struct mlx5_geneve
+*mlx5_geneve_create(struct mlx5_core_dev *mdev) { return NULL; }
+static inline void
+mlx5_geneve_destroy(struct mlx5_geneve *geneve) {}
+static inline int
+mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) { return 0; }
+static inline void
+mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) {}
+
+#endif /* CONFIG_MLX5_ESWITCH */
+
+#endif /* __MLX5_GENEVE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index 397a2847867a..b99d469e4e64 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -41,6 +41,9 @@ int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+int mlx5_crdump_enable(struct mlx5_core_dev *dev);
+void mlx5_crdump_disable(struct mlx5_core_dev *dev);
+int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data);
/* TODO move to lib/events.h */
@@ -76,4 +79,9 @@ struct mlx5_pme_stats {
void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats);
int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data);
+/* Crypto */
+int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
+ void *key, u32 sz_bytes, u32 *p_key_id);
+void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id);
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
index a71d5b9c7ab2..3118e8d66407 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -67,6 +67,7 @@ static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
struct l2table_node {
struct l2addr_node node;
u32 index; /* index in HW l2 table */
+ int ref_count;
};
struct mlx5_mpfs {
@@ -134,8 +135,8 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
{
struct mlx5_mpfs *mpfs = dev->priv.mpfs;
struct l2table_node *l2addr;
+ int err = 0;
u32 index;
- int err;
if (!MLX5_ESWITCH_MANAGER(dev))
return 0;
@@ -144,30 +145,35 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node);
if (l2addr) {
- err = -EEXIST;
- goto abort;
+ l2addr->ref_count++;
+ goto out;
}
err = alloc_l2table_index(mpfs, &index);
if (err)
- goto abort;
+ goto out;
l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL);
if (!l2addr) {
- free_l2table_index(mpfs, index);
err = -ENOMEM;
- goto abort;
+ goto hash_add_err;
}
- l2addr->index = index;
err = set_l2table_entry_cmd(dev, index, mac);
- if (err) {
- l2addr_hash_del(l2addr);
- free_l2table_index(mpfs, index);
- }
+ if (err)
+ goto set_table_entry_err;
+
+ l2addr->index = index;
+ l2addr->ref_count = 1;
mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index);
-abort:
+ goto out;
+
+set_table_entry_err:
+ l2addr_hash_del(l2addr);
+hash_add_err:
+ free_l2table_index(mpfs, index);
+out:
mutex_unlock(&mpfs->lock);
return err;
}
@@ -190,6 +196,9 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
goto unlock;
}
+ if (--l2addr->ref_count > 0)
+ goto unlock;
+
index = l2addr->index;
del_l2table_entry_cmd(dev, index);
l2addr_hash_del(l2addr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
new file mode 100644
index 000000000000..6b774e0c2766
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include <linux/pci.h>
+#include "mlx5_core.h"
+#include "pci_vsc.h"
+
+#define MLX5_EXTRACT_C(source, offset, size) \
+ ((((u32)(source)) >> (offset)) & MLX5_ONES32(size))
+#define MLX5_EXTRACT(src, start, len) \
+ (((len) == 32) ? (src) : MLX5_EXTRACT_C(src, start, len))
+#define MLX5_ONES32(size) \
+ ((size) ? (0xffffffff >> (32 - (size))) : 0)
+#define MLX5_MASK32(offset, size) \
+ (MLX5_ONES32(size) << (offset))
+#define MLX5_MERGE_C(rsrc1, rsrc2, start, len) \
+ ((((rsrc2) << (start)) & (MLX5_MASK32((start), (len)))) | \
+ ((rsrc1) & (~MLX5_MASK32((start), (len)))))
+#define MLX5_MERGE(rsrc1, rsrc2, start, len) \
+ (((len) == 32) ? (rsrc2) : MLX5_MERGE_C(rsrc1, rsrc2, start, len))
+#define vsc_read(dev, offset, val) \
+ pci_read_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val))
+#define vsc_write(dev, offset, val) \
+ pci_write_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val))
+#define VSC_MAX_RETRIES 2048
+
+enum {
+ VSC_CTRL_OFFSET = 0x4,
+ VSC_COUNTER_OFFSET = 0x8,
+ VSC_SEMAPHORE_OFFSET = 0xc,
+ VSC_ADDR_OFFSET = 0x10,
+ VSC_DATA_OFFSET = 0x14,
+
+ VSC_FLAG_BIT_OFFS = 31,
+ VSC_FLAG_BIT_LEN = 1,
+
+ VSC_SYND_BIT_OFFS = 30,
+ VSC_SYND_BIT_LEN = 1,
+
+ VSC_ADDR_BIT_OFFS = 0,
+ VSC_ADDR_BIT_LEN = 30,
+
+ VSC_SPACE_BIT_OFFS = 0,
+ VSC_SPACE_BIT_LEN = 16,
+
+ VSC_SIZE_VLD_BIT_OFFS = 28,
+ VSC_SIZE_VLD_BIT_LEN = 1,
+
+ VSC_STATUS_BIT_OFFS = 29,
+ VSC_STATUS_BIT_LEN = 3,
+};
+
+void mlx5_pci_vsc_init(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_core_is_pf(dev))
+ return;
+
+ dev->vsc_addr = pci_find_capability(dev->pdev,
+ PCI_CAP_ID_VNDR);
+ if (!dev->vsc_addr)
+ mlx5_core_warn(dev, "Failed to get valid vendor specific ID\n");
+}
+
+int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev)
+{
+ u32 counter = 0;
+ int retries = 0;
+ u32 lock_val;
+ int ret;
+
+ pci_cfg_access_lock(dev->pdev);
+ do {
+ if (retries > VSC_MAX_RETRIES) {
+ ret = -EBUSY;
+ goto pci_unlock;
+ }
+
+ /* Check if semaphore is already locked */
+ ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val);
+ if (ret)
+ goto pci_unlock;
+
+ if (lock_val) {
+ retries++;
+ usleep_range(1000, 2000);
+ continue;
+ }
+
+ /* Read and write counter value, if written value is
+ * the same, semaphore was acquired successfully.
+ */
+ ret = vsc_read(dev, VSC_COUNTER_OFFSET, &counter);
+ if (ret)
+ goto pci_unlock;
+
+ ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, counter);
+ if (ret)
+ goto pci_unlock;
+
+ ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val);
+ if (ret)
+ goto pci_unlock;
+
+ retries++;
+ } while (counter != lock_val);
+
+ return 0;
+
+pci_unlock:
+ pci_cfg_access_unlock(dev->pdev);
+ return ret;
+}
+
+int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev)
+{
+ int ret;
+
+ ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, MLX5_VSC_UNLOCK);
+ pci_cfg_access_unlock(dev->pdev);
+ return ret;
+}
+
+int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space,
+ u32 *ret_space_size)
+{
+ int ret;
+ u32 val = 0;
+
+ if (!mlx5_vsc_accessible(dev))
+ return -EINVAL;
+
+ if (ret_space_size)
+ *ret_space_size = 0;
+
+ /* Get a unique val */
+ ret = vsc_read(dev, VSC_CTRL_OFFSET, &val);
+ if (ret)
+ goto out;
+
+ /* Try to modify the lock */
+ val = MLX5_MERGE(val, space, VSC_SPACE_BIT_OFFS, VSC_SPACE_BIT_LEN);
+ ret = vsc_write(dev, VSC_CTRL_OFFSET, val);
+ if (ret)
+ goto out;
+
+ /* Verify lock was modified */
+ ret = vsc_read(dev, VSC_CTRL_OFFSET, &val);
+ if (ret)
+ goto out;
+
+ if (MLX5_EXTRACT(val, VSC_STATUS_BIT_OFFS, VSC_STATUS_BIT_LEN) == 0)
+ return -EINVAL;
+
+ /* Get space max address if indicated by size valid bit */
+ if (ret_space_size &&
+ MLX5_EXTRACT(val, VSC_SIZE_VLD_BIT_OFFS, VSC_SIZE_VLD_BIT_LEN)) {
+ ret = vsc_read(dev, VSC_ADDR_OFFSET, &val);
+ if (ret) {
+ mlx5_core_warn(dev, "Failed to get max space size\n");
+ goto out;
+ }
+ *ret_space_size = MLX5_EXTRACT(val, VSC_ADDR_BIT_OFFS,
+ VSC_ADDR_BIT_LEN);
+ }
+ return 0;
+
+out:
+ return ret;
+}
+
+static int mlx5_vsc_wait_on_flag(struct mlx5_core_dev *dev, u8 expected_val)
+{
+ int retries = 0;
+ u32 flag;
+ int ret;
+
+ do {
+ if (retries > VSC_MAX_RETRIES)
+ return -EBUSY;
+
+ ret = vsc_read(dev, VSC_ADDR_OFFSET, &flag);
+ if (ret)
+ return ret;
+ flag = MLX5_EXTRACT(flag, VSC_FLAG_BIT_OFFS, VSC_FLAG_BIT_LEN);
+ retries++;
+
+ if ((retries & 0xf) == 0)
+ usleep_range(1000, 2000);
+
+ } while (flag != expected_val);
+
+ return 0;
+}
+
+static int mlx5_vsc_gw_write(struct mlx5_core_dev *dev, unsigned int address,
+ u32 data)
+{
+ int ret;
+
+ if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS,
+ VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN))
+ return -EINVAL;
+
+ /* Set flag to 0x1 */
+ address = MLX5_MERGE(address, 1, VSC_FLAG_BIT_OFFS, 1);
+ ret = vsc_write(dev, VSC_DATA_OFFSET, data);
+ if (ret)
+ goto out;
+
+ ret = vsc_write(dev, VSC_ADDR_OFFSET, address);
+ if (ret)
+ goto out;
+
+ /* Wait for the flag to be cleared */
+ ret = mlx5_vsc_wait_on_flag(dev, 0);
+
+out:
+ return ret;
+}
+
+static int mlx5_vsc_gw_read(struct mlx5_core_dev *dev, unsigned int address,
+ u32 *data)
+{
+ int ret;
+
+ if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS,
+ VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN))
+ return -EINVAL;
+
+ ret = vsc_write(dev, VSC_ADDR_OFFSET, address);
+ if (ret)
+ goto out;
+
+ ret = mlx5_vsc_wait_on_flag(dev, 1);
+ if (ret)
+ goto out;
+
+ ret = vsc_read(dev, VSC_DATA_OFFSET, data);
+out:
+ return ret;
+}
+
+static int mlx5_vsc_gw_read_fast(struct mlx5_core_dev *dev,
+ unsigned int read_addr,
+ unsigned int *next_read_addr,
+ u32 *data)
+{
+ int ret;
+
+ ret = mlx5_vsc_gw_read(dev, read_addr, data);
+ if (ret)
+ goto out;
+
+ ret = vsc_read(dev, VSC_ADDR_OFFSET, next_read_addr);
+ if (ret)
+ goto out;
+
+ *next_read_addr = MLX5_EXTRACT(*next_read_addr, VSC_ADDR_BIT_OFFS,
+ VSC_ADDR_BIT_LEN);
+
+ if (*next_read_addr <= read_addr)
+ ret = -EINVAL;
+out:
+ return ret;
+}
+
+int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data,
+ int length)
+{
+ unsigned int next_read_addr = 0;
+ unsigned int read_addr = 0;
+
+ while (read_addr < length) {
+ if (mlx5_vsc_gw_read_fast(dev, read_addr, &next_read_addr,
+ &data[(read_addr >> 2)]))
+ return read_addr;
+
+ read_addr = next_read_addr;
+ }
+ return length;
+}
+
+int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space,
+ enum mlx5_vsc_state state)
+{
+ u32 data, id = 0;
+ int ret;
+
+ ret = mlx5_vsc_gw_set_space(dev, MLX5_SEMAPHORE_SPACE_DOMAIN, NULL);
+ if (ret) {
+ mlx5_core_warn(dev, "Failed to set gw space %d\n", ret);
+ return ret;
+ }
+
+ if (state == MLX5_VSC_LOCK) {
+ /* Get a unique ID based on the counter */
+ ret = vsc_read(dev, VSC_COUNTER_OFFSET, &id);
+ if (ret)
+ return ret;
+ }
+
+ /* Try to modify lock */
+ ret = mlx5_vsc_gw_write(dev, space, id);
+ if (ret)
+ return ret;
+
+ /* Verify lock was modified */
+ ret = mlx5_vsc_gw_read(dev, space, &data);
+ if (ret)
+ return -EINVAL;
+
+ if (data != id)
+ return -EBUSY;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h
new file mode 100644
index 000000000000..64272a6d7754
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#ifndef __MLX5_PCI_VSC_H__
+#define __MLX5_PCI_VSC_H__
+
+enum mlx5_vsc_state {
+ MLX5_VSC_UNLOCK,
+ MLX5_VSC_LOCK,
+};
+
+enum {
+ MLX5_VSC_SPACE_SCAN_CRSPACE = 0x7,
+};
+
+void mlx5_pci_vsc_init(struct mlx5_core_dev *dev);
+int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev);
+int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev);
+int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space,
+ u32 *ret_space_size);
+int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data,
+ int length);
+
+static inline bool mlx5_vsc_accessible(struct mlx5_core_dev *dev)
+{
+ return !!dev->vsc_addr;
+}
+
+int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space,
+ enum mlx5_vsc_state state);
+
+#endif /* __MLX5_PCI_VSC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
index be69c1d7941a..48b5c847b642 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
@@ -98,27 +98,12 @@ static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy,
*/
if (entropy_flags.gre_calc_supported &&
reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) {
- /* Other applications may change the global FW entropy
- * calculations settings. Check that the current entropy value
- * is the negative of the updated value.
- */
- if (entropy_flags.force_enabled &&
- enable == entropy_flags.gre_calc_enabled) {
- mlx5_core_warn(tun_entropy->mdev,
- "Unexpected GRE entropy calc setting - expected %d",
- !entropy_flags.gre_calc_enabled);
- return -EOPNOTSUPP;
- }
- err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, enable,
- entropy_flags.force_supported);
+ if (!entropy_flags.force_supported)
+ return 0;
+ err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev,
+ enable, !enable);
if (err)
return err;
- /* if we turn on the entropy we don't need to force it anymore */
- if (entropy_flags.force_supported && enable) {
- err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, 1, 0);
- if (err)
- return err;
- }
} else if (entropy_flags.calc_supported) {
/* Other applications may change the global FW entropy
* calculations settings. Check that the current entropy value
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 61fa1d162d28..b15b27a497fc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -56,6 +56,7 @@
#include "fs_core.h"
#include "lib/mpfs.h"
#include "eswitch.h"
+#include "devlink.h"
#include "lib/mlx5.h"
#include "fpga/core.h"
#include "fpga/ipsec.h"
@@ -63,7 +64,9 @@
#include "accel/tls.h"
#include "lib/clock.h"
#include "lib/vxlan.h"
+#include "lib/geneve.h"
#include "lib/devcom.h"
+#include "lib/pci_vsc.h"
#include "diag/fw_tracer.h"
#include "ecpf.h"
@@ -169,18 +172,28 @@ static struct mlx5_profile profile[] = {
#define FW_INIT_TIMEOUT_MILI 2000
#define FW_INIT_WAIT_MS 2
-#define FW_PRE_INIT_TIMEOUT_MILI 10000
+#define FW_PRE_INIT_TIMEOUT_MILI 120000
+#define FW_INIT_WARN_MESSAGE_INTERVAL 20000
-static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
+static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
+ u32 warn_time_mili)
{
+ unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili);
unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
int err = 0;
+ BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL);
+
while (fw_initializing(dev)) {
if (time_after(jiffies, end)) {
err = -EBUSY;
break;
}
+ if (warn_time_mili && time_after(jiffies, warn)) {
+ mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n",
+ jiffies_to_msecs(end - warn) / 1000);
+ warn = jiffies + msecs_to_jiffies(warn_time_mili);
+ }
msleep(FW_INIT_WAIT_MS);
}
@@ -721,8 +734,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
struct mlx5_priv *priv = &dev->priv;
int err = 0;
- priv->pci_dev_data = id->driver_data;
-
+ mutex_init(&dev->pci_status_mutex);
pci_set_drvdata(dev->pdev, dev);
dev->bar_addr = pci_resource_start(pdev, 0);
@@ -761,6 +773,8 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
goto err_clr_master;
}
+ mlx5_pci_vsc_init(dev);
+
return 0;
err_clr_master:
@@ -794,10 +808,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
goto err_devcom;
}
+ err = mlx5_irq_table_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "failed to initialize irq table\n");
+ goto err_devcom;
+ }
+
err = mlx5_eq_table_init(dev);
if (err) {
mlx5_core_err(dev, "failed to initialize eq\n");
- goto err_devcom;
+ goto err_irq_cleanup;
}
err = mlx5_events_init(dev);
@@ -821,6 +841,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
mlx5_init_clock(dev);
dev->vxlan = mlx5_vxlan_create(dev);
+ dev->geneve = mlx5_geneve_create(dev);
err = mlx5_init_rl_table(dev);
if (err) {
@@ -834,37 +855,38 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
goto err_rl_cleanup;
}
- err = mlx5_eswitch_init(dev);
+ err = mlx5_sriov_init(dev);
if (err) {
- mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
+ mlx5_core_err(dev, "Failed to init sriov %d\n", err);
goto err_mpfs_cleanup;
}
- err = mlx5_sriov_init(dev);
+ err = mlx5_eswitch_init(dev);
if (err) {
- mlx5_core_err(dev, "Failed to init sriov %d\n", err);
- goto err_eswitch_cleanup;
+ mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
+ goto err_sriov_cleanup;
}
err = mlx5_fpga_init(dev);
if (err) {
mlx5_core_err(dev, "Failed to init fpga device %d\n", err);
- goto err_sriov_cleanup;
+ goto err_eswitch_cleanup;
}
dev->tracer = mlx5_fw_tracer_create(dev);
return 0;
-err_sriov_cleanup:
- mlx5_sriov_cleanup(dev);
err_eswitch_cleanup:
mlx5_eswitch_cleanup(dev->priv.eswitch);
+err_sriov_cleanup:
+ mlx5_sriov_cleanup(dev);
err_mpfs_cleanup:
mlx5_mpfs_cleanup(dev);
err_rl_cleanup:
mlx5_cleanup_rl_table(dev);
err_tables_cleanup:
+ mlx5_geneve_destroy(dev->geneve);
mlx5_vxlan_destroy(dev->vxlan);
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_qp_table(dev);
@@ -873,6 +895,8 @@ err_events_cleanup:
mlx5_events_cleanup(dev);
err_eq_cleanup:
mlx5_eq_table_cleanup(dev);
+err_irq_cleanup:
+ mlx5_irq_table_cleanup(dev);
err_devcom:
mlx5_devcom_unregister_device(dev->priv.devcom);
@@ -883,10 +907,11 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
mlx5_fw_tracer_destroy(dev->tracer);
mlx5_fpga_cleanup(dev);
- mlx5_sriov_cleanup(dev);
mlx5_eswitch_cleanup(dev->priv.eswitch);
+ mlx5_sriov_cleanup(dev);
mlx5_mpfs_cleanup(dev);
mlx5_cleanup_rl_table(dev);
+ mlx5_geneve_destroy(dev->geneve);
mlx5_vxlan_destroy(dev->vxlan);
mlx5_cleanup_clock(dev);
mlx5_cleanup_reserved_gids(dev);
@@ -895,6 +920,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_cq_debugfs_cleanup(dev);
mlx5_events_cleanup(dev);
mlx5_eq_table_cleanup(dev);
+ mlx5_irq_table_cleanup(dev);
mlx5_devcom_unregister_device(dev->priv.devcom);
}
@@ -911,7 +937,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
/* wait for firmware to accept initialization segments configurations
*/
- err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI);
+ err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL);
if (err) {
mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n",
FW_PRE_INIT_TIMEOUT_MILI);
@@ -924,7 +950,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
return err;
}
- err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
+ err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0);
if (err) {
mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n",
FW_INIT_TIMEOUT_MILI);
@@ -1028,6 +1054,12 @@ static int mlx5_load(struct mlx5_core_dev *dev)
mlx5_events_start(dev);
mlx5_pagealloc_start(dev);
+ err = mlx5_irq_table_create(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc IRQs\n");
+ goto err_irq_table;
+ }
+
err = mlx5_eq_table_create(dev);
if (err) {
mlx5_core_err(dev, "Failed to create EQs\n");
@@ -1067,7 +1099,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
err = mlx5_core_set_hca_defaults(dev);
if (err) {
mlx5_core_err(dev, "Failed to set hca defaults\n");
- goto err_fs;
+ goto err_sriov;
}
err = mlx5_sriov_attach(dev);
@@ -1099,6 +1131,8 @@ err_fpga_start:
err_fw_tracer:
mlx5_eq_table_destroy(dev);
err_eq_table:
+ mlx5_irq_table_destroy(dev);
+err_irq_table:
mlx5_pagealloc_stop(dev);
mlx5_events_stop(dev);
mlx5_put_uars_page(dev, dev->priv.uar);
@@ -1115,6 +1149,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_fpga_device_stop(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
mlx5_eq_table_destroy(dev);
+ mlx5_irq_table_destroy(dev);
mlx5_pagealloc_stop(dev);
mlx5_events_stop(dev);
mlx5_put_uars_page(dev, dev->priv.uar);
@@ -1183,7 +1218,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
int err = 0;
if (cleanup)
- mlx5_drain_health_recovery(dev);
+ mlx5_drain_health_wq(dev);
mutex_lock(&dev->intf_state_mutex);
if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
@@ -1210,17 +1245,6 @@ out:
return err;
}
-static const struct devlink_ops mlx5_devlink_ops = {
-#ifdef CONFIG_MLX5_ESWITCH
- .eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
- .eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
- .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set,
- .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get,
- .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set,
- .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
-#endif
-};
-
static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
{
struct mlx5_priv *priv = &dev->priv;
@@ -1230,7 +1254,6 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
- mutex_init(&dev->pci_status_mutex);
mutex_init(&dev->intf_state_mutex);
mutex_init(&priv->bfregs.reg_head.lock);
@@ -1282,9 +1305,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
struct devlink *devlink;
int err;
- devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev));
+ devlink = mlx5_devlink_alloc();
if (!devlink) {
- dev_err(&pdev->dev, "kzalloc failed\n");
+ dev_err(&pdev->dev, "devlink alloc failed\n");
return -ENOMEM;
}
@@ -1292,6 +1315,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
dev->device = &pdev->dev;
dev->pdev = pdev;
+ dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ?
+ MLX5_COREDEV_VF : MLX5_COREDEV_PF;
+
err = mlx5_mdev_init(dev, prof_sel);
if (err)
goto mdev_init_err;
@@ -1312,10 +1338,14 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
request_module_nowait(MLX5_IB_MOD);
- err = devlink_register(devlink, &pdev->dev);
+ err = mlx5_devlink_register(devlink, &pdev->dev);
if (err)
goto clean_load;
+ err = mlx5_crdump_enable(dev);
+ if (err)
+ dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
+
pci_save_state(pdev);
return 0;
@@ -1327,7 +1357,7 @@ err_load_one:
pci_init_err:
mlx5_mdev_uninit(dev);
mdev_init_err:
- devlink_free(devlink);
+ mlx5_devlink_free(devlink);
return err;
}
@@ -1337,7 +1367,8 @@ static void remove_one(struct pci_dev *pdev)
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct devlink *devlink = priv_to_devlink(dev);
- devlink_unregister(devlink);
+ mlx5_crdump_disable(dev);
+ mlx5_devlink_unregister(devlink);
mlx5_unregister_device(dev);
if (mlx5_unload_one(dev, true)) {
@@ -1348,7 +1379,7 @@ static void remove_one(struct pci_dev *pdev)
mlx5_pci_close(dev);
mlx5_mdev_uninit(dev);
- devlink_free(devlink);
+ mlx5_devlink_free(devlink);
}
static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
@@ -1359,12 +1390,10 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
mlx5_core_info(dev, "%s was called\n", __func__);
mlx5_enter_error_state(dev, false);
+ mlx5_error_sw_reset(dev);
mlx5_unload_one(dev, false);
- /* In case of kernel call drain the health wq */
- if (state) {
- mlx5_drain_health_wq(dev);
- mlx5_pci_disable_device(dev);
- }
+ mlx5_drain_health_wq(dev);
+ mlx5_pci_disable_device(dev);
return state == pci_channel_io_perm_failure ?
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
@@ -1532,7 +1561,8 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
void mlx5_disable_device(struct mlx5_core_dev *dev)
{
- mlx5_pci_err_detected(dev->pdev, 0);
+ mlx5_error_sw_reset(dev);
+ mlx5_unload_one(dev, false);
}
void mlx5_recover_device(struct mlx5_core_dev *dev)
@@ -1570,7 +1600,7 @@ static int __init init(void)
get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
mlx5_core_verify_params();
- mlx5_fpga_ipsec_build_fs_cmds();
+ mlx5_accel_ipsec_build_fs_cmds();
mlx5_register_debugfs();
err = pci_register_driver(&mlx5_core_driver);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 22e69d4813e4..471bbc48bc1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -111,6 +111,11 @@ enum {
MLX5_DRIVER_SYND = 0xbadd00de,
};
+enum mlx5_semaphore_space_address {
+ MLX5_SEMAPHORE_SPACE_DOMAIN = 0xA,
+ MLX5_SEMAPHORE_SW_RESET = 0x20,
+};
+
int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
int mlx5_query_board_id(struct mlx5_core_dev *dev);
int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
@@ -118,6 +123,7 @@ int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev);
void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
+void mlx5_error_sw_reset(struct mlx5_core_dev *dev);
void mlx5_disable_device(struct mlx5_core_dev *dev);
void mlx5_recover_device(struct mlx5_core_dev *dev);
int mlx5_sriov_init(struct mlx5_core_dev *dev);
@@ -153,6 +159,19 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_remove(struct mlx5_core_dev *dev);
+int mlx5_irq_table_init(struct mlx5_core_dev *dev);
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_irq_table_create(struct mlx5_core_dev *dev);
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
+int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+ struct notifier_block *nb);
+int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+ struct notifier_block *nb);
+struct cpumask *
+mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx);
+struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table);
+int mlx5_irq_get_num_comp(struct mlx5_irq_table *table);
+
int mlx5_events_init(struct mlx5_core_dev *dev);
void mlx5_events_cleanup(struct mlx5_core_dev *dev);
void mlx5_events_start(struct mlx5_core_dev *dev);
@@ -184,7 +203,10 @@ int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode);
MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \
MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj))
-int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw);
+int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw,
+ struct netlink_ext_ack *extack);
+int mlx5_fw_version_query(struct mlx5_core_dev *dev,
+ u32 *running_ver, u32 *stored_ver);
void mlx5e_init(void);
void mlx5e_cleanup(void);
@@ -213,7 +235,7 @@ enum {
MLX5_NIC_IFC_FULL = 0,
MLX5_NIC_IFC_DISABLED = 1,
MLX5_NIC_IFC_NO_DRAM_NIC = 2,
- MLX5_NIC_IFC_INVALID = 3
+ MLX5_NIC_IFC_SW_RESET = 7
};
u8 mlx5_get_nic_state(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index ea744d8466ea..9231b39d18b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -38,15 +38,12 @@
void mlx5_init_mkey_table(struct mlx5_core_dev *dev)
{
- struct mlx5_mkey_table *table = &dev->priv.mkey_table;
-
- memset(table, 0, sizeof(*table));
- rwlock_init(&table->lock);
- INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+ xa_init_flags(&dev->priv.mkey_table, XA_FLAGS_LOCK_IRQ);
}
void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev)
{
+ WARN_ON(!xa_empty(&dev->priv.mkey_table));
}
int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
@@ -56,8 +53,8 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
mlx5_async_cbk_t callback,
struct mlx5_async_work *context)
{
- struct mlx5_mkey_table *table = &dev->priv.mkey_table;
u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
+ struct xarray *mkeys = &dev->priv.mkey_table;
u32 mkey_index;
void *mkc;
int err;
@@ -88,12 +85,10 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
mkey_index, key, mkey->key);
- /* connect to mkey tree */
- write_lock_irq(&table->lock);
- err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey);
- write_unlock_irq(&table->lock);
+ err = xa_err(xa_store_irq(mkeys, mlx5_base_mkey(mkey->key), mkey,
+ GFP_KERNEL));
if (err) {
- mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n",
+ mlx5_core_warn(dev, "failed xarray insert of mkey 0x%x, %d\n",
mlx5_base_mkey(mkey->key), err);
mlx5_core_destroy_mkey(dev, mkey);
}
@@ -114,17 +109,17 @@ EXPORT_SYMBOL(mlx5_core_create_mkey);
int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
struct mlx5_core_mkey *mkey)
{
- struct mlx5_mkey_table *table = &dev->priv.mkey_table;
u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0};
+ struct xarray *mkeys = &dev->priv.mkey_table;
struct mlx5_core_mkey *deleted_mkey;
unsigned long flags;
- write_lock_irqsave(&table->lock, flags);
- deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key));
- write_unlock_irqrestore(&table->lock, flags);
+ xa_lock_irqsave(mkeys, flags);
+ deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key));
+ xa_unlock_irqrestore(mkeys, flags);
if (!deleted_mkey) {
- mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n",
+ mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n",
mlx5_base_mkey(mkey->key));
return -ENOENT;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
new file mode 100644
index 000000000000..373981a659c7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
+
+#define MLX5_MAX_IRQ_NAME (32)
+
+struct mlx5_irq {
+ struct atomic_notifier_head nh;
+ cpumask_var_t mask;
+ char name[MLX5_MAX_IRQ_NAME];
+};
+
+struct mlx5_irq_table {
+ struct mlx5_irq *irq;
+ int nvec;
+#ifdef CONFIG_RFS_ACCEL
+ struct cpu_rmap *rmap;
+#endif
+};
+
+int mlx5_irq_table_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *irq_table;
+
+ irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL);
+ if (!irq_table)
+ return -ENOMEM;
+
+ dev->priv.irq_table = irq_table;
+ return 0;
+}
+
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
+{
+ kvfree(dev->priv.irq_table);
+}
+
+int mlx5_irq_get_num_comp(struct mlx5_irq_table *table)
+{
+ return table->nvec - MLX5_IRQ_VEC_COMP_BASE;
+}
+
+static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx)
+{
+ struct mlx5_irq_table *irq_table = dev->priv.irq_table;
+
+ return &irq_table->irq[vecidx];
+}
+
+int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+ struct notifier_block *nb)
+{
+ struct mlx5_irq *irq;
+
+ irq = &irq_table->irq[vecidx];
+ return atomic_notifier_chain_register(&irq->nh, nb);
+}
+
+int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+ struct notifier_block *nb)
+{
+ struct mlx5_irq *irq;
+
+ irq = &irq_table->irq[vecidx];
+ return atomic_notifier_chain_unregister(&irq->nh, nb);
+}
+
+static irqreturn_t mlx5_irq_int_handler(int irq, void *nh)
+{
+ atomic_notifier_call_chain(nh, 0, NULL);
+ return IRQ_HANDLED;
+}
+
+static void irq_set_name(char *name, int vecidx)
+{
+ if (vecidx == 0) {
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async");
+ return;
+ }
+
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d",
+ vecidx - MLX5_IRQ_VEC_COMP_BASE);
+ return;
+}
+
+static int request_irqs(struct mlx5_core_dev *dev, int nvec)
+{
+ char name[MLX5_MAX_IRQ_NAME];
+ int err;
+ int i;
+
+ for (i = 0; i < nvec; i++) {
+ struct mlx5_irq *irq = mlx5_irq_get(dev, i);
+ int irqn = pci_irq_vector(dev->pdev, i);
+
+ irq_set_name(name, i);
+ ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
+ snprintf(irq->name, MLX5_MAX_IRQ_NAME,
+ "%s@pci:%s", name, pci_name(dev->pdev));
+ err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name,
+ &irq->nh);
+ if (err) {
+ mlx5_core_err(dev, "Failed to request irq\n");
+ goto err_request_irq;
+ }
+ }
+ return 0;
+
+err_request_irq:
+ for (; i >= 0; i--) {
+ struct mlx5_irq *irq = mlx5_irq_get(dev, i);
+ int irqn = pci_irq_vector(dev->pdev, i);
+
+ free_irq(irqn, &irq->nh);
+ }
+ return err;
+}
+
+static void irq_clear_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+ struct mlx5_irq_table *irq_table = dev->priv.irq_table;
+
+ free_irq_cpu_rmap(irq_table->rmap);
+#endif
+}
+
+static int irq_set_rmap(struct mlx5_core_dev *mdev)
+{
+ int err = 0;
+#ifdef CONFIG_RFS_ACCEL
+ struct mlx5_irq_table *irq_table = mdev->priv.irq_table;
+ int num_affinity_vec;
+ int vecidx;
+
+ num_affinity_vec = mlx5_irq_get_num_comp(irq_table);
+ irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec);
+ if (!irq_table->rmap) {
+ err = -ENOMEM;
+ mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
+ goto err_out;
+ }
+
+ vecidx = MLX5_IRQ_VEC_COMP_BASE;
+ for (; vecidx < irq_table->nvec; vecidx++) {
+ err = irq_cpu_rmap_add(irq_table->rmap,
+ pci_irq_vector(mdev->pdev, vecidx));
+ if (err) {
+ mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
+ err);
+ goto err_irq_cpu_rmap_add;
+ }
+ }
+ return 0;
+
+err_irq_cpu_rmap_add:
+ irq_clear_rmap(mdev);
+err_out:
+#endif
+ return err;
+}
+
+/* Completion IRQ vectors */
+
+static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
+ struct mlx5_irq *irq;
+ int irqn;
+
+ irq = mlx5_irq_get(mdev, vecidx);
+ irqn = pci_irq_vector(mdev->pdev, vecidx);
+ if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
+ mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+ return -ENOMEM;
+ }
+
+ cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node),
+ irq->mask);
+ if (IS_ENABLED(CONFIG_SMP) &&
+ irq_set_affinity_hint(irqn, irq->mask))
+ mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x",
+ irqn);
+
+ return 0;
+}
+
+static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
+ struct mlx5_irq *irq;
+ int irqn;
+
+ irq = mlx5_irq_get(mdev, vecidx);
+ irqn = pci_irq_vector(mdev->pdev, vecidx);
+ irq_set_affinity_hint(irqn, NULL);
+ free_cpumask_var(irq->mask);
+}
+
+static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
+ int err;
+ int i;
+
+ for (i = 0; i < nvec; i++) {
+ err = set_comp_irq_affinity_hint(mdev, i);
+ if (err)
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ for (i--; i >= 0; i--)
+ clear_comp_irq_affinity_hint(mdev, i);
+
+ return err;
+}
+
+static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
+ int i;
+
+ for (i = 0; i < nvec; i++)
+ clear_comp_irq_affinity_hint(mdev, i);
+}
+
+struct cpumask *
+mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx)
+{
+ return irq_table->irq[vecidx].mask;
+}
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table)
+{
+ return irq_table->rmap;
+}
+#endif
+
+static void unrequest_irqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *table = dev->priv.irq_table;
+ int i;
+
+ for (i = 0; i < table->nvec; i++)
+ free_irq(pci_irq_vector(dev->pdev, i),
+ &mlx5_irq_get(dev, i)->nh);
+}
+
+int mlx5_irq_table_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_irq_table *table = priv->irq_table;
+ int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+ MLX5_CAP_GEN(dev, max_num_eqs) :
+ 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ int nvec;
+ int err;
+
+ nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+ MLX5_IRQ_VEC_COMP_BASE;
+ nvec = min_t(int, nvec, num_eqs);
+ if (nvec <= MLX5_IRQ_VEC_COMP_BASE)
+ return -ENOMEM;
+
+ table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL);
+ if (!table->irq)
+ return -ENOMEM;
+
+ nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1,
+ nvec, PCI_IRQ_MSIX);
+ if (nvec < 0) {
+ err = nvec;
+ goto err_free_irq;
+ }
+
+ table->nvec = nvec;
+
+ err = irq_set_rmap(dev);
+ if (err)
+ goto err_set_rmap;
+
+ err = request_irqs(dev, nvec);
+ if (err)
+ goto err_request_irqs;
+
+ err = set_comp_irq_affinity_hints(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
+ goto err_set_affinity;
+ }
+
+ return 0;
+
+err_set_affinity:
+ unrequest_irqs(dev);
+err_request_irqs:
+ irq_clear_rmap(dev);
+err_set_rmap:
+ pci_free_irq_vectors(dev->pdev);
+err_free_irq:
+ kfree(table->irq);
+ return err;
+}
+
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *table = dev->priv.irq_table;
+ int i;
+
+ /* free_irq requires that affinity and rmap will be cleared
+ * before calling it. This is why there is asymmetry with set_rmap
+ * which should be called after alloc_irq but before request_irq.
+ */
+ irq_clear_rmap(dev);
+ clear_comp_irqs_affinity_hints(dev);
+ for (i = 0; i < table->nvec; i++)
+ free_irq(pci_irq_vector(dev->pdev, i),
+ &mlx5_irq_get(dev, i)->nh);
+ pci_free_irq_vectors(dev->pdev);
+ kfree(table->irq);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
index 86f77456f873..17ce9dd56b13 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
@@ -106,10 +106,10 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev)
return 0;
-destroy_flow_table:
- mlx5_destroy_flow_table(ft);
destroy_flow_group:
mlx5_destroy_flow_group(fg);
+destroy_flow_table:
+ mlx5_destroy_flow_table(ft);
free:
kvfree(spec);
kvfree(flow_group_in);
@@ -126,7 +126,7 @@ static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *
{
u8 hw_id[ETH_ALEN];
- mlx5_query_nic_vport_mac_address(dev, 0, hw_id);
+ mlx5_query_mac_address(dev, hw_id);
gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
addrconf_addr_eui48(&gid->raw[8], hw_id);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index a249b3c3843d..61fcfd8b39b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -74,17 +74,11 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
int err;
int vf;
- if (sriov->enabled_vfs) {
- mlx5_core_warn(dev,
- "failed to enable SRIOV on device, already enabled with %d vfs\n",
- sriov->enabled_vfs);
- return -EBUSY;
- }
-
if (!MLX5_ESWITCH_MANAGER(dev))
goto enable_vfs_hca;
- err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
+ mlx5_eswitch_update_num_of_vfs(dev->priv.eswitch, num_vfs);
+ err = mlx5_eswitch_enable(dev->priv.eswitch, MLX5_ESWITCH_LEGACY);
if (err) {
mlx5_core_warn(dev,
"failed to enable eswitch SRIOV (%d)\n", err);
@@ -99,7 +93,6 @@ enable_vfs_hca:
continue;
}
sriov->vfs_ctx[vf].enabled = 1;
- sriov->enabled_vfs++;
if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) {
err = sriov_restore_guids(dev, vf);
if (err) {
@@ -118,13 +111,11 @@ enable_vfs_hca:
static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int num_vfs = pci_num_vf(dev->pdev);
int err;
int vf;
- if (!sriov->enabled_vfs)
- goto out;
-
- for (vf = 0; vf < sriov->num_vfs; vf++) {
+ for (vf = num_vfs - 1; vf >= 0; vf--) {
if (!sriov->vfs_ctx[vf].enabled)
continue;
err = mlx5_core_disable_hca(dev, vf + 1);
@@ -133,12 +124,10 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
continue;
}
sriov->vfs_ctx[vf].enabled = 0;
- sriov->enabled_vfs--;
}
-out:
if (MLX5_ESWITCH_MANAGER(dev))
- mlx5_eswitch_disable_sriov(dev->priv.eswitch);
+ mlx5_eswitch_disable(dev->priv.eswitch);
if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages))
mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
@@ -191,13 +180,11 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
int mlx5_sriov_attach(struct mlx5_core_dev *dev)
{
- struct mlx5_core_sriov *sriov = &dev->priv.sriov;
-
- if (!mlx5_core_is_pf(dev) || !sriov->num_vfs)
+ if (!mlx5_core_is_pf(dev) || !pci_num_vf(dev->pdev))
return 0;
/* If sriov VFs exist in PCI level, enable them in device level */
- return mlx5_device_enable_sriov(dev, sriov->num_vfs);
+ return mlx5_device_enable_sriov(dev, pci_num_vf(dev->pdev));
}
void mlx5_sriov_detach(struct mlx5_core_dev *dev)
@@ -208,6 +195,30 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev)
mlx5_device_disable_sriov(dev);
}
+static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
+{
+ u16 host_total_vfs;
+ const u32 *out;
+
+ if (mlx5_core_is_ecpf_esw_manager(dev)) {
+ out = mlx5_esw_query_functions(dev);
+
+ /* Old FW doesn't support getting total_vfs from esw func
+ * but supports getting it from pci_sriov.
+ */
+ if (IS_ERR(out))
+ goto done;
+ host_total_vfs = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_total_vfs);
+ kvfree(out);
+ if (host_total_vfs)
+ return host_total_vfs;
+ }
+
+done:
+ return pci_sriov_get_totalvfs(dev->pdev);
+}
+
int mlx5_sriov_init(struct mlx5_core_dev *dev)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
@@ -218,6 +229,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
return 0;
total_vfs = pci_sriov_get_totalvfs(pdev);
+ sriov->max_vfs = mlx5_get_max_vfs(dev);
sriov->num_vfs = pci_num_vf(pdev);
sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
if (!sriov->vfs_ctx)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 95cdc8cbcba4..c912d82ca64b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -34,6 +34,7 @@
#include <linux/etherdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
+#include <linux/mlx5/eswitch.h>
#include "mlx5_core.h"
/* Mutex to hold while enabling or disabling RoCE */
@@ -155,11 +156,12 @@ int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
}
int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
- u16 vport, u8 *addr)
+ u16 vport, bool other, u8 *addr)
{
- u32 *out;
int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {};
u8 *out_addr;
+ u32 *out;
int err;
out = kvzalloc(outlen, GFP_KERNEL);
@@ -169,7 +171,12 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out,
nic_vport_context.permanent_address);
- err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, other);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
if (!err)
ether_addr_copy(addr, &out_addr[2]);
@@ -178,6 +185,12 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address);
+int mlx5_query_mac_address(struct mlx5_core_dev *mdev, u8 *addr)
+{
+ return mlx5_query_nic_vport_mac_address(mdev, 0, false, addr);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_mac_address);
+
int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
u16 vport, u8 *addr)
{
@@ -194,9 +207,7 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
MLX5_SET(modify_nic_vport_context_in, in,
field_select.permanent_address, 1);
MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
-
- if (vport)
- MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
in, nic_vport_context);
@@ -291,9 +302,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type);
MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
-
- if (vport)
- MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
if (err)
@@ -483,7 +492,7 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
MLX5_SET(modify_nic_vport_context_in, in,
field_select.node_guid, 1);
MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
- MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in,
in, nic_vport_context);
@@ -1157,3 +1166,17 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
return tmp;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
+
+/**
+ * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
+ *
+ * @dev: Pointer to core device
+ *
+ * mlx5_eswitch_get_total_vports returns total number of vports for
+ * the eswitch.
+ */
+u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
+{
+ return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev);
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_total_vports);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 1f87cce421e0..f1ec58c9e9e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -134,11 +134,6 @@ static inline void mlx5_wq_cyc_update_db_record(struct mlx5_wq_cyc *wq)
*wq->db = cpu_to_be32(wq->wqe_ctr);
}
-static inline u16 mlx5_wq_cyc_get_ctr_wrap_cnt(struct mlx5_wq_cyc *wq, u16 ctr)
-{
- return ctr >> wq->fbc.log_sz;
-}
-
static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
{
return ctr & wq->fbc.sz_m1;
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
index 14c0c62f8e73..c50e74ab02c4 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
@@ -5,6 +5,7 @@
#define _MLXFW_H
#include <linux/firmware.h>
+#include <linux/netlink.h>
enum mlxfw_fsm_state {
MLXFW_FSM_STATE_IDLE,
@@ -57,6 +58,10 @@ struct mlxfw_dev_ops {
void (*fsm_cancel)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle);
void (*fsm_release)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle);
+
+ void (*status_notify)(struct mlxfw_dev *mlxfw_dev,
+ const char *msg, const char *comp_name,
+ u32 done_bytes, u32 total_bytes);
};
struct mlxfw_dev {
@@ -67,11 +72,13 @@ struct mlxfw_dev {
#if IS_REACHABLE(CONFIG_MLXFW)
int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
- const struct firmware *firmware);
+ const struct firmware *firmware,
+ struct netlink_ext_ack *extack);
#else
static inline
int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
- const struct firmware *firmware)
+ const struct firmware *firmware,
+ struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c
index 240c027e5f07..67990406cba2 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c
@@ -39,8 +39,19 @@ static const char * const mlxfw_fsm_state_err_str[] = {
"unknown error"
};
+static void mlxfw_status_notify(struct mlxfw_dev *mlxfw_dev,
+ const char *msg, const char *comp_name,
+ u32 done_bytes, u32 total_bytes)
+{
+ if (!mlxfw_dev->ops->status_notify)
+ return;
+ mlxfw_dev->ops->status_notify(mlxfw_dev, msg, comp_name,
+ done_bytes, total_bytes);
+}
+
static int mlxfw_fsm_state_wait(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
- enum mlxfw_fsm_state fsm_state)
+ enum mlxfw_fsm_state fsm_state,
+ struct netlink_ext_ack *extack)
{
enum mlxfw_fsm_state_err fsm_state_err;
enum mlxfw_fsm_state curr_fsm_state;
@@ -57,11 +68,13 @@ retry:
if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) {
pr_err("Firmware flash failed: %s\n",
mlxfw_fsm_state_err_str[fsm_state_err]);
+ NL_SET_ERR_MSG_MOD(extack, "Firmware flash failed");
return -EINVAL;
}
if (curr_fsm_state != fsm_state) {
if (--times == 0) {
pr_err("Timeout reached on FSM state change");
+ NL_SET_ERR_MSG_MOD(extack, "Timeout reached on FSM state change");
return -ETIMEDOUT;
}
msleep(MLXFW_FSM_STATE_WAIT_CYCLE_MS);
@@ -76,16 +89,20 @@ retry:
static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev,
u32 fwhandle,
- struct mlxfw_mfa2_component *comp)
+ struct mlxfw_mfa2_component *comp,
+ struct netlink_ext_ack *extack)
{
u16 comp_max_write_size;
u8 comp_align_bits;
u32 comp_max_size;
+ char comp_name[8];
u16 block_size;
u8 *block_ptr;
u32 offset;
int err;
+ sprintf(comp_name, "%u", comp->index);
+
err = mlxfw_dev->ops->component_query(mlxfw_dev, comp->index,
&comp_max_size, &comp_align_bits,
&comp_max_write_size);
@@ -96,6 +113,7 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev,
if (comp->data_size > comp_max_size) {
pr_err("Component %d is of size %d which is bigger than limit %d\n",
comp->index, comp->data_size, comp_max_size);
+ NL_SET_ERR_MSG_MOD(extack, "Component is bigger than limit");
return -EINVAL;
}
@@ -103,6 +121,7 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev,
comp_align_bits);
pr_debug("Component update\n");
+ mlxfw_status_notify(mlxfw_dev, "Updating component", comp_name, 0, 0);
err = mlxfw_dev->ops->fsm_component_update(mlxfw_dev, fwhandle,
comp->index,
comp->data_size);
@@ -110,11 +129,13 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev,
return err;
err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle,
- MLXFW_FSM_STATE_DOWNLOAD);
+ MLXFW_FSM_STATE_DOWNLOAD, extack);
if (err)
goto err_out;
pr_debug("Component download\n");
+ mlxfw_status_notify(mlxfw_dev, "Downloading component",
+ comp_name, 0, comp->data_size);
for (offset = 0;
offset < MLXFW_ALIGN_UP(comp->data_size, comp_align_bits);
offset += comp_max_write_size) {
@@ -126,15 +147,20 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev,
offset);
if (err)
goto err_out;
+ mlxfw_status_notify(mlxfw_dev, "Downloading component",
+ comp_name, offset + block_size,
+ comp->data_size);
}
pr_debug("Component verify\n");
+ mlxfw_status_notify(mlxfw_dev, "Verifying component", comp_name, 0, 0);
err = mlxfw_dev->ops->fsm_component_verify(mlxfw_dev, fwhandle,
comp->index);
if (err)
goto err_out;
- err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED);
+ err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle,
+ MLXFW_FSM_STATE_LOCKED, extack);
if (err)
goto err_out;
return 0;
@@ -145,7 +171,8 @@ err_out:
}
static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
- struct mlxfw_mfa2_file *mfa2_file)
+ struct mlxfw_mfa2_file *mfa2_file,
+ struct netlink_ext_ack *extack)
{
u32 component_count;
int err;
@@ -156,6 +183,7 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
&component_count);
if (err) {
pr_err("Could not find device PSID in MFA2 file\n");
+ NL_SET_ERR_MSG_MOD(extack, "Could not find device PSID in MFA2 file");
return err;
}
@@ -168,7 +196,7 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
return PTR_ERR(comp);
pr_info("Flashing component type %d\n", comp->index);
- err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp);
+ err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp, extack);
mlxfw_mfa2_file_component_put(comp);
if (err)
return err;
@@ -177,7 +205,8 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
}
int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
- const struct firmware *firmware)
+ const struct firmware *firmware,
+ struct netlink_ext_ack *extack)
{
struct mlxfw_mfa2_file *mfa2_file;
u32 fwhandle;
@@ -185,6 +214,7 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
if (!mlxfw_mfa2_check(firmware)) {
pr_err("Firmware file is not MFA2\n");
+ NL_SET_ERR_MSG_MOD(extack, "Firmware file is not MFA2");
return -EINVAL;
}
@@ -193,29 +223,35 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
return PTR_ERR(mfa2_file);
pr_info("Initialize firmware flash process\n");
+ mlxfw_status_notify(mlxfw_dev, "Initializing firmware flash process",
+ NULL, 0, 0);
err = mlxfw_dev->ops->fsm_lock(mlxfw_dev, &fwhandle);
if (err) {
pr_err("Could not lock the firmware FSM\n");
+ NL_SET_ERR_MSG_MOD(extack, "Could not lock the firmware FSM");
goto err_fsm_lock;
}
err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle,
- MLXFW_FSM_STATE_LOCKED);
+ MLXFW_FSM_STATE_LOCKED, extack);
if (err)
goto err_state_wait_idle_to_locked;
- err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file);
+ err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file, extack);
if (err)
goto err_flash_components;
pr_debug("Activate image\n");
+ mlxfw_status_notify(mlxfw_dev, "Activating image", NULL, 0, 0);
err = mlxfw_dev->ops->fsm_activate(mlxfw_dev, fwhandle);
if (err) {
pr_err("Could not activate the downloaded image\n");
+ NL_SET_ERR_MSG_MOD(extack, "Could not activate the downloaded image");
goto err_fsm_activate;
}
- err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED);
+ err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle,
+ MLXFW_FSM_STATE_LOCKED, extack);
if (err)
goto err_state_wait_activate_to_locked;
@@ -223,6 +259,7 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
mlxfw_dev->ops->fsm_release(mlxfw_dev, fwhandle);
pr_info("Firmware flash done.\n");
+ mlxfw_status_notify(mlxfw_dev, "Firmware flash done", NULL, 0, 0);
mlxfw_mfa2_file_fini(mfa2_file);
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 11ded0bc7d98..06c80343d9ed 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -83,6 +83,8 @@ config MLXSW_SPECTRUM
select PARMAN
select OBJAGG
select MLXFW
+ imply PTP_1588_CLOCK
+ select NET_PTP_CLASSIFY if PTP_1588_CLOCK
default m
---help---
This driver supports Mellanox Technologies Spectrum Ethernet
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index c4dc72e1ce63..171b36bd8a4e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -31,5 +31,6 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
spectrum_nve.o spectrum_nve_vxlan.o \
spectrum_dpipe.o
mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
+mlxsw_spectrum-$(CONFIG_PTP_1588_CLOCK) += spectrum_ptp.o
obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
mlxsw_minimal-objs := minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
index 0772e4339b33..5ffdfb532cb7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
@@ -317,6 +317,18 @@ MLXSW_ITEM64(cmd_mbox, query_fw, doorbell_page_offset, 0x40, 0, 64);
*/
MLXSW_ITEM32(cmd_mbox, query_fw, doorbell_page_bar, 0x48, 30, 2);
+/* cmd_mbox_query_fw_free_running_clock_offset
+ * The offset of the free running clock page
+ */
+MLXSW_ITEM64(cmd_mbox, query_fw, free_running_clock_offset, 0x50, 0, 64);
+
+/* cmd_mbox_query_fw_fr_rn_clk_bar
+ * PCI base address register (BAR) of the free running clock page
+ * 0: BAR 0
+ * 1: 64 bit BAR
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, fr_rn_clk_bar, 0x58, 30, 2);
+
/* QUERY_BOARDINFO - Query Board Information
* -----------------------------------------
* OpMod == 0 (N/A), INMmod == 0 (N/A)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 6ee6de7f0160..17ceac7505e5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1003,6 +1003,20 @@ static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink,
return err;
}
+static int mlxsw_devlink_flash_update(struct devlink *devlink,
+ const char *file_name,
+ const char *component,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+
+ if (!mlxsw_driver->flash_update)
+ return -EOPNOTSUPP;
+ return mlxsw_driver->flash_update(mlxsw_core, file_name,
+ component, extack);
+}
+
static const struct devlink_ops mlxsw_devlink_ops = {
.reload = mlxsw_devlink_core_bus_device_reload,
.port_type_set = mlxsw_devlink_port_type_set,
@@ -1019,6 +1033,7 @@ static const struct devlink_ops mlxsw_devlink_ops = {
.sb_occ_port_pool_get = mlxsw_devlink_sb_occ_port_pool_get,
.sb_occ_tc_port_bind_get = mlxsw_devlink_sb_occ_tc_port_bind_get,
.info_get = mlxsw_devlink_info_get,
+ .flash_update = mlxsw_devlink_flash_update,
};
static int
@@ -1098,6 +1113,12 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
goto err_register_params;
}
+ if (mlxsw_driver->init) {
+ err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
+ if (err)
+ goto err_driver_init;
+ }
+
err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon);
if (err)
goto err_hwmon_init;
@@ -1107,22 +1128,17 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
if (err)
goto err_thermal_init;
- if (mlxsw_driver->init) {
- err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
- if (err)
- goto err_driver_init;
- }
-
if (mlxsw_driver->params_register && !reload)
devlink_params_publish(devlink);
return 0;
-err_driver_init:
- mlxsw_thermal_fini(mlxsw_core->thermal);
err_thermal_init:
mlxsw_hwmon_fini(mlxsw_core->hwmon);
err_hwmon_init:
+ if (mlxsw_core->driver->fini)
+ mlxsw_core->driver->fini(mlxsw_core);
+err_driver_init:
if (mlxsw_driver->params_unregister && !reload)
mlxsw_driver->params_unregister(mlxsw_core);
err_register_params:
@@ -1187,10 +1203,10 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
if (mlxsw_core->driver->params_unregister && !reload)
devlink_params_unpublish(devlink);
- if (mlxsw_core->driver->fini)
- mlxsw_core->driver->fini(mlxsw_core);
mlxsw_thermal_fini(mlxsw_core->thermal);
mlxsw_hwmon_fini(mlxsw_core->hwmon);
+ if (mlxsw_core->driver->fini)
+ mlxsw_core->driver->fini(mlxsw_core);
if (mlxsw_core->driver->params_unregister && !reload)
mlxsw_core->driver->params_unregister(mlxsw_core);
if (!reload)
@@ -1229,6 +1245,15 @@ int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
}
EXPORT_SYMBOL(mlxsw_core_skb_transmit);
+void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core,
+ struct sk_buff *skb, u8 local_port)
+{
+ if (mlxsw_core->driver->ptp_transmitted)
+ mlxsw_core->driver->ptp_transmitted(mlxsw_core, skb,
+ local_port);
+}
+EXPORT_SYMBOL(mlxsw_core_ptp_transmitted);
+
static bool __is_rx_listener_equal(const struct mlxsw_rx_listener *rxl_a,
const struct mlxsw_rx_listener *rxl_b)
{
@@ -2010,6 +2035,18 @@ int mlxsw_core_resources_query(struct mlxsw_core *mlxsw_core, char *mbox,
}
EXPORT_SYMBOL(mlxsw_core_resources_query);
+u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core)
+{
+ return mlxsw_core->bus->read_frc_h(mlxsw_core->bus_priv);
+}
+EXPORT_SYMBOL(mlxsw_core_read_frc_h);
+
+u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core)
+{
+ return mlxsw_core->bus->read_frc_l(mlxsw_core->bus_priv);
+}
+EXPORT_SYMBOL(mlxsw_core_read_frc_l);
+
static int __init mlxsw_core_module_init(void)
{
int err;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index e3832cb5bdda..8efcff4b59cb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -48,6 +48,8 @@ bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core,
const struct mlxsw_tx_info *tx_info);
int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
const struct mlxsw_tx_info *tx_info);
+void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core,
+ struct sk_buff *skb, u8 local_port);
struct mlxsw_rx_listener {
void (*func)(struct sk_buff *skb, u8 local_port, void *priv);
@@ -284,6 +286,9 @@ struct mlxsw_driver {
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
u32 *p_cur, u32 *p_max);
+ int (*flash_update)(struct mlxsw_core *mlxsw_core,
+ const char *file_name, const char *component,
+ struct netlink_ext_ack *extack);
void (*txhdr_construct)(struct sk_buff *skb,
const struct mlxsw_tx_info *tx_info);
int (*resources_register)(struct mlxsw_core *mlxsw_core);
@@ -293,6 +298,13 @@ struct mlxsw_driver {
u64 *p_linear_size);
int (*params_register)(struct mlxsw_core *mlxsw_core);
void (*params_unregister)(struct mlxsw_core *mlxsw_core);
+
+ /* Notify a driver that a timestamped packet was transmitted. Driver
+ * is responsible for freeing the passed-in SKB.
+ */
+ void (*ptp_transmitted)(struct mlxsw_core *mlxsw_core,
+ struct sk_buff *skb, u8 local_port);
+
u8 txhdr_len;
const struct mlxsw_config_profile *profile;
bool res_query_enabled;
@@ -306,6 +318,9 @@ int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core);
void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core);
+u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core);
+u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core);
+
bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core,
enum mlxsw_res_id res_id);
@@ -336,6 +351,8 @@ struct mlxsw_bus {
char *in_mbox, size_t in_mbox_size,
char *out_mbox, size_t out_mbox_size,
u8 *p_status);
+ u32 (*read_frc_h)(void *bus_priv);
+ u32 (*read_frc_l)(void *bus_priv);
u8 features;
};
@@ -353,7 +370,8 @@ struct mlxsw_bus_info {
struct mlxsw_fw_rev fw_rev;
u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN];
u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN];
- u8 low_frequency;
+ u8 low_frequency:1,
+ read_frc_capable:1;
};
struct mlxsw_hwmon;
@@ -409,4 +427,14 @@ enum mlxsw_devlink_param_id {
MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL,
};
+struct mlxsw_skb_cb {
+ struct mlxsw_tx_info tx_info;
+};
+
+static inline struct mlxsw_skb_cb *mlxsw_skb_cb(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(mlxsw_skb_cb) > sizeof(skb->cb));
+ return (struct mlxsw_skb_cb *) skb->cb;
+}
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
index cb3e663b1d37..feb4672a5ac0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
@@ -30,8 +30,9 @@ static bool mlxsw_afk_blocks_check(struct mlxsw_afk *mlxsw_afk)
elinst = &block->instances[j];
if (elinst->type != elinst->info->type ||
- elinst->item.size.bits !=
- elinst->info->item.size.bits)
+ (!elinst->avoid_size_check &&
+ elinst->item.size.bits !=
+ elinst->info->item.size.bits))
return false;
}
}
@@ -385,12 +386,12 @@ EXPORT_SYMBOL(mlxsw_afk_values_add_buf);
static void mlxsw_sp_afk_encode_u32(const struct mlxsw_item *storage_item,
const struct mlxsw_item *output_item,
- char *storage, char *output)
+ char *storage, char *output, int diff)
{
u32 value;
value = __mlxsw_item_get32(storage, storage_item, 0);
- __mlxsw_item_set32(output, output_item, 0, value);
+ __mlxsw_item_set32(output, output_item, 0, value + diff);
}
static void mlxsw_sp_afk_encode_buf(const struct mlxsw_item *storage_item,
@@ -406,14 +407,14 @@ static void mlxsw_sp_afk_encode_buf(const struct mlxsw_item *storage_item,
static void
mlxsw_sp_afk_encode_one(const struct mlxsw_afk_element_inst *elinst,
- char *output, char *storage)
+ char *output, char *storage, int u32_diff)
{
const struct mlxsw_item *storage_item = &elinst->info->item;
const struct mlxsw_item *output_item = &elinst->item;
if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32)
mlxsw_sp_afk_encode_u32(storage_item, output_item,
- storage, output);
+ storage, output, u32_diff);
else if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_BUF)
mlxsw_sp_afk_encode_buf(storage_item, output_item,
storage, output);
@@ -446,9 +447,10 @@ void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk,
continue;
mlxsw_sp_afk_encode_one(elinst, block_key,
- values->storage.key);
+ values->storage.key,
+ elinst->u32_key_diff);
mlxsw_sp_afk_encode_one(elinst, block_mask,
- values->storage.mask);
+ values->storage.mask, 0);
}
mlxsw_afk->ops->encode_block(key, i, block_key);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
index 4a625cdf3e7c..cb229b55ecc4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
@@ -74,7 +74,7 @@ struct mlxsw_afk_element_info {
* define an internal storage geometry.
*/
static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
- MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 8),
+ MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 16),
MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_32_47, 0x04, 2),
MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_0_31, 0x06, 4),
MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_32_47, 0x0A, 2),
@@ -107,9 +107,14 @@ struct mlxsw_afk_element_inst { /* element instance in actual block */
const struct mlxsw_afk_element_info *info;
enum mlxsw_afk_element_type type;
struct mlxsw_item item; /* element geometry in block */
+ int u32_key_diff; /* in case value needs to be adjusted before write
+ * this diff is here to handle that
+ */
+ bool avoid_size_check;
};
-#define MLXSW_AFK_ELEMENT_INST(_type, _element, _offset, _shift, _size) \
+#define MLXSW_AFK_ELEMENT_INST(_type, _element, _offset, \
+ _shift, _size, _u32_key_diff, _avoid_size_check) \
{ \
.info = &mlxsw_afk_element_infos[MLXSW_AFK_ELEMENT_##_element], \
.type = _type, \
@@ -119,15 +124,24 @@ struct mlxsw_afk_element_inst { /* element instance in actual block */
.size = {.bits = _size}, \
.name = #_element, \
}, \
+ .u32_key_diff = _u32_key_diff, \
+ .avoid_size_check = _avoid_size_check, \
}
#define MLXSW_AFK_ELEMENT_INST_U32(_element, _offset, _shift, _size) \
MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_U32, \
- _element, _offset, _shift, _size)
+ _element, _offset, _shift, _size, 0, false)
+
+#define MLXSW_AFK_ELEMENT_INST_EXT_U32(_element, _offset, \
+ _shift, _size, _key_diff, \
+ _avoid_size_check) \
+ MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_U32, \
+ _element, _offset, _shift, _size, \
+ _key_diff, _avoid_size_check)
#define MLXSW_AFK_ELEMENT_INST_BUF(_element, _offset, _size) \
MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_BUF, \
- _element, _offset, 0, _size)
+ _element, _offset, 0, _size, 0, false)
struct mlxsw_afk_block {
u16 encoding; /* block ID */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index 72539a9a3847..d2c7ce67c300 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -92,33 +92,20 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
u16 temp;
} temp_thresh;
char mcia_pl[MLXSW_REG_MCIA_LEN] = {0};
- char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0};
- u16 module_temp;
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
+ unsigned int module_temp;
bool qsfp;
int err;
- mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module,
- 1);
- err = mlxsw_reg_query(core, MLXSW_REG(mtbr), mtbr_pl);
+ mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module,
+ false, false);
+ err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl);
if (err)
return err;
-
- /* Don't read temperature thresholds for module with no valid info. */
- mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &module_temp, NULL);
- switch (module_temp) {
- case MLXSW_REG_MTBR_BAD_SENS_INFO: /* fall-through */
- case MLXSW_REG_MTBR_NO_CONN: /* fall-through */
- case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */
- case MLXSW_REG_MTBR_INDEX_NA:
+ mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, NULL);
+ if (!module_temp) {
*temp = 0;
return 0;
- default:
- /* Do not consider thresholds for zero temperature. */
- if (MLXSW_REG_MTMP_TEMP_TO_MC(module_temp) == 0) {
- *temp = 0;
- return 0;
- }
- break;
}
/* Read Free Side Device Temperature Thresholds from page 03h
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index 496dc904c5ed..5b00726c4346 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -23,6 +23,14 @@ struct mlxsw_hwmon_attr {
char name[32];
};
+static int mlxsw_hwmon_get_attr_index(int index, int count)
+{
+ if (index >= count)
+ return index % count + MLXSW_REG_MTMP_GBOX_INDEX_MIN;
+
+ return index;
+}
+
struct mlxsw_hwmon {
struct mlxsw_core *core;
const struct mlxsw_bus_info *bus_info;
@@ -33,6 +41,7 @@ struct mlxsw_hwmon {
struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT];
unsigned int attrs_count;
u8 sensor_count;
+ u8 module_sensor_count;
};
static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
@@ -43,18 +52,19 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
char mtmp_pl[MLXSW_REG_MTMP_LEN];
- unsigned int temp;
+ int temp, index;
int err;
- mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index,
- false, false);
+ index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
+ mlxsw_hwmon->module_sensor_count);
+ mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
if (err) {
dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
return err;
}
mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
- return sprintf(buf, "%u\n", temp);
+ return sprintf(buf, "%d\n", temp);
}
static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev,
@@ -65,18 +75,19 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev,
container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
char mtmp_pl[MLXSW_REG_MTMP_LEN];
- unsigned int temp_max;
+ int temp_max, index;
int err;
- mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index,
- false, false);
+ index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
+ mlxsw_hwmon->module_sensor_count);
+ mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
if (err) {
dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
return err;
}
mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL);
- return sprintf(buf, "%u\n", temp_max);
+ return sprintf(buf, "%d\n", temp_max);
}
static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev,
@@ -88,6 +99,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev,
struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
char mtmp_pl[MLXSW_REG_MTMP_LEN];
unsigned long val;
+ int index;
int err;
err = kstrtoul(buf, 10, &val);
@@ -96,7 +108,9 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev,
if (val != 1)
return -EINVAL;
- mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, true, true);
+ index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
+ mlxsw_hwmon->module_sensor_count);
+ mlxsw_reg_mtmp_pack(mtmp_pl, index, true, true);
err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
if (err) {
dev_err(mlxsw_hwmon->bus_info->dev, "Failed to reset temp sensor history\n");
@@ -198,40 +212,20 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev,
struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
- char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0};
- u16 temp;
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
u8 module;
+ int temp;
int err;
module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
- mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module,
- 1);
- err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl);
- if (err) {
- dev_err(dev, "Failed to query module temperature sensor\n");
+ mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module,
+ false, false);
+ err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
+ if (err)
return err;
- }
-
- mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL);
- /* Update status and temperature cache. */
- switch (temp) {
- case MLXSW_REG_MTBR_NO_CONN: /* fall-through */
- case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */
- case MLXSW_REG_MTBR_INDEX_NA:
- temp = 0;
- break;
- case MLXSW_REG_MTBR_BAD_SENS_INFO:
- /* Untrusted cable is connected. Reading temperature from its
- * sensor is faulty.
- */
- temp = 0;
- break;
- default:
- temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp);
- break;
- }
+ mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
- return sprintf(buf, "%u\n", temp);
+ return sprintf(buf, "%d\n", temp);
}
static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev,
@@ -333,6 +327,20 @@ mlxsw_hwmon_module_temp_label_show(struct device *dev,
mlwsw_hwmon_attr->type_index);
}
+static ssize_t
+mlxsw_hwmon_gbox_temp_label_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+ container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+ struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+ int index = mlwsw_hwmon_attr->type_index -
+ mlxsw_hwmon->module_sensor_count + 1;
+
+ return sprintf(buf, "gearbox %03u\n", index);
+}
+
enum mlxsw_hwmon_attr_type {
MLXSW_HWMON_ATTR_TYPE_TEMP,
MLXSW_HWMON_ATTR_TYPE_TEMP_MAX,
@@ -345,6 +353,7 @@ enum mlxsw_hwmon_attr_type {
MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT,
MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG,
MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL,
};
static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
@@ -428,6 +437,13 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
"temp%u_label", num + 1);
break;
+ case MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL:
+ mlxsw_hwmon_attr->dev_attr.show =
+ mlxsw_hwmon_gbox_temp_label_show;
+ mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+ snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+ "temp%u_label", num + 1);
+ break;
default:
WARN_ON(1);
}
@@ -556,6 +572,54 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon)
index, index);
index++;
}
+ mlxsw_hwmon->module_sensor_count = index;
+
+ return 0;
+}
+
+static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+ int index, max_index, sensor_index;
+ char mgpir_pl[MLXSW_REG_MGPIR_LEN];
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
+ u8 gbox_num;
+ int err;
+
+ mlxsw_reg_mgpir_pack(mgpir_pl);
+ err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL);
+ if (!gbox_num)
+ return 0;
+
+ index = mlxsw_hwmon->module_sensor_count;
+ max_index = mlxsw_hwmon->module_sensor_count + gbox_num;
+ while (index < max_index) {
+ sensor_index = index % mlxsw_hwmon->module_sensor_count +
+ MLXSW_REG_MTMP_GBOX_INDEX_MIN;
+ mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, true, true);
+ err = mlxsw_reg_write(mlxsw_hwmon->core,
+ MLXSW_REG(mtmp), mtmp_pl);
+ if (err) {
+ dev_err(mlxsw_hwmon->bus_info->dev, "Failed to setup temp sensor number %d\n",
+ sensor_index);
+ return err;
+ }
+ mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP,
+ index, index);
+ mlxsw_hwmon_attr_add(mlxsw_hwmon,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, index,
+ index);
+ mlxsw_hwmon_attr_add(mlxsw_hwmon,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_RST, index,
+ index);
+ mlxsw_hwmon_attr_add(mlxsw_hwmon,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL,
+ index, index);
+ index++;
+ }
return 0;
}
@@ -586,6 +650,10 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
if (err)
goto err_temp_module_init;
+ err = mlxsw_hwmon_gearbox_init(mlxsw_hwmon);
+ if (err)
+ goto err_temp_gearbox_init;
+
mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group;
mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs;
@@ -602,6 +670,7 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
return 0;
err_hwmon_register:
+err_temp_gearbox_init:
err_temp_module_init:
err_fans_init:
err_temp_init:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index d3e851e7ca72..35a1dc89c28a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -23,6 +23,7 @@
#define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */
#define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2)
#define MLXSW_THERMAL_ZONE_MAX_NAME 16
+#define MLXSW_THERMAL_TEMP_SCORE_MAX GENMASK(31, 0)
#define MLXSW_THERMAL_MAX_STATE 10
#define MLXSW_THERMAL_MAX_DUTY 255
/* Minimum and maximum fan allowed speed in percent: from 20% to 100%. Values
@@ -98,7 +99,7 @@ struct mlxsw_thermal_module {
struct thermal_zone_device *tzdev;
struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
enum thermal_device_mode mode;
- int module;
+ int module; /* Module or gearbox number */
};
struct mlxsw_thermal {
@@ -111,6 +112,10 @@ struct mlxsw_thermal {
struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
enum thermal_device_mode mode;
struct mlxsw_thermal_module *tz_module_arr;
+ struct mlxsw_thermal_module *tz_gearbox_arr;
+ u8 tz_gearbox_num;
+ unsigned int tz_highest_score;
+ struct thermal_zone_device *tz_highest_dev;
};
static inline u8 mlxsw_state_to_duty(int state)
@@ -195,6 +200,34 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
return 0;
}
+static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal,
+ struct thermal_zone_device *tzdev,
+ struct mlxsw_thermal_trip *trips,
+ int temp)
+{
+ struct mlxsw_thermal_trip *trip = trips;
+ unsigned int score, delta, i, shift = 1;
+
+ /* Calculate thermal zone score, if temperature is above the critical
+ * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX.
+ */
+ score = MLXSW_THERMAL_TEMP_SCORE_MAX;
+ for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS;
+ i++, trip++) {
+ if (temp < trip->temp) {
+ delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp);
+ score = delta * shift;
+ break;
+ }
+ shift *= 256;
+ }
+
+ if (score > thermal->tz_highest_score) {
+ thermal->tz_highest_score = score;
+ thermal->tz_highest_dev = tzdev;
+ }
+}
+
static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
struct thermal_cooling_device *cdev)
{
@@ -279,7 +312,7 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
struct mlxsw_thermal *thermal = tzdev->devdata;
struct device *dev = thermal->bus_info->dev;
char mtmp_pl[MLXSW_REG_MTMP_LEN];
- unsigned int temp;
+ int temp;
int err;
mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false);
@@ -290,8 +323,11 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
return err;
}
mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+ if (temp > 0)
+ mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips,
+ temp);
- *p_temp = (int) temp;
+ *p_temp = temp;
return 0;
}
@@ -351,6 +387,22 @@ static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev,
return 0;
}
+static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev,
+ int trip, enum thermal_trend *trend)
+{
+ struct mlxsw_thermal_module *tz = tzdev->devdata;
+ struct mlxsw_thermal *thermal = tz->parent;
+
+ if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
+ return -EINVAL;
+
+ if (tzdev == thermal->tz_highest_dev)
+ return 1;
+
+ *trend = THERMAL_TREND_STABLE;
+ return 0;
+}
+
static struct thermal_zone_device_ops mlxsw_thermal_ops = {
.bind = mlxsw_thermal_bind,
.unbind = mlxsw_thermal_unbind,
@@ -362,6 +414,7 @@ static struct thermal_zone_device_ops mlxsw_thermal_ops = {
.set_trip_temp = mlxsw_thermal_set_trip_temp,
.get_trip_hyst = mlxsw_thermal_get_trip_hyst,
.set_trip_hyst = mlxsw_thermal_set_trip_hyst,
+ .get_trend = mlxsw_thermal_trend_get,
};
static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev,
@@ -449,39 +502,33 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
struct mlxsw_thermal_module *tz = tzdev->devdata;
struct mlxsw_thermal *thermal = tz->parent;
struct device *dev = thermal->bus_info->dev;
- char mtbr_pl[MLXSW_REG_MTBR_LEN];
- u16 temp;
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
+ int temp;
int err;
/* Read module temperature. */
- mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX +
- tz->module, 1);
- err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtbr), mtbr_pl);
- if (err)
- return err;
-
- mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL);
- /* Update temperature. */
- switch (temp) {
- case MLXSW_REG_MTBR_NO_CONN: /* fall-through */
- case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */
- case MLXSW_REG_MTBR_INDEX_NA: /* fall-through */
- case MLXSW_REG_MTBR_BAD_SENS_INFO:
+ mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN +
+ tz->module, false, false);
+ err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
+ if (err) {
+ /* Do not return error - in case of broken module's sensor
+ * it will cause error message flooding.
+ */
temp = 0;
- break;
- default:
- temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp);
- /* Reset all trip point. */
- mlxsw_thermal_module_trips_reset(tz);
- /* Update trip points. */
- err = mlxsw_thermal_module_trips_update(dev, thermal->core,
- tz);
- if (err)
- return err;
- break;
+ *p_temp = (int) temp;
+ return 0;
}
+ mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+ *p_temp = temp;
+
+ if (!temp)
+ return 0;
+
+ /* Update trip points. */
+ err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz);
+ if (!err && temp > 0)
+ mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
- *p_temp = (int) temp;
return 0;
}
@@ -545,10 +592,6 @@ mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip,
return 0;
}
-static struct thermal_zone_params mlxsw_thermal_module_params = {
- .governor_name = "user_space",
-};
-
static struct thermal_zone_device_ops mlxsw_thermal_module_ops = {
.bind = mlxsw_thermal_module_bind,
.unbind = mlxsw_thermal_module_unbind,
@@ -560,6 +603,46 @@ static struct thermal_zone_device_ops mlxsw_thermal_module_ops = {
.set_trip_temp = mlxsw_thermal_module_trip_temp_set,
.get_trip_hyst = mlxsw_thermal_module_trip_hyst_get,
.set_trip_hyst = mlxsw_thermal_module_trip_hyst_set,
+ .get_trend = mlxsw_thermal_trend_get,
+};
+
+static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev,
+ int *p_temp)
+{
+ struct mlxsw_thermal_module *tz = tzdev->devdata;
+ struct mlxsw_thermal *thermal = tz->parent;
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
+ u16 index;
+ int temp;
+ int err;
+
+ index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module;
+ mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
+
+ err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+ if (temp > 0)
+ mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
+
+ *p_temp = temp;
+ return 0;
+}
+
+static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = {
+ .bind = mlxsw_thermal_module_bind,
+ .unbind = mlxsw_thermal_module_unbind,
+ .get_mode = mlxsw_thermal_module_mode_get,
+ .set_mode = mlxsw_thermal_module_mode_set,
+ .get_temp = mlxsw_thermal_gearbox_temp_get,
+ .get_trip_type = mlxsw_thermal_module_trip_type_get,
+ .get_trip_temp = mlxsw_thermal_module_trip_temp_get,
+ .set_trip_temp = mlxsw_thermal_module_trip_temp_set,
+ .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get,
+ .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set,
+ .get_trend = mlxsw_thermal_trend_get,
};
static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev,
@@ -675,13 +758,13 @@ mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz)
MLXSW_THERMAL_TRIP_MASK,
module_tz,
&mlxsw_thermal_module_ops,
- &mlxsw_thermal_module_params,
- 0, 0);
+ NULL, 0, 0);
if (IS_ERR(module_tz->tzdev)) {
err = PTR_ERR(module_tz->tzdev);
return err;
}
+ module_tz->mode = THERMAL_DEVICE_ENABLED;
return 0;
}
@@ -787,6 +870,92 @@ mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal)
kfree(thermal->tz_module_arr);
}
+static int
+mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz)
+{
+ char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
+
+ snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d",
+ gearbox_tz->module + 1);
+ gearbox_tz->tzdev = thermal_zone_device_register(tz_name,
+ MLXSW_THERMAL_NUM_TRIPS,
+ MLXSW_THERMAL_TRIP_MASK,
+ gearbox_tz,
+ &mlxsw_thermal_gearbox_ops,
+ NULL, 0, 0);
+ if (IS_ERR(gearbox_tz->tzdev))
+ return PTR_ERR(gearbox_tz->tzdev);
+
+ gearbox_tz->mode = THERMAL_DEVICE_ENABLED;
+ return 0;
+}
+
+static void
+mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz)
+{
+ thermal_zone_device_unregister(gearbox_tz->tzdev);
+}
+
+static int
+mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core,
+ struct mlxsw_thermal *thermal)
+{
+ struct mlxsw_thermal_module *gearbox_tz;
+ char mgpir_pl[MLXSW_REG_MGPIR_LEN];
+ int i;
+ int err;
+
+ if (!mlxsw_core_res_query_enabled(core))
+ return 0;
+
+ mlxsw_reg_mgpir_pack(mgpir_pl);
+ err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL);
+ if (!thermal->tz_gearbox_num)
+ return 0;
+
+ thermal->tz_gearbox_arr = kcalloc(thermal->tz_gearbox_num,
+ sizeof(*thermal->tz_gearbox_arr),
+ GFP_KERNEL);
+ if (!thermal->tz_gearbox_arr)
+ return -ENOMEM;
+
+ for (i = 0; i < thermal->tz_gearbox_num; i++) {
+ gearbox_tz = &thermal->tz_gearbox_arr[i];
+ memcpy(gearbox_tz->trips, default_thermal_trips,
+ sizeof(thermal->trips));
+ gearbox_tz->module = i;
+ gearbox_tz->parent = thermal;
+ err = mlxsw_thermal_gearbox_tz_init(gearbox_tz);
+ if (err)
+ goto err_unreg_tz_gearbox;
+ }
+
+ return 0;
+
+err_unreg_tz_gearbox:
+ for (i--; i >= 0; i--)
+ mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
+ kfree(thermal->tz_gearbox_arr);
+ return err;
+}
+
+static void
+mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal)
+{
+ int i;
+
+ if (!mlxsw_core_res_query_enabled(thermal->core))
+ return;
+
+ for (i = thermal->tz_gearbox_num - 1; i >= 0; i--)
+ mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
+ kfree(thermal->tz_gearbox_arr);
+}
+
int mlxsw_thermal_init(struct mlxsw_core *core,
const struct mlxsw_bus_info *bus_info,
struct mlxsw_thermal **p_thermal)
@@ -877,10 +1046,16 @@ int mlxsw_thermal_init(struct mlxsw_core *core,
if (err)
goto err_unreg_tzdev;
+ err = mlxsw_thermal_gearboxes_init(dev, core, thermal);
+ if (err)
+ goto err_unreg_modules_tzdev;
+
thermal->mode = THERMAL_DEVICE_ENABLED;
*p_thermal = thermal;
return 0;
+err_unreg_modules_tzdev:
+ mlxsw_thermal_modules_fini(thermal);
err_unreg_tzdev:
if (thermal->tzdev) {
thermal_zone_device_unregister(thermal->tzdev);
@@ -899,6 +1074,7 @@ void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
{
int i;
+ mlxsw_thermal_gearboxes_fini(thermal);
mlxsw_thermal_modules_fini(thermal);
if (thermal->tzdev) {
thermal_zone_device_unregister(thermal->tzdev);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
index 06aea1999518..95f408d0e103 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -43,11 +43,10 @@
#define MLXSW_I2C_PREP_SIZE (MLXSW_I2C_ADDR_WIDTH + 28)
#define MLXSW_I2C_MBOX_SIZE 20
#define MLXSW_I2C_MBOX_OUT_PARAM_OFF 12
-#define MLXSW_I2C_MAX_BUFF_SIZE 32
#define MLXSW_I2C_MBOX_OFFSET_BITS 20
#define MLXSW_I2C_MBOX_SIZE_BITS 12
#define MLXSW_I2C_ADDR_BUF_SIZE 4
-#define MLXSW_I2C_BLK_MAX 32
+#define MLXSW_I2C_BLK_DEF 32
#define MLXSW_I2C_RETRY 5
#define MLXSW_I2C_TIMEOUT_MSECS 5000
#define MLXSW_I2C_MAX_DATA_SIZE 256
@@ -62,6 +61,7 @@
* @dev: I2C device;
* @core: switch core pointer;
* @bus_info: bus info block;
+ * @block_size: maximum block size allowed to pass to under layer;
*/
struct mlxsw_i2c {
struct {
@@ -74,6 +74,7 @@ struct mlxsw_i2c {
struct device *dev;
struct mlxsw_core *core;
struct mlxsw_bus_info bus_info;
+ u16 block_size;
};
#define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) { \
@@ -315,20 +316,26 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num,
struct i2c_client *client = to_i2c_client(dev);
struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
unsigned long timeout = msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS);
- u8 tran_buf[MLXSW_I2C_MAX_BUFF_SIZE + MLXSW_I2C_ADDR_BUF_SIZE];
int off = mlxsw_i2c->cmd.mb_off_in, chunk_size, i, j;
unsigned long end;
+ u8 *tran_buf;
struct i2c_msg write_tran =
- MLXSW_I2C_WRITE_MSG(client, tran_buf, MLXSW_I2C_PUSH_CMD_SIZE);
+ MLXSW_I2C_WRITE_MSG(client, NULL, MLXSW_I2C_PUSH_CMD_SIZE);
int err;
+ tran_buf = kmalloc(mlxsw_i2c->block_size + MLXSW_I2C_ADDR_BUF_SIZE,
+ GFP_KERNEL);
+ if (!tran_buf)
+ return -ENOMEM;
+
+ write_tran.buf = tran_buf;
for (i = 0; i < num; i++) {
- chunk_size = (in_mbox_size > MLXSW_I2C_BLK_MAX) ?
- MLXSW_I2C_BLK_MAX : in_mbox_size;
+ chunk_size = (in_mbox_size > mlxsw_i2c->block_size) ?
+ mlxsw_i2c->block_size : in_mbox_size;
write_tran.len = MLXSW_I2C_ADDR_WIDTH + chunk_size;
mlxsw_i2c_set_slave_addr(tran_buf, off);
memcpy(&tran_buf[MLXSW_I2C_ADDR_BUF_SIZE], in_mbox +
- MLXSW_I2C_BLK_MAX * i, chunk_size);
+ mlxsw_i2c->block_size * i, chunk_size);
j = 0;
end = jiffies + timeout;
@@ -342,9 +349,10 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num,
(j++ < MLXSW_I2C_RETRY));
if (err != 1) {
- if (!err)
+ if (!err) {
err = -EIO;
- return err;
+ goto mlxsw_i2c_write_exit;
+ }
}
off += chunk_size;
@@ -355,24 +363,27 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num,
err = mlxsw_i2c_write_cmd(client, mlxsw_i2c, 0);
if (err) {
dev_err(&client->dev, "Could not start transaction");
- return -EIO;
+ err = -EIO;
+ goto mlxsw_i2c_write_exit;
}
/* Wait until go bit is cleared. */
err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, p_status);
if (err) {
dev_err(&client->dev, "HW semaphore is not released");
- return err;
+ goto mlxsw_i2c_write_exit;
}
/* Validate transaction completion status. */
if (*p_status) {
dev_err(&client->dev, "Bad transaction completion status %x\n",
*p_status);
- return -EIO;
+ err = -EIO;
}
- return 0;
+mlxsw_i2c_write_exit:
+ kfree(tran_buf);
+ return err;
}
/* Routine executes I2C command. */
@@ -395,8 +406,8 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,
if (in_mbox) {
reg_size = mlxsw_i2c_get_reg_size(in_mbox);
- num = reg_size / MLXSW_I2C_BLK_MAX;
- if (reg_size % MLXSW_I2C_BLK_MAX)
+ num = reg_size / mlxsw_i2c->block_size;
+ if (reg_size % mlxsw_i2c->block_size)
num++;
if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) {
@@ -416,7 +427,7 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,
} else {
/* No input mailbox is case of initialization query command. */
reg_size = MLXSW_I2C_MAX_DATA_SIZE;
- num = reg_size / MLXSW_I2C_BLK_MAX;
+ num = reg_size / mlxsw_i2c->block_size;
if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) {
dev_err(&client->dev, "Could not acquire lock");
@@ -432,8 +443,8 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,
/* Send read transaction to get output mailbox content. */
read_tran[1].buf = out_mbox;
for (i = 0; i < num; i++) {
- chunk_size = (reg_size > MLXSW_I2C_BLK_MAX) ?
- MLXSW_I2C_BLK_MAX : reg_size;
+ chunk_size = (reg_size > mlxsw_i2c->block_size) ?
+ mlxsw_i2c->block_size : reg_size;
read_tran[1].len = chunk_size;
mlxsw_i2c_set_slave_addr(tran_buf, off);
@@ -509,8 +520,20 @@ mlxsw_i2c_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
if (!mbox)
return -ENOMEM;
+ err = mlxsw_cmd_query_fw(mlxsw_core, mbox);
+ if (err)
+ goto mbox_put;
+
+ mlxsw_i2c->bus_info.fw_rev.major =
+ mlxsw_cmd_mbox_query_fw_fw_rev_major_get(mbox);
+ mlxsw_i2c->bus_info.fw_rev.minor =
+ mlxsw_cmd_mbox_query_fw_fw_rev_minor_get(mbox);
+ mlxsw_i2c->bus_info.fw_rev.subminor =
+ mlxsw_cmd_mbox_query_fw_fw_rev_subminor_get(mbox);
+
err = mlxsw_core_resources_query(mlxsw_core, mbox, res);
+mbox_put:
mlxsw_cmd_mbox_free(mbox);
return err;
}
@@ -534,6 +557,7 @@ static const struct mlxsw_bus mlxsw_i2c_bus = {
static int mlxsw_i2c_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
+ const struct i2c_adapter_quirks *quirks = client->adapter->quirks;
struct mlxsw_i2c *mlxsw_i2c;
u8 status;
int err;
@@ -542,6 +566,22 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
if (!mlxsw_i2c)
return -ENOMEM;
+ if (quirks) {
+ if ((quirks->max_read_len &&
+ quirks->max_read_len < MLXSW_I2C_BLK_DEF) ||
+ (quirks->max_write_len &&
+ quirks->max_write_len < MLXSW_I2C_BLK_DEF)) {
+ dev_err(&client->dev, "Insufficient transaction buffer length\n");
+ return -EOPNOTSUPP;
+ }
+
+ mlxsw_i2c->block_size = max_t(u16, MLXSW_I2C_BLK_DEF,
+ min_t(u16, quirks->max_read_len,
+ quirks->max_write_len));
+ } else {
+ mlxsw_i2c->block_size = MLXSW_I2C_BLK_DEF;
+ }
+
i2c_set_clientdata(client, mlxsw_i2c);
mutex_init(&mlxsw_i2c->cmd.lock);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
index cf2114273b72..471b0ca6d69a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -67,6 +67,23 @@ static const struct net_device_ops mlxsw_m_port_netdev_ops = {
.ndo_get_devlink_port = mlxsw_m_port_get_devlink_port,
};
+static void mlxsw_m_module_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct mlxsw_m_port *mlxsw_m_port = netdev_priv(dev);
+ struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m;
+
+ strlcpy(drvinfo->driver, mlxsw_m->bus_info->device_kind,
+ sizeof(drvinfo->driver));
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%d",
+ mlxsw_m->bus_info->fw_rev.major,
+ mlxsw_m->bus_info->fw_rev.minor,
+ mlxsw_m->bus_info->fw_rev.subminor);
+ strlcpy(drvinfo->bus_info, mlxsw_m->bus_info->device_name,
+ sizeof(drvinfo->bus_info));
+}
+
static int mlxsw_m_get_module_info(struct net_device *netdev,
struct ethtool_modinfo *modinfo)
{
@@ -88,6 +105,7 @@ mlxsw_m_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee,
}
static const struct ethtool_ops mlxsw_m_port_ethtool_ops = {
+ .get_drvinfo = mlxsw_m_module_get_drvinfo,
.get_module_info = mlxsw_m_get_module_info,
.get_module_eeprom = mlxsw_m_get_module_eeprom,
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index b40455f8293d..615455a21567 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -102,6 +102,7 @@ struct mlxsw_pci_queue_type_group {
struct mlxsw_pci {
struct pci_dev *pdev;
u8 __iomem *hw_addr;
+ u64 free_running_clock_offset;
struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT];
u32 doorbell_offset;
struct mlxsw_core *core;
@@ -507,17 +508,28 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,
{
struct pci_dev *pdev = mlxsw_pci->pdev;
struct mlxsw_pci_queue_elem_info *elem_info;
+ struct mlxsw_tx_info tx_info;
char *wqe;
struct sk_buff *skb;
int i;
spin_lock(&q->lock);
elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+ tx_info = mlxsw_skb_cb(elem_info->u.sdq.skb)->tx_info;
skb = elem_info->u.sdq.skb;
wqe = elem_info->elem;
for (i = 0; i < MLXSW_PCI_WQE_SG_ENTRIES; i++)
mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE);
- dev_kfree_skb_any(skb);
+
+ if (unlikely(!tx_info.is_emad &&
+ skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+ mlxsw_core_ptp_transmitted(mlxsw_pci->core, skb,
+ tx_info.local_port);
+ skb = NULL;
+ }
+
+ if (skb)
+ dev_kfree_skb_any(skb);
elem_info->u.sdq.skb = NULL;
if (q->consumer_counter++ != consumer_counter_limit)
@@ -835,7 +847,6 @@ static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
&mem_item->mapaddr);
if (!mem_item->buf)
return -ENOMEM;
- memset(mem_item->buf, 0, mem_item->size);
q->elem_info = kcalloc(q->count, sizeof(*q->elem_info), GFP_KERNEL);
if (!q->elem_info) {
@@ -1414,6 +1425,15 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
mlxsw_pci->doorbell_offset =
mlxsw_cmd_mbox_query_fw_doorbell_page_offset_get(mbox);
+ if (mlxsw_cmd_mbox_query_fw_fr_rn_clk_bar_get(mbox) != 0) {
+ dev_err(&pdev->dev, "Unsupported free running clock BAR queried from hw\n");
+ err = -EINVAL;
+ goto err_fr_rn_clk_bar;
+ }
+
+ mlxsw_pci->free_running_clock_offset =
+ mlxsw_cmd_mbox_query_fw_free_running_clock_offset_get(mbox);
+
num_pages = mlxsw_cmd_mbox_query_fw_fw_pages_get(mbox);
err = mlxsw_pci_fw_area_init(mlxsw_pci, mbox, num_pages);
if (err)
@@ -1469,6 +1489,7 @@ err_query_resources:
err_boardinfo:
mlxsw_pci_fw_area_fini(mlxsw_pci);
err_fw_area_init:
+err_fr_rn_clk_bar:
err_doorbell_page_bar:
err_iface_rev:
err_query_fw:
@@ -1537,6 +1558,7 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb,
err = -EAGAIN;
goto unlock;
}
+ mlxsw_skb_cb(skb)->tx_info = *tx_info;
elem_info->u.sdq.skb = skb;
wqe = elem_info->elem;
@@ -1560,6 +1582,9 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb,
goto unmap_frags;
}
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
/* Set unused sq entries byte count to zero. */
for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++)
mlxsw_pci_wqe_byte_count_set(wqe, i, 0);
@@ -1672,6 +1697,24 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
return err;
}
+static u32 mlxsw_pci_read_frc_h(void *bus_priv)
+{
+ struct mlxsw_pci *mlxsw_pci = bus_priv;
+ u64 frc_offset;
+
+ frc_offset = mlxsw_pci->free_running_clock_offset;
+ return mlxsw_pci_read32(mlxsw_pci, FREE_RUNNING_CLOCK_H(frc_offset));
+}
+
+static u32 mlxsw_pci_read_frc_l(void *bus_priv)
+{
+ struct mlxsw_pci *mlxsw_pci = bus_priv;
+ u64 frc_offset;
+
+ frc_offset = mlxsw_pci->free_running_clock_offset;
+ return mlxsw_pci_read32(mlxsw_pci, FREE_RUNNING_CLOCK_L(frc_offset));
+}
+
static const struct mlxsw_bus mlxsw_pci_bus = {
.kind = "pci",
.init = mlxsw_pci_init,
@@ -1679,6 +1722,8 @@ static const struct mlxsw_bus mlxsw_pci_bus = {
.skb_transmit_busy = mlxsw_pci_skb_transmit_busy,
.skb_transmit = mlxsw_pci_skb_transmit,
.cmd_exec = mlxsw_pci_cmd_exec,
+ .read_frc_h = mlxsw_pci_read_frc_h,
+ .read_frc_l = mlxsw_pci_read_frc_l,
.features = MLXSW_BUS_F_TXRX | MLXSW_BUS_F_RESET,
};
@@ -1740,6 +1785,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
mlxsw_pci->bus_info.device_kind = driver_name;
mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev);
mlxsw_pci->bus_info.dev = &pdev->dev;
+ mlxsw_pci->bus_info.read_frc_capable = true;
mlxsw_pci->id = id;
err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index 8648ca171254..e57e42e2d2b2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -43,6 +43,9 @@
#define MLXSW_PCI_DOORBELL(offset, type_offset, num) \
((offset) + (type_offset) + (num) * 4)
+#define MLXSW_PCI_FREE_RUNNING_CLOCK_H(offset) (offset)
+#define MLXSW_PCI_FREE_RUNNING_CLOCK_L(offset) ((offset) + 4)
+
#define MLXSW_PCI_CQS_MAX 96
#define MLXSW_PCI_EQS_COUNT 2
#define MLXSW_PCI_EQ_ASYNC_NUM 0
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index e8002bfc1e8f..ead36702549a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -997,7 +997,7 @@ static inline void mlxsw_reg_spaft_pack(char *payload, u8 local_port,
MLXSW_REG_ZERO(spaft, payload);
mlxsw_reg_spaft_local_port_set(payload, local_port);
mlxsw_reg_spaft_allow_untagged_set(payload, allow_untagged);
- mlxsw_reg_spaft_allow_prio_tagged_set(payload, true);
+ mlxsw_reg_spaft_allow_prio_tagged_set(payload, allow_untagged);
mlxsw_reg_spaft_allow_tagged_set(payload, true);
}
@@ -3515,6 +3515,18 @@ MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8);
*/
MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1);
+/* reg_qeec_ptps
+ * PTP shaper
+ * 0: regular shaper mode
+ * 1: PTP oriented shaper
+ * Allowed only for hierarchy 0
+ * Not supported for CPU port
+ * Note that ptps mode may affect the shaper rates of all hierarchies
+ * Supported only on Spectrum-1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, ptps, 0x0C, 29, 1);
+
enum {
MLXSW_REG_QEEC_BYTES_MODE,
MLXSW_REG_QEEC_PACKETS_MODE,
@@ -3601,6 +3613,16 @@ static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port,
mlxsw_reg_qeec_next_element_index_set(payload, next_index);
}
+static inline void mlxsw_reg_qeec_ptps_pack(char *payload, u8 local_port,
+ bool ptps)
+{
+ MLXSW_REG_ZERO(qeec, payload);
+ mlxsw_reg_qeec_local_port_set(payload, local_port);
+ mlxsw_reg_qeec_element_hierarchy_set(payload,
+ MLXSW_REG_QEEC_HIERARCY_PORT);
+ mlxsw_reg_qeec_ptps_set(payload, ptps);
+}
+
/* QRWE - QoS ReWrite Enable
* -------------------------
* This register configures the rewrite enable per receive port.
@@ -3814,6 +3836,112 @@ mlxsw_reg_qtctm_pack(char *payload, u8 local_port, bool mc)
mlxsw_reg_qtctm_mc_set(payload, mc);
}
+/* QPSC - QoS PTP Shaper Configuration Register
+ * --------------------------------------------
+ * The QPSC allows advanced configuration of the shapers when QEEC.ptps=1.
+ * Supported only on Spectrum-1.
+ */
+#define MLXSW_REG_QPSC_ID 0x401B
+#define MLXSW_REG_QPSC_LEN 0x28
+
+MLXSW_REG_DEFINE(qpsc, MLXSW_REG_QPSC_ID, MLXSW_REG_QPSC_LEN);
+
+enum mlxsw_reg_qpsc_port_speed {
+ MLXSW_REG_QPSC_PORT_SPEED_100M,
+ MLXSW_REG_QPSC_PORT_SPEED_1G,
+ MLXSW_REG_QPSC_PORT_SPEED_10G,
+ MLXSW_REG_QPSC_PORT_SPEED_25G,
+};
+
+/* reg_qpsc_port_speed
+ * Port speed.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpsc, port_speed, 0x00, 0, 4);
+
+/* reg_qpsc_shaper_time_exp
+ * The base-time-interval for updating the shapers tokens (for all hierarchies).
+ * shaper_update_rate = 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32nSec
+ * shaper_rate = 64bit * shaper_inc / shaper_update_rate
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, shaper_time_exp, 0x04, 16, 4);
+
+/* reg_qpsc_shaper_time_mantissa
+ * The base-time-interval for updating the shapers tokens (for all hierarchies).
+ * shaper_update_rate = 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32nSec
+ * shaper_rate = 64bit * shaper_inc / shaper_update_rate
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, shaper_time_mantissa, 0x04, 0, 5);
+
+/* reg_qpsc_shaper_inc
+ * Number of tokens added to shaper on each update.
+ * Units of 8B.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, shaper_inc, 0x08, 0, 5);
+
+/* reg_qpsc_shaper_bs
+ * Max shaper Burst size.
+ * Burst size is 2 ^ max_shaper_bs * 512 [bits]
+ * Range is: 5..25 (from 2KB..2GB)
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, shaper_bs, 0x0C, 0, 6);
+
+/* reg_qpsc_ptsc_we
+ * Write enable to port_to_shaper_credits.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, qpsc, ptsc_we, 0x10, 31, 1);
+
+/* reg_qpsc_port_to_shaper_credits
+ * For split ports: range 1..57
+ * For non-split ports: range 1..112
+ * Written only when ptsc_we is set.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, port_to_shaper_credits, 0x10, 0, 8);
+
+/* reg_qpsc_ing_timestamp_inc
+ * Ingress timestamp increment.
+ * 2's complement.
+ * The timestamp of MTPPTR at ingress will be incremented by this value. Global
+ * value for all ports.
+ * Same units as used by MTPPTR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, ing_timestamp_inc, 0x20, 0, 32);
+
+/* reg_qpsc_egr_timestamp_inc
+ * Egress timestamp increment.
+ * 2's complement.
+ * The timestamp of MTPPTR at egress will be incremented by this value. Global
+ * value for all ports.
+ * Same units as used by MTPPTR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, egr_timestamp_inc, 0x24, 0, 32);
+
+static inline void
+mlxsw_reg_qpsc_pack(char *payload, enum mlxsw_reg_qpsc_port_speed port_speed,
+ u8 shaper_time_exp, u8 shaper_time_mantissa, u8 shaper_inc,
+ u8 shaper_bs, u8 port_to_shaper_credits,
+ int ing_timestamp_inc, int egr_timestamp_inc)
+{
+ MLXSW_REG_ZERO(qpsc, payload);
+ mlxsw_reg_qpsc_port_speed_set(payload, port_speed);
+ mlxsw_reg_qpsc_shaper_time_exp_set(payload, shaper_time_exp);
+ mlxsw_reg_qpsc_shaper_time_mantissa_set(payload, shaper_time_mantissa);
+ mlxsw_reg_qpsc_shaper_inc_set(payload, shaper_inc);
+ mlxsw_reg_qpsc_shaper_bs_set(payload, shaper_bs);
+ mlxsw_reg_qpsc_ptsc_we_set(payload, true);
+ mlxsw_reg_qpsc_port_to_shaper_credits_set(payload, port_to_shaper_credits);
+ mlxsw_reg_qpsc_ing_timestamp_inc_set(payload, ing_timestamp_inc);
+ mlxsw_reg_qpsc_egr_timestamp_inc_set(payload, egr_timestamp_inc);
+}
+
/* PMLP - Ports Module to Local Port Register
* ------------------------------------------
* Configures the assignment of modules to local ports.
@@ -5292,6 +5420,8 @@ enum mlxsw_reg_htgt_trap_group {
MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD,
MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND,
MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1,
};
/* reg_htgt_trap_group
@@ -8039,16 +8169,21 @@ MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7);
MLXSW_REG_DEFINE(mtmp, MLXSW_REG_MTMP_ID, MLXSW_REG_MTMP_LEN);
+#define MLXSW_REG_MTMP_MODULE_INDEX_MIN 64
+#define MLXSW_REG_MTMP_GBOX_INDEX_MIN 256
/* reg_mtmp_sensor_index
* Sensors index to access.
* 64-127 of sensor_index are mapped to the SFP+/QSFP modules sequentially
* (module 0 is mapped to sensor_index 64).
* Access: Index
*/
-MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 7);
+MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 12);
/* Convert to milli degrees Celsius */
-#define MLXSW_REG_MTMP_TEMP_TO_MC(val) (val * 125)
+#define MLXSW_REG_MTMP_TEMP_TO_MC(val) ({ typeof(val) v_ = (val); \
+ ((v_) >= 0) ? ((v_) * 125) : \
+ ((s16)((GENMASK(15, 0) + (v_) + 1) \
+ * 125)); })
/* reg_mtmp_temperature
* Temperature reading from the sensor. Reading is in 0.125 Celsius
@@ -8107,7 +8242,7 @@ MLXSW_ITEM32(reg, mtmp, temperature_threshold_lo, 0x10, 0, 16);
*/
MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE);
-static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index,
+static inline void mlxsw_reg_mtmp_pack(char *payload, u16 sensor_index,
bool max_temp_enable,
bool max_temp_reset)
{
@@ -8119,11 +8254,10 @@ static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index,
MLXSW_REG_MTMP_THRESH_HI);
}
-static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp,
- unsigned int *p_max_temp,
- char *sensor_name)
+static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp,
+ int *p_max_temp, char *sensor_name)
{
- u16 temp;
+ s16 temp;
if (p_temp) {
temp = mlxsw_reg_mtmp_temperature_get(payload);
@@ -8156,7 +8290,7 @@ MLXSW_REG_DEFINE(mtbr, MLXSW_REG_MTBR_ID, MLXSW_REG_MTBR_LEN);
* 64-127 are mapped to the SFP+/QSFP modules sequentially).
* Access: Index
*/
-MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 7);
+MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 12);
/* reg_mtbr_num_rec
* Request: Number of records to read
@@ -8183,7 +8317,7 @@ MLXSW_ITEM32_INDEXED(reg, mtbr, rec_max_temp, MLXSW_REG_MTBR_BASE_LEN, 16,
MLXSW_ITEM32_INDEXED(reg, mtbr, rec_temp, MLXSW_REG_MTBR_BASE_LEN, 0, 16,
MLXSW_REG_MTBR_REC_LEN, 0x00, false);
-static inline void mlxsw_reg_mtbr_pack(char *payload, u8 base_sensor_index,
+static inline void mlxsw_reg_mtbr_pack(char *payload, u16 base_sensor_index,
u8 num_rec)
{
MLXSW_REG_ZERO(mtbr, payload);
@@ -8689,6 +8823,107 @@ static inline void mlxsw_reg_mlcr_pack(char *payload, u8 local_port,
MLXSW_REG_MLCR_DURATION_MAX : 0);
}
+/* MTPPS - Management Pulse Per Second Register
+ * --------------------------------------------
+ * This register provides the device PPS capabilities, configure the PPS in and
+ * out modules and holds the PPS in time stamp.
+ */
+#define MLXSW_REG_MTPPS_ID 0x9053
+#define MLXSW_REG_MTPPS_LEN 0x3C
+
+MLXSW_REG_DEFINE(mtpps, MLXSW_REG_MTPPS_ID, MLXSW_REG_MTPPS_LEN);
+
+/* reg_mtpps_enable
+ * Enables the PPS functionality the specific pin.
+ * A boolean variable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpps, enable, 0x20, 31, 1);
+
+enum mlxsw_reg_mtpps_pin_mode {
+ MLXSW_REG_MTPPS_PIN_MODE_VIRTUAL_PIN = 0x2,
+};
+
+/* reg_mtpps_pin_mode
+ * Pin mode to be used. The mode must comply with the supported modes of the
+ * requested pin.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpps, pin_mode, 0x20, 8, 4);
+
+#define MLXSW_REG_MTPPS_PIN_SP_VIRTUAL_PIN 7
+
+/* reg_mtpps_pin
+ * Pin to be configured or queried out of the supported pins.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtpps, pin, 0x20, 0, 8);
+
+/* reg_mtpps_time_stamp
+ * When pin_mode = pps_in, the latched device time when it was triggered from
+ * the external GPIO pin.
+ * When pin_mode = pps_out or virtual_pin or pps_out_and_virtual_pin, the target
+ * time to generate next output signal.
+ * Time is in units of device clock.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, mtpps, time_stamp, 0x28, 0, 64);
+
+static inline void
+mlxsw_reg_mtpps_vpin_pack(char *payload, u64 time_stamp)
+{
+ MLXSW_REG_ZERO(mtpps, payload);
+ mlxsw_reg_mtpps_pin_set(payload, MLXSW_REG_MTPPS_PIN_SP_VIRTUAL_PIN);
+ mlxsw_reg_mtpps_pin_mode_set(payload,
+ MLXSW_REG_MTPPS_PIN_MODE_VIRTUAL_PIN);
+ mlxsw_reg_mtpps_enable_set(payload, true);
+ mlxsw_reg_mtpps_time_stamp_set(payload, time_stamp);
+}
+
+/* MTUTC - Management UTC Register
+ * -------------------------------
+ * Configures the HW UTC counter.
+ */
+#define MLXSW_REG_MTUTC_ID 0x9055
+#define MLXSW_REG_MTUTC_LEN 0x1C
+
+MLXSW_REG_DEFINE(mtutc, MLXSW_REG_MTUTC_ID, MLXSW_REG_MTUTC_LEN);
+
+enum mlxsw_reg_mtutc_operation {
+ MLXSW_REG_MTUTC_OPERATION_SET_TIME_AT_NEXT_SEC = 0,
+ MLXSW_REG_MTUTC_OPERATION_ADJUST_FREQ = 3,
+};
+
+/* reg_mtutc_operation
+ * Operation.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, mtutc, operation, 0x00, 0, 4);
+
+/* reg_mtutc_freq_adjustment
+ * Frequency adjustment: Every PPS the HW frequency will be
+ * adjusted by this value. Units of HW clock, where HW counts
+ * 10^9 HW clocks for 1 HW second.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtutc, freq_adjustment, 0x04, 0, 32);
+
+/* reg_mtutc_utc_sec
+ * UTC seconds.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, mtutc, utc_sec, 0x10, 0, 32);
+
+static inline void
+mlxsw_reg_mtutc_pack(char *payload, enum mlxsw_reg_mtutc_operation oper,
+ u32 freq_adj, u32 utc_sec)
+{
+ MLXSW_REG_ZERO(mtutc, payload);
+ mlxsw_reg_mtutc_operation_set(payload, oper);
+ mlxsw_reg_mtutc_freq_adjustment_set(payload, freq_adj);
+ mlxsw_reg_mtutc_utc_sec_set(payload, utc_sec);
+}
+
/* MCQI - Management Component Query Information
* ---------------------------------------------
* This register allows querying information about firmware components.
@@ -9043,6 +9278,267 @@ static inline void mlxsw_reg_mprs_pack(char *payload, u16 parsing_depth,
mlxsw_reg_mprs_vxlan_udp_dport_set(payload, vxlan_udp_dport);
}
+/* MOGCR - Monitoring Global Configuration Register
+ * ------------------------------------------------
+ */
+#define MLXSW_REG_MOGCR_ID 0x9086
+#define MLXSW_REG_MOGCR_LEN 0x20
+
+MLXSW_REG_DEFINE(mogcr, MLXSW_REG_MOGCR_ID, MLXSW_REG_MOGCR_LEN);
+
+/* reg_mogcr_ptp_iftc
+ * PTP Ingress FIFO Trap Clear
+ * The PTP_ING_FIFO trap provides MTPPTR with clr according
+ * to this value. Default 0.
+ * Reserved when IB switches and when SwitchX/-2, Spectrum-2
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mogcr, ptp_iftc, 0x00, 1, 1);
+
+/* reg_mogcr_ptp_eftc
+ * PTP Egress FIFO Trap Clear
+ * The PTP_EGR_FIFO trap provides MTPPTR with clr according
+ * to this value. Default 0.
+ * Reserved when IB switches and when SwitchX/-2, Spectrum-2
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mogcr, ptp_eftc, 0x00, 0, 1);
+
+/* MTPPPC - Time Precision Packet Port Configuration
+ * -------------------------------------------------
+ * This register serves for configuration of which PTP messages should be
+ * timestamped. This is a global configuration, despite the register name.
+ *
+ * Reserved when Spectrum-2.
+ */
+#define MLXSW_REG_MTPPPC_ID 0x9090
+#define MLXSW_REG_MTPPPC_LEN 0x28
+
+MLXSW_REG_DEFINE(mtpppc, MLXSW_REG_MTPPPC_ID, MLXSW_REG_MTPPPC_LEN);
+
+/* reg_mtpppc_ing_timestamp_message_type
+ * Bitwise vector of PTP message types to timestamp at ingress.
+ * MessageType field as defined by IEEE 1588
+ * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Default all 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpppc, ing_timestamp_message_type, 0x08, 0, 16);
+
+/* reg_mtpppc_egr_timestamp_message_type
+ * Bitwise vector of PTP message types to timestamp at egress.
+ * MessageType field as defined by IEEE 1588
+ * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Default all 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpppc, egr_timestamp_message_type, 0x0C, 0, 16);
+
+static inline void mlxsw_reg_mtpppc_pack(char *payload, u16 ing, u16 egr)
+{
+ MLXSW_REG_ZERO(mtpppc, payload);
+ mlxsw_reg_mtpppc_ing_timestamp_message_type_set(payload, ing);
+ mlxsw_reg_mtpppc_egr_timestamp_message_type_set(payload, egr);
+}
+
+/* MTPPTR - Time Precision Packet Timestamping Reading
+ * ---------------------------------------------------
+ * The MTPPTR is used for reading the per port PTP timestamp FIFO.
+ * There is a trap for packets which are latched to the timestamp FIFO, thus the
+ * SW knows which FIFO to read. Note that packets enter the FIFO before been
+ * trapped. The sequence number is used to synchronize the timestamp FIFO
+ * entries and the trapped packets.
+ * Reserved when Spectrum-2.
+ */
+
+#define MLXSW_REG_MTPPTR_ID 0x9091
+#define MLXSW_REG_MTPPTR_BASE_LEN 0x10 /* base length, without records */
+#define MLXSW_REG_MTPPTR_REC_LEN 0x10 /* record length */
+#define MLXSW_REG_MTPPTR_REC_MAX_COUNT 4
+#define MLXSW_REG_MTPPTR_LEN (MLXSW_REG_MTPPTR_BASE_LEN + \
+ MLXSW_REG_MTPPTR_REC_LEN * MLXSW_REG_MTPPTR_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(mtpptr, MLXSW_REG_MTPPTR_ID, MLXSW_REG_MTPPTR_LEN);
+
+/* reg_mtpptr_local_port
+ * Not supported for CPU port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtpptr, local_port, 0x00, 16, 8);
+
+enum mlxsw_reg_mtpptr_dir {
+ MLXSW_REG_MTPPTR_DIR_INGRESS,
+ MLXSW_REG_MTPPTR_DIR_EGRESS,
+};
+
+/* reg_mtpptr_dir
+ * Direction.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtpptr, dir, 0x00, 0, 1);
+
+/* reg_mtpptr_clr
+ * Clear the records.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, mtpptr, clr, 0x04, 31, 1);
+
+/* reg_mtpptr_num_rec
+ * Number of valid records in the response
+ * Range 0.. cap_ptp_timestamp_fifo
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mtpptr, num_rec, 0x08, 0, 4);
+
+/* reg_mtpptr_rec_message_type
+ * MessageType field as defined by IEEE 1588 Each bit corresponds to a value
+ * (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_message_type,
+ MLXSW_REG_MTPPTR_BASE_LEN, 8, 4,
+ MLXSW_REG_MTPPTR_REC_LEN, 0, false);
+
+/* reg_mtpptr_rec_domain_number
+ * DomainNumber field as defined by IEEE 1588
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_domain_number,
+ MLXSW_REG_MTPPTR_BASE_LEN, 0, 8,
+ MLXSW_REG_MTPPTR_REC_LEN, 0, false);
+
+/* reg_mtpptr_rec_sequence_id
+ * SequenceId field as defined by IEEE 1588
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_sequence_id,
+ MLXSW_REG_MTPPTR_BASE_LEN, 0, 16,
+ MLXSW_REG_MTPPTR_REC_LEN, 0x4, false);
+
+/* reg_mtpptr_rec_timestamp_high
+ * Timestamp of when the PTP packet has passed through the port Units of PLL
+ * clock time.
+ * For Spectrum-1 the PLL clock is 156.25Mhz and PLL clock time is 6.4nSec.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_high,
+ MLXSW_REG_MTPPTR_BASE_LEN, 0, 32,
+ MLXSW_REG_MTPPTR_REC_LEN, 0x8, false);
+
+/* reg_mtpptr_rec_timestamp_low
+ * See rec_timestamp_high.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_low,
+ MLXSW_REG_MTPPTR_BASE_LEN, 0, 32,
+ MLXSW_REG_MTPPTR_REC_LEN, 0xC, false);
+
+static inline void mlxsw_reg_mtpptr_unpack(const char *payload,
+ unsigned int rec,
+ u8 *p_message_type,
+ u8 *p_domain_number,
+ u16 *p_sequence_id,
+ u64 *p_timestamp)
+{
+ u32 timestamp_high, timestamp_low;
+
+ *p_message_type = mlxsw_reg_mtpptr_rec_message_type_get(payload, rec);
+ *p_domain_number = mlxsw_reg_mtpptr_rec_domain_number_get(payload, rec);
+ *p_sequence_id = mlxsw_reg_mtpptr_rec_sequence_id_get(payload, rec);
+ timestamp_high = mlxsw_reg_mtpptr_rec_timestamp_high_get(payload, rec);
+ timestamp_low = mlxsw_reg_mtpptr_rec_timestamp_low_get(payload, rec);
+ *p_timestamp = (u64)timestamp_high << 32 | timestamp_low;
+}
+
+/* MTPTPT - Monitoring Precision Time Protocol Trap Register
+ * ---------------------------------------------------------
+ * This register is used for configuring under which trap to deliver PTP
+ * packets depending on type of the packet.
+ */
+#define MLXSW_REG_MTPTPT_ID 0x9092
+#define MLXSW_REG_MTPTPT_LEN 0x08
+
+MLXSW_REG_DEFINE(mtptpt, MLXSW_REG_MTPTPT_ID, MLXSW_REG_MTPTPT_LEN);
+
+enum mlxsw_reg_mtptpt_trap_id {
+ MLXSW_REG_MTPTPT_TRAP_ID_PTP0,
+ MLXSW_REG_MTPTPT_TRAP_ID_PTP1,
+};
+
+/* reg_mtptpt_trap_id
+ * Trap id.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtptpt, trap_id, 0x00, 0, 4);
+
+/* reg_mtptpt_message_type
+ * Bitwise vector of PTP message types to trap. This is a necessary but
+ * non-sufficient condition since need to enable also per port. See MTPPPC.
+ * Message types are defined by IEEE 1588 Each bit corresponds to a value (e.g.
+ * Bit0: Sync, Bit1: Delay_Req)
+ */
+MLXSW_ITEM32(reg, mtptpt, message_type, 0x04, 0, 16);
+
+static inline void mlxsw_reg_mtptptp_pack(char *payload,
+ enum mlxsw_reg_mtptpt_trap_id trap_id,
+ u16 message_type)
+{
+ MLXSW_REG_ZERO(mtptpt, payload);
+ mlxsw_reg_mtptpt_trap_id_set(payload, trap_id);
+ mlxsw_reg_mtptpt_message_type_set(payload, message_type);
+}
+
+/* MGPIR - Management General Peripheral Information Register
+ * ----------------------------------------------------------
+ * MGPIR register allows software to query the hardware and
+ * firmware general information of peripheral entities.
+ */
+#define MLXSW_REG_MGPIR_ID 0x9100
+#define MLXSW_REG_MGPIR_LEN 0xA0
+
+MLXSW_REG_DEFINE(mgpir, MLXSW_REG_MGPIR_ID, MLXSW_REG_MGPIR_LEN);
+
+enum mlxsw_reg_mgpir_device_type {
+ MLXSW_REG_MGPIR_DEVICE_TYPE_NONE,
+ MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE,
+};
+
+/* device_type
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, device_type, 0x00, 24, 4);
+
+/* devices_per_flash
+ * Number of devices of device_type per flash (can be shared by few devices).
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8);
+
+/* num_of_devices
+ * Number of devices of device_type.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8);
+
+static inline void mlxsw_reg_mgpir_pack(char *payload)
+{
+ MLXSW_REG_ZERO(mgpir, payload);
+}
+
+static inline void
+mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices,
+ enum mlxsw_reg_mgpir_device_type *device_type,
+ u8 *devices_per_flash)
+{
+ if (num_of_devices)
+ *num_of_devices = mlxsw_reg_mgpir_num_of_devices_get(payload);
+ if (device_type)
+ *device_type = mlxsw_reg_mgpir_device_type_get(payload);
+ if (devices_per_flash)
+ *devices_per_flash =
+ mlxsw_reg_mgpir_devices_per_flash_get(payload);
+}
+
/* TNGCR - Tunneling NVE General Configuration Register
* ----------------------------------------------------
* The TNGCR register is used for setting up the NVE Tunneling configuration.
@@ -10006,6 +10502,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(qpdsm),
MLXSW_REG(qpdpm),
MLXSW_REG(qtctm),
+ MLXSW_REG(qpsc),
MLXSW_REG(pmlp),
MLXSW_REG(pmtu),
MLXSW_REG(ptys),
@@ -10052,12 +10549,19 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(mgir),
MLXSW_REG(mrsr),
MLXSW_REG(mlcr),
+ MLXSW_REG(mtpps),
+ MLXSW_REG(mtutc),
MLXSW_REG(mpsc),
MLXSW_REG(mcqi),
MLXSW_REG(mcc),
MLXSW_REG(mcda),
MLXSW_REG(mgpc),
MLXSW_REG(mprs),
+ MLXSW_REG(mogcr),
+ MLXSW_REG(mtpppc),
+ MLXSW_REG(mtpptr),
+ MLXSW_REG(mtptpt),
+ MLXSW_REG(mgpir),
MLXSW_REG(tngcr),
MLXSW_REG(tnumt),
MLXSW_REG(tnqcr),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index dbb425717f5e..650638152bbc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -41,6 +41,7 @@
#include "spectrum_dpipe.h"
#include "spectrum_acl_flex_actions.h"
#include "spectrum_span.h"
+#include "spectrum_ptp.h"
#include "../mlxfw/mlxfw.h"
#define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
@@ -146,6 +147,35 @@ struct mlxsw_sp_mlxfw_dev {
struct mlxsw_sp *mlxsw_sp;
};
+struct mlxsw_sp_ptp_ops {
+ struct mlxsw_sp_ptp_clock *
+ (*clock_init)(struct mlxsw_sp *mlxsw_sp, struct device *dev);
+ void (*clock_fini)(struct mlxsw_sp_ptp_clock *clock);
+
+ struct mlxsw_sp_ptp_state *(*init)(struct mlxsw_sp *mlxsw_sp);
+ void (*fini)(struct mlxsw_sp_ptp_state *ptp_state);
+
+ /* Notify a driver that a packet that might be PTP was received. Driver
+ * is responsible for freeing the passed-in SKB.
+ */
+ void (*receive)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+ u8 local_port);
+
+ /* Notify a driver that a timestamped packet was transmitted. Driver
+ * is responsible for freeing the passed-in SKB.
+ */
+ void (*transmitted)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+ u8 local_port);
+
+ int (*hwtstamp_get)(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config);
+ int (*hwtstamp_set)(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config);
+ void (*shaper_work)(struct work_struct *work);
+ int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp,
+ struct ethtool_ts_info *info);
+};
+
static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev,
u16 component_index, u32 *p_max_size,
u8 *p_align_bits, u16 *p_max_write_size)
@@ -294,6 +324,19 @@ static void mlxsw_sp_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl);
}
+static void mlxsw_sp_status_notify(struct mlxfw_dev *mlxfw_dev,
+ const char *msg, const char *comp_name,
+ u32 done_bytes, u32 total_bytes)
+{
+ struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp;
+
+ devlink_flash_update_status_notify(priv_to_devlink(mlxsw_sp->core),
+ msg, comp_name,
+ done_bytes, total_bytes);
+}
+
static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = {
.component_query = mlxsw_sp_component_query,
.fsm_lock = mlxsw_sp_fsm_lock,
@@ -303,11 +346,13 @@ static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = {
.fsm_activate = mlxsw_sp_fsm_activate,
.fsm_query_state = mlxsw_sp_fsm_query_state,
.fsm_cancel = mlxsw_sp_fsm_cancel,
- .fsm_release = mlxsw_sp_fsm_release
+ .fsm_release = mlxsw_sp_fsm_release,
+ .status_notify = mlxsw_sp_status_notify,
};
static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp,
- const struct firmware *firmware)
+ const struct firmware *firmware,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp_mlxfw_dev mlxsw_sp_mlxfw_dev = {
.mlxfw_dev = {
@@ -320,7 +365,10 @@ static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp,
int err;
mlxsw_core_fw_flash_start(mlxsw_sp->core);
- err = mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, firmware);
+ devlink_flash_update_begin_notify(priv_to_devlink(mlxsw_sp->core));
+ err = mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev,
+ firmware, extack);
+ devlink_flash_update_end_notify(priv_to_devlink(mlxsw_sp->core));
mlxsw_core_fw_flash_end(mlxsw_sp->core);
return err;
@@ -374,7 +422,7 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp)
return err;
}
- err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware);
+ err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware, NULL);
release_firmware(firmware);
if (err)
dev_err(mlxsw_sp->bus_info->dev, "Could not upgrade firmware\n");
@@ -388,6 +436,27 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp)
return 0;
}
+static int mlxsw_sp_flash_update(struct mlxsw_core *mlxsw_core,
+ const char *file_name, const char *component,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ const struct firmware *firmware;
+ int err;
+
+ if (component)
+ return -EOPNOTSUPP;
+
+ err = request_firmware_direct(&firmware, file_name,
+ mlxsw_sp->bus_info->dev);
+ if (err)
+ return err;
+ err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware, extack);
+ release_firmware(firmware);
+
+ return err;
+}
+
int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
unsigned int counter_index, u64 *packets,
u64 *bytes)
@@ -738,6 +807,8 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb,
u64 len;
int err;
+ memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb));
+
if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info))
return NETDEV_TX_BUSY;
@@ -1437,21 +1508,21 @@ static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
static int
mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_acl_block *acl_block,
- struct tc_cls_flower_offload *f)
+ struct flow_cls_offload *f)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_acl_block_mlxsw_sp(acl_block);
switch (f->command) {
- case TC_CLSFLOWER_REPLACE:
+ case FLOW_CLS_REPLACE:
return mlxsw_sp_flower_replace(mlxsw_sp, acl_block, f);
- case TC_CLSFLOWER_DESTROY:
+ case FLOW_CLS_DESTROY:
mlxsw_sp_flower_destroy(mlxsw_sp, acl_block, f);
return 0;
- case TC_CLSFLOWER_STATS:
+ case FLOW_CLS_STATS:
return mlxsw_sp_flower_stats(mlxsw_sp, acl_block, f);
- case TC_CLSFLOWER_TMPLT_CREATE:
+ case FLOW_CLS_TMPLT_CREATE:
return mlxsw_sp_flower_tmplt_create(mlxsw_sp, acl_block, f);
- case TC_CLSFLOWER_TMPLT_DESTROY:
+ case FLOW_CLS_TMPLT_DESTROY:
mlxsw_sp_flower_tmplt_destroy(mlxsw_sp, acl_block, f);
return 0;
default:
@@ -1514,33 +1585,45 @@ static int mlxsw_sp_setup_tc_block_cb_flower(enum tc_setup_type type,
}
}
+static void mlxsw_sp_tc_block_flower_release(void *cb_priv)
+{
+ struct mlxsw_sp_acl_block *acl_block = cb_priv;
+
+ mlxsw_sp_acl_block_destroy(acl_block);
+}
+
+static LIST_HEAD(mlxsw_sp_block_cb_list);
+
static int
mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
- struct tcf_block *block, bool ingress,
- struct netlink_ext_ack *extack)
+ struct flow_block_offload *f, bool ingress)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
struct mlxsw_sp_acl_block *acl_block;
- struct tcf_block_cb *block_cb;
+ struct flow_block_cb *block_cb;
+ bool register_block = false;
int err;
- block_cb = tcf_block_cb_lookup(block, mlxsw_sp_setup_tc_block_cb_flower,
- mlxsw_sp);
+ block_cb = flow_block_cb_lookup(f->block,
+ mlxsw_sp_setup_tc_block_cb_flower,
+ mlxsw_sp);
if (!block_cb) {
- acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, block->net);
+ acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, f->net);
if (!acl_block)
return -ENOMEM;
- block_cb = __tcf_block_cb_register(block,
- mlxsw_sp_setup_tc_block_cb_flower,
- mlxsw_sp, acl_block, extack);
+ block_cb = flow_block_cb_alloc(mlxsw_sp_setup_tc_block_cb_flower,
+ mlxsw_sp, acl_block,
+ mlxsw_sp_tc_block_flower_release);
if (IS_ERR(block_cb)) {
+ mlxsw_sp_acl_block_destroy(acl_block);
err = PTR_ERR(block_cb);
goto err_cb_register;
}
+ register_block = true;
} else {
- acl_block = tcf_block_cb_priv(block_cb);
+ acl_block = flow_block_cb_priv(block_cb);
}
- tcf_block_cb_incref(block_cb);
+ flow_block_cb_incref(block_cb);
err = mlxsw_sp_acl_block_bind(mlxsw_sp, acl_block,
mlxsw_sp_port, ingress);
if (err)
@@ -1551,28 +1634,32 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
else
mlxsw_sp_port->eg_acl_block = acl_block;
+ if (register_block) {
+ flow_block_cb_add(block_cb, f);
+ list_add_tail(&block_cb->driver_list, &mlxsw_sp_block_cb_list);
+ }
+
return 0;
err_block_bind:
- if (!tcf_block_cb_decref(block_cb)) {
- __tcf_block_cb_unregister(block, block_cb);
+ if (!flow_block_cb_decref(block_cb))
+ flow_block_cb_free(block_cb);
err_cb_register:
- mlxsw_sp_acl_block_destroy(acl_block);
- }
return err;
}
static void
mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
- struct tcf_block *block, bool ingress)
+ struct flow_block_offload *f, bool ingress)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
struct mlxsw_sp_acl_block *acl_block;
- struct tcf_block_cb *block_cb;
+ struct flow_block_cb *block_cb;
int err;
- block_cb = tcf_block_cb_lookup(block, mlxsw_sp_setup_tc_block_cb_flower,
- mlxsw_sp);
+ block_cb = flow_block_cb_lookup(f->block,
+ mlxsw_sp_setup_tc_block_cb_flower,
+ mlxsw_sp);
if (!block_cb)
return;
@@ -1581,50 +1668,63 @@ mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
else
mlxsw_sp_port->eg_acl_block = NULL;
- acl_block = tcf_block_cb_priv(block_cb);
+ acl_block = flow_block_cb_priv(block_cb);
err = mlxsw_sp_acl_block_unbind(mlxsw_sp, acl_block,
mlxsw_sp_port, ingress);
- if (!err && !tcf_block_cb_decref(block_cb)) {
- __tcf_block_cb_unregister(block, block_cb);
- mlxsw_sp_acl_block_destroy(acl_block);
+ if (!err && !flow_block_cb_decref(block_cb)) {
+ flow_block_cb_remove(block_cb, f);
+ list_del(&block_cb->driver_list);
}
}
static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
- struct tc_block_offload *f)
+ struct flow_block_offload *f)
{
- tc_setup_cb_t *cb;
+ struct flow_block_cb *block_cb;
+ flow_setup_cb_t *cb;
bool ingress;
int err;
- if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) {
+ if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) {
cb = mlxsw_sp_setup_tc_block_cb_matchall_ig;
ingress = true;
- } else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS) {
+ } else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) {
cb = mlxsw_sp_setup_tc_block_cb_matchall_eg;
ingress = false;
} else {
return -EOPNOTSUPP;
}
+ f->driver_block_list = &mlxsw_sp_block_cb_list;
+
switch (f->command) {
- case TC_BLOCK_BIND:
- err = tcf_block_cb_register(f->block, cb, mlxsw_sp_port,
- mlxsw_sp_port, f->extack);
- if (err)
- return err;
- err = mlxsw_sp_setup_tc_block_flower_bind(mlxsw_sp_port,
- f->block, ingress,
- f->extack);
+ case FLOW_BLOCK_BIND:
+ if (flow_block_cb_is_busy(cb, mlxsw_sp_port,
+ &mlxsw_sp_block_cb_list))
+ return -EBUSY;
+
+ block_cb = flow_block_cb_alloc(cb, mlxsw_sp_port,
+ mlxsw_sp_port, NULL);
+ if (IS_ERR(block_cb))
+ return PTR_ERR(block_cb);
+ err = mlxsw_sp_setup_tc_block_flower_bind(mlxsw_sp_port, f,
+ ingress);
if (err) {
- tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port);
+ flow_block_cb_free(block_cb);
return err;
}
+ flow_block_cb_add(block_cb, f);
+ list_add_tail(&block_cb->driver_list, &mlxsw_sp_block_cb_list);
return 0;
- case TC_BLOCK_UNBIND:
+ case FLOW_BLOCK_UNBIND:
mlxsw_sp_setup_tc_block_flower_unbind(mlxsw_sp_port,
- f->block, ingress);
- tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port);
+ f, ingress);
+ block_cb = flow_block_cb_lookup(f->block, cb, mlxsw_sp_port);
+ if (!block_cb)
+ return -ENOENT;
+
+ flow_block_cb_remove(block_cb, f);
+ list_del(&block_cb->driver_list);
return 0;
default:
return -EOPNOTSUPP;
@@ -1745,6 +1845,65 @@ mlxsw_sp_port_get_devlink_port(struct net_device *dev)
mlxsw_sp_port->local_port);
}
+static int mlxsw_sp_port_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ifreq *ifr)
+{
+ struct hwtstamp_config config;
+ int err;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port,
+ &config);
+ if (err)
+ return err;
+
+ if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int mlxsw_sp_port_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ifreq *ifr)
+{
+ struct hwtstamp_config config;
+ int err;
+
+ err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port,
+ &config);
+ if (err)
+ return err;
+
+ if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static inline void mlxsw_sp_port_ptp_clear(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct hwtstamp_config config = {0};
+
+ mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config);
+}
+
+static int
+mlxsw_sp_port_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ switch (cmd) {
+ case SIOCSHWTSTAMP:
+ return mlxsw_sp_port_hwtstamp_set(mlxsw_sp_port, ifr);
+ case SIOCGHWTSTAMP:
+ return mlxsw_sp_port_hwtstamp_get(mlxsw_sp_port, ifr);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
.ndo_open = mlxsw_sp_port_open,
.ndo_stop = mlxsw_sp_port_stop,
@@ -1760,6 +1919,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
.ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid,
.ndo_set_features = mlxsw_sp_set_features,
.ndo_get_devlink_port = mlxsw_sp_port_get_devlink_port,
+ .ndo_do_ioctl = mlxsw_sp_port_ioctl,
};
static void mlxsw_sp_port_get_drvinfo(struct net_device *dev,
@@ -2525,28 +2685,33 @@ mlxsw_sp1_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto,
}
}
+static u32
+mlxsw_sp1_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) {
+ if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask)
+ return mlxsw_sp1_port_link_mode[i].speed;
+ }
+
+ return SPEED_UNKNOWN;
+}
+
static void
mlxsw_sp1_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok,
u32 ptys_eth_proto,
struct ethtool_link_ksettings *cmd)
{
- u32 speed = SPEED_UNKNOWN;
- u8 duplex = DUPLEX_UNKNOWN;
- int i;
+ cmd->base.speed = SPEED_UNKNOWN;
+ cmd->base.duplex = DUPLEX_UNKNOWN;
if (!carrier_ok)
- goto out;
+ return;
- for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) {
- if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) {
- speed = mlxsw_sp1_port_link_mode[i].speed;
- duplex = DUPLEX_FULL;
- break;
- }
- }
-out:
- cmd->base.speed = speed;
- cmd->base.duplex = duplex;
+ cmd->base.speed = mlxsw_sp1_from_ptys_speed(mlxsw_sp, ptys_eth_proto);
+ if (cmd->base.speed != SPEED_UNKNOWN)
+ cmd->base.duplex = DUPLEX_FULL;
}
static u32
@@ -2617,6 +2782,7 @@ static const struct mlxsw_sp_port_type_speed_ops
mlxsw_sp1_port_type_speed_ops = {
.from_ptys_supported_port = mlxsw_sp1_from_ptys_supported_port,
.from_ptys_link = mlxsw_sp1_from_ptys_link,
+ .from_ptys_speed = mlxsw_sp1_from_ptys_speed,
.from_ptys_speed_duplex = mlxsw_sp1_from_ptys_speed_duplex,
.to_ptys_advert_link = mlxsw_sp1_to_ptys_advert_link,
.to_ptys_speed = mlxsw_sp1_to_ptys_speed,
@@ -2867,28 +3033,33 @@ mlxsw_sp2_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto,
}
}
+static u32
+mlxsw_sp2_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) {
+ if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask)
+ return mlxsw_sp2_port_link_mode[i].speed;
+ }
+
+ return SPEED_UNKNOWN;
+}
+
static void
mlxsw_sp2_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok,
u32 ptys_eth_proto,
struct ethtool_link_ksettings *cmd)
{
- u32 speed = SPEED_UNKNOWN;
- u8 duplex = DUPLEX_UNKNOWN;
- int i;
+ cmd->base.speed = SPEED_UNKNOWN;
+ cmd->base.duplex = DUPLEX_UNKNOWN;
if (!carrier_ok)
- goto out;
+ return;
- for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) {
- if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) {
- speed = mlxsw_sp2_port_link_mode[i].speed;
- duplex = DUPLEX_FULL;
- break;
- }
- }
-out:
- cmd->base.speed = speed;
- cmd->base.duplex = duplex;
+ cmd->base.speed = mlxsw_sp2_from_ptys_speed(mlxsw_sp, ptys_eth_proto);
+ if (cmd->base.speed != SPEED_UNKNOWN)
+ cmd->base.duplex = DUPLEX_FULL;
}
static bool
@@ -2999,6 +3170,7 @@ static const struct mlxsw_sp_port_type_speed_ops
mlxsw_sp2_port_type_speed_ops = {
.from_ptys_supported_port = mlxsw_sp2_from_ptys_supported_port,
.from_ptys_link = mlxsw_sp2_from_ptys_link,
+ .from_ptys_speed = mlxsw_sp2_from_ptys_speed,
.from_ptys_speed_duplex = mlxsw_sp2_from_ptys_speed_duplex,
.to_ptys_advert_link = mlxsw_sp2_to_ptys_advert_link,
.to_ptys_speed = mlxsw_sp2_to_ptys_speed,
@@ -3128,6 +3300,10 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, &eth_proto_cap, NULL, NULL);
autoneg = cmd->base.autoneg == AUTONEG_ENABLE;
+ if (!autoneg && cmd->base.speed == SPEED_56000) {
+ netdev_err(dev, "56G not supported with autoneg off\n");
+ return -EINVAL;
+ }
eth_proto_new = autoneg ?
ops->to_ptys_advert_link(mlxsw_sp, cmd) :
ops->to_ptys_speed(mlxsw_sp, cmd->base.speed);
@@ -3155,31 +3331,6 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
return 0;
}
-static int mlxsw_sp_flash_device(struct net_device *dev,
- struct ethtool_flash *flash)
-{
- struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
- struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
- const struct firmware *firmware;
- int err;
-
- if (flash->region != ETHTOOL_FLASH_ALL_REGIONS)
- return -EOPNOTSUPP;
-
- dev_hold(dev);
- rtnl_unlock();
-
- err = request_firmware_direct(&firmware, flash->data, &dev->dev);
- if (err)
- goto out;
- err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware);
- release_firmware(firmware);
-out:
- rtnl_lock();
- dev_put(dev);
- return err;
-}
-
static int mlxsw_sp_get_module_info(struct net_device *netdev,
struct ethtool_modinfo *modinfo)
{
@@ -3209,6 +3360,15 @@ static int mlxsw_sp_get_module_eeprom(struct net_device *netdev,
return err;
}
+static int
+mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+
+ return mlxsw_sp->ptp_ops->get_ts_info(mlxsw_sp, info);
+}
+
static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
.get_drvinfo = mlxsw_sp_port_get_drvinfo,
.get_link = ethtool_op_get_link,
@@ -3220,9 +3380,9 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
.get_sset_count = mlxsw_sp_port_get_sset_count,
.get_link_ksettings = mlxsw_sp_port_get_link_ksettings,
.set_link_ksettings = mlxsw_sp_port_set_link_ksettings,
- .flash_device = mlxsw_sp_flash_device,
.get_module_info = mlxsw_sp_get_module_info,
.get_module_eeprom = mlxsw_sp_get_module_eeprom,
+ .get_ts_info = mlxsw_sp_get_ts_info,
};
static int
@@ -3339,8 +3499,9 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
return err;
}
- /* Make sure the max shaper is disabled in all hierarchies that
- * support it.
+ /* Make sure the max shaper is disabled in all hierarchies that support
+ * it. Note that this disables ptps (PTP shaper), but that is intended
+ * for the initial configuration.
*/
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0,
@@ -3585,6 +3746,9 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
}
mlxsw_sp_port->default_vlan = mlxsw_sp_port_vlan;
+ INIT_DELAYED_WORK(&mlxsw_sp_port->ptp.shaper_dw,
+ mlxsw_sp->ptp_ops->shaper_work);
+
mlxsw_sp->ports[local_port] = mlxsw_sp_port;
err = register_netdev(dev);
if (err) {
@@ -3639,6 +3803,8 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw);
+ cancel_delayed_work_sync(&mlxsw_sp_port->ptp.shaper_dw);
+ mlxsw_sp_port_ptp_clear(mlxsw_sp_port);
mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp);
unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
mlxsw_sp->ports[local_port] = NULL;
@@ -3923,14 +4089,55 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg,
if (status == MLXSW_PORT_OPER_STATUS_UP) {
netdev_info(mlxsw_sp_port->dev, "link up\n");
netif_carrier_on(mlxsw_sp_port->dev);
+ mlxsw_core_schedule_dw(&mlxsw_sp_port->ptp.shaper_dw, 0);
} else {
netdev_info(mlxsw_sp_port->dev, "link down\n");
netif_carrier_off(mlxsw_sp_port->dev);
}
}
-static void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
- u8 local_port, void *priv)
+static void mlxsw_sp1_ptp_fifo_event_func(struct mlxsw_sp *mlxsw_sp,
+ char *mtpptr_pl, bool ingress)
+{
+ u8 local_port;
+ u8 num_rec;
+ int i;
+
+ local_port = mlxsw_reg_mtpptr_local_port_get(mtpptr_pl);
+ num_rec = mlxsw_reg_mtpptr_num_rec_get(mtpptr_pl);
+ for (i = 0; i < num_rec; i++) {
+ u8 domain_number;
+ u8 message_type;
+ u16 sequence_id;
+ u64 timestamp;
+
+ mlxsw_reg_mtpptr_unpack(mtpptr_pl, i, &message_type,
+ &domain_number, &sequence_id,
+ &timestamp);
+ mlxsw_sp1_ptp_got_timestamp(mlxsw_sp, ingress, local_port,
+ message_type, domain_number,
+ sequence_id, timestamp);
+ }
+}
+
+static void mlxsw_sp1_ptp_ing_fifo_event_func(const struct mlxsw_reg_info *reg,
+ char *mtpptr_pl, void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, true);
+}
+
+static void mlxsw_sp1_ptp_egr_fifo_event_func(const struct mlxsw_reg_info *reg,
+ char *mtpptr_pl, void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, false);
+}
+
+void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
+ u8 local_port, void *priv)
{
struct mlxsw_sp *mlxsw_sp = priv;
struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
@@ -4004,6 +4211,14 @@ out:
consume_skb(skb);
}
+static void mlxsw_sp_rx_listener_ptp(struct sk_buff *skb, u8 local_port,
+ void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port);
+}
+
#define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl) \
MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action, \
_is_ctrl, SP_##_trap_group, DISCARD)
@@ -4025,7 +4240,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
/* L2 traps */
MLXSW_SP_RXL_NO_MARK(STP, TRAP_TO_CPU, STP, true),
MLXSW_SP_RXL_NO_MARK(LACP, TRAP_TO_CPU, LACP, true),
- MLXSW_SP_RXL_NO_MARK(LLDP, TRAP_TO_CPU, LLDP, true),
+ MLXSW_RXL(mlxsw_sp_rx_listener_ptp, LLDP, TRAP_TO_CPU,
+ false, SP_LLDP, DISCARD),
MLXSW_SP_RXL_MARK(DHCP, MIRROR_TO_CPU, DHCP, false),
MLXSW_SP_RXL_MARK(IGMP_QUERY, MIRROR_TO_CPU, IGMP, false),
MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, TRAP_TO_CPU, IGMP, false),
@@ -4094,6 +4310,16 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
/* NVE traps */
MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false),
MLXSW_SP_RXL_NO_MARK(NVE_DECAP_ARP, TRAP_TO_CPU, ARP, false),
+ /* PTP traps */
+ MLXSW_RXL(mlxsw_sp_rx_listener_ptp, PTP0, TRAP_TO_CPU,
+ false, SP_PTP0, DISCARD),
+ MLXSW_SP_RXL_NO_MARK(PTP1, TRAP_TO_CPU, PTP1, false),
+};
+
+static const struct mlxsw_listener mlxsw_sp1_listener[] = {
+ /* Events */
+ MLXSW_EVENTL(mlxsw_sp1_ptp_egr_fifo_event_func, PTP_EGR_FIFO, SP_PTP0),
+ MLXSW_EVENTL(mlxsw_sp1_ptp_ing_fifo_event_func, PTP_ING_FIFO, SP_PTP0),
};
static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
@@ -4145,6 +4371,14 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
rate = 1024;
burst_size = 7;
break;
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0:
+ rate = 24 * 1024;
+ burst_size = 12;
+ break;
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1:
+ rate = 19 * 1024;
+ burst_size = 12;
+ break;
default:
continue;
}
@@ -4183,6 +4417,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0:
priority = 5;
tc = 5;
break;
@@ -4200,6 +4435,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1:
priority = 2;
tc = 2;
break;
@@ -4233,22 +4469,16 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
return 0;
}
-static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
+static int mlxsw_sp_traps_register(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_listener listeners[],
+ size_t listeners_count)
{
int i;
int err;
- err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core);
- if (err)
- return err;
-
- err = mlxsw_sp_trap_groups_set(mlxsw_sp->core);
- if (err)
- return err;
-
- for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) {
+ for (i = 0; i < listeners_count; i++) {
err = mlxsw_core_trap_register(mlxsw_sp->core,
- &mlxsw_sp_listener[i],
+ &listeners[i],
mlxsw_sp);
if (err)
goto err_listener_register;
@@ -4259,30 +4489,73 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
err_listener_register:
for (i--; i >= 0; i--) {
mlxsw_core_trap_unregister(mlxsw_sp->core,
- &mlxsw_sp_listener[i],
+ &listeners[i],
mlxsw_sp);
}
return err;
}
-static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
+static void mlxsw_sp_traps_unregister(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_listener listeners[],
+ size_t listeners_count)
{
int i;
- for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) {
+ for (i = 0; i < listeners_count; i++) {
mlxsw_core_trap_unregister(mlxsw_sp->core,
- &mlxsw_sp_listener[i],
+ &listeners[i],
mlxsw_sp);
}
}
+static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
+{
+ int err;
+
+ err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_trap_groups_set(mlxsw_sp->core);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener,
+ ARRAY_SIZE(mlxsw_sp_listener));
+ if (err)
+ return err;
+
+ err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp->listeners,
+ mlxsw_sp->listeners_count);
+ if (err)
+ goto err_extra_traps_init;
+
+ return 0;
+
+err_extra_traps_init:
+ mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener,
+ ARRAY_SIZE(mlxsw_sp_listener));
+ return err;
+}
+
+static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp->listeners,
+ mlxsw_sp->listeners_count);
+ mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener,
+ ARRAY_SIZE(mlxsw_sp_listener));
+}
+
+#define MLXSW_SP_LAG_SEED_INIT 0xcafecafe
+
static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
{
char slcr_pl[MLXSW_REG_SLCR_LEN];
u32 seed;
int err;
- seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
+ seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac),
+ MLXSW_SP_LAG_SEED_INIT);
mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC |
MLXSW_REG_SLCR_LAG_HASH_DMAC |
MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE |
@@ -4325,6 +4598,32 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
}
+static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = {
+ .clock_init = mlxsw_sp1_ptp_clock_init,
+ .clock_fini = mlxsw_sp1_ptp_clock_fini,
+ .init = mlxsw_sp1_ptp_init,
+ .fini = mlxsw_sp1_ptp_fini,
+ .receive = mlxsw_sp1_ptp_receive,
+ .transmitted = mlxsw_sp1_ptp_transmitted,
+ .hwtstamp_get = mlxsw_sp1_ptp_hwtstamp_get,
+ .hwtstamp_set = mlxsw_sp1_ptp_hwtstamp_set,
+ .shaper_work = mlxsw_sp1_ptp_shaper_work,
+ .get_ts_info = mlxsw_sp1_ptp_get_ts_info,
+};
+
+static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = {
+ .clock_init = mlxsw_sp2_ptp_clock_init,
+ .clock_fini = mlxsw_sp2_ptp_clock_fini,
+ .init = mlxsw_sp2_ptp_init,
+ .fini = mlxsw_sp2_ptp_fini,
+ .receive = mlxsw_sp2_ptp_receive,
+ .transmitted = mlxsw_sp2_ptp_transmitted,
+ .hwtstamp_get = mlxsw_sp2_ptp_hwtstamp_get,
+ .hwtstamp_set = mlxsw_sp2_ptp_hwtstamp_set,
+ .shaper_work = mlxsw_sp2_ptp_shaper_work,
+ .get_ts_info = mlxsw_sp2_ptp_get_ts_info,
+};
+
static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
unsigned long event, void *ptr);
@@ -4422,6 +4721,28 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_router_init;
}
+ if (mlxsw_sp->bus_info->read_frc_capable) {
+ /* NULL is a valid return value from clock_init */
+ mlxsw_sp->clock =
+ mlxsw_sp->ptp_ops->clock_init(mlxsw_sp,
+ mlxsw_sp->bus_info->dev);
+ if (IS_ERR(mlxsw_sp->clock)) {
+ err = PTR_ERR(mlxsw_sp->clock);
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to init ptp clock\n");
+ goto err_ptp_clock_init;
+ }
+ }
+
+ if (mlxsw_sp->clock) {
+ /* NULL is a valid return value from ptp_ops->init */
+ mlxsw_sp->ptp_state = mlxsw_sp->ptp_ops->init(mlxsw_sp);
+ if (IS_ERR(mlxsw_sp->ptp_state)) {
+ err = PTR_ERR(mlxsw_sp->ptp_state);
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize PTP\n");
+ goto err_ptp_init;
+ }
+ }
+
/* Initialize netdevice notifier after router and SPAN is initialized,
* so that the event handler can use router structures and call SPAN
* respin.
@@ -4452,6 +4773,12 @@ err_ports_create:
err_dpipe_init:
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
err_netdev_notifier:
+ if (mlxsw_sp->clock)
+ mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state);
+err_ptp_init:
+ if (mlxsw_sp->clock)
+ mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock);
+err_ptp_clock_init:
mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
mlxsw_sp_acl_fini(mlxsw_sp);
@@ -4495,6 +4822,9 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals;
mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops;
+ mlxsw_sp->ptp_ops = &mlxsw_sp1_ptp_ops;
+ mlxsw_sp->listeners = mlxsw_sp1_listener;
+ mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener);
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
}
@@ -4514,6 +4844,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals;
mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops;
+ mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
}
@@ -4525,6 +4856,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_ports_remove(mlxsw_sp);
mlxsw_sp_dpipe_fini(mlxsw_sp);
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
+ if (mlxsw_sp->clock) {
+ mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state);
+ mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock);
+ }
mlxsw_sp_router_fini(mlxsw_sp);
mlxsw_sp_acl_fini(mlxsw_sp);
mlxsw_sp_nve_fini(mlxsw_sp);
@@ -4867,6 +5202,15 @@ static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core)
mlxsw_sp_params_unregister(mlxsw_core);
}
+static void mlxsw_sp_ptp_transmitted(struct mlxsw_core *mlxsw_core,
+ struct sk_buff *skb, u8 local_port)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+
+ skb_pull(skb, MLXSW_TXHDR_LEN);
+ mlxsw_sp->ptp_ops->transmitted(mlxsw_sp, skb, local_port);
+}
+
static struct mlxsw_driver mlxsw_sp1_driver = {
.kind = mlxsw_sp1_driver_name,
.priv_size = sizeof(struct mlxsw_sp),
@@ -4885,11 +5229,13 @@ static struct mlxsw_driver mlxsw_sp1_driver = {
.sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear,
.sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get,
.sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get,
+ .flash_update = mlxsw_sp_flash_update,
.txhdr_construct = mlxsw_sp_txhdr_construct,
.resources_register = mlxsw_sp1_resources_register,
.kvd_sizes_get = mlxsw_sp_kvd_sizes_get,
.params_register = mlxsw_sp_params_register,
.params_unregister = mlxsw_sp_params_unregister,
+ .ptp_transmitted = mlxsw_sp_ptp_transmitted,
.txhdr_len = MLXSW_TXHDR_LEN,
.profile = &mlxsw_sp1_config_profile,
.res_query_enabled = true,
@@ -4913,10 +5259,12 @@ static struct mlxsw_driver mlxsw_sp2_driver = {
.sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear,
.sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get,
.sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get,
+ .flash_update = mlxsw_sp_flash_update,
.txhdr_construct = mlxsw_sp_txhdr_construct,
.resources_register = mlxsw_sp2_resources_register,
.params_register = mlxsw_sp2_params_register,
.params_unregister = mlxsw_sp2_params_unregister,
+ .ptp_transmitted = mlxsw_sp_ptp_transmitted,
.txhdr_len = MLXSW_TXHDR_LEN,
.profile = &mlxsw_sp2_config_profile,
.res_query_enabled = true,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 8601b3041acd..131f62ce9297 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -136,6 +136,8 @@ struct mlxsw_sp_acl_tcam_ops;
struct mlxsw_sp_nve_ops;
struct mlxsw_sp_sb_vals;
struct mlxsw_sp_port_type_speed_ops;
+struct mlxsw_sp_ptp_state;
+struct mlxsw_sp_ptp_ops;
struct mlxsw_sp {
struct mlxsw_sp_port **ports;
@@ -155,6 +157,8 @@ struct mlxsw_sp {
struct mlxsw_sp_kvdl *kvdl;
struct mlxsw_sp_nve *nve;
struct notifier_block netdevice_nb;
+ struct mlxsw_sp_ptp_clock *clock;
+ struct mlxsw_sp_ptp_state *ptp_state;
struct mlxsw_sp_counter_pool *counter_pool;
struct {
@@ -172,6 +176,9 @@ struct mlxsw_sp {
const struct mlxsw_sp_rif_ops **rif_ops_arr;
const struct mlxsw_sp_sb_vals *sb_vals;
const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops;
+ const struct mlxsw_sp_ptp_ops *ptp_ops;
+ const struct mlxsw_listener *listeners;
+ size_t listeners_count;
};
static inline struct mlxsw_sp_upper *
@@ -259,6 +266,12 @@ struct mlxsw_sp_port {
unsigned acl_rule_count;
struct mlxsw_sp_acl_block *ing_acl_block;
struct mlxsw_sp_acl_block *eg_acl_block;
+ struct {
+ struct delayed_work shaper_dw;
+ struct hwtstamp_config hwtstamp_config;
+ u16 ing_types;
+ u16 egr_types;
+ } ptp;
};
struct mlxsw_sp_port_type_speed_ops {
@@ -267,6 +280,7 @@ struct mlxsw_sp_port_type_speed_ops {
struct ethtool_link_ksettings *cmd);
void (*from_ptys_link)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto,
unsigned long *mode);
+ u32 (*from_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto);
void (*from_ptys_speed_duplex)(struct mlxsw_sp *mlxsw_sp,
bool carrier_ok, u32 ptys_eth_proto,
struct ethtool_link_ksettings *cmd);
@@ -435,6 +449,8 @@ struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp,
extern struct notifier_block mlxsw_sp_switchdev_notifier;
/* spectrum.c */
+void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
+ u8 local_port, void *priv);
int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index,
bool dwrr, u8 dwrr_weight);
@@ -620,6 +636,15 @@ enum mlxsw_sp_acl_profile {
MLXSW_SP_ACL_PROFILE_MR,
};
+struct mlxsw_sp_acl_block {
+ struct list_head binding_list;
+ struct mlxsw_sp_acl_ruleset *ruleset_zero;
+ struct mlxsw_sp *mlxsw_sp;
+ unsigned int rule_count;
+ unsigned int disable_count;
+ struct net *net;
+};
+
struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl);
struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block);
unsigned int mlxsw_sp_acl_block_rule_count(struct mlxsw_sp_acl_block *block);
@@ -782,19 +807,19 @@ extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
/* spectrum_flower.c */
int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_block *block,
- struct tc_cls_flower_offload *f);
+ struct flow_cls_offload *f);
void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_block *block,
- struct tc_cls_flower_offload *f);
+ struct flow_cls_offload *f);
int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_block *block,
- struct tc_cls_flower_offload *f);
+ struct flow_cls_offload *f);
int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_block *block,
- struct tc_cls_flower_offload *f);
+ struct flow_cls_offload *f);
void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_block *block,
- struct tc_cls_flower_offload *f);
+ struct flow_cls_offload *f);
/* spectrum_qdisc.c */
int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port);
@@ -805,6 +830,7 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p);
/* spectrum_fid.c */
+bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index);
bool mlxsw_sp_fid_lag_vid_valid(const struct mlxsw_sp_fid *fid);
struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp,
u16 fid_index);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index a146a44634e9..e8ac90564dbe 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -45,14 +45,6 @@ struct mlxsw_sp_acl_block_binding {
bool ingress;
};
-struct mlxsw_sp_acl_block {
- struct list_head binding_list;
- struct mlxsw_sp_acl_ruleset *ruleset_zero;
- struct mlxsw_sp *mlxsw_sp;
- unsigned int rule_count;
- unsigned int disable_count;
-};
-
struct mlxsw_sp_acl_ruleset_ht_key {
struct mlxsw_sp_acl_block *block;
u32 chain_index;
@@ -221,6 +213,7 @@ struct mlxsw_sp_acl_block *mlxsw_sp_acl_block_create(struct mlxsw_sp *mlxsw_sp,
return NULL;
INIT_LIST_HEAD(&block->binding_list);
block->mlxsw_sp = mlxsw_sp;
+ block->net = net;
return block;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
index c1a9cc9a3292..4c98950380d5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
@@ -1171,13 +1171,12 @@ mlxsw_sp_acl_erp_delta_fill(const struct mlxsw_sp_acl_erp_key *parent_key,
return -EINVAL;
}
if (si == -1) {
- /* The masks are the same, this cannot happen.
- * That means the caller is broken.
+ /* The masks are the same, this can happen in case eRPs with
+ * the same mask were created in both A-TCAM and C-TCAM.
+ * The only possible condition under which this can happen
+ * is identical rule insertion. Delta is not possible here.
*/
- WARN_ON(1);
- *delta_start = 0;
- *delta_mask = 0;
- return 0;
+ return -EINVAL;
}
pmask = (unsigned char) parent_key->mask[__MASK_IDX(si)];
mask = (unsigned char) key->mask[__MASK_IDX(si)];
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
index 2a998dea4f39..279c241f76f0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
@@ -12,7 +12,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = {
MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x02, 4),
MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
- MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+ MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
};
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = {
@@ -20,7 +20,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = {
MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x02, 4),
MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
- MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+ MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
};
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = {
@@ -32,13 +32,13 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = {
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_sip[] = {
MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x00, 4),
MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
- MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+ MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
};
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = {
MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x00, 4),
MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
- MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+ MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
};
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = {
@@ -149,7 +149,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_4[] = {
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5[] = {
MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 16, 12),
- MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x04, 0, 8), /* RX_ACL_SYSTEM_PORT */
+ MLXSW_AFK_ELEMENT_INST_EXT_U32(SRC_SYS_PORT, 0x04, 0, 8, -1, true), /* RX_ACL_SYSTEM_PORT */
};
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_0[] = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index 8512dd49e420..1537f70bc26d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -437,8 +437,8 @@ static const struct mlxsw_sp_sb_pr mlxsw_sp1_sb_prs[] = {
MLXSW_SP1_SB_PR_CPU_SIZE, true, false),
};
-#define MLXSW_SP2_SB_PR_INGRESS_SIZE 40960000
-#define MLXSW_SP2_SB_PR_EGRESS_SIZE 40960000
+#define MLXSW_SP2_SB_PR_INGRESS_SIZE 38128752
+#define MLXSW_SP2_SB_PR_EGRESS_SIZE 38128752
#define MLXSW_SP2_SB_PR_CPU_SIZE (256 * 1000)
/* Order according to mlxsw_sp2_sb_pool_dess */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
index b25048c6c761..21296fa7f7fb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
@@ -408,14 +408,6 @@ static int mlxsw_sp_port_dcb_app_update(struct mlxsw_sp_port *mlxsw_sp_port)
have_dscp = mlxsw_sp_port_dcb_app_prio_dscp_map(mlxsw_sp_port,
&prio_map);
- if (!have_dscp) {
- err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port,
- MLXSW_REG_QPTS_TRUST_STATE_PCP);
- if (err)
- netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L2\n");
- return err;
- }
-
mlxsw_sp_port_dcb_app_dscp_prio_map(mlxsw_sp_port, default_prio,
&dscp_map);
err = mlxsw_sp_port_dcb_app_update_qpdpm(mlxsw_sp_port,
@@ -432,6 +424,14 @@ static int mlxsw_sp_port_dcb_app_update(struct mlxsw_sp_port *mlxsw_sp_port)
return err;
}
+ if (!have_dscp) {
+ err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port,
+ MLXSW_REG_QPTS_TRUST_STATE_PCP);
+ if (err)
+ netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L2\n");
+ return err;
+ }
+
err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port,
MLXSW_REG_QPTS_TRUST_STATE_DSCP);
if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
index 46baf3b44309..8df3cb21baa6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
@@ -126,6 +126,16 @@ static const int *mlxsw_sp_packet_type_sfgc_types[] = {
[MLXSW_SP_FLOOD_TYPE_MC] = mlxsw_sp_sfgc_mc_packet_types,
};
+bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index)
+{
+ enum mlxsw_sp_fid_type fid_type = MLXSW_SP_FID_TYPE_DUMMY;
+ struct mlxsw_sp_fid_family *fid_family;
+
+ fid_family = mlxsw_sp->fid_core->fid_family_arr[fid_type];
+
+ return fid_family->start_index == fid_index;
+}
+
bool mlxsw_sp_fid_lag_vid_valid(const struct mlxsw_sp_fid *fid)
{
return fid->fid_family->lag_vid_valid;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 15f804453cd6..202e9a246019 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -120,8 +120,51 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
return 0;
}
+static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei,
+ struct flow_cls_offload *f,
+ struct mlxsw_sp_acl_block *block)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ struct net_device *ingress_dev;
+ struct flow_match_meta match;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
+ return 0;
+
+ flow_rule_match_meta(rule, &match);
+ if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
+ NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported ingress ifindex mask");
+ return -EINVAL;
+ }
+
+ ingress_dev = __dev_get_by_index(block->net,
+ match.key->ingress_ifindex);
+ if (!ingress_dev) {
+ NL_SET_ERR_MSG_MOD(f->common.extack, "Can't find specified ingress port to match on");
+ return -EINVAL;
+ }
+
+ if (!mlxsw_sp_port_dev_check(ingress_dev)) {
+ NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on non-mlxsw ingress port");
+ return -EINVAL;
+ }
+
+ mlxsw_sp_port = netdev_priv(ingress_dev);
+ if (mlxsw_sp_port->mlxsw_sp != block->mlxsw_sp) {
+ NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on a port from different device");
+ return -EINVAL;
+ }
+
+ mlxsw_sp_acl_rulei_keymask_u32(rulei,
+ MLXSW_AFK_ELEMENT_SRC_SYS_PORT,
+ mlxsw_sp_port->local_port,
+ 0xFFFFFFFF);
+ return 0;
+}
+
static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei,
- struct tc_cls_flower_offload *f)
+ struct flow_cls_offload *f)
{
struct flow_match_ipv4_addrs match;
@@ -136,7 +179,7 @@ static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei,
}
static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei,
- struct tc_cls_flower_offload *f)
+ struct flow_cls_offload *f)
{
struct flow_match_ipv6_addrs match;
@@ -170,10 +213,10 @@ static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei,
static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_rule_info *rulei,
- struct tc_cls_flower_offload *f,
+ struct flow_cls_offload *f,
u8 ip_proto)
{
- const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+ const struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct flow_match_ports match;
if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS))
@@ -197,10 +240,10 @@ static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp,
static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_rule_info *rulei,
- struct tc_cls_flower_offload *f,
+ struct flow_cls_offload *f,
u8 ip_proto)
{
- const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+ const struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct flow_match_tcp match;
if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP))
@@ -222,10 +265,10 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp,
static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_rule_info *rulei,
- struct tc_cls_flower_offload *f,
+ struct flow_cls_offload *f,
u16 n_proto)
{
- const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+ const struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct flow_match_ip match;
if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP))
@@ -247,8 +290,8 @@ static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp,
match.mask->tos & 0x3);
mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP,
- match.key->tos >> 6,
- match.mask->tos >> 6);
+ match.key->tos >> 2,
+ match.mask->tos >> 2);
return 0;
}
@@ -256,9 +299,9 @@ static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp,
static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_block *block,
struct mlxsw_sp_acl_rule_info *rulei,
- struct tc_cls_flower_offload *f)
+ struct flow_cls_offload *f)
{
- struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct flow_dissector *dissector = rule->match.dissector;
u16 n_proto_mask = 0;
u16 n_proto_key = 0;
@@ -267,7 +310,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
int err;
if (dissector->used_keys &
- ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ ~(BIT(FLOW_DISSECTOR_KEY_META) |
+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
BIT(FLOW_DISSECTOR_KEY_BASIC) |
BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
@@ -283,6 +327,10 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_acl_rulei_priority(rulei, f->common.prio);
+ err = mlxsw_sp_flower_parse_meta(rulei, f, block);
+ if (err)
+ return err;
+
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
struct flow_match_control match;
@@ -378,7 +426,7 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_block *block,
- struct tc_cls_flower_offload *f)
+ struct flow_cls_offload *f)
{
struct mlxsw_sp_acl_rule_info *rulei;
struct mlxsw_sp_acl_ruleset *ruleset;
@@ -425,7 +473,7 @@ err_rule_create:
void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_block *block,
- struct tc_cls_flower_offload *f)
+ struct flow_cls_offload *f)
{
struct mlxsw_sp_acl_ruleset *ruleset;
struct mlxsw_sp_acl_rule *rule;
@@ -447,7 +495,7 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp,
int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_block *block,
- struct tc_cls_flower_offload *f)
+ struct flow_cls_offload *f)
{
struct mlxsw_sp_acl_ruleset *ruleset;
struct mlxsw_sp_acl_rule *rule;
@@ -483,7 +531,7 @@ err_rule_get_stats:
int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_block *block,
- struct tc_cls_flower_offload *f)
+ struct flow_cls_offload *f)
{
struct mlxsw_sp_acl_ruleset *ruleset;
struct mlxsw_sp_acl_rule_info rulei;
@@ -504,7 +552,7 @@ int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp,
void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_block *block,
- struct tc_cls_flower_offload *f)
+ struct flow_cls_offload *f)
{
struct mlxsw_sp_acl_ruleset *ruleset;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
new file mode 100644
index 000000000000..bd9c2bc2d5d6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
@@ -0,0 +1,1111 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */
+
+#include <linux/ptp_clock_kernel.h>
+#include <linux/clocksource.h>
+#include <linux/timecounter.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/rhashtable.h>
+#include <linux/ptp_classify.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/net_tstamp.h>
+
+#include "spectrum.h"
+#include "spectrum_ptp.h"
+#include "core.h"
+
+#define MLXSW_SP1_PTP_CLOCK_CYCLES_SHIFT 29
+#define MLXSW_SP1_PTP_CLOCK_FREQ_KHZ 156257 /* 6.4nSec */
+#define MLXSW_SP1_PTP_CLOCK_MASK 64
+
+#define MLXSW_SP1_PTP_HT_GC_INTERVAL 500 /* ms */
+
+/* How long, approximately, should the unmatched entries stay in the hash table
+ * before they are collected. Should be evenly divisible by the GC interval.
+ */
+#define MLXSW_SP1_PTP_HT_GC_TIMEOUT 1000 /* ms */
+
+struct mlxsw_sp_ptp_state {
+ struct mlxsw_sp *mlxsw_sp;
+ struct rhashtable unmatched_ht;
+ spinlock_t unmatched_lock; /* protects the HT */
+ struct delayed_work ht_gc_dw;
+ u32 gc_cycle;
+};
+
+struct mlxsw_sp1_ptp_key {
+ u8 local_port;
+ u8 message_type;
+ u16 sequence_id;
+ u8 domain_number;
+ bool ingress;
+};
+
+struct mlxsw_sp1_ptp_unmatched {
+ struct mlxsw_sp1_ptp_key key;
+ struct rhash_head ht_node;
+ struct rcu_head rcu;
+ struct sk_buff *skb;
+ u64 timestamp;
+ u32 gc_cycle;
+};
+
+static const struct rhashtable_params mlxsw_sp1_ptp_unmatched_ht_params = {
+ .key_len = sizeof_field(struct mlxsw_sp1_ptp_unmatched, key),
+ .key_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, key),
+ .head_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, ht_node),
+};
+
+struct mlxsw_sp_ptp_clock {
+ struct mlxsw_core *core;
+ spinlock_t lock; /* protect this structure */
+ struct cyclecounter cycles;
+ struct timecounter tc;
+ u32 nominal_c_mult;
+ struct ptp_clock *ptp;
+ struct ptp_clock_info ptp_info;
+ unsigned long overflow_period;
+ struct delayed_work overflow_work;
+};
+
+static u64 __mlxsw_sp1_ptp_read_frc(struct mlxsw_sp_ptp_clock *clock,
+ struct ptp_system_timestamp *sts)
+{
+ struct mlxsw_core *mlxsw_core = clock->core;
+ u32 frc_h1, frc_h2, frc_l;
+
+ frc_h1 = mlxsw_core_read_frc_h(mlxsw_core);
+ ptp_read_system_prets(sts);
+ frc_l = mlxsw_core_read_frc_l(mlxsw_core);
+ ptp_read_system_postts(sts);
+ frc_h2 = mlxsw_core_read_frc_h(mlxsw_core);
+
+ if (frc_h1 != frc_h2) {
+ /* wrap around */
+ ptp_read_system_prets(sts);
+ frc_l = mlxsw_core_read_frc_l(mlxsw_core);
+ ptp_read_system_postts(sts);
+ }
+
+ return (u64) frc_l | (u64) frc_h2 << 32;
+}
+
+static u64 mlxsw_sp1_ptp_read_frc(const struct cyclecounter *cc)
+{
+ struct mlxsw_sp_ptp_clock *clock =
+ container_of(cc, struct mlxsw_sp_ptp_clock, cycles);
+
+ return __mlxsw_sp1_ptp_read_frc(clock, NULL) & cc->mask;
+}
+
+static int
+mlxsw_sp1_ptp_phc_adjfreq(struct mlxsw_sp_ptp_clock *clock, int freq_adj)
+{
+ struct mlxsw_core *mlxsw_core = clock->core;
+ char mtutc_pl[MLXSW_REG_MTUTC_LEN];
+
+ mlxsw_reg_mtutc_pack(mtutc_pl, MLXSW_REG_MTUTC_OPERATION_ADJUST_FREQ,
+ freq_adj, 0);
+ return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl);
+}
+
+static u64 mlxsw_sp1_ptp_ns2cycles(const struct timecounter *tc, u64 nsec)
+{
+ u64 cycles = (u64) nsec;
+
+ cycles <<= tc->cc->shift;
+ cycles = div_u64(cycles, tc->cc->mult);
+
+ return cycles;
+}
+
+static int
+mlxsw_sp1_ptp_phc_settime(struct mlxsw_sp_ptp_clock *clock, u64 nsec)
+{
+ struct mlxsw_core *mlxsw_core = clock->core;
+ u64 next_sec, next_sec_in_nsec, cycles;
+ char mtutc_pl[MLXSW_REG_MTUTC_LEN];
+ char mtpps_pl[MLXSW_REG_MTPPS_LEN];
+ int err;
+
+ next_sec = div_u64(nsec, NSEC_PER_SEC) + 1;
+ next_sec_in_nsec = next_sec * NSEC_PER_SEC;
+
+ spin_lock_bh(&clock->lock);
+ cycles = mlxsw_sp1_ptp_ns2cycles(&clock->tc, next_sec_in_nsec);
+ spin_unlock_bh(&clock->lock);
+
+ mlxsw_reg_mtpps_vpin_pack(mtpps_pl, cycles);
+ err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtpps), mtpps_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_mtutc_pack(mtutc_pl,
+ MLXSW_REG_MTUTC_OPERATION_SET_TIME_AT_NEXT_SEC,
+ 0, next_sec);
+ return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl);
+}
+
+static int mlxsw_sp1_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+ struct mlxsw_sp_ptp_clock *clock =
+ container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info);
+ int neg_adj = 0;
+ u32 diff;
+ u64 adj;
+ s32 ppb;
+
+ ppb = scaled_ppm_to_ppb(scaled_ppm);
+
+ if (ppb < 0) {
+ neg_adj = 1;
+ ppb = -ppb;
+ }
+
+ adj = clock->nominal_c_mult;
+ adj *= ppb;
+ diff = div_u64(adj, NSEC_PER_SEC);
+
+ spin_lock_bh(&clock->lock);
+ timecounter_read(&clock->tc);
+ clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
+ clock->nominal_c_mult + diff;
+ spin_unlock_bh(&clock->lock);
+
+ return mlxsw_sp1_ptp_phc_adjfreq(clock, neg_adj ? -ppb : ppb);
+}
+
+static int mlxsw_sp1_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct mlxsw_sp_ptp_clock *clock =
+ container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info);
+ u64 nsec;
+
+ spin_lock_bh(&clock->lock);
+ timecounter_adjtime(&clock->tc, delta);
+ nsec = timecounter_read(&clock->tc);
+ spin_unlock_bh(&clock->lock);
+
+ return mlxsw_sp1_ptp_phc_settime(clock, nsec);
+}
+
+static int mlxsw_sp1_ptp_gettimex(struct ptp_clock_info *ptp,
+ struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
+{
+ struct mlxsw_sp_ptp_clock *clock =
+ container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info);
+ u64 cycles, nsec;
+
+ spin_lock_bh(&clock->lock);
+ cycles = __mlxsw_sp1_ptp_read_frc(clock, sts);
+ nsec = timecounter_cyc2time(&clock->tc, cycles);
+ spin_unlock_bh(&clock->lock);
+
+ *ts = ns_to_timespec64(nsec);
+
+ return 0;
+}
+
+static int mlxsw_sp1_ptp_settime(struct ptp_clock_info *ptp,
+ const struct timespec64 *ts)
+{
+ struct mlxsw_sp_ptp_clock *clock =
+ container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info);
+ u64 nsec = timespec64_to_ns(ts);
+
+ spin_lock_bh(&clock->lock);
+ timecounter_init(&clock->tc, &clock->cycles, nsec);
+ nsec = timecounter_read(&clock->tc);
+ spin_unlock_bh(&clock->lock);
+
+ return mlxsw_sp1_ptp_phc_settime(clock, nsec);
+}
+
+static const struct ptp_clock_info mlxsw_sp1_ptp_clock_info = {
+ .owner = THIS_MODULE,
+ .name = "mlxsw_sp_clock",
+ .max_adj = 100000000,
+ .adjfine = mlxsw_sp1_ptp_adjfine,
+ .adjtime = mlxsw_sp1_ptp_adjtime,
+ .gettimex64 = mlxsw_sp1_ptp_gettimex,
+ .settime64 = mlxsw_sp1_ptp_settime,
+};
+
+static void mlxsw_sp1_ptp_clock_overflow(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct mlxsw_sp_ptp_clock *clock;
+
+ clock = container_of(dwork, struct mlxsw_sp_ptp_clock, overflow_work);
+
+ spin_lock_bh(&clock->lock);
+ timecounter_read(&clock->tc);
+ spin_unlock_bh(&clock->lock);
+ mlxsw_core_schedule_dw(&clock->overflow_work, clock->overflow_period);
+}
+
+struct mlxsw_sp_ptp_clock *
+mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev)
+{
+ u64 overflow_cycles, nsec, frac = 0;
+ struct mlxsw_sp_ptp_clock *clock;
+ int err;
+
+ clock = kzalloc(sizeof(*clock), GFP_KERNEL);
+ if (!clock)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&clock->lock);
+ clock->cycles.read = mlxsw_sp1_ptp_read_frc;
+ clock->cycles.shift = MLXSW_SP1_PTP_CLOCK_CYCLES_SHIFT;
+ clock->cycles.mult = clocksource_khz2mult(MLXSW_SP1_PTP_CLOCK_FREQ_KHZ,
+ clock->cycles.shift);
+ clock->nominal_c_mult = clock->cycles.mult;
+ clock->cycles.mask = CLOCKSOURCE_MASK(MLXSW_SP1_PTP_CLOCK_MASK);
+ clock->core = mlxsw_sp->core;
+
+ timecounter_init(&clock->tc, &clock->cycles,
+ ktime_to_ns(ktime_get_real()));
+
+ /* Calculate period in seconds to call the overflow watchdog - to make
+ * sure counter is checked at least twice every wrap around.
+ * The period is calculated as the minimum between max HW cycles count
+ * (The clock source mask) and max amount of cycles that can be
+ * multiplied by clock multiplier where the result doesn't exceed
+ * 64bits.
+ */
+ overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult);
+ overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3));
+
+ nsec = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles, 0, &frac);
+ clock->overflow_period = nsecs_to_jiffies(nsec);
+
+ INIT_DELAYED_WORK(&clock->overflow_work, mlxsw_sp1_ptp_clock_overflow);
+ mlxsw_core_schedule_dw(&clock->overflow_work, 0);
+
+ clock->ptp_info = mlxsw_sp1_ptp_clock_info;
+ clock->ptp = ptp_clock_register(&clock->ptp_info, dev);
+ if (IS_ERR(clock->ptp)) {
+ err = PTR_ERR(clock->ptp);
+ dev_err(dev, "ptp_clock_register failed %d\n", err);
+ goto err_ptp_clock_register;
+ }
+
+ return clock;
+
+err_ptp_clock_register:
+ cancel_delayed_work_sync(&clock->overflow_work);
+ kfree(clock);
+ return ERR_PTR(err);
+}
+
+void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock)
+{
+ ptp_clock_unregister(clock->ptp);
+ cancel_delayed_work_sync(&clock->overflow_work);
+ kfree(clock);
+}
+
+static int mlxsw_sp_ptp_parse(struct sk_buff *skb,
+ u8 *p_domain_number,
+ u8 *p_message_type,
+ u16 *p_sequence_id)
+{
+ unsigned int offset = 0;
+ unsigned int ptp_class;
+ u8 *data;
+
+ data = skb_mac_header(skb);
+ ptp_class = ptp_classify_raw(skb);
+
+ switch (ptp_class & PTP_CLASS_VMASK) {
+ case PTP_CLASS_V1:
+ case PTP_CLASS_V2:
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ if (ptp_class & PTP_CLASS_VLAN)
+ offset += VLAN_HLEN;
+
+ switch (ptp_class & PTP_CLASS_PMASK) {
+ case PTP_CLASS_IPV4:
+ offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
+ break;
+ case PTP_CLASS_IPV6:
+ offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
+ break;
+ case PTP_CLASS_L2:
+ offset += ETH_HLEN;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ /* PTP header is 34 bytes. */
+ if (skb->len < offset + 34)
+ return -EINVAL;
+
+ *p_message_type = data[offset] & 0x0f;
+ *p_domain_number = data[offset + 4];
+ *p_sequence_id = (u16)(data[offset + 30]) << 8 | data[offset + 31];
+ return 0;
+}
+
+/* Returns NULL on successful insertion, a pointer on conflict, or an ERR_PTR on
+ * error.
+ */
+static struct mlxsw_sp1_ptp_unmatched *
+mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_ptp_key key,
+ struct sk_buff *skb,
+ u64 timestamp)
+{
+ int cycles = MLXSW_SP1_PTP_HT_GC_TIMEOUT / MLXSW_SP1_PTP_HT_GC_INTERVAL;
+ struct mlxsw_sp_ptp_state *ptp_state = mlxsw_sp->ptp_state;
+ struct mlxsw_sp1_ptp_unmatched *unmatched;
+ struct mlxsw_sp1_ptp_unmatched *conflict;
+
+ unmatched = kzalloc(sizeof(*unmatched), GFP_ATOMIC);
+ if (!unmatched)
+ return ERR_PTR(-ENOMEM);
+
+ unmatched->key = key;
+ unmatched->skb = skb;
+ unmatched->timestamp = timestamp;
+ unmatched->gc_cycle = mlxsw_sp->ptp_state->gc_cycle + cycles;
+
+ conflict = rhashtable_lookup_get_insert_fast(&ptp_state->unmatched_ht,
+ &unmatched->ht_node,
+ mlxsw_sp1_ptp_unmatched_ht_params);
+ if (conflict)
+ kfree(unmatched);
+
+ return conflict;
+}
+
+static struct mlxsw_sp1_ptp_unmatched *
+mlxsw_sp1_ptp_unmatched_lookup(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_ptp_key key)
+{
+ return rhashtable_lookup(&mlxsw_sp->ptp_state->unmatched_ht, &key,
+ mlxsw_sp1_ptp_unmatched_ht_params);
+}
+
+static int
+mlxsw_sp1_ptp_unmatched_remove(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_ptp_unmatched *unmatched)
+{
+ return rhashtable_remove_fast(&mlxsw_sp->ptp_state->unmatched_ht,
+ &unmatched->ht_node,
+ mlxsw_sp1_ptp_unmatched_ht_params);
+}
+
+/* This function is called in the following scenarios:
+ *
+ * 1) When a packet is matched with its timestamp.
+ * 2) In several situation when it is necessary to immediately pass on
+ * an SKB without a timestamp.
+ * 3) From GC indirectly through mlxsw_sp1_ptp_unmatched_finish().
+ * This case is similar to 2) above.
+ */
+static void mlxsw_sp1_ptp_packet_finish(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port,
+ bool ingress,
+ struct skb_shared_hwtstamps *hwtstamps)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+
+ /* Between capturing the packet and finishing it, there is a window of
+ * opportunity for the originating port to go away (e.g. due to a
+ * split). Also make sure the SKB device reference is still valid.
+ */
+ mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ if (!(mlxsw_sp_port && (!skb->dev || skb->dev == mlxsw_sp_port->dev))) {
+ dev_kfree_skb_any(skb);
+ return;
+ }
+
+ if (ingress) {
+ if (hwtstamps)
+ *skb_hwtstamps(skb) = *hwtstamps;
+ mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp);
+ } else {
+ /* skb_tstamp_tx() allows hwtstamps to be NULL. */
+ skb_tstamp_tx(skb, hwtstamps);
+ dev_kfree_skb_any(skb);
+ }
+}
+
+static void mlxsw_sp1_packet_timestamp(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_ptp_key key,
+ struct sk_buff *skb,
+ u64 timestamp)
+{
+ struct skb_shared_hwtstamps hwtstamps;
+ u64 nsec;
+
+ spin_lock_bh(&mlxsw_sp->clock->lock);
+ nsec = timecounter_cyc2time(&mlxsw_sp->clock->tc, timestamp);
+ spin_unlock_bh(&mlxsw_sp->clock->lock);
+
+ hwtstamps.hwtstamp = ns_to_ktime(nsec);
+ mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb,
+ key.local_port, key.ingress, &hwtstamps);
+}
+
+static void
+mlxsw_sp1_ptp_unmatched_finish(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_ptp_unmatched *unmatched)
+{
+ if (unmatched->skb && unmatched->timestamp)
+ mlxsw_sp1_packet_timestamp(mlxsw_sp, unmatched->key,
+ unmatched->skb,
+ unmatched->timestamp);
+ else if (unmatched->skb)
+ mlxsw_sp1_ptp_packet_finish(mlxsw_sp, unmatched->skb,
+ unmatched->key.local_port,
+ unmatched->key.ingress, NULL);
+ kfree_rcu(unmatched, rcu);
+}
+
+static void mlxsw_sp1_ptp_unmatched_free_fn(void *ptr, void *arg)
+{
+ struct mlxsw_sp1_ptp_unmatched *unmatched = ptr;
+
+ /* This is invoked at a point where the ports are gone already. Nothing
+ * to do with whatever is left in the HT but to free it.
+ */
+ if (unmatched->skb)
+ dev_kfree_skb_any(unmatched->skb);
+ kfree_rcu(unmatched, rcu);
+}
+
+static void mlxsw_sp1_ptp_got_piece(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_ptp_key key,
+ struct sk_buff *skb, u64 timestamp)
+{
+ struct mlxsw_sp1_ptp_unmatched *unmatched, *conflict;
+ int err;
+
+ rcu_read_lock();
+
+ unmatched = mlxsw_sp1_ptp_unmatched_lookup(mlxsw_sp, key);
+
+ spin_lock(&mlxsw_sp->ptp_state->unmatched_lock);
+
+ if (unmatched) {
+ /* There was an unmatched entry when we looked, but it may have
+ * been removed before we took the lock.
+ */
+ err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, unmatched);
+ if (err)
+ unmatched = NULL;
+ }
+
+ if (!unmatched) {
+ /* We have no unmatched entry, but one may have been added after
+ * we looked, but before we took the lock.
+ */
+ unmatched = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key,
+ skb, timestamp);
+ if (IS_ERR(unmatched)) {
+ if (skb)
+ mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb,
+ key.local_port,
+ key.ingress, NULL);
+ unmatched = NULL;
+ } else if (unmatched) {
+ /* Save just told us, under lock, that the entry is
+ * there, so this has to work.
+ */
+ err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp,
+ unmatched);
+ WARN_ON_ONCE(err);
+ }
+ }
+
+ /* If unmatched is non-NULL here, it comes either from the lookup, or
+ * from the save attempt above. In either case the entry was removed
+ * from the hash table. If unmatched is NULL, a new unmatched entry was
+ * added to the hash table, and there was no conflict.
+ */
+
+ if (skb && unmatched && unmatched->timestamp) {
+ unmatched->skb = skb;
+ } else if (timestamp && unmatched && unmatched->skb) {
+ unmatched->timestamp = timestamp;
+ } else if (unmatched) {
+ /* unmatched holds an older entry of the same type: either an
+ * skb if we are handling skb, or a timestamp if we are handling
+ * timestamp. We can't match that up, so save what we have.
+ */
+ conflict = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key,
+ skb, timestamp);
+ if (IS_ERR(conflict)) {
+ if (skb)
+ mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb,
+ key.local_port,
+ key.ingress, NULL);
+ } else {
+ /* Above, we removed an object with this key from the
+ * hash table, under lock, so conflict can not be a
+ * valid pointer.
+ */
+ WARN_ON_ONCE(conflict);
+ }
+ }
+
+ spin_unlock(&mlxsw_sp->ptp_state->unmatched_lock);
+
+ if (unmatched)
+ mlxsw_sp1_ptp_unmatched_finish(mlxsw_sp, unmatched);
+
+ rcu_read_unlock();
+}
+
+static void mlxsw_sp1_ptp_got_packet(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port,
+ bool ingress)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ struct mlxsw_sp1_ptp_key key;
+ u8 types;
+ int err;
+
+ mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ if (!mlxsw_sp_port)
+ goto immediate;
+
+ types = ingress ? mlxsw_sp_port->ptp.ing_types :
+ mlxsw_sp_port->ptp.egr_types;
+ if (!types)
+ goto immediate;
+
+ memset(&key, 0, sizeof(key));
+ key.local_port = local_port;
+ key.ingress = ingress;
+
+ err = mlxsw_sp_ptp_parse(skb, &key.domain_number, &key.message_type,
+ &key.sequence_id);
+ if (err)
+ goto immediate;
+
+ /* For packets whose timestamping was not enabled on this port, don't
+ * bother trying to match the timestamp.
+ */
+ if (!((1 << key.message_type) & types))
+ goto immediate;
+
+ mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, skb, 0);
+ return;
+
+immediate:
+ mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, local_port, ingress, NULL);
+}
+
+void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress,
+ u8 local_port, u8 message_type,
+ u8 domain_number, u16 sequence_id,
+ u64 timestamp)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ struct mlxsw_sp1_ptp_key key;
+ u8 types;
+
+ mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ if (!mlxsw_sp_port)
+ return;
+
+ types = ingress ? mlxsw_sp_port->ptp.ing_types :
+ mlxsw_sp_port->ptp.egr_types;
+
+ /* For message types whose timestamping was not enabled on this port,
+ * don't bother with the timestamp.
+ */
+ if (!((1 << message_type) & types))
+ return;
+
+ memset(&key, 0, sizeof(key));
+ key.local_port = local_port;
+ key.domain_number = domain_number;
+ key.message_type = message_type;
+ key.sequence_id = sequence_id;
+ key.ingress = ingress;
+
+ mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, NULL, timestamp);
+}
+
+void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+ u8 local_port)
+{
+ skb_reset_mac_header(skb);
+ mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, true);
+}
+
+void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port)
+{
+ mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, false);
+}
+
+static void
+mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp_ptp_state *ptp_state,
+ struct mlxsw_sp1_ptp_unmatched *unmatched)
+{
+ int err;
+
+ /* If an unmatched entry has an SKB, it has to be handed over to the
+ * networking stack. This is usually done from a trap handler, which is
+ * invoked in a softirq context. Here we are going to do it in process
+ * context. If that were to be interrupted by a softirq, it could cause
+ * a deadlock when an attempt is made to take an already-taken lock
+ * somewhere along the sending path. Disable softirqs to prevent this.
+ */
+ local_bh_disable();
+
+ spin_lock(&ptp_state->unmatched_lock);
+ err = rhashtable_remove_fast(&ptp_state->unmatched_ht,
+ &unmatched->ht_node,
+ mlxsw_sp1_ptp_unmatched_ht_params);
+ spin_unlock(&ptp_state->unmatched_lock);
+
+ if (err)
+ /* The packet was matched with timestamp during the walk. */
+ goto out;
+
+ /* mlxsw_sp1_ptp_unmatched_finish() invokes netif_receive_skb(). While
+ * the comment at that function states that it can only be called in
+ * soft IRQ context, this pattern of local_bh_disable() +
+ * netif_receive_skb(), in process context, is seen elsewhere in the
+ * kernel, notably in pktgen.
+ */
+ mlxsw_sp1_ptp_unmatched_finish(ptp_state->mlxsw_sp, unmatched);
+
+out:
+ local_bh_enable();
+}
+
+static void mlxsw_sp1_ptp_ht_gc(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct mlxsw_sp1_ptp_unmatched *unmatched;
+ struct mlxsw_sp_ptp_state *ptp_state;
+ struct rhashtable_iter iter;
+ u32 gc_cycle;
+ void *obj;
+
+ ptp_state = container_of(dwork, struct mlxsw_sp_ptp_state, ht_gc_dw);
+ gc_cycle = ptp_state->gc_cycle++;
+
+ rhashtable_walk_enter(&ptp_state->unmatched_ht, &iter);
+ rhashtable_walk_start(&iter);
+ while ((obj = rhashtable_walk_next(&iter))) {
+ if (IS_ERR(obj))
+ continue;
+
+ unmatched = obj;
+ if (unmatched->gc_cycle <= gc_cycle)
+ mlxsw_sp1_ptp_ht_gc_collect(ptp_state, unmatched);
+ }
+ rhashtable_walk_stop(&iter);
+ rhashtable_walk_exit(&iter);
+
+ mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw,
+ MLXSW_SP1_PTP_HT_GC_INTERVAL);
+}
+
+static int mlxsw_sp_ptp_mtptpt_set(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_reg_mtptpt_trap_id trap_id,
+ u16 message_type)
+{
+ char mtptpt_pl[MLXSW_REG_MTPTPT_LEN];
+
+ mlxsw_reg_mtptptp_pack(mtptpt_pl, trap_id, message_type);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtptpt), mtptpt_pl);
+}
+
+static int mlxsw_sp1_ptp_set_fifo_clr_on_trap(struct mlxsw_sp *mlxsw_sp,
+ bool clr)
+{
+ char mogcr_pl[MLXSW_REG_MOGCR_LEN] = {0};
+ int err;
+
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_mogcr_ptp_iftc_set(mogcr_pl, clr);
+ mlxsw_reg_mogcr_ptp_eftc_set(mogcr_pl, clr);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl);
+}
+
+static int mlxsw_sp1_ptp_mtpppc_set(struct mlxsw_sp *mlxsw_sp,
+ u16 ing_types, u16 egr_types)
+{
+ char mtpppc_pl[MLXSW_REG_MTPPPC_LEN];
+
+ mlxsw_reg_mtpppc_pack(mtpppc_pl, ing_types, egr_types);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtpppc), mtpppc_pl);
+}
+
+struct mlxsw_sp1_ptp_shaper_params {
+ u32 ethtool_speed;
+ enum mlxsw_reg_qpsc_port_speed port_speed;
+ u8 shaper_time_exp;
+ u8 shaper_time_mantissa;
+ u8 shaper_inc;
+ u8 shaper_bs;
+ u8 port_to_shaper_credits;
+ int ing_timestamp_inc;
+ int egr_timestamp_inc;
+};
+
+static const struct mlxsw_sp1_ptp_shaper_params
+mlxsw_sp1_ptp_shaper_params[] = {
+ {
+ .ethtool_speed = SPEED_100,
+ .port_speed = MLXSW_REG_QPSC_PORT_SPEED_100M,
+ .shaper_time_exp = 4,
+ .shaper_time_mantissa = 12,
+ .shaper_inc = 9,
+ .shaper_bs = 1,
+ .port_to_shaper_credits = 1,
+ .ing_timestamp_inc = -313,
+ .egr_timestamp_inc = 313,
+ },
+ {
+ .ethtool_speed = SPEED_1000,
+ .port_speed = MLXSW_REG_QPSC_PORT_SPEED_1G,
+ .shaper_time_exp = 0,
+ .shaper_time_mantissa = 12,
+ .shaper_inc = 6,
+ .shaper_bs = 0,
+ .port_to_shaper_credits = 1,
+ .ing_timestamp_inc = -35,
+ .egr_timestamp_inc = 35,
+ },
+ {
+ .ethtool_speed = SPEED_10000,
+ .port_speed = MLXSW_REG_QPSC_PORT_SPEED_10G,
+ .shaper_time_exp = 0,
+ .shaper_time_mantissa = 2,
+ .shaper_inc = 14,
+ .shaper_bs = 1,
+ .port_to_shaper_credits = 1,
+ .ing_timestamp_inc = -11,
+ .egr_timestamp_inc = 11,
+ },
+ {
+ .ethtool_speed = SPEED_25000,
+ .port_speed = MLXSW_REG_QPSC_PORT_SPEED_25G,
+ .shaper_time_exp = 0,
+ .shaper_time_mantissa = 0,
+ .shaper_inc = 11,
+ .shaper_bs = 1,
+ .port_to_shaper_credits = 1,
+ .ing_timestamp_inc = -14,
+ .egr_timestamp_inc = 14,
+ },
+};
+
+#define MLXSW_SP1_PTP_SHAPER_PARAMS_LEN ARRAY_SIZE(mlxsw_sp1_ptp_shaper_params)
+
+static int mlxsw_sp1_ptp_shaper_params_set(struct mlxsw_sp *mlxsw_sp)
+{
+ const struct mlxsw_sp1_ptp_shaper_params *params;
+ char qpsc_pl[MLXSW_REG_QPSC_LEN];
+ int i, err;
+
+ for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) {
+ params = &mlxsw_sp1_ptp_shaper_params[i];
+ mlxsw_reg_qpsc_pack(qpsc_pl, params->port_speed,
+ params->shaper_time_exp,
+ params->shaper_time_mantissa,
+ params->shaper_inc, params->shaper_bs,
+ params->port_to_shaper_credits,
+ params->ing_timestamp_inc,
+ params->egr_timestamp_inc);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpsc), qpsc_pl);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_ptp_state *ptp_state;
+ u16 message_type;
+ int err;
+
+ err = mlxsw_sp1_ptp_shaper_params_set(mlxsw_sp);
+ if (err)
+ return ERR_PTR(err);
+
+ ptp_state = kzalloc(sizeof(*ptp_state), GFP_KERNEL);
+ if (!ptp_state)
+ return ERR_PTR(-ENOMEM);
+ ptp_state->mlxsw_sp = mlxsw_sp;
+
+ spin_lock_init(&ptp_state->unmatched_lock);
+
+ err = rhashtable_init(&ptp_state->unmatched_ht,
+ &mlxsw_sp1_ptp_unmatched_ht_params);
+ if (err)
+ goto err_hashtable_init;
+
+ /* Delive these message types as PTP0. */
+ message_type = BIT(MLXSW_SP_PTP_MESSAGE_TYPE_SYNC) |
+ BIT(MLXSW_SP_PTP_MESSAGE_TYPE_DELAY_REQ) |
+ BIT(MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_REQ) |
+ BIT(MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_RESP);
+ err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0,
+ message_type);
+ if (err)
+ goto err_mtptpt_set;
+
+ /* Everything else is PTP1. */
+ message_type = ~message_type;
+ err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1,
+ message_type);
+ if (err)
+ goto err_mtptpt1_set;
+
+ err = mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, true);
+ if (err)
+ goto err_fifo_clr;
+
+ INIT_DELAYED_WORK(&ptp_state->ht_gc_dw, mlxsw_sp1_ptp_ht_gc);
+ mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw,
+ MLXSW_SP1_PTP_HT_GC_INTERVAL);
+ return ptp_state;
+
+err_fifo_clr:
+ mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0);
+err_mtptpt1_set:
+ mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0);
+err_mtptpt_set:
+ rhashtable_destroy(&ptp_state->unmatched_ht);
+err_hashtable_init:
+ kfree(ptp_state);
+ return ERR_PTR(err);
+}
+
+void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state)
+{
+ struct mlxsw_sp *mlxsw_sp = ptp_state->mlxsw_sp;
+
+ cancel_delayed_work_sync(&ptp_state->ht_gc_dw);
+ mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp, 0, 0);
+ mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, false);
+ mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0);
+ mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0);
+ rhashtable_free_and_destroy(&ptp_state->unmatched_ht,
+ &mlxsw_sp1_ptp_unmatched_free_fn, NULL);
+ kfree(ptp_state);
+}
+
+int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config)
+{
+ *config = mlxsw_sp_port->ptp.hwtstamp_config;
+ return 0;
+}
+
+static int mlxsw_sp_ptp_get_message_types(const struct hwtstamp_config *config,
+ u16 *p_ing_types, u16 *p_egr_types,
+ enum hwtstamp_rx_filters *p_rx_filter)
+{
+ enum hwtstamp_rx_filters rx_filter = config->rx_filter;
+ enum hwtstamp_tx_types tx_type = config->tx_type;
+ u16 ing_types = 0x00;
+ u16 egr_types = 0x00;
+
+ switch (tx_type) {
+ case HWTSTAMP_TX_OFF:
+ egr_types = 0x00;
+ break;
+ case HWTSTAMP_TX_ON:
+ egr_types = 0xff;
+ break;
+ case HWTSTAMP_TX_ONESTEP_SYNC:
+ return -ERANGE;
+ }
+
+ switch (rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ ing_types = 0x00;
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ ing_types = 0x01;
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ ing_types = 0x02;
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ ing_types = 0x0f;
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ ing_types = 0xff;
+ break;
+ case HWTSTAMP_FILTER_SOME:
+ case HWTSTAMP_FILTER_NTP_ALL:
+ return -ERANGE;
+ }
+
+ *p_ing_types = ing_types;
+ *p_egr_types = egr_types;
+ *p_rx_filter = rx_filter;
+ return 0;
+}
+
+static int mlxsw_sp1_ptp_mtpppc_update(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 ing_types, u16 egr_types)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_port *tmp;
+ int i;
+
+ /* MTPPPC configures timestamping globally, not per port. Find the
+ * configuration that contains all configured timestamping requests.
+ */
+ for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) {
+ tmp = mlxsw_sp->ports[i];
+ if (tmp && tmp != mlxsw_sp_port) {
+ ing_types |= tmp->ptp.ing_types;
+ egr_types |= tmp->ptp.egr_types;
+ }
+ }
+
+ return mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp_port->mlxsw_sp,
+ ing_types, egr_types);
+}
+
+static bool mlxsw_sp1_ptp_hwtstamp_enabled(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ return mlxsw_sp_port->ptp.ing_types || mlxsw_sp_port->ptp.egr_types;
+}
+
+static int
+mlxsw_sp1_ptp_port_shaper_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qeec_pl[MLXSW_REG_QEEC_LEN];
+
+ mlxsw_reg_qeec_ptps_pack(qeec_pl, mlxsw_sp_port->local_port, enable);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
+}
+
+static int mlxsw_sp1_ptp_port_shaper_check(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops;
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char ptys_pl[MLXSW_REG_PTYS_LEN];
+ u32 eth_proto_oper, speed;
+ bool ptps = false;
+ int err, i;
+
+ if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port))
+ return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, false);
+
+ port_type_speed_ops = mlxsw_sp->port_type_speed_ops;
+ port_type_speed_ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl,
+ mlxsw_sp_port->local_port, 0,
+ false);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
+ if (err)
+ return err;
+ port_type_speed_ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, NULL, NULL,
+ &eth_proto_oper);
+
+ speed = port_type_speed_ops->from_ptys_speed(mlxsw_sp, eth_proto_oper);
+ for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) {
+ if (mlxsw_sp1_ptp_shaper_params[i].ethtool_speed == speed) {
+ ptps = true;
+ break;
+ }
+ }
+
+ return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, ptps);
+}
+
+void mlxsw_sp1_ptp_shaper_work(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ int err;
+
+ mlxsw_sp_port = container_of(dwork, struct mlxsw_sp_port,
+ ptp.shaper_dw);
+
+ if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port))
+ return;
+
+ err = mlxsw_sp1_ptp_port_shaper_check(mlxsw_sp_port);
+ if (err)
+ netdev_err(mlxsw_sp_port->dev, "Failed to set up PTP shaper\n");
+}
+
+int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config)
+{
+ enum hwtstamp_rx_filters rx_filter;
+ u16 ing_types;
+ u16 egr_types;
+ int err;
+
+ err = mlxsw_sp_ptp_get_message_types(config, &ing_types, &egr_types,
+ &rx_filter);
+ if (err)
+ return err;
+
+ err = mlxsw_sp1_ptp_mtpppc_update(mlxsw_sp_port, ing_types, egr_types);
+ if (err)
+ return err;
+
+ mlxsw_sp_port->ptp.hwtstamp_config = *config;
+ mlxsw_sp_port->ptp.ing_types = ing_types;
+ mlxsw_sp_port->ptp.egr_types = egr_types;
+
+ err = mlxsw_sp1_ptp_port_shaper_check(mlxsw_sp_port);
+ if (err)
+ return err;
+
+ /* Notify the ioctl caller what we are actually timestamping. */
+ config->rx_filter = rx_filter;
+
+ return 0;
+}
+
+int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp,
+ struct ethtool_ts_info *info)
+{
+ info->phc_index = ptp_clock_index(mlxsw_sp->clock->ptp);
+
+ info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ info->tx_types = BIT(HWTSTAMP_TX_OFF) |
+ BIT(HWTSTAMP_TX_ON);
+
+ info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+ BIT(HWTSTAMP_FILTER_ALL);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h
new file mode 100644
index 000000000000..72e55f6926b9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_SPECTRUM_PTP_H
+#define _MLXSW_SPECTRUM_PTP_H
+
+#include <linux/device.h>
+#include <linux/rhashtable.h>
+
+struct mlxsw_sp;
+struct mlxsw_sp_port;
+struct mlxsw_sp_ptp_clock;
+
+enum {
+ MLXSW_SP_PTP_MESSAGE_TYPE_SYNC,
+ MLXSW_SP_PTP_MESSAGE_TYPE_DELAY_REQ,
+ MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_REQ,
+ MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_RESP,
+};
+
+static inline int mlxsw_sp_ptp_get_ts_info_noptp(struct ethtool_ts_info *info)
+{
+ info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE;
+ info->phc_index = -1;
+ return 0;
+}
+
+#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK)
+
+struct mlxsw_sp_ptp_clock *
+mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev);
+
+void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock);
+
+struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp);
+
+void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state);
+
+void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+ u8 local_port);
+
+void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port);
+
+void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress,
+ u8 local_port, u8 message_type,
+ u8 domain_number, u16 sequence_id,
+ u64 timestamp);
+
+int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config);
+
+int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config);
+
+void mlxsw_sp1_ptp_shaper_work(struct work_struct *work);
+
+int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp,
+ struct ethtool_ts_info *info);
+
+#else
+
+static inline struct mlxsw_sp_ptp_clock *
+mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev)
+{
+ return NULL;
+}
+
+static inline void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock)
+{
+}
+
+static inline struct mlxsw_sp_ptp_state *
+mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp)
+{
+ return NULL;
+}
+
+static inline void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state)
+{
+}
+
+static inline void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port)
+{
+ mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp);
+}
+
+static inline void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port)
+{
+ dev_kfree_skb_any(skb);
+}
+
+static inline void
+mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress,
+ u8 local_port, u8 message_type,
+ u8 domain_number,
+ u16 sequence_id, u64 timestamp)
+{
+}
+
+static inline int
+mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void mlxsw_sp1_ptp_shaper_work(struct work_struct *work)
+{
+}
+
+static inline int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp,
+ struct ethtool_ts_info *info)
+{
+ return mlxsw_sp_ptp_get_ts_info_noptp(info);
+}
+
+#endif
+
+static inline struct mlxsw_sp_ptp_clock *
+mlxsw_sp2_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev)
+{
+ return NULL;
+}
+
+static inline void mlxsw_sp2_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock)
+{
+}
+
+static inline struct mlxsw_sp_ptp_state *
+mlxsw_sp2_ptp_init(struct mlxsw_sp *mlxsw_sp)
+{
+ return NULL;
+}
+
+static inline void mlxsw_sp2_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state)
+{
+}
+
+static inline void mlxsw_sp2_ptp_receive(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port)
+{
+ mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp);
+}
+
+static inline void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port)
+{
+ dev_kfree_skb_any(skb);
+}
+
+static inline int
+mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void mlxsw_sp2_ptp_shaper_work(struct work_struct *work)
+{
+}
+
+static inline int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp,
+ struct ethtool_ts_info *info)
+{
+ return mlxsw_sp_ptp_get_ts_info_noptp(info);
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 1cda8a248b12..e618be7ce6c6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -21,6 +21,7 @@
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
+#include <net/nexthop.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
@@ -2363,7 +2364,7 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_neigh_entry *neigh_entry,
- bool removing);
+ bool removing, bool dead);
static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
{
@@ -2507,7 +2508,8 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
memcpy(neigh_entry->ha, ha, ETH_ALEN);
mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
- mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
+ mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
+ dead);
if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
@@ -2886,7 +2888,7 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
return false;
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
- struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh;
+ struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
struct in6_addr *gw;
int ifindex, weight;
@@ -2958,7 +2960,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
struct net_device *dev;
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
- dev = mlxsw_sp_rt6->rt->fib6_nh.fib_nh_dev;
+ dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev;
val ^= dev->ifindex;
}
@@ -3472,13 +3474,79 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
nh->update = 1;
}
+static int
+mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ struct neighbour *n, *old_n = neigh_entry->key.n;
+ struct mlxsw_sp_nexthop *nh;
+ bool entry_connected;
+ u8 nud_state, dead;
+ int err;
+
+ nh = list_first_entry(&neigh_entry->nexthop_list,
+ struct mlxsw_sp_nexthop, neigh_list_node);
+
+ n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
+ if (!n) {
+ n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
+ nh->rif->dev);
+ if (IS_ERR(n))
+ return PTR_ERR(n);
+ neigh_event_send(n, NULL);
+ }
+
+ mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
+ neigh_entry->key.n = n;
+ err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
+ if (err)
+ goto err_neigh_entry_insert;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ dead = n->dead;
+ read_unlock_bh(&n->lock);
+ entry_connected = nud_state & NUD_VALID && !dead;
+
+ list_for_each_entry(nh, &neigh_entry->nexthop_list,
+ neigh_list_node) {
+ neigh_release(old_n);
+ neigh_clone(n);
+ __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+ }
+
+ neigh_release(n);
+
+ return 0;
+
+err_neigh_entry_insert:
+ neigh_entry->key.n = old_n;
+ mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
+ neigh_release(n);
+ return err;
+}
+
static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_neigh_entry *neigh_entry,
- bool removing)
+ bool removing, bool dead)
{
struct mlxsw_sp_nexthop *nh;
+ if (list_empty(&neigh_entry->nexthop_list))
+ return;
+
+ if (dead) {
+ int err;
+
+ err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
+ neigh_entry);
+ if (err)
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
+ return;
+ }
+
list_for_each_entry(nh, &neigh_entry->nexthop_list,
neigh_list_node) {
__mlxsw_sp_nexthop_neigh_update(nh, removing);
@@ -3816,23 +3884,25 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
}
static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
- const struct fib_info *fi)
+ struct fib_info *fi)
{
- return fi->fib_nh->fib_nh_scope == RT_SCOPE_LINK ||
- mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
+ const struct fib_nh *nh = fib_info_nh(fi, 0);
+
+ return nh->fib_nh_scope == RT_SCOPE_LINK ||
+ mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
}
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
+ unsigned int nhs = fib_info_num_path(fi);
struct mlxsw_sp_nexthop_group *nh_grp;
struct mlxsw_sp_nexthop *nh;
struct fib_nh *fib_nh;
int i;
int err;
- nh_grp = kzalloc(struct_size(nh_grp, nexthops, fi->fib_nhs),
- GFP_KERNEL);
+ nh_grp = kzalloc(struct_size(nh_grp, nexthops, nhs), GFP_KERNEL);
if (!nh_grp)
return ERR_PTR(-ENOMEM);
nh_grp->priv = fi;
@@ -3840,11 +3910,11 @@ mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
nh_grp->neigh_tbl = &arp_tbl;
nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
- nh_grp->count = fi->fib_nhs;
+ nh_grp->count = nhs;
fib_info_hold(fi);
for (i = 0; i < nh_grp->count; i++) {
nh = &nh_grp->nexthops[i];
- fib_nh = &fi->fib_nh[i];
+ fib_nh = fib_info_nh(fi, i);
err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
if (err)
goto err_nexthop4_init;
@@ -3960,9 +4030,9 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
struct fib6_info *rt = mlxsw_sp_rt6->rt;
- if (nh->rif && nh->rif->dev == rt->fib6_nh.fib_nh_dev &&
+ if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
- &rt->fib6_nh.fib_nh_gw6))
+ &rt->fib6_nh->fib_nh_gw6))
return nh;
continue;
}
@@ -4022,13 +4092,13 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) {
list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
- list)->rt->fib6_nh.fib_nh_flags |= RTNH_F_OFFLOAD;
+ list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
return;
}
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
- struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh;
+ struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
struct mlxsw_sp_nexthop *nh;
nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
@@ -4050,7 +4120,7 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
struct fib6_info *rt = mlxsw_sp_rt6->rt;
- rt->fib6_nh.fib_nh_flags &= ~RTNH_F_OFFLOAD;
+ rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
}
}
@@ -4282,9 +4352,9 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
const struct fib_entry_notifier_info *fen_info,
struct mlxsw_sp_fib_entry *fib_entry)
{
+ struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
- struct net_device *dev = fen_info->fi->fib_dev;
struct mlxsw_sp_ipip_entry *ipip_entry;
struct fib_info *fi = fen_info->fi;
@@ -4928,7 +4998,8 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
{
/* RTF_CACHE routes are ignored */
- return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_gw_family;
+ return !(rt->fib6_flags & RTF_ADDRCONF) &&
+ rt->fib6_nh->fib_nh_gw_family;
}
static struct fib6_info *
@@ -4987,8 +5058,8 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
const struct fib6_info *rt,
enum mlxsw_sp_ipip_type *ret)
{
- return rt->fib6_nh.fib_nh_dev &&
- mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.fib_nh_dev, ret);
+ return rt->fib6_nh->fib_nh_dev &&
+ mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
}
static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
@@ -4998,7 +5069,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
{
const struct mlxsw_sp_ipip_ops *ipip_ops;
struct mlxsw_sp_ipip_entry *ipip_entry;
- struct net_device *dev = rt->fib6_nh.fib_nh_dev;
+ struct net_device *dev = rt->fib6_nh->fib_nh_dev;
struct mlxsw_sp_rif *rif;
int err;
@@ -5041,11 +5112,11 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh,
const struct fib6_info *rt)
{
- struct net_device *dev = rt->fib6_nh.fib_nh_dev;
+ struct net_device *dev = rt->fib6_nh->fib_nh_dev;
nh->nh_grp = nh_grp;
- nh->nh_weight = rt->fib6_nh.fib_nh_weight;
- memcpy(&nh->gw_addr, &rt->fib6_nh.fib_nh_gw6, sizeof(nh->gw_addr));
+ nh->nh_weight = rt->fib6_nh->fib_nh_weight;
+ memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
@@ -5068,7 +5139,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
const struct fib6_info *rt)
{
- return rt->fib6_nh.fib_nh_gw_family ||
+ return rt->fib6_nh->fib_nh_gw_family ||
mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
}
@@ -5207,17 +5278,21 @@ err_nexthop6_group_get:
static int
mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib6_entry *fib6_entry,
- struct fib6_info *rt)
+ struct fib6_info **rt_arr, unsigned int nrt6)
{
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
- int err;
+ int err, i;
- mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
- if (IS_ERR(mlxsw_sp_rt6))
- return PTR_ERR(mlxsw_sp_rt6);
+ for (i = 0; i < nrt6; i++) {
+ mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
+ if (IS_ERR(mlxsw_sp_rt6)) {
+ err = PTR_ERR(mlxsw_sp_rt6);
+ goto err_rt6_create;
+ }
- list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
- fib6_entry->nrt6++;
+ list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+ fib6_entry->nrt6++;
+ }
err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
if (err)
@@ -5226,27 +5301,38 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
return 0;
err_nexthop6_group_update:
- fib6_entry->nrt6--;
- list_del(&mlxsw_sp_rt6->list);
- mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+ i = nrt6;
+err_rt6_create:
+ for (i--; i >= 0; i--) {
+ fib6_entry->nrt6--;
+ mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
+ struct mlxsw_sp_rt6, list);
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+ }
return err;
}
static void
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib6_entry *fib6_entry,
- struct fib6_info *rt)
+ struct fib6_info **rt_arr, unsigned int nrt6)
{
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+ int i;
- mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
- if (WARN_ON(!mlxsw_sp_rt6))
- return;
+ for (i = 0; i < nrt6; i++) {
+ mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
+ rt_arr[i]);
+ if (WARN_ON_ONCE(!mlxsw_sp_rt6))
+ continue;
+
+ fib6_entry->nrt6--;
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+ }
- fib6_entry->nrt6--;
- list_del(&mlxsw_sp_rt6->list);
mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
- mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
}
static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
@@ -5287,29 +5373,32 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_node *fib_node,
- struct fib6_info *rt)
+ struct fib6_info **rt_arr, unsigned int nrt6)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_entry *fib_entry;
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
- int err;
+ int err, i;
fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
if (!fib6_entry)
return ERR_PTR(-ENOMEM);
fib_entry = &fib6_entry->common;
- mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
- if (IS_ERR(mlxsw_sp_rt6)) {
- err = PTR_ERR(mlxsw_sp_rt6);
- goto err_rt6_create;
+ INIT_LIST_HEAD(&fib6_entry->rt6_list);
+
+ for (i = 0; i < nrt6; i++) {
+ mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
+ if (IS_ERR(mlxsw_sp_rt6)) {
+ err = PTR_ERR(mlxsw_sp_rt6);
+ goto err_rt6_create;
+ }
+ list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+ fib6_entry->nrt6++;
}
- mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
+ mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
- INIT_LIST_HEAD(&fib6_entry->rt6_list);
- list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
- fib6_entry->nrt6 = 1;
err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
if (err)
goto err_nexthop6_group_get;
@@ -5319,9 +5408,15 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
return fib6_entry;
err_nexthop6_group_get:
- list_del(&mlxsw_sp_rt6->list);
- mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+ i = nrt6;
err_rt6_create:
+ for (i--; i >= 0; i--) {
+ fib6_entry->nrt6--;
+ mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
+ struct mlxsw_sp_rt6, list);
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+ }
kfree(fib6_entry);
return ERR_PTR(err);
}
@@ -5364,16 +5459,16 @@ mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
static int
mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
- bool replace)
+ bool *p_replace)
{
struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
struct mlxsw_sp_fib6_entry *fib6_entry;
- fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
+ fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, *p_replace);
- if (replace && WARN_ON(!fib6_entry))
- return -EINVAL;
+ if (*p_replace && !fib6_entry)
+ *p_replace = false;
if (fib6_entry) {
list_add_tail(&new6_entry->common.list,
@@ -5408,11 +5503,11 @@ mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib6_entry *fib6_entry,
- bool replace)
+ bool *p_replace)
{
int err;
- err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
+ err = mlxsw_sp_fib6_node_list_insert(fib6_entry, p_replace);
if (err)
return err;
@@ -5485,10 +5580,12 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
}
static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
- struct fib6_info *rt, bool replace)
+ struct fib6_info **rt_arr,
+ unsigned int nrt6, bool replace)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_node *fib_node;
+ struct fib6_info *rt = rt_arr[0];
int err;
if (mlxsw_sp->router->aborted)
@@ -5513,19 +5610,21 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
*/
fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
if (fib6_entry) {
- err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
+ err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry,
+ rt_arr, nrt6);
if (err)
goto err_fib6_entry_nexthop_add;
return 0;
}
- fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
+ fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
+ nrt6);
if (IS_ERR(fib6_entry)) {
err = PTR_ERR(fib6_entry);
goto err_fib6_entry_create;
}
- err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
+ err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, &replace);
if (err)
goto err_fib6_node_entry_link;
@@ -5542,10 +5641,12 @@ err_fib6_entry_nexthop_add:
}
static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
- struct fib6_info *rt)
+ struct fib6_info **rt_arr,
+ unsigned int nrt6)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_node *fib_node;
+ struct fib6_info *rt = rt_arr[0];
if (mlxsw_sp->router->aborted)
return;
@@ -5557,11 +5658,12 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
if (WARN_ON(!fib6_entry))
return;
- /* If route is part of a multipath entry, but not the last one
- * removed, then only reduce its nexthop group.
+ /* If not all the nexthops are deleted, then only reduce the nexthop
+ * group.
*/
- if (!list_is_singular(&fib6_entry->rt6_list)) {
- mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
+ if (nrt6 != fib6_entry->nrt6) {
+ mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
+ nrt6);
return;
}
@@ -5822,10 +5924,15 @@ static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}
+struct mlxsw_sp_fib6_event_work {
+ struct fib6_info **rt_arr;
+ unsigned int nrt6;
+};
+
struct mlxsw_sp_fib_event_work {
struct work_struct work;
union {
- struct fib6_entry_notifier_info fen6_info;
+ struct mlxsw_sp_fib6_event_work fib6_work;
struct fib_entry_notifier_info fen_info;
struct fib_rule_notifier_info fr_info;
struct fib_nh_notifier_info fnh_info;
@@ -5836,6 +5943,54 @@ struct mlxsw_sp_fib_event_work {
unsigned long event;
};
+static int
+mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
+ struct fib6_entry_notifier_info *fen6_info)
+{
+ struct fib6_info *rt = fen6_info->rt;
+ struct fib6_info **rt_arr;
+ struct fib6_info *iter;
+ unsigned int nrt6;
+ int i = 0;
+
+ nrt6 = fen6_info->nsiblings + 1;
+
+ rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
+ if (!rt_arr)
+ return -ENOMEM;
+
+ fib6_work->rt_arr = rt_arr;
+ fib6_work->nrt6 = nrt6;
+
+ rt_arr[0] = rt;
+ fib6_info_hold(rt);
+
+ if (!fen6_info->nsiblings)
+ return 0;
+
+ list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
+ if (i == fen6_info->nsiblings)
+ break;
+
+ rt_arr[i + 1] = iter;
+ fib6_info_hold(iter);
+ i++;
+ }
+ WARN_ON_ONCE(i != fen6_info->nsiblings);
+
+ return 0;
+}
+
+static void
+mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
+{
+ int i;
+
+ for (i = 0; i < fib6_work->nrt6; i++)
+ mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
+ kfree(fib6_work->rt_arr);
+}
+
static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
{
struct mlxsw_sp_fib_event_work *fib_work =
@@ -5894,18 +6049,21 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
- case FIB_EVENT_ENTRY_APPEND: /* fall through */
case FIB_EVENT_ENTRY_ADD:
replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
err = mlxsw_sp_router_fib6_add(mlxsw_sp,
- fib_work->fen6_info.rt, replace);
+ fib_work->fib6_work.rt_arr,
+ fib_work->fib6_work.nrt6,
+ replace);
if (err)
mlxsw_sp_router_fib_abort(mlxsw_sp);
- mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
+ mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
break;
case FIB_EVENT_ENTRY_DEL:
- mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
- mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
+ mlxsw_sp_router_fib6_del(mlxsw_sp,
+ fib_work->fib6_work.rt_arr,
+ fib_work->fib6_work.nrt6);
+ mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
break;
case FIB_EVENT_RULE_ADD:
/* if we get here, a rule was added that we do not support.
@@ -5994,22 +6152,26 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
}
}
-static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
- struct fib_notifier_info *info)
+static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
+ struct fib_notifier_info *info)
{
struct fib6_entry_notifier_info *fen6_info;
+ int err;
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
- case FIB_EVENT_ENTRY_APPEND: /* fall through */
case FIB_EVENT_ENTRY_ADD: /* fall through */
case FIB_EVENT_ENTRY_DEL:
fen6_info = container_of(info, struct fib6_entry_notifier_info,
info);
- fib_work->fen6_info = *fen6_info;
- fib6_info_hold(fib_work->fen6_info.rt);
+ err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
+ fen6_info);
+ if (err)
+ return err;
break;
}
+
+ return 0;
}
static void
@@ -6118,6 +6280,20 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
return notifier_from_errno(-EINVAL);
}
+ if (fen_info->fi->nh) {
+ NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
+ return notifier_from_errno(-EINVAL);
+ }
+ } else if (info->family == AF_INET6) {
+ struct fib6_entry_notifier_info *fen6_info;
+
+ fen6_info = container_of(info,
+ struct fib6_entry_notifier_info,
+ info);
+ if (fen6_info->rt->nh) {
+ NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported");
+ return notifier_from_errno(-EINVAL);
+ }
}
break;
}
@@ -6136,7 +6312,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
break;
case AF_INET6:
INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
- mlxsw_sp_router_fib6_event(fib_work, info);
+ err = mlxsw_sp_router_fib6_event(fib_work, info);
+ if (err)
+ goto err_fib_event;
break;
case RTNL_FAMILY_IP6MR:
case RTNL_FAMILY_IPMR:
@@ -6148,6 +6326,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
mlxsw_core_schedule_work(&fib_work->work);
return NOTIFY_DONE;
+
+err_fib_event:
+ kfree(fib_work);
+ return NOTIFY_BAD;
}
struct mlxsw_sp_rif *
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 50111f228d77..5ecb45118400 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -2468,6 +2468,9 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
goto just_remove;
}
+ if (mlxsw_sp_fid_is_dummy(mlxsw_sp, fid))
+ goto just_remove;
+
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_fid(mlxsw_sp_port, fid);
if (!mlxsw_sp_port_vlan) {
netdev_err(mlxsw_sp_port->dev, "Failed to find a matching {Port, VID} following FDB notification\n");
@@ -2527,6 +2530,9 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp,
goto just_remove;
}
+ if (mlxsw_sp_fid_is_dummy(mlxsw_sp, fid))
+ goto just_remove;
+
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_fid(mlxsw_sp_port, fid);
if (!mlxsw_sp_port_vlan) {
netdev_err(mlxsw_sp_port->dev, "Failed to find a matching {Port, VID} following FDB notification\n");
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index fc4f19167262..bdab96f5bc70 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -299,6 +299,8 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb,
u64 len;
int err;
+ memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb));
+
if (mlxsw_core_skb_transmit_busy(mlxsw_sx->core, &tx_info))
return NETDEV_TX_BUSY;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 451216dd7f6b..19202bdb5105 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -17,6 +17,8 @@ enum {
MLXSW_TRAP_ID_MVRP = 0x15,
MLXSW_TRAP_ID_RPVST = 0x16,
MLXSW_TRAP_ID_DHCP = 0x19,
+ MLXSW_TRAP_ID_PTP0 = 0x28,
+ MLXSW_TRAP_ID_PTP1 = 0x29,
MLXSW_TRAP_ID_IGMP_QUERY = 0x30,
MLXSW_TRAP_ID_IGMP_V1_REPORT = 0x31,
MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32,
@@ -76,6 +78,10 @@ enum {
enum mlxsw_event_trap_id {
/* Port Up/Down event generated by hardware */
MLXSW_TRAP_ID_PUDE = 0x8,
+ /* PTP Ingress FIFO has a new entry */
+ MLXSW_TRAP_ID_PTP_ING_FIFO = 0x2D,
+ /* PTP Egress FIFO has a new entry */
+ MLXSW_TRAP_ID_PTP_EGR_FIFO = 0x2E,
};
#endif /* _MLXSW_TRAP_H */