diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5')
119 files changed, 10362 insertions, 3259 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 6debffb8336b..37fef8cd25e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Mellanox driver configuration # @@ -5,8 +6,10 @@ config MLX5_CORE tristate "Mellanox 5th generation network adapters (ConnectX series) core driver" depends on PCI + select NET_DEVLINK imply PTP_1588_CLOCK imply VXLAN + imply MLXFW default n ---help--- Core driver for low level functionality of the ConnectX-4 and @@ -31,6 +34,7 @@ config MLX5_CORE_EN depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE depends on IPV6=y || IPV6=n || MLX5_CORE=m select PAGE_POOL + select DIMLIB default n ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. @@ -93,26 +97,60 @@ config MLX5_CORE_IPOIB ---help--- MLX5 IPoIB offloads & acceleration support. +config MLX5_FPGA_IPSEC + bool "Mellanox Technologies IPsec Innova support" + depends on MLX5_CORE + depends on MLX5_FPGA + default n + help + Build IPsec support for the Innova family of network cards by Mellanox + Technologies. Innova network cards are comprised of a ConnectX chip + and an FPGA chip on one board. If you select this option, the + mlx5_core driver will include the Innova FPGA core and allow building + sandbox-specific client drivers. + config MLX5_EN_IPSEC bool "IPSec XFRM cryptography-offload accelaration" - depends on MLX5_ACCEL depends on MLX5_CORE_EN depends on XFRM_OFFLOAD depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD + depends on MLX5_FPGA_IPSEC default n - ---help--- + help Build support for IPsec cryptography-offload accelaration in the NIC. Note: Support for hardware with this capability needs to be selected for this option to become available. -config MLX5_EN_TLS - bool "TLS cryptography-offload accelaration" +config MLX5_FPGA_TLS + bool "Mellanox Technologies TLS Innova support" + depends on TLS_DEVICE + depends on TLS=y || MLX5_CORE=m + depends on MLX5_FPGA + default n + help + Build TLS support for the Innova family of network cards by Mellanox + Technologies. Innova network cards are comprised of a ConnectX chip + and an FPGA chip on one board. If you select this option, the + mlx5_core driver will include the Innova FPGA core and allow building + sandbox-specific client drivers. + +config MLX5_TLS + bool "Mellanox Technologies TLS Connect-X support" depends on MLX5_CORE_EN depends on TLS_DEVICE depends on TLS=y || MLX5_CORE=m - depends on MLX5_ACCEL + select MLX5_ACCEL default n - ---help--- - Build support for TLS cryptography-offload accelaration in the NIC. - Note: Support for hardware with this capability needs to be selected - for this option to become available. + help + Build TLS support for the Connect-X family of network cards by Mellanox + Technologies. + +config MLX5_EN_TLS + bool "TLS cryptography-offload accelaration" + depends on MLX5_CORE_EN + depends on MLX5_FPGA_TLS || MLX5_TLS + default y + help + Build support for TLS cryptography-offload accelaration in the NIC. + Note: Support for hardware with this capability needs to be selected + for this option to become available. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 1a16f6d73cbc..57d2cc666fe3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -13,16 +13,18 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ - transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ - lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o + lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \ + diag/fw_tracer.o diag/crdump.o devlink.o # # Netdev basic # mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ - en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o + en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o \ + en/params.o en/xsk/umem.o en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o # # Netdev extra @@ -30,12 +32,15 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \ + lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ + en/tc_tun_geneve.o # # Core extra # -mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ + ecpf.o rdma.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o @@ -48,14 +53,14 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib # # Accelerations & FPGA # -mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o accel/tls.o +mlx5_core-$(CONFIG_MLX5_FPGA_IPSEC) += fpga/ipsec.o +mlx5_core-$(CONFIG_MLX5_FPGA_TLS) += fpga/tls.o +mlx5_core-$(CONFIG_MLX5_ACCEL) += lib/crypto.o accel/tls.o accel/ipsec.o -mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \ - fpga/ipsec.o fpga/tls.o +mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ en_accel/ipsec_stats.o -mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o - -CFLAGS_tracepoint.o := -I$(src) +mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \ + en_accel/ktls.o en_accel/ktls_tx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile index d8e17110f25d..c78512eed8d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c index 9f1b1939716a..eddc34e4a762 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c @@ -31,6 +31,8 @@ * */ +#ifdef CONFIG_MLX5_FPGA_IPSEC + #include <linux/mlx5/device.h> #include "accel/ipsec.h" @@ -74,6 +76,11 @@ int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) return mlx5_fpga_ipsec_init(mdev); } +void mlx5_accel_ipsec_build_fs_cmds(void) +{ + mlx5_fpga_ipsec_build_fs_cmds(); +} + void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) { mlx5_fpga_ipsec_cleanup(mdev); @@ -107,3 +114,5 @@ int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, return mlx5_fpga_esp_modify_xfrm(xfrm, attrs); } EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h index 024dbd22a89b..530e428d46ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h @@ -37,7 +37,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/accel.h> -#ifdef CONFIG_MLX5_ACCEL +#ifdef CONFIG_MLX5_FPGA_IPSEC #define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \ MLX5_ACCEL_IPSEC_CAP_DEVICE) @@ -54,6 +54,7 @@ void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, void mlx5_accel_esp_free_hw_context(void *context); int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev); +void mlx5_accel_ipsec_build_fs_cmds(void); void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev); #else @@ -79,6 +80,10 @@ static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) return 0; } +static inline void mlx5_accel_ipsec_build_fs_cmds(void) +{ +} + static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) { } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c index da7bd26368f9..cab708af3422 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c @@ -35,6 +35,9 @@ #include "accel/tls.h" #include "mlx5_core.h" +#include "lib/mlx5.h" + +#ifdef CONFIG_MLX5_FPGA_TLS #include "fpga/tls.h" int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, @@ -61,7 +64,8 @@ int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { - return mlx5_fpga_is_tls_device(mdev); + return mlx5_fpga_is_tls_device(mdev) || + mlx5_accel_is_ktls_device(mdev); } u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) @@ -78,3 +82,42 @@ void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { mlx5_fpga_tls_cleanup(mdev); } +#endif + +#ifdef CONFIG_MLX5_TLS +int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id) +{ + u32 sz_bytes; + void *key; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = + (struct tls12_crypto_info_aes_gcm_256 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + default: + return -EINVAL; + } + + return mlx5_create_encryption_key(mdev, key, sz_bytes, p_key_id); +} + +void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) +{ + mlx5_destroy_encryption_key(mdev, key_id); +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h index def4093ebfae..879321b21616 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h @@ -37,8 +37,51 @@ #include <linux/mlx5/driver.h> #include <linux/tls.h> -#ifdef CONFIG_MLX5_ACCEL +#ifdef CONFIG_MLX5_TLS +int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id); +void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id); +static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, tls)) + return false; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return false; + + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); +} + +static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info) +{ + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + if (crypto_info->version == TLS_1_2_VERSION) + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); + break; + } + + return false; +} +#else +static inline int +mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id) { return -ENOTSUPP; } +static inline void +mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) {} + +static inline bool +mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; } +static inline bool +mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info) { return false; } +#endif + +#ifdef CONFIG_MLX5_FPGA_TLS enum { MLX5_ACCEL_TLS_TX = BIT(0), MLX5_ACCEL_TLS_RX = BIT(1), @@ -84,11 +127,13 @@ static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, bool direction_sx) { } static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, u64 rcd_sn) { return 0; } -static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; } +static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) +{ + return mlx5_accel_is_ktls_device(mdev); +} static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; } static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; } static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { } - #endif #endif /* __MLX5_ACCEL_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 9008e17126db..549f962cd86e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -57,15 +57,16 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, int node) { struct mlx5_priv *priv = &dev->priv; + struct device *device = dev->device; int original_node; void *cpu_handle; mutex_lock(&priv->alloc_mutex); - original_node = dev_to_node(&dev->pdev->dev); - set_dev_node(&dev->pdev->dev, node); - cpu_handle = dma_alloc_coherent(&dev->pdev->dev, size, dma_handle, + original_node = dev_to_node(device); + set_dev_node(device, node); + cpu_handle = dma_alloc_coherent(device, size, dma_handle, GFP_KERNEL); - set_dev_node(&dev->pdev->dev, original_node); + set_dev_node(device, original_node); mutex_unlock(&priv->alloc_mutex); return cpu_handle; } @@ -110,7 +111,7 @@ EXPORT_SYMBOL(mlx5_buf_alloc); void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf) { - dma_free_coherent(&dev->pdev->dev, buf->size, buf->frags->buf, + dma_free_coherent(dev->device, buf->size, buf->frags->buf, buf->frags->map); kfree(buf->frags); @@ -139,7 +140,7 @@ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, if (!frag->buf) goto err_free_buf; if (frag->map & ((1 << buf->page_shift) - 1)) { - dma_free_coherent(&dev->pdev->dev, frag_sz, + dma_free_coherent(dev->device, frag_sz, buf->frags[i].buf, buf->frags[i].map); mlx5_core_warn(dev, "unexpected map alignment: %pad, page_shift=%d\n", &frag->map, buf->page_shift); @@ -152,7 +153,7 @@ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, err_free_buf: while (i--) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, buf->frags[i].buf, + dma_free_coherent(dev->device, PAGE_SIZE, buf->frags[i].buf, buf->frags[i].map); kfree(buf->frags); err_out: @@ -168,7 +169,7 @@ void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf) for (i = 0; i < buf->npages; i++) { int frag_sz = min_t(int, size, PAGE_SIZE); - dma_free_coherent(&dev->pdev->dev, frag_sz, buf->frags[i].buf, + dma_free_coherent(dev->device, frag_sz, buf->frags[i].buf, buf->frags[i].map); size -= frag_sz; } @@ -274,7 +275,7 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) __set_bit(db->index, db->u.pgdir->bitmap); if (bitmap_full(db->u.pgdir->bitmap, db_per_page)) { - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + dma_free_coherent(dev->device, PAGE_SIZE, db->u.pgdir->db_page, db->u.pgdir->db_dma); list_del(&db->u.pgdir->list); bitmap_free(db->u.pgdir->bitmap); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index be48c6440251..8cdd7e66f8df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -316,7 +316,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: case MLX5_CMD_OP_DEALLOC_MEMIC: case MLX5_CMD_OP_PAGE_FAULT_RESUME: - case MLX5_CMD_OP_QUERY_HOST_PARAMS: + case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -441,6 +441,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + case MLX5_CMD_OP_CREATE_UCTX: + case MLX5_CMD_OP_DESTROY_UCTX: + case MLX5_CMD_OP_CREATE_UMEM: + case MLX5_CMD_OP_DESTROY_UMEM: case MLX5_CMD_OP_ALLOC_MEMIC: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; @@ -628,7 +632,11 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); - MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS); + MLX5_COMMAND_STR_CASE(QUERY_ESW_FUNCTIONS); + MLX5_COMMAND_STR_CASE(CREATE_UCTX); + MLX5_COMMAND_STR_CASE(DESTROY_UCTX); + MLX5_COMMAND_STR_CASE(CREATE_UMEM); + MLX5_COMMAND_STR_CASE(DESTROY_UMEM); default: return "unknown command opcode"; } } @@ -917,7 +925,6 @@ static void cmd_work_handler(struct work_struct *work) mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell); - mmiowb(); /* if not in polling don't use ent after this point */ if (cmd_mode == CMD_MODE_POLLING || poll_cmd) { poll_timeout(ent); @@ -1347,7 +1354,7 @@ static void set_wqname(struct mlx5_core_dev *dev) struct mlx5_cmd *cmd = &dev->cmd; snprintf(cmd->wq_name, sizeof(cmd->wq_name), "mlx5_cmd_%s", - dev_name(&dev->pdev->dev)); + dev_name(dev->device)); } static void clean_debug_files(struct mlx5_core_dev *dev) @@ -1605,7 +1612,27 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev) static int status_to_err(u8 status) { - return status ? -1 : 0; /* TBD more meaningful codes */ + switch (status) { + case MLX5_CMD_DELIVERY_STAT_OK: + case MLX5_DRIVER_STATUS_ABORTED: + return 0; + case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: + case MLX5_CMD_DELIVERY_STAT_TOK_ERR: + return -EBADR; + case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: + case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: + case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: + return -EFAULT; /* Bad address */ + case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: + case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: + case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: + case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: + return -ENOMSG; + case MLX5_CMD_DELIVERY_STAT_FW_ERR: + return -EIO; + default: + return -EINVAL; + } } static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, @@ -1852,7 +1879,7 @@ static void create_msg_cache(struct mlx5_core_dev *dev) static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) { - struct device *ddev = &dev->pdev->dev; + struct device *ddev = dev->device; cmd->cmd_alloc_buf = dma_alloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, &cmd->alloc_dma, GFP_KERNEL); @@ -1883,7 +1910,7 @@ static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) { - struct device *ddev = &dev->pdev->dev; + struct device *ddev = dev->device; dma_free_coherent(ddev, cmd->alloc_size, cmd->cmd_alloc_buf, cmd->alloc_dma); @@ -1902,14 +1929,13 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) memset(cmd, 0, sizeof(*cmd)); cmd_if_rev = cmdif_rev(dev); if (cmd_if_rev != CMD_IF_REV) { - dev_err(&dev->pdev->dev, - "Driver cmdif rev(%d) differs from firmware's(%d)\n", - CMD_IF_REV, cmd_if_rev); + mlx5_core_err(dev, + "Driver cmdif rev(%d) differs from firmware's(%d)\n", + CMD_IF_REV, cmd_if_rev); return -EINVAL; } - cmd->pool = dma_pool_create("mlx5_cmd", &dev->pdev->dev, size, align, - 0); + cmd->pool = dma_pool_create("mlx5_cmd", dev->device, size, align, 0); if (!cmd->pool) return -ENOMEM; @@ -1921,14 +1947,14 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) cmd->log_sz = cmd_l >> 4 & 0xf; cmd->log_stride = cmd_l & 0xf; if (1 << cmd->log_sz > MLX5_MAX_COMMANDS) { - dev_err(&dev->pdev->dev, "firmware reports too many outstanding commands %d\n", - 1 << cmd->log_sz); + mlx5_core_err(dev, "firmware reports too many outstanding commands %d\n", + 1 << cmd->log_sz); err = -EINVAL; goto err_free_page; } if (cmd->log_sz + cmd->log_stride > MLX5_ADAPTER_PAGE_SHIFT) { - dev_err(&dev->pdev->dev, "command queue size overflow\n"); + mlx5_core_err(dev, "command queue size overflow\n"); err = -EINVAL; goto err_free_page; } @@ -1939,8 +1965,8 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; if (cmd->cmdif_rev > CMD_IF_REV) { - dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n", - CMD_IF_REV, cmd->cmdif_rev); + mlx5_core_err(dev, "driver does not support command interface version. driver %d, firmware %d\n", + CMD_IF_REV, cmd->cmdif_rev); err = -EOPNOTSUPP; goto err_free_page; } @@ -1956,7 +1982,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) cmd_h = (u32)((u64)(cmd->dma) >> 32); cmd_l = (u32)(cmd->dma); if (cmd_l & 0xfff) { - dev_err(&dev->pdev->dev, "invalid command queue address\n"); + mlx5_core_err(dev, "invalid command queue address\n"); err = -ENOMEM; goto err_free_page; } @@ -1976,7 +2002,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) set_wqname(dev); cmd->wq = create_singlethread_workqueue(cmd->wq_name); if (!cmd->wq) { - dev_err(&dev->pdev->dev, "failed to create command workqueue\n"); + mlx5_core_err(dev, "failed to create command workqueue\n"); err = -ENOMEM; goto err_cache; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 713a17ee3751..818edc63e428 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -58,7 +58,7 @@ void mlx5_cq_tasklet_cb(unsigned long data) list_for_each_entry_safe(mcq, temp, &ctx->process_list, tasklet_ctx.list) { list_del_init(&mcq->tasklet_ctx.list); - mcq->tasklet_ctx.comp(mcq); + mcq->tasklet_ctx.comp(mcq, NULL); mlx5_cq_put(mcq); if (time_after(jiffies, end)) break; @@ -68,7 +68,8 @@ void mlx5_cq_tasklet_cb(unsigned long data) tasklet_schedule(&ctx->task); } -static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq) +static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, + struct mlx5_eqe *eqe) { unsigned long flags; struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; @@ -87,11 +88,10 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq) } int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - u32 *in, int inlen) + u32 *in, int inlen, u32 *out, int outlen) { int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn); u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)]; - u32 out[MLX5_ST_SZ_DW(create_cq_out)]; u32 din[MLX5_ST_SZ_DW(destroy_cq_in)]; struct mlx5_eq_comp *eq; int err; @@ -100,9 +100,9 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, if (IS_ERR(eq)) return PTR_ERR(eq); - memset(out, 0, sizeof(out)); + memset(out, 0, outlen); MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); if (err) return err; @@ -158,13 +158,8 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; int err; - err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); - if (err) - return err; - - err = mlx5_eq_del_cq(&cq->eq->core, cq); - if (err) - return err; + mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); + mlx5_eq_del_cq(&cq->eq->core, cq); MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index ebc046fa97d3..5bb6a26ea267 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -248,11 +248,32 @@ void mlx5_unregister_interface(struct mlx5_interface *intf) } EXPORT_SYMBOL(mlx5_unregister_interface); +/* Must be called with intf_mutex held */ +static bool mlx5_has_added_dev_by_protocol(struct mlx5_core_dev *mdev, int protocol) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_interface *intf; + bool found = false; + + list_for_each_entry(intf, &intf_list, list) { + if (intf->protocol == protocol) { + dev_ctx = mlx5_get_device(intf, &mdev->priv); + if (dev_ctx && test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + found = true; + break; + } + } + + return found; +} + void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol) { mutex_lock(&mlx5_intf_mutex); - mlx5_remove_dev_by_protocol(mdev, protocol); - mlx5_add_dev_by_protocol(mdev, protocol); + if (mlx5_has_added_dev_by_protocol(mdev, protocol)) { + mlx5_remove_dev_by_protocol(mdev, protocol); + mlx5_add_dev_by_protocol(mdev, protocol); + } mutex_unlock(&mlx5_intf_mutex); } @@ -290,13 +311,20 @@ static u32 mlx5_gen_pci_id(struct mlx5_core_dev *dev) /* Must be called with intf_mutex held */ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) { - u32 pci_id = mlx5_gen_pci_id(dev); struct mlx5_core_dev *res = NULL; struct mlx5_core_dev *tmp_dev; struct mlx5_priv *priv; + u32 pci_id; + if (!mlx5_core_is_pf(dev)) + return NULL; + + pci_id = mlx5_gen_pci_id(dev); list_for_each_entry(priv, &mlx5_dev_list, dev_list) { tmp_dev = container_of(priv, struct mlx5_core_dev, priv); + if (!mlx5_core_is_pf(tmp_dev)) + continue; + if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) { res = tmp_dev; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c new file mode 100644 index 000000000000..a400f4430c28 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <devlink.h> + +#include "mlx5_core.h" +#include "eswitch.h" + +static int mlx5_devlink_flash_update(struct devlink *devlink, + const char *file_name, + const char *component, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + const struct firmware *fw; + int err; + + if (component) + return -EOPNOTSUPP; + + err = request_firmware_direct(&fw, file_name, &dev->pdev->dev); + if (err) + return err; + + return mlx5_firmware_flash(dev, fw, extack); +} + +static u8 mlx5_fw_ver_major(u32 version) +{ + return (version >> 24) & 0xff; +} + +static u8 mlx5_fw_ver_minor(u32 version) +{ + return (version >> 16) & 0xff; +} + +static u16 mlx5_fw_ver_subminor(u32 version) +{ + return version & 0xffff; +} + +#define DEVLINK_FW_STRING_LEN 32 + +static int +mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + char version_str[DEVLINK_FW_STRING_LEN]; + u32 running_fw, stored_fw; + int err; + + err = devlink_info_driver_name_put(req, DRIVER_NAME); + if (err) + return err; + + err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id); + if (err) + return err; + + err = mlx5_fw_version_query(dev, &running_fw, &stored_fw); + if (err) + return err; + + snprintf(version_str, sizeof(version_str), "%d.%d.%04d", + mlx5_fw_ver_major(running_fw), mlx5_fw_ver_minor(running_fw), + mlx5_fw_ver_subminor(running_fw)); + err = devlink_info_version_running_put(req, "fw.version", version_str); + if (err) + return err; + + /* no pending version, return running (stored) version */ + if (stored_fw == 0) + stored_fw = running_fw; + + snprintf(version_str, sizeof(version_str), "%d.%d.%04d", + mlx5_fw_ver_major(stored_fw), mlx5_fw_ver_minor(stored_fw), + mlx5_fw_ver_subminor(stored_fw)); + err = devlink_info_version_stored_put(req, "fw.version", version_str); + if (err) + return err; + + return 0; +} + +static const struct devlink_ops mlx5_devlink_ops = { +#ifdef CONFIG_MLX5_ESWITCH + .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, + .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, + .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, + .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, + .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, + .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, +#endif + .flash_update = mlx5_devlink_flash_update, + .info_get = mlx5_devlink_info_get, +}; + +struct devlink *mlx5_devlink_alloc(void) +{ + return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev)); +} + +void mlx5_devlink_free(struct devlink *devlink) +{ + devlink_free(devlink); +} + +int mlx5_devlink_register(struct devlink *devlink, struct device *dev) +{ + return devlink_register(devlink, dev); +} + +void mlx5_devlink_unregister(struct devlink *devlink) +{ + devlink_unregister(devlink); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h new file mode 100644 index 000000000000..d0ba03774ddf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef __MLX5_DEVLINK_H__ +#define __MLX5_DEVLINK_H__ + +#include <net/devlink.h> + +struct devlink *mlx5_devlink_alloc(void); +void mlx5_devlink_free(struct devlink *devlink); +int mlx5_devlink_register(struct devlink *devlink, struct device *dev); +void mlx5_devlink_unregister(struct devlink *devlink); + +#endif /* __MLX5_DEVLINK_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile index d8e17110f25d..c78512eed8d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c new file mode 100644 index 000000000000..28d02749d3c4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include "lib/pci_vsc.h" +#include "lib/mlx5.h" + +#define BAD_ACCESS 0xBADACCE5 +#define MLX5_PROTECTED_CR_SCAN_CRSPACE 0x7 + +static bool mlx5_crdump_enabled(struct mlx5_core_dev *dev) +{ + return !!dev->priv.health.crdump_size; +} + +static int mlx5_crdump_fill(struct mlx5_core_dev *dev, u32 *cr_data) +{ + u32 crdump_size = dev->priv.health.crdump_size; + int i, ret; + + for (i = 0; i < (crdump_size / 4); i++) + cr_data[i] = BAD_ACCESS; + + ret = mlx5_vsc_gw_read_block_fast(dev, cr_data, crdump_size); + if (ret <= 0) { + if (ret == 0) + return -EIO; + return ret; + } + + if (crdump_size != ret) { + mlx5_core_warn(dev, "failed to read full dump, read %d out of %u\n", + ret, crdump_size); + return -EINVAL; + } + + return 0; +} + +int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data) +{ + int ret; + + if (!mlx5_crdump_enabled(dev)) + return -ENODEV; + + ret = mlx5_vsc_gw_lock(dev); + if (ret) { + mlx5_core_warn(dev, "crdump: failed to lock vsc gw err %d\n", + ret); + return ret; + } + /* Verify no other PF is running cr-dump or sw reset */ + ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, + MLX5_VSC_LOCK); + if (ret) { + mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + goto unlock_gw; + } + + ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL); + if (ret) + goto unlock_sem; + + ret = mlx5_crdump_fill(dev, cr_data); + +unlock_sem: + mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, MLX5_VSC_UNLOCK); +unlock_gw: + mlx5_vsc_gw_unlock(dev); + return ret; +} + +int mlx5_crdump_enable(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + u32 space_size; + int ret; + + if (!mlx5_core_is_pf(dev) || !mlx5_vsc_accessible(dev) || + mlx5_crdump_enabled(dev)) + return 0; + + ret = mlx5_vsc_gw_lock(dev); + if (ret) + return ret; + + /* Check if space is supported and get space size */ + ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, + &space_size); + if (ret) { + /* Unlock and mask error since space is not supported */ + mlx5_vsc_gw_unlock(dev); + return 0; + } + + if (!space_size) { + mlx5_core_warn(dev, "Invalid Crspace size, zero\n"); + mlx5_vsc_gw_unlock(dev); + return -EINVAL; + } + + ret = mlx5_vsc_gw_unlock(dev); + if (ret) + return ret; + + priv->health.crdump_size = space_size; + return 0; +} + +void mlx5_crdump_disable(struct mlx5_core_dev *dev) +{ + dev->priv.health.crdump_size = 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index a4cf123e3f17..ddf1b87f1bc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -187,6 +187,7 @@ TRACE_EVENT(mlx5_fs_set_fte, __field(u32, index) __field(u32, action) __field(u32, flow_tag) + __field(u32, flow_source) __field(u8, mask_enable) __field(int, new_fte) __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) @@ -204,7 +205,8 @@ TRACE_EVENT(mlx5_fs_set_fte, __entry->index = fte->index; __entry->action = fte->action.action; __entry->mask_enable = __entry->fg->mask.match_criteria_enable; - __entry->flow_tag = fte->action.flow_tag; + __entry->flow_tag = fte->flow_context.flow_tag; + __entry->flow_source = fte->flow_context.flow_source; memcpy(__entry->mask_outer, MLX5_ADDR_OF(fte_match_param, &__entry->fg->mask.match_criteria, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 6999f4486e9e..8a4930c8bf62 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -243,6 +243,19 @@ free_strings_db: return -ENOMEM; } +static void +mlx5_fw_tracer_init_saved_traces_array(struct mlx5_fw_tracer *tracer) +{ + tracer->st_arr.saved_traces_index = 0; + mutex_init(&tracer->st_arr.lock); +} + +static void +mlx5_fw_tracer_clean_saved_traces_array(struct mlx5_fw_tracer *tracer) +{ + mutex_destroy(&tracer->st_arr.lock); +} + static void mlx5_tracer_read_strings_db(struct work_struct *work) { struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer, @@ -522,6 +535,24 @@ static void mlx5_fw_tracer_clean_ready_list(struct mlx5_fw_tracer *tracer) list_del(&str_frmt->list); } +static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer, + u64 timestamp, bool lost, + u8 event_id, char *msg) +{ + struct mlx5_fw_trace_data *trace_data; + + mutex_lock(&tracer->st_arr.lock); + trace_data = &tracer->st_arr.straces[tracer->st_arr.saved_traces_index]; + trace_data->timestamp = timestamp; + trace_data->lost = lost; + trace_data->event_id = event_id; + strncpy(trace_data->msg, msg, TRACE_STR_MSG); + + tracer->st_arr.saved_traces_index = + (tracer->st_arr.saved_traces_index + 1) & (SAVED_TRACES_NUM - 1); + mutex_unlock(&tracer->st_arr.lock); +} + static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, struct mlx5_core_dev *dev, u64 trace_timestamp) @@ -540,6 +571,9 @@ static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost, str_frmt->event_id, tmp); + mlx5_fw_tracer_save_trace(dev->tracer, trace_timestamp, + str_frmt->lost, str_frmt->event_id, tmp); + /* remove it from hash */ mlx5_tracer_clean_message(str_frmt); } @@ -786,6 +820,109 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work) mlx5_fw_tracer_start(tracer); } +static int mlx5_fw_tracer_set_core_dump_reg(struct mlx5_core_dev *dev, + u32 *in, int size_in) +{ + u32 out[MLX5_ST_SZ_DW(core_dump_reg)] = {}; + + if (!MLX5_CAP_DEBUG(dev, core_dump_general) && + !MLX5_CAP_DEBUG(dev, core_dump_qp)) + return -EOPNOTSUPP; + + return mlx5_core_access_reg(dev, in, size_in, out, sizeof(out), + MLX5_REG_CORE_DUMP, 0, 1); +} + +int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_tracer *tracer = dev->tracer; + u32 in[MLX5_ST_SZ_DW(core_dump_reg)] = {}; + int err; + + if (!MLX5_CAP_DEBUG(dev, core_dump_general) || !tracer) + return -EOPNOTSUPP; + if (!tracer->owner) + return -EPERM; + + MLX5_SET(core_dump_reg, in, core_dump_type, 0x0); + + err = mlx5_fw_tracer_set_core_dump_reg(dev, in, sizeof(in)); + if (err) + return err; + queue_work(tracer->work_queue, &tracer->handle_traces_work); + flush_workqueue(tracer->work_queue); + return 0; +} + +static int +mlx5_devlink_fmsg_fill_trace(struct devlink_fmsg *fmsg, + struct mlx5_fw_trace_data *trace_data) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "timestamp", trace_data->timestamp); + if (err) + return err; + + err = devlink_fmsg_bool_pair_put(fmsg, "lost", trace_data->lost); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "event_id", trace_data->event_id); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "msg", trace_data->msg); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + return 0; +} + +int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer, + struct devlink_fmsg *fmsg) +{ + struct mlx5_fw_trace_data *straces = tracer->st_arr.straces; + u32 index, start_index, end_index; + u32 saved_traces_index; + int err; + + if (!straces[0].timestamp) + return -ENOMSG; + + mutex_lock(&tracer->st_arr.lock); + saved_traces_index = tracer->st_arr.saved_traces_index; + if (straces[saved_traces_index].timestamp) + start_index = saved_traces_index; + else + start_index = 0; + end_index = (saved_traces_index - 1) & (SAVED_TRACES_NUM - 1); + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "dump fw traces"); + if (err) + goto unlock; + index = start_index; + while (index != end_index) { + err = mlx5_devlink_fmsg_fill_trace(fmsg, &straces[index]); + if (err) + goto unlock; + + index = (index + 1) & (SAVED_TRACES_NUM - 1); + } + + err = devlink_fmsg_arr_pair_nest_end(fmsg); +unlock: + mutex_unlock(&tracer->st_arr.lock); + return err; +} + /* Create software resources (Buffers, etc ..) */ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev) { @@ -833,6 +970,7 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev) goto free_log_buf; } + mlx5_fw_tracer_init_saved_traces_array(tracer); mlx5_core_dbg(dev, "FWTracer: Tracer created\n"); return tracer; @@ -917,6 +1055,7 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) cancel_work_sync(&tracer->read_fw_strings_work); mlx5_fw_tracer_clean_ready_list(tracer); mlx5_fw_tracer_clean_print_hash(tracer); + mlx5_fw_tracer_clean_saved_traces_array(tracer); mlx5_fw_tracer_free_strings_db(tracer); mlx5_fw_tracer_destroy_log_buf(tracer); flush_workqueue(tracer->work_queue); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h index a8b8747f2b61..40601fba80ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -46,6 +46,9 @@ #define TRACER_BLOCK_SIZE_BYTE 256 #define TRACES_PER_BLOCK 32 +#define TRACE_STR_MSG 256 +#define SAVED_TRACES_NUM 8192 + #define TRACER_MAX_PARAMS 7 #define MESSAGE_HASH_BITS 6 #define MESSAGE_HASH_SIZE BIT(MESSAGE_HASH_BITS) @@ -53,6 +56,13 @@ #define MASK_52_7 (0x1FFFFFFFFFFF80) #define MASK_6_0 (0x7F) +struct mlx5_fw_trace_data { + u64 timestamp; + bool lost; + u8 event_id; + char msg[TRACE_STR_MSG]; +}; + struct mlx5_fw_tracer { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -83,6 +93,13 @@ struct mlx5_fw_tracer { u32 consumer_index; } buff; + /* Saved Traces Array */ + struct { + struct mlx5_fw_trace_data straces[SAVED_TRACES_NUM]; + u32 saved_traces_index; + struct mutex lock; /* Protect st_arr access */ + } st_arr; + u64 last_timestamp; struct work_struct handle_traces_work; struct hlist_head hash[MESSAGE_HASH_SIZE]; @@ -171,5 +188,8 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev); int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer); +int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev); +int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer, + struct devlink_fmsg *fmsg); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h index 83f90e9aff45..3038be575923 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h @@ -47,7 +47,7 @@ TRACE_EVENT(mlx5_fw, TP_ARGS(tracer, trace_timestamp, lost, event_id, msg), TP_STRUCT__entry( - __string(dev_name, dev_name(&tracer->dev->pdev->dev)) + __string(dev_name, dev_name(tracer->dev->device)) __field(u64, trace_timestamp) __field(bool, lost) __field(u8, event_id) @@ -55,7 +55,8 @@ TRACE_EVENT(mlx5_fw, ), TP_fast_assign( - __assign_str(dev_name, dev_name(&tracer->dev->pdev->dev)); + __assign_str(dev_name, + dev_name(tracer->dev->device)); __entry->trace_timestamp = trace_timestamp; __entry->lost = lost; __entry->event_id = event_id; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 4746f2d28fb6..d2228e37450f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -26,7 +26,7 @@ static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev) MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); MLX5_SET(disable_hca_in, in, function_id, 0); - MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + MLX5_SET(disable_hca_in, in, embedded_cpu_function, 0); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -83,30 +83,3 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev) mlx5_peer_pf_cleanup(dev); } - -static int mlx5_query_host_params_context(struct mlx5_core_dev *dev, - u32 *out, int outlen) -{ - u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {}; - - MLX5_SET(query_host_params_in, in, opcode, - MLX5_CMD_OP_QUERY_HOST_PARAMS); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); -} - -int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) -{ - u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {}; - int err; - - err = mlx5_query_host_params_context(dev, out, sizeof(out)); - if (err) - return err; - - *num_vf = MLX5_GET(query_host_params_out, out, - host_params_context.host_num_of_vfs); - mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf); - - return 0; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h index 346372df218f..d3d7a00a02ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -16,7 +16,6 @@ enum { bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); int mlx5_ec_init(struct mlx5_core_dev *dev); void mlx5_ec_cleanup(struct mlx5_core_dev *dev); -int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf); #else /* CONFIG_MLX5_ESWITCH */ @@ -24,9 +23,6 @@ static inline bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} -static inline int -mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) -{ return -EOPNOTSUPP; } #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index d3eaf2ceaa39..263558875f20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -48,7 +48,7 @@ #include <linux/rhashtable.h> #include <net/switchdev.h> #include <net/xdp.h> -#include <linux/net_dim.h> +#include <linux/dim.h> #include <linux/bits.h> #include "wq.h" #include "mlx5_core.h" @@ -137,6 +137,7 @@ struct page_pool; #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 +#define MLX5E_TX_XSK_POLL_BUDGET 64 #define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ #define MLX5E_UMR_WQE_INLINE_SZ \ @@ -155,6 +156,11 @@ do { \ ##__VA_ARGS__); \ } while (0) +enum mlx5e_rq_group { + MLX5E_RQ_GROUP_REGULAR, + MLX5E_RQ_GROUP_XSK, + MLX5E_NUM_RQ_GROUPS /* Keep last. */ +}; static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) { @@ -179,7 +185,8 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) /* Use this function to get max num channels after netdev was created */ static inline int mlx5e_get_netdev_max_channels(struct net_device *netdev) { - return min_t(unsigned int, netdev->num_rx_queues, + return min_t(unsigned int, + netdev->num_rx_queues / MLX5E_NUM_RQ_GROUPS, netdev->num_tx_queues); } @@ -202,7 +209,10 @@ struct mlx5e_umr_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_umr_ctrl_seg uctrl; struct mlx5_mkey_seg mkc; - struct mlx5_mtt inline_mtts[0]; + union { + struct mlx5_mtt inline_mtts[0]; + u8 tls_static_params_ctx[0]; + }; }; extern const char mlx5e_self_tests[][ETH_GSTRING_LEN]; @@ -238,10 +248,10 @@ struct mlx5e_params { u16 num_channels; u8 num_tc; bool rx_cqe_compress_def; - struct net_dim_cq_moder rx_cq_moderation; - struct net_dim_cq_moder tx_cq_moderation; + bool tunneled_offload_en; + struct dim_cq_moder rx_cq_moderation; + struct dim_cq_moder tx_cq_moderation; bool lro_en; - u32 lro_wqe_sz; u8 tx_min_inline_mode; bool vlan_strip_disable; bool scatter_fcs_en; @@ -250,6 +260,7 @@ struct mlx5e_params { u32 lro_timeout; u32 pflags; struct bpf_prog *xdp_prog; + struct mlx5e_xsk *xsk; unsigned int sw_mtu; int hard_mtu; }; @@ -325,6 +336,9 @@ struct mlx5e_tx_wqe_info { u32 num_bytes; u8 num_wqebbs; u8 num_dma; +#ifdef CONFIG_MLX5_EN_TLS + skb_frag_t *resync_dump_frag; +#endif }; enum mlx5e_dma_map_type { @@ -348,6 +362,13 @@ enum { struct mlx5e_sq_wqe_info { u8 opcode; + + /* Auxiliary data for different opcodes. */ + union { + struct { + struct mlx5e_rq *rq; + } umr; + }; }; struct mlx5e_txqsq { @@ -356,7 +377,7 @@ struct mlx5e_txqsq { /* dirtied @completion */ u16 cc; u32 dma_fifo_cc; - struct net_dim dim; /* Adaptive Moderation */ + struct dim dim; /* Adaptive Moderation */ /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; @@ -375,6 +396,7 @@ struct mlx5e_txqsq { void __iomem *uar_map; struct netdev_queue *txq; u32 sqn; + u16 stop_room; u8 min_inline_mode; struct device *pdev; __be32 mkey_be; @@ -385,20 +407,62 @@ struct mlx5e_txqsq { /* control path */ struct mlx5_wq_ctrl wq_ctrl; struct mlx5e_channel *channel; + int ch_ix; int txq_ix; u32 rate_limit; struct work_struct recover_work; } ____cacheline_aligned_in_smp; struct mlx5e_dma_info { - struct page *page; - dma_addr_t addr; + dma_addr_t addr; + union { + struct page *page; + struct { + u64 handle; + void *data; + } xsk; + }; +}; + +/* XDP packets can be transmitted in different ways. On completion, we need to + * distinguish between them to clean up things in a proper way. + */ +enum mlx5e_xdp_xmit_mode { + /* An xdp_frame was transmitted due to either XDP_REDIRECT from another + * device or XDP_TX from an XSK RQ. The frame has to be unmapped and + * returned. + */ + MLX5E_XDP_XMIT_MODE_FRAME, + + /* The xdp_frame was created in place as a result of XDP_TX from a + * regular RQ. No DMA remapping happened, and the page belongs to us. + */ + MLX5E_XDP_XMIT_MODE_PAGE, + + /* No xdp_frame was created at all, the transmit happened from a UMEM + * page. The UMEM Completion Ring producer pointer has to be increased. + */ + MLX5E_XDP_XMIT_MODE_XSK, }; struct mlx5e_xdp_info { - struct xdp_frame *xdpf; - dma_addr_t dma_addr; - struct mlx5e_dma_info di; + enum mlx5e_xdp_xmit_mode mode; + union { + struct { + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + } frame; + struct { + struct mlx5e_rq *rq; + struct mlx5e_dma_info di; + } page; + }; +}; + +struct mlx5e_xdp_xmit_data { + dma_addr_t dma_addr; + void *data; + u32 len; }; struct mlx5e_xdp_info_fifo { @@ -410,26 +474,32 @@ struct mlx5e_xdp_info_fifo { struct mlx5e_xdp_wqe_info { u8 num_wqebbs; - u8 num_ds; + u8 num_pkts; }; struct mlx5e_xdp_mpwqe { /* Current MPWQE session */ struct mlx5e_tx_wqe *wqe; u8 ds_count; + u8 pkt_count; u8 max_ds_count; + u8 complete; + u8 inline_on; }; struct mlx5e_xdpsq; -typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*, - struct mlx5e_xdp_info*); +typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *); +typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *, + struct mlx5e_xdp_xmit_data *, + struct mlx5e_xdp_info *, + int); + struct mlx5e_xdpsq { /* data path */ /* dirtied @completion */ u32 xdpi_fifo_cc; u16 cc; - bool redirect_flush; /* dirtied @xmit */ u32 xdpi_fifo_pc ____cacheline_aligned_in_smp; @@ -440,8 +510,10 @@ struct mlx5e_xdpsq { struct mlx5e_cq cq; /* read only */ + struct xdp_umem *umem; struct mlx5_wq_cyc wq; struct mlx5e_xdpsq_stats *stats; + mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check; mlx5e_fp_xmit_xdp_frame xmit_xdp_frame; struct { struct mlx5e_xdp_wqe_info *wqe_info; @@ -462,10 +534,10 @@ struct mlx5e_xdpsq { struct mlx5e_icosq { /* data path */ + u16 cc; + u16 pc; - /* dirtied @xmit */ - u16 pc ____cacheline_aligned_in_smp; - + struct mlx5_wqe_ctrl_seg *doorbell_cseg; struct mlx5e_cq cq; /* write@xmit, read@completion */ @@ -484,12 +556,6 @@ struct mlx5e_icosq { struct mlx5e_channel *channel; } ____cacheline_aligned_in_smp; -static inline bool -mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) -{ - return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc); -} - struct mlx5e_wqe_frag_info { struct mlx5e_dma_info *di; u32 offset; @@ -532,7 +598,8 @@ typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq); typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); enum mlx5e_rq_flag { - MLX5E_RQ_FLAG_XDP_XMIT = BIT(0), + MLX5E_RQ_FLAG_XDP_XMIT, + MLX5E_RQ_FLAG_XDP_REDIRECT, }; struct mlx5e_rq_frag_info { @@ -563,11 +630,15 @@ struct mlx5e_rq { struct mlx5e_mpw_info *info; mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq; u16 num_strides; + u16 actual_wq_head; u8 log_stride_sz; - bool umr_in_progress; + u8 umr_in_progress; + u8 umr_last_bulk; + u8 umr_completed; } mpwqe; }; struct { + u16 umem_headroom; u16 headroom; u8 map_dir; /* dma map direction */ } buff; @@ -590,14 +661,18 @@ struct mlx5e_rq { int ix; unsigned int hw_mtu; - struct net_dim dim; /* Dynamic Interrupt Moderation */ + struct dim dim; /* Dynamic Interrupt Moderation */ /* XDP */ struct bpf_prog *xdp_prog; - struct mlx5e_xdpsq xdpsq; + struct mlx5e_xdpsq *xdpsq; DECLARE_BITMAP(flags, 8); struct page_pool *page_pool; + /* AF_XDP zero-copy */ + struct zero_copy_allocator zca; + struct xdp_umem *umem; + /* control */ struct mlx5_wq_ctrl wq_ctrl; __be32 mkey_be; @@ -610,9 +685,15 @@ struct mlx5e_rq { struct xdp_rxq_info xdp_rxq; } ____cacheline_aligned_in_smp; +enum mlx5e_channel_state { + MLX5E_CHANNEL_STATE_XSK, + MLX5E_CHANNEL_NUM_STATES +}; + struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; + struct mlx5e_xdpsq rq_xdpsq; struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_icosq icosq; /* internal control operations */ bool xdp; @@ -625,6 +706,13 @@ struct mlx5e_channel { /* XDP_REDIRECT */ struct mlx5e_xdpsq xdpsq; + /* AF_XDP zero-copy */ + struct mlx5e_rq xskrq; + struct mlx5e_xdpsq xsksq; + struct mlx5e_icosq xskicosq; + /* xskicosq can be accessed from any CPU - the spinlock protects it. */ + spinlock_t xskicosq_lock; + /* data path - accessed per napi poll */ struct irq_desc *irq_desc; struct mlx5e_ch_stats *stats; @@ -633,6 +721,7 @@ struct mlx5e_channel { struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; struct hwtstamp_config *tstamp; + DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; int cpu; cpumask_var_t xps_cpumask; @@ -648,14 +737,17 @@ struct mlx5e_channel_stats { struct mlx5e_ch_stats ch; struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC]; struct mlx5e_rq_stats rq; + struct mlx5e_rq_stats xskrq; struct mlx5e_xdpsq_stats rq_xdpsq; struct mlx5e_xdpsq_stats xdpsq; + struct mlx5e_xdpsq_stats xsksq; } ____cacheline_aligned_in_smp; enum { MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, MLX5E_STATE_XDP_TX_ENABLED, + MLX5E_STATE_XDP_OPEN, }; struct mlx5e_rqt { @@ -688,6 +780,17 @@ struct mlx5e_modify_sq_param { int rl_index; }; +struct mlx5e_xsk { + /* UMEMs are stored separately from channels, because we don't want to + * lose them when channels are recreated. The kernel also stores UMEMs, + * but it doesn't distinguish between zero-copy and non-zero-copy UMEMs, + * so rely on our mechanism. + */ + struct xdp_umem **umems; + u16 refcnt; + bool ever_used; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; @@ -708,6 +811,7 @@ struct mlx5e_priv { struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS]; struct mlx5e_rss_params rss_params; u32 tx_rates[MLX5E_MAX_NUM_SQS]; @@ -744,6 +848,7 @@ struct mlx5e_priv { struct mlx5e_tls *tls; #endif struct devlink_health_reporter *tx_reporter; + struct mlx5e_xsk xsk; }; struct mlx5e_profile { @@ -757,6 +862,7 @@ struct mlx5e_profile { void (*cleanup_tx)(struct mlx5e_priv *priv); void (*enable)(struct mlx5e_priv *priv); void (*disable)(struct mlx5e_priv *priv); + int (*update_rx)(struct mlx5e_priv *priv); void (*update_stats)(struct mlx5e_priv *priv); void (*update_carrier)(struct mlx5e_priv *priv); struct { @@ -769,13 +875,13 @@ struct mlx5e_profile { void mlx5e_build_ptys2ethtool_map(void); u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback); + struct net_device *sb_dev); netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5e_tx_wqe *wqe, u16 pi); + struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more); -void mlx5e_completion_event(struct mlx5_core_cq *mcq); +void mlx5e_trigger_irq(struct mlx5e_icosq *sq); +void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe); void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); @@ -787,11 +893,13 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info); -void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, - bool recycle); +void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info, + bool recycle); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); +void mlx5e_poll_ico_cq(struct mlx5e_cq *cq); bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq); void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); @@ -847,6 +955,30 @@ void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params, void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen); struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt); +struct mlx5e_xsk_param; + +struct mlx5e_rq_param; +int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, + struct xdp_umem *umem, struct mlx5e_rq *rq); +int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); +void mlx5e_deactivate_rq(struct mlx5e_rq *rq); +void mlx5e_close_rq(struct mlx5e_rq *rq); + +struct mlx5e_sq_param; +int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq); +void mlx5e_close_icosq(struct mlx5e_icosq *sq); +int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct xdp_umem *umem, + struct mlx5e_xdpsq *sq, bool is_redirect); +void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq); + +struct mlx5e_cq_param; +int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder, + struct mlx5e_cq_param *param, struct mlx5e_cq *cq); +void mlx5e_close_cq(struct mlx5e_cq *cq); + int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); @@ -886,59 +1018,10 @@ static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version)); } -static inline void mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, - struct mlx5e_tx_wqe **wqe, - u16 *pi) -{ - struct mlx5_wq_cyc *wq = &sq->wq; - - *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - *wqe = mlx5_wq_cyc_get_wqe(wq, *pi); - memset(*wqe, 0, sizeof(**wqe)); -} - -static inline -struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) +static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) { - u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - - memset(cseg, 0, sizeof(*cseg)); - - cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); - cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); - - (*pc)++; - - return wqe; -} - -static inline -void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, - void __iomem *uar_map, - struct mlx5_wqe_ctrl_seg *ctrl) -{ - ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - /* ensure wqe is visible to device before updating doorbell record */ - dma_wmb(); - - *wq->db = cpu_to_be32(pc); - - /* ensure doorbell record is visible to device before ringing the - * doorbell - */ - wmb(); - - mlx5_write64((__be32 *)ctrl, uar_map, NULL); -} - -static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) -{ - struct mlx5_core_cq *mcq; - - mcq = &cq->mcq; - mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc); + return MLX5_CAP_ETH(mdev, swp) && + MLX5_CAP_ETH(mdev, swp_csum) && MLX5_CAP_ETH(mdev, swp_lso); } extern const struct ethtool_ops mlx5e_ethtool_ops; @@ -970,17 +1053,17 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv); -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); -int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, - u32 underlay_qpn, u32 *tisn); +int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); int mlx5e_create_tises(struct mlx5e_priv *priv); +int mlx5e_update_nic_rx(struct mlx5e_priv *priv); void mlx5e_update_carrier(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); @@ -1022,8 +1105,6 @@ u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info); -int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, - struct ethtool_flash *flash); void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, struct ethtool_pauseparam *pauseparam); int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, @@ -1042,7 +1123,9 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *prof int mlx5e_attach_netdev(struct mlx5e_priv *priv); void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); +void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu); @@ -1059,6 +1142,7 @@ void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti) netdev_features_t mlx5e_features_check(struct sk_buff *skb, struct net_device *netdev, netdev_features_t features); +int mlx5e_set_features(struct net_device *netdev, netdev_features_t features); #ifdef CONFIG_MLX5_ESWITCH int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac); int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile index d8e17110f25d..c78512eed8d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c new file mode 100644 index 000000000000..79301d116667 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "en/params.h" + +static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + return params->xdp_prog || xsk; +} + +u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u16 headroom = NET_IP_ALIGN; + + if (mlx5e_rx_is_xdp(params, xsk)) { + headroom += XDP_PACKET_HEADROOM; + if (xsk) + headroom += xsk->headroom; + } else { + headroom += MLX5_RX_HEADROOM; + } + + return headroom; +} + +u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk); + u32 frag_sz = linear_rq_headroom + hw_mtu; + + /* AF_XDP doesn't build SKBs in place. */ + if (!xsk) + frag_sz = MLX5_SKB_FRAG_SZ(frag_sz); + + /* XDP in mlx5e doesn't support multiple packets per page. */ + if (mlx5e_rx_is_xdp(params, xsk)) + frag_sz = max_t(u32, frag_sz, PAGE_SIZE); + + /* Even if we can go with a smaller fragment size, we must not put + * multiple packets into a single frame. + */ + if (xsk) + frag_sz = max_t(u32, frag_sz, xsk->chunk_size); + + return frag_sz; +} + +u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk); + + return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); +} + +bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + /* AF_XDP allocates SKBs on XDP_PASS - ensure they don't occupy more + * than one page. For this, check both with and without xsk. + */ + u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk), + mlx5e_rx_get_linear_frag_sz(params, NULL)); + + return !params->lro_en && linear_frag_sz <= PAGE_SIZE; +} + +#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \ + MLX5_MPWQE_LOG_STRIDE_SZ_BASE) +bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk); + s8 signed_log_num_strides_param; + u8 log_num_strides; + + if (!mlx5e_rx_is_linear_skb(params, xsk)) + return false; + + if (order_base_2(linear_frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ) + return false; + + if (MLX5_CAP_GEN(mdev, ext_stride_num_range)) + return true; + + log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); + signed_log_num_strides_param = + (s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE; + + return signed_log_num_strides_param >= 0; +} + +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params, xsk); + + /* Numbers are unsigned, don't subtract to avoid underflow. */ + if (params->log_rq_mtu_frames < + log_pkts_per_wqe + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW) + return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW; + + return params->log_rq_mtu_frames - log_pkts_per_wqe; +} + +u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk)); + + return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); +} + +u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + return MLX5_MPWRQ_LOG_WQE_SZ - + mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); +} + +u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ? + mlx5e_rx_is_linear_skb(params, xsk) : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); + + return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h new file mode 100644 index 000000000000..bd882b5ee9a7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_PARAMS_H__ +#define __MLX5_EN_PARAMS_H__ + +#include "en.h" + +struct mlx5e_xsk_param { + u16 headroom; + u16 chunk_size; +}; + +struct mlx5e_rq_param { + u32 rqc[MLX5_ST_SZ_DW(rqc)]; + struct mlx5_wq_param wq; + struct mlx5e_rq_frags_info frags_info; +}; + +struct mlx5e_sq_param { + u32 sqc[MLX5_ST_SZ_DW(sqc)]; + struct mlx5_wq_param wq; + bool is_mpw; +}; + +struct mlx5e_cq_param { + u32 cqc[MLX5_ST_SZ_DW(cqc)]; + struct mlx5_wq_param wq; + u16 eq_ix; + u8 cq_period_mode; +}; + +struct mlx5e_channel_param { + struct mlx5e_rq_param rq; + struct mlx5e_sq_param sq; + struct mlx5e_sq_param xdp_sq; + struct mlx5e_sq_param icosq; + struct mlx5e_cq_param rx_cq; + struct mlx5e_cq_param tx_cq; + struct mlx5e_cq_param icosq_cq; +}; + +static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params, + u16 qid, + enum mlx5e_rq_group group, + u16 *ix) +{ + int nch = params->num_channels; + int ch = qid - nch * group; + + if (ch < 0 || ch >= nch) + return false; + + *ix = ch; + return true; +} + +static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params, + u16 qid, + u16 *ix, + enum mlx5e_rq_group *group) +{ + u16 nch = params->num_channels; + + *ix = qid % nch; + *group = qid / nch; +} + +static inline bool mlx5e_qid_validate(struct mlx5e_params *params, u64 qid) +{ + return qid < params->num_channels * MLX5E_NUM_RQ_GROUPS; +} + +/* Parameter calculations */ + +u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); + +/* Build queue parameters */ + +void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_param *param); +void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param); +void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_cq_param *param); +void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_cq_param *param); +void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_cq_param *param); +void mlx5e_build_icosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param); +void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_sq_param *param); + +#endif /* __MLX5_EN_PARAMS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 4ab0d030b544..633b117eb13e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -167,23 +167,23 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, } /** - * update_buffer_lossy() - * max_mtu: netdev's max_mtu - * pfc_en: <input> current pfc configuration - * buffer: <input> current prio to buffer mapping - * xoff: <input> xoff value - * port_buffer: <output> port receive buffer configuration - * change: <output> + * update_buffer_lossy - Update buffer configuration based on pfc + * @max_mtu: netdev's max_mtu + * @pfc_en: <input> current pfc configuration + * @buffer: <input> current prio to buffer mapping + * @xoff: <input> xoff value + * @port_buffer: <output> port receive buffer configuration + * @change: <output> * - * Update buffer configuration based on pfc configuraiton and priority - * to buffer mapping. - * Buffer's lossy bit is changed to: - * lossless if there is at least one PFC enabled priority mapped to this buffer - * lossy if all priorities mapped to this buffer are PFC disabled + * Update buffer configuration based on pfc configuraiton and + * priority to buffer mapping. + * Buffer's lossy bit is changed to: + * lossless if there is at least one PFC enabled priority + * mapped to this buffer lossy if all priorities mapped to + * this buffer are PFC disabled * - * Return: - * Return 0 if no error. - * Set change to true if buffer configuration is modified. + * @return: 0 if no error, + * sets change to true if buffer configuration was modified. */ static int update_buffer_lossy(unsigned int max_mtu, u8 pfc_en, u8 *buffer, u32 xoff, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index eec07b34b4ad..a6a52806be45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -3,37 +3,53 @@ #include <net/vxlan.h> #include <net/gre.h> -#include "lib/vxlan.h" +#include <net/geneve.h> #include "en/tc_tun.h" +#include "en_tc.h" + +struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) +{ + if (netif_is_vxlan(tunnel_dev)) + return &vxlan_tunnel; + else if (netif_is_geneve(tunnel_dev)) + return &geneve_tunnel; + else if (netif_is_gretap(tunnel_dev) || + netif_is_ip6gretap(tunnel_dev)) + return &gre_tunnel; + else + return NULL; +} static int get_route_and_out_devs(struct mlx5e_priv *priv, struct net_device *dev, struct net_device **route_dev, struct net_device **out_dev) { + struct net_device *uplink_dev, *uplink_upper, *real_dev; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct net_device *uplink_dev, *uplink_upper; bool dst_is_lag_dev; + real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev; uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); uplink_upper = netdev_master_upper_dev_get(uplink_dev); dst_is_lag_dev = (uplink_upper && netif_is_lag_master(uplink_upper) && - dev == uplink_upper && + real_dev == uplink_upper && mlx5_lag_is_sriov(priv->mdev)); /* if the egress device isn't on the same HW e-switch or * it's a LAG device, use the uplink */ - if (!netdev_port_same_parent_id(priv->netdev, dev) || + if (!netdev_port_same_parent_id(priv->netdev, real_dev) || dst_is_lag_dev) { - *route_dev = uplink_dev; - *out_dev = *route_dev; + *route_dev = dev; + *out_dev = uplink_dev; } else { *route_dev = dev; if (is_vlan_dev(*route_dev)) *out_dev = uplink_dev; - else if (mlx5e_eswitch_rep(dev)) + else if (mlx5e_eswitch_rep(dev) && + mlx5e_is_valid_eswitch_fwd_dev(priv, dev)) *out_dev = *route_dev; else return -EOPNOTSUPP; @@ -74,7 +90,7 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, if (ret) return ret; - if (mlx5_lag_is_multipath(mdev) && !rt->rt_gateway) + if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) return -ENETUNREACH; #else return -EOPNOTSUPP; @@ -100,7 +116,7 @@ static const char *mlx5e_netdev_kind(struct net_device *dev) if (dev->rtnl_link_ops) return dev->rtnl_link_ops->kind; else - return ""; + return "unknown"; } static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, @@ -141,63 +157,15 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, return 0; } -static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key) -{ - __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); - struct udphdr *udp = (struct udphdr *)(buf); - struct vxlanhdr *vxh = (struct vxlanhdr *) - ((char *)udp + sizeof(struct udphdr)); - - udp->dest = tun_key->tp_dst; - vxh->vx_flags = VXLAN_HF_VNI; - vxh->vx_vni = vxlan_vni_field(tun_id); - - return 0; -} - -static int mlx5e_gen_gre_header(char buf[], struct ip_tunnel_key *tun_key) -{ - __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); - int hdr_len; - struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); - - /* the HW does not calculate GRE csum or sequences */ - if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) - return -EOPNOTSUPP; - - greh->protocol = htons(ETH_P_TEB); - - /* GRE key */ - hdr_len = gre_calc_hlen(tun_key->tun_flags); - greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); - if (tun_key->tun_flags & TUNNEL_KEY) { - __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - - *ptr = tun_id; - } - - return 0; -} - static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, struct mlx5e_encap_entry *e) { - int err = 0; - struct ip_tunnel_key *key = &e->tun_info.key; - - if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - *ip_proto = IPPROTO_UDP; - err = mlx5e_gen_vxlan_header(buf, key); - } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { - *ip_proto = IPPROTO_GRE; - err = mlx5e_gen_gre_header(buf, key); - } else { - pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n" - , e->tunnel_type); - err = -EOPNOTSUPP; + if (!e->tunnel) { + pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n"); + return -EOPNOTSUPP; } - return err; + return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e); } static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev, @@ -229,7 +197,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - struct ip_tunnel_key *tun_key = &e->tun_info.key; + const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct net_device *out_dev, *route_dev; struct neighbour *n = NULL; struct flowi4 fl4 = {}; @@ -253,7 +221,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, ipv4_encap_size = (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct iphdr) + - e->tunnel_hlen; + e->tunnel->calc_hlen(e); if (max_encap_size < ipv4_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -345,7 +313,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - struct ip_tunnel_key *tun_key = &e->tun_info.key; + const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct net_device *out_dev, *route_dev; struct neighbour *n = NULL; struct flowi6 fl6 = {}; @@ -369,7 +337,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, ipv6_encap_size = (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct ipv6hdr) + - e->tunnel_hlen; + e->tunnel->calc_hlen(e); if (max_encap_size < ipv6_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -455,27 +423,12 @@ out: return err; } -int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev) -{ - if (netif_is_vxlan(tunnel_dev)) - return MLX5E_TC_TUNNEL_TYPE_VXLAN; - else if (netif_is_gretap(tunnel_dev) || - netif_is_ip6gretap(tunnel_dev)) - return MLX5E_TC_TUNNEL_TYPE_GRETAP; - else - return MLX5E_TC_TUNNEL_TYPE_UNKNOWN; -} - bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev) { - int tunnel_type = mlx5e_tc_tun_get_type(netdev); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev); - if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN && - MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) - return true; - else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP && - MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) + if (tunnel && tunnel->can_offload(priv)) return true; else return false; @@ -486,71 +439,87 @@ int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, struct mlx5e_encap_entry *e, struct netlink_ext_ack *extack) { - e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev); - - if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - int dst_port = be16_to_cpu(e->tun_info.key.tp_dst); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev); - if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { - NL_SET_ERR_MSG_MOD(extack, - "vxlan udp dport was not registered with the HW"); - netdev_warn(priv->netdev, - "%d isn't an offloaded vxlan udp dport\n", - dst_port); - return -EOPNOTSUPP; - } - e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; - e->tunnel_hlen = VXLAN_HLEN; - } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { - e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; - e->tunnel_hlen = gre_calc_hlen(e->tun_info.key.tun_flags); - } else { + if (!tunnel) { e->reformat_type = -1; - e->tunnel_hlen = -1; return -EOPNOTSUPP; } - return 0; + + return tunnel->init_encap_attr(tunnel_dev, priv, e, extack); } -static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, - void *headers_c, - void *headers_v) +int mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v, u8 *match_level) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); + int err = 0; + + if (!tunnel) { + netdev_warn(priv->netdev, + "decapsulation offload is not supported for %s net device\n", + mlx5e_netdev_kind(filter_dev)); + err = -EOPNOTSUPP; + goto out; + } + + *match_level = tunnel->match_level; + + if (tunnel->parse_udp_ports) { + err = tunnel->parse_udp_ports(priv, spec, f, + headers_c, headers_v); + if (err) + goto out; + } + + if (tunnel->parse_tunnel) { + err = tunnel->parse_tunnel(priv, spec, f, + headers_c, headers_v); + if (err) + goto out; + } + +out: + return err; +} + +int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; - void *misc_c = MLX5_ADDR_OF(fte_match_param, - spec->match_criteria, - misc_parameters); - void *misc_v = MLX5_ADDR_OF(fte_match_param, - spec->match_value, - misc_parameters); struct flow_match_ports enc_ports; - flow_rule_match_enc_ports(rule, &enc_ports); - /* Full udp dst port must be given */ - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) || - memchr_inv(&enc_ports.mask->dst, 0xff, sizeof(enc_ports.mask->dst))) { + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { NL_SET_ERR_MSG_MOD(extack, - "VXLAN decap filter must include enc_dst_port condition"); + "UDP tunnel decap filter must include enc_dst_port condition"); netdev_warn(priv->netdev, - "VXLAN decap filter must include enc_dst_port condition\n"); + "UDP tunnel decap filter must include enc_dst_port condition\n"); return -EOPNOTSUPP; } - /* udp dst port must be knonwn as a VXLAN port */ - if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(enc_ports.key->dst))) { + flow_rule_match_enc_ports(rule, &enc_ports); + + if (memchr_inv(&enc_ports.mask->dst, 0xff, + sizeof(enc_ports.mask->dst))) { NL_SET_ERR_MSG_MOD(extack, - "Matched UDP port is not registered as a VXLAN port"); + "UDP tunnel decap filter must match enc_dst_port fully"); netdev_warn(priv->netdev, - "UDP port %d is not registered as a VXLAN port\n", - be16_to_cpu(enc_ports.key->dst)); + "UDP tunnel decap filter must match enc_dst_port fully\n"); return -EOPNOTSUPP; } - /* dst UDP port is valid here */ + /* match on UDP protocol and dst port number */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); @@ -559,90 +528,15 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(enc_ports.key->dst)); + /* UDP src port on outer header is generated by HW, + * so it is probably a bad idea to request matching it. + * Nonetheless, it is allowed. + */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(enc_ports.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(enc_ports.key->src)); - /* match on VNI */ - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_match_enc_keyid enc_keyid; - - flow_rule_match_enc_keyid(rule, &enc_keyid); - - MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, - be32_to_cpu(enc_keyid.mask->keyid)); - MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, - be32_to_cpu(enc_keyid.key->keyid)); - } return 0; } - -static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, - void *outer_headers_c, - void *outer_headers_v) -{ - void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); - - if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) { - NL_SET_ERR_MSG_MOD(f->common.extack, - "GRE HW offloading is not supported"); - netdev_warn(priv->netdev, "GRE HW offloading is not supported\n"); - return -EOPNOTSUPP; - } - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, - ip_protocol, IPPROTO_GRE); - - /* gre protocol*/ - MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); - MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); - - /* gre key */ - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_match_enc_keyid enc_keyid; - - flow_rule_match_enc_keyid(rule, &enc_keyid); - MLX5_SET(fte_match_set_misc, misc_c, - gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); - MLX5_SET(fte_match_set_misc, misc_v, - gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); - } - - return 0; -} - -int mlx5e_tc_tun_parse(struct net_device *filter_dev, - struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, - void *headers_c, - void *headers_v, u8 *match_level) -{ - int tunnel_type; - int err = 0; - - tunnel_type = mlx5e_tc_tun_get_type(filter_dev); - if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - *match_level = MLX5_MATCH_L4; - err = mlx5e_tc_tun_parse_vxlan(priv, spec, f, - headers_c, headers_v); - } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { - *match_level = MLX5_MATCH_L3; - err = mlx5e_tc_tun_parse_gretap(priv, spec, f, - headers_c, headers_v); - } else { - netdev_warn(priv->netdev, - "decapsulation offload is not supported for %s net device (%d)\n", - mlx5e_netdev_kind(filter_dev), tunnel_type); - return -EOPNOTSUPP; - } - return err; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index b63f15de899d..c362b9225dc2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -14,9 +14,41 @@ enum { MLX5E_TC_TUNNEL_TYPE_UNKNOWN, MLX5E_TC_TUNNEL_TYPE_VXLAN, - MLX5E_TC_TUNNEL_TYPE_GRETAP + MLX5E_TC_TUNNEL_TYPE_GENEVE, + MLX5E_TC_TUNNEL_TYPE_GRETAP, }; +struct mlx5e_tc_tunnel { + int tunnel_type; + enum mlx5_flow_match_level match_level; + + bool (*can_offload)(struct mlx5e_priv *priv); + int (*calc_hlen)(struct mlx5e_encap_entry *e); + int (*init_encap_attr)(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack); + int (*generate_ip_tun_hdr)(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e); + int (*parse_udp_ports)(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v); + int (*parse_tunnel)(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v); +}; + +extern struct mlx5e_tc_tunnel vxlan_tunnel; +extern struct mlx5e_tc_tunnel geneve_tunnel; +extern struct mlx5e_tc_tunnel gre_tunnel; + +struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev); + int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, struct mlx5e_priv *priv, struct mlx5e_encap_entry *e, @@ -30,15 +62,20 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e); -int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev); bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev); int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, void *headers_c, void *headers_v, u8 *match_level); +int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v); + #endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c new file mode 100644 index 000000000000..951ea26d96bc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/geneve.h> +#include "lib/geneve.h" +#include "en/tc_tun.h" + +#define MLX5E_GENEVE_VER 0 + +static bool mlx5e_tc_tun_can_offload_geneve(struct mlx5e_priv *priv) +{ + return !!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_GENEVE); +} + +static int mlx5e_tc_tun_calc_hlen_geneve(struct mlx5e_encap_entry *e) +{ + return sizeof(struct udphdr) + + sizeof(struct genevehdr) + + e->tun_info->options_len; +} + +static int mlx5e_tc_tun_check_udp_dport_geneve(struct mlx5e_priv *priv, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) + return -EOPNOTSUPP; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* Currently we support only default GENEVE + * port, so udp dst port must match. + */ + if (be16_to_cpu(enc_ports.key->dst) != GENEVE_UDP_PORT) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP dst port is not registered as a GENEVE port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a GENEVE port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_udp_ports_geneve(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + int err; + + err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); + if (err) + return err; + + return mlx5e_tc_tun_check_udp_dport_geneve(priv, f); +} + +static int mlx5e_tc_tun_init_encap_attr_geneve(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &geneve_tunnel; + + /* Reformat type for GENEVE encap is similar to VXLAN: + * in both cases the HW adds in the same place a + * defined encapsulation header that the SW provides. + */ + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + return 0; +} + +static void mlx5e_tunnel_id_to_vni(__be64 tun_id, __u8 *vni) +{ +#ifdef __BIG_ENDIAN + vni[0] = (__force __u8)(tun_id >> 16); + vni[1] = (__force __u8)(tun_id >> 8); + vni[2] = (__force __u8)tun_id; +#else + vni[0] = (__force __u8)((__force u64)tun_id >> 40); + vni[1] = (__force __u8)((__force u64)tun_id >> 48); + vni[2] = (__force __u8)((__force u64)tun_id >> 56); +#endif +} + +static int mlx5e_gen_ip_tunnel_header_geneve(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_info *tun_info = e->tun_info; + struct udphdr *udp = (struct udphdr *)(buf); + struct genevehdr *geneveh; + + geneveh = (struct genevehdr *)((char *)udp + sizeof(struct udphdr)); + + *ip_proto = IPPROTO_UDP; + + udp->dest = tun_info->key.tp_dst; + + memset(geneveh, 0, sizeof(*geneveh)); + geneveh->ver = MLX5E_GENEVE_VER; + geneveh->opt_len = tun_info->options_len / 4; + geneveh->oam = !!(tun_info->key.tun_flags & TUNNEL_OAM); + geneveh->critical = !!(tun_info->key.tun_flags & TUNNEL_CRIT_OPT); + mlx5e_tunnel_id_to_vni(tun_info->key.tun_id, geneveh->vni); + geneveh->proto_type = htons(ETH_P_TEB); + + if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) { + if (!geneveh->opt_len) + return -EOPNOTSUPP; + ip_tunnel_info_opts_get(geneveh->options, tun_info); + } + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_vni(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_keyid enc_keyid; + void *misc_c, *misc_v; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return 0; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid) + return 0; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_vni)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE VNI is not supported"); + netdev_warn(priv->netdev, "Matching on GENEVE VNI is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, geneve_vni, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, geneve_vni, be32_to_cpu(enc_keyid.key->keyid)); + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + u8 max_tlv_option_data_len = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_option_data_len); + u8 max_tlv_options = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_options); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + void *misc_c, *misc_v, *misc_3_c, *misc_3_v; + struct geneve_opt *option_key, *option_mask; + __be32 opt_data_key = 0, opt_data_mask = 0; + struct flow_match_enc_opts enc_opts; + int res = 0; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + misc_3_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_3); + misc_3_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) + return 0; + + flow_rule_match_enc_opts(rule, &enc_opts); + + if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.geneve_tlv_option_0_data)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options is not supported"); + netdev_warn(priv->netdev, + "Matching on GENEVE options is not supported\n"); + return -EOPNOTSUPP; + } + + /* make sure that we're talking about GENEVE options */ + + if (enc_opts.key->dst_opt_type != TUNNEL_GENEVE_OPT) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: option type is not GENEVE"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: option type is not GENEVE\n"); + return -EOPNOTSUPP; + } + + if (enc_opts.mask->len && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_geneve_opt_len)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options len is not supported"); + netdev_warn(priv->netdev, + "Matching on GENEVE options len is not supported\n"); + return -EOPNOTSUPP; + } + + /* max_geneve_tlv_option_data_len comes in multiples of 4 bytes, and it + * doesn't include the TLV option header. 'geneve_opt_len' is a total + * len of all the options, including the headers, also multiples of 4 + * bytes. Len that comes from the dissector is in bytes. + */ + + if ((enc_opts.key->len / 4) > ((max_tlv_option_data_len + 1) * max_tlv_options)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: unsupported options len"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: unsupported options len (len=%d)\n", + enc_opts.key->len); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, geneve_opt_len, enc_opts.mask->len / 4); + MLX5_SET(fte_match_set_misc, misc_v, geneve_opt_len, enc_opts.key->len / 4); + + /* we support matching on one option only, so just get it */ + option_key = (struct geneve_opt *)&enc_opts.key->data[0]; + option_mask = (struct geneve_opt *)&enc_opts.mask->data[0]; + + if (option_key->length > max_tlv_option_data_len) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: unsupported option len"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: unsupported option len (key=%d, mask=%d)\n", + option_key->length, option_mask->length); + return -EOPNOTSUPP; + } + + /* data can't be all 0 - fail to offload such rule */ + if (!memchr_inv(option_key->opt_data, 0, option_key->length * 4)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: can't match on 0 data field"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: can't match on 0 data field\n"); + return -EOPNOTSUPP; + } + + /* add new GENEVE TLV options object */ + res = mlx5_geneve_tlv_option_add(priv->mdev->geneve, option_key); + if (res) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: failed creating TLV opt object"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: failed creating TLV opt object (class:type:len = 0x%x:0x%x:%d)\n", + be16_to_cpu(option_key->opt_class), + option_key->type, option_key->length); + return res; + } + + /* In general, after creating the object, need to query it + * in order to check which option data to set in misc3. + * But we support only geneve_tlv_option_0_data, so no + * point querying at this stage. + */ + + memcpy(&opt_data_key, option_key->opt_data, option_key->length * 4); + memcpy(&opt_data_mask, option_mask->opt_data, option_mask->length * 4); + MLX5_SET(fte_match_set_misc3, misc_3_v, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_key)); + MLX5_SET(fte_match_set_misc3, misc_3_c, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask)); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_params(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct netlink_ext_ack *extack = f->common.extack; + + /* match on OAM - packets with OAM bit on should NOT be offloaded */ + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_oam)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE OAM is not supported"); + netdev_warn(priv->netdev, "Matching on GENEVE OAM is not supported\n"); + return -EOPNOTSUPP; + } + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_oam); + MLX5_SET(fte_match_set_misc, misc_v, geneve_oam, 0); + + /* Match on GENEVE protocol. We support only Transparent Eth Bridge. */ + + if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_geneve_protocol_type)) { + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_protocol_type); + MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ETH_P_TEB); + } + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + int err; + + err = mlx5e_tc_tun_parse_geneve_params(priv, spec, f); + if (err) + return err; + + err = mlx5e_tc_tun_parse_geneve_vni(priv, spec, f); + if (err) + return err; + + return mlx5e_tc_tun_parse_geneve_options(priv, spec, f); +} + +struct mlx5e_tc_tunnel geneve_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GENEVE, + .match_level = MLX5_MATCH_L4, + .can_offload = mlx5e_tc_tun_can_offload_geneve, + .calc_hlen = mlx5e_tc_tun_calc_hlen_geneve, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_geneve, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_geneve, + .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_geneve, + .parse_tunnel = mlx5e_tc_tun_parse_geneve, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c new file mode 100644 index 000000000000..58b13192df23 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/gre.h> +#include "en/tc_tun.h" + +static bool mlx5e_tc_tun_can_offload_gretap(struct mlx5e_priv *priv) +{ + return !!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap); +} + +static int mlx5e_tc_tun_calc_hlen_gretap(struct mlx5e_encap_entry *e) +{ + return gre_calc_hlen(e->tun_info->key.tun_flags); +} + +static int mlx5e_tc_tun_init_encap_attr_gretap(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &gre_tunnel; + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; + return 0; +} + +static int mlx5e_gen_ip_tunnel_header_gretap(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + int hdr_len; + + *ip_proto = IPPROTO_GRE; + + /* the HW does not calculate GRE csum or sequences */ + if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) + return -EOPNOTSUPP; + + greh->protocol = htons(ETH_P_TEB); + + /* GRE key */ + hdr_len = mlx5e_tc_tun_calc_hlen_gretap(e); + greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); + if (tun_key->tun_flags & TUNNEL_KEY) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + *ptr = tun_id; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE); + + /* gre protocol */ + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); + MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); + + /* gre key */ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + MLX5_SET(fte_match_set_misc, misc_c, + gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, + gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); + } + + return 0; +} + +struct mlx5e_tc_tunnel gre_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GRETAP, + .match_level = MLX5_MATCH_L3, + .can_offload = mlx5e_tc_tun_can_offload_gretap, + .calc_hlen = mlx5e_tc_tun_calc_hlen_gretap, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_gretap, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_gretap, + .parse_udp_ports = NULL, + .parse_tunnel = mlx5e_tc_tun_parse_gretap, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c new file mode 100644 index 000000000000..37b176801bcc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/vxlan.h> +#include "lib/vxlan.h" +#include "en/tc_tun.h" + +static bool mlx5e_tc_tun_can_offload_vxlan(struct mlx5e_priv *priv) +{ + return !!MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap); +} + +static int mlx5e_tc_tun_calc_hlen_vxlan(struct mlx5e_encap_entry *e) +{ + return VXLAN_HLEN; +} + +static int mlx5e_tc_tun_check_udp_dport_vxlan(struct mlx5e_priv *priv, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) + return -EOPNOTSUPP; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* check the UDP destination port validity */ + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, + be16_to_cpu(enc_ports.key->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP dst port is not registered as a VXLAN port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a VXLAN port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_udp_ports_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + int err = 0; + + err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); + if (err) + return err; + + return mlx5e_tc_tun_check_udp_dport_vxlan(priv, f); +} + +static int mlx5e_tc_tun_init_encap_attr_vxlan(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + int dst_port = be16_to_cpu(e->tun_info->key.tp_dst); + + e->tunnel = &vxlan_tunnel; + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { + NL_SET_ERR_MSG_MOD(extack, + "vxlan udp dport was not registered with the HW"); + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", + dst_port); + return -EOPNOTSUPP; + } + + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + return 0; +} + +static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + struct udphdr *udp = (struct udphdr *)(buf); + struct vxlanhdr *vxh; + + vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); + *ip_proto = IPPROTO_UDP; + + udp->dest = tun_key->tp_dst; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(tun_id); + + return 0; +} + +static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_keyid enc_keyid; + void *misc_c, *misc_v; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return 0; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid) + return 0; + + /* match on VNI is required */ + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_vxlan_vni)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on VXLAN VNI is not supported"); + netdev_warn(priv->netdev, + "Matching on VXLAN VNI is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, + be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, + be32_to_cpu(enc_keyid.key->keyid)); + + return 0; +} + +struct mlx5e_tc_tunnel vxlan_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_VXLAN, + .match_level = MLX5_MATCH_L4, + .can_offload = mlx5e_tc_tun_can_offload_vxlan, + .calc_hlen = mlx5e_tc_tun_calc_hlen_vxlan, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_vxlan, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan, + .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan, + .parse_tunnel = mlx5e_tc_tun_parse_vxlan, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h new file mode 100644 index 000000000000..ddfe19adb3d9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TXRX_H___ +#define __MLX5_EN_TXRX_H___ + +#include "en.h" + +#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS +#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ + MLX5E_SQ_NOPS_ROOM) + +#ifndef CONFIG_MLX5_EN_TLS +#define MLX5E_SQ_TLS_ROOM (0) +#else +/* TLS offload requires additional stop_room for: + * - a resync SKB. + * kTLS offload requires additional stop_room for: + * - static params WQE, + * - progress params WQE, and + * - resync DUMP per frag. + */ +#define MLX5E_SQ_TLS_ROOM \ + (MLX5_SEND_WQE_MAX_WQEBBS + \ + MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + \ + MAX_SKB_FRAGS * MLX5E_KTLS_MAX_DUMP_WQEBBS) +#endif + +#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) + +static inline bool +mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) +{ + return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc); +} + +static inline void * +mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, size_t size, u16 *pi) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + void *wqe; + + *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + wqe = mlx5_wq_cyc_get_wqe(wq, *pi); + memset(wqe, 0, size); + + return wqe; +} + +static inline struct mlx5e_tx_wqe * +mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + + (*pc)++; + + return wqe; +} + +static inline struct mlx5e_tx_wqe * +mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + cseg->fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL; + + (*pc)++; + + return wqe; +} + +static inline void +mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq, + u16 pi, u16 nnops) +{ + struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; + + edge_wi = wi + nnops; + + /* fill sq frag edge with nops to avoid wqe wrapping two pages */ + for (; wi < edge_wi; wi++) { + wi->skb = NULL; + wi->num_wqebbs = 1; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } + sq->stats->nop += nnops; +} + +static inline void +mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map, + struct mlx5_wqe_ctrl_seg *ctrl) +{ + ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + *wq->db = cpu_to_be32(pc); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + mlx5_write64((__be32 *)ctrl, uar_map); +} + +static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5e_tx_wqe *wqe) +{ + return !!wqe->ctrl.tisn; +} + +static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) +{ + struct mlx5_core_cq *mcq; + + mcq = &cq->mcq; + mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc); +} + +static inline struct mlx5e_sq_dma * +mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) +{ + return &sq->db.dma_fifo[i & sq->dma_fifo_mask]; +} + +static inline void +mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size, + enum mlx5e_dma_map_type map_type) +{ + struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++); + + dma->addr = addr; + dma->size = size; + dma->type = map_type; +} + +static inline void +mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) +{ + switch (dma->type) { + case MLX5E_DMA_MAP_SINGLE: + dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + case MLX5E_DMA_MAP_PAGE: + dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + default: + WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n"); + } +} + +/* SW parser related functions */ + +struct mlx5e_swp_spec { + __be16 l3_proto; + u8 l4_proto; + u8 is_tun; + __be16 tun_l3_proto; + u8 tun_l4_proto; +}; + +static inline void +mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, + struct mlx5e_swp_spec *swp_spec) +{ + /* SWP offsets are in 2-bytes words */ + eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; + if (swp_spec->l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; + if (swp_spec->l4_proto) { + eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2; + if (swp_spec->l4_proto == IPPROTO_UDP) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP; + } + + if (swp_spec->is_tun) { + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */ + eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2; + if (swp_spec->l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + } + switch (swp_spec->tun_l4_proto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + /* fall through */ + case IPPROTO_TCP: + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + } +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index cad34d6f5f45..b0b982cf69bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -31,11 +31,13 @@ */ #include <linux/bpf_trace.h> +#include <net/xdp_sock.h> #include "en/xdp.h" +#include "en/params.h" -int mlx5e_xdp_max_mtu(struct mlx5e_params *params) +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM; + int hr = mlx5e_get_linear_rq_headroom(params, xsk); /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). * The condition checked in mlx5e_rx_is_linear_skb is: @@ -54,25 +56,70 @@ int mlx5e_xdp_max_mtu(struct mlx5e_params *params) } static inline bool -mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di, - struct xdp_buff *xdp) +mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, + struct mlx5e_dma_info *di, struct xdp_buff *xdp) { + struct mlx5e_xdp_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; + struct xdp_frame *xdpf; + dma_addr_t dma_addr; - xdpi.xdpf = convert_to_xdp_frame(xdp); - if (unlikely(!xdpi.xdpf)) + xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) return false; - xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf); - dma_sync_single_for_device(sq->pdev, xdpi.dma_addr, - xdpi.xdpf->len, PCI_DMA_TODEVICE); - xdpi.di = *di; - return sq->xmit_xdp_frame(sq, &xdpi); + xdptxd.data = xdpf->data; + xdptxd.len = xdpf->len; + + if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) { + /* The xdp_buff was in the UMEM and was copied into a newly + * allocated page. The UMEM page was returned via the ZCA, and + * this new page has to be mapped at this point and has to be + * unmapped and returned via xdp_return_frame on completion. + */ + + /* Prevent double recycling of the UMEM page. Even in case this + * function returns false, the xdp_buff shouldn't be recycled, + * as it was already done in xdp_convert_zc_to_xdp_frame. + */ + __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ + + xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; + + dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len, + DMA_TO_DEVICE); + if (dma_mapping_error(sq->pdev, dma_addr)) { + xdp_return_frame(xdpf); + return false; + } + + xdptxd.dma_addr = dma_addr; + xdpi.frame.xdpf = xdpf; + xdpi.frame.dma_addr = dma_addr; + } else { + /* Driver assumes that convert_to_xdp_frame returns an xdp_frame + * that points to the same memory region as the original + * xdp_buff. It allows to map the memory only once and to use + * the DMA_BIDIRECTIONAL mode. + */ + + xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE; + + dma_addr = di->addr + (xdpf->data - (void *)xdpf); + dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, + DMA_TO_DEVICE); + + xdptxd.dma_addr = dma_addr; + xdpi.page.rq = rq; + xdpi.page.di = *di; + } + + return sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0); } /* returns true if packet was consumed by xdp */ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - void *va, u16 *rx_headroom, u32 *len) + void *va, u16 *rx_headroom, u32 *len, bool xsk) { struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); struct xdp_buff xdp; @@ -86,16 +133,20 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + *len; xdp.data_hard_start = va; + if (xsk) + xdp.handle = di->xsk.handle; xdp.rxq = &rq->xdp_rxq; act = bpf_prog_run_xdp(prog, &xdp); + if (xsk) + xdp.handle += xdp.data - xdp.data_hard_start; switch (act) { case XDP_PASS: *rx_headroom = xdp.data - xdp.data_hard_start; *len = xdp.data_end - xdp.data; return false; case XDP_TX: - if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp))) + if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp))) goto xdp_abort; __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ return true; @@ -105,8 +156,9 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, if (unlikely(err)) goto xdp_abort; __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); - rq->xdpsq.redirect_flush = true; - mlx5e_page_dma_unmap(rq, di); + __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); + if (!xsk) + mlx5e_page_dma_unmap(rq, di); rq->stats->xdp_redirect++; return true; default: @@ -125,6 +177,7 @@ xdp_abort: static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) { struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5e_xdpsq_stats *stats = sq->stats; struct mlx5_wq_cyc *wq = &sq->wq; u8 wqebbs; u16 pi; @@ -132,7 +185,9 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) mlx5e_xdpsq_fetch_wqe(sq, &session->wqe); prefetchw(session->wqe->data); - session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT; + session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT; + session->pkt_count = 0; + session->complete = 0; pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); @@ -151,9 +206,13 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) MLX5E_XDP_MPW_MAX_WQEBBS); session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs; + + mlx5e_xdp_update_inline_state(sq); + + stats->mpwqe++; } -static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) +void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) { struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; @@ -167,7 +226,7 @@ static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count); wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS); - wi->num_ds = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT; + wi->num_pkts = session->pkt_count; sq->pc += wi->num_wqebbs; @@ -176,36 +235,58 @@ static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) session->wqe = NULL; /* Close session */ } +enum { + MLX5E_XDP_CHECK_OK = 1, + MLX5E_XDP_CHECK_START_MPWQE = 2, +}; + +static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) +{ + if (unlikely(!sq->mpwqe.wqe)) { + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, + MLX5_SEND_WQE_MAX_WQEBBS))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->stats->full++; + return -EBUSY; + } + + return MLX5E_XDP_CHECK_START_MPWQE; + } + + return MLX5E_XDP_CHECK_OK; +} + static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, - struct mlx5e_xdp_info *xdpi) + struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xdp_info *xdpi, + int check_result) { struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; - dma_addr_t dma_addr = xdpi->dma_addr; - struct xdp_frame *xdpf = xdpi->xdpf; - unsigned int dma_len = xdpf->len; - - if (unlikely(sq->hw_mtu < dma_len)) { + if (unlikely(xdptxd->len > sq->hw_mtu)) { stats->err++; return false; } - if (unlikely(!session->wqe)) { - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - MLX5_SEND_WQE_MAX_WQEBBS))) { - /* SQ is full, ring doorbell */ - mlx5e_xmit_xdp_doorbell(sq); - stats->full++; - return false; - } + if (!check_result) + check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq); + if (unlikely(check_result < 0)) + return false; + if (check_result == MLX5E_XDP_CHECK_START_MPWQE) { + /* Start the session when nothing can fail, so it's guaranteed + * that if there is an active session, it has at least one dseg, + * and it's safe to complete it at any time. + */ mlx5e_xdp_mpwqe_session_start(sq); } - mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len); + mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); - if (unlikely(session->ds_count == session->max_ds_count)) + if (unlikely(session->complete || + session->ds_count == session->max_ds_count)) mlx5e_xdp_mpwqe_complete(sq); mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); @@ -213,7 +294,22 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, return true; } -static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) +static int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq) +{ + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->stats->full++; + return -EBUSY; + } + + return MLX5E_XDP_CHECK_OK; +} + +static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xdp_info *xdpi, + int check_result) { struct mlx5_wq_cyc *wq = &sq->wq; u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); @@ -223,9 +319,8 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info * struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg = wqe->data; - struct xdp_frame *xdpf = xdpi->xdpf; - dma_addr_t dma_addr = xdpi->dma_addr; - unsigned int dma_len = xdpf->len; + dma_addr_t dma_addr = xdptxd->dma_addr; + u32 dma_len = xdptxd->len; struct mlx5e_xdpsq_stats *stats = sq->stats; @@ -236,18 +331,16 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info * return false; } - if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) { - /* SQ is full, ring doorbell */ - mlx5e_xmit_xdp_doorbell(sq); - stats->full++; + if (!check_result) + check_result = mlx5e_xmit_xdp_frame_check(sq); + if (unlikely(check_result < 0)) return false; - } cseg->fm_ce_se = 0; /* copy the inline part if required */ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE); + memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE); eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); dma_len -= MLX5E_XDP_MIN_INLINE; dma_addr += MLX5E_XDP_MIN_INLINE; @@ -269,12 +362,43 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info * return true; } -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) +static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_wqe_info *wi, + u32 *xsk_frames, + bool recycle) +{ + struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; + u16 i; + + for (i = 0; i < wi->num_pkts; i++) { + struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + + switch (xdpi.mode) { + case MLX5E_XDP_XMIT_MODE_FRAME: + /* XDP_TX from the XSK RQ and XDP_REDIRECT */ + dma_unmap_single(sq->pdev, xdpi.frame.dma_addr, + xdpi.frame.xdpf->len, DMA_TO_DEVICE); + xdp_return_frame(xdpi.frame.xdpf); + break; + case MLX5E_XDP_XMIT_MODE_PAGE: + /* XDP_TX from the regular RQ */ + mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle); + break; + case MLX5E_XDP_XMIT_MODE_XSK: + /* AF_XDP send */ + (*xsk_frames)++; + break; + default: + WARN_ON_ONCE(true); + } + } +} + +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) { - struct mlx5e_xdp_info_fifo *xdpi_fifo; struct mlx5e_xdpsq *sq; struct mlx5_cqe64 *cqe; - bool is_redirect; + u32 xsk_frames = 0; u16 sqcc; int i; @@ -287,9 +411,6 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) if (!cqe) return false; - is_redirect = !rq; - xdpi_fifo = &sq->db.xdpi_fifo; - /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), * otherwise a cq overrun may occur */ @@ -311,7 +432,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) do { struct mlx5e_xdp_wqe_info *wi; - u16 ci, j; + u16 ci; last_wqe = (sqcc == wqe_counter); ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); @@ -319,22 +440,13 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) sqcc += wi->num_wqebbs; - for (j = 0; j < wi->num_ds; j++) { - struct mlx5e_xdp_info xdpi = - mlx5e_xdpi_fifo_pop(xdpi_fifo); - - if (is_redirect) { - dma_unmap_single(sq->pdev, xdpi.dma_addr, - xdpi.xdpf->len, DMA_TO_DEVICE); - xdp_return_frame(xdpi.xdpf); - } else { - /* Recycle RX page */ - mlx5e_page_release(rq, &xdpi.di, true); - } - } + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true); } while (!last_wqe); } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + if (xsk_frames) + xsk_umem_complete_tx(sq->umem, xsk_frames); + sq->stats->cqes += i; mlx5_cqwq_update_db_record(&cq->wq); @@ -346,34 +458,24 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) { - struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; - bool is_redirect = !rq; + u32 xsk_frames = 0; while (sq->cc != sq->pc) { struct mlx5e_xdp_wqe_info *wi; - u16 ci, i; + u16 ci; ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc); wi = &sq->db.wqe_info[ci]; sq->cc += wi->num_wqebbs; - for (i = 0; i < wi->num_ds; i++) { - struct mlx5e_xdp_info xdpi = - mlx5e_xdpi_fifo_pop(xdpi_fifo); - - if (is_redirect) { - dma_unmap_single(sq->pdev, xdpi.dma_addr, - xdpi.xdpf->len, DMA_TO_DEVICE); - xdp_return_frame(xdpi.xdpf); - } else { - /* Recycle RX page */ - mlx5e_page_release(rq, &xdpi.di, false); - } - } + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false); } + + if (xsk_frames) + xsk_umem_complete_tx(sq->umem, xsk_frames); } int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -401,21 +503,27 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; + struct mlx5e_xdp_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; - xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) { + xdptxd.data = xdpf->data; + xdptxd.len = xdpf->len; + xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data, + xdptxd.len, DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) { xdp_return_frame_rx_napi(xdpf); drops++; continue; } - xdpi.xdpf = xdpf; + xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; + xdpi.frame.xdpf = xdpf; + xdpi.frame.dma_addr = xdptxd.dma_addr; - if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) { - dma_unmap_single(sq->pdev, xdpi.dma_addr, - xdpf->len, DMA_TO_DEVICE); + if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0))) { + dma_unmap_single(sq->pdev, xdptxd.dma_addr, + xdptxd.len, DMA_TO_DEVICE); xdp_return_frame_rx_napi(xdpf); drops++; } @@ -432,21 +540,23 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) { - struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; + struct mlx5e_xdpsq *xdpsq = rq->xdpsq; if (xdpsq->mpwqe.wqe) mlx5e_xdp_mpwqe_complete(xdpsq); mlx5e_xmit_xdp_doorbell(xdpsq); - if (xdpsq->redirect_flush) { + if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) { xdp_do_flush_map(); - xdpsq->redirect_flush = false; + __clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); } } void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw) { + sq->xmit_xdp_frame_check = is_mpw ? + mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check; sq->xmit_xdp_frame = is_mpw ? mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 553956cadc8a..b90923932668 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -33,17 +33,20 @@ #define __MLX5_EN_XDP_H__ #include "en.h" +#include "en/txrx.h" #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) #define MLX5E_XDP_TX_EMPTY_DS_COUNT \ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) -int mlx5e_xdp_max_mtu(struct mlx5e_params *params); +struct mlx5e_xsk_param; +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - void *va, u16 *rx_headroom, u32 *len); -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq); -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq); + void *va, u16 *rx_headroom, u32 *len, bool xsk); +void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq); +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw); void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq); int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -66,6 +69,21 @@ static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); } +static inline void mlx5e_xdp_set_open(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_XDP_OPEN, &priv->state); +} + +static inline void mlx5e_xdp_set_closed(struct mlx5e_priv *priv) +{ + clear_bit(MLX5E_STATE_XDP_OPEN, &priv->state); +} + +static inline bool mlx5e_xdp_is_open(struct mlx5e_priv *priv) +{ + return test_bit(MLX5E_STATE_XDP_OPEN, &priv->state); +} + static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) { if (sq->doorbell_cseg) { @@ -74,16 +92,67 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) } } +/* Enable inline WQEs to shift some load from a congested HCA (HW) to + * a less congested cpu (SW). + */ +static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq) +{ + u16 outstanding = sq->xdpi_fifo_pc - sq->xdpi_fifo_cc; + struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + +#define MLX5E_XDP_INLINE_WATERMARK_LOW 10 +#define MLX5E_XDP_INLINE_WATERMARK_HIGH 128 + + if (session->inline_on) { + if (outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW) + session->inline_on = 0; + return; + } + + /* inline is false */ + if (outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH) + session->inline_on = 1; +} + static inline void -mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, dma_addr_t dma_addr, u16 dma_len) +mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xdpsq_stats *stats) { struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; struct mlx5_wqe_data_seg *dseg = - (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count++; + (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count; + u32 dma_len = xdptxd->len; + + session->pkt_count++; + +#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg)) - dseg->addr = cpu_to_be64(dma_addr); + if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) { + struct mlx5_wqe_inline_seg *inline_dseg = + (struct mlx5_wqe_inline_seg *)dseg; + u16 ds_len = sizeof(*inline_dseg) + dma_len; + u16 ds_cnt = DIV_ROUND_UP(ds_len, MLX5_SEND_WQE_DS); + + if (unlikely(session->ds_count + ds_cnt > session->max_ds_count)) { + /* Not enough space for inline wqe, send with memory pointer */ + session->complete = true; + goto no_inline; + } + + inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG); + memcpy(inline_dseg->data, xdptxd->data, dma_len); + + session->ds_count += ds_cnt; + stats->inlnw++; + return; + } + +no_inline: + dseg->addr = cpu_to_be64(xdptxd->dma_addr); dseg->byte_count = cpu_to_be32(dma_len); dseg->lkey = sq->mkey_be; + session->ds_count++; } static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, @@ -110,5 +179,4 @@ mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo) { return fifo->xi[(*fifo->cc)++ & fifo->mask]; } - #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile new file mode 100644 index 000000000000..5ee42991900a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/../.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c new file mode 100644 index 000000000000..6a55573ec8f2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "rx.h" +#include "en/xdp.h" +#include <net/xdp_sock.h> + +/* RX data path */ + +bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count) +{ + /* Check in advance that we have enough frames, instead of allocating + * one-by-one, failing and moving frames to the Reuse Ring. + */ + return xsk_umem_has_addrs_rq(rq->umem, count); +} + +int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct xdp_umem *umem = rq->umem; + u64 handle; + + if (!xsk_umem_peek_addr_rq(umem, &handle)) + return -ENOMEM; + + dma_info->xsk.handle = handle + rq->buff.umem_headroom; + dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle); + + /* No need to add headroom to the DMA address. In striding RQ case, we + * just provide pages for UMR, and headroom is counted at the setup + * stage when creating a WQE. In non-striding RQ case, headroom is + * accounted in mlx5e_alloc_rx_wqe. + */ + dma_info->addr = xdp_umem_get_dma(umem, handle); + + xsk_umem_discard_addr_rq(umem); + + dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + return 0; +} + +static inline void mlx5e_xsk_recycle_frame(struct mlx5e_rq *rq, u64 handle) +{ + xsk_umem_fq_reuse(rq->umem, handle & rq->umem->chunk_mask); +} + +/* XSKRQ uses pages from UMEM, they must not be released. They are returned to + * the userspace if possible, and if not, this function is called to reuse them + * in the driver. + */ +void mlx5e_xsk_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + mlx5e_xsk_recycle_frame(rq, dma_info->xsk.handle); +} + +/* Return a frame back to the hardware to fill in again. It is used by XDP when + * the XDP program returns XDP_TX or XDP_REDIRECT not to an XSKMAP. + */ +void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle) +{ + struct mlx5e_rq *rq = container_of(zca, struct mlx5e_rq, zca); + + mlx5e_xsk_recycle_frame(rq, handle); +} + +static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data, + u32 cqe_bcnt) +{ + struct sk_buff *skb; + + skb = napi_alloc_skb(rq->cq.napi, cqe_bcnt); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + + skb_put_data(skb, data, cqe_bcnt); + + return skb; +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx) +{ + struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx]; + u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom; + u32 cqe_bcnt32 = cqe_bcnt; + void *va, *data; + u32 frag_size; + bool consumed; + + /* Check packet size. Note LRO doesn't use linear SKB */ + if (unlikely(cqe_bcnt > rq->hw_mtu)) { + rq->stats->oversize_pkts_sw_drop++; + return NULL; + } + + /* head_offset is not used in this function, because di->xsk.data and + * di->addr point directly to the necessary place. Furthermore, in the + * current implementation, one page = one packet = one frame, so + * head_offset should always be 0. + */ + WARN_ON_ONCE(head_offset); + + va = di->xsk.data; + data = va + rx_headroom; + frag_size = rq->buff.headroom + cqe_bcnt32; + + dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL); + prefetch(data); + + rcu_read_lock(); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, true); + rcu_read_unlock(); + + /* Possible flows: + * - XDP_REDIRECT to XSKMAP: + * The page is owned by the userspace from now. + * - XDP_TX and other XDP_REDIRECTs: + * The page was returned by ZCA and recycled. + * - XDP_DROP: + * Recycle the page. + * - XDP_PASS: + * Allocate an SKB, copy the data and recycle the page. + * + * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its + * size is the same as the Driver RX Ring's size, and pages for WQEs are + * allocated first from the Reuse Ring, so it has enough space. + */ + + if (likely(consumed)) { + if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) + __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + return NULL; /* page/packet was consumed by XDP */ + } + + /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the + * frame. On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt32); +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt) +{ + struct mlx5e_dma_info *di = wi->di; + u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom; + void *va, *data; + bool consumed; + u32 frag_size; + + /* wi->offset is not used in this function, because di->xsk.data and + * di->addr point directly to the necessary place. Furthermore, in the + * current implementation, one page = one packet = one frame, so + * wi->offset should always be 0. + */ + WARN_ON_ONCE(wi->offset); + + va = di->xsk.data; + data = va + rx_headroom; + frag_size = rq->buff.headroom + cqe_bcnt; + + dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL); + prefetch(data); + + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { + rq->stats->wqe_err++; + return NULL; + } + + rcu_read_lock(); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, true); + rcu_read_unlock(); + + if (likely(consumed)) + return NULL; /* page/packet was consumed by XDP */ + + /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse + * will be handled by mlx5e_put_rx_frag. + * On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h new file mode 100644 index 000000000000..307b923a1361 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_RX_H__ +#define __MLX5_EN_XSK_RX_H__ + +#include "en.h" + +/* RX data path */ + +bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count); +int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info); +void mlx5e_xsk_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info); +void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle); +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx); +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt); + +#endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c new file mode 100644 index 000000000000..aaffa6f68dc0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "setup.h" +#include "en/params.h" + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev) +{ + /* AF_XDP doesn't support frames larger than PAGE_SIZE, and the current + * mlx5e XDP implementation doesn't support multiple packets per page. + */ + if (xsk->chunk_size != PAGE_SIZE) + return false; + + /* Current MTU and XSK headroom don't allow packets to fit the frames. */ + if (mlx5e_rx_get_linear_frag_sz(params, xsk) > xsk->chunk_size) + return false; + + /* frag_sz is different for regular and XSK RQs, so ensure that linear + * SKB mode is possible. + */ + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); + default: /* MLX5_WQ_TYPE_CYCLIC */ + return mlx5e_rx_is_linear_skb(params, xsk); + } +} + +static void mlx5e_build_xskicosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); +} + +static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_channel_param *cparam) +{ + const u8 xskicosq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + + mlx5e_build_rq_param(priv, params, xsk, &cparam->rq); + mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); + mlx5e_build_xskicosq_param(priv, xskicosq_size, &cparam->icosq); + mlx5e_build_rx_cq_param(priv, params, xsk, &cparam->rx_cq); + mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq); + mlx5e_build_ico_cq_param(priv, xskicosq_size, &cparam->icosq_cq); +} + +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_channel *c) +{ + struct mlx5e_channel_param cparam = {}; + struct dim_cq_moder icocq_moder = {}; + int err; + + if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) + return -EINVAL; + + mlx5e_build_xsk_cparam(priv, params, xsk, &cparam); + + err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq); + if (unlikely(err)) + return err; + + err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq); + if (unlikely(err)) + goto err_close_rx_cq; + + err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq); + if (unlikely(err)) + goto err_close_rq; + + /* Create a separate SQ, so that when the UMEM is disabled, we could + * close this SQ safely and stop receiving CQEs. In other case, e.g., if + * the XDPSQ was used instead, we might run into trouble when the UMEM + * is disabled and then reenabled, but the SQ continues receiving CQEs + * from the old UMEM. + */ + err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true); + if (unlikely(err)) + goto err_close_tx_cq; + + err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq); + if (unlikely(err)) + goto err_close_sq; + + /* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be + * triggered and NAPI to be called on the correct CPU. + */ + err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq); + if (unlikely(err)) + goto err_close_icocq; + + spin_lock_init(&c->xskicosq_lock); + + set_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + + return 0; + +err_close_icocq: + mlx5e_close_cq(&c->xskicosq.cq); + +err_close_sq: + mlx5e_close_xdpsq(&c->xsksq); + +err_close_tx_cq: + mlx5e_close_cq(&c->xsksq.cq); + +err_close_rq: + mlx5e_close_rq(&c->xskrq); + +err_close_rx_cq: + mlx5e_close_cq(&c->xskrq.cq); + + return err; +} + +void mlx5e_close_xsk(struct mlx5e_channel *c) +{ + clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + napi_synchronize(&c->napi); + + mlx5e_close_rq(&c->xskrq); + mlx5e_close_cq(&c->xskrq.cq); + mlx5e_close_icosq(&c->xskicosq); + mlx5e_close_cq(&c->xskicosq.cq); + mlx5e_close_xdpsq(&c->xsksq); + mlx5e_close_cq(&c->xsksq.cq); +} + +void mlx5e_activate_xsk(struct mlx5e_channel *c) +{ + set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + /* TX queue is created active. */ + mlx5e_trigger_irq(&c->xskicosq); +} + +void mlx5e_deactivate_xsk(struct mlx5e_channel *c) +{ + mlx5e_deactivate_rq(&c->xskrq); + /* TX queue is disabled on close. */ +} + +static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn) +{ + struct mlx5e_redirect_rqt_param direct_rrp = { + .is_rss = false, + { + .rqn = rqn, + }, + }; + + u32 rqtn = priv->xsk_tir[ix].rqt.rqtn; + + return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); +} + +int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c) +{ + return mlx5e_redirect_xsk_rqt(priv, c->ix, c->xskrq.rqn); +} + +int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix) +{ + return mlx5e_redirect_xsk_rqt(priv, ix, priv->drop_rq.rqn); +} + +int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + int err, i; + + if (!priv->xsk.refcnt) + return 0; + + for (i = 0; i < chs->num; i++) { + struct mlx5e_channel *c = chs->c[i]; + + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + continue; + + err = mlx5e_xsk_redirect_rqt_to_channel(priv, c); + if (unlikely(err)) + goto err_stop; + } + + return 0; + +err_stop: + for (i--; i >= 0; i--) { + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) + continue; + + mlx5e_xsk_redirect_rqt_to_drop(priv, i); + } + + return err; +} + +void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + int i; + + if (!priv->xsk.refcnt) + return; + + for (i = 0; i < chs->num; i++) { + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) + continue; + + mlx5e_xsk_redirect_rqt_to_drop(priv, i); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h new file mode 100644 index 000000000000..0dd11b81c046 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_SETUP_H__ +#define __MLX5_EN_XSK_SETUP_H__ + +#include "en.h" + +struct mlx5e_xsk_param; + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev); +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_channel *c); +void mlx5e_close_xsk(struct mlx5e_channel *c); +void mlx5e_activate_xsk(struct mlx5e_channel *c); +void mlx5e_deactivate_xsk(struct mlx5e_channel *c); +int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c); +int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix); +int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs); +void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs); + +#endif /* __MLX5_EN_XSK_SETUP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c new file mode 100644 index 000000000000..35e188cf4ea4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "tx.h" +#include "umem.h" +#include "en/xdp.h" +#include "en/params.h" +#include <net/xdp_sock.h> + +int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_channel *c; + u16 ix; + + if (unlikely(!mlx5e_xdp_is_open(priv))) + return -ENETDOWN; + + if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) + return -EINVAL; + + c = priv->channels.c[ix]; + + if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))) + return -ENXIO; + + if (!napi_if_scheduled_mark_missed(&c->napi)) { + spin_lock(&c->xskicosq_lock); + mlx5e_trigger_irq(&c->xskicosq); + spin_unlock(&c->xskicosq_lock); + } + + return 0; +} + +/* When TX fails (because of the size of the packet), we need to get completions + * in order, so post a NOP to get a CQE. Since AF_XDP doesn't distinguish + * between successful TX and errors, handling in mlx5e_poll_xdpsq_cq is the + * same. + */ +static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_info *xdpi) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; + struct mlx5e_tx_wqe *nopwqe; + + wi->num_wqebbs = 1; + wi->num_pkts = 1; + + nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); + sq->doorbell_cseg = &nopwqe->ctrl; +} + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) +{ + struct xdp_umem *umem = sq->umem; + struct mlx5e_xdp_info xdpi; + struct mlx5e_xdp_xmit_data xdptxd; + bool work_done = true; + bool flush = false; + + xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK; + + for (; budget; budget--) { + int check_result = sq->xmit_xdp_frame_check(sq); + struct xdp_desc desc; + + if (unlikely(check_result < 0)) { + work_done = false; + break; + } + + if (!xsk_umem_consume_tx(umem, &desc)) { + /* TX will get stuck until something wakes it up by + * triggering NAPI. Currently it's expected that the + * application calls sendto() if there are consumed, but + * not completed frames. + */ + break; + } + + xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr); + xdptxd.data = xdp_umem_get_data(umem, desc.addr); + xdptxd.len = desc.len; + + dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr, + xdptxd.len, DMA_BIDIRECTIONAL); + + if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, check_result))) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + + mlx5e_xsk_tx_post_err(sq, &xdpi); + } + + flush = true; + } + + if (flush) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + mlx5e_xmit_xdp_doorbell(sq); + + xsk_umem_consume_tx_done(umem); + } + + return !(budget && work_done); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h new file mode 100644 index 000000000000..7add18bf78d8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_TX_H__ +#define __MLX5_EN_XSK_TX_H__ + +#include "en.h" + +/* TX data path */ + +int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid); + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); + +#endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c new file mode 100644 index 000000000000..4baaa5788320 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <net/xdp_sock.h> +#include "umem.h" +#include "setup.h" +#include "en/params.h" + +static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv, + struct xdp_umem *umem) +{ + struct device *dev = priv->mdev->device; + u32 i; + + for (i = 0; i < umem->npgs; i++) { + dma_addr_t dma = dma_map_page(dev, umem->pgs[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + if (unlikely(dma_mapping_error(dev, dma))) + goto err_unmap; + umem->pages[i].dma = dma; + } + + return 0; + +err_unmap: + while (i--) { + dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); + umem->pages[i].dma = 0; + } + + return -ENOMEM; +} + +static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv, + struct xdp_umem *umem) +{ + struct device *dev = priv->mdev->device; + u32 i; + + for (i = 0; i < umem->npgs; i++) { + dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); + umem->pages[i].dma = 0; + } +} + +static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk) +{ + if (!xsk->umems) { + xsk->umems = kcalloc(MLX5E_MAX_NUM_CHANNELS, + sizeof(*xsk->umems), GFP_KERNEL); + if (unlikely(!xsk->umems)) + return -ENOMEM; + } + + xsk->refcnt++; + xsk->ever_used = true; + + return 0; +} + +static void mlx5e_xsk_put_umems(struct mlx5e_xsk *xsk) +{ + if (!--xsk->refcnt) { + kfree(xsk->umems); + xsk->umems = NULL; + } +} + +static int mlx5e_xsk_add_umem(struct mlx5e_xsk *xsk, struct xdp_umem *umem, u16 ix) +{ + int err; + + err = mlx5e_xsk_get_umems(xsk); + if (unlikely(err)) + return err; + + xsk->umems[ix] = umem; + return 0; +} + +static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix) +{ + xsk->umems[ix] = NULL; + + mlx5e_xsk_put_umems(xsk); +} + +static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem) +{ + return umem->headroom <= 0xffff && umem->chunk_size_nohr <= 0xffff; +} + +void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk) +{ + xsk->headroom = umem->headroom; + xsk->chunk_size = umem->chunk_size_nohr + umem->headroom; +} + +static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, + struct xdp_umem *umem, u16 ix) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + int err; + + if (unlikely(mlx5e_xsk_get_umem(&priv->channels.params, &priv->xsk, ix))) + return -EBUSY; + + if (unlikely(!mlx5e_xsk_is_umem_sane(umem))) + return -EINVAL; + + err = mlx5e_xsk_map_umem(priv, umem); + if (unlikely(err)) + return err; + + err = mlx5e_xsk_add_umem(&priv->xsk, umem, ix); + if (unlikely(err)) + goto err_unmap_umem; + + mlx5e_build_xsk_param(umem, &xsk); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + /* XSK objects will be created on open. */ + goto validate_closed; + } + + if (!params->xdp_prog) { + /* XSK objects will be created when an XDP program is set, + * and the channels are reopened. + */ + goto validate_closed; + } + + c = priv->channels.c[ix]; + + err = mlx5e_open_xsk(priv, params, &xsk, umem, c); + if (unlikely(err)) + goto err_remove_umem; + + mlx5e_activate_xsk(c); + + /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide + * any Fill Ring entries at the setup stage. + */ + + err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]); + if (unlikely(err)) + goto err_deactivate; + + return 0; + +err_deactivate: + mlx5e_deactivate_xsk(c); + mlx5e_close_xsk(c); + +err_remove_umem: + mlx5e_xsk_remove_umem(&priv->xsk, ix); + +err_unmap_umem: + mlx5e_xsk_unmap_umem(priv, umem); + + return err; + +validate_closed: + /* Check the configuration in advance, rather than fail at a later stage + * (in mlx5e_xdp_set or on open) and end up with no channels. + */ + if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) { + err = -EINVAL; + goto err_remove_umem; + } + + return 0; +} + +static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) +{ + struct xdp_umem *umem = mlx5e_xsk_get_umem(&priv->channels.params, + &priv->xsk, ix); + struct mlx5e_channel *c; + + if (unlikely(!umem)) + return -EINVAL; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto remove_umem; + + /* XSK RQ and SQ are only created if XDP program is set. */ + if (!priv->channels.params.xdp_prog) + goto remove_umem; + + c = priv->channels.c[ix]; + mlx5e_xsk_redirect_rqt_to_drop(priv, ix); + mlx5e_deactivate_xsk(c); + mlx5e_close_xsk(c); + +remove_umem: + mlx5e_xsk_remove_umem(&priv->xsk, ix); + mlx5e_xsk_unmap_umem(priv, umem); + + return 0; +} + +static int mlx5e_xsk_enable_umem(struct mlx5e_priv *priv, struct xdp_umem *umem, + u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_enable_locked(priv, umem, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_disable_locked(priv, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + u16 ix; + + if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) + return -EINVAL; + + return umem ? mlx5e_xsk_enable_umem(priv, umem, ix) : + mlx5e_xsk_disable_umem(priv, ix); +} + +int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries) +{ + struct xdp_umem_fq_reuse *reuseq; + + reuseq = xsk_reuseq_prepare(nentries); + if (unlikely(!reuseq)) + return -ENOMEM; + xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq)); + + return 0; +} + +u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk) +{ + u16 res = xsk->refcnt ? params->num_channels : 0; + + while (res) { + if (mlx5e_xsk_get_umem(params, xsk, res - 1)) + break; + --res; + } + + return res; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h new file mode 100644 index 000000000000..25b4cbe58b54 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_UMEM_H__ +#define __MLX5_EN_XSK_UMEM_H__ + +#include "en.h" + +static inline struct xdp_umem *mlx5e_xsk_get_umem(struct mlx5e_params *params, + struct mlx5e_xsk *xsk, u16 ix) +{ + if (!xsk || !xsk->umems) + return NULL; + + if (unlikely(ix >= params->num_channels)) + return NULL; + + return xsk->umems[ix]; +} + +struct mlx5e_xsk_param; +void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk); + +/* .ndo_bpf callback. */ +int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid); + +int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries); + +u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk); + +#endif /* __MLX5_EN_XSK_UMEM_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile index d8e17110f25d..c78512eed8d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 1dd225380a66..3022463f2284 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -39,6 +39,58 @@ #include "en_accel/ipsec_rxtx.h" #include "en_accel/tls_rxtx.h" #include "en.h" +#include "en/txrx.h" + +#if IS_ENABLED(CONFIG_GENEVE) +static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev) +{ + return mlx5_tx_swp_supported(mdev); +} + +static inline void +mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) +{ + struct mlx5e_swp_spec swp_spec = {}; + unsigned int offset = 0; + __be16 l3_proto; + u8 l4_proto; + + l3_proto = vlan_get_protocol(skb); + switch (l3_proto) { + case htons(ETH_P_IP): + l4_proto = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + l4_proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL); + break; + default: + return; + } + + if (l4_proto != IPPROTO_UDP || + udp_hdr(skb)->dest != cpu_to_be16(GENEVE_UDP_PORT)) + return; + swp_spec.l3_proto = l3_proto; + swp_spec.l4_proto = l4_proto; + swp_spec.is_tun = true; + if (inner_ip_hdr(skb)->version == 6) { + swp_spec.tun_l3_proto = htons(ETH_P_IPV6); + swp_spec.tun_l4_proto = inner_ipv6_hdr(skb)->nexthdr; + } else { + swp_spec.tun_l3_proto = htons(ETH_P_IP); + swp_spec.tun_l4_proto = inner_ip_hdr(skb)->protocol; + } + + mlx5e_set_eseg_swp(skb, eseg, &swp_spec); +} + +#else +static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev) +{ + return false; +} + +#endif /* CONFIG_GENEVE */ static inline void mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 53608afd39b6..0dd17514caae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -136,7 +136,7 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, u8 mode, struct xfrm_offload *xo) { - u8 proto; + struct mlx5e_swp_spec swp_spec = {}; /* Tunnel Mode: * SWP: OutL3 InL3 InL4 @@ -146,35 +146,23 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb, * SWP: OutL3 InL4 * InL3 * Pkt: MAC IP ESP L4 - * - * Offsets are in 2-byte words, counting from start of frame */ - eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; - if (skb->protocol == htons(ETH_P_IPV6)) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; - - if (mode == XFRM_MODE_TUNNEL) { - eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + swp_spec.l3_proto = skb->protocol; + swp_spec.is_tun = mode == XFRM_MODE_TUNNEL; + if (swp_spec.is_tun) { if (xo->proto == IPPROTO_IPV6) { - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; - proto = inner_ipv6_hdr(skb)->nexthdr; + swp_spec.tun_l3_proto = htons(ETH_P_IPV6); + swp_spec.tun_l4_proto = inner_ipv6_hdr(skb)->nexthdr; } else { - proto = inner_ip_hdr(skb)->protocol; + swp_spec.tun_l3_proto = htons(ETH_P_IP); + swp_spec.tun_l4_proto = inner_ip_hdr(skb)->protocol; } } else { - eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2; - if (skb->protocol == htons(ETH_P_IPV6)) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; - proto = xo->proto; - } - switch (proto) { - case IPPROTO_UDP: - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; - /* Fall through */ - case IPPROTO_TCP: - eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; - break; + swp_spec.tun_l3_proto = skb->protocol; + swp_spec.tun_l4_proto = xo->proto; } + + mlx5e_set_eseg_swp(skb, eseg, &swp_spec); } void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index ca47c0540904..db84500b024f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -39,6 +39,7 @@ #include <linux/skbuff.h> #include <net/xfrm.h> #include "en.h" +#include "en/txrx.h" struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, u32 *cqe_bcnt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c new file mode 100644 index 000000000000..d2ff74d52720 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include "en.h" +#include "en_accel/ktls.h" + +static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, tls_en, 1); + + return mlx5e_create_tis(mdev, in, tisn); +} + +static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_ktls_offload_context_tx *tx_priv; + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (WARN_ON(direction != TLS_OFFLOAD_CTX_DIR_TX)) + return -EINVAL; + + if (WARN_ON(!mlx5e_ktls_type_check(mdev, crypto_info))) + return -EOPNOTSUPP; + + tx_priv = kvzalloc(sizeof(*tx_priv), GFP_KERNEL); + if (!tx_priv) + return -ENOMEM; + + tx_priv->expected_seq = start_offload_tcp_sn; + tx_priv->crypto_info = crypto_info; + mlx5e_set_ktls_tx_priv_ctx(tls_ctx, tx_priv); + + /* tc and underlay_qpn values are not in use for tls tis */ + err = mlx5e_ktls_create_tis(mdev, &tx_priv->tisn); + if (err) + goto create_tis_fail; + + err = mlx5_ktls_create_key(mdev, crypto_info, &tx_priv->key_id); + if (err) + goto encryption_key_create_fail; + + mlx5e_ktls_tx_offload_set_pending(tx_priv); + + return 0; + +encryption_key_create_fail: + mlx5e_destroy_tis(priv->mdev, tx_priv->tisn); +create_tis_fail: + kvfree(tx_priv); + return err; +} + +static void mlx5e_ktls_del(struct net_device *netdev, + struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_ktls_offload_context_tx *tx_priv = + mlx5e_get_ktls_tx_priv_ctx(tls_ctx); + + mlx5_ktls_destroy_key(priv->mdev, tx_priv->key_id); + mlx5e_destroy_tis(priv->mdev, tx_priv->tisn); + kvfree(tx_priv); +} + +static const struct tlsdev_ops mlx5e_ktls_ops = { + .tls_dev_add = mlx5e_ktls_add, + .tls_dev_del = mlx5e_ktls_del, +}; + +void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + + if (!mlx5_accel_is_ktls_device(priv->mdev)) + return; + + netdev->hw_features |= NETIF_F_HW_TLS_TX; + netdev->features |= NETIF_F_HW_TLS_TX; + + netdev->tlsdev_ops = &mlx5e_ktls_ops; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h new file mode 100644 index 000000000000..407da83474ef --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5E_KTLS_H__ +#define __MLX5E_KTLS_H__ + +#include "en.h" + +#ifdef CONFIG_MLX5_EN_TLS +#include <net/tls.h> +#include "accel/tls.h" + +#define MLX5E_KTLS_STATIC_UMR_WQE_SZ \ + (sizeof(struct mlx5e_umr_wqe) + MLX5_ST_SZ_BYTES(tls_static_params)) +#define MLX5E_KTLS_STATIC_WQEBBS \ + (DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_BB)) + +#define MLX5E_KTLS_PROGRESS_WQE_SZ \ + (sizeof(struct mlx5e_tx_wqe) + MLX5_ST_SZ_BYTES(tls_progress_params)) +#define MLX5E_KTLS_PROGRESS_WQEBBS \ + (DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_BB)) +#define MLX5E_KTLS_MAX_DUMP_WQEBBS 2 + +enum { + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0, + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_OFFLOAD = 1, + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION = 2, +}; + +enum { + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START = 0, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 1, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 2, +}; + +struct mlx5e_ktls_offload_context_tx { + struct tls_offload_context_tx *tx_ctx; + struct tls_crypto_info *crypto_info; + u32 expected_seq; + u32 tisn; + u32 key_id; + bool ctx_post_pending; +}; + +struct mlx5e_ktls_offload_context_tx_shadow { + struct tls_offload_context_tx tx_ctx; + struct mlx5e_ktls_offload_context_tx *priv_tx; +}; + +static inline void +mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx, + struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx); + struct mlx5e_ktls_offload_context_tx_shadow *shadow; + + BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX); + + shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx; + + shadow->priv_tx = priv_tx; + priv_tx->tx_ctx = tx_ctx; +} + +static inline struct mlx5e_ktls_offload_context_tx * +mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx) +{ + struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx); + struct mlx5e_ktls_offload_context_tx_shadow *shadow; + + BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX); + + shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx; + + return shadow->priv_tx; +} + +void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv); +void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx); + +struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, + struct mlx5e_txqsq *sq, + struct sk_buff *skb, + struct mlx5e_tx_wqe **wqe, u16 *pi); +void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + struct mlx5e_sq_dma *dma); + +#else + +static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) +{ +} + +#endif + +#endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c new file mode 100644 index 000000000000..5c08891806f0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include <linux/tls.h> +#include "en.h" +#include "en/txrx.h" +#include "en_accel/ktls.h" + +enum { + MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2 = 0x2, +}; + +enum { + MLX5E_ENCRYPTION_STANDARD_TLS = 0x1, +}; + +#define EXTRACT_INFO_FIELDS do { \ + salt = info->salt; \ + rec_seq = info->rec_seq; \ + salt_sz = sizeof(info->salt); \ + rec_seq_sz = sizeof(info->rec_seq); \ +} while (0) + +static void +fill_static_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + struct tls_crypto_info *crypto_info = priv_tx->crypto_info; + char *initial_rn, *gcm_iv; + u16 salt_sz, rec_seq_sz; + char *salt, *rec_seq; + u8 tls_version; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + + EXTRACT_INFO_FIELDS; + break; + } + default: + WARN_ON(1); + return; + } + + gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv); + initial_rn = MLX5_ADDR_OF(tls_static_params, ctx, initial_record_number); + + memcpy(gcm_iv, salt, salt_sz); + memcpy(initial_rn, rec_seq, rec_seq_sz); + + tls_version = MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2; + + MLX5_SET(tls_static_params, ctx, tls_version, tls_version); + MLX5_SET(tls_static_params, ctx, const_1, 1); + MLX5_SET(tls_static_params, ctx, const_2, 2); + MLX5_SET(tls_static_params, ctx, encryption_standard, + MLX5E_ENCRYPTION_STANDARD_TLS); + MLX5_SET(tls_static_params, ctx, dek_index, priv_tx->key_id); +} + +static void +build_static_params(struct mlx5e_umr_wqe *wqe, u16 pc, u32 sqn, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + +#define STATIC_PARAMS_DS_CNT \ + DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_DS) + + cseg->opmod_idx_opcode = cpu_to_be32((pc << 8) | MLX5_OPCODE_UMR | + (MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS << 24)); + cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + STATIC_PARAMS_DS_CNT); + cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + cseg->imm = cpu_to_be32(priv_tx->tisn); + + ucseg->flags = MLX5_UMR_INLINE; + ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16); + + fill_static_params_ctx(wqe->tls_static_params_ctx, priv_tx); +} + +static void +fill_progress_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + MLX5_SET(tls_progress_params, ctx, pd, priv_tx->tisn); + MLX5_SET(tls_progress_params, ctx, record_tracker_state, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START); + MLX5_SET(tls_progress_params, ctx, auth_state, + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD); +} + +static void +build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + +#define PROGRESS_PARAMS_DS_CNT \ + DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_DS) + + cseg->opmod_idx_opcode = + cpu_to_be32((pc << 8) | MLX5_OPCODE_SET_PSV | + (MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS << 24)); + cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + PROGRESS_PARAMS_DS_CNT); + cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + + fill_progress_params_ctx(wqe->data, priv_tx); +} + +static void tx_fill_wi(struct mlx5e_txqsq *sq, + u16 pi, u8 num_wqebbs, + skb_frag_t *resync_dump_frag) +{ + struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; + + wi->skb = NULL; + wi->num_wqebbs = num_wqebbs; + wi->resync_dump_frag = resync_dump_frag; +} + +void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + priv_tx->ctx_post_pending = true; +} + +static bool +mlx5e_ktls_tx_offload_test_and_clear_pending(struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + bool ret = priv_tx->ctx_post_pending; + + priv_tx->ctx_post_pending = false; + + return ret; +} + +static void +post_static_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5e_umr_wqe *umr_wqe; + u16 pi; + + umr_wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_STATIC_UMR_WQE_SZ, &pi); + build_static_params(umr_wqe, sq->pc, sq->sqn, priv_tx, fence); + tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, NULL); + sq->pc += MLX5E_KTLS_STATIC_WQEBBS; +} + +static void +post_progress_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5e_tx_wqe *wqe; + u16 pi; + + wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_PROGRESS_WQE_SZ, &pi); + build_progress_params(wqe, sq->pc, sq->sqn, priv_tx, fence); + tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, NULL); + sq->pc += MLX5E_KTLS_PROGRESS_WQEBBS; +} + +static void +mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool skip_static_post, bool fence_first_post) +{ + bool progress_fence = skip_static_post || !fence_first_post; + + if (!skip_static_post) + post_static_params(sq, priv_tx, fence_first_post); + + post_progress_params(sq, priv_tx, progress_fence); +} + +struct tx_sync_info { + u64 rcd_sn; + s32 sync_len; + int nr_frags; + skb_frag_t *frags[MAX_SKB_FRAGS]; +}; + +static bool tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, + u32 tcp_seq, struct tx_sync_info *info) +{ + struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx; + struct tls_record_info *record; + int remaining, i = 0; + unsigned long flags; + bool ret = true; + + spin_lock_irqsave(&tx_ctx->lock, flags); + record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn); + + if (unlikely(!record)) { + ret = false; + goto out; + } + + if (unlikely(tcp_seq < tls_record_start_seq(record))) { + if (!tls_record_is_start_marker(record)) + ret = false; + goto out; + } + + info->sync_len = tcp_seq - tls_record_start_seq(record); + remaining = info->sync_len; + while (remaining > 0) { + skb_frag_t *frag = &record->frags[i]; + + __skb_frag_ref(frag); + remaining -= skb_frag_size(frag); + info->frags[i++] = frag; + } + /* reduce the part which will be sent with the original SKB */ + if (remaining < 0) + skb_frag_size_add(info->frags[i - 1], remaining); + info->nr_frags = i; +out: + spin_unlock_irqrestore(&tx_ctx->lock, flags); + return ret; +} + +static void +tx_post_resync_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + u64 rcd_sn) +{ + struct tls_crypto_info *crypto_info = priv_tx->crypto_info; + __be64 rn_be = cpu_to_be64(rcd_sn); + bool skip_static_post; + u16 rec_seq_sz; + char *rec_seq; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + + rec_seq = info->rec_seq; + rec_seq_sz = sizeof(info->rec_seq); + break; + } + default: + WARN_ON(1); + return; + } + + skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz); + if (!skip_static_post) + memcpy(rec_seq, &rn_be, rec_seq_sz); + + mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, skip_static_post, true); +} + +static int +tx_post_resync_dump(struct mlx5e_txqsq *sq, struct sk_buff *skb, + skb_frag_t *frag, u32 tisn, bool first) +{ + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_eth_seg *eseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5e_tx_wqe *wqe; + dma_addr_t dma_addr = 0; + u16 ds_cnt, ds_cnt_inl; + u8 num_wqebbs; + u16 pi, ihs; + int fsz; + + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + ihs = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb)); + ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS); + ds_cnt += ds_cnt_inl; + ds_cnt += 1; /* one frag */ + + wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi); + + num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + + cseg = &wqe->ctrl; + eseg = &wqe->eth; + dseg = wqe->data; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + cseg->imm = cpu_to_be32(tisn); + cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + + eseg->inline_hdr.sz = cpu_to_be16(ihs); + memcpy(eseg->inline_hdr.start, skb->data, ihs); + dseg += ds_cnt_inl; + + fsz = skb_frag_size(frag); + dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) + return -ENOMEM; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; + dseg->byte_count = cpu_to_be32(fsz); + mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); + + tx_fill_wi(sq, pi, num_wqebbs, frag); + sq->pc += num_wqebbs; + + WARN(num_wqebbs > MLX5E_KTLS_MAX_DUMP_WQEBBS, + "unexpected DUMP num_wqebbs, %d > %d", + num_wqebbs, MLX5E_KTLS_MAX_DUMP_WQEBBS); + + return 0; +} + +void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + struct mlx5e_sq_dma *dma) +{ + struct mlx5e_sq_stats *stats = sq->stats; + + mlx5e_tx_dma_unmap(sq->pdev, dma); + __skb_frag_unref(wi->resync_dump_frag); + stats->tls_dump_packets++; + stats->tls_dump_bytes += wi->num_bytes; +} + +static void tx_post_fence_nop(struct mlx5e_txqsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + + tx_fill_wi(sq, pi, 1, NULL); + + mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc); +} + +static struct sk_buff * +mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, + struct mlx5e_txqsq *sq, + struct sk_buff *skb, + u32 seq) +{ + struct mlx5e_sq_stats *stats = sq->stats; + struct mlx5_wq_cyc *wq = &sq->wq; + struct tx_sync_info info = {}; + u16 contig_wqebbs_room, pi; + u8 num_wqebbs; + int i; + + if (!tx_sync_info_get(priv_tx, seq, &info)) { + /* We might get here if a retransmission reaches the driver + * after the relevant record is acked. + * It should be safe to drop the packet in this case + */ + stats->tls_drop_no_sync_data++; + goto err_out; + } + + if (unlikely(info.sync_len < 0)) { + u32 payload; + int headln; + + headln = skb_transport_offset(skb) + tcp_hdrlen(skb); + payload = skb->len - headln; + if (likely(payload <= -info.sync_len)) + return skb; + + stats->tls_drop_bypass_req++; + goto err_out; + } + + stats->tls_ooo++; + + num_wqebbs = MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + + (info.nr_frags ? info.nr_frags * MLX5E_KTLS_MAX_DUMP_WQEBBS : 1); + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < num_wqebbs)) + mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); + + tx_post_resync_params(sq, priv_tx, info.rcd_sn); + + for (i = 0; i < info.nr_frags; i++) + if (tx_post_resync_dump(sq, skb, info.frags[i], + priv_tx->tisn, !i)) + goto err_out; + + /* If no dump WQE was sent, we need to have a fence NOP WQE before the + * actual data xmit. + */ + if (!info.nr_frags) + tx_post_fence_nop(sq); + + return skb; + +err_out: + dev_kfree_skb_any(skb); + return NULL; +} + +struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, + struct mlx5e_txqsq *sq, + struct sk_buff *skb, + struct mlx5e_tx_wqe **wqe, u16 *pi) +{ + struct mlx5e_ktls_offload_context_tx *priv_tx; + struct mlx5e_sq_stats *stats = sq->stats; + struct mlx5_wqe_ctrl_seg *cseg; + struct tls_context *tls_ctx; + int datalen; + u32 seq; + + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) + goto out; + + datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + if (!datalen) + goto out; + + tls_ctx = tls_get_ctx(skb->sk); + if (unlikely(tls_ctx->netdev != netdev)) + goto err_out; + + priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); + + if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) { + mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false); + *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); + stats->tls_ctx++; + } + + seq = ntohl(tcp_hdr(skb)->seq); + if (unlikely(priv_tx->expected_seq != seq)) { + skb = mlx5e_ktls_tx_handle_ooo(priv_tx, sq, skb, seq); + if (unlikely(!skb)) + goto out; + *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); + } + + priv_tx->expected_seq = seq + datalen; + + cseg = &(*wqe)->ctrl; + cseg->imm = cpu_to_be32(priv_tx->tisn); + + stats->tls_encrypted_packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; + stats->tls_encrypted_bytes += datalen; + +out: + return skb; + +err_out: + dev_kfree_skb_any(skb); + return NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c index e88340e196f7..fba561ffe1d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c @@ -160,25 +160,31 @@ static void mlx5e_tls_del(struct net_device *netdev, direction == TLS_OFFLOAD_CTX_DIR_TX); } -static void mlx5e_tls_resync_rx(struct net_device *netdev, struct sock *sk, - u32 seq, u64 rcd_sn) +static int mlx5e_tls_resync(struct net_device *netdev, struct sock *sk, + u32 seq, u8 *rcd_sn_data, + enum tls_offload_ctx_dir direction) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_tls_offload_context_rx *rx_ctx; + u64 rcd_sn = *(u64 *)rcd_sn_data; + if (WARN_ON_ONCE(direction != TLS_OFFLOAD_CTX_DIR_RX)) + return -EINVAL; rx_ctx = mlx5e_get_tls_rx_context(tls_ctx); netdev_info(netdev, "resyncing seq %d rcd %lld\n", seq, be64_to_cpu(rcd_sn)); mlx5_accel_tls_resync_rx(priv->mdev, rx_ctx->handle, seq, rcd_sn); atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_reply); + + return 0; } static const struct tlsdev_ops mlx5e_tls_ops = { .tls_dev_add = mlx5e_tls_add, .tls_dev_del = mlx5e_tls_del, - .tls_dev_resync_rx = mlx5e_tls_resync_rx, + .tls_dev_resync = mlx5e_tls_resync, }; void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) @@ -186,6 +192,11 @@ void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) struct net_device *netdev = priv->netdev; u32 caps; + if (mlx5_accel_is_ktls_device(priv->mdev)) { + mlx5e_ktls_build_netdev(priv); + return; + } + if (!mlx5_accel_is_tls_device(priv->mdev)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h index 3f5d72163b56..9015f3f7792d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h @@ -33,8 +33,10 @@ #ifndef __MLX5E_TLS_H__ #define __MLX5E_TLS_H__ -#ifdef CONFIG_MLX5_EN_TLS +#include "accel/tls.h" +#include "en_accel/ktls.h" +#ifdef CONFIG_MLX5_EN_TLS #include <net/tls.h> #include "en.h" @@ -94,7 +96,12 @@ int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data); #else -static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) { } +static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) +{ + if (mlx5_accel_is_ktls_device(priv->mdev)) + mlx5e_ktls_build_netdev(priv); +} + static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { } static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index be137d4a9169..71384ad1a443 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -181,7 +181,6 @@ static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb, */ nskb->ip_summed = CHECKSUM_PARTIAL; - nskb->xmit_more = 1; nskb->queue_mapping = skb->queue_mapping; } @@ -248,8 +247,8 @@ mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context, sq->stats->tls_resync_bytes += nskb->len; mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln, cpu_to_be64(info.rcd_sn)); - mlx5e_sq_xmit(sq, nskb, *wqe, *pi); - mlx5e_sq_fetch_wqe(sq, wqe, pi); + mlx5e_sq_xmit(sq, nskb, *wqe, *pi, true); + *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); return skb; err_out: @@ -270,6 +269,11 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev, int datalen; u32 skb_seq; + if (MLX5_CAP_GEN(sq->channel->mdev, tls)) { + skb = mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi); + goto out; + } + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h index 311667ec71b8..90bc1f2384c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h @@ -38,6 +38,7 @@ #include <linux/skbuff.h> #include "en.h" +#include "en/txrx.h" struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 554672edf8c3..8dd31b5c740c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -680,7 +680,7 @@ static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev, memset(perm_addr, 0xff, MAX_ADDR_LEN); - mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr); + mlx5_query_mac_address(priv->mdev, perm_addr); } static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index d67adf70a97b..ca9cfbf57d8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -30,22 +30,22 @@ * SOFTWARE. */ -#include <linux/net_dim.h> +#include <linux/dim.h> #include "en.h" static void -mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder, +mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder, struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) { mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts); - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } void mlx5e_rx_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim); - struct net_dim_cq_moder cur_moder = + struct dim_cq_moder cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq); @@ -53,9 +53,9 @@ void mlx5e_rx_dim_work(struct work_struct *work) void mlx5e_tx_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim); - struct net_dim_cq_moder cur_moder = + struct dim_cq_moder cur_moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix); mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 78dc8fe2a83c..126ec4181286 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -32,6 +32,7 @@ #include "en.h" #include "en/port.h" +#include "en/xsk/umem.h" #include "lib/clock.h" void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, @@ -46,7 +47,7 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); - strlcpy(drvinfo->bus_info, pci_name(mdev->pdev), + strlcpy(drvinfo->bus_info, dev_name(mdev->device), sizeof(drvinfo->bus_info)); } @@ -388,8 +389,17 @@ static int mlx5e_set_ringparam(struct net_device *dev, void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch) { + mutex_lock(&priv->state_lock); + ch->max_combined = mlx5e_get_netdev_max_channels(priv->netdev); ch->combined_count = priv->channels.params.num_channels; + if (priv->xsk.refcnt) { + /* The upper half are XSK queues. */ + ch->max_combined *= 2; + ch->combined_count *= 2; + } + + mutex_unlock(&priv->state_lock); } static void mlx5e_get_channels(struct net_device *dev, @@ -403,6 +413,7 @@ static void mlx5e_get_channels(struct net_device *dev, int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch) { + struct mlx5e_params *cur_params = &priv->channels.params; unsigned int count = ch->combined_count; struct mlx5e_channels new_channels = {}; bool arfs_enabled; @@ -414,16 +425,26 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, return -EINVAL; } - if (priv->channels.params.num_channels == count) + if (cur_params->num_channels == count) return 0; mutex_lock(&priv->state_lock); + /* Don't allow changing the number of channels if there is an active + * XSK, because the numeration of the XSK and regular RQs will change. + */ + if (priv->xsk.refcnt) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: AF_XDP is active, cannot change the number of channels\n", + __func__); + goto out; + } + new_channels.params = priv->channels.params; new_channels.params.num_channels = count; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; + *cur_params = new_channels.params; if (!netif_is_rxfh_configured(priv->netdev)) mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, count); @@ -466,7 +487,7 @@ static int mlx5e_set_channels(struct net_device *dev, int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct net_dim_cq_moder *rx_moder, *tx_moder; + struct dim_cq_moder *rx_moder, *tx_moder; if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -EOPNOTSUPP; @@ -521,7 +542,7 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct net_dim_cq_moder *rx_moder, *tx_moder; + struct dim_cq_moder *rx_moder, *tx_moder; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; int err = 0; @@ -1561,7 +1582,7 @@ static int mlx5e_get_module_info(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *dev = priv->mdev; int size_read = 0; - u8 data[4]; + u8 data[4] = {0}; size_read = mlx5_query_module_eeprom(dev, 0, 2, data); if (size_read < 2) @@ -1571,17 +1592,17 @@ static int mlx5e_get_module_info(struct net_device *netdev, switch (data[0]) { case MLX5_MODULE_ID_QSFP: modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; break; case MLX5_MODULE_ID_QSFP_PLUS: case MLX5_MODULE_ID_QSFP28: /* data[1] = revision id */ if (data[0] == MLX5_MODULE_ID_QSFP28 || data[1] >= 0x3) { modinfo->type = ETH_MODULE_SFF_8636; - modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; } else { modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; } break; case MLX5_MODULE_ID_SFP: @@ -1867,39 +1888,21 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) return priv->channels.params.pflags; } -int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, - struct ethtool_flash *flash) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct net_device *dev = priv->netdev; - const struct firmware *fw; - int err; - - if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) - return -EOPNOTSUPP; - - err = request_firmware_direct(&fw, flash->data, &dev->dev); - if (err) - return err; - - dev_hold(dev); - rtnl_unlock(); - - err = mlx5_firmware_flash(mdev, fw); - release_firmware(fw); - - rtnl_lock(); - dev_put(dev); - return err; -} - -static int mlx5e_flash_device(struct net_device *dev, - struct ethtool_flash *flash) +#ifndef CONFIG_MLX5_EN_RXNFC +/* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS + * otherwise this function will be defined from en_fs_ethtool.c + */ +static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) { struct mlx5e_priv *priv = netdev_priv(dev); - return mlx5e_ethtool_flash_device(priv, flash); + if (info->cmd != ETHTOOL_GRXRINGS) + return -EOPNOTSUPP; + /* ring_count is needed by ethtool -x */ + info->data = priv->channels.params.num_channels; + return 0; } +#endif const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, @@ -1919,11 +1922,10 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size, .get_rxfh = mlx5e_get_rxfh, .set_rxfh = mlx5e_set_rxfh, -#ifdef CONFIG_MLX5_EN_RXNFC .get_rxnfc = mlx5e_get_rxnfc, +#ifdef CONFIG_MLX5_EN_RXNFC .set_rxnfc = mlx5e_set_rxnfc, #endif - .flash_device = mlx5e_flash_device, .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, .get_pauseparam = mlx5e_get_pauseparam, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 4421c10f58ae..ea3a490b569a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -32,6 +32,8 @@ #include <linux/mlx5/fs.h> #include "en.h" +#include "en/params.h" +#include "en/xsk/umem.h" struct mlx5e_ethtool_rule { struct list_head list; @@ -414,6 +416,14 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, if (fs->ring_cookie == RX_CLS_FLOW_DISC) { flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; } else { + struct mlx5e_params *params = &priv->channels.params; + enum mlx5e_rq_group group; + struct mlx5e_tir *tir; + u16 ix; + + mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group); + tir = group == MLX5E_RQ_GROUP_XSK ? priv->xsk_tir : priv->direct_tir; + dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (!dst) { err = -ENOMEM; @@ -421,12 +431,12 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, } dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn; + dst->tir_num = tir[ix].tirn; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); - flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + spec->flow_context.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -600,9 +610,9 @@ static int validate_flow(struct mlx5e_priv *priv, if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES) return -ENOSPC; - if (fs->ring_cookie >= priv->channels.params.num_channels && - fs->ring_cookie != RX_CLS_FLOW_DISC) - return -EINVAL; + if (fs->ring_cookie != RX_CLS_FLOW_DISC) + if (!mlx5e_qid_validate(&priv->channels.params, fs->ring_cookie)) + return -EINVAL; switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { case ETHER_FLOW: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 46157e2a1e5a..6d0ae87c8ded 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -34,15 +34,19 @@ #include <net/pkt_cls.h> #include <linux/mlx5/fs.h> #include <net/vxlan.h> +#include <net/geneve.h> #include <linux/bpf.h> #include <linux/if_bridge.h> #include <net/page_pool.h> +#include <net/xdp_sock.h> #include "eswitch.h" #include "en.h" +#include "en/txrx.h" #include "en_tc.h" #include "en_rep.h" #include "en_accel/ipsec.h" #include "en_accel/ipsec_rxtx.h" +#include "en_accel/en_accel.h" #include "en_accel/tls.h" #include "accel/ipsec.h" #include "accel/tls.h" @@ -53,35 +57,12 @@ #include "lib/eq.h" #include "en/monitor_stats.h" #include "en/reporter.h" +#include "en/params.h" +#include "en/xsk/umem.h" +#include "en/xsk/setup.h" +#include "en/xsk/rx.h" +#include "en/xsk/tx.h" -struct mlx5e_rq_param { - u32 rqc[MLX5_ST_SZ_DW(rqc)]; - struct mlx5_wq_param wq; - struct mlx5e_rq_frags_info frags_info; -}; - -struct mlx5e_sq_param { - u32 sqc[MLX5_ST_SZ_DW(sqc)]; - struct mlx5_wq_param wq; - bool is_mpw; -}; - -struct mlx5e_cq_param { - u32 cqc[MLX5_ST_SZ_DW(cqc)]; - struct mlx5_wq_param wq; - u16 eq_ix; - u8 cq_period_mode; -}; - -struct mlx5e_channel_param { - struct mlx5e_rq_param rq; - struct mlx5e_sq_param sq; - struct mlx5e_sq_param xdp_sq; - struct mlx5e_sq_param icosq; - struct mlx5e_cq_param rx_cq; - struct mlx5e_cq_param tx_cq; - struct mlx5e_cq_param icosq_cq; -}; bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { @@ -101,108 +82,9 @@ bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) return true; } -static u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params) -{ - u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - u16 linear_rq_headroom = params->xdp_prog ? - XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; - u32 frag_sz; - - linear_rq_headroom += NET_IP_ALIGN; - - frag_sz = MLX5_SKB_FRAG_SZ(linear_rq_headroom + hw_mtu); - - if (params->xdp_prog && frag_sz < PAGE_SIZE) - frag_sz = PAGE_SIZE; - - return frag_sz; -} - -static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params) -{ - u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params); - - return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); -} - -static bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params); - - return !params->lro_en && frag_sz <= PAGE_SIZE; -} - -#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \ - MLX5_MPWQE_LOG_STRIDE_SZ_BASE) -static bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params); - s8 signed_log_num_strides_param; - u8 log_num_strides; - - if (!mlx5e_rx_is_linear_skb(mdev, params)) - return false; - - if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ) - return false; - - if (MLX5_CAP_GEN(mdev, ext_stride_num_range)) - return true; - - log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz); - signed_log_num_strides_param = - (s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE; - - return signed_log_num_strides_param >= 0; -} - -static u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params) -{ - if (params->log_rq_mtu_frames < - mlx5e_mpwqe_log_pkts_per_wqe(params) + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW) - return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW; - - return params->log_rq_mtu_frames - mlx5e_mpwqe_log_pkts_per_wqe(params); -} - -static u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) - return order_base_2(mlx5e_rx_get_linear_frag_sz(params)); - - return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); -} - -static u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - return MLX5_MPWRQ_LOG_WQE_SZ - - mlx5e_mpwqe_get_log_stride_size(mdev, params); -} - -static u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - u16 linear_rq_headroom = params->xdp_prog ? - XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; - bool is_linear_skb; - - linear_rq_headroom += NET_IP_ALIGN; - - is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ? - mlx5e_rx_is_linear_skb(mdev, params) : - mlx5e_rx_mpwqe_is_linear_skb(mdev, params); - - return is_linear_skb ? linear_rq_headroom : 0; -} - void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { - params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; params->log_rq_mtu_frames = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; @@ -210,18 +92,31 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? - BIT(mlx5e_mpwqe_get_log_rq_size(params)) : + BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) : BIT(params->log_rq_mtu_frames), - BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)), + BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)), MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { - return mlx5e_check_fragmented_striding_rq_cap(mdev) && - !MLX5_IPSEC_DEV(mdev) && - !(params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params)); + if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) + return false; + + if (MLX5_IPSEC_DEV(mdev)) + return false; + + if (params->xdp_prog) { + /* XSK params are not considered here. If striding RQ is in use, + * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will + * be called with the known XSK params. + */ + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) + return false; + } + + return true; } void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) @@ -469,7 +364,6 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) } static int mlx5e_init_di_list(struct mlx5e_rq *rq, - struct mlx5e_params *params, int wq_sz, int cpu) { int len = wq_sz << rq->wqe.info.log_num_frags; @@ -491,6 +385,8 @@ static void mlx5e_free_di_list(struct mlx5e_rq *rq) static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct xdp_umem *umem, struct mlx5e_rq_param *rqp, struct mlx5e_rq *rq) { @@ -498,6 +394,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = c->mdev; void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 num_xsk_frames = 0; + u32 rq_xdp_ix; u32 pool_size; int wq_sz; int err; @@ -514,7 +412,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->ix = c->ix; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - rq->stats = &c->priv->channel_stats[c->ix].rq; + rq->xdpsq = &c->rq_xdpsq; + rq->umem = umem; + + if (rq->umem) + rq->stats = &c->priv->channel_stats[c->ix].xskrq; + else + rq->stats = &c->priv->channel_stats[c->ix].rq; rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL; if (IS_ERR(rq->xdp_prog)) { @@ -523,12 +427,16 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix); + rq_xdp_ix = rq->ix; + if (xsk) + rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK; + err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix); if (err < 0) goto err_rq_wq_destroy; rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; - rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params); + rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); + rq->buff.umem_headroom = xsk ? xsk->headroom : 0; pool_size = 1 << params->log_rq_mtu_frames; switch (rq->wq_type) { @@ -542,7 +450,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); - pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params); + if (xsk) + num_xsk_frames = wq_sz << + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + + pool_size = MLX5_MPWRQ_PAGES_PER_WQE << + mlx5e_mpwqe_get_log_rq_size(params, xsk); rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; @@ -561,12 +474,15 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->mpwqe.skb_from_cqe_mpwrq = - mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ? - mlx5e_skb_from_cqe_mpwrq_linear : - mlx5e_skb_from_cqe_mpwrq_nonlinear; - rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params); - rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params)); + rq->mpwqe.skb_from_cqe_mpwrq = xsk ? + mlx5e_xsk_skb_from_cqe_mpwrq_linear : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ? + mlx5e_skb_from_cqe_mpwrq_linear : + mlx5e_skb_from_cqe_mpwrq_nonlinear; + + rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + rq->mpwqe.num_strides = + BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) @@ -587,6 +503,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); + if (xsk) + num_xsk_frames = wq_sz << rq->wqe.info.log_num_frags; + rq->wqe.info = rqp->frags_info; rq->wqe.frags = kvzalloc_node(array_size(sizeof(*rq->wqe.frags), @@ -597,9 +516,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_free; } - err = mlx5e_init_di_list(rq, params, wq_sz, c->cpu); + err = mlx5e_init_di_list(rq, wq_sz, c->cpu); if (err) goto err_free; + rq->post_wqes = mlx5e_post_rx_wqes; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; @@ -615,33 +535,49 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_free; } - rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(mdev, params) ? - mlx5e_skb_from_cqe_linear : - mlx5e_skb_from_cqe_nonlinear; + rq->wqe.skb_from_cqe = xsk ? + mlx5e_xsk_skb_from_cqe_linear : + mlx5e_rx_is_linear_skb(params, NULL) ? + mlx5e_skb_from_cqe_linear : + mlx5e_skb_from_cqe_nonlinear; rq->mkey_be = c->mkey_be; } - /* Create a page_pool and register it with rxq */ - pp_params.order = 0; - pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ - pp_params.pool_size = pool_size; - pp_params.nid = cpu_to_node(c->cpu); - pp_params.dev = c->pdev; - pp_params.dma_dir = rq->buff.map_dir; - - /* page_pool can be used even when there is no rq->xdp_prog, - * given page_pool does not handle DMA mapping there is no - * required state to clear. And page_pool gracefully handle - * elevated refcnt. - */ - rq->page_pool = page_pool_create(&pp_params); - if (IS_ERR(rq->page_pool)) { - err = PTR_ERR(rq->page_pool); - rq->page_pool = NULL; - goto err_free; + if (xsk) { + err = mlx5e_xsk_resize_reuseq(umem, num_xsk_frames); + if (unlikely(err)) { + mlx5_core_err(mdev, "Unable to allocate the Reuse Ring for %u frames\n", + num_xsk_frames); + goto err_free; + } + + rq->zca.free = mlx5e_xsk_zca_free; + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_ZERO_COPY, + &rq->zca); + } else { + /* Create a page_pool and register it with rxq */ + pp_params.order = 0; + pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ + pp_params.pool_size = pool_size; + pp_params.nid = cpu_to_node(c->cpu); + pp_params.dev = c->pdev; + pp_params.dma_dir = rq->buff.map_dir; + + /* page_pool can be used even when there is no rq->xdp_prog, + * given page_pool does not handle DMA mapping there is no + * required state to clear. And page_pool gracefully handle + * elevated refcnt. + */ + rq->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rq->page_pool)) { + err = PTR_ERR(rq->page_pool); + rq->page_pool = NULL; + goto err_free; + } + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_PAGE_POOL, rq->page_pool); } - err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, - MEM_TYPE_PAGE_POOL, rq->page_pool); if (err) goto err_free; @@ -681,11 +617,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, switch (params->rx_cq_moderation.cq_period_mode) { case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: - rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE; + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; break; case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: default: - rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } rq->page_cache.head = 0; @@ -708,8 +644,7 @@ err_rq_wq_destroy: if (rq->xdp_prog) bpf_prog_put(rq->xdp_prog); xdp_rxq_info_unreg(&rq->xdp_rxq); - if (rq->page_pool) - page_pool_destroy(rq->page_pool); + page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); return err; @@ -722,10 +657,6 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) if (rq->xdp_prog) bpf_prog_put(rq->xdp_prog); - xdp_rxq_info_unreg(&rq->xdp_rxq); - if (rq->page_pool) - page_pool_destroy(rq->page_pool); - switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: kvfree(rq->mpwqe.info); @@ -740,8 +671,15 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) { struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i]; - mlx5e_page_release(rq, dma_info, false); + /* With AF_XDP, page_cache is not used, so this loop is not + * entered, and it's safe to call mlx5e_page_release_dynamic + * directly. + */ + mlx5e_page_release_dynamic(rq, dma_info, false); } + + xdp_rxq_info_unreg(&rq->xdp_rxq); + page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); } @@ -875,7 +813,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) mlx5_core_destroy_rq(rq->mdev, rq->rqn); } -static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) +int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) { unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time); struct mlx5e_channel *c = rq->channel; @@ -902,10 +840,14 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { struct mlx5_wq_ll *wq = &rq->mpwqe.wq; + u16 head = wq->head; + int i; - /* UMR WQE (if in progress) is always at wq->head */ - if (rq->mpwqe.umr_in_progress) - rq->dealloc_wqe(rq, wq->head); + /* Outstanding UMR WQEs (in progress) start at wq->head */ + for (i = 0; i < rq->mpwqe.umr_in_progress; i++) { + rq->dealloc_wqe(rq, head); + head = mlx5_wq_ll_get_wqe_next_ix(wq, head); + } while (!mlx5_wq_ll_is_empty(wq)) { struct mlx5e_rx_wqe_ll *wqe; @@ -929,14 +871,13 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) } -static int mlx5e_open_rq(struct mlx5e_channel *c, - struct mlx5e_params *params, - struct mlx5e_rq_param *param, - struct mlx5e_rq *rq) +int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, + struct xdp_umem *umem, struct mlx5e_rq *rq) { int err; - err = mlx5e_alloc_rq(c, params, param, rq); + err = mlx5e_alloc_rq(c, params, xsk, umem, param, rq); if (err) return err; @@ -970,25 +911,17 @@ err_free_rq: static void mlx5e_activate_rq(struct mlx5e_rq *rq) { - struct mlx5e_icosq *sq = &rq->channel->icosq; - struct mlx5_wq_cyc *wq = &sq->wq; - struct mlx5e_tx_wqe *nopwqe; - - u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; - nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc); - mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl); + mlx5e_trigger_irq(&rq->channel->icosq); } -static void mlx5e_deactivate_rq(struct mlx5e_rq *rq) +void mlx5e_deactivate_rq(struct mlx5e_rq *rq) { clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ } -static void mlx5e_close_rq(struct mlx5e_rq *rq) +void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); mlx5e_destroy_rq(rq); @@ -1041,6 +974,7 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct xdp_umem *umem, struct mlx5e_sq_param *param, struct mlx5e_xdpsq *sq, bool is_redirect) @@ -1056,9 +990,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - sq->stats = is_redirect ? - &c->priv->channel_stats[c->ix].xdpsq : - &c->priv->channel_stats[c->ix].rq_xdpsq; + sq->umem = umem; + + sq->stats = sq->umem ? + &c->priv->channel_stats[c->ix].xsksq : + is_redirect ? + &c->priv->channel_stats[c->ix].xdpsq : + &c->priv->channel_stats[c->ix].rq_xdpsq; param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1091,7 +1029,7 @@ static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq) static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa) { - u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); sq->db.ico_wqe = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.ico_wqe)), @@ -1183,15 +1121,19 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->clock = &mdev->clock; sq->mkey_be = c->mkey_be; sq->channel = c; + sq->ch_ix = c->ix; sq->txq_ix = txq_ix; sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; + sq->stop_room = MLX5E_SQ_STOP_ROOM; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); - if (mlx5_accel_is_tls_device(c->priv->mdev)) + if (mlx5_accel_is_tls_device(c->priv->mdev)) { set_bit(MLX5E_SQ_STATE_TLS, &sq->state); + sq->stop_room += MLX5E_SQ_TLS_ROOM; + } param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1437,10 +1379,8 @@ static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) mlx5e_tx_reporter_err_cqe(sq); } -static int mlx5e_open_icosq(struct mlx5e_channel *c, - struct mlx5e_params *params, - struct mlx5e_sq_param *param, - struct mlx5e_icosq *sq) +int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq) { struct mlx5e_create_sq_param csp = {}; int err; @@ -1466,7 +1406,7 @@ err_free_icosq: return err; } -static void mlx5e_close_icosq(struct mlx5e_icosq *sq) +void mlx5e_close_icosq(struct mlx5e_icosq *sq) { struct mlx5e_channel *c = sq->channel; @@ -1477,16 +1417,14 @@ static void mlx5e_close_icosq(struct mlx5e_icosq *sq) mlx5e_free_icosq(sq); } -static int mlx5e_open_xdpsq(struct mlx5e_channel *c, - struct mlx5e_params *params, - struct mlx5e_sq_param *param, - struct mlx5e_xdpsq *sq, - bool is_redirect) +int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct xdp_umem *umem, + struct mlx5e_xdpsq *sq, bool is_redirect) { struct mlx5e_create_sq_param csp = {}; int err; - err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect); + err = mlx5e_alloc_xdpsq(c, params, umem, param, sq, is_redirect); if (err) return err; @@ -1527,7 +1465,7 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c, dseg->lkey = sq->mkey_be; wi->num_wqebbs = 1; - wi->num_ds = 1; + wi->num_pkts = 1; } } @@ -1540,7 +1478,7 @@ err_free_xdpsq: return err; } -static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) +void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) { struct mlx5e_channel *c = sq->channel; @@ -1548,7 +1486,7 @@ static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) napi_synchronize(&c->napi); mlx5e_destroy_sq(c->mdev, sq->sqn); - mlx5e_free_xdpsq_descs(sq, rq); + mlx5e_free_xdpsq_descs(sq); mlx5e_free_xdpsq(sq); } @@ -1618,6 +1556,7 @@ static void mlx5e_free_cq(struct mlx5e_cq *cq) static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) { + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; struct mlx5_core_dev *mdev = cq->mdev; struct mlx5_core_cq *mcq = &cq->mcq; @@ -1652,7 +1591,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); - err = mlx5_core_create_cq(mdev, mcq, in, inlen); + err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out)); kvfree(in); @@ -1669,10 +1608,8 @@ static void mlx5e_destroy_cq(struct mlx5e_cq *cq) mlx5_core_destroy_cq(cq->mdev, &cq->mcq); } -static int mlx5e_open_cq(struct mlx5e_channel *c, - struct net_dim_cq_moder moder, - struct mlx5e_cq_param *param, - struct mlx5e_cq *cq) +int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder, + struct mlx5e_cq_param *param, struct mlx5e_cq *cq) { struct mlx5_core_dev *mdev = c->mdev; int err; @@ -1695,7 +1632,7 @@ err_free_cq: return err; } -static void mlx5e_close_cq(struct mlx5e_cq *cq) +void mlx5e_close_cq(struct mlx5e_cq *cq) { mlx5e_destroy_cq(cq); mlx5e_free_cq(cq); @@ -1869,49 +1806,16 @@ static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c) free_cpumask_var(c->xps_cpumask); } -static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, - struct mlx5e_params *params, - struct mlx5e_channel_param *cparam, - struct mlx5e_channel **cp) +static int mlx5e_open_queues(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam) { - int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); - struct net_dim_cq_moder icocq_moder = {0, 0}; - struct net_device *netdev = priv->netdev; - struct mlx5e_channel *c; - unsigned int irq; + struct dim_cq_moder icocq_moder = {0, 0}; int err; - int eqn; - - err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); - if (err) - return err; - - c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); - if (!c) - return -ENOMEM; - - c->priv = priv; - c->mdev = priv->mdev; - c->tstamp = &priv->tstamp; - c->ix = ix; - c->cpu = cpu; - c->pdev = &priv->mdev->pdev->dev; - c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); - c->num_tc = params->num_tc; - c->xdp = !!params->xdp_prog; - c->stats = &priv->channel_stats[ix].ch; - c->irq_desc = irq_to_desc(irq); - - err = mlx5e_alloc_xps_cpumask(c, params); - if (err) - goto err_free_channel; - - netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); if (err) - goto err_napi_del; + return err; err = mlx5e_open_tx_cqs(c, params, cparam); if (err) @@ -1927,7 +1831,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, /* XDP SQ CQ params are same as normal TXQ sq CQ params */ err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation, - &cparam->tx_cq, &c->rq.xdpsq.cq) : 0; + &cparam->tx_cq, &c->rq_xdpsq.cq) : 0; if (err) goto err_close_rx_cq; @@ -1941,20 +1845,21 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (err) goto err_close_icosq; - err = c->xdp ? mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq, false) : 0; - if (err) - goto err_close_sqs; + if (c->xdp) { + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, + &c->rq_xdpsq, false); + if (err) + goto err_close_sqs; + } - err = mlx5e_open_rq(c, params, &cparam->rq, &c->rq); + err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq); if (err) goto err_close_xdp_sq; - err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->xdpsq, true); + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true); if (err) goto err_close_rq; - *cp = c; - return 0; err_close_rq: @@ -1962,7 +1867,7 @@ err_close_rq: err_close_xdp_sq: if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); + mlx5e_close_xdpsq(&c->rq_xdpsq); err_close_sqs: mlx5e_close_sqs(c); @@ -1972,8 +1877,9 @@ err_close_icosq: err_disable_napi: napi_disable(&c->napi); + if (c->xdp) - mlx5e_close_cq(&c->rq.xdpsq.cq); + mlx5e_close_cq(&c->rq_xdpsq.cq); err_close_rx_cq: mlx5e_close_cq(&c->rq.cq); @@ -1987,6 +1893,85 @@ err_close_tx_cqs: err_close_icosq_cq: mlx5e_close_cq(&c->icosq.cq); + return err; +} + +static void mlx5e_close_queues(struct mlx5e_channel *c) +{ + mlx5e_close_xdpsq(&c->xdpsq); + mlx5e_close_rq(&c->rq); + if (c->xdp) + mlx5e_close_xdpsq(&c->rq_xdpsq); + mlx5e_close_sqs(c); + mlx5e_close_icosq(&c->icosq); + napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->rq_xdpsq.cq); + mlx5e_close_cq(&c->rq.cq); + mlx5e_close_cq(&c->xdpsq.cq); + mlx5e_close_tx_cqs(c); + mlx5e_close_cq(&c->icosq.cq); +} + +static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam, + struct xdp_umem *umem, + struct mlx5e_channel **cp) +{ + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); + struct net_device *netdev = priv->netdev; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + unsigned int irq; + int err; + int eqn; + + err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + if (err) + return err; + + c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); + if (!c) + return -ENOMEM; + + c->priv = priv; + c->mdev = priv->mdev; + c->tstamp = &priv->tstamp; + c->ix = ix; + c->cpu = cpu; + c->pdev = priv->mdev->device; + c->netdev = priv->netdev; + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); + c->num_tc = params->num_tc; + c->xdp = !!params->xdp_prog; + c->stats = &priv->channel_stats[ix].ch; + c->irq_desc = irq_to_desc(irq); + + err = mlx5e_alloc_xps_cpumask(c, params); + if (err) + goto err_free_channel; + + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); + + err = mlx5e_open_queues(c, params, cparam); + if (unlikely(err)) + goto err_napi_del; + + if (umem) { + mlx5e_build_xsk_param(umem, &xsk); + err = mlx5e_open_xsk(priv, params, &xsk, umem, c); + if (unlikely(err)) + goto err_close_queues; + } + + *cp = c; + + return 0; + +err_close_queues: + mlx5e_close_queues(c); + err_napi_del: netif_napi_del(&c->napi); mlx5e_free_xps_cpumask(c); @@ -2005,12 +1990,18 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_rq(&c->rq); netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix); + + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_activate_xsk(c); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) { int tc; + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_deactivate_xsk(c); + mlx5e_deactivate_rq(&c->rq); for (tc = 0; tc < c->num_tc; tc++) mlx5e_deactivate_txqsq(&c->sq[tc]); @@ -2018,19 +2009,9 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) static void mlx5e_close_channel(struct mlx5e_channel *c) { - mlx5e_close_xdpsq(&c->xdpsq, NULL); - mlx5e_close_rq(&c->rq); - if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); - mlx5e_close_sqs(c); - mlx5e_close_icosq(&c->icosq); - napi_disable(&c->napi); - if (c->xdp) - mlx5e_close_cq(&c->rq.xdpsq.cq); - mlx5e_close_cq(&c->rq.cq); - mlx5e_close_cq(&c->xdpsq.cq); - mlx5e_close_tx_cqs(c); - mlx5e_close_cq(&c->icosq.cq); + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_close_xsk(c); + mlx5e_close_queues(c); netif_napi_del(&c->napi); mlx5e_free_xps_cpumask(c); @@ -2041,6 +2022,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct mlx5e_rq_frags_info *info) { u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); @@ -2053,10 +2035,10 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, byte_count += MLX5E_METADATA_ETHER_LEN; #endif - if (mlx5e_rx_is_linear_skb(mdev, params)) { + if (mlx5e_rx_is_linear_skb(params, xsk)) { int frag_stride; - frag_stride = mlx5e_rx_get_linear_frag_sz(params); + frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk); frag_stride = roundup_pow_of_two(frag_stride); info->arr[0].frag_size = byte_count; @@ -2107,9 +2089,17 @@ static inline u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs) return order_base_2(sz); } -static void mlx5e_build_rq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_rq_param *param) +static u8 mlx5e_get_rq_log_wq_sz(void *rqc) +{ + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + + return MLX5_GET(wq, wq, log_wq_sz); +} + +void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_param *param) { struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; @@ -2119,16 +2109,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: MLX5_SET(wq, wq, log_wqe_num_of_strides, - mlx5e_mpwqe_get_log_num_strides(mdev, params) - + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); MLX5_SET(wq, wq, log_wqe_stride_size, - mlx5e_mpwqe_get_log_stride_size(mdev, params) - + mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); - MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params)); + MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk)); break; default: /* MLX5_WQ_TYPE_CYCLIC */ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - mlx5e_build_rq_frags_info(mdev, params, ¶m->frags_info); + mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); ndsegs = param->frags_info.num_frags; } @@ -2141,7 +2131,7 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); - param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); + param->wq.buf_numa_node = dev_to_node(mdev->device); } static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv, @@ -2156,11 +2146,11 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv, mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1)); MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter); - param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); + param->wq.buf_numa_node = dev_to_node(mdev->device); } -static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, - struct mlx5e_sq_param *param) +void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2168,7 +2158,7 @@ static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); - param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); + param->wq.buf_numa_node = dev_to_node(priv->mdev->device); } static void mlx5e_build_sq_param(struct mlx5e_priv *priv, @@ -2177,10 +2167,13 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + bool allow_swp; + allow_swp = mlx5_geneve_tx_allowed(priv->mdev) || + !!MLX5_IPSEC_DEV(priv->mdev); mlx5e_build_sq_param_common(priv, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); - MLX5_SET(sqc, sqc, allow_swp, !!MLX5_IPSEC_DEV(priv->mdev)); + MLX5_SET(sqc, sqc, allow_swp, allow_swp); } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -2193,9 +2186,10 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); } -static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_cq_param *param) +void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_cq_param *param) { struct mlx5_core_dev *mdev = priv->mdev; void *cqc = param->cqc; @@ -2203,8 +2197,8 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - log_cq_size = mlx5e_mpwqe_get_log_rq_size(params) + - mlx5e_mpwqe_get_log_num_strides(mdev, params); + log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); break; default: /* MLX5_WQ_TYPE_CYCLIC */ log_cq_size = params->log_rq_mtu_frames; @@ -2220,9 +2214,9 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; } -static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_cq_param *param) +void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_cq_param *param) { void *cqc = param->cqc; @@ -2232,9 +2226,9 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, param->cq_period_mode = params->tx_cq_moderation.cq_period_mode; } -static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_cq_param *param) +void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_cq_param *param) { void *cqc = param->cqc; @@ -2242,12 +2236,12 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } -static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_sq_param *param) +void mlx5e_build_icosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2258,9 +2252,9 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); } -static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_sq_param *param) +void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2270,17 +2264,32 @@ static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); } +static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params, + struct mlx5e_rq_param *rqp) +{ + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return order_base_2(MLX5E_UMR_WQEBBS) + + mlx5e_get_rq_log_wq_sz(rqp->rqc); + default: /* MLX5_WQ_TYPE_CYCLIC */ + return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + } +} + static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) { - u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + u8 icosq_log_wq_sz; + + mlx5e_build_rq_param(priv, params, NULL, &cparam->rq); + + icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq); - mlx5e_build_rq_param(priv, params, &cparam->rq); mlx5e_build_sq_param(priv, params, &cparam->sq); mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq); - mlx5e_build_rx_cq_param(priv, params, &cparam->rx_cq); + mlx5e_build_rx_cq_param(priv, params, NULL, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq); mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq); } @@ -2301,7 +2310,12 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, mlx5e_build_channel_param(priv, &chs->params, cparam); for (i = 0; i < chs->num; i++) { - err = mlx5e_open_channel(priv, i, &chs->params, cparam, &chs->c[i]); + struct xdp_umem *umem = NULL; + + if (chs->params.xdp_prog) + umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, i); + + err = mlx5e_open_channel(priv, i, &chs->params, cparam, umem, &chs->c[i]); if (err) goto err_close_channels; } @@ -2332,14 +2346,22 @@ static void mlx5e_activate_channels(struct mlx5e_channels *chs) mlx5e_activate_channel(chs->c[i]); } +#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */ + static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs) { int err = 0; int i; - for (i = 0; i < chs->num; i++) - err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, - err ? 0 : 20000); + for (i = 0; i < chs->num; i++) { + int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT; + + err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout); + + /* Don't wait on the XSK RQ, because the newer xdpsock sample + * doesn't provide any Fill Ring entries at the setup stage. + */ + } return err ? -ETIMEDOUT : 0; } @@ -2411,35 +2433,35 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv) return err; } -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { - struct mlx5e_rqt *rqt; + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int err; int ix; - for (ix = 0; ix < mlx5e_get_netdev_max_channels(priv->netdev); ix++) { - rqt = &priv->direct_tir[ix].rqt; - err = mlx5e_create_rqt(priv, 1 /*size */, rqt); - if (err) + for (ix = 0; ix < max_nch; ix++) { + err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt); + if (unlikely(err)) goto err_destroy_rqts; } return 0; err_destroy_rqts: - mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err); + mlx5_core_warn(priv->mdev, "create rqts failed, %d\n", err); for (ix--; ix >= 0; ix--) - mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt); + mlx5e_destroy_rqt(priv, &tirs[ix].rqt); return err; } -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv) +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int i; - for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + for (i = 0; i < max_nch; i++) + mlx5e_destroy_rqt(priv, &tirs[i].rqt); } static int mlx5e_rx_hash_fn(int hfunc) @@ -2636,7 +2658,7 @@ static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); MLX5_SET(tirc, tirc, lro_max_ip_payload_size, - (params->lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); + (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout); } @@ -2746,22 +2768,6 @@ free_in: return err; } -static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv, - enum mlx5e_traffic_types tt, - u32 *tirc) -{ - MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); - - mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); - - MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); - MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1); - - mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, - &tirc_default_config[tt], tirc, true); -} - static int mlx5e_set_mtu(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u16 mtu) { @@ -2811,6 +2817,21 @@ int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) return 0; } +void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv) +{ + struct mlx5e_params *params = &priv->channels.params; + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + u16 max_mtu; + + /* MTU range: 68 - hw-specific max */ + netdev->min_mtu = ETH_MIN_MTU; + + mlx5_query_port_max_mtu(mdev, &max_mtu, 1); + netdev->max_mtu = min_t(unsigned int, MLX5E_HW2SW_MTU(params, max_mtu), + ETH_MAX_MTU); +} + static void mlx5e_netdev_set_tcs(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -2860,11 +2881,12 @@ static void mlx5e_build_tx2sq_maps(struct mlx5e_priv *priv) void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) { int num_txqs = priv->channels.num * priv->channels.params.num_tc; + int num_rxqs = priv->channels.num * MLX5E_NUM_RQ_GROUPS; struct net_device *netdev = priv->netdev; mlx5e_netdev_set_tcs(netdev); netif_set_real_num_tx_queues(netdev, num_txqs); - netif_set_real_num_rx_queues(netdev, priv->channels.num); + netif_set_real_num_rx_queues(netdev, num_rxqs); mlx5e_build_tx2sq_maps(priv); mlx5e_activate_channels(&priv->channels); @@ -2876,10 +2898,14 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_wait_channels_min_rx_wqes(&priv->channels); mlx5e_redirect_rqts_to_channels(priv, &priv->channels); + + mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels); } void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { + mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels); + mlx5e_redirect_rqts_to_drop(priv); if (mlx5e_is_vport_rep(priv)) @@ -2919,7 +2945,7 @@ static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, if (hw_modify) hw_modify(priv); - mlx5e_refresh_tirs(priv, false); + priv->profile->update_rx(priv); mlx5e_activate_priv_channels(priv); /* return carrier back if needed */ @@ -2958,15 +2984,18 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + bool is_xdp = priv->channels.params.xdp_prog; int err; set_bit(MLX5E_STATE_OPENED, &priv->state); + if (is_xdp) + mlx5e_xdp_set_open(priv); err = mlx5e_open_channels(priv, &priv->channels); if (err) goto err_clear_state_opened_flag; - mlx5e_refresh_tirs(priv, false); + priv->profile->update_rx(priv); mlx5e_activate_priv_channels(priv); if (priv->profile->update_carrier) priv->profile->update_carrier(priv); @@ -2975,6 +3004,8 @@ int mlx5e_open_locked(struct net_device *netdev) return 0; err_clear_state_opened_flag: + if (is_xdp) + mlx5e_xdp_set_closed(priv); clear_bit(MLX5E_STATE_OPENED, &priv->state); return err; } @@ -3006,6 +3037,8 @@ int mlx5e_close_locked(struct net_device *netdev) if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; + if (priv->channels.params.xdp_prog) + mlx5e_xdp_set_closed(priv); clear_bit(MLX5E_STATE_OPENED, &priv->state); netif_carrier_off(priv->netdev); @@ -3058,8 +3091,8 @@ static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev, struct mlx5e_cq *cq, struct mlx5e_cq_param *param) { - param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); - param->wq.db_numa_node = dev_to_node(&mdev->pdev->dev); + param->wq.buf_numa_node = dev_to_node(mdev->device); + param->wq.db_numa_node = dev_to_node(mdev->device); return mlx5e_alloc_cq_common(mdev, param, cq); } @@ -3117,20 +3150,19 @@ void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq) mlx5e_free_cq(&drop_rq->cq); } -int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, - u32 underlay_qpn, u32 *tisn) +int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn) { - u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); - MLX5_SET(tisc, tisc, prio, tc << 1); - MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn); MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); + if (MLX5_GET(tisc, tisc, tls_en)) + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn); + if (mlx5_lag_is_lacp_owner(mdev)) MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); - return mlx5_core_create_tis(mdev, in, sizeof(in), tisn); + return mlx5_core_create_tis(mdev, in, MLX5_ST_SZ_BYTES(create_tis_in), tisn); } void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn) @@ -3144,7 +3176,14 @@ int mlx5e_create_tises(struct mlx5e_priv *priv) int tc; for (tc = 0; tc < priv->profile->max_tc; tc++) { - err = mlx5e_create_tis(priv->mdev, tc, 0, &priv->tisn[tc]); + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, prio, tc << 1); + + err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[tc]); if (err) goto err_close_tises; } @@ -3167,32 +3206,42 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); } -static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, - enum mlx5e_traffic_types tt, - u32 *tirc) +static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv, + u32 rqtn, u32 *tirc) { MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, rqtn); + MLX5_SET(tirc, tirc, tunneled_offload_en, + priv->channels.params.tunneled_offload_en); mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); +} - MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); - +static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, + enum mlx5e_traffic_types tt, + u32 *tirc) +{ + mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc); mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &tirc_default_config[tt], tirc, false); } static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) { - MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); - - mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); - - MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - MLX5_SET(tirc, tirc, indirect_table, rqtn); + mlx5e_build_indir_tir_ctx_common(priv, rqtn, tirc); MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); } +static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv, + enum mlx5e_traffic_types tt, + u32 *tirc) +{ + mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc); + mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, + &tirc_default_config[tt], tirc, true); +} + int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc) { struct mlx5e_tir *tir; @@ -3252,13 +3301,13 @@ err_destroy_inner_tirs: return err; } -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { - int nch = mlx5e_get_netdev_max_channels(priv->netdev); + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); struct mlx5e_tir *tir; void *tirc; int inlen; - int err; + int err = 0; u32 *in; int ix; @@ -3267,25 +3316,24 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) if (!in) return -ENOMEM; - for (ix = 0; ix < nch; ix++) { + for (ix = 0; ix < max_nch; ix++) { memset(in, 0, inlen); - tir = &priv->direct_tir[ix]; + tir = &tirs[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_direct_tir_ctx(priv, priv->direct_tir[ix].rqt.rqtn, tirc); + mlx5e_build_direct_tir_ctx(priv, tir->rqt.rqtn, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); - if (err) + if (unlikely(err)) goto err_destroy_ch_tirs; } - kvfree(in); - - return 0; + goto out; err_destroy_ch_tirs: - mlx5_core_warn(priv->mdev, "create direct tirs failed, %d\n", err); + mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err); for (ix--; ix >= 0; ix--) - mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]); + mlx5e_destroy_tir(priv->mdev, &tirs[ix]); +out: kvfree(in); return err; @@ -3305,13 +3353,13 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc) mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); } -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { - int nch = mlx5e_get_netdev_max_channels(priv->netdev); + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int i; - for (i = 0; i < nch; i++) - mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]); + for (i = 0; i < max_nch; i++) + mlx5e_destroy_tir(priv->mdev, &tirs[i]); } static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) @@ -3378,17 +3426,17 @@ out: #ifdef CONFIG_MLX5_ESWITCH static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *cls_flower, + struct flow_cls_offload *cls_flower, int flags) { switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return mlx5e_configure_flower(priv->netdev, priv, cls_flower, flags); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return mlx5e_delete_flower(priv->netdev, priv, cls_flower, flags); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return mlx5e_stats_flower(priv->netdev, priv, cls_flower, flags); default: @@ -3409,36 +3457,22 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, return -EOPNOTSUPP; } } - -static int mlx5e_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, mlx5e_setup_tc_block_cb, - priv, priv, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, mlx5e_setup_tc_block_cb, - priv); - return 0; - default: - return -EOPNOTSUPP; - } -} #endif +static LIST_HEAD(mlx5e_block_cb_list); + static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { + struct mlx5e_priv *priv = netdev_priv(dev); + switch (type) { #ifdef CONFIG_MLX5_ESWITCH case TC_SETUP_BLOCK: - return mlx5e_setup_tc_block(dev, type_data); + return flow_block_cb_setup_simple(type_data, + &mlx5e_block_cb_list, + mlx5e_setup_tc_block_cb, + priv, priv, true); #endif case TC_SETUP_QDISC_MQPRIO: return mlx5e_setup_tc_mqprio(dev, type_data); @@ -3453,11 +3487,12 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) { struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i]; + struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; int j; - s->rx_packets += rq_stats->packets; - s->rx_bytes += rq_stats->bytes; + s->rx_packets += rq_stats->packets + xskrq_stats->packets; + s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes; for (j = 0; j < priv->max_opened_tc; j++) { struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; @@ -3556,6 +3591,13 @@ static int set_feature_lro(struct net_device *netdev, bool enable) mutex_lock(&priv->state_lock); + if (enable && priv->xsk.refcnt) { + netdev_warn(netdev, "LRO is incompatible with AF_XDP (%hu XSKs are active)\n", + priv->xsk.refcnt); + err = -EINVAL; + goto out; + } + old_params = &priv->channels.params; if (enable && !MLX5E_GET_PFLAG(old_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { netdev_warn(netdev, "can't set LRO with legacy RQ\n"); @@ -3569,8 +3611,8 @@ static int set_feature_lro(struct net_device *netdev, bool enable) new_channels.params.lro_en = enable; if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { - if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params) == - mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params)) + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params, NULL) == + mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL)) reset = false; } @@ -3698,8 +3740,7 @@ static int mlx5e_handle_feature(struct net_device *netdev, return 0; } -static int mlx5e_set_features(struct net_device *netdev, - netdev_features_t features) +int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) { netdev_features_t oper_features = netdev->features; int err = 0; @@ -3750,11 +3791,54 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n"); } + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { + features &= ~NETIF_F_RXHASH; + if (netdev->features & NETIF_F_RXHASH) + netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); + } + mutex_unlock(&priv->state_lock); return features; } +static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, + struct mlx5e_channels *chs, + struct mlx5e_params *new_params, + struct mlx5_core_dev *mdev) +{ + u16 ix; + + for (ix = 0; ix < chs->params.num_channels; ix++) { + struct xdp_umem *umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, ix); + struct mlx5e_xsk_param xsk; + + if (!umem) + continue; + + mlx5e_build_xsk_param(umem, &xsk); + + if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) { + u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk); + int max_mtu_frame, max_mtu_page, max_mtu; + + /* Two criteria must be met: + * 1. HW MTU + all headrooms <= XSK frame size. + * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE. + */ + max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr); + max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk); + max_mtu = min(max_mtu_frame, max_mtu_page); + + netdev_err(netdev, "MTU %d is too big for an XSK running on channel %hu. Try MTU <= %d\n", + new_params->sw_mtu, ix, max_mtu); + return false; + } + } + + return true; +} + int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, change_hw_mtu_cb set_mtu_cb) { @@ -3775,18 +3859,31 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, new_channels.params.sw_mtu = new_mtu; if (params->xdp_prog && - !mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) { + !mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n", - new_mtu, mlx5e_xdp_max_mtu(params)); + new_mtu, mlx5e_xdp_max_mtu(params, NULL)); + err = -EINVAL; + goto out; + } + + if (priv->xsk.refcnt && + !mlx5e_xsk_validate_mtu(netdev, &priv->channels, + &new_channels.params, priv->mdev)) { err = -EINVAL; goto out; } if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { - bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, &new_channels.params); - u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params); - u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params); + bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, + &new_channels.params, + NULL); + u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL); + u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params, NULL); + + /* If XSK is active, XSK RQs are linear. */ + is_linear |= priv->xsk.refcnt; + /* Always reset in linear mode - hw_mtu is used in data path. */ reset = reset && (is_linear || (ppw_old != ppw_new)); } @@ -3875,6 +3972,9 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) memcpy(&priv->tstamp, &config, sizeof(config)); mutex_unlock(&priv->state_lock); + /* might need to fix some features */ + netdev_update_features(priv->netdev); + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; } @@ -4115,6 +4215,12 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, /* Verify if UDP port is being offloaded by HW */ if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port)) return features; + +#if IS_ENABLED(CONFIG_GENEVE) + /* Support Geneve offload for default UDP port */ + if (port == GENEVE_UDP_PORT && mlx5_geneve_tx_allowed(priv->mdev)) + return features; +#endif } out: @@ -4210,16 +4316,29 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) new_channels.params = priv->channels.params; new_channels.params.xdp_prog = prog; - if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) { + /* No XSK params: AF_XDP can't be enabled yet at the point of setting + * the XDP program. + */ + if (!mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n", new_channels.params.sw_mtu, - mlx5e_xdp_max_mtu(&new_channels.params)); + mlx5e_xdp_max_mtu(&new_channels.params, NULL)); return -EINVAL; } return 0; } +static int mlx5e_xdp_update_state(struct mlx5e_priv *priv) +{ + if (priv->channels.params.xdp_prog) + mlx5e_xdp_set_open(priv); + else + mlx5e_xdp_set_closed(priv); + + return 0; +} + static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4240,8 +4359,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) /* no need for full reset when exchanging programs */ reset = (!priv->channels.params.xdp_prog || !prog); - if (was_opened && reset) - mlx5e_close_locked(netdev); if (was_opened && !reset) { /* num_channels is invariant here, so we can take the * batched reference right upfront. @@ -4253,20 +4370,31 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) } } - /* exchange programs, extra prog reference we got from caller - * as long as we don't fail from this point onwards. - */ - old_prog = xchg(&priv->channels.params.xdp_prog, prog); + if (was_opened && reset) { + struct mlx5e_channels new_channels = {}; + + new_channels.params = priv->channels.params; + new_channels.params.xdp_prog = prog; + mlx5e_set_rq_type(priv->mdev, &new_channels.params); + old_prog = priv->channels.params.xdp_prog; + + err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_xdp_update_state); + if (err) + goto unlock; + } else { + /* exchange programs, extra prog reference we got from caller + * as long as we don't fail from this point onwards. + */ + old_prog = xchg(&priv->channels.params.xdp_prog, prog); + } + if (old_prog) bpf_prog_put(old_prog); - if (reset) /* change RQ type according to priv->xdp_prog */ + if (!was_opened && reset) /* change RQ type according to priv->xdp_prog */ mlx5e_set_rq_type(priv->mdev, &priv->channels.params); - if (was_opened && reset) - mlx5e_open_locked(netdev); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) + if (!was_opened || reset) goto unlock; /* exchanging programs w/o reset, we update ref counts on behalf @@ -4274,19 +4402,29 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) */ for (i = 0; i < priv->channels.num; i++) { struct mlx5e_channel *c = priv->channels.c[i]; + bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); + if (xsk_open) + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); napi_synchronize(&c->napi); /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ old_prog = xchg(&c->rq.xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + if (xsk_open) { + old_prog = xchg(&c->xskrq.xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + } set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); + if (xsk_open) + set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); /* napi_schedule in case we have missed anything */ napi_schedule(&c->napi); - - if (old_prog) - bpf_prog_put(old_prog); } unlock: @@ -4317,6 +4455,9 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) case XDP_QUERY_PROG: xdp->prog_id = mlx5e_xdp_query(dev); return 0; + case XDP_SETUP_XSK_UMEM: + return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem, + xdp->xsk.queue_id); default: return -EINVAL; } @@ -4399,6 +4540,7 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_tx_timeout = mlx5e_tx_timeout, .ndo_bpf = mlx5e_xdp, .ndo_xdp_xmit = mlx5e_xdp_xmit, + .ndo_xsk_async_xmit = mlx5e_xsk_async_xmit, #ifdef CONFIG_MLX5_EN_ARFS .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif @@ -4468,9 +4610,9 @@ static bool slow_pci_heuristic(struct mlx5_core_dev *mdev) link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; } -static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) +static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) { - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; moder.cq_period_mode = cq_period_mode; moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; @@ -4481,9 +4623,9 @@ static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) return moder; } -static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) +static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) { - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; moder.cq_period_mode = cq_period_mode; moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; @@ -4497,8 +4639,8 @@ static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) { return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? - NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE : - NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + DIM_CQ_PERIOD_MODE_START_FROM_CQE : + DIM_CQ_PERIOD_MODE_START_FROM_EQE; } void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) @@ -4550,11 +4692,13 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, * - Striding RQ configuration is not possible/supported. * - Slow PCI heuristic. * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. + * + * No XSK params: checking the availability of striding RQ in general. */ if (!slow_pci_heuristic(mdev) && mlx5e_striding_rq_possible(mdev, params) && - (mlx5e_rx_mpwqe_is_linear_skb(mdev, params) || - !mlx5e_rx_is_linear_skb(mdev, params))) + (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || + !mlx5e_rx_is_linear_skb(params, NULL))) MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); mlx5e_set_rq_type(mdev, params); mlx5e_init_rq_type_params(mdev, params); @@ -4576,6 +4720,7 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, } void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu) @@ -4611,9 +4756,11 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* HW LRO */ /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ - if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + /* No XSK params: checking the availability of striding RQ in general. */ + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) params->lro_en = !slow_pci_heuristic(mdev); + } params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ @@ -4630,13 +4777,18 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* RSS */ mlx5e_build_rss_params(rss_params, params->num_channels); + params->tunneled_offload_en = + mlx5e_tunnel_inner_ft_supported(mdev); + + /* AF_XDP */ + params->xsk = xsk; } static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr); + mlx5_query_mac_address(priv->mdev, netdev->dev_addr); if (is_zero_ether_addr(netdev->dev_addr) && !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) { eth_hw_addr_random(netdev); @@ -4651,7 +4803,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) bool fcs_supported; bool fcs_enabled; - SET_NETDEV_DEV(netdev, &mdev->pdev->dev); + SET_NETDEV_DEV(netdev, mdev->device); netdev->netdev_ops = &mlx5e_netdev_ops; @@ -4665,14 +4817,18 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->ethtool_ops = &mlx5e_ethtool_ops; netdev->vlan_features |= NETIF_F_SG; - netdev->vlan_features |= NETIF_F_IP_CSUM; - netdev->vlan_features |= NETIF_F_IPV6_CSUM; + netdev->vlan_features |= NETIF_F_HW_CSUM; netdev->vlan_features |= NETIF_F_GRO; netdev->vlan_features |= NETIF_F_TSO; netdev->vlan_features |= NETIF_F_TSO6; netdev->vlan_features |= NETIF_F_RXCSUM; netdev->vlan_features |= NETIF_F_RXHASH; + netdev->mpls_features |= NETIF_F_SG; + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->mpls_features |= NETIF_F_TSO; + netdev->mpls_features |= NETIF_F_TSO6; + netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX; @@ -4686,15 +4842,15 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; - if (mlx5_vxlan_allowed(mdev->vxlan) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { - netdev->hw_enc_features |= NETIF_F_IP_CSUM; - netdev->hw_enc_features |= NETIF_F_IPV6_CSUM; + if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) || + MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; netdev->hw_enc_features |= NETIF_F_TSO6; netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; } - if (mlx5_vxlan_allowed(mdev->vxlan)) { + if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM; netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | @@ -4734,6 +4890,10 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (!priv->channels.params.scatter_fcs_en) netdev->features &= ~NETIF_F_RXFCS; + /* prefere CQE compression over rxhash */ + if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) + netdev->features &= ~NETIF_F_RXHASH; + #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) if (FT_CAP(flow_modify_en) && FT_CAP(modify_root) && @@ -4797,7 +4957,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (err) return err; - mlx5e_build_nic_params(mdev, rss, &priv->channels.params, + mlx5e_build_nic_params(mdev, &priv->xsk, rss, &priv->channels.params, mlx5e_get_netdev_max_channels(netdev), netdev->mtu); @@ -4839,7 +4999,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir); if (err) goto err_destroy_indirect_rqts; @@ -4847,14 +5007,22 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir); if (err) goto err_destroy_indirect_tirs; + err = mlx5e_create_direct_rqts(priv, priv->xsk_tir); + if (unlikely(err)) + goto err_destroy_direct_tirs; + + err = mlx5e_create_direct_tirs(priv, priv->xsk_tir); + if (unlikely(err)) + goto err_destroy_xsk_rqts; + err = mlx5e_create_flow_steering(priv); if (err) { mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); - goto err_destroy_direct_tirs; + goto err_destroy_xsk_tirs; } err = mlx5e_tc_nic_init(priv); @@ -4865,12 +5033,16 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) err_destroy_flow_steering: mlx5e_destroy_flow_steering(priv); +err_destroy_xsk_tirs: + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); +err_destroy_xsk_rqts: + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv, true); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -4884,9 +5056,11 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { mlx5e_tc_nic_cleanup(priv); mlx5e_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, true); - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@ -4913,7 +5087,6 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; - u16 max_mtu; mlx5e_init_l2_addr(priv); @@ -4921,10 +5094,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) if (!netif_running(netdev)) mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN); - /* MTU range: 68 - hw-specific max */ - netdev->min_mtu = ETH_MIN_MTU; - mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); - netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); + mlx5e_set_netdev_mtu_boundaries(priv); mlx5e_set_dev_port_mtu(priv); mlx5_lag_add(mdev, netdev); @@ -4972,6 +5142,11 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5_lag_remove(mdev); } +int mlx5e_update_nic_rx(struct mlx5e_priv *priv) +{ + return mlx5e_refresh_tirs(priv, false); +} + static const struct mlx5e_profile mlx5e_nic_profile = { .init = mlx5e_nic_init, .cleanup = mlx5e_nic_cleanup, @@ -4981,6 +5156,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .cleanup_tx = mlx5e_cleanup_nic_tx, .enable = mlx5e_nic_enable, .disable = mlx5e_nic_disable, + .update_rx = mlx5e_update_nic_rx, .update_stats = mlx5e_update_ndo_stats, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe, @@ -5040,7 +5216,7 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), nch * profile->max_tc, - nch); + nch * MLX5E_NUM_RQ_GROUPS); if (!netdev) { mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); return NULL; @@ -5153,6 +5329,11 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) struct mlx5e_priv *priv = vpriv; struct net_device *netdev = priv->netdev; +#ifdef CONFIG_MLX5_ESWITCH + if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev) + return; +#endif + if (!netif_device_present(netdev)) return; @@ -5173,7 +5354,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) #ifdef CONFIG_MLX5_ESWITCH if (MLX5_ESWITCH_MANAGER(mdev) && - mlx5_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { + mlx5_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) { mlx5e_rep_register_vport_reps(mdev); return mdev; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index a66b6ed80b30..10ef90a7bddd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -37,6 +37,7 @@ #include <net/act_api.h> #include <net/netevent.h> #include <net/arp.h> +#include <net/devlink.h> #include "eswitch.h" #include "en.h" @@ -65,9 +66,26 @@ static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, static void mlx5e_rep_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + strlcpy(drvinfo->driver, mlx5e_rep_driver_name, sizeof(drvinfo->driver)); strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d (%.16s)", + fw_rev_maj(mdev), fw_rev_min(mdev), + fw_rev_sub(mdev), mdev->board_id); +} + +static void mlx5e_uplink_rep_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_rep_get_drvinfo(dev, drvinfo); + strlcpy(drvinfo->bus_info, pci_name(priv->mdev->pdev), + sizeof(drvinfo->bus_info)); } static const struct counter_desc sw_rep_stats_desc[] = { @@ -111,7 +129,7 @@ static void mlx5e_rep_get_strings(struct net_device *dev, } } -static void mlx5e_vf_rep_update_hw_counters(struct mlx5e_priv *priv) +static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -149,17 +167,6 @@ static void mlx5e_uplink_rep_update_hw_counters(struct mlx5e_priv *priv) vport_stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); } -static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; - - if (rep->vport == MLX5_VPORT_UPLINK) - mlx5e_uplink_rep_update_hw_counters(priv); - else - mlx5e_vf_rep_update_hw_counters(priv); -} - static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) { struct mlx5e_sw_stats *s = &priv->stats.sw; @@ -186,7 +193,7 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, mutex_lock(&priv->state_lock); mlx5e_rep_update_sw_counters(priv); - mlx5e_rep_update_hw_counters(priv); + priv->profile->update_stats(priv); mutex_unlock(&priv->state_lock); for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) @@ -346,7 +353,7 @@ static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev, return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); } -static const struct ethtool_ops mlx5e_vf_rep_ethtool_ops = { +static const struct ethtool_ops mlx5e_rep_ethtool_ops = { .get_drvinfo = mlx5e_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, @@ -363,7 +370,7 @@ static const struct ethtool_ops mlx5e_vf_rep_ethtool_ops = { }; static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { - .get_drvinfo = mlx5e_rep_get_drvinfo, + .get_drvinfo = mlx5e_uplink_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, .get_sset_count = mlx5e_rep_get_sset_count, @@ -385,30 +392,19 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { static int mlx5e_rep_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid) { - struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct net_device *uplink_upper = NULL; - struct mlx5e_priv *uplink_priv = NULL; - struct net_device *uplink_dev; - - if (esw->mode == SRIOV_NONE) - return -EOPNOTSUPP; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + u64 parent_id; - uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - if (uplink_dev) { - uplink_upper = netdev_master_upper_dev_get(uplink_dev); - uplink_priv = netdev_priv(uplink_dev); - } + priv = netdev_priv(dev); + esw = priv->mdev->priv.eswitch; - ppid->id_len = ETH_ALEN; - if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) { - ether_addr_copy(ppid->id, uplink_upper->dev_addr); - } else { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; + if (esw->mode == MLX5_ESWITCH_NONE) + return -EOPNOTSUPP; - ether_addr_copy(ppid->id, rep->hw_id); - } + parent_id = mlx5_query_nic_system_image_guid(priv->mdev); + ppid->id_len = sizeof(parent_id); + memcpy(ppid->id, &parent_id, sizeof(parent_id)); return 0; } @@ -419,7 +415,7 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, struct mlx5e_rep_sq *rep_sq, *tmp; struct mlx5e_rep_priv *rpriv; - if (esw->mode != SRIOV_OFFLOADS) + if (esw->mode != MLX5_ESWITCH_OFFLOADS) return; rpriv = mlx5e_rep_to_rep_priv(rep); @@ -440,7 +436,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, int err; int i; - if (esw->mode != SRIOV_OFFLOADS) + if (esw->mode != MLX5_ESWITCH_OFFLOADS) return 0; rpriv = mlx5e_rep_to_rep_priv(rep); @@ -660,7 +656,7 @@ static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) static int mlx5e_rep_indr_offload(struct net_device *netdev, - struct tc_cls_flower_offload *flower, + struct flow_cls_offload *flower, struct mlx5e_rep_indr_block_priv *indr_priv) { struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); @@ -668,13 +664,13 @@ mlx5e_rep_indr_offload(struct net_device *netdev, int err = 0; switch (flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: err = mlx5e_configure_flower(netdev, priv, flower, flags); break; - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: err = mlx5e_delete_flower(netdev, priv, flower, flags); break; - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: err = mlx5e_stats_flower(netdev, priv, flower, flags); break; default: @@ -697,23 +693,39 @@ static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type, } } +static void mlx5e_rep_indr_tc_block_unbind(void *cb_priv) +{ + struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv; + + list_del(&indr_priv->list); + kfree(indr_priv); +} + +static LIST_HEAD(mlx5e_block_cb_list); + static int mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, struct mlx5e_rep_priv *rpriv, - struct tc_block_offload *f) + struct flow_block_offload *f) { struct mlx5e_rep_indr_block_priv *indr_priv; - int err = 0; + struct flow_block_cb *block_cb; - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; + f->driver_block_list = &mlx5e_block_cb_list; + switch (f->command) { - case TC_BLOCK_BIND: + case FLOW_BLOCK_BIND: indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); if (indr_priv) return -EEXIST; + if (flow_block_cb_is_busy(mlx5e_rep_indr_setup_block_cb, + indr_priv, &mlx5e_block_cb_list)) + return -EBUSY; + indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL); if (!indr_priv) return -ENOMEM; @@ -723,26 +735,32 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, list_add(&indr_priv->list, &rpriv->uplink_priv.tc_indr_block_priv_list); - err = tcf_block_cb_register(f->block, - mlx5e_rep_indr_setup_block_cb, - indr_priv, indr_priv, f->extack); - if (err) { + block_cb = flow_block_cb_alloc(f->net, + mlx5e_rep_indr_setup_block_cb, + indr_priv, indr_priv, + mlx5e_rep_indr_tc_block_unbind); + if (IS_ERR(block_cb)) { list_del(&indr_priv->list); kfree(indr_priv); + return PTR_ERR(block_cb); } + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list); - return err; - case TC_BLOCK_UNBIND: + return 0; + case FLOW_BLOCK_UNBIND: indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); if (!indr_priv) return -ENOENT; - tcf_block_cb_unregister(f->block, - mlx5e_rep_indr_setup_block_cb, - indr_priv); - list_del(&indr_priv->list); - kfree(indr_priv); + block_cb = flow_block_cb_lookup(f, + mlx5e_rep_indr_setup_block_cb, + indr_priv); + if (!block_cb) + return -ENOENT; + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); return 0; default: return -EOPNOTSUPP; @@ -795,7 +813,8 @@ static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); struct net_device *netdev = netdev_notifier_info_to_dev(ptr); - if (!mlx5e_tc_tun_device_to_offload(priv, netdev)) + if (!mlx5e_tc_tun_device_to_offload(priv, netdev) && + !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev)) return NOTIFY_OK; switch (event) { @@ -1083,7 +1102,7 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type); } -static int mlx5e_vf_rep_open(struct net_device *dev) +static int mlx5e_rep_open(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1106,7 +1125,7 @@ unlock: return err; } -static int mlx5e_vf_rep_close(struct net_device *dev) +static int mlx5e_rep_close(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1123,42 +1142,18 @@ static int mlx5e_vf_rep_close(struct net_device *dev) return ret; } -static int mlx5e_rep_get_phys_port_name(struct net_device *dev, - char *buf, size_t len) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; - unsigned int fn; - int ret; - - fn = PCI_FUNC(priv->mdev->pdev->devfn); - if (fn >= MLX5_MAX_PORTS) - return -EOPNOTSUPP; - - if (rep->vport == MLX5_VPORT_UPLINK) - ret = snprintf(buf, len, "p%d", fn); - else - ret = snprintf(buf, len, "pf%dvf%d", fn, rep->vport - 1); - - if (ret >= len) - return -EOPNOTSUPP; - - return 0; -} - static int mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *cls_flower, int flags) + struct flow_cls_offload *cls_flower, int flags) { switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return mlx5e_configure_flower(priv->netdev, priv, cls_flower, flags); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return mlx5e_delete_flower(priv->netdev, priv, cls_flower, flags); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return mlx5e_stats_flower(priv->netdev, priv, cls_flower, flags); default: @@ -1180,32 +1175,16 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, } } -static int mlx5e_rep_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb, - priv, priv, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv); - return 0; - default: - return -EOPNOTSUPP; - } -} - static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { + struct mlx5e_priv *priv = netdev_priv(dev); + switch (type) { case TC_SETUP_BLOCK: - return mlx5e_rep_setup_tc_block(dev, type_data); + return flow_block_cb_setup_simple(type_data, NULL, + mlx5e_rep_setup_tc_cb, + priv, priv, true); default: return -EOPNOTSUPP; } @@ -1258,7 +1237,7 @@ static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev } static void -mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -1267,7 +1246,7 @@ mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); } -static int mlx5e_vf_rep_change_mtu(struct net_device *netdev, int new_mtu) +static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu) { return mlx5e_change_mtu(netdev, new_mtu, NULL); } @@ -1300,17 +1279,24 @@ static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan return 0; } -static const struct net_device_ops mlx5e_netdev_ops_vf_rep = { - .ndo_open = mlx5e_vf_rep_open, - .ndo_stop = mlx5e_vf_rep_close, +static struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + return &rpriv->dl_port; +} + +static const struct net_device_ops mlx5e_netdev_ops_rep = { + .ndo_open = mlx5e_rep_open, + .ndo_stop = mlx5e_rep_close, .ndo_start_xmit = mlx5e_xmit, - .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, .ndo_setup_tc = mlx5e_rep_setup_tc, - .ndo_get_stats64 = mlx5e_vf_rep_get_stats, + .ndo_get_devlink_port = mlx5e_get_devlink_port, + .ndo_get_stats64 = mlx5e_rep_get_stats, .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, - .ndo_change_mtu = mlx5e_vf_rep_change_mtu, - .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, + .ndo_change_mtu = mlx5e_rep_change_mtu, }; static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { @@ -1318,8 +1304,8 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { .ndo_stop = mlx5e_close, .ndo_start_xmit = mlx5e_xmit, .ndo_set_mac_address = mlx5e_uplink_rep_set_mac, - .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, .ndo_setup_tc = mlx5e_rep_setup_tc, + .ndo_get_devlink_port = mlx5e_get_devlink_port, .ndo_get_stats64 = mlx5e_get_stats, .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, @@ -1332,12 +1318,12 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_get_vf_stats = mlx5e_get_vf_stats, .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan, - .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, + .ndo_set_features = mlx5e_set_features, }; bool mlx5e_eswitch_rep(struct net_device *netdev) { - if (netdev->netdev_ops == &mlx5e_netdev_ops_vf_rep || + if (netdev->netdev_ops == &mlx5e_netdev_ops_rep || netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep) return true; @@ -1374,6 +1360,7 @@ static void mlx5e_build_rep_params(struct net_device *netdev) mlx5e_set_rx_cq_mode_params(params, cq_period_mode); params->num_tc = 1; + params->tunneled_offload_en = false; mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); @@ -1389,27 +1376,26 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) struct mlx5_core_dev *mdev = priv->mdev; if (rep->vport == MLX5_VPORT_UPLINK) { - SET_NETDEV_DEV(netdev, &priv->mdev->pdev->dev); + SET_NETDEV_DEV(netdev, mdev->device); netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep; /* we want a persistent mac for the uplink rep */ - mlx5_query_nic_vport_mac_address(mdev, 0, netdev->dev_addr); + mlx5_query_mac_address(mdev, netdev->dev_addr); netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB if (MLX5_CAP_GEN(mdev, qos)) netdev->dcbnl_ops = &mlx5e_dcbnl_ops; #endif } else { - netdev->netdev_ops = &mlx5e_netdev_ops_vf_rep; + netdev->netdev_ops = &mlx5e_netdev_ops_rep; eth_hw_addr_random(netdev); - netdev->ethtool_ops = &mlx5e_vf_rep_ethtool_ops; + netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; } netdev->watchdog_timeo = 15 * HZ; + netdev->features |= NETIF_F_NETNS_LOCAL; - netdev->features |= NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; - netdev->hw_features |= NETIF_F_HW_TC; - + netdev->hw_features |= NETIF_F_HW_TC; netdev->hw_features |= NETIF_F_SG; netdev->hw_features |= NETIF_F_IP_CSUM; netdev->hw_features |= NETIF_F_IPV6_CSUM; @@ -1418,7 +1404,9 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_TSO6; netdev->hw_features |= NETIF_F_RXCSUM; - if (rep->vport != MLX5_VPORT_UPLINK) + if (rep->vport == MLX5_VPORT_UPLINK) + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + else netdev->features |= NETIF_F_VLAN_CHALLENGED; netdev->features |= netdev->hw_features; @@ -1509,7 +1497,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir); if (err) goto err_destroy_indirect_rqts; @@ -1517,7 +1505,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir); if (err) goto err_destroy_indirect_tirs; @@ -1534,11 +1522,11 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv, false); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -1552,9 +1540,9 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) mlx5_del_flow_rules(rpriv->vport_rx_rule); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, false); - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); } @@ -1621,15 +1609,14 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) } } -static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv) +static void mlx5e_rep_enable(struct mlx5e_priv *priv) { - struct net_device *netdev = priv->netdev; - struct mlx5_core_dev *mdev = priv->mdev; - u16 max_mtu; + mlx5e_set_netdev_mtu_boundaries(priv); +} - netdev->min_mtu = ETH_MIN_MTU; - mlx5_query_port_max_mtu(mdev, &max_mtu, 1); - netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); +static int mlx5e_update_rep_rx(struct mlx5e_priv *priv) +{ + return 0; } static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data) @@ -1699,15 +1686,16 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) mlx5_lag_remove(mdev); } -static const struct mlx5e_profile mlx5e_vf_rep_profile = { +static const struct mlx5e_profile mlx5e_rep_profile = { .init = mlx5e_init_rep, .cleanup = mlx5e_cleanup_rep, .init_rx = mlx5e_init_rep_rx, .cleanup_rx = mlx5e_cleanup_rep_rx, .init_tx = mlx5e_init_rep_tx, .cleanup_tx = mlx5e_cleanup_rep_tx, - .enable = mlx5e_vf_rep_enable, - .update_stats = mlx5e_vf_rep_update_hw_counters, + .enable = mlx5e_rep_enable, + .update_rx = mlx5e_update_rep_rx, + .update_stats = mlx5e_rep_update_hw_counters, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = 1, @@ -1722,6 +1710,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .cleanup_tx = mlx5e_cleanup_rep_tx, .enable = mlx5e_uplink_rep_enable, .disable = mlx5e_uplink_rep_disable, + .update_rx = mlx5e_update_rep_rx, .update_stats = mlx5e_uplink_rep_update_hw_counters, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, @@ -1729,6 +1718,55 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .max_tc = MLX5E_MAX_NUM_TC, }; +static bool +is_devlink_port_supported(const struct mlx5_core_dev *dev, + const struct mlx5e_rep_priv *rpriv) +{ + return rpriv->rep->vport == MLX5_VPORT_UPLINK || + rpriv->rep->vport == MLX5_VPORT_PF || + mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport); +} + +static int register_devlink_port(struct mlx5_core_dev *dev, + struct mlx5e_rep_priv *rpriv) +{ + struct devlink *devlink = priv_to_devlink(dev); + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct netdev_phys_item_id ppid = {}; + int ret; + + if (!is_devlink_port_supported(dev, rpriv)) + return 0; + + ret = mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid); + if (ret) + return ret; + + if (rep->vport == MLX5_VPORT_UPLINK) + devlink_port_attrs_set(&rpriv->dl_port, + DEVLINK_PORT_FLAVOUR_PHYSICAL, + PCI_FUNC(dev->pdev->devfn), false, 0, + &ppid.id[0], ppid.id_len); + else if (rep->vport == MLX5_VPORT_PF) + devlink_port_attrs_pci_pf_set(&rpriv->dl_port, + &ppid.id[0], ppid.id_len, + dev->pdev->devfn); + else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport)) + devlink_port_attrs_pci_vf_set(&rpriv->dl_port, + &ppid.id[0], ppid.id_len, + dev->pdev->devfn, + rep->vport - 1); + + return devlink_port_register(devlink, &rpriv->dl_port, rep->vport); +} + +static void unregister_devlink_port(struct mlx5_core_dev *dev, + struct mlx5e_rep_priv *rpriv) +{ + if (is_devlink_port_supported(dev, rpriv)) + devlink_port_unregister(&rpriv->dl_port); +} + /* e-Switch vport representors */ static int mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) @@ -1746,7 +1784,8 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) rpriv->rep = rep; nch = mlx5e_get_max_num_channels(dev); - profile = (rep->vport == MLX5_VPORT_UPLINK) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; + profile = (rep->vport == MLX5_VPORT_UPLINK) ? + &mlx5e_uplink_rep_profile : &mlx5e_rep_profile; netdev = mlx5e_create_netdev(dev, profile, nch, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", @@ -1756,7 +1795,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) } rpriv->netdev = netdev; - rep->rep_if[REP_ETH].priv = rpriv; + rep->rep_data[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); if (rep->vport == MLX5_VPORT_UPLINK) { @@ -1779,15 +1818,27 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto err_detach_netdev; } + err = register_devlink_port(dev, rpriv); + if (err) { + esw_warn(dev, "Failed to register devlink port %d\n", + rep->vport); + goto err_neigh_cleanup; + } + err = register_netdev(netdev); if (err) { pr_warn("Failed to register representor netdev for vport %d\n", rep->vport); - goto err_neigh_cleanup; + goto err_devlink_cleanup; } + if (is_devlink_port_supported(dev, rpriv)) + devlink_port_type_eth_set(&rpriv->dl_port, netdev); return 0; +err_devlink_cleanup: + unregister_devlink_port(dev, rpriv); + err_neigh_cleanup: mlx5e_rep_neigh_cleanup(rpriv); @@ -1810,9 +1861,13 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *dev = priv->mdev; void *ppriv = priv->ppriv; + if (is_devlink_port_supported(dev, rpriv)) + devlink_port_type_clear(&rpriv->dl_port); unregister_netdev(netdev); + unregister_devlink_port(dev, rpriv); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); if (rep->vport == MLX5_VPORT_UPLINK) @@ -1830,16 +1885,17 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) return rpriv->netdev; } +static const struct mlx5_eswitch_rep_ops rep_ops = { + .load = mlx5e_vport_rep_load, + .unload = mlx5e_vport_rep_unload, + .get_proto_dev = mlx5e_vport_rep_get_proto_dev +}; + void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep_if rep_if = {}; - - rep_if.load = mlx5e_vport_rep_load; - rep_if.unload = mlx5e_vport_rep_unload; - rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH); + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_ETH); } void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 83b573b1abac..c56e6ee4350c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -86,12 +86,13 @@ struct mlx5e_rep_priv { struct mlx5_flow_handle *vport_rx_rule; struct list_head vport_sqs_list; struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ + struct devlink_port dl_port; }; static inline struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep) { - return (struct mlx5e_rep_priv *)rep->rep_if[REP_ETH].priv; + return rep->rep_data[REP_ETH].priv; } struct mlx5e_neigh { @@ -150,13 +151,12 @@ struct mlx5e_encap_entry { struct hlist_node encap_hlist; struct list_head flows; u32 encap_id; - struct ip_tunnel_info tun_info; + const struct ip_tunnel_info *tun_info; unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; struct net_device *route_dev; - int tunnel_type; - int tunnel_hlen; + struct mlx5e_tc_tunnel *tunnel; int reformat_type; u8 flags; char *encap_header; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index c3b3002ff62f..56a2f4666c47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -34,6 +34,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> +#include <linux/indirect_call_wrapper.h> #include <net/ip6_checksum.h> #include <net/page_pool.h> #include <net/inet_ecn.h> @@ -46,6 +47,7 @@ #include "en_accel/tls_rxtx.h" #include "lib/clock.h" #include "en/xdp.h" +#include "en/xsk/rx.h" static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) { @@ -234,8 +236,8 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, return true; } -static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) +static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) { if (mlx5e_rx_cache_get(rq, dma_info)) return 0; @@ -247,7 +249,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0, PAGE_SIZE, rq->buff.map_dir); if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { - put_page(dma_info->page); + page_pool_recycle_direct(rq->page_pool, dma_info->page); dma_info->page = NULL; return -ENOMEM; } @@ -255,13 +257,23 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, return 0; } +static inline int mlx5e_page_alloc(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + if (rq->umem) + return mlx5e_xsk_page_alloc_umem(rq, dma_info); + else + return mlx5e_page_alloc_pool(rq, dma_info); +} + void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir); } -void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, - bool recycle) +void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info, + bool recycle) { if (likely(recycle)) { if (mlx5e_rx_cache_put(rq, dma_info)) @@ -271,10 +283,25 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, page_pool_recycle_direct(rq->page_pool, dma_info->page); } else { mlx5e_page_dma_unmap(rq, dma_info); + page_pool_release_page(rq->page_pool, dma_info->page); put_page(dma_info->page); } } +static inline void mlx5e_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info, + bool recycle) +{ + if (rq->umem) + /* The `recycle` parameter is ignored, and the page is always + * put into the Reuse Ring, because there is no way to return + * the page to the userspace when the interface goes down. + */ + mlx5e_xsk_page_release(rq, dma_info); + else + mlx5e_page_release_dynamic(rq, dma_info, recycle); +} + static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *frag) { @@ -286,7 +313,7 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, * offset) should just use the new one without replenishing again * by themselves. */ - err = mlx5e_page_alloc_mapped(rq, frag->di); + err = mlx5e_page_alloc(rq, frag->di); return err; } @@ -352,6 +379,13 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk) int err; int i; + if (rq->umem) { + int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags; + + if (unlikely(!mlx5e_xsk_pages_enough_umem(rq, pages_desired))) + return -ENOMEM; + } + for (i = 0; i < wqe_bulk; i++) { struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i); @@ -399,24 +433,31 @@ mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, static void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle) { - const bool no_xdp_xmit = - bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); + bool no_xdp_xmit; struct mlx5e_dma_info *dma_info = wi->umr.dma_info; int i; + /* A common case for AF_XDP. */ + if (bitmap_full(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE)) + return; + + no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, + MLX5_MPWRQ_PAGES_PER_WQE); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) mlx5e_page_release(rq, &dma_info[i], recycle); } -static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) +static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n) { struct mlx5_wq_ll *wq = &rq->mpwqe.wq; - struct mlx5e_rx_wqe_ll *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); - rq->mpwqe.umr_in_progress = false; + do { + u16 next_wqe_index = mlx5_wq_ll_get_wqe_next_ix(wq, wq->head); - mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); + mlx5_wq_ll_push(wq, next_wqe_index); + } while (--n); /* ensure wqes are visible to device before updating doorbell record */ dma_wmb(); @@ -424,11 +465,6 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) mlx5_wq_ll_update_db_record(wq); } -static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq) -{ - return sq->pc >> MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; -} - static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq, struct mlx5_wq_cyc *wq, u16 pi, u16 nnops) @@ -456,6 +492,12 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) int err; int i; + if (rq->umem && + unlikely(!mlx5e_xsk_pages_enough_umem(rq, MLX5_MPWRQ_PAGES_PER_WQE))) { + err = -ENOMEM; + goto err; + } + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) { @@ -464,12 +506,10 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) } umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); - if (unlikely(mlx5e_icosq_wrap_cnt(sq) < 2)) - memcpy(umr_wqe, &rq->mpwqe.umr_wqe, - offsetof(struct mlx5e_umr_wqe, inline_mtts)); + memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts)); for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { - err = mlx5e_page_alloc_mapped(rq, dma_info); + err = mlx5e_page_alloc(rq, dma_info); if (unlikely(err)) goto err_unmap; umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR); @@ -478,16 +518,16 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) bitmap_zero(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); wi->consumed_strides = 0; - rq->mpwqe.umr_in_progress = true; - umr_wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset); sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; + sq->db.ico_wqe[pi].umr.rq = rq; sq->pc += MLX5E_UMR_WQEBBS; - mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &umr_wqe->ctrl); + + sq->doorbell_cseg = &umr_wqe->ctrl; return 0; @@ -496,6 +536,8 @@ err_unmap: dma_info--; mlx5e_page_release(rq, dma_info, true); } + +err: rq->stats->buff_alloc_err++; return err; @@ -542,37 +584,12 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) return !!err; } -static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, - struct mlx5e_icosq *sq, - struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe) -{ - struct mlx5_wq_cyc *wq = &sq->wq; - u16 ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); - struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; - - mlx5_cqwq_pop(&cq->wq); - - if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { - netdev_WARN_ONCE(cq->channel->netdev, - "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe)); - return; - } - - if (likely(icowi->opcode == MLX5_OPCODE_UMR)) { - mlx5e_post_rx_mpwqe(rq); - return; - } - - if (unlikely(icowi->opcode != MLX5_OPCODE_NOP)) - netdev_WARN_ONCE(cq->channel->netdev, - "Bad OPCODE in ICOSQ WQE info: 0x%x\n", icowi->opcode); -} - -static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) +void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) { struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); struct mlx5_cqe64 *cqe; + u16 sqcc; + int i; if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) return; @@ -581,28 +598,105 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) if (likely(!cqe)) return; - /* by design, there's only a single cqe */ - mlx5e_poll_ico_single_cqe(cq, sq, rq, cqe); + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + i = 0; + do { + u16 wqe_counter; + bool last_wqe; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { + netdev_WARN_ONCE(cq->channel->netdev, + "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe)); + break; + } + do { + struct mlx5e_sq_wqe_info *wi; + u16 ci; + + last_wqe = (sqcc == wqe_counter); + + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + wi = &sq->db.ico_wqe[ci]; + + if (likely(wi->opcode == MLX5_OPCODE_UMR)) { + sqcc += MLX5E_UMR_WQEBBS; + wi->umr.rq->mpwqe.umr_completed++; + } else if (likely(wi->opcode == MLX5_OPCODE_NOP)) { + sqcc++; + } else { + netdev_WARN_ONCE(cq->channel->netdev, + "Bad OPCODE in ICOSQ WQE info: 0x%x\n", + wi->opcode); + } + + } while (!last_wqe); + + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + + sq->cc = sqcc; mlx5_cqwq_update_db_record(&cq->wq); } bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) { + struct mlx5e_icosq *sq = &rq->channel->icosq; struct mlx5_wq_ll *wq = &rq->mpwqe.wq; + u8 umr_completed = rq->mpwqe.umr_completed; + int alloc_err = 0; + u8 missing, i; + u16 head; if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return false; - mlx5e_poll_ico_cq(&rq->channel->icosq.cq, rq); + if (umr_completed) { + mlx5e_post_rx_mpwqe(rq, umr_completed); + rq->mpwqe.umr_in_progress -= umr_completed; + rq->mpwqe.umr_completed = 0; + } + + missing = mlx5_wq_ll_missing(wq) - rq->mpwqe.umr_in_progress; + + if (unlikely(rq->mpwqe.umr_in_progress > rq->mpwqe.umr_last_bulk)) + rq->stats->congst_umr++; - if (mlx5_wq_ll_is_full(wq)) +#define UMR_WQE_BULK (2) + if (likely(missing < UMR_WQE_BULK)) return false; - if (!rq->mpwqe.umr_in_progress) - mlx5e_alloc_rx_mpwqe(rq, wq->head); - else - rq->stats->congst_umr += mlx5_wq_ll_missing(wq) > 2; + head = rq->mpwqe.actual_wq_head; + i = missing; + do { + alloc_err = mlx5e_alloc_rx_mpwqe(rq, head); + + if (unlikely(alloc_err)) + break; + head = mlx5_wq_ll_get_wqe_next_ix(wq, head); + } while (--i); + + rq->mpwqe.umr_last_bulk = missing - i; + if (sq->doorbell_cseg) { + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg); + sq->doorbell_cseg = NULL; + } + + rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk; + rq->mpwqe.actual_wq_head = head; + + /* If XSK Fill Ring doesn't have enough frames, busy poll by + * rescheduling the NAPI poll. + */ + if (unlikely(alloc_err == -ENOMEM && rq->umem)) + return true; return false; } @@ -972,7 +1066,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, } rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, false); rcu_read_unlock(); if (consumed) return NULL; /* page/packet was consumed by XDP */ @@ -1048,7 +1142,10 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, cqe, wi, cqe_bcnt); if (!skb) { /* probably for XDP */ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { @@ -1186,7 +1283,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, prefetch(data); rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, false); rcu_read_unlock(); if (consumed) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) @@ -1235,8 +1332,10 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); - skb = rq->mpwqe.skb_from_cqe_mpwrq(rq, wi, cqe_bcnt, head_offset, - page_idx); + skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq, + mlx5e_skb_from_cqe_mpwrq_linear, + mlx5e_skb_from_cqe_mpwrq_nonlinear, + rq, wi, cqe_bcnt, head_offset, page_idx); if (!skb) goto mpwrq_cqe_out; @@ -1283,7 +1382,8 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) mlx5_cqwq_pop(cqwq); - rq->handle_rx_cqe(rq, cqe); + INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe, rq, cqe); } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq))); out: @@ -1393,7 +1493,10 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, cqe, wi, cqe_bcnt); if (!skb) goto wq_free_wqe; @@ -1425,7 +1528,10 @@ void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, cqe, wi, cqe_bcnt); if (unlikely(!skb)) { /* a DROP, save the page-reuse checks */ mlx5e_free_rx_wqe(rq, wi, true); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 4382ef85488c..840ec945ccba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -64,7 +64,7 @@ static int mlx5e_test_health_info(struct mlx5e_priv *priv) { struct mlx5_core_health *health = &priv->mdev->priv.health; - return health->sick ? 1 : 0; + return health->fatal_error ? 1 : 0; } static int mlx5e_test_link_state(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index b75aa8b8bf04..539b4d3656da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -48,8 +48,15 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) }, #ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_no_sync_data) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_bypass_req) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) }, #endif { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, @@ -65,6 +72,8 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_mpwqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_inlnw) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) }, @@ -79,6 +88,8 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_mpwqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_inlnw) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) }, @@ -89,7 +100,6 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_page_reuse) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, @@ -101,7 +111,33 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_aff_change) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_force_irq) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_ecn_mark) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_removed_vlan_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_redirect) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_strides) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_congst_umr) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_arfs_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_xmit) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_mpwqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_cqes) }, }; #define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) @@ -141,6 +177,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) &priv->channel_stats[i]; struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq; struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats->rq_xdpsq; + struct mlx5e_xdpsq_stats *xsksq_stats = &channel_stats->xsksq; + struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; struct mlx5e_ch_stats *ch_stats = &channel_stats->ch; int j; @@ -160,6 +198,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_xdp_drop += rq_stats->xdp_drop; s->rx_xdp_redirect += rq_stats->xdp_redirect; s->rx_xdp_tx_xmit += xdpsq_stats->xmit; + s->rx_xdp_tx_mpwqe += xdpsq_stats->mpwqe; + s->rx_xdp_tx_inlnw += xdpsq_stats->inlnw; s->rx_xdp_tx_full += xdpsq_stats->full; s->rx_xdp_tx_err += xdpsq_stats->err; s->rx_xdp_tx_cqe += xdpsq_stats->cqes; @@ -170,7 +210,6 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_buff_alloc_err += rq_stats->buff_alloc_err; s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; - s->rx_page_reuse += rq_stats->page_reuse; s->rx_cache_reuse += rq_stats->cache_reuse; s->rx_cache_full += rq_stats->cache_full; s->rx_cache_empty += rq_stats->cache_empty; @@ -182,12 +221,41 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->ch_poll += ch_stats->poll; s->ch_arm += ch_stats->arm; s->ch_aff_change += ch_stats->aff_change; + s->ch_force_irq += ch_stats->force_irq; s->ch_eq_rearm += ch_stats->eq_rearm; /* xdp redirect */ s->tx_xdp_xmit += xdpsq_red_stats->xmit; + s->tx_xdp_mpwqe += xdpsq_red_stats->mpwqe; + s->tx_xdp_inlnw += xdpsq_red_stats->inlnw; s->tx_xdp_full += xdpsq_red_stats->full; s->tx_xdp_err += xdpsq_red_stats->err; s->tx_xdp_cqes += xdpsq_red_stats->cqes; + /* AF_XDP zero-copy */ + s->rx_xsk_packets += xskrq_stats->packets; + s->rx_xsk_bytes += xskrq_stats->bytes; + s->rx_xsk_csum_complete += xskrq_stats->csum_complete; + s->rx_xsk_csum_unnecessary += xskrq_stats->csum_unnecessary; + s->rx_xsk_csum_unnecessary_inner += xskrq_stats->csum_unnecessary_inner; + s->rx_xsk_csum_none += xskrq_stats->csum_none; + s->rx_xsk_ecn_mark += xskrq_stats->ecn_mark; + s->rx_xsk_removed_vlan_packets += xskrq_stats->removed_vlan_packets; + s->rx_xsk_xdp_drop += xskrq_stats->xdp_drop; + s->rx_xsk_xdp_redirect += xskrq_stats->xdp_redirect; + s->rx_xsk_wqe_err += xskrq_stats->wqe_err; + s->rx_xsk_mpwqe_filler_cqes += xskrq_stats->mpwqe_filler_cqes; + s->rx_xsk_mpwqe_filler_strides += xskrq_stats->mpwqe_filler_strides; + s->rx_xsk_oversize_pkts_sw_drop += xskrq_stats->oversize_pkts_sw_drop; + s->rx_xsk_buff_alloc_err += xskrq_stats->buff_alloc_err; + s->rx_xsk_cqe_compress_blks += xskrq_stats->cqe_compress_blks; + s->rx_xsk_cqe_compress_pkts += xskrq_stats->cqe_compress_pkts; + s->rx_xsk_congst_umr += xskrq_stats->congst_umr; + s->rx_xsk_arfs_err += xskrq_stats->arfs_err; + s->tx_xsk_xmit += xsksq_stats->xmit; + s->tx_xsk_mpwqe += xsksq_stats->mpwqe; + s->tx_xsk_inlnw += xsksq_stats->inlnw; + s->tx_xsk_full += xsksq_stats->full; + s->tx_xsk_err += xsksq_stats->err; + s->tx_xsk_cqes += xsksq_stats->cqes; for (j = 0; j < priv->max_opened_tc; j++) { struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; @@ -210,8 +278,15 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_csum_none += sq_stats->csum_none; s->tx_csum_partial += sq_stats->csum_partial; #ifdef CONFIG_MLX5_EN_TLS - s->tx_tls_ooo += sq_stats->tls_ooo; - s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes; + s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets; + s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes; + s->tx_tls_ctx += sq_stats->tls_ctx; + s->tx_tls_ooo += sq_stats->tls_ooo; + s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes; + s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data; + s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req; + s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes; + s->tx_tls_dump_packets += sq_stats->tls_dump_packets; #endif s->tx_cqes += sq_stats->cqes; } @@ -1212,7 +1287,6 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, page_reuse) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, @@ -1233,6 +1307,16 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, +#endif { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, @@ -1245,6 +1329,8 @@ static const struct counter_desc sq_stats_desc[] = { static const struct counter_desc rq_xdpsq_stats_desc[] = { { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, @@ -1252,16 +1338,50 @@ static const struct counter_desc rq_xdpsq_stats_desc[] = { static const struct counter_desc xdpsq_stats_desc[] = { { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, }; +static const struct counter_desc xskrq_stats_desc[] = { + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, congst_umr) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, arfs_err) }, +}; + +static const struct counter_desc xsksq_stats_desc[] = { + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, full) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, err) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, +}; + static const struct counter_desc ch_stats_desc[] = { { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, events) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, poll) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, arm) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, aff_change) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, force_irq) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) }, }; @@ -1269,6 +1389,8 @@ static const struct counter_desc ch_stats_desc[] = { #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) #define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc) #define NUM_RQ_XDPSQ_STATS ARRAY_SIZE(rq_xdpsq_stats_desc) +#define NUM_XSKRQ_STATS ARRAY_SIZE(xskrq_stats_desc) +#define NUM_XSKSQ_STATS ARRAY_SIZE(xsksq_stats_desc) #define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc) static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) @@ -1279,13 +1401,16 @@ static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) (NUM_CH_STATS * max_nch) + (NUM_SQ_STATS * max_nch * priv->max_opened_tc) + (NUM_RQ_XDPSQ_STATS * max_nch) + - (NUM_XDPSQ_STATS * max_nch); + (NUM_XDPSQ_STATS * max_nch) + + (NUM_XSKRQ_STATS * max_nch * priv->xsk.ever_used) + + (NUM_XSKSQ_STATS * max_nch * priv->xsk.ever_used); } static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) { int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); + bool is_xsk = priv->xsk.ever_used; int i, j, tc; for (i = 0; i < max_nch; i++) @@ -1297,6 +1422,9 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, for (j = 0; j < NUM_RQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_stats_desc[j].format, i); + for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xskrq_stats_desc[j].format, i); for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_xdpsq_stats_desc[j].format, i); @@ -1309,10 +1437,14 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, sq_stats_desc[j].format, priv->channel_tc2txq[i][tc]); - for (i = 0; i < max_nch; i++) + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xsksq_stats_desc[j].format, i); for (j = 0; j < NUM_XDPSQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, xdpsq_stats_desc[j].format, i); + } return idx; } @@ -1321,6 +1453,7 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) { int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); + bool is_xsk = priv->xsk.ever_used; int i, j, tc; for (i = 0; i < max_nch; i++) @@ -1334,6 +1467,10 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq, rq_stats_desc, j); + for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xskrq, + xskrq_stats_desc, j); for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq, @@ -1347,11 +1484,16 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc], sq_stats_desc, j); - for (i = 0; i < max_nch; i++) + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xsksq, + xsksq_stats_desc, j); for (j = 0; j < NUM_XDPSQ_STATS; j++) data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq, xdpsq_stats_desc, j); + } return idx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 16c3b785f282..76ac111e14d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -46,6 +46,8 @@ #define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_XDPSQ_STAT(type, fld) "tx%d_xdp_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_RQ_XDPSQ_STAT(type, fld) "rx%d_xdp_tx_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XSKRQ_STAT(type, fld) "rx%d_xsk_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XSKSQ_STAT(type, fld) "tx%d_xsk_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld) struct counter_desc { @@ -77,6 +79,8 @@ struct mlx5e_sw_stats { u64 rx_xdp_drop; u64 rx_xdp_redirect; u64 rx_xdp_tx_xmit; + u64 rx_xdp_tx_mpwqe; + u64 rx_xdp_tx_inlnw; u64 rx_xdp_tx_full; u64 rx_xdp_tx_err; u64 rx_xdp_tx_cqe; @@ -91,6 +95,8 @@ struct mlx5e_sw_stats { u64 tx_queue_wake; u64 tx_cqe_err; u64 tx_xdp_xmit; + u64 tx_xdp_mpwqe; + u64 tx_xdp_inlnw; u64 tx_xdp_full; u64 tx_xdp_err; u64 tx_xdp_cqes; @@ -101,7 +107,6 @@ struct mlx5e_sw_stats { u64 rx_buff_alloc_err; u64 rx_cqe_compress_blks; u64 rx_cqe_compress_pkts; - u64 rx_page_reuse; u64 rx_cache_reuse; u64 rx_cache_full; u64 rx_cache_empty; @@ -113,12 +118,46 @@ struct mlx5e_sw_stats { u64 ch_poll; u64 ch_arm; u64 ch_aff_change; + u64 ch_force_irq; u64 ch_eq_rearm; #ifdef CONFIG_MLX5_EN_TLS + u64 tx_tls_encrypted_packets; + u64 tx_tls_encrypted_bytes; + u64 tx_tls_ctx; u64 tx_tls_ooo; u64 tx_tls_resync_bytes; + u64 tx_tls_drop_no_sync_data; + u64 tx_tls_drop_bypass_req; + u64 tx_tls_dump_packets; + u64 tx_tls_dump_bytes; #endif + + u64 rx_xsk_packets; + u64 rx_xsk_bytes; + u64 rx_xsk_csum_complete; + u64 rx_xsk_csum_unnecessary; + u64 rx_xsk_csum_unnecessary_inner; + u64 rx_xsk_csum_none; + u64 rx_xsk_ecn_mark; + u64 rx_xsk_removed_vlan_packets; + u64 rx_xsk_xdp_drop; + u64 rx_xsk_xdp_redirect; + u64 rx_xsk_wqe_err; + u64 rx_xsk_mpwqe_filler_cqes; + u64 rx_xsk_mpwqe_filler_strides; + u64 rx_xsk_oversize_pkts_sw_drop; + u64 rx_xsk_buff_alloc_err; + u64 rx_xsk_cqe_compress_blks; + u64 rx_xsk_cqe_compress_pkts; + u64 rx_xsk_congst_umr; + u64 rx_xsk_arfs_err; + u64 tx_xsk_xmit; + u64 tx_xsk_mpwqe; + u64 tx_xsk_inlnw; + u64 tx_xsk_full; + u64 tx_xsk_err; + u64 tx_xsk_cqes; }; struct mlx5e_qcounter_stats { @@ -201,7 +240,6 @@ struct mlx5e_rq_stats { u64 buff_alloc_err; u64 cqe_compress_blks; u64 cqe_compress_pkts; - u64 page_reuse; u64 cache_reuse; u64 cache_full; u64 cache_empty; @@ -225,8 +263,15 @@ struct mlx5e_sq_stats { u64 added_vlan_packets; u64 nop; #ifdef CONFIG_MLX5_EN_TLS + u64 tls_encrypted_packets; + u64 tls_encrypted_bytes; + u64 tls_ctx; u64 tls_ooo; u64 tls_resync_bytes; + u64 tls_drop_no_sync_data; + u64 tls_drop_bypass_req; + u64 tls_dump_packets; + u64 tls_dump_bytes; #endif /* less likely accessed in data path */ u64 csum_none; @@ -241,6 +286,8 @@ struct mlx5e_sq_stats { struct mlx5e_xdpsq_stats { u64 xmit; + u64 mpwqe; + u64 inlnw; u64 full; u64 err; /* dirtied @completion */ @@ -252,6 +299,7 @@ struct mlx5e_ch_stats { u64 poll; u64 arm; u64 aff_change; + u64 force_irq; u64 eq_rearm; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index d75dc44eb2ff..2d6436257f9d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -44,6 +44,7 @@ #include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_csum.h> #include <net/arp.h> +#include <net/ipv6_stubs.h> #include "en.h" #include "en_rep.h" #include "en_tc.h" @@ -52,6 +53,7 @@ #include "en/port.h" #include "en/tc_tun.h" #include "lib/devcom.h" +#include "lib/geneve.h" struct mlx5_nic_flow_attr { u32 action; @@ -125,7 +127,7 @@ struct mlx5e_tc_flow { }; struct mlx5e_tc_flow_parse_attr { - struct ip_tunnel_info tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; + const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; int num_mod_hdr_actions; @@ -663,7 +665,8 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, } netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", - hp->tirn, hp->pair->rqn[0], hp->pair->peer_mdev->priv.name, + hp->tirn, hp->pair->rqn[0], + dev_name(hp->pair->peer_mdev->device), hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets); hpe->hp = hp; @@ -700,7 +703,7 @@ static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, hpe = list_entry(next, struct mlx5e_hairpin_entry, flows); netdev_dbg(priv->netdev, "del hairpin: peer %s\n", - hpe->hp->pair->peer_mdev->priv.name); + dev_name(hpe->hp->pair->peer_mdev->device)); mlx5e_hairpin_destroy(hpe->hp); hash_del(&hpe->hairpin_hlist); @@ -714,19 +717,22 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context; struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, - .flow_tag = attr->flow_tag, .reformat_id = 0, - .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND, + .flags = FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; bool table_created = false; int err, dest_ix = 0; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = attr->flow_tag; + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); if (err) { @@ -797,7 +803,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } if (attr->match_level != MLX5_MATCH_NONE) - parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, &flow_act, dest, dest_ix); @@ -1061,6 +1067,19 @@ err_max_prio_chain: return err; } +static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) +{ + struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec; + void *headers_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + misc_parameters_3); + u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3, + headers_v, + geneve_tlv_option_0_data); + + return !!geneve_tlv_opt_0_data; +} + static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { @@ -1082,6 +1101,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); } + if (mlx5_flow_has_geneve_opt(flow)) + mlx5_geneve_tlv_option_del(priv->mdev->geneve); + mlx5_eswitch_del_vlan_action(esw, attr); for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) @@ -1328,7 +1350,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct net_device *filter_dev, u8 *match_level) { struct netlink_ext_ack *extack = f->common.extack; @@ -1336,8 +1358,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, outer_headers); void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); - struct flow_match_control enc_control; + struct flow_rule *rule = flow_cls_offload_flow_rule(f); int err; err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, @@ -1348,9 +1369,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return err; } - flow_rule_match_enc_control(rule, &enc_control); - - if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { struct flow_match_ipv4_addrs match; flow_rule_match_enc_ipv4_addrs(rule, &match); @@ -1370,7 +1389,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); - } else if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { struct flow_match_ipv6_addrs match; flow_rule_match_enc_ipv6_addrs(rule, &match); @@ -1437,9 +1456,29 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return 0; } +static void *get_match_headers_criteria(u32 flags, + struct mlx5_flow_spec *spec) +{ + return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? + MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + inner_headers) : + MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); +} + +static void *get_match_headers_value(u32 flags, + struct mlx5_flow_spec *spec) +{ + return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? + MLX5_ADDR_OF(fte_match_param, spec->match_value, + inner_headers) : + MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); +} + static int __parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct net_device *filter_dev, u8 *match_level, u8 *tunnel_match_level) { @@ -1452,7 +1491,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, misc_parameters); void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; u16 addr_type = 0; u8 ip_proto = 0; @@ -1475,37 +1514,29 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | BIT(FLOW_DISSECTOR_KEY_TCP) | BIT(FLOW_DISSECTOR_KEY_IP) | - BIT(FLOW_DISSECTOR_KEY_ENC_IP))) { + BIT(FLOW_DISSECTOR_KEY_ENC_IP) | + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", dissector->used_keys); return -EOPNOTSUPP; } - if ((flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) && - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { - struct flow_match_control match; - - flow_rule_match_enc_control(rule, &match); - switch (match.key->addr_type) { - case FLOW_DISSECTOR_KEY_IPV4_ADDRS: - case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) - return -EOPNOTSUPP; - break; - default: + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) return -EOPNOTSUPP; - } /* In decap flow, header pointers should point to the inner * headers, outer header were already set by parse_tunnel_attr */ - headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - inner_headers); - headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - inner_headers); + headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP, + spec); + headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP, + spec); } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { @@ -1520,11 +1551,23 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, if (match.mask->n_proto) *match_level = MLX5_MATCH_L2; } - - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) || + is_vlan_dev(filter_dev)) { + struct flow_dissector_key_vlan filter_dev_mask; + struct flow_dissector_key_vlan filter_dev_key; struct flow_match_vlan match; - flow_rule_match_vlan(rule, &match); + if (is_vlan_dev(filter_dev)) { + match.key = &filter_dev_key; + match.key->vlan_id = vlan_dev_vlan_id(filter_dev); + match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev); + match.key->vlan_priority = 0; + match.mask = &filter_dev_mask; + memset(match.mask, 0xff, sizeof(*match.mask)); + match.mask->vlan_priority = 0; + } else { + flow_rule_match_vlan(rule, &match); + } if (match.mask->vlan_id || match.mask->vlan_priority || match.mask->vlan_tpid) { @@ -1561,7 +1604,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { struct flow_match_vlan match; - flow_rule_match_vlan(rule, &match); + flow_rule_match_cvlan(rule, &match); if (match.mask->vlan_id || match.mask->vlan_priority || match.mask->vlan_tpid) { @@ -1788,7 +1831,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct net_device *filter_dev) { struct netlink_ext_ack *extack = f->common.extack; @@ -1827,6 +1870,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct pedit_headers { struct ethhdr eth; + struct vlan_hdr vlan; struct iphdr ip4; struct ipv6hdr ip6; struct tcphdr tcp; @@ -1873,38 +1917,83 @@ struct mlx5_fields { u8 field; u8 size; u32 offset; + u32 match_offset; }; -#define OFFLOAD(fw_field, size, field, off) \ - {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, offsetof(struct pedit_headers, field) + (off)} +#define OFFLOAD(fw_field, size, field, off, match_field) \ + {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, \ + offsetof(struct pedit_headers, field) + (off), \ + MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)} + +/* masked values are the same and there are no rewrites that do not have a + * match. + */ +#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \ + type matchmaskx = *(type *)(matchmaskp); \ + type matchvalx = *(type *)(matchvalp); \ + type maskx = *(type *)(maskp); \ + type valx = *(type *)(valp); \ + \ + (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \ + matchmaskx)); \ +}) + +static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp, + void *matchmaskp, int size) +{ + bool same = false; + + switch (size) { + case sizeof(u8): + same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp); + break; + case sizeof(u16): + same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp); + break; + case sizeof(u32): + same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp); + break; + } + + return same; +} static struct mlx5_fields fields[] = { - OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0), - OFFLOAD(DMAC_15_0, 2, eth.h_dest[4], 0), - OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0), - OFFLOAD(SMAC_15_0, 2, eth.h_source[4], 0), - OFFLOAD(ETHERTYPE, 2, eth.h_proto, 0), - - OFFLOAD(IP_TTL, 1, ip4.ttl, 0), - OFFLOAD(SIPV4, 4, ip4.saddr, 0), - OFFLOAD(DIPV4, 4, ip4.daddr, 0), - - OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0), - OFFLOAD(SIPV6_95_64, 4, ip6.saddr.s6_addr32[1], 0), - OFFLOAD(SIPV6_63_32, 4, ip6.saddr.s6_addr32[2], 0), - OFFLOAD(SIPV6_31_0, 4, ip6.saddr.s6_addr32[3], 0), - OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0), - OFFLOAD(DIPV6_95_64, 4, ip6.daddr.s6_addr32[1], 0), - OFFLOAD(DIPV6_63_32, 4, ip6.daddr.s6_addr32[2], 0), - OFFLOAD(DIPV6_31_0, 4, ip6.daddr.s6_addr32[3], 0), - OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0), - - OFFLOAD(TCP_SPORT, 2, tcp.source, 0), - OFFLOAD(TCP_DPORT, 2, tcp.dest, 0), - OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5), - - OFFLOAD(UDP_SPORT, 2, udp.source, 0), - OFFLOAD(UDP_DPORT, 2, udp.dest, 0), + OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0, dmac_47_16), + OFFLOAD(DMAC_15_0, 2, eth.h_dest[4], 0, dmac_15_0), + OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0, smac_47_16), + OFFLOAD(SMAC_15_0, 2, eth.h_source[4], 0, smac_15_0), + OFFLOAD(ETHERTYPE, 2, eth.h_proto, 0, ethertype), + OFFLOAD(FIRST_VID, 2, vlan.h_vlan_TCI, 0, first_vid), + + OFFLOAD(IP_TTL, 1, ip4.ttl, 0, ttl_hoplimit), + OFFLOAD(SIPV4, 4, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4), + OFFLOAD(DIPV4, 4, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + + OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0, + src_ipv4_src_ipv6.ipv6_layout.ipv6[0]), + OFFLOAD(SIPV6_95_64, 4, ip6.saddr.s6_addr32[1], 0, + src_ipv4_src_ipv6.ipv6_layout.ipv6[4]), + OFFLOAD(SIPV6_63_32, 4, ip6.saddr.s6_addr32[2], 0, + src_ipv4_src_ipv6.ipv6_layout.ipv6[8]), + OFFLOAD(SIPV6_31_0, 4, ip6.saddr.s6_addr32[3], 0, + src_ipv4_src_ipv6.ipv6_layout.ipv6[12]), + OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]), + OFFLOAD(DIPV6_95_64, 4, ip6.daddr.s6_addr32[1], 0, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]), + OFFLOAD(DIPV6_63_32, 4, ip6.daddr.s6_addr32[2], 0, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]), + OFFLOAD(DIPV6_31_0, 4, ip6.daddr.s6_addr32[3], 0, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]), + OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0, ttl_hoplimit), + + OFFLOAD(TCP_SPORT, 2, tcp.source, 0, tcp_sport), + OFFLOAD(TCP_DPORT, 2, tcp.dest, 0, tcp_dport), + OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5, tcp_flags), + + OFFLOAD(UDP_SPORT, 2, udp.source, 0, udp_sport), + OFFLOAD(UDP_DPORT, 2, udp.dest, 0, udp_dport), }; /* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at @@ -1913,9 +2002,14 @@ static struct mlx5_fields fields[] = { */ static int offload_pedit_fields(struct pedit_headers_action *hdrs, struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action_flags, struct netlink_ext_ack *extack) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; + void *headers_c = get_match_headers_criteria(*action_flags, + &parse_attr->spec); + void *headers_v = get_match_headers_value(*action_flags, + &parse_attr->spec); int i, action_size, nactions, max_actions, first, last, next_z; void *s_masks_p, *a_masks_p, *vals_p; struct mlx5_fields *f; @@ -1939,6 +2033,8 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, nactions = parse_attr->num_mod_hdr_actions; for (i = 0; i < ARRAY_SIZE(fields); i++) { + bool skip; + f = &fields[i]; /* avoid seeing bits set from previous iterations */ s_mask = 0; @@ -1967,19 +2063,34 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, return -EOPNOTSUPP; } + skip = false; if (s_mask) { + void *match_mask = headers_c + f->match_offset; + void *match_val = headers_v + f->match_offset; + cmd = MLX5_ACTION_TYPE_SET; mask = s_mask; vals_p = (void *)set_vals + f->offset; + /* don't rewrite if we have a match on the same value */ + if (cmp_val_mask(vals_p, s_masks_p, match_val, + match_mask, f->size)) + skip = true; /* clear to denote we consumed this field */ memset(s_masks_p, 0, f->size); } else { + u32 zero = 0; + cmd = MLX5_ACTION_TYPE_ADD; mask = a_mask; vals_p = (void *)add_vals + f->offset; + /* add 0 is no change */ + if (!memcmp(vals_p, &zero, f->size)) + skip = true; /* clear to denote we consumed this field */ memset(a_masks_p, 0, f->size); } + if (skip) + continue; field_bsize = f->size * BITS_PER_BYTE; @@ -2026,6 +2137,15 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, return 0; } +static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev, + int namespace) +{ + if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ + return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions); + else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */ + return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions); +} + static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, struct pedit_headers_action *hdrs, int namespace, @@ -2037,11 +2157,7 @@ static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ - max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions); - else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */ - max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions); - + max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace); /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */ max_actions = min(max_actions, nkeys * 16); @@ -2074,6 +2190,12 @@ static int parse_tc_pedit_action(struct mlx5e_priv *priv, goto out_err; } + if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) { + NL_SET_ERR_MSG_MOD(extack, + "The pedit offload action is not supported"); + goto out_err; + } + mask = act->mangle.mask; val = act->mangle.val; offset = act->mangle.offset; @@ -2092,6 +2214,7 @@ out_err: static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, struct pedit_headers_action *hdrs, + u32 *action_flags, struct netlink_ext_ack *extack) { struct pedit_headers *cmd_masks; @@ -2104,7 +2227,7 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, goto out_err; } - err = offload_pedit_fields(hdrs, parse_attr, extack); + err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack); if (err < 0) goto out_dealloc_parsed_actions; @@ -2216,11 +2339,7 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, u8 ip_proto; int i; - if (actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) - headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers); - else - headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); - + headers_v = get_match_headers_value(actions, spec); ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); /* for non-IP we only re-write MACs, so we're okay */ @@ -2266,7 +2385,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv, actions = flow->nic_attr->action; if (flow->flags & MLX5E_TC_FLOW_EGRESS && - !(actions & MLX5_FLOW_CONTEXT_ACTION_DECAP)) + !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) || + (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP))) return false; if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) @@ -2291,6 +2411,74 @@ static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) return (fsystem_guid == psystem_guid); } +static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace, + const struct flow_action_entry *act, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + u32 *action, struct netlink_ext_ack *extack) +{ + u16 mask16 = VLAN_VID_MASK; + u16 val16 = act->vlan.vid & VLAN_VID_MASK; + const struct flow_action_entry pedit_act = { + .id = FLOW_ACTION_MANGLE, + .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH, + .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI), + .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16), + .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16), + }; + u8 match_prio_mask, match_prio_val; + void *headers_c, *headers_v; + int err; + + headers_c = get_match_headers_criteria(*action, &parse_attr->spec); + headers_v = get_match_headers_value(*action, &parse_attr->spec); + + if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) && + MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) { + NL_SET_ERR_MSG_MOD(extack, + "VLAN rewrite action must have VLAN protocol match"); + return -EOPNOTSUPP; + } + + match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); + match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); + if (act->vlan.prio != (match_prio_val & match_prio_mask)) { + NL_SET_ERR_MSG_MOD(extack, + "Changing VLAN prio is not supported"); + return -EOPNOTSUPP; + } + + err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, + hdrs, NULL); + *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + return err; +} + +static int +add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + u32 *action, struct netlink_ext_ack *extack) +{ + const struct flow_action_entry prio_tag_act = { + .vlan.vid = 0, + .vlan.prio = + MLX5_GET(fte_match_set_lyr_2_4, + get_match_headers_value(*action, + &parse_attr->spec), + first_prio) & + MLX5_GET(fte_match_set_lyr_2_4, + get_match_headers_criteria(*action, + &parse_attr->spec), + first_prio), + }; + + return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, + &prio_tag_act, parse_attr, hdrs, action, + extack); +} + static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, @@ -2326,6 +2514,15 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; break; + case FLOW_ACTION_VLAN_MANGLE: + err = add_vlan_rewrite_action(priv, + MLX5_FLOW_NAMESPACE_KERNEL, + act, parse_attr, hdrs, + &action, extack); + if (err) + return err; + + break; case FLOW_ACTION_CSUM: if (csum_offload_supported(priv, action, act->csum_flags, @@ -2365,16 +2562,24 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, } break; default: - return -EINVAL; + NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); + return -EOPNOTSUPP; } } if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL, - parse_attr, hdrs, extack); + parse_attr, hdrs, &action, extack); if (err) return err; + /* in case all pedit actions are skipped, remove the MOD_HDR + * flag. + */ + if (parse_attr->num_mod_hdr_actions == 0) { + action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + kfree(parse_attr->mod_hdr_actions); + } } attr->action = action; @@ -2385,21 +2590,21 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, } struct encap_key { - struct ip_tunnel_key *ip_tun_key; - int tunnel_type; + const struct ip_tunnel_key *ip_tun_key; + struct mlx5e_tc_tunnel *tc_tunnel; }; static inline int cmp_encap_info(struct encap_key *a, struct encap_key *b) { return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || - a->tunnel_type != b->tunnel_type; + a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; } static inline int hash_encap_info(struct encap_key *key) { return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), - key->tunnel_type); + key->tc_tunnel->tunnel_type); } @@ -2429,7 +2634,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct ip_tunnel_info *tun_info; + const struct ip_tunnel_info *tun_info; struct encap_key key, e_key; struct mlx5e_encap_entry *e; unsigned short family; @@ -2438,17 +2643,17 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, int err = 0; parse_attr = attr->parse_attr; - tun_info = &parse_attr->tun_info[out_index]; + tun_info = parse_attr->tun_info[out_index]; family = ip_tunnel_info_af(tun_info); key.ip_tun_key = &tun_info->key; - key.tunnel_type = mlx5e_tc_tun_get_type(mirred_dev); + key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); hash_key = hash_encap_info(&key); hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { - e_key.ip_tun_key = &e->tun_info.key; - e_key.tunnel_type = e->tunnel_type; + e_key.ip_tun_key = &e->tun_info->key; + e_key.tc_tunnel = e->tunnel; if (!cmp_encap_info(&e_key, &key)) { found = true; break; @@ -2463,7 +2668,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, if (!e) return -ENOMEM; - e->tun_info = *tun_info; + e->tun_info = tun_info; err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); if (err) goto out_err; @@ -2544,8 +2749,7 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, } break; default: - /* action is FLOW_ACT_VLAN_MANGLE */ - return -EOPNOTSUPP; + return -EINVAL; } attr->total_vlan = vlan_idx + 1; @@ -2553,15 +2757,70 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, return 0; } +static int add_vlan_push_action(struct mlx5e_priv *priv, + struct mlx5_esw_flow_attr *attr, + struct net_device **out_dev, + u32 *action) +{ + struct net_device *vlan_dev = *out_dev; + struct flow_action_entry vlan_act = { + .id = FLOW_ACTION_VLAN_PUSH, + .vlan.vid = vlan_dev_vlan_id(vlan_dev), + .vlan.proto = vlan_dev_vlan_proto(vlan_dev), + .vlan.prio = 0, + }; + int err; + + err = parse_tc_vlan_action(priv, &vlan_act, attr, action); + if (err) + return err; + + *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), + dev_get_iflink(vlan_dev)); + if (is_vlan_dev(*out_dev)) + err = add_vlan_push_action(priv, attr, out_dev, action); + + return err; +} + +static int add_vlan_pop_action(struct mlx5e_priv *priv, + struct mlx5_esw_flow_attr *attr, + u32 *action) +{ + int nest_level = vlan_get_encap_level(attr->parse_attr->filter_dev); + struct flow_action_entry vlan_act = { + .id = FLOW_ACTION_VLAN_POP, + }; + int err = 0; + + while (nest_level--) { + err = parse_tc_vlan_action(priv, &vlan_act, attr, action); + if (err) + return err; + } + + return err; +} + +bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, + struct net_device *out_dev) +{ + if (is_merged_eswitch_dev(priv, out_dev)) + return true; + + return mlx5e_eswitch_rep(out_dev) && + same_hw_devs(priv, netdev_priv(out_dev)); +} + static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { struct pedit_headers_action hdrs[2] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; const struct ip_tunnel_info *info = NULL; const struct flow_action_entry *act; @@ -2572,9 +2831,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (!flow_action_has_entries(flow_action)) return -EINVAL; - attr->in_rep = rpriv->rep; - attr->in_mdev = priv->mdev; - flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_DROP: @@ -2621,9 +2877,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - if (netdev_port_same_parent_id(priv->netdev, - out_dev) || - is_merged_eswitch_dev(priv, out_dev)) { + if (netdev_port_same_parent_id(priv->netdev, out_dev)) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev); @@ -2633,8 +2887,28 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, uplink_upper == out_dev) out_dev = uplink_dev; - if (!mlx5e_eswitch_rep(out_dev)) + if (is_vlan_dev(out_dev)) { + err = add_vlan_push_action(priv, attr, + &out_dev, + &action); + if (err) + return err; + } + + if (is_vlan_dev(parse_attr->filter_dev)) { + err = add_vlan_pop_action(priv, attr, + &action); + if (err) + return err; + } + + if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not on same switch HW, can't offload forwarding"); + pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", + priv->netdev->name, out_dev->name); return -EOPNOTSUPP; + } out_priv = netdev_priv(out_dev); rpriv = out_priv->ppriv; @@ -2644,9 +2918,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } else if (encap) { parse_attr->mirred_ifindex[attr->out_count] = out_dev->ifindex; - parse_attr->tun_info[attr->out_count] = *info; + parse_attr->tun_info[attr->out_count] = info; encap = false; - attr->parse_attr = parse_attr; attr->dests[attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; attr->out_count++; @@ -2679,7 +2952,27 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, break; case FLOW_ACTION_VLAN_PUSH: case FLOW_ACTION_VLAN_POP: - err = parse_tc_vlan_action(priv, act, attr, &action); + if (act->id == FLOW_ACTION_VLAN_PUSH && + (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) { + /* Replace vlan pop+push with vlan modify */ + action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + err = add_vlan_rewrite_action(priv, + MLX5_FLOW_NAMESPACE_FDB, + act, parse_attr, hdrs, + &action, extack); + } else { + err = parse_tc_vlan_action(priv, act, attr, &action); + } + if (err) + return err; + + attr->split_count = attr->out_count; + break; + case FLOW_ACTION_VLAN_MANGLE: + err = add_vlan_rewrite_action(priv, + MLX5_FLOW_NAMESPACE_FDB, + act, parse_attr, hdrs, + &action, extack); if (err) return err; @@ -2705,16 +2998,40 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, break; } default: - return -EINVAL; + NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); + return -EOPNOTSUPP; } } + if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { + /* For prio tag mode, replace vlan pop with rewrite vlan prio + * tag rewrite. + */ + action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs, + &action, extack); + if (err) + return err; + } + if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB, - parse_attr, hdrs, extack); + parse_attr, hdrs, &action, extack); if (err) return err; + /* in case all pedit actions are skipped, remove the MOD_HDR + * flag. we might have set split_count either by pedit or + * pop/push. if there is no pop/push either, reset it too. + */ + if (parse_attr->num_mod_hdr_actions == 0) { + action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + kfree(parse_attr->mod_hdr_actions); + if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || + (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) + attr->split_count = 0; + } } attr->action = action; @@ -2798,7 +3115,7 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, - struct tc_cls_flower_offload *f, u16 flow_flags, + struct flow_cls_offload *f, u16 flow_flags, struct mlx5e_tc_flow_parse_attr **__parse_attr, struct mlx5e_tc_flow **__flow) { @@ -2832,7 +3149,7 @@ static void mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct mlx5_eswitch_rep *in_rep, struct mlx5_core_dev *in_mdev) { @@ -2854,13 +3171,13 @@ mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, static struct mlx5e_tc_flow * __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, u16 flow_flags, struct net_device *filter_dev, struct mlx5_eswitch_rep *in_rep, struct mlx5_core_dev *in_mdev) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; @@ -2883,7 +3200,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; - err = parse_tc_fdb_actions(priv, &rule->action, parse_attr, flow, extack); + err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); if (err) goto err_free; @@ -2904,7 +3221,7 @@ out: return ERR_PTR(err); } -static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f, +static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, struct mlx5e_tc_flow *flow, u16 flow_flags) { @@ -2956,7 +3273,7 @@ out: static int mlx5e_add_fdb_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, u16 flow_flags, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) @@ -2990,12 +3307,12 @@ out: static int mlx5e_add_nic_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, u16 flow_flags, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; @@ -3041,7 +3358,7 @@ out: static int mlx5e_tc_add_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, int flags, struct net_device *filter_dev, struct mlx5e_tc_flow **flow) @@ -3055,7 +3372,7 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, if (!tc_can_offload_extack(priv->netdev, f->common.extack)) return -EOPNOTSUPP; - if (esw && esw->mode == SRIOV_OFFLOADS) + if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) err = mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, flow); else @@ -3066,7 +3383,7 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, } int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags) + struct flow_cls_offload *f, int flags) { struct netlink_ext_ack *extack = f->common.extack; struct rhashtable *tc_ht = get_tc_ht(priv, flags); @@ -3080,6 +3397,7 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, netdev_warn_once(priv->netdev, "flow cookie %lx already exists, ignoring\n", f->cookie); + err = -EEXIST; goto out; } @@ -3112,7 +3430,7 @@ static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) } int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags) + struct flow_cls_offload *f, int flags) { struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; @@ -3131,7 +3449,7 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, } int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags) + struct flow_cls_offload *f, int flags) { struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct rhashtable *tc_ht = get_tc_ht(priv, flags); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index f62e81902d27..3ab39275ca7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -54,12 +54,12 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht); void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags); + struct flow_cls_offload *f, int flags); int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags); + struct flow_cls_offload *f, int flags); int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags); + struct flow_cls_offload *f, int flags); struct mlx5e_encap_entry; void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, @@ -74,6 +74,9 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags); void mlx5e_tc_reoffload_flows_work(struct work_struct *work); +bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, + struct net_device *out_dev); + #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 25a8f8260c14..600e92cb629a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -32,57 +32,15 @@ #include <linux/tcp.h> #include <linux/if_vlan.h> +#include <net/geneve.h> #include <net/dsfield.h> #include "en.h" +#include "en/txrx.h" #include "ipoib/ipoib.h" #include "en_accel/en_accel.h" +#include "en_accel/ktls.h" #include "lib/clock.h" -#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS - -#ifndef CONFIG_MLX5_EN_TLS -#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ - MLX5E_SQ_NOPS_ROOM) -#else -/* TLS offload requires MLX5E_SQ_STOP_ROOM to have - * enough room for a resync SKB, a normal SKB and a NOP - */ -#define MLX5E_SQ_STOP_ROOM (2 * MLX5_SEND_WQE_MAX_WQEBBS +\ - MLX5E_SQ_NOPS_ROOM) -#endif - -static inline void mlx5e_tx_dma_unmap(struct device *pdev, - struct mlx5e_sq_dma *dma) -{ - switch (dma->type) { - case MLX5E_DMA_MAP_SINGLE: - dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); - break; - case MLX5E_DMA_MAP_PAGE: - dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); - break; - default: - WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n"); - } -} - -static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) -{ - return &sq->db.dma_fifo[i & sq->dma_fifo_mask]; -} - -static inline void mlx5e_dma_push(struct mlx5e_txqsq *sq, - dma_addr_t addr, - u32 size, - enum mlx5e_dma_map_type map_type) -{ - struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++); - - dma->addr = addr; - dma->size = size; - dma->type = map_type; -} - static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) { int i; @@ -110,16 +68,15 @@ static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb #endif u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { + int txq_ix = netdev_pick_tx(dev, skb, NULL); struct mlx5e_priv *priv = netdev_priv(dev); - int channel_ix = fallback(dev, skb, NULL); u16 num_channels; int up = 0; if (!netdev_get_num_tc(dev)) - return channel_ix; + return txq_ix; #ifdef CONFIG_MLX5_CORE_EN_DCB if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP) @@ -129,14 +86,14 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, if (skb_vlan_tag_present(skb)) up = skb_vlan_tag_get_prio(skb); - /* channel_ix can be larger than num_channels since + /* txq_ix can be larger than num_channels since * dev->num_real_tx_queues = num_channels * num_tc */ num_channels = priv->channels.params.num_channels; - if (channel_ix >= num_channels) - channel_ix = reciprocal_scale(channel_ix, num_channels); + if (txq_ix >= num_channels) + txq_ix = priv->txq2sq[txq_ix]->ch_ix; - return priv->channel_tc2txq[channel_ix][up]; + return priv->channel_tc2txq[txq_ix][up]; } static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) @@ -163,7 +120,7 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, case MLX5_INLINE_MODE_NONE: return 0; case MLX5_INLINE_MODE_TCP_UDP: - hlen = eth_get_headlen(skb->data, skb_headlen(skb)); + hlen = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb)); if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb)) hlen += VLAN_HLEN; break; @@ -277,29 +234,14 @@ dma_unmap_wqe_err: return -ENOMEM; } -static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, - struct mlx5_wq_cyc *wq, - u16 pi, u16 nnops) -{ - struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; - - edge_wi = wi + nnops; - - /* fill sq frag edge with nops to avoid wqe wrapping two pages */ - for (; wi < edge_wi; wi++) { - wi->skb = NULL; - wi->num_wqebbs = 1; - mlx5e_post_nop(wq, sq->sqn, &sq->pc); - } - sq->stats->nop += nnops; -} - static inline void mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma, - struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg) + struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg, + bool xmit_more) { struct mlx5_wq_cyc *wq = &sq->wq; + bool send_doorbell; wi->num_bytes = num_bytes; wi->num_dma = num_dma; @@ -309,25 +251,23 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - netdev_tx_sent_queue(sq->txq, num_bytes); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; sq->pc += wi->num_wqebbs; - if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM))) { + if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, sq->stop_room))) { netif_tx_stop_queue(sq->txq); sq->stats->stopped++; } - if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) + send_doorbell = __netdev_tx_sent_queue(sq->txq, num_bytes, + xmit_more); + if (send_doorbell) mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); } -#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) - netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5e_tx_wqe *wqe, u16 pi) + struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more) { struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5_wqe_ctrl_seg *cseg; @@ -352,15 +292,18 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; stats->packets += skb_shinfo(skb)->gso_segs; } else { + u8 mode = mlx5e_transport_inline_tx_wqe(wqe) ? + MLX5_INLINE_MODE_TCP_UDP : sq->min_inline_mode; + opcode = MLX5_OPCODE_SEND; mss = 0; - ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + ihs = mlx5e_calc_min_inline(mode, skb); num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); stats->packets++; } stats->bytes += num_bytes; - stats->xmit_more += skb->xmit_more; + stats->xmit_more += xmit_more; headlen = skb->len - ihs - skb->data_len; ds_cnt += !!headlen; @@ -379,11 +322,17 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, #ifdef CONFIG_MLX5_EN_IPSEC struct mlx5_wqe_eth_seg cur_eth = wqe->eth; #endif +#ifdef CONFIG_MLX5_EN_TLS + struct mlx5_wqe_ctrl_seg cur_ctrl = wqe->ctrl; +#endif mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); - mlx5e_sq_fetch_wqe(sq, &wqe, &pi); + wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi); #ifdef CONFIG_MLX5_EN_IPSEC wqe->eth = cur_eth; #endif +#ifdef CONFIG_MLX5_EN_TLS + wqe->ctrl = cur_ctrl; +#endif } /* fill wqe */ @@ -392,6 +341,10 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, eseg = &wqe->eth; dseg = wqe->data; +#if IS_ENABLED(CONFIG_GENEVE) + if (skb->encapsulation) + mlx5e_tx_tunnel_accel(skb, eseg); +#endif mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); eseg->mss = mss; @@ -419,7 +372,7 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, goto err_drop; mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes, - num_dma, wi, cseg); + num_dma, wi, cseg, xmit_more); return NETDEV_TX_OK; @@ -438,14 +391,14 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) u16 pi; sq = priv->txq2sq[skb_get_queue_mapping(skb)]; - mlx5e_sq_fetch_wqe(sq, &wqe, &pi); + wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi); /* might send skbs and update wqe and pi */ skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi); if (unlikely(!skb)) return NETDEV_TX_OK; - return mlx5e_sq_xmit(sq, skb, wqe, pi); + return mlx5e_sq_xmit(sq, skb, wqe, pi, netdev_xmit_more()); } static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq, @@ -526,8 +479,16 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wi = &sq->db.wqe_info[ci]; skb = wi->skb; - if (unlikely(!skb)) { /* nop */ - sqcc++; + if (unlikely(!skb)) { +#ifdef CONFIG_MLX5_EN_TLS + if (wi->resync_dump_frag) { + struct mlx5e_sq_dma *dma = + mlx5e_dma_get(sq, dma_fifo_cc++); + + mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma); + } +#endif + sqcc += wi->num_wqebbs; continue; } @@ -569,8 +530,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) netdev_tx_completed_queue(sq->txq, npkts, nbytes); if (netif_tx_queue_stopped(sq->txq) && - mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - MLX5E_SQ_STOP_ROOM) && + mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) && !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { netif_tx_wake_queue(sq->txq); stats->wake++; @@ -619,7 +579,8 @@ mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey, } netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5_av *av, u32 dqpn, u32 dqkey) + struct mlx5_av *av, u32 dqpn, u32 dqkey, + bool xmit_more) { struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5i_tx_wqe *wqe; @@ -655,7 +616,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, } stats->bytes += num_bytes; - stats->xmit_more += skb->xmit_more; + stats->xmit_more += xmit_more; headlen = skb->len - ihs - skb->data_len; ds_cnt += !!headlen; @@ -700,7 +661,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, goto err_drop; mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes, - num_dma, wi, cseg); + num_dma, wi, cseg, xmit_more); return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index b4af5e19f6ac..c50b6f0769c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -33,6 +33,7 @@ #include <linux/irq.h> #include "en.h" #include "en/xdp.h" +#include "en/xsk/tx.h" static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) { @@ -48,35 +49,49 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) { struct mlx5e_sq_stats *stats = sq->stats; - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state))) return; - net_dim_sample(sq->cq.event_ctr, stats->packets, stats->bytes, - &dim_sample); + dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); net_dim(&sq->dim, dim_sample); } static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) { struct mlx5e_rq_stats *stats = rq->stats; - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state))) return; - net_dim_sample(rq->cq.event_ctr, stats->packets, stats->bytes, - &dim_sample); + dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); net_dim(&rq->dim, dim_sample); } +void mlx5e_trigger_irq(struct mlx5e_icosq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_tx_wqe *nopwqe; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl); +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, napi); struct mlx5e_ch_stats *ch_stats = c->stats; + struct mlx5e_xdpsq *xsksq = &c->xsksq; + struct mlx5e_rq *xskrq = &c->xskrq; struct mlx5e_rq *rq = &c->rq; + bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + bool aff_change = false; + bool busy_xsk = false; bool busy = false; int work_done = 0; int i; @@ -86,22 +101,38 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); - busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq, NULL); + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); if (c->xdp) - busy |= mlx5e_poll_xdpsq_cq(&rq->xdpsq.cq, rq); + busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq); if (likely(budget)) { /* budget=0 means: don't poll rx rings */ - work_done = mlx5e_poll_rx_cq(&rq->cq, budget); + if (xsk_open) + work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget); + + if (likely(budget - work_done)) + work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done); + busy |= work_done == budget; } - busy |= c->rq.post_wqes(rq); + mlx5e_poll_ico_cq(&c->icosq.cq); + + busy |= rq->post_wqes(rq); + if (xsk_open) { + mlx5e_poll_ico_cq(&c->xskicosq.cq); + busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq); + busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); + busy_xsk |= xskrq->post_wqes(xskrq); + } + + busy |= busy_xsk; if (busy) { if (likely(mlx5e_channel_no_affinity_change(c))) return budget; ch_stats->aff_change++; + aff_change = true; if (budget && work_done == budget) work_done--; } @@ -122,10 +153,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_cq_arm(&c->icosq.cq); mlx5e_cq_arm(&c->xdpsq.cq); + if (xsk_open) { + mlx5e_handle_rx_dim(xskrq); + mlx5e_cq_arm(&c->xskicosq.cq); + mlx5e_cq_arm(&xsksq->cq); + mlx5e_cq_arm(&xskrq->cq); + } + + if (unlikely(aff_change && busy_xsk)) { + mlx5e_trigger_irq(&c->icosq); + ch_stats->force_irq++; + } + return work_done; } -void mlx5e_completion_event(struct mlx5_core_cq *mcq) +void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) { struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index bb6e5b5d9681..41f25ea2e8d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -61,17 +61,21 @@ enum { MLX5_EQ_DOORBEL_OFFSET = 0x40, }; -struct mlx5_irq_info { - cpumask_var_t mask; - char name[MLX5_MAX_IRQ_NAME]; - void *context; /* dev_id provided to request_irq */ +/* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update + * the ci before we polled all the entries in the EQ. MLX5_NUM_SPARE_EQE is + * used to set the EQ size, budget must be smaller than the EQ size. + */ +enum { + MLX5_EQ_POLLING_BUDGET = 128, }; +static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); + struct mlx5_eq_table { struct list_head comp_eqs_list; - struct mlx5_eq pages_eq; - struct mlx5_eq cmd_eq; - struct mlx5_eq async_eq; + struct mlx5_eq_async pages_eq; + struct mlx5_eq_async cmd_eq; + struct mlx5_eq_async async_eq; struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; @@ -79,11 +83,8 @@ struct mlx5_eq_table { struct mlx5_nb cq_err_nb; struct mutex lock; /* sync async eqs creations */ - int num_comp_vectors; - struct mlx5_irq_info *irq_info; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif + int num_comp_eqs; + struct mlx5_irq_table *irq_table; }; #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@ -124,16 +125,24 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) return cq; } -static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) +static int mlx5_eq_comp_int(struct notifier_block *nb, + __always_unused unsigned long action, + __always_unused void *data) { - struct mlx5_eq_comp *eq_comp = eq_ptr; - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_comp *eq_comp = + container_of(nb, struct mlx5_eq_comp, irq_nb); + struct mlx5_eq *eq = &eq_comp->core; struct mlx5_eqe *eqe; - int set_ci = 0; + int num_eqes = 0; u32 cqn = -1; - while ((eqe = next_eqe_sw(eq))) { + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { struct mlx5_core_cq *cq; + /* Make sure we read EQ entry contents after we've * checked the ownership bit. */ @@ -144,33 +153,23 @@ static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) cq = mlx5_eq_cq_get(eq, cqn); if (likely(cq)) { ++cq->arm_sn; - cq->comp(cq); + cq->comp(cq, eqe); mlx5_cq_put(cq); } else { mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); } ++eq->cons_index; - ++set_ci; - /* The HCA will think the queue has overflowed if we - * don't tell it we've been processing events. We - * create our EQs with MLX5_NUM_SPARE_EQE extra - * entries, so we must update our consumer index at - * least that often. - */ - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); - set_ci = 0; - } - } + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); +out: eq_update_ci(eq, 1); if (cqn != -1) tasklet_schedule(&eq_comp->tasklet_ctx.task); - return IRQ_HANDLED; + return 0; } /* Some architectures don't latch interrupts when they are disabled, so using @@ -184,25 +183,32 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) disable_irq(eq->core.irqn); count_eqe = eq->core.cons_index; - mlx5_eq_comp_int(eq->core.irqn, eq); + mlx5_eq_comp_int(&eq->irq_nb, 0, NULL); count_eqe = eq->core.cons_index - count_eqe; enable_irq(eq->core.irqn); return count_eqe; } -static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) +static int mlx5_eq_async_int(struct notifier_block *nb, + unsigned long action, void *data) { - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_async *eq_async = + container_of(nb, struct mlx5_eq_async, irq_nb); + struct mlx5_eq *eq = &eq_async->core; struct mlx5_eq_table *eqt; struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; - int set_ci = 0; + int num_eqes = 0; dev = eq->dev; eqt = dev->priv.eq_table; - while ((eqe = next_eqe_sw(eq))) { + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { /* * Make sure we read EQ entry contents after we've * checked the ownership bit. @@ -217,23 +223,13 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); ++eq->cons_index; - ++set_ci; - /* The HCA will think the queue has overflowed if we - * don't tell it we've been processing events. We - * create our EQs with MLX5_NUM_SPARE_EQE extra - * entries, so we must update our consumer index at - * least that often. - */ - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); - set_ci = 0; - } - } + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); +out: eq_update_ci(eq, 1); - return IRQ_HANDLED; + return 0; } static void init_eq_buf(struct mlx5_eq *eq) @@ -248,22 +244,19 @@ static void init_eq_buf(struct mlx5_eq *eq) } static int -create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, +create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - u8 vecidx = param->index; + u8 vecidx = param->irq_index; __be64 *pas; void *eqc; int inlen; u32 *in; int err; - - if (eq_table->irq_info[vecidx].context) - return -EEXIST; + int i; /* Init CQ table */ memset(cq_table, 0, sizeof(*cq_table)); @@ -291,7 +284,12 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, mlx5_fill_page_array(&eq->buf, pas); MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); - MLX5_SET64(create_eq_in, in, event_bitmask, param->mask); + if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx)) + MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID); + + for (i = 0; i < 4; i++) + MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i, + param->mask[i]); eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); @@ -304,34 +302,19 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, if (err) goto err_in; - snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", - name, pci_name(dev->pdev)); - eq_table->irq_info[vecidx].context = param->context; - eq->vecidx = vecidx; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(eq->irqn, param->handler, 0, - eq_table->irq_info[vecidx].name, param->context); - if (err) - goto err_eq; err = mlx5_debug_eq_add(dev, eq); if (err) - goto err_irq; - - /* EQs are created in ARMED state - */ - eq_update_ci(eq, 1); + goto err_eq; kvfree(in); return 0; -err_irq: - free_irq(eq->irqn, eq); - err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -343,18 +326,48 @@ err_buf: return err; } -static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +/** + * mlx5_eq_enable - Enable EQ for receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to enable + * @nb - notifier call block + * mlx5_eq_enable - must be called after EQ is created in device. + */ +int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; - struct mlx5_irq_info *irq_info; int err; - irq_info = &eq_table->irq_info[eq->vecidx]; + err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb); + if (!err) + eq_update_ci(eq, 1); - mlx5_debug_eq_remove(dev, eq); + return err; +} +EXPORT_SYMBOL(mlx5_eq_enable); - free_irq(eq->irqn, irq_info->context); - irq_info->context = NULL; +/** + * mlx5_eq_disable - Enable EQ for receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to disable + * @nb - notifier call block + * mlx5_eq_disable - must be called before EQ is destroyed. + */ +void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + + mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb); +} +EXPORT_SYMBOL(mlx5_eq_disable); + +static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + mlx5_debug_eq_remove(dev, eq); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) @@ -379,7 +392,7 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) return err; } -int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) +void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) { struct mlx5_cq_table *table = &eq->cq_table; struct mlx5_core_cq *tmp; @@ -389,16 +402,14 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) spin_unlock(&table->lock); if (!tmp) { - mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn); - return -ENOENT; - } - - if (tmp != cq) { - mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn); - return -EINVAL; + mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", + eq->eqn, cq->cqn); + return; } - return 0; + if (tmp != cq) + mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", + eq->eqn, cq->cqn); } int mlx5_eq_table_init(struct mlx5_core_dev *dev) @@ -420,6 +431,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); + eq_table->irq_table = dev->priv.irq_table; return 0; kvfree_eq_table: @@ -436,19 +448,20 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) /* Async EQs */ -static int create_async_eq(struct mlx5_core_dev *dev, const char *name, +static int create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; mutex_lock(&eq_table->lock); - if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) { - err = -ENOSPC; + /* Async EQs must share irq index 0 */ + if (param->irq_index != 0) { + err = -EINVAL; goto unlock; } - err = create_map_eq(dev, eq, name, param); + err = create_map_eq(dev, eq, param); unlock: mutex_unlock(&eq_table->lock); return err; @@ -477,7 +490,7 @@ static int cq_err_event_notifier(struct notifier_block *nb, /* type == MLX5_EVENT_TYPE_CQ_ERROR */ eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb); - eq = &eqt->async_eq; + eq = &eqt->async_eq.core; eqe = data; cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; @@ -490,22 +503,38 @@ static int cq_err_event_notifier(struct notifier_block *nb, return NOTIFY_OK; } - cq->event(cq, type); + if (cq->event) + cq->event(cq, type); mlx5_cq_put(cq); return NOTIFY_OK; } -static u64 gather_async_events_mask(struct mlx5_core_dev *dev) +static void gather_user_async_events(struct mlx5_core_dev *dev, u64 mask[4]) +{ + __be64 *user_unaffiliated_events; + __be64 *user_affiliated_events; + int i; + + user_affiliated_events = + MLX5_CAP_DEV_EVENT(dev, user_affiliated_events); + user_unaffiliated_events = + MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events); + + for (i = 0; i < 4; i++) + mask[i] |= be64_to_cpu(user_affiliated_events[i] | + user_unaffiliated_events[i]); +} + +static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4]) { u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; if (MLX5_VPORT_MANAGER(dev)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); - if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && - MLX5_CAP_GEN(dev, general_notification_event)) + if (MLX5_CAP_GEN(dev, general_notification_event)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT); if (MLX5_CAP_GEN(dev, port_module_event)) @@ -531,10 +560,14 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); - if (mlx5_core_is_ecpf_esw_manager(dev)) - async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE); + if (mlx5_eswitch_is_funcs_handler(dev)) + async_event_mask |= + (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED); + + mask[0] = async_event_mask; - return async_event_mask; + if (MLX5_CAP_GEN(dev, event_cap)) + gather_user_async_events(dev, mask); } static int create_async_eqs(struct mlx5_core_dev *dev) @@ -546,55 +579,76 @@ static int create_async_eqs(struct mlx5_core_dev *dev) MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); mlx5_eq_notifier_register(dev, &table->cq_err_nb); + table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_CMD_IDX, - .mask = 1ull << MLX5_EVENT_TYPE_CMD, + .irq_index = 0, .nent = MLX5_NUM_CMD_EQE, - .context = &table->cmd_eq, - .handler = mlx5_eq_async_int, }; - err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, ¶m); + + param.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD; + err = create_async_eq(dev, &table->cmd_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); goto err0; } - + err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err); + goto err1; + } mlx5_cmd_use_events(dev); + table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_ASYNC_IDX, - .mask = gather_async_events_mask(dev), + .irq_index = 0, .nent = MLX5_NUM_ASYNC_EQE, - .context = &table->async_eq, - .handler = mlx5_eq_async_int, }; - err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, ¶m); + + gather_async_events_mask(dev, param.mask); + err = create_async_eq(dev, &table->async_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); - goto err1; + goto err2; + } + err = mlx5_eq_enable(dev, &table->async_eq.core, + &table->async_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable async EQ %d\n", err); + goto err3; } + table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_PAGEREQ_IDX, - .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, + .irq_index = 0, .nent = /* TODO: sriov max_vf + */ 1, - .context = &table->pages_eq, - .handler = mlx5_eq_async_int, }; - err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, ¶m); + + param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST; + err = create_async_eq(dev, &table->pages_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); - goto err2; + goto err4; + } + err = mlx5_eq_enable(dev, &table->pages_eq.core, + &table->pages_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err); + goto err5; } return err; +err5: + destroy_async_eq(dev, &table->pages_eq.core); +err4: + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); +err3: + destroy_async_eq(dev, &table->async_eq.core); err2: - destroy_async_eq(dev, &table->async_eq); - -err1: mlx5_cmd_use_polling(dev); - destroy_async_eq(dev, &table->cmd_eq); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); +err1: + destroy_async_eq(dev, &table->cmd_eq.core); err0: mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; @@ -605,19 +659,22 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; int err; - err = destroy_async_eq(dev, &table->pages_eq); + mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb); + err = destroy_async_eq(dev, &table->pages_eq.core); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); - err = destroy_async_eq(dev, &table->async_eq); + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); + err = destroy_async_eq(dev, &table->async_eq.core); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); mlx5_cmd_use_polling(dev); - err = destroy_async_eq(dev, &table->cmd_eq); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + err = destroy_async_eq(dev, &table->cmd_eq.core); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); @@ -627,24 +684,24 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) { - return &dev->priv.eq_table->async_eq; + return &dev->priv.eq_table->async_eq.core; } void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->async_eq.irqn); + synchronize_irq(dev->priv.eq_table->async_eq.core.irqn); } void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->cmd_eq.irqn); + synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn); } /* Generic EQ API for mlx5_core consumers * Needed For RDMA ODP EQ for now */ struct mlx5_eq * -mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, +mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param) { struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); @@ -653,7 +710,7 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, if (!eq) return ERR_PTR(-ENOMEM); - err = create_async_eq(dev, name, eq, param); + err = create_async_eq(dev, eq, param); if (err) { kvfree(eq); eq = ERR_PTR(err); @@ -707,88 +764,18 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) __raw_writel((__force u32)cpu_to_be32(val), addr); /* We still want ordering, just not swabbing, so add a barrier */ - mb(); + wmb(); } EXPORT_SYMBOL(mlx5_eq_update_ci); -/* Completion EQs */ - -static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; - - if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { - mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); - return -ENOMEM; - } - - cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - irq_info->mask); - - if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, irq_info->mask)) - mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); - - return 0; -} - -static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; - - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(irq_info->mask); -} - -static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) -{ - int err; - int i; - - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) { - err = set_comp_irq_affinity_hint(mdev, i); - if (err) - goto err_out; - } - - return 0; - -err_out: - for (i--; i >= 0; i--) - clear_comp_irq_affinity_hint(mdev, i); - - return err; -} - -static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) -{ - int i; - - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) - clear_comp_irq_affinity_hint(mdev, i); -} - static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq, *n; - clear_comp_irqs_affinity_hints(dev); - -#ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; - } -#endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); if (destroy_unmap_eq(dev, &eq->core)) mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", eq->core.eqn); @@ -800,23 +787,17 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - char name[MLX5_MAX_IRQ_NAME]; struct mlx5_eq_comp *eq; - int ncomp_vec; + int ncomp_eqs; int nent; int err; int i; INIT_LIST_HEAD(&table->comp_eqs_list); - ncomp_vec = table->num_comp_vectors; + ncomp_eqs = table->num_comp_eqs; nent = MLX5_COMP_EQ_SIZE; -#ifdef CONFIG_RFS_ACCEL - table->rmap = alloc_irq_cpu_rmap(ncomp_vec); - if (!table->rmap) - return -ENOMEM; -#endif - for (i = 0; i < ncomp_vec; i++) { - int vecidx = i + MLX5_EQ_VEC_COMP_BASE; + for (i = 0; i < ncomp_eqs; i++) { + int vecidx = i + MLX5_IRQ_VEC_COMP_BASE; struct mlx5_eq_param param = {}; eq = kzalloc(sizeof(*eq), GFP_KERNEL); @@ -831,33 +812,28 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, (unsigned long)&eq->tasklet_ctx); -#ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); -#endif - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); + eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { - .index = vecidx, - .mask = 0, + .irq_index = vecidx, .nent = nent, - .context = &eq->core, - .handler = mlx5_eq_comp_int }; - err = create_map_eq(dev, &eq->core, name, ¶m); + err = create_map_eq(dev, &eq->core, ¶m); + if (err) { + kfree(eq); + goto clean; + } + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); if (err) { + destroy_unmap_eq(dev, &eq->core); kfree(eq); goto clean; } + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ list_add_tail(&eq->list, &table->comp_eqs_list); } - err = set_comp_irq_affinity_hints(dev); - if (err) { - mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); - goto clean; - } - return 0; clean: @@ -888,26 +864,26 @@ EXPORT_SYMBOL(mlx5_vector2eqn); unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->num_comp_vectors; + return dev->priv.eq_table->num_comp_eqs; } EXPORT_SYMBOL(mlx5_comp_vectors_count); struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { - /* TODO: consider irq_get_affinity_mask(irq) */ - return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; + int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE; + + return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table, + vecidx); } EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); +#ifdef CONFIG_RFS_ACCEL struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) { -#ifdef CONFIG_RFS_ACCEL - return dev->priv.eq_table->rmap; -#else - return NULL; -#endif + return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table); } +#endif struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) { @@ -926,82 +902,19 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - int i, max_eqs; - - clear_comp_irqs_affinity_hints(dev); - -#ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; - } -#endif mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ - max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; - for (i = max_eqs - 1; i >= 0; i--) { - if (!table->irq_info[i].context) - continue; - free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context); - table->irq_info[i].context = NULL; - } + mlx5_irq_table_destroy(dev); mutex_unlock(&table->lock); - pci_free_irq_vectors(dev->pdev); -} - -static int alloc_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *table = priv->eq_table; - int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? - MLX5_CAP_GEN(dev, max_num_eqs) : - 1 << MLX5_CAP_GEN(dev, log_max_eq); - int nvec; - int err; - - nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + - MLX5_EQ_VEC_COMP_BASE; - nvec = min_t(int, nvec, num_eqs); - if (nvec <= MLX5_EQ_VEC_COMP_BASE) - return -ENOMEM; - - table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); - if (!table->irq_info) - return -ENOMEM; - - nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, - nvec, PCI_IRQ_MSIX); - if (nvec < 0) { - err = nvec; - goto err_free_irq_info; - } - - table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; - - return 0; - -err_free_irq_info: - kfree(table->irq_info); - return err; -} - -static void free_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - - pci_free_irq_vectors(dev->pdev); - kfree(priv->eq_table->irq_info); } int mlx5_eq_table_create(struct mlx5_core_dev *dev) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; - err = alloc_irq_vectors(dev); - if (err) { - mlx5_core_err(dev, "alloc irq vectors failed\n"); - return err; - } + eq_table->num_comp_eqs = + mlx5_irq_get_num_comp(eq_table->irq_table); err = create_async_eqs(dev); if (err) { @@ -1019,7 +932,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) err_comp_eqs: destroy_async_eqs(dev); err_async_eqs: - free_irq_vectors(dev); return err; } @@ -1027,7 +939,6 @@ void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) { destroy_comp_eqs(dev); destroy_async_eqs(dev); - free_irq_vectors(dev); } int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) @@ -1039,6 +950,7 @@ int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb); } +EXPORT_SYMBOL(mlx5_eq_notifier_register); int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) { @@ -1049,3 +961,4 @@ int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb); } +EXPORT_SYMBOL(mlx5_eq_notifier_unregister); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8a67fd197b79..7281f8d6cba6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -72,25 +72,22 @@ static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw); MC_ADDR_CHANGE | \ PROMISC_CHANGE) -/* The vport getter/iterator are only valid after esw->total_vports - * and vport->vport are initialized in mlx5_eswitch_init. - */ -#define mlx5_esw_for_all_vports(esw, i, vport) \ - for ((i) = MLX5_VPORT_PF; \ - (vport) = &(esw)->vports[i], \ - (i) < (esw)->total_vports; (i)++) +struct mlx5_vport *__must_check +mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + u16 idx; -#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \ - for ((i) = MLX5_VPORT_FIRST_VF; \ - (vport) = &(esw)->vports[i], \ - (i) <= (nvfs); (i)++) + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager)) + return ERR_PTR(-EPERM); -static struct mlx5_vport *mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, - u16 vport_num) -{ - u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); + idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); + + if (idx > esw->total_vports - 1) { + esw_debug(esw->dev, "vport out of range: num(0x%x), idx(0x%x)\n", + vport_num, idx); + return ERR_PTR(-EINVAL); + } - WARN_ON(idx > esw->total_vports - 1); return &esw->vports[idx]; } @@ -137,6 +134,30 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *in, int inlen) +{ + return modify_esw_vport_context_cmd(esw->dev, vport, in, inlen); +} + +static int query_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, + void *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {}; + + MLX5_SET(query_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *out, int outlen) +{ + return query_esw_vport_context_cmd(esw->dev, vport, out, outlen); +} + static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, u16 vlan, u8 qos, u8 set_flags) { @@ -476,7 +497,7 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) fdb_add: /* SRIOV is enabled: Forward UC MAC to vport */ - if (esw->fdb_table.legacy.fdb && esw->mode == SRIOV_LEGACY) + if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY) vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n", @@ -644,9 +665,8 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) /* Apply vport UC/MC list to HW l2 table and FDB table */ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, - u16 vport_num, int list_type) + struct mlx5_vport *vport, int list_type) { - struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; vport_addr_action vport_addr_add; vport_addr_action vport_addr_del; @@ -679,9 +699,8 @@ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, /* Sync vport UC/MC list from vport context */ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, - u16 vport_num, int list_type) + struct mlx5_vport *vport, int list_type) { - struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; u8 (*mac_list)[ETH_ALEN]; struct l2addr_node *node; @@ -710,12 +729,12 @@ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, if (!vport->enabled) goto out; - err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type, + err = mlx5_query_nic_vport_mac_list(esw->dev, vport->vport, list_type, mac_list, &size); if (err) goto out; esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n", - vport_num, is_uc ? "UC" : "MC", size); + vport->vport, is_uc ? "UC" : "MC", size); for (i = 0; i < size; i++) { if (is_uc && !is_valid_ether_addr(mac_list[i])) @@ -753,10 +772,10 @@ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, if (!addr) { esw_warn(esw->dev, "Failed to add MAC(%pM) to vport[%d] DB\n", - mac_list[i], vport_num); + mac_list[i], vport->vport); continue; } - addr->vport = vport_num; + addr->vport = vport->vport; addr->action = MLX5_ACTION_ADD; } out: @@ -766,9 +785,9 @@ out: /* Sync vport UC/MC list from vport context * Must be called after esw_update_vport_addr_list */ -static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u16 vport_num) +static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); struct l2addr_node *node; struct vport_addr *addr; struct hlist_head *hash; @@ -791,20 +810,20 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u16 vport_num) if (!addr) { esw_warn(esw->dev, "Failed to add allmulti MAC(%pM) to vport[%d] DB\n", - mac, vport_num); + mac, vport->vport); continue; } - addr->vport = vport_num; + addr->vport = vport->vport; addr->action = MLX5_ACTION_ADD; addr->mc_promisc = true; } } /* Apply vport rx mode to HW FDB table */ -static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num, +static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, bool promisc, bool mc_promisc) { - struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc) @@ -812,7 +831,7 @@ static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num, if (mc_promisc) { vport->allmulti_rule = - esw_fdb_set_vport_allmulti_rule(esw, vport_num); + esw_fdb_set_vport_allmulti_rule(esw, vport->vport); if (!allmulti_addr->uplink_rule) allmulti_addr->uplink_rule = esw_fdb_set_vport_allmulti_rule(esw, @@ -835,8 +854,8 @@ promisc: return; if (promisc) { - vport->promisc_rule = esw_fdb_set_vport_promisc_rule(esw, - vport_num); + vport->promisc_rule = + esw_fdb_set_vport_promisc_rule(esw, vport->vport); } else if (vport->promisc_rule) { mlx5_del_flow_rules(vport->promisc_rule); vport->promisc_rule = NULL; @@ -844,23 +863,23 @@ promisc: } /* Sync vport rx mode from vport context */ -static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num) +static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); int promisc_all = 0; int promisc_uc = 0; int promisc_mc = 0; int err; err = mlx5_query_nic_vport_promisc(esw->dev, - vport_num, + vport->vport, &promisc_uc, &promisc_mc, &promisc_all); if (err) return; esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n", - vport_num, promisc_all, promisc_mc); + vport->vport, promisc_all, promisc_mc); if (!vport->info.trusted || !vport->enabled) { promisc_uc = 0; @@ -868,7 +887,7 @@ static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num) promisc_all = 0; } - esw_apply_vport_rx_mode(esw, vport_num, promisc_all, + esw_apply_vport_rx_mode(esw, vport, promisc_all, (promisc_all || promisc_mc)); } @@ -878,32 +897,26 @@ static void esw_vport_change_handle_locked(struct mlx5_vport *vport) struct mlx5_eswitch *esw = dev->priv.eswitch; u8 mac[ETH_ALEN]; - mlx5_query_nic_vport_mac_address(dev, vport->vport, mac); + mlx5_query_nic_vport_mac_address(dev, vport->vport, true, mac); esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", vport->vport, mac); if (vport->enabled_events & UC_ADDR_CHANGE) { - esw_update_vport_addr_list(esw, vport->vport, - MLX5_NVPRT_LIST_TYPE_UC); - esw_apply_vport_addr_list(esw, vport->vport, - MLX5_NVPRT_LIST_TYPE_UC); + esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC); + esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC); } - if (vport->enabled_events & MC_ADDR_CHANGE) { - esw_update_vport_addr_list(esw, vport->vport, - MLX5_NVPRT_LIST_TYPE_MC); - } + if (vport->enabled_events & MC_ADDR_CHANGE) + esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC); if (vport->enabled_events & PROMISC_CHANGE) { - esw_update_vport_rx_mode(esw, vport->vport); + esw_update_vport_rx_mode(esw, vport); if (!IS_ERR_OR_NULL(vport->allmulti_rule)) - esw_update_vport_mc_promisc(esw, vport->vport); + esw_update_vport_mc_promisc(esw, vport); } - if (vport->enabled_events & (PROMISC_CHANGE | MC_ADDR_CHANGE)) { - esw_apply_vport_addr_list(esw, vport->vport, - MLX5_NVPRT_LIST_TYPE_MC); - } + if (vport->enabled_events & (PROMISC_CHANGE | MC_ADDR_CHANGE)) + esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC); esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport); if (vport->enabled) @@ -922,8 +935,8 @@ static void esw_vport_change_handler(struct work_struct *work) mutex_unlock(&esw->state_lock); } -static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *vlan_grp = NULL; @@ -950,7 +963,7 @@ static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size)); root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS, - vport->vport); + mlx5_eswitch_vport_num_to_index(esw, vport->vport)); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport); return -EOPNOTSUPP; @@ -1006,8 +1019,8 @@ out: return err; } -static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) mlx5_del_flow_rules(vport->egress.allowed_vlan); @@ -1019,8 +1032,8 @@ static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, vport->egress.drop_rule = NULL; } -static void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { if (IS_ERR_OR_NULL(vport->egress.acl)) return; @@ -1036,8 +1049,8 @@ static void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, vport->egress.acl = NULL; } -static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_core_dev *dev = esw->dev; @@ -1068,7 +1081,7 @@ static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, - vport->vport); + mlx5_eswitch_vport_num_to_index(esw, vport->vport)); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport); return -EOPNOTSUPP; @@ -1168,8 +1181,8 @@ out: return err; } -static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { if (!IS_ERR_OR_NULL(vport->ingress.drop_rule)) mlx5_del_flow_rules(vport->ingress.drop_rule); @@ -1179,10 +1192,12 @@ static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, vport->ingress.drop_rule = NULL; vport->ingress.allow_rule = NULL; + + esw_vport_del_ingress_acl_modify_metadata(esw, vport); } -static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { if (IS_ERR_OR_NULL(vport->ingress.acl)) return; @@ -1420,10 +1435,10 @@ static void esw_destroy_tsar(struct mlx5_eswitch *esw) esw->qos.enabled = false; } -static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, +static int esw_vport_enable_qos(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, u32 initial_max_rate, u32 initial_bw_share) { - struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; struct mlx5_core_dev *dev = esw->dev; void *vport_elem; @@ -1440,7 +1455,7 @@ static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); - MLX5_SET(vport_element, vport_elem, vport_number, vport_num); + MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); MLX5_SET(scheduling_context, sched_ctx, parent_element_id, esw->qos.root_tsar_id); MLX5_SET(scheduling_context, sched_ctx, max_average_bw, @@ -1453,7 +1468,7 @@ static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, &vport->qos.esw_tsar_ix); if (err) { esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n", - vport_num, err); + vport->vport, err); return err; } @@ -1461,10 +1476,10 @@ static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, return 0; } -static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) +static void esw_vport_disable_qos(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); - int err = 0; + int err; if (!vport->qos.enabled) return; @@ -1474,15 +1489,15 @@ static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) vport->qos.esw_tsar_ix); if (err) esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", - vport_num, err); + vport->vport, err); vport->qos.enabled = false; } -static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, +static int esw_vport_qos_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, u32 max_rate, u32 bw_share) { - struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; struct mlx5_core_dev *dev = esw->dev; void *vport_elem; @@ -1499,7 +1514,7 @@ static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); - MLX5_SET(vport_element, vport_elem, vport_number, vport_num); + MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); MLX5_SET(scheduling_context, sched_ctx, parent_element_id, esw->qos.root_tsar_id); MLX5_SET(scheduling_context, sched_ctx, max_average_bw, @@ -1515,7 +1530,7 @@ static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, bitmask); if (err) { esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n", - vport_num, err); + vport->vport, err); return err; } @@ -1537,7 +1552,8 @@ static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) static void esw_apply_vport_conf(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - int vport_num = vport->vport; + u16 vport_num = vport->vport; + int flags; if (esw->manager_vport == vport_num) return; @@ -1555,11 +1571,13 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, vport->info.node_guid); } + flags = (vport->info.vlan || vport->info.qos) ? + SET_VLAN_STRIP | SET_VLAN_INSERT : 0; modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, - (vport->info.vlan || vport->info.qos)); + flags); /* Only legacy mode needs ACLs */ - if (esw->mode == SRIOV_LEGACY) { + if (esw->mode == MLX5_ESWITCH_LEGACY) { esw_vport_ingress_config(esw, vport); esw_vport_egress_config(esw, vport); } @@ -1611,14 +1629,14 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); /* Create steering drop counters for ingress and egress ACLs */ - if (vport_num && esw->mode == SRIOV_LEGACY) + if (vport_num && esw->mode == MLX5_ESWITCH_LEGACY) esw_vport_create_drop_counters(vport); /* Restore old vport configuration */ esw_apply_vport_conf(esw, vport); /* Attach vport to the eswitch rate limiter */ - if (esw_vport_enable_qos(esw, vport_num, vport->info.max_rate, + if (esw_vport_enable_qos(esw, vport, vport->info.max_rate, vport->qos.bw_share)) esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num); @@ -1663,9 +1681,9 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, */ esw_vport_change_handle_locked(vport); vport->enabled_events = 0; - esw_vport_disable_qos(esw, vport_num); + esw_vport_disable_qos(esw, vport); if (esw->manager_vport != vport_num && - esw->mode == SRIOV_LEGACY) { + esw->mode == MLX5_ESWITCH_LEGACY) { mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, vport_num, 1, @@ -1688,60 +1706,100 @@ static int eswitch_vport_event(struct notifier_block *nb, vport_num = be16_to_cpu(eqe->data.vport_change.vport_num); vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return NOTIFY_OK; + if (vport->enabled) queue_work(esw->work_queue, &vport->vport_change_handler); return NOTIFY_OK; } +/** + * mlx5_esw_query_functions - Returns raw output about functions state + * @dev: Pointer to device to query + * + * mlx5_esw_query_functions() allocates and returns functions changed + * raw output memory pointer from device on success. Otherwise returns ERR_PTR. + * Caller must free the memory using kvfree() when valid pointer is returned. + */ +const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) +{ + int outlen = MLX5_ST_SZ_BYTES(query_esw_functions_out); + u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {}; + u32 *out; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return ERR_PTR(-ENOMEM); + + MLX5_SET(query_esw_functions_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_FUNCTIONS); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); + if (!err) + return out; + + kvfree(out); + return ERR_PTR(err); +} + +static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw) +{ + MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->nb); + + if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) { + MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler, + ESW_FUNCTIONS_CHANGED); + mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb); + } +} + +static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw) +{ + if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) + mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); + + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + + flush_workqueue(esw->work_queue); +} + /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { - int vf_nvports = 0, total_nvports = 0; struct mlx5_vport *vport; int err; int i, enabled_events; if (!ESW_ALLOWED(esw) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { - esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); + esw_warn(esw->dev, "FDB is not supported, aborting ...\n"); return -EOPNOTSUPP; } if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support)) - esw_warn(esw->dev, "E-Switch ingress ACL is not supported by FW\n"); + esw_warn(esw->dev, "ingress ACL is not supported by FW\n"); if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) - esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); - - esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); - - if (mode == SRIOV_OFFLOADS) { - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports); - if (err) - return err; - total_nvports = esw->total_vports; - } else { - vf_nvports = nvfs; - total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev); - } - } + esw_warn(esw->dev, "engress ACL is not supported by FW\n"); esw->mode = mode; mlx5_lag_update(esw->dev); - if (mode == SRIOV_LEGACY) { + if (mode == MLX5_ESWITCH_LEGACY) { err = esw_create_legacy_table(esw); if (err) goto abort; } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, vf_nvports, total_nvports); + err = esw_offloads_init(esw); } if (err) @@ -1751,11 +1809,8 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (err) esw_warn(esw->dev, "Failed to create eswitch TSAR"); - /* Don't enable vport events when in SRIOV_OFFLOADS mode, since: - * 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode - * 2. FDB/Eswitch is programmed by user space tools - */ - enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0; + enabled_events = (mode == MLX5_ESWITCH_LEGACY) ? SRIOV_VPORT_EVENTS : + UC_ADDR_CHANGE; /* Enable PF vport */ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); @@ -1768,22 +1823,21 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } /* Enable VF vports */ - mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) esw_enable_vport(esw, vport, enabled_events); - if (mode == SRIOV_LEGACY) { - MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); - mlx5_eq_notifier_register(esw->dev, &esw->nb); - } + mlx5_eswitch_event_handlers_register(esw); + + esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n", + mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->enabled_vports); - esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", - esw->enabled_vports); return 0; abort: - esw->mode = SRIOV_NONE; + esw->mode = MLX5_ESWITCH_NONE; - if (mode == SRIOV_OFFLOADS) { + if (mode == MLX5_ESWITCH_OFFLOADS) { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); } @@ -1791,23 +1845,22 @@ abort: return err; } -void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) +void mlx5_eswitch_disable(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; struct mlx5_vport *vport; int old_mode; int i; - if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE) + if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE) return; - esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", - esw->enabled_vports, esw->mode); + esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", + esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->enabled_vports); mc_promisc = &esw->mc_promisc; - - if (esw->mode == SRIOV_LEGACY) - mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + mlx5_eswitch_event_handlers_unregister(esw); mlx5_esw_for_all_vports(esw, i, vport) esw_disable_vport(esw, vport); @@ -1817,17 +1870,17 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_destroy_tsar(esw); - if (esw->mode == SRIOV_LEGACY) + if (esw->mode == MLX5_ESWITCH_LEGACY) esw_destroy_legacy_table(esw); - else if (esw->mode == SRIOV_OFFLOADS) + else if (esw->mode == MLX5_ESWITCH_OFFLOADS) esw_offloads_cleanup(esw); old_mode = esw->mode; - esw->mode = SRIOV_NONE; + esw->mode = MLX5_ESWITCH_NONE; mlx5_lag_update(esw->dev); - if (old_mode == SRIOV_OFFLOADS) { + if (old_mode == MLX5_ESWITCH_OFFLOADS) { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); } @@ -1835,14 +1888,16 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) int mlx5_eswitch_init(struct mlx5_core_dev *dev) { - int total_vports = MLX5_TOTAL_VPORTS(dev); struct mlx5_eswitch *esw; struct mlx5_vport *vport; + int total_vports; int err, i; if (!MLX5_VPORT_MANAGER(dev)) return 0; + total_vports = mlx5_eswitch_get_total_vports(dev); + esw_info(dev, "Total vports %d, per vport: max uc(%d) max mc(%d)\n", total_vports, @@ -1855,6 +1910,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->dev = dev; esw->manager_vport = mlx5_eswitch_manager_vport(dev); + esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev); esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { @@ -1888,7 +1944,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) } esw->enabled_vports = 0; - esw->mode = SRIOV_NONE; + esw->mode = MLX5_ESWITCH_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) @@ -1922,22 +1978,19 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) } /* Vport Administration */ -#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) - int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, - int vport, u8 mac[ETH_ALEN]) + u16 vport, u8 mac[ETH_ALEN]) { - struct mlx5_vport *evport; + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); u64 node_guid; int err = 0; - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager)) - return -EPERM; - if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac)) + if (IS_ERR(evport)) + return PTR_ERR(evport); + if (is_multicast_ether_addr(mac)) return -EINVAL; mutex_lock(&esw->state_lock); - evport = &esw->vports[vport]; if (evport->info.spoofchk && !is_valid_ether_addr(mac)) mlx5_core_warn(esw->dev, @@ -1961,7 +2014,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, ether_addr_copy(evport->info.mac, mac); evport->info.node_guid = node_guid; - if (evport->enabled && esw->mode == SRIOV_LEGACY) + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) err = esw_vport_ingress_config(esw, evport); unlock: @@ -1970,18 +2023,17 @@ unlock: } int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, - int vport, int link_state) + u16 vport, int link_state) { - struct mlx5_vport *evport; + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); int err = 0; if (!ESW_ALLOWED(esw)) return -EPERM; - if (!LEGAL_VPORT(esw, vport)) - return -EINVAL; + if (IS_ERR(evport)) + return PTR_ERR(evport); mutex_lock(&esw->state_lock); - evport = &esw->vports[vport]; err = mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, @@ -2001,16 +2053,12 @@ unlock: } int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, - int vport, struct ifla_vf_info *ivi) + u16 vport, struct ifla_vf_info *ivi) { - struct mlx5_vport *evport; - - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager)) - return -EPERM; - if (!LEGAL_VPORT(esw, vport)) - return -EINVAL; + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); - evport = &esw->vports[vport]; + if (IS_ERR(evport)) + return PTR_ERR(evport); memset(ivi, 0, sizeof(*ivi)); ivi->vf = vport - 1; @@ -2030,18 +2078,19 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, } int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, - int vport, u16 vlan, u8 qos, u8 set_flags) + u16 vport, u16 vlan, u8 qos, u8 set_flags) { - struct mlx5_vport *evport; + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); int err = 0; if (!ESW_ALLOWED(esw)) return -EPERM; - if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7)) + if (IS_ERR(evport)) + return PTR_ERR(evport); + if (vlan > 4095 || qos > 7) return -EINVAL; mutex_lock(&esw->state_lock); - evport = &esw->vports[vport]; err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); if (err) @@ -2049,7 +2098,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, evport->info.vlan = vlan; evport->info.qos = qos; - if (evport->enabled && esw->mode == SRIOV_LEGACY) { + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) { err = esw_vport_ingress_config(esw, evport); if (err) goto unlock; @@ -2062,7 +2111,7 @@ unlock: } int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, - int vport, u16 vlan, u8 qos) + u16 vport, u16 vlan, u8 qos) { u8 set_flags = 0; @@ -2073,26 +2122,25 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, } int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, - int vport, bool spoofchk) + u16 vport, bool spoofchk) { - struct mlx5_vport *evport; + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); bool pschk; int err = 0; if (!ESW_ALLOWED(esw)) return -EPERM; - if (!LEGAL_VPORT(esw, vport)) - return -EINVAL; + if (IS_ERR(evport)) + return PTR_ERR(evport); mutex_lock(&esw->state_lock); - evport = &esw->vports[vport]; pschk = evport->info.spoofchk; evport->info.spoofchk = spoofchk; if (pschk && !is_valid_ether_addr(evport->info.mac)) mlx5_core_warn(esw->dev, "Spoofchk in set while MAC is invalid, vport(%d)\n", evport->vport); - if (evport->enabled && esw->mode == SRIOV_LEGACY) + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) err = esw_vport_ingress_config(esw, evport); if (err) evport->info.spoofchk = pschk; @@ -2188,7 +2236,7 @@ int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting) return -EPERM; mutex_lock(&esw->state_lock); - if (esw->mode != SRIOV_LEGACY) { + if (esw->mode != MLX5_ESWITCH_LEGACY) { err = -EOPNOTSUPP; goto out; } @@ -2211,7 +2259,7 @@ int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) return -EPERM; mutex_lock(&esw->state_lock); - if (esw->mode != SRIOV_LEGACY) { + if (esw->mode != MLX5_ESWITCH_LEGACY) { err = -EOPNOTSUPP; goto out; } @@ -2224,17 +2272,16 @@ out: } int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, - int vport, bool setting) + u16 vport, bool setting) { - struct mlx5_vport *evport; + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); if (!ESW_ALLOWED(esw)) return -EPERM; - if (!LEGAL_VPORT(esw, vport)) - return -EINVAL; + if (IS_ERR(evport)) + return PTR_ERR(evport); mutex_lock(&esw->state_lock); - evport = &esw->vports[vport]; evport->info.trusted = setting; if (evport->enabled) esw_vport_change_handle_locked(evport); @@ -2284,7 +2331,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) if (bw_share == evport->qos.bw_share) continue; - err = esw_vport_qos_config(esw, evport->vport, vport_max_rate, + err = esw_vport_qos_config(esw, evport, vport_max_rate, bw_share); if (!err) evport->qos.bw_share = bw_share; @@ -2295,10 +2342,10 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) return 0; } -int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, u32 max_rate, u32 min_rate) { - struct mlx5_vport *evport; + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); u32 fw_max_bw_share; u32 previous_min_rate; u32 divider; @@ -2308,8 +2355,8 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, if (!ESW_ALLOWED(esw)) return -EPERM; - if (!LEGAL_VPORT(esw, vport)) - return -EINVAL; + if (IS_ERR(evport)) + return PTR_ERR(evport); fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && @@ -2320,7 +2367,6 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, return -EOPNOTSUPP; mutex_lock(&esw->state_lock); - evport = &esw->vports[vport]; if (min_rate == evport->info.min_rate) goto set_max_rate; @@ -2338,7 +2384,7 @@ set_max_rate: if (max_rate == evport->info.max_rate) goto unlock; - err = esw_vport_qos_config(esw, vport, max_rate, evport->qos.bw_share); + err = esw_vport_qos_config(esw, evport, max_rate, evport->qos.bw_share); if (!err) evport->info.max_rate = max_rate; @@ -2348,16 +2394,15 @@ unlock: } static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, - int vport_idx, + struct mlx5_vport *vport, struct mlx5_vport_drop_stats *stats) { struct mlx5_eswitch *esw = dev->priv.eswitch; - struct mlx5_vport *vport = &esw->vports[vport_idx]; u64 rx_discard_vport_down, tx_discard_vport_down; u64 bytes = 0; int err = 0; - if (!vport->enabled || esw->mode != SRIOV_LEGACY) + if (!vport->enabled || esw->mode != MLX5_ESWITCH_LEGACY) return 0; if (vport->egress.drop_counter) @@ -2372,7 +2417,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) return 0; - err = mlx5_query_vport_down_stats(dev, vport_idx, 1, + err = mlx5_query_vport_down_stats(dev, vport->vport, 1, &rx_discard_vport_down, &tx_discard_vport_down); if (err) @@ -2387,19 +2432,18 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, } int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, - int vport, + u16 vport_num, struct ifla_vf_stats *vf_stats) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0}; struct mlx5_vport_drop_stats stats = {0}; int err = 0; u32 *out; - if (!ESW_ALLOWED(esw)) - return -EPERM; - if (!LEGAL_VPORT(esw, vport)) - return -EINVAL; + if (IS_ERR(vport)) + return PTR_ERR(vport); out = kvzalloc(outlen, GFP_KERNEL); if (!out) @@ -2408,7 +2452,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); - MLX5_SET(query_vport_counter_in, in, vport_number, vport); + MLX5_SET(query_vport_counter_in, in, vport_number, vport->vport); MLX5_SET(query_vport_counter_in, in, other_vport, 1); memset(out, 0, outlen); @@ -2468,16 +2512,27 @@ free_out: u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) { - return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE; + return ESW_ALLOWED(esw) ? esw->mode : MLX5_ESWITCH_NONE; } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); +enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw; + + esw = dev->priv.eswitch; + return ESW_ALLOWED(esw) ? esw->offloads.encap : + DEVLINK_ESWITCH_ENCAP_MODE_NONE; +} +EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); + bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { - if ((dev0->priv.eswitch->mode == SRIOV_NONE && - dev1->priv.eswitch->mode == SRIOV_NONE) || - (dev0->priv.eswitch->mode == SRIOV_OFFLOADS && - dev1->priv.eswitch->mode == SRIOV_OFFLOADS)) + if ((dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE && + dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE) || + (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS && + dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS)) return true; return false; @@ -2486,6 +2541,26 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { - return (dev0->priv.eswitch->mode == SRIOV_OFFLOADS && - dev1->priv.eswitch->mode == SRIOV_OFFLOADS); + return (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS && + dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS); +} + +void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) +{ + const u32 *out; + + WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE); + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) { + esw->esw_funcs.num_vfs = num_vfs; + return; + } + + out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(out)) + return; + + esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + kvfree(out); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 3f3cd32ae60a..a38e8a3c7c9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -68,6 +68,8 @@ struct vport_ingress { struct mlx5_flow_group *allow_spoofchk_only_grp; struct mlx5_flow_group *allow_untagged_only_grp; struct mlx5_flow_group *drop_grp; + int modify_metadata_id; + struct mlx5_flow_handle *modify_metadata_rule; struct mlx5_flow_handle *allow_rule; struct mlx5_flow_handle *drop_rule; struct mlx5_fc *drop_counter; @@ -173,9 +175,12 @@ struct mlx5_esw_offload { struct mutex peer_mutex; DECLARE_HASHTABLE(encap_tbl, 8); DECLARE_HASHTABLE(mod_hdr_tbl, 8); + DECLARE_HASHTABLE(termtbl_tbl, 8); + struct mutex termtbl_mutex; /* protects termtbl hash */ + const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; u64 num_flows; - u8 encap; + enum devlink_eswitch_encap_mode encap; }; /* E-Switch MC FDB table hash node */ @@ -190,11 +195,15 @@ struct mlx5_host_work { struct mlx5_eswitch *esw; }; -struct mlx5_host_info { +struct mlx5_esw_functions { struct mlx5_nb nb; u16 num_vfs; }; +enum { + MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -202,6 +211,7 @@ struct mlx5_eswitch { struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct *work_queue; struct mlx5_vport *vports; + u32 flags; int total_vports; int enabled_vports; /* Synchronize between vport change events @@ -219,43 +229,81 @@ struct mlx5_eswitch { int mode; int nvports; u16 manager_vport; - struct mlx5_host_info host_info; + u16 first_host_vport; + struct mlx5_esw_functions esw_funcs; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw); -int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, - int total_nvports); +int esw_offloads_init(struct mlx5_eswitch *esw); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); +void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); +int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); +void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); +int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); +void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); +void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); -void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode); +void mlx5_eswitch_disable(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, - int vport, u8 mac[ETH_ALEN]); + u16 vport, u8 mac[ETH_ALEN]); int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, - int vport, int link_state); + u16 vport, int link_state); int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, - int vport, u16 vlan, u8 qos); + u16 vport, u16 vlan, u8 qos); int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, - int vport, bool spoofchk); + u16 vport, bool spoofchk); int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, - int vport_num, bool setting); -int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, + u16 vport_num, bool setting); +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, u32 max_rate, u32 min_rate); int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting); int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting); int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, - int vport, struct ifla_vf_info *ivi); + u16 vport, struct ifla_vf_info *ivi); int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, - int vport, + u16 vport, struct ifla_vf_stats *vf_stats); void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule); +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *in, int inlen); +int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *out, int outlen); + struct mlx5_flow_spec; struct mlx5_esw_flow_attr; +struct mlx5_termtbl_handle; + +bool +mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec); + +struct mlx5_flow_handle * +mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest); + +void +mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, + struct mlx5_termtbl_handle *tt); struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, @@ -284,7 +332,7 @@ u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw); struct mlx5_flow_handle * -mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, struct mlx5_flow_destination *dest); enum { @@ -326,6 +374,7 @@ struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *rep; struct mlx5_core_dev *mdev; u32 encap_id; + struct mlx5_termtbl_handle *termtbl; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; u32 mod_hdr_id; u8 match_level; @@ -343,10 +392,12 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); -int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode); -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, +int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode); +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack); -int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap); +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, @@ -354,7 +405,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr); int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, - int vport, u16 vlan, u8 qos, u8 set_flags); + u16 vport, u16 vlan, u8 qos, u8 set_flags); static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, u8 vlan_depth) @@ -374,13 +425,15 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1); +const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) -#define esw_info(dev, format, ...) \ - pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) +#define esw_info(__dev, format, ...) \ + dev_info((__dev)->device, "E-Switch: " format, ##__VA_ARGS__) -#define esw_warn(dev, format, ...) \ - pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) +#define esw_warn(__dev, format, ...) \ + dev_warn((__dev)->device, "E-Switch: " format, ##__VA_ARGS__) #define esw_debug(dev, format, ...) \ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) @@ -392,6 +445,24 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) MLX5_VPORT_ECPF : MLX5_VPORT_PF; } +static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF; +} + +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) +{ + /* Ideally device should have the functions changed supported + * capability regardless of it being ECPF or PF wherever such + * event should be processed such as on eswitch manager device. + * However, some ECPF based device might not have this capability + * set. Hence OR for ECPF check to cover such device. + */ + return MLX5_CAP_ESW(dev, esw_functions_changed) || + mlx5_core_is_ecpf_esw_manager(dev); +} + static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) { /* Uplink always locate at the last element of the array.*/ @@ -418,7 +489,7 @@ static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, return vport_num; } -static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, +static inline u16 mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, int index) { if (index == mlx5_eswitch_ecpf_idx(esw) && @@ -431,13 +502,92 @@ static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, return index; } +/* TODO: This mlx5e_tc function shouldn't be called by eswitch */ +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); + +/* The vport getter/iterator are only valid after esw->total_vports + * and vport->vport are initialized in mlx5_eswitch_init. + */ +#define mlx5_esw_for_all_vports(esw, i, vport) \ + for ((i) = MLX5_VPORT_PF; \ + (vport) = &(esw)->vports[i], \ + (i) < (esw)->total_vports; (i)++) + +#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \ + for ((i) = MLX5_VPORT_FIRST_VF; \ + (vport) = &(esw)->vports[(i)], \ + (i) <= (nvfs); (i)++) + +#define mlx5_esw_for_each_vf_vport_reverse(esw, i, vport, nvfs) \ + for ((i) = (nvfs); \ + (vport) = &(esw)->vports[(i)], \ + (i) >= MLX5_VPORT_FIRST_VF; (i)--) + +/* The rep getter/iterator are only valid after esw->total_vports + * and vport->vport are initialized in mlx5_eswitch_init. + */ +#define mlx5_esw_for_all_reps(esw, i, rep) \ + for ((i) = MLX5_VPORT_PF; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) < (esw)->total_vports; (i)++) + +#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs) \ + for ((i) = MLX5_VPORT_FIRST_VF; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) <= (nvfs); (i)++) + +#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs) \ + for ((i) = (nvfs); \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) >= MLX5_VPORT_FIRST_VF; (i)--) + +#define mlx5_esw_for_each_vf_vport_num(esw, vport, nvfs) \ + for ((vport) = MLX5_VPORT_FIRST_VF; (vport) <= (nvfs); (vport)++) + +#define mlx5_esw_for_each_vf_vport_num_reverse(esw, vport, nvfs) \ + for ((vport) = (nvfs); (vport) >= MLX5_VPORT_FIRST_VF; (vport)--) + +/* Includes host PF (vport 0) if it's not esw manager. */ +#define mlx5_esw_for_each_host_func_rep(esw, i, rep, nvfs) \ + for ((i) = (esw)->first_host_vport; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) <= (nvfs); (i)++) + +#define mlx5_esw_for_each_host_func_rep_reverse(esw, i, rep, nvfs) \ + for ((i) = (nvfs); \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) >= (esw)->first_host_vport; (i)--) + +#define mlx5_esw_for_each_host_func_vport(esw, vport, nvfs) \ + for ((vport) = (esw)->first_host_vport; \ + (vport) <= (nvfs); (vport)++) + +#define mlx5_esw_for_each_host_func_vport_reverse(esw, vport, nvfs) \ + for ((vport) = (nvfs); \ + (vport) >= (esw)->first_host_vport; (vport)--) + +struct mlx5_vport *__must_check +mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); + +bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num); + +void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs); +int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} -static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } -static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} +static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { return 0; } +static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } +static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {} #define FDB_MAX_CHAIN 1 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9b2d78ee22b8..8ed4497929b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -37,17 +37,12 @@ #include <linux/mlx5/fs.h> #include "mlx5_core.h" #include "eswitch.h" +#include "rdma.h" #include "en.h" #include "fs_core.h" #include "lib/devcom.h" -#include "ecpf.h" #include "lib/eq.h" -enum { - FDB_FAST_PATH = 0, - FDB_SLOW_PATH -}; - /* There are two match-all miss flows, one for unicast dst mac and * one for multicast. */ @@ -58,36 +53,10 @@ enum { #define UPLINK_REP_INDEX 0 -/* The rep getter/iterator are only valid after esw->total_vports - * and vport->vport are initialized in mlx5_eswitch_init. - */ -#define mlx5_esw_for_all_reps(esw, i, rep) \ - for ((i) = MLX5_VPORT_PF; \ - (rep) = &(esw)->offloads.vport_reps[i], \ - (i) < (esw)->total_vports; (i)++) - -#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs) \ - for ((i) = MLX5_VPORT_FIRST_VF; \ - (rep) = &(esw)->offloads.vport_reps[i], \ - (i) <= (nvfs); (i)++) - -#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs) \ - for ((i) = (nvfs); \ - (rep) = &(esw)->offloads.vport_reps[i], \ - (i) >= MLX5_VPORT_FIRST_VF; (i)--) - -#define mlx5_esw_for_each_vf_vport(esw, vport, nvfs) \ - for ((vport) = MLX5_VPORT_FIRST_VF; \ - (vport) <= (nvfs); (vport)++) - -#define mlx5_esw_for_each_vf_vport_reverse(esw, vport, nvfs) \ - for ((vport) = (nvfs); \ - (vport) >= MLX5_VPORT_FIRST_VF; (vport)--) - static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, u16 vport_num) { - u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); + int idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); WARN_ON(idx > esw->total_vports - 1); return &esw->offloads.vport_reps[idx]; @@ -119,6 +88,53 @@ u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw) return 1; } +static void +mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) +{ + void *misc2; + void *misc; + + /* Use metadata matching because vport is not represented by single + * VHCA in dual-port RoCE mode, and matching on source vport may fail. + */ + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch, + attr->in_rep->vport)); + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc))) + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); + + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } + + if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) && + attr->in_rep->vport == MLX5_VPORT_UPLINK) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; +} + struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, @@ -130,9 +146,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; int j, i = 0; - void *misc; - if (esw->mode != SRIOV_OFFLOADS) + if (esw->mode != MLX5_ESWITCH_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); flow_act.action = attr->action; @@ -190,21 +205,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, i++; } - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); - - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + mlx5_eswitch_set_rule_source_port(esw, spec, attr); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); - - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { if (attr->tunnel_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; @@ -223,7 +225,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, goto err_esw_get; } - rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); + if (mlx5_eswitch_termtbl_required(esw, &flow_act, spec)) + rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr, + &flow_act, dest, i); + else + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); if (IS_ERR(rule)) goto err_add_rule; else @@ -250,7 +256,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_table *fast_fdb; struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_handle *rule; - void *misc; int i; fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0); @@ -282,25 +287,11 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, dest[i].ft = fwd_fdb, i++; - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); - - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(attr->in_mdev, vhca_id)); - - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); + mlx5_eswitch_set_rule_source_port(esw, spec, attr); - if (attr->match_level == MLX5_MATCH_NONE) - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; - else - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | - MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + if (attr->match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); @@ -325,8 +316,16 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, bool fwd_rule) { bool split = (attr->split_count > 0); + int i; mlx5_del_flow_rules(rule); + + /* unref the term table */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (attr->dests[i].termtbl) + mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); + } + esw->offloads.num_flows--; if (fwd_rule) { @@ -358,12 +357,11 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) { struct mlx5_eswitch_rep *rep; - int vf_vport, err = 0; + int i, err = 0; esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none"); - for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { - rep = &esw->offloads.vport_reps[vf_vport]; - if (rep->rep_if[REP_ETH].state != REP_LOADED) + mlx5_esw_for_each_host_func_rep(esw, i, rep, esw->esw_funcs.num_vfs) { + if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) continue; err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); @@ -545,7 +543,8 @@ out: } struct mlx5_flow_handle * -mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport, + u32 sqn) { struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_destination dest = {}; @@ -588,23 +587,87 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } -static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, +static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + u8 fdb_to_vport_reg_c_id; + int err; + + err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, + out, sizeof(out)); + if (err) + return err; + + fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + + fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.fdb_to_vport_reg_c_id, 1); + + return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, + in, sizeof(in)); +} + +static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + u8 fdb_to_vport_reg_c_id; + int err; + + err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, + out, sizeof(out)); + if (err) + return err; + + fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + + fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; + + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.fdb_to_vport_reg_c_id, 1); + + return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, + in, sizeof(in)); +} + +static void peer_miss_rules_setup(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev, struct mlx5_flow_spec *spec, struct mlx5_flow_destination *dest) { - void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); + void *misc; - MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(peer_dev, vhca_id)); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(peer_dev, vhca_id)); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + } dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest->vport.num = peer_dev->priv.eswitch->manager_vport; @@ -612,6 +675,26 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; } +static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw, + struct mlx5_flow_spec *spec, + u16 vport) +{ + void *misc; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(peer_esw, + vport)); + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + } +} + static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_core_dev *peer_dev) { @@ -629,7 +712,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, if (!spec) return -ENOMEM; - peer_miss_rules_setup(peer_dev, spec, &dest); + peer_miss_rules_setup(esw, peer_dev, spec, &dest); flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL); if (!flows) { @@ -642,7 +725,9 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc_parameters); if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF); + esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, + spec, MLX5_VPORT_PF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { @@ -663,8 +748,11 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows[mlx5_eswitch_ecpf_idx(esw)] = flow; } - mlx5_esw_for_each_vf_vport(esw, i, mlx5_core_max_vfs(esw->dev)) { - MLX5_SET(fte_match_set_misc, misc, source_port, i); + mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) { + esw_set_peer_miss_rule_source_port(esw, + peer_dev->priv.eswitch, + spec, i); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { @@ -681,7 +769,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, add_vf_flow_err: nvports = --i; - mlx5_esw_for_each_vf_vport_reverse(esw, i, nvports) + mlx5_esw_for_each_vf_vport_num_reverse(esw, i, nvports) mlx5_del_flow_rules(flows[i]); if (mlx5_ecpf_vport_exists(esw->dev)) @@ -704,7 +792,8 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) flows = esw->fdb_table.offloads.peer_miss_rules; - mlx5_esw_for_each_vf_vport_reverse(esw, i, mlx5_core_max_vfs(esw->dev)) + mlx5_esw_for_each_vf_vport_num_reverse(esw, i, + mlx5_core_max_vfs(esw->dev)) mlx5_del_flow_rules(flows[i]); if (mlx5_ecpf_vport_exists(esw->dev)) @@ -947,6 +1036,30 @@ static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw) #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 +static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, + u32 *flow_group_in) +{ + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0); + } else { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + } +} + static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -1044,19 +1157,21 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) /* create peer esw miss group */ memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, - match_criteria); + esw_set_flow_group_source_port(esw, flow_group_in); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_port); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_eswitch_owner_vhca_id); + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } - MLX5_SET(create_flow_group_in, flow_group_in, - source_eswitch_owner_vhca_id_valid, 1); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + esw->total_vports - 1); @@ -1170,7 +1285,6 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *g; u32 *flow_group_in; - void *match_criteria, *misc; int err = 0; nvports = nvports + MLX5_ESW_MISS_FLOWS; @@ -1180,12 +1294,8 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) /* create vport rx group */ memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + esw_set_flow_group_source_port(esw, flow_group_in); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); @@ -1210,7 +1320,7 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) } struct mlx5_flow_handle * -mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, struct mlx5_flow_destination *dest) { struct mlx5_flow_act flow_act = {0}; @@ -1224,13 +1334,24 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, goto out; } - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, vport); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport)); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, @@ -1248,21 +1369,22 @@ out: static int esw_offloads_start(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { - int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1; - if (esw->mode != SRIOV_LEGACY && + if (esw->mode != MLX5_ESWITCH_LEGACY && !mlx5_core_is_ecpf_esw_manager(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, "Can't set offloads mode, SRIOV legacy not enabled"); return -EINVAL; } - mlx5_eswitch_disable_sriov(esw); - err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + mlx5_eswitch_disable(esw); + mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs); + err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to offloads"); - err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to legacy"); @@ -1270,7 +1392,6 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, } if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) { if (mlx5_eswitch_inline_mode_get(esw, - num_vfs, &esw->offloads.inline_mode)) { esw->offloads.inline_mode = MLX5_INLINE_MODE_L2; NL_SET_ERR_MSG_MOD(extack, @@ -1287,26 +1408,28 @@ void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw) int esw_offloads_init_reps(struct mlx5_eswitch *esw) { - int total_vfs = MLX5_TOTAL_VPORTS(esw->dev); + int total_vports = esw->total_vports; struct mlx5_core_dev *dev = esw->dev; struct mlx5_eswitch_rep *rep; u8 hw_id[ETH_ALEN], rep_type; - int vport; + int vport_index; - esw->offloads.vport_reps = kcalloc(total_vfs, + esw->offloads.vport_reps = kcalloc(total_vports, sizeof(struct mlx5_eswitch_rep), GFP_KERNEL); if (!esw->offloads.vport_reps) return -ENOMEM; - mlx5_query_nic_vport_mac_address(dev, 0, hw_id); + mlx5_query_mac_address(dev, hw_id); - mlx5_esw_for_all_reps(esw, vport, rep) { - rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport); + mlx5_esw_for_all_reps(esw, vport_index, rep) { + rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index); + rep->vport_index = vport_index; ether_addr_copy(rep->hw_id, hw_id); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) - rep->rep_if[rep_type].state = REP_UNREGISTERED; + atomic_set(&rep->rep_data[rep_type].state, + REP_UNREGISTERED); } return 0; @@ -1315,11 +1438,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { - if (rep->rep_if[rep_type].state != REP_LOADED) - return; - - rep->rep_if[rep_type].unload(rep); - rep->rep_if[rep_type].state = REP_REGISTERED; + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, + REP_LOADED, REP_REGISTERED) == REP_LOADED) + esw->offloads.rep_ops[rep_type]->unload(rep); } static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) @@ -1358,21 +1479,20 @@ static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports) __unload_reps_vf_vport(esw, nvports, rep_type); } -static void __unload_reps_all_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) { - __unload_reps_vf_vport(esw, nvports, rep_type); + __unload_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type); /* Special vports must be the last to unload. */ __unload_reps_special_vport(esw, rep_type); } -static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw, int nvports) +static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw) { u8 rep_type = NUM_REP_TYPES; while (rep_type-- > 0) - __unload_reps_all_vport(esw, nvports, rep_type); + __unload_reps_all_vport(esw, rep_type); } static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, @@ -1380,16 +1500,15 @@ static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, { int err = 0; - if (rep->rep_if[rep_type].state != REP_REGISTERED) - return 0; - - err = rep->rep_if[rep_type].load(esw->dev, rep); - if (err) - return err; - - rep->rep_if[rep_type].state = REP_LOADED; + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, + REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { + err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); + if (err) + atomic_set(&rep->rep_data[rep_type].state, + REP_REGISTERED); + } - return 0; + return err; } static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) @@ -1449,6 +1568,26 @@ err_vf: return err; } +static int __load_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) +{ + int err; + + /* Special vports must be loaded first, uplink rep creates mdev resource. */ + err = __load_reps_special_vport(esw, rep_type); + if (err) + return err; + + err = __load_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type); + if (err) + goto err_vfs; + + return 0; + +err_vfs: + __unload_reps_special_vport(esw, rep_type); + return err; +} + static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports) { u8 rep_type = 0; @@ -1468,34 +1607,13 @@ err_reps: return err; } -static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) -{ - int err; - - /* Special vports must be loaded first. */ - err = __load_reps_special_vport(esw, rep_type); - if (err) - return err; - - err = __load_reps_vf_vport(esw, nvports, rep_type); - if (err) - goto err_vfs; - - return 0; - -err_vfs: - __unload_reps_special_vport(esw, rep_type); - return err; -} - -static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw) { u8 rep_type = 0; int err; for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = __load_reps_all_vport(esw, nvports, rep_type); + err = __load_reps_all_vport(esw, rep_type); if (err) goto err_reps; } @@ -1504,7 +1622,7 @@ static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) err_reps: while (rep_type-- > 0) - __unload_reps_all_vport(esw, nvports, rep_type); + __unload_reps_all_vport(esw, rep_type); return err; } @@ -1523,8 +1641,6 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, return 0; } -void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); - static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) { mlx5e_tc_clean_fdb_peer_flows(esw); @@ -1542,6 +1658,10 @@ static int mlx5_esw_offloads_devcom_event(int event, switch (event) { case ESW_OFFLOADS_DEVCOM_PAIR: + if (mlx5_eswitch_vport_match_metadata_enabled(esw) != + mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) + break; + err = mlx5_esw_offloads_pair(esw, peer_esw); if (err) goto err_out; @@ -1607,22 +1727,314 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); } -static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) +static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_spec *spec; + int err = 0; + + /* For prio tag mode, there is only 1 FTEs: + * 1) Untagged packets - push prio tag VLAN and modify metadata if + * required, allow + * Unmatched traffic is allowed by default + */ + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out_no_mem; + } + + /* Untagged packets - push prio tag VLAN, allow */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 0); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | + MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_act.vlan[0].ethtype = ETH_P_8021Q; + flow_act.vlan[0].vid = 0; + flow_act.vlan[0].prio = 0; + + if (vport->ingress.modify_metadata_rule) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_id = vport->ingress.modify_metadata_id; + } + + vport->ingress.allow_rule = + mlx5_add_flow_rules(vport->ingress.acl, spec, + &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.allow_rule)) { + err = PTR_ERR(vport->ingress.allow_rule); + esw_warn(esw->dev, + "vport[%d] configure ingress untagged allow rule, err(%d)\n", + vport->vport, err); + vport->ingress.allow_rule = NULL; + goto out; + } + +out: + kvfree(spec); +out_no_mem: + if (err) + esw_vport_cleanup_ingress_rules(esw, vport); + return err; +} + +static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec spec = {}; + int err = 0; + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); + MLX5_SET(set_action_in, action, data, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); + + err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + 1, action, &vport->ingress.modify_metadata_id); + if (err) { + esw_warn(esw->dev, + "failed to alloc modify header for vport %d ingress acl (%d)\n", + vport->vport, err); + return err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_act.modify_id = vport->ingress.modify_metadata_id; + vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, + &spec, &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.modify_metadata_rule)) { + err = PTR_ERR(vport->ingress.modify_metadata_rule); + esw_warn(esw->dev, + "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n", + vport->vport, err); + vport->ingress.modify_metadata_rule = NULL; + goto out; + } + +out: + if (err) + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + return err; +} + +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (vport->ingress.modify_metadata_rule) { + mlx5_del_flow_rules(vport->ingress.modify_metadata_rule); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + + vport->ingress.modify_metadata_rule = NULL; + } +} + +static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_spec *spec; + int err = 0; + + if (!MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + + /* For prio tag mode, there is only 1 FTEs: + * 1) prio tag packets - pop the prio tag VLAN, allow + * Unmatched traffic is allowed by default + */ + + esw_vport_cleanup_egress_rules(esw, vport); + + err = esw_vport_enable_egress_acl(esw, vport); + if (err) { + mlx5_core_warn(esw->dev, + "failed to enable egress acl (%d) on vport[%d]\n", + err, vport->vport); + return err; + } + + esw_debug(esw->dev, + "vport[%d] configure prio tag egress rules\n", vport->vport); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out_no_mem; + } + + /* prio tag vlan rule - pop it so VF receives untagged packets */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, 0); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_POP | + MLX5_FLOW_CONTEXT_ACTION_ALLOW; + vport->egress.allowed_vlan = + mlx5_add_flow_rules(vport->egress.acl, spec, + &flow_act, NULL, 0); + if (IS_ERR(vport->egress.allowed_vlan)) { + err = PTR_ERR(vport->egress.allowed_vlan); + esw_warn(esw->dev, + "vport[%d] configure egress pop prio tag vlan rule failed, err(%d)\n", + vport->vport, err); + vport->egress.allowed_vlan = NULL; + goto out; + } + +out: + kvfree(spec); +out_no_mem: + if (err) + esw_vport_cleanup_egress_rules(esw, vport); + return err; +} + +static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { int err; + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && + !MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + + esw_vport_cleanup_ingress_rules(esw, vport); + + err = esw_vport_enable_ingress_acl(esw, vport); + if (err) { + esw_warn(esw->dev, + "failed to enable ingress acl (%d) on vport[%d]\n", + err, vport->vport); + return err; + } + + esw_debug(esw->dev, + "vport[%d] configure ingress rules\n", vport->vport); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = esw_vport_add_ingress_acl_modify_metadata(esw, vport); + if (err) + goto out; + } + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + mlx5_eswitch_is_vf_vport(esw, vport->vport)) { + err = esw_vport_ingress_prio_tag_config(esw, vport); + if (err) + goto out; + } + +out: + if (err) + esw_vport_disable_ingress_acl(esw, vport); + return err; +} + +static bool +esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) +{ + if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl)) + return false; + + if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) & + MLX5_FDB_TO_VPORT_REG_C_0)) + return false; + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) + return false; + + if (mlx5_core_is_ecpf_esw_manager(esw->dev) || + mlx5_ecpf_vport_exists(esw->dev)) + return false; + + return true; +} + +static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int i, j; + int err; + + if (esw_check_vport_match_metadata_supported(esw)) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + + mlx5_esw_for_all_vports(esw, i, vport) { + err = esw_vport_ingress_common_config(esw, vport); + if (err) + goto err_ingress; + + if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { + err = esw_vport_egress_prio_tag_config(esw, vport); + if (err) + goto err_egress; + } + } + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + esw_info(esw->dev, "Use metadata reg_c as source vport to match\n"); + + return 0; + +err_egress: + esw_vport_disable_ingress_acl(esw, vport); +err_ingress: + for (j = MLX5_VPORT_PF; j < i; j++) { + vport = &esw->vports[j]; + esw_vport_disable_egress_acl(esw, vport); + esw_vport_disable_ingress_acl(esw, vport); + } + + return err; +} + +static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int i; + + mlx5_esw_for_all_vports(esw, i, vport) { + esw_vport_disable_egress_acl(esw, vport); + esw_vport_disable_ingress_acl(esw, vport); + } + + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; +} + +static int esw_offloads_steering_init(struct mlx5_eswitch *esw) +{ + int num_vfs = esw->esw_funcs.num_vfs; + int total_vports; + int err; + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + total_vports = esw->total_vports; + else + total_vports = num_vfs + MLX5_SPECIAL_VPORTS(esw->dev); + memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); - err = esw_create_offloads_fdb_tables(esw, nvports); + err = esw_create_offloads_acl_tables(esw); if (err) return err; - err = esw_create_offloads_table(esw, nvports); + err = esw_create_offloads_fdb_tables(esw, total_vports); + if (err) + goto create_fdb_err; + + err = esw_create_offloads_table(esw, total_vports); if (err) goto create_ft_err; - err = esw_create_vport_rx_group(esw, nvports); + err = esw_create_vport_rx_group(esw, total_vports); if (err) goto create_fg_err; @@ -1634,6 +2046,9 @@ create_fg_err: create_ft_err: esw_destroy_offloads_fdb_tables(esw); +create_fdb_err: + esw_destroy_offloads_acl_tables(esw); + return err; } @@ -1642,86 +2057,105 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_vport_rx_group(esw); esw_destroy_offloads_table(esw); esw_destroy_offloads_fdb_tables(esw); + esw_destroy_offloads_acl_tables(esw); } -static void esw_host_params_event_handler(struct work_struct *work) +static void +esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) { - struct mlx5_host_work *host_work; - struct mlx5_eswitch *esw; - int err, num_vf = 0; + bool host_pf_disabled; + u16 new_num_vfs; - host_work = container_of(work, struct mlx5_host_work, work); - esw = host_work->esw; + new_num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + host_pf_disabled = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_pf_disabled); - err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf); - if (err || num_vf == esw->host_info.num_vfs) - goto out; + if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled) + return; /* Number of VFs can only change from "0 to x" or "x to 0". */ - if (esw->host_info.num_vfs > 0) { - esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs); + if (esw->esw_funcs.num_vfs > 0) { + esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs); } else { - err = esw_offloads_load_vf_reps(esw, num_vf); + int err; + err = esw_offloads_load_vf_reps(esw, new_num_vfs); if (err) - goto out; + return; } + esw->esw_funcs.num_vfs = new_num_vfs; +} + +static void esw_functions_changed_event_handler(struct work_struct *work) +{ + struct mlx5_host_work *host_work; + struct mlx5_eswitch *esw; + const u32 *out; - esw->host_info.num_vfs = num_vf; + host_work = container_of(work, struct mlx5_host_work, work); + esw = host_work->esw; + out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(out)) + goto out; + + esw_vfs_changed_event_handler(esw, out); + kvfree(out); out: kfree(host_work); } -static int esw_host_params_event(struct notifier_block *nb, - unsigned long type, void *data) +int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data) { + struct mlx5_esw_functions *esw_funcs; struct mlx5_host_work *host_work; - struct mlx5_host_info *host_info; struct mlx5_eswitch *esw; host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC); if (!host_work) return NOTIFY_DONE; - host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb); - esw = container_of(host_info, struct mlx5_eswitch, host_info); + esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb); + esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs); host_work->esw = esw; - INIT_WORK(&host_work->work, esw_host_params_event_handler); + INIT_WORK(&host_work->work, esw_functions_changed_event_handler); queue_work(esw->work_queue, &host_work->work); return NOTIFY_OK; } -int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, - int total_nvports) +int esw_offloads_init(struct mlx5_eswitch *esw) { int err; - mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); - - err = esw_offloads_steering_init(esw, total_nvports); + err = esw_offloads_steering_init(esw); if (err) return err; - err = esw_offloads_load_all_reps(esw, vf_nvports); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = mlx5_eswitch_enable_passing_vport_metadata(esw); + if (err) + goto err_vport_metadata; + } + + err = esw_offloads_load_all_reps(esw); if (err) goto err_reps; esw_offloads_devcom_init(esw); + mutex_init(&esw->offloads.termtbl_mutex); - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event, - HOST_PARAMS_CHANGE); - mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb); - esw->host_info.num_vfs = vf_nvports; - } + mlx5_rdma_enable_roce(esw->dev); return 0; err_reps: + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + mlx5_eswitch_disable_passing_vport_metadata(esw); +err_vport_metadata: esw_offloads_steering_cleanup(esw); return err; } @@ -1729,13 +2163,13 @@ err_reps: static int esw_offloads_stop(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { - int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1; - mlx5_eswitch_disable_sriov(esw); - err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + mlx5_eswitch_disable(esw); + err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); - err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to offloads"); @@ -1747,18 +2181,11 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_cleanup(struct mlx5_eswitch *esw) { - u16 num_vfs; - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb); - flush_workqueue(esw->work_queue); - num_vfs = esw->host_info.num_vfs; - } else { - num_vfs = esw->dev->priv.sriov.num_vfs; - } - + mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_all_reps(esw, num_vfs); + esw_offloads_unload_all_reps(esw); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + mlx5_eswitch_disable_passing_vport_metadata(esw); esw_offloads_steering_cleanup(esw); } @@ -1766,10 +2193,10 @@ static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) { switch (mode) { case DEVLINK_ESWITCH_MODE_LEGACY: - *mlx5_mode = SRIOV_LEGACY; + *mlx5_mode = MLX5_ESWITCH_LEGACY; break; case DEVLINK_ESWITCH_MODE_SWITCHDEV: - *mlx5_mode = SRIOV_OFFLOADS; + *mlx5_mode = MLX5_ESWITCH_OFFLOADS; break; default: return -EINVAL; @@ -1781,10 +2208,10 @@ static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode) { switch (mlx5_mode) { - case SRIOV_LEGACY: + case MLX5_ESWITCH_LEGACY: *mode = DEVLINK_ESWITCH_MODE_LEGACY; break; - case SRIOV_OFFLOADS: + case MLX5_ESWITCH_OFFLOADS: *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; break; default: @@ -1848,7 +2275,7 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink) if(!MLX5_ESWITCH_MANAGER(dev)) return -EPERM; - if (dev->priv.eswitch->mode == SRIOV_NONE && + if (dev->priv.eswitch->mode == MLX5_ESWITCH_NONE && !mlx5_core_is_ecpf_esw_manager(dev)) return -EOPNOTSUPP; @@ -1899,7 +2326,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; - int err, vport; + int err, vport, num_vport; u8 mlx5_mode; err = mlx5_devlink_eswitch_check(devlink); @@ -1928,7 +2355,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (err) goto out; - for (vport = 1; vport < esw->enabled_vports; vport++) { + mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) { err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode); if (err) { NL_SET_ERR_MSG_MOD(extack, @@ -1941,7 +2368,8 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, return 0; revert_inline_mode: - while (--vport > 0) + num_vport = --vport; + mlx5_esw_for_each_host_func_vport_reverse(esw, vport, num_vport) mlx5_modify_nic_vport_min_inline(dev, vport, esw->offloads.inline_mode); @@ -1962,7 +2390,7 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); } -int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) +int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode) { u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; struct mlx5_core_dev *dev = esw->dev; @@ -1971,7 +2399,7 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) if (!MLX5_CAP_GEN(dev, vport_group_manager)) return -EOPNOTSUPP; - if (esw->mode == SRIOV_NONE) + if (esw->mode == MLX5_ESWITCH_NONE) return -EOPNOTSUPP; switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { @@ -1986,9 +2414,10 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) } query_vports: - for (vport = 1; vport <= nvfs; vport++) { + mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode); + mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) { mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode); - if (vport > 1 && prev_mlx5_mode != mlx5_mode) + if (prev_mlx5_mode != mlx5_mode) return -EINVAL; prev_mlx5_mode = mlx5_mode; } @@ -1998,7 +2427,8 @@ out: return 0; } -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); @@ -2017,7 +2447,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) return -EOPNOTSUPP; - if (esw->mode == SRIOV_LEGACY) { + if (esw->mode == MLX5_ESWITCH_LEGACY) { esw->offloads.encap = encap; return 0; } @@ -2047,7 +2477,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, return err; } -int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; @@ -2062,36 +2493,31 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) } void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep_if *__rep_if, + const struct mlx5_eswitch_rep_ops *ops, u8 rep_type) { - struct mlx5_eswitch_rep_if *rep_if; + struct mlx5_eswitch_rep_data *rep_data; struct mlx5_eswitch_rep *rep; int i; + esw->offloads.rep_ops[rep_type] = ops; mlx5_esw_for_all_reps(esw, i, rep) { - rep_if = &rep->rep_if[rep_type]; - rep_if->load = __rep_if->load; - rep_if->unload = __rep_if->unload; - rep_if->get_proto_dev = __rep_if->get_proto_dev; - rep_if->priv = __rep_if->priv; - - rep_if->state = REP_REGISTERED; + rep_data = &rep->rep_data[rep_type]; + atomic_set(&rep_data->state, REP_REGISTERED); } } EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) { - u16 max_vf = mlx5_core_max_vfs(esw->dev); struct mlx5_eswitch_rep *rep; int i; - if (esw->mode == SRIOV_OFFLOADS) - __unload_reps_all_vport(esw, max_vf, rep_type); + if (esw->mode == MLX5_ESWITCH_OFFLOADS) + __unload_reps_all_vport(esw, rep_type); mlx5_esw_for_all_reps(esw, i, rep) - rep->rep_if[rep_type].state = REP_UNREGISTERED; + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); } EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); @@ -2100,20 +2526,20 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) struct mlx5_eswitch_rep *rep; rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - return rep->rep_if[rep_type].priv; + return rep->rep_data[rep_type].priv; } void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, - int vport, + u16 vport, u8 rep_type) { struct mlx5_eswitch_rep *rep; rep = mlx5_eswitch_get_rep(esw, vport); - if (rep->rep_if[rep_type].state == REP_LOADED && - rep->rep_if[rep_type].get_proto_dev) - return rep->rep_if[rep_type].get_proto_dev(rep); + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + esw->offloads.rep_ops[rep_type]->get_proto_dev) + return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep); return NULL; } EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); @@ -2125,8 +2551,27 @@ void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type) EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev); struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, - int vport) + u16 vport) { return mlx5_eswitch_get_rep(esw, vport); } EXPORT_SYMBOL(mlx5_eswitch_vport_rep); + +bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num >= MLX5_VPORT_FIRST_VF && + vport_num <= esw->dev->priv.sriov.max_vfs; +} + +bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) +{ + return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA); +} +EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled); + +u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + u16 vport_num) +{ + return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num; +} +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c new file mode 100644 index 000000000000..1d55a324a17e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include <linux/mlx5/fs.h> +#include "eswitch.h" + +struct mlx5_termtbl_handle { + struct hlist_node termtbl_hlist; + + struct mlx5_flow_table *termtbl; + struct mlx5_flow_act flow_act; + struct mlx5_flow_destination dest; + + struct mlx5_flow_handle *rule; + int ref_count; +}; + +static u32 +mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest) +{ + u32 hash; + + hash = jhash_1word(flow_act->action, 0); + hash = jhash((const void *)&flow_act->vlan, + sizeof(flow_act->vlan), hash); + hash = jhash((const void *)&dest->vport.num, + sizeof(dest->vport.num), hash); + hash = jhash((const void *)&dest->vport.vhca_id, + sizeof(dest->vport.num), hash); + return hash; +} + +static int +mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, + struct mlx5_flow_destination *dest1, + struct mlx5_flow_act *flow_act2, + struct mlx5_flow_destination *dest2) +{ + return flow_act1->action != flow_act2->action || + dest1->vport.num != dest2->vport.num || + dest1->vport.vhca_id != dest2->vport.vhca_id || + memcmp(&flow_act1->vlan, &flow_act2->vlan, + sizeof(flow_act1->vlan)); +} + +static int +mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, + struct mlx5_termtbl_handle *tt, + struct mlx5_flow_act *flow_act) +{ + static const struct mlx5_flow_spec spec = {}; + struct mlx5_flow_namespace *root_ns; + int prio, flags; + int err; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + /* As this is the terminating action then the termination table is the + * same prio as the slow path + */ + prio = FDB_SLOW_PATH; + flags = MLX5_FLOW_TABLE_TERMINATION; + tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, prio, 1, 1, + 0, flags); + if (IS_ERR(tt->termtbl)) { + esw_warn(dev, "Failed to create termination table\n"); + return -EOPNOTSUPP; + } + + tt->rule = mlx5_add_flow_rules(tt->termtbl, &spec, flow_act, + &tt->dest, 1); + + if (IS_ERR(tt->rule)) { + esw_warn(dev, "Failed to create termination table rule\n"); + goto add_flow_err; + } + return 0; + +add_flow_err: + err = mlx5_destroy_flow_table(tt->termtbl); + if (err) + esw_warn(dev, "Failed to destroy termination table\n"); + + return -EOPNOTSUPP; +} + +static struct mlx5_termtbl_handle * +mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest) +{ + struct mlx5_termtbl_handle *tt; + bool found = false; + u32 hash_key; + int err; + + mutex_lock(&esw->offloads.termtbl_mutex); + + hash_key = mlx5_eswitch_termtbl_hash(flow_act, dest); + hash_for_each_possible(esw->offloads.termtbl_tbl, tt, + termtbl_hlist, hash_key) { + if (!mlx5_eswitch_termtbl_cmp(&tt->flow_act, &tt->dest, + flow_act, dest)) { + found = true; + break; + } + } + if (found) + goto tt_add_ref; + + tt = kzalloc(sizeof(*tt), GFP_KERNEL); + if (!tt) { + err = -ENOMEM; + goto tt_create_err; + } + + tt->dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + tt->dest.vport.num = dest->vport.num; + tt->dest.vport.vhca_id = dest->vport.vhca_id; + memcpy(&tt->flow_act, flow_act, sizeof(*flow_act)); + + err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act); + if (err) { + esw_warn(esw->dev, "Failed to create termination table\n"); + goto tt_create_err; + } + hash_add(esw->offloads.termtbl_tbl, &tt->termtbl_hlist, hash_key); +tt_add_ref: + tt->ref_count++; + mutex_unlock(&esw->offloads.termtbl_mutex); + return tt; +tt_create_err: + kfree(tt); + mutex_unlock(&esw->offloads.termtbl_mutex); + return ERR_PTR(err); +} + +void +mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, + struct mlx5_termtbl_handle *tt) +{ + mutex_lock(&esw->offloads.termtbl_mutex); + if (--tt->ref_count == 0) + hash_del(&tt->termtbl_hlist); + mutex_unlock(&esw->offloads.termtbl_mutex); + + if (!tt->ref_count) { + mlx5_del_flow_rules(tt->rule); + mlx5_destroy_flow_table(tt->termtbl); + kfree(tt); + } +} + +static void +mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, + struct mlx5_flow_act *dst) +{ + if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)) + return; + + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0])); + memset(&src->vlan[0], 0, sizeof(src->vlan[0])); + + if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2)) + return; + + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1])); + memset(&src->vlan[1], 0, sizeof(src->vlan[1])); +} + +bool +mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec) +{ + u32 port_mask = MLX5_GET(fte_match_param, spec->match_criteria, + misc_parameters.source_port); + u32 port_value = MLX5_GET(fte_match_param, spec->match_value, + misc_parameters.source_port); + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table)) + return false; + + /* push vlan on RX */ + return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) && + ((port_mask & port_value) == MLX5_VPORT_UPLINK); +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest) +{ + struct mlx5_flow_act term_tbl_act = {}; + struct mlx5_flow_handle *rule = NULL; + bool term_table_created = false; + int num_vport_dests = 0; + int i, curr_dest; + + mlx5_eswitch_termtbl_actions_move(flow_act, &term_tbl_act); + term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + for (i = 0; i < num_dest; i++) { + struct mlx5_termtbl_handle *tt; + + /* only vport destinations can be terminated */ + if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) + continue; + + /* get the terminating table for the action list */ + tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, + &dest[i]); + if (IS_ERR(tt)) { + esw_warn(esw->dev, "Failed to create termination table\n"); + goto revert_changes; + } + attr->dests[num_vport_dests].termtbl = tt; + num_vport_dests++; + + /* link the destination with the termination table */ + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = tt->termtbl; + term_table_created = true; + } + + /* at least one destination should reference a termination table */ + if (!term_table_created) + goto revert_changes; + + /* create the FTE */ + rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); + if (IS_ERR(rule)) + goto revert_changes; + + goto out; + +revert_changes: + /* revert the changes that were made to the original flow_act + * and fall-back to the original rule actions + */ + mlx5_eswitch_termtbl_actions_move(&term_tbl_act, flow_act); + + for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) { + struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl; + + /* search for the destination associated with the + * current term table + */ + for (i = 0; i < num_dest; i++) { + if (dest[i].ft != tt->termtbl) + continue; + + memset(&dest[i], 0, sizeof(dest[i])); + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[i].vport.num = tt->dest.vport.num; + dest[i].vport.vhca_id = tt->dest.vport.vhca_id; + mlx5_eswitch_termtbl_put(esw, tt); + break; + } + } + rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); +out: + return rule; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 5d5864e8df3c..8bcf3426b9c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -21,6 +21,7 @@ struct mlx5_event_nb { static int any_notifier(struct notifier_block *, unsigned long, void *); static int temp_warn(struct notifier_block *, unsigned long, void *); static int port_module(struct notifier_block *, unsigned long, void *); +static int pcie_core(struct notifier_block *, unsigned long, void *); /* handler which forwards the event to events->nh, driver notifiers */ static int forward_event(struct notifier_block *, unsigned long, void *); @@ -30,6 +31,7 @@ static struct mlx5_nb events_nbs_ref[] = { {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY }, {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT }, {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT }, + {.nb.notifier_call = pcie_core, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, @@ -51,11 +53,14 @@ static struct mlx5_nb events_nbs_ref[] = { struct mlx5_events { struct mlx5_core_dev *dev; + struct workqueue_struct *wq; struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)]; /* driver notifier chain */ struct atomic_notifier_head nh; /* port module events stats */ struct mlx5_pme_stats pme_stats; + /*pcie_core*/ + struct work_struct pcie_core_work; }; static const char *eqe_type_str(u8 type) @@ -103,8 +108,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_STALL_EVENT"; case MLX5_EVENT_TYPE_CMD: return "MLX5_EVENT_TYPE_CMD"; - case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE: - return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE"; + case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED: + return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED"; case MLX5_EVENT_TYPE_PAGE_REQUEST: return "MLX5_EVENT_TYPE_PAGE_REQUEST"; case MLX5_EVENT_TYPE_PAGE_FAULT: @@ -249,6 +254,69 @@ static int port_module(struct notifier_block *nb, unsigned long type, void *data return NOTIFY_OK; } +enum { + MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0, + MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1, + MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2, +}; + +static void mlx5_pcie_event(struct work_struct *work) +{ + u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0}; + struct mlx5_events *events; + struct mlx5_core_dev *dev; + u8 power_status; + u16 pci_power; + + events = container_of(work, struct mlx5_events, pcie_core_work); + dev = events->dev; + + if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power)) + return; + + mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MPEIN, 0, 0); + power_status = MLX5_GET(mpein_reg, out, pwr_status); + pci_power = MLX5_GET(mpein_reg, out, pci_power); + + switch (power_status) { + case MLX5_PCI_POWER_COULD_NOT_BE_READ: + mlx5_core_info_rl(dev, + "PCIe slot power capability was not advertised.\n"); + break; + case MLX5_PCI_POWER_INSUFFICIENT_REPORTED: + mlx5_core_warn_rl(dev, + "Detected insufficient power on the PCIe slot (%uW).\n", + pci_power); + break; + case MLX5_PCI_POWER_SUFFICIENT_REPORTED: + mlx5_core_info_rl(dev, + "PCIe slot advertised sufficient power (%uW).\n", + pci_power); + break; + } +} + +static int pcie_core(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, + struct mlx5_event_nb, + nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT: + queue_work(events->wq, &events->pcie_core_work); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats) { *stats = dev->priv.events->pme_stats; @@ -277,11 +345,17 @@ int mlx5_events_init(struct mlx5_core_dev *dev) ATOMIC_INIT_NOTIFIER_HEAD(&events->nh); events->dev = dev; dev->priv.events = events; + events->wq = create_singlethread_workqueue("mlx5_events"); + if (!events->wq) + return -ENOMEM; + INIT_WORK(&events->pcie_core_work, mlx5_pcie_event); + return 0; } void mlx5_events_cleanup(struct mlx5_core_dev *dev) { + destroy_workqueue(dev->priv.events->wq); kvfree(dev->priv.events); } @@ -304,6 +378,7 @@ void mlx5_events_stop(struct mlx5_core_dev *dev) for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--) mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb); + flush_workqueue(events->wq); } int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile index d8e17110f25d..c78512eed8d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index 873541ef4c1b..4c50efe4e7f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -135,7 +135,7 @@ static void mlx5_fpga_conn_notify_hw(struct mlx5_fpga_conn *conn, void *wqe) *conn->qp.wq.sq.db = cpu_to_be32(conn->qp.sq.pc); /* Make sure that doorbell record is visible before ringing */ wmb(); - mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET, NULL); + mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET); } static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn, @@ -414,7 +414,8 @@ static void mlx5_fpga_conn_cq_tasklet(unsigned long data) mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET); } -static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq) +static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq, + struct mlx5_eqe *eqe) { struct mlx5_fpga_conn *conn; @@ -429,6 +430,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) struct mlx5_fpga_device *fdev = conn->fdev; struct mlx5_core_dev *mdev = fdev->mdev; u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; struct mlx5_wq_param wqp; struct mlx5_cqe64 *cqe; int inlen, err, eqn; @@ -476,7 +478,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas); - err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen); + err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen, out, sizeof(out)); kvfree(in); if (err) @@ -867,7 +869,7 @@ struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, conn->cb_arg = attr->cb_arg; remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32); - err = mlx5_query_nic_vport_mac_address(fdev->mdev, 0, remote_mac); + err = mlx5_query_mac_address(fdev->mdev, remote_mac); if (err) { mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err); ret = ERR_PTR(err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h index 7e2e871dbf83..52c9dee91ea4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -37,6 +37,7 @@ #include <linux/mlx5/eq.h> +#include "mlx5_core.h" #include "lib/eq.h" #include "fpga/cmd.h" @@ -62,26 +63,26 @@ struct mlx5_fpga_device { }; #define mlx5_fpga_dbg(__adev, format, ...) \ - dev_dbg(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \ - __func__, __LINE__, current->pid, ##__VA_ARGS__) + mlx5_core_dbg((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##__VA_ARGS__) #define mlx5_fpga_err(__adev, format, ...) \ - dev_err(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \ - __func__, __LINE__, current->pid, ##__VA_ARGS__) + mlx5_core_err((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##__VA_ARGS__) #define mlx5_fpga_warn(__adev, format, ...) \ - dev_warn(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \ - __func__, __LINE__, current->pid, ##__VA_ARGS__) + mlx5_core_warn((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##__VA_ARGS__) #define mlx5_fpga_warn_ratelimited(__adev, format, ...) \ - dev_warn_ratelimited(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d: " \ - format, __func__, __LINE__, ##__VA_ARGS__) + mlx5_core_err_rl((__adev)->mdev, "FPGA: %s:%d: " \ + format, __func__, __LINE__, ##__VA_ARGS__) #define mlx5_fpga_notice(__adev, format, ...) \ - dev_notice(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__) + mlx5_core_info((__adev)->mdev, "FPGA: " format, ##__VA_ARGS__) #define mlx5_fpga_info(__adev, format, ...) \ - dev_info(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__) + mlx5_core_info((__adev)->mdev, "FPGA: " format, ##__VA_ARGS__) int mlx5_fpga_init(struct mlx5_core_dev *mdev); void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 5a22c5874f3b..c76da309506b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -636,7 +636,8 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, u8 match_criteria_enable, const u32 *match_c, const u32 *match_v, - struct mlx5_flow_act *flow_act) + struct mlx5_flow_act *flow_act, + struct mlx5_flow_context *flow_context) { const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); @@ -655,7 +656,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, (match_criteria_enable & ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || - (flow_act->flags & FLOW_ACT_HAS_TAG)) + (flow_context->flags & FLOW_CONTEXT_HAS_TAG)) return false; return true; @@ -767,7 +768,8 @@ mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev, fg->mask.match_criteria_enable, fg->mask.match_criteria, fte->val, - &fte->action)) + &fte->action, + &fte->flow_context)) return ERR_PTR(-EINVAL); else if (!mlx5_is_fpga_ipsec_rule(mdev, fg->mask.match_criteria_enable, @@ -989,32 +991,33 @@ static enum fs_flow_table_type egress_to_fs_ft(bool egress) return egress ? FS_FT_NIC_TX : FS_FT_NIC_RX; } -static int fpga_ipsec_fs_create_flow_group(struct mlx5_core_dev *dev, +static int fpga_ipsec_fs_create_flow_group(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 *in, - unsigned int *group_id, + struct mlx5_flow_group *fg, bool is_egress) { - int (*create_flow_group)(struct mlx5_core_dev *dev, + int (*create_flow_group)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 *in, - unsigned int *group_id) = + struct mlx5_flow_group *fg) = mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group; char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria.misc_parameters); + struct mlx5_core_dev *dev = ns->dev; u32 saved_outer_esp_spi_mask; u8 match_criteria_enable; int ret; if (MLX5_CAP_FLOWTABLE(dev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) - return create_flow_group(dev, ft, in, group_id); + return create_flow_group(ns, ft, in, fg); match_criteria_enable = MLX5_GET(create_flow_group_in, in, match_criteria_enable); saved_outer_esp_spi_mask = MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi); if (!match_criteria_enable || !saved_outer_esp_spi_mask) - return create_flow_group(dev, ft, in, group_id); + return create_flow_group(ns, ft, in, fg); MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0); @@ -1023,7 +1026,7 @@ static int fpga_ipsec_fs_create_flow_group(struct mlx5_core_dev *dev, MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS); - ret = create_flow_group(dev, ft, in, group_id); + ret = create_flow_group(ns, ft, in, fg); MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask); MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable); @@ -1031,17 +1034,18 @@ static int fpga_ipsec_fs_create_flow_group(struct mlx5_core_dev *dev, return ret; } -static int fpga_ipsec_fs_create_fte(struct mlx5_core_dev *dev, +static int fpga_ipsec_fs_create_fte(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_group *fg, struct fs_fte *fte, bool is_egress) { - int (*create_fte)(struct mlx5_core_dev *dev, + int (*create_fte)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_group *fg, struct fs_fte *fte) = mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte; + struct mlx5_core_dev *dev = ns->dev; struct mlx5_fpga_device *fdev = dev->fpga; struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; struct mlx5_fpga_ipsec_rule *rule; @@ -1053,7 +1057,7 @@ static int fpga_ipsec_fs_create_fte(struct mlx5_core_dev *dev, !(fte->action.action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) - return create_fte(dev, ft, fg, fte); + return create_fte(ns, ft, fg, fte); rule = kzalloc(sizeof(*rule), GFP_KERNEL); if (!rule) @@ -1070,7 +1074,7 @@ static int fpga_ipsec_fs_create_fte(struct mlx5_core_dev *dev, WARN_ON(rule_insert(fipsec, rule)); modify_spec_mailbox(dev, fte, &mbox_mod); - ret = create_fte(dev, ft, fg, fte); + ret = create_fte(ns, ft, fg, fte); restore_spec_mailbox(fte, &mbox_mod); if (ret) { _rule_delete(fipsec, rule); @@ -1081,19 +1085,20 @@ static int fpga_ipsec_fs_create_fte(struct mlx5_core_dev *dev, return ret; } -static int fpga_ipsec_fs_update_fte(struct mlx5_core_dev *dev, +static int fpga_ipsec_fs_update_fte(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, - unsigned int group_id, + struct mlx5_flow_group *fg, int modify_mask, struct fs_fte *fte, bool is_egress) { - int (*update_fte)(struct mlx5_core_dev *dev, + int (*update_fte)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, - unsigned int group_id, + struct mlx5_flow_group *fg, int modify_mask, struct fs_fte *fte) = mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->update_fte; + struct mlx5_core_dev *dev = ns->dev; bool is_esp = fte->action.esp_id; struct mailbox_mod mbox_mod; int ret; @@ -1102,24 +1107,25 @@ static int fpga_ipsec_fs_update_fte(struct mlx5_core_dev *dev, !(fte->action.action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) - return update_fte(dev, ft, group_id, modify_mask, fte); + return update_fte(ns, ft, fg, modify_mask, fte); modify_spec_mailbox(dev, fte, &mbox_mod); - ret = update_fte(dev, ft, group_id, modify_mask, fte); + ret = update_fte(ns, ft, fg, modify_mask, fte); restore_spec_mailbox(fte, &mbox_mod); return ret; } -static int fpga_ipsec_fs_delete_fte(struct mlx5_core_dev *dev, +static int fpga_ipsec_fs_delete_fte(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct fs_fte *fte, bool is_egress) { - int (*delete_fte)(struct mlx5_core_dev *dev, + int (*delete_fte)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct fs_fte *fte) = mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte; + struct mlx5_core_dev *dev = ns->dev; struct mlx5_fpga_device *fdev = dev->fpga; struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; struct mlx5_fpga_ipsec_rule *rule; @@ -1131,7 +1137,7 @@ static int fpga_ipsec_fs_delete_fte(struct mlx5_core_dev *dev, !(fte->action.action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) - return delete_fte(dev, ft, fte); + return delete_fte(ns, ft, fte); rule = rule_search(fipsec, fte); if (!rule) @@ -1141,84 +1147,84 @@ static int fpga_ipsec_fs_delete_fte(struct mlx5_core_dev *dev, rule_delete(fipsec, rule); modify_spec_mailbox(dev, fte, &mbox_mod); - ret = delete_fte(dev, ft, fte); + ret = delete_fte(ns, ft, fte); restore_spec_mailbox(fte, &mbox_mod); return ret; } static int -mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_core_dev *dev, +mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 *in, - unsigned int *group_id) + struct mlx5_flow_group *fg) { - return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, true); + return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, true); } static int -mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_core_dev *dev, +mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_group *fg, struct fs_fte *fte) { - return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, true); + return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, true); } static int -mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_core_dev *dev, +mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, - unsigned int group_id, + struct mlx5_flow_group *fg, int modify_mask, struct fs_fte *fte) { - return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte, + return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte, true); } static int -mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_core_dev *dev, +mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct fs_fte *fte) { - return fpga_ipsec_fs_delete_fte(dev, ft, fte, true); + return fpga_ipsec_fs_delete_fte(ns, ft, fte, true); } static int -mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_core_dev *dev, +mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 *in, - unsigned int *group_id) + struct mlx5_flow_group *fg) { - return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, false); + return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, false); } static int -mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_core_dev *dev, +mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_group *fg, struct fs_fte *fte) { - return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, false); + return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, false); } static int -mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_core_dev *dev, +mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, - unsigned int group_id, + struct mlx5_flow_group *fg, int modify_mask, struct fs_fte *fte) { - return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte, + return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte, false); } static int -mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_core_dev *dev, +mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct fs_fte *fte) { - return fpga_ipsec_fs_delete_fte(dev, ft, fte, false); + return fpga_ipsec_fs_delete_fte(ns, ft, fte, false); } static struct mlx5_flow_cmds fpga_ipsec_ingress; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h index 2b5e63b0d4d6..382985e65b48 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h @@ -37,8 +37,6 @@ #include "accel/ipsec.h" #include "fs_cmd.h" -#ifdef CONFIG_MLX5_FPGA - u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev); unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev); int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, @@ -66,77 +64,4 @@ int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, const struct mlx5_flow_cmds * mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type); -#else - -static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) -{ - return 0; -} - -static inline unsigned int -mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev) -{ - return 0; -} - -static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, - u64 *counters) -{ - return 0; -} - -static inline void * -mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *accel_xfrm, - const __be32 saddr[4], - const __be32 daddr[4], - const __be32 spi, bool is_ipv6) -{ - return NULL; -} - -static inline void mlx5_fpga_ipsec_delete_sa_ctx(void *context) -{ -} - -static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) -{ - return 0; -} - -static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) -{ -} - -static inline void mlx5_fpga_ipsec_build_fs_cmds(void) -{ -} - -static inline struct mlx5_accel_esp_xfrm * -mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) -{ -} - -static inline int -mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs) -{ - return -EOPNOTSUPP; -} - -static inline const struct mlx5_flow_cmds * -mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) -{ - return mlx5_fs_cmd_get_default(type); -} - -#endif /* CONFIG_MLX5_FPGA */ - #endif /* __MLX5_FPGA_SADB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index c44ccb67c4a3..7ac1249eadc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -39,7 +39,7 @@ #include "mlx5_core.h" #include "eswitch.h" -static int mlx5_cmd_stub_update_root_ft(struct mlx5_core_dev *dev, +static int mlx5_cmd_stub_update_root_ft(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 underlay_qpn, bool disconnect) @@ -47,47 +47,43 @@ static int mlx5_cmd_stub_update_root_ft(struct mlx5_core_dev *dev, return 0; } -static int mlx5_cmd_stub_create_flow_table(struct mlx5_core_dev *dev, - u16 vport, - enum fs_flow_table_op_mod op_mod, - enum fs_flow_table_type type, - unsigned int level, +static int mlx5_cmd_stub_create_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, unsigned int log_size, - struct mlx5_flow_table *next_ft, - unsigned int *table_id, u32 flags) + struct mlx5_flow_table *next_ft) { return 0; } -static int mlx5_cmd_stub_destroy_flow_table(struct mlx5_core_dev *dev, +static int mlx5_cmd_stub_destroy_flow_table(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft) { return 0; } -static int mlx5_cmd_stub_modify_flow_table(struct mlx5_core_dev *dev, +static int mlx5_cmd_stub_modify_flow_table(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_table *next_ft) { return 0; } -static int mlx5_cmd_stub_create_flow_group(struct mlx5_core_dev *dev, +static int mlx5_cmd_stub_create_flow_group(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 *in, - unsigned int *group_id) + struct mlx5_flow_group *fg) { return 0; } -static int mlx5_cmd_stub_destroy_flow_group(struct mlx5_core_dev *dev, +static int mlx5_cmd_stub_destroy_flow_group(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, - unsigned int group_id) + struct mlx5_flow_group *fg) { return 0; } -static int mlx5_cmd_stub_create_fte(struct mlx5_core_dev *dev, +static int mlx5_cmd_stub_create_fte(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_group *group, struct fs_fte *fte) @@ -95,28 +91,29 @@ static int mlx5_cmd_stub_create_fte(struct mlx5_core_dev *dev, return 0; } -static int mlx5_cmd_stub_update_fte(struct mlx5_core_dev *dev, +static int mlx5_cmd_stub_update_fte(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, - unsigned int group_id, + struct mlx5_flow_group *group, int modify_mask, struct fs_fte *fte) { return -EOPNOTSUPP; } -static int mlx5_cmd_stub_delete_fte(struct mlx5_core_dev *dev, +static int mlx5_cmd_stub_delete_fte(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct fs_fte *fte) { return 0; } -static int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, +static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 underlay_qpn, bool disconnect) { u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0}; u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0}; + struct mlx5_core_dev *dev = ns->dev; if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && underlay_qpn == 0) @@ -143,29 +140,27 @@ static int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, - u16 vport, - enum fs_flow_table_op_mod op_mod, - enum fs_flow_table_type type, - unsigned int level, +static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, unsigned int log_size, - struct mlx5_flow_table *next_ft, - unsigned int *table_id, u32 flags) + struct mlx5_flow_table *next_ft) { - int en_encap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); - int en_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); + int en_decap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + int term = !!(ft->flags & MLX5_FLOW_TABLE_TERMINATION); u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; + struct mlx5_core_dev *dev = ns->dev; int err; MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); - MLX5_SET(create_flow_table_in, in, table_type, type); - MLX5_SET(create_flow_table_in, in, flow_table_context.level, level); + MLX5_SET(create_flow_table_in, in, table_type, ft->type); + MLX5_SET(create_flow_table_in, in, flow_table_context.level, ft->level); MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, log_size); - if (vport) { - MLX5_SET(create_flow_table_in, in, vport_number, vport); + if (ft->vport) { + MLX5_SET(create_flow_table_in, in, vport_number, ft->vport); MLX5_SET(create_flow_table_in, in, other_vport, 1); } @@ -173,14 +168,21 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, en_decap); MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, en_encap); + MLX5_SET(create_flow_table_in, in, flow_table_context.termination_table, + term); - switch (op_mod) { + switch (ft->op_mod) { case FS_FT_OP_MOD_NORMAL: if (next_ft) { MLX5_SET(create_flow_table_in, in, - flow_table_context.table_miss_action, 1); + flow_table_context.table_miss_action, + MLX5_FLOW_TABLE_MISS_ACTION_FWD); MLX5_SET(create_flow_table_in, in, flow_table_context.table_miss_id, next_ft->id); + } else { + MLX5_SET(create_flow_table_in, in, + flow_table_context.table_miss_action, + ns->def_miss_action); } break; @@ -195,16 +197,17 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) - *table_id = MLX5_GET(create_flow_table_out, out, - table_id); + ft->id = MLX5_GET(create_flow_table_out, out, + table_id); return err; } -static int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, +static int mlx5_cmd_destroy_flow_table(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft) { u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {0}; u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0}; + struct mlx5_core_dev *dev = ns->dev; MLX5_SET(destroy_flow_table_in, in, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE); @@ -218,12 +221,13 @@ static int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -static int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, +static int mlx5_cmd_modify_flow_table(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_table *next_ft) { u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {0}; u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0}; + struct mlx5_core_dev *dev = ns->dev; MLX5_SET(modify_flow_table_in, in, opcode, MLX5_CMD_OP_MODIFY_FLOW_TABLE); @@ -250,26 +254,29 @@ static int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID); if (next_ft) { MLX5_SET(modify_flow_table_in, in, - flow_table_context.table_miss_action, 1); + flow_table_context.table_miss_action, + MLX5_FLOW_TABLE_MISS_ACTION_FWD); MLX5_SET(modify_flow_table_in, in, flow_table_context.table_miss_id, next_ft->id); } else { MLX5_SET(modify_flow_table_in, in, - flow_table_context.table_miss_action, 0); + flow_table_context.table_miss_action, + ns->def_miss_action); } } return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -static int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, +static int mlx5_cmd_create_flow_group(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 *in, - unsigned int *group_id) + struct mlx5_flow_group *fg) { u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = ns->dev; int err; MLX5_SET(create_flow_group_in, in, opcode, @@ -283,23 +290,24 @@ static int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) - *group_id = MLX5_GET(create_flow_group_out, out, - group_id); + fg->id = MLX5_GET(create_flow_group_out, out, + group_id); return err; } -static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, +static int mlx5_cmd_destroy_flow_group(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, - unsigned int group_id) + struct mlx5_flow_group *fg) { u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {0}; + struct mlx5_core_dev *dev = ns->dev; MLX5_SET(destroy_flow_group_in, in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); MLX5_SET(destroy_flow_group_in, in, table_type, ft->type); MLX5_SET(destroy_flow_group_in, in, table_id, ft->id); - MLX5_SET(destroy_flow_group_in, in, group_id, group_id); + MLX5_SET(destroy_flow_group_in, in, group_id, fg->id); if (ft->vport) { MLX5_SET(destroy_flow_group_in, in, vport_number, ft->vport); MLX5_SET(destroy_flow_group_in, in, other_vport, 1); @@ -388,7 +396,11 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); MLX5_SET(flow_context, in_flow_context, group_id, group_id); - MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_tag, + fte->flow_context.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_source, + fte->flow_context.flow_source); + MLX5_SET(flow_context, in_flow_context, extended_destination, extended_dest); if (extended_dest) { @@ -505,23 +517,25 @@ err_out: return err; } -static int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, +static int mlx5_cmd_create_fte(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_group *group, struct fs_fte *fte) { + struct mlx5_core_dev *dev = ns->dev; unsigned int group_id = group->id; return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte); } -static int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, +static int mlx5_cmd_update_fte(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, - unsigned int group_id, + struct mlx5_flow_group *fg, int modify_mask, struct fs_fte *fte) { int opmod; + struct mlx5_core_dev *dev = ns->dev; int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev, flow_table_properties_nic_receive. flow_modify_en); @@ -529,15 +543,16 @@ static int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, return -EOPNOTSUPP; opmod = 1; - return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte); + return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, fg->id, fte); } -static int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, +static int mlx5_cmd_delete_fte(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct fs_fte *fte) { u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0}; u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {0}; + struct mlx5_core_dev *dev = ns->dev; MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); MLX5_SET(delete_fte_in, in, table_type, ft->type); @@ -760,6 +775,10 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); table_type = FS_FT_NIC_TX; break; + case MLX5_FLOW_NAMESPACE_ESW_INGRESS: + max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions); + table_type = FS_FT_ESW_INGRESS_ACL; + break; default: return -EOPNOTSUPP; } @@ -853,6 +872,7 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ case FS_FT_SNIFFER_RX: case FS_FT_SNIFFER_TX: case FS_FT_NIC_TX: + case FS_FT_RDMA_RX: return mlx5_fs_cmd_get_fw_cmds(); default: return mlx5_fs_cmd_get_stub_cmds(); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 6228ba7bfa1a..e340f9af2f5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -36,45 +36,42 @@ #include "fs_core.h" struct mlx5_flow_cmds { - int (*create_flow_table)(struct mlx5_core_dev *dev, - u16 vport, - enum fs_flow_table_op_mod op_mod, - enum fs_flow_table_type type, - unsigned int level, unsigned int log_size, - struct mlx5_flow_table *next_ft, - unsigned int *table_id, u32 flags); - int (*destroy_flow_table)(struct mlx5_core_dev *dev, + int (*create_flow_table)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + unsigned int log_size, + struct mlx5_flow_table *next_ft); + int (*destroy_flow_table)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft); - int (*modify_flow_table)(struct mlx5_core_dev *dev, + int (*modify_flow_table)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_table *next_ft); - int (*create_flow_group)(struct mlx5_core_dev *dev, + int (*create_flow_group)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 *in, - unsigned int *group_id); + struct mlx5_flow_group *fg); - int (*destroy_flow_group)(struct mlx5_core_dev *dev, + int (*destroy_flow_group)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, - unsigned int group_id); + struct mlx5_flow_group *fg); - int (*create_fte)(struct mlx5_core_dev *dev, + int (*create_fte)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_group *fg, struct fs_fte *fte); - int (*update_fte)(struct mlx5_core_dev *dev, + int (*update_fte)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, - unsigned int group_id, + struct mlx5_flow_group *fg, int modify_mask, struct fs_fte *fte); - int (*delete_fte)(struct mlx5_core_dev *dev, + int (*delete_fte)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct fs_fte *fte); - int (*update_root_ft)(struct mlx5_core_dev *dev, + int (*update_root_ft)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 underlay_qpn, bool disconnect); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 0be3eb86dd84..3e99799bdb40 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -403,7 +403,7 @@ static void del_hw_flow_table(struct fs_node *node) trace_mlx5_fs_del_ft(ft); if (node->active) { - err = root->cmds->destroy_flow_table(dev, ft); + err = root->cmds->destroy_flow_table(root, ft); if (err) mlx5_core_warn(dev, "flow steering can't destroy ft\n"); } @@ -435,7 +435,7 @@ static void modify_fte(struct fs_fte *fte) dev = get_dev(&fte->node); root = find_root(&ft->node); - err = root->cmds->update_fte(dev, ft, fg->id, fte->modify_mask, fte); + err = root->cmds->update_fte(root, ft, fg, fte->modify_mask, fte); if (err) mlx5_core_warn(dev, "%s can't del rule fg id=%d fte_index=%d\n", @@ -492,7 +492,7 @@ static void del_hw_fte(struct fs_node *node) dev = get_dev(&ft->node); root = find_root(&ft->node); if (node->active) { - err = root->cmds->delete_fte(dev, ft, fte); + err = root->cmds->delete_fte(root, ft, fte); if (err) mlx5_core_warn(dev, "flow steering can't delete fte in index %d of flow group id %d\n", @@ -532,7 +532,7 @@ static void del_hw_flow_group(struct fs_node *node) trace_mlx5_fs_del_fg(fg); root = find_root(&ft->node); - if (fg->node.active && root->cmds->destroy_flow_group(dev, ft, fg->id)) + if (fg->node.active && root->cmds->destroy_flow_group(root, ft, fg)) mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", fg->id, ft->id); } @@ -584,7 +584,7 @@ err_ida_remove: } static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, - u32 *match_value, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act) { struct mlx5_flow_steering *steering = get_steering(&ft->node); @@ -594,9 +594,10 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, if (!fte) return ERR_PTR(-ENOMEM); - memcpy(fte->val, match_value, sizeof(fte->val)); + memcpy(fte->val, &spec->match_value, sizeof(fte->val)); fte->node.type = FS_TYPE_FLOW_ENTRY; fte->action = *flow_act; + fte->flow_context = spec->flow_context; tree_init_node(&fte->node, NULL, del_sw_fte); @@ -612,7 +613,7 @@ static void dealloc_flow_group(struct mlx5_flow_steering *steering, static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering, u8 match_criteria_enable, - void *match_criteria, + const void *match_criteria, int start_index, int end_index) { @@ -642,7 +643,7 @@ static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steer static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft, u8 match_criteria_enable, - void *match_criteria, + const void *match_criteria, int start_index, int end_index, struct list_head *prev) @@ -783,7 +784,7 @@ static int connect_fts_in_prio(struct mlx5_core_dev *dev, fs_for_each_ft(iter, prio) { i++; - err = root->cmds->modify_flow_table(dev, iter, ft); + err = root->cmds->modify_flow_table(root, iter, ft); if (err) { mlx5_core_warn(dev, "Failed to modify flow table %d\n", iter->id); @@ -819,7 +820,7 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio struct mlx5_flow_root_namespace *root = find_root(&prio->node); struct mlx5_ft_underlay_qp *uqp; int min_level = INT_MAX; - int err; + int err = 0; u32 qpn; if (root->root_ft) @@ -831,11 +832,11 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio if (list_empty(&root->underlay_qpns)) { /* Don't set any QPN (zero) in case QPN list is empty */ qpn = 0; - err = root->cmds->update_root_ft(root->dev, ft, qpn, false); + err = root->cmds->update_root_ft(root, ft, qpn, false); } else { list_for_each_entry(uqp, &root->underlay_qpns, list) { qpn = uqp->qpn; - err = root->cmds->update_root_ft(root->dev, ft, + err = root->cmds->update_root_ft(root, ft, qpn, false); if (err) break; @@ -871,7 +872,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, memcpy(&rule->dest_attr, dest, sizeof(*dest)); root = find_root(&ft->node); - err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id, + err = root->cmds->update_fte(root, ft, fg, modify_mask, fte); up_write_ref_node(&fte->node, false); @@ -1013,9 +1014,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); - err = root->cmds->create_flow_table(root->dev, ft->vport, ft->op_mod, - ft->type, ft->level, log_table_sz, - next_ft, &ft->id, ft->flags); + err = root->cmds->create_flow_table(root, ft, log_table_sz, next_ft); if (err) goto free_ft; @@ -1032,7 +1031,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa trace_mlx5_fs_add_ft(ft); return ft; destroy_ft: - root->cmds->destroy_flow_table(root->dev, ft); + root->cmds->destroy_flow_table(root, ft); free_ft: kfree(ft); unlock_root: @@ -1114,7 +1113,6 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, start_flow_index); int end_index = MLX5_GET(create_flow_group_in, fg_in, end_flow_index); - struct mlx5_core_dev *dev = get_dev(&ft->node); struct mlx5_flow_group *fg; int err; @@ -1129,7 +1127,7 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, if (IS_ERR(fg)) return fg; - err = root->cmds->create_flow_group(dev, ft, fg_in, &fg->id); + err = root->cmds->create_flow_group(root, ft, fg_in, fg); if (err) { tree_put_node(&fg->node, false); return ERR_PTR(err); @@ -1269,11 +1267,9 @@ add_rule_fte(struct fs_fte *fte, fs_get_obj(ft, fg->node.parent); root = find_root(&fg->node); if (!(fte->status & FS_FTE_STATUS_EXISTING)) - err = root->cmds->create_fte(get_dev(&ft->node), - ft, fg, fte); + err = root->cmds->create_fte(root, ft, fg, fte); else - err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id, - modify_mask, fte); + err = root->cmds->update_fte(root, ft, fg, modify_mask, fte); if (err) goto free_handle; @@ -1290,7 +1286,7 @@ free_handle: } static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec) + const struct mlx5_flow_spec *spec) { struct list_head *prev = &ft->node.children; struct mlx5_flow_group *fg; @@ -1339,7 +1335,6 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft, struct mlx5_flow_group *fg) { struct mlx5_flow_root_namespace *root = find_root(&ft->node); - struct mlx5_core_dev *dev = get_dev(&ft->node); int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); void *match_criteria_addr; u8 src_esw_owner_mask_on; @@ -1369,7 +1364,7 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft, memcpy(match_criteria_addr, fg->mask.match_criteria, sizeof(fg->mask.match_criteria)); - err = root->cmds->create_flow_group(dev, ft, in, &fg->id); + err = root->cmds->create_flow_group(root, ft, in, fg); if (!err) { fg->node.active = true; trace_mlx5_fs_add_fg(fg); @@ -1386,6 +1381,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && d1->vport.num == d2->vport.num && d1->vport.flags == d2->vport.flags && + ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ? + (d1->vport.vhca_id == d2->vport.vhca_id) : true) && ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ? (d1->vport.reformat_id == d2->vport.reformat_id) : true)) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && @@ -1434,7 +1431,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2) return false; } -static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act) +static int check_conflicting_ftes(struct fs_fte *fte, + const struct mlx5_flow_context *flow_context, + const struct mlx5_flow_act *flow_act) { if (check_conflicting_actions(flow_act->action, fte->action.action)) { mlx5_core_warn(get_dev(&fte->node), @@ -1442,12 +1441,12 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act return -EEXIST; } - if ((flow_act->flags & FLOW_ACT_HAS_TAG) && - fte->action.flow_tag != flow_act->flow_tag) { + if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) && + fte->flow_context.flow_tag != flow_context->flow_tag) { mlx5_core_warn(get_dev(&fte->node), "FTE flow tag %u already exists with different flow tag %u\n", - fte->action.flow_tag, - flow_act->flow_tag); + fte->flow_context.flow_tag, + flow_context->flow_tag); return -EEXIST; } @@ -1455,7 +1454,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act } static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, - u32 *match_value, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num, @@ -1466,7 +1465,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, int i; int ret; - ret = check_conflicting_ftes(fte, flow_act); + ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act); if (ret) return ERR_PTR(ret); @@ -1540,7 +1539,7 @@ static void free_match_list(struct match_list_head *head) static int build_match_list(struct match_list_head *match_head, struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec) + const struct mlx5_flow_spec *spec) { struct rhlist_head *tmp, *list; struct mlx5_flow_group *g; @@ -1593,7 +1592,7 @@ static u64 matched_fgs_get_version(struct list_head *match_head) static struct fs_fte * lookup_fte_locked(struct mlx5_flow_group *g, - u32 *match_value, + const u32 *match_value, bool take_write) { struct fs_fte *fte_tmp; @@ -1626,7 +1625,7 @@ out: static struct mlx5_flow_handle * try_add_to_existing_fg(struct mlx5_flow_table *ft, struct list_head *match_head, - struct mlx5_flow_spec *spec, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num, @@ -1641,7 +1640,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, u64 version; int err; - fte = alloc_fte(ft, spec->match_value, flow_act); + fte = alloc_fte(ft, spec, flow_act); if (IS_ERR(fte)) return ERR_PTR(-ENOMEM); @@ -1657,8 +1656,7 @@ search_again_locked: fte_tmp = lookup_fte_locked(g, spec->match_value, take_write); if (!fte_tmp) continue; - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte_tmp); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp); up_write_ref_node(&fte_tmp->node, false); tree_put_node(&fte_tmp->node, false); kmem_cache_free(steering->ftes_cache, fte); @@ -1705,8 +1703,7 @@ skip_search: nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); return rule; @@ -1719,7 +1716,7 @@ out: static struct mlx5_flow_handle * _mlx5_add_flow_rules(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num) @@ -1792,7 +1789,7 @@ search_again_locked: if (err) goto err_release_fg; - fte = alloc_fte(ft, spec->match_value, flow_act); + fte = alloc_fte(ft, spec, flow_act); if (IS_ERR(fte)) { err = PTR_ERR(fte); goto err_release_fg; @@ -1806,8 +1803,7 @@ search_again_locked: nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); - rule = add_rule_fg(g, spec->match_value, flow_act, dest, - dest_num, fte); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); tree_put_node(&g->node, false); @@ -1827,7 +1823,7 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) struct mlx5_flow_handle * mlx5_add_flow_rules(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int num_dest) @@ -1941,12 +1937,12 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft) if (list_empty(&root->underlay_qpns)) { /* Don't set any QPN (zero) in case QPN list is empty */ qpn = 0; - err = root->cmds->update_root_ft(root->dev, new_root_ft, + err = root->cmds->update_root_ft(root, new_root_ft, qpn, false); } else { list_for_each_entry(uqp, &root->underlay_qpns, list) { qpn = uqp->qpn; - err = root->cmds->update_root_ft(root->dev, + err = root->cmds->update_root_ft(root, new_root_ft, qpn, false); if (err) @@ -2060,6 +2056,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, if (steering->sniffer_tx_root_ns) return &steering->sniffer_tx_root_ns->ns; return NULL; + case MLX5_FLOW_NAMESPACE_RDMA_RX: + if (steering->rdma_rx_root_ns) + return &steering->rdma_rx_root_ns->ns; + return NULL; default: break; } @@ -2092,7 +2092,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d { struct mlx5_flow_steering *steering = dev->priv.steering; - if (!steering || vport >= MLX5_TOTAL_VPORTS(dev)) + if (!steering || vport >= mlx5_eswitch_get_total_vports(dev)) return NULL; switch (type) { @@ -2284,7 +2284,7 @@ static struct mlx5_flow_root_namespace cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type); /* Create the root namespace */ - root_ns = kvzalloc(sizeof(*root_ns), GFP_KERNEL); + root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL); if (!root_ns) return NULL; @@ -2423,10 +2423,11 @@ static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev) if (!steering->esw_egress_root_ns) return; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) + for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++) cleanup_root_ns(steering->esw_egress_root_ns[i]); kfree(steering->esw_egress_root_ns); + steering->esw_egress_root_ns = NULL; } static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev) @@ -2437,10 +2438,11 @@ static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev) if (!steering->esw_ingress_root_ns) return; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) + for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++) cleanup_root_ns(steering->esw_ingress_root_ns[i]); kfree(steering->esw_ingress_root_ns); + steering->esw_ingress_root_ns = NULL; } void mlx5_cleanup_fs(struct mlx5_core_dev *dev) @@ -2456,6 +2458,7 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) steering->fdb_sub_ns = NULL; cleanup_root_ns(steering->sniffer_rx_root_ns); cleanup_root_ns(steering->sniffer_tx_root_ns); + cleanup_root_ns(steering->rdma_rx_root_ns); cleanup_root_ns(steering->egress_root_ns); mlx5_cleanup_fc_stats(dev); kmem_cache_destroy(steering->ftes_cache); @@ -2473,11 +2476,7 @@ static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering) /* Create single prio */ prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1); - if (IS_ERR(prio)) { - cleanup_root_ns(steering->sniffer_tx_root_ns); - return PTR_ERR(prio); - } - return 0; + return PTR_ERR_OR_ZERO(prio); } static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering) @@ -2490,13 +2489,24 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering) /* Create single prio */ prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1); - if (IS_ERR(prio)) { - cleanup_root_ns(steering->sniffer_rx_root_ns); - return PTR_ERR(prio); - } - return 0; + return PTR_ERR_OR_ZERO(prio); } +static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->rdma_rx_root_ns = create_root_ns(steering, FS_FT_RDMA_RX); + if (!steering->rdma_rx_root_ns) + return -ENOMEM; + + steering->rdma_rx_root_ns->def_miss_action = + MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN; + + /* Create single prio */ + prio = fs_create_prio(&steering->rdma_rx_root_ns->ns, 0, 1); + return PTR_ERR_OR_ZERO(prio); +} static int init_fdb_root_ns(struct mlx5_flow_steering *steering) { struct mlx5_flow_namespace *ns; @@ -2516,8 +2526,16 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) if (!steering->fdb_sub_ns) return -ENOMEM; + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH, + 1); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); + goto out_err; + } + levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1); - maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, 0, + maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, + FDB_FAST_PATH, levels); if (IS_ERR(maj_prio)) { err = PTR_ERR(maj_prio); @@ -2542,7 +2560,7 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) steering->fdb_sub_ns[chain] = ns; } - maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1); + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1); if (IS_ERR(maj_prio)) { err = PTR_ERR(maj_prio); goto out_err; @@ -2588,16 +2606,18 @@ static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vpo static int init_egress_acls_root_ns(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; + int total_vports = mlx5_eswitch_get_total_vports(dev); int err; int i; - steering->esw_egress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev), - sizeof(*steering->esw_egress_root_ns), - GFP_KERNEL); + steering->esw_egress_root_ns = + kcalloc(total_vports, + sizeof(*steering->esw_egress_root_ns), + GFP_KERNEL); if (!steering->esw_egress_root_ns) return -ENOMEM; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) { + for (i = 0; i < total_vports; i++) { err = init_egress_acl_root_ns(steering, i); if (err) goto cleanup_root_ns; @@ -2609,22 +2629,25 @@ cleanup_root_ns: for (i--; i >= 0; i--) cleanup_root_ns(steering->esw_egress_root_ns[i]); kfree(steering->esw_egress_root_ns); + steering->esw_egress_root_ns = NULL; return err; } static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; + int total_vports = mlx5_eswitch_get_total_vports(dev); int err; int i; - steering->esw_ingress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev), - sizeof(*steering->esw_ingress_root_ns), - GFP_KERNEL); + steering->esw_ingress_root_ns = + kcalloc(total_vports, + sizeof(*steering->esw_ingress_root_ns), + GFP_KERNEL); if (!steering->esw_ingress_root_ns) return -ENOMEM; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) { + for (i = 0; i < total_vports; i++) { err = init_ingress_acl_root_ns(steering, i); if (err) goto cleanup_root_ns; @@ -2636,6 +2659,7 @@ cleanup_root_ns: for (i--; i >= 0; i--) cleanup_root_ns(steering->esw_ingress_root_ns[i]); kfree(steering->esw_ingress_root_ns); + steering->esw_ingress_root_ns = NULL; return err; } @@ -2725,6 +2749,13 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) goto err; } + if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) && + MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)) { + err = init_rdma_rx_root_ns(steering); + if (err) + goto err; + } + if (MLX5_IPSEC_DEV(dev) || MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) { err = init_egress_root_ns(steering); if (err) @@ -2754,7 +2785,7 @@ int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) goto update_ft_fail; } - err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn, + err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn, false); if (err) { mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n", @@ -2798,7 +2829,7 @@ int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) goto out; } - err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn, + err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn, true); if (err) mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 87de0e4d9124..c48c382f926f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -67,6 +67,7 @@ enum fs_flow_table_type { FS_FT_FDB = 0X4, FS_FT_SNIFFER_RX = 0X5, FS_FT_SNIFFER_TX = 0X6, + FS_FT_RDMA_RX = 0X7, FS_FT_MAX_TYPE = FS_FT_SNIFFER_TX, }; @@ -90,6 +91,7 @@ struct mlx5_flow_steering { struct mlx5_flow_root_namespace **esw_ingress_root_ns; struct mlx5_flow_root_namespace *sniffer_tx_root_ns; struct mlx5_flow_root_namespace *sniffer_rx_root_ns; + struct mlx5_flow_root_namespace *rdma_rx_root_ns; struct mlx5_flow_root_namespace *egress_root_ns; }; @@ -150,7 +152,7 @@ struct mlx5_ft_underlay_qp { u32 qpn; }; -#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_800 +#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_a00 /* Calculate the fte_match_param length and without the reserved length. * Make sure the reserved field is the last. */ @@ -168,6 +170,7 @@ struct fs_fte { u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; u32 index; + struct mlx5_flow_context flow_context; struct mlx5_flow_act action; enum fs_fte_status status; struct mlx5_fc *counter; @@ -216,6 +219,7 @@ struct mlx5_flow_root_namespace { struct mutex chain_lock; struct list_head underlay_qpns; const struct mlx5_flow_cmds *cmds; + enum mlx5_flow_table_miss_action def_miss_action; }; int mlx5_init_fc_stats(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index c6c28f56aa29..b3762123a69c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -102,13 +102,15 @@ static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev, struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; unsigned long next_id = (unsigned long)id + 1; struct mlx5_fc *counter; + unsigned long tmp; rcu_read_lock(); /* skip counters that are in idr, but not yet in counters list */ - while ((counter = idr_get_next_ul(&fc_stats->counters_idr, - &next_id)) != NULL && - list_empty(&counter->list)) - next_id++; + idr_for_each_entry_continue_ul(&fc_stats->counters_idr, + counter, tmp, next_id) { + if (!list_empty(&counter->list)) + break; + } rcu_read_unlock(); return counter ? &counter->list : &fc_stats->counters; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 1ab6f7e3bec6..a19790dee7b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -37,6 +37,37 @@ #include "mlx5_core.h" #include "../../mlxfw/mlxfw.h" +enum { + MCQS_IDENTIFIER_BOOT_IMG = 0x1, + MCQS_IDENTIFIER_OEM_NVCONFIG = 0x4, + MCQS_IDENTIFIER_MLNX_NVCONFIG = 0x5, + MCQS_IDENTIFIER_CS_TOKEN = 0x6, + MCQS_IDENTIFIER_DBG_TOKEN = 0x7, + MCQS_IDENTIFIER_GEARBOX = 0xA, +}; + +enum { + MCQS_UPDATE_STATE_IDLE, + MCQS_UPDATE_STATE_IN_PROGRESS, + MCQS_UPDATE_STATE_APPLIED, + MCQS_UPDATE_STATE_ACTIVE, + MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET, + MCQS_UPDATE_STATE_FAILED, + MCQS_UPDATE_STATE_CANCELED, + MCQS_UPDATE_STATE_BUSY, +}; + +enum { + MCQI_INFO_TYPE_CAPABILITIES = 0x0, + MCQI_INFO_TYPE_VERSION = 0x1, + MCQI_INFO_TYPE_ACTIVATION_METHOD = 0x5, +}; + +enum { + MCQI_FW_RUNNING_VERSION = 0, + MCQI_FW_STORED_VERSION = 1, +}; + static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, int outlen) { @@ -202,6 +233,18 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, event_cap)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_EVENT); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, tls)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_TLS); + if (err) + return err; + } + return 0; } @@ -392,33 +435,49 @@ static int mlx5_reg_mcda_set(struct mlx5_core_dev *dev, } static int mlx5_reg_mcqi_query(struct mlx5_core_dev *dev, - u16 component_index, - u32 *max_component_size, - u8 *log_mcda_word_size, - u16 *mcda_max_write_size) + u16 component_index, bool read_pending, + u8 info_type, u16 data_size, void *mcqi_data) { - u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_ST_SZ_DW(mcqi_cap)]; - int offset = MLX5_ST_SZ_DW(mcqi_reg); - u32 in[MLX5_ST_SZ_DW(mcqi_reg)]; + u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_UN_SZ_DW(mcqi_reg_data)] = {}; + u32 in[MLX5_ST_SZ_DW(mcqi_reg)] = {}; + void *data; int err; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(mcqi_reg, in, component_index, component_index); - MLX5_SET(mcqi_reg, in, data_size, MLX5_ST_SZ_BYTES(mcqi_cap)); + MLX5_SET(mcqi_reg, in, read_pending_component, read_pending); + MLX5_SET(mcqi_reg, in, info_type, info_type); + MLX5_SET(mcqi_reg, in, data_size, data_size); err = mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_MCQI, 0, 0); + MLX5_ST_SZ_BYTES(mcqi_reg) + data_size, + MLX5_REG_MCQI, 0, 0); if (err) - goto out; + return err; - *max_component_size = MLX5_GET(mcqi_cap, out + offset, max_component_size); - *log_mcda_word_size = MLX5_GET(mcqi_cap, out + offset, log_mcda_word_size); - *mcda_max_write_size = MLX5_GET(mcqi_cap, out + offset, mcda_max_write_size); + data = MLX5_ADDR_OF(mcqi_reg, out, data); + memcpy(mcqi_data, data, data_size); -out: - return err; + return 0; +} + +static int mlx5_reg_mcqi_caps_query(struct mlx5_core_dev *dev, u16 component_index, + u32 *max_component_size, u8 *log_mcda_word_size, + u16 *mcda_max_write_size) +{ + u32 mcqi_reg[MLX5_ST_SZ_DW(mcqi_cap)] = {}; + int err; + + err = mlx5_reg_mcqi_query(dev, component_index, 0, + MCQI_INFO_TYPE_CAPABILITIES, + MLX5_ST_SZ_BYTES(mcqi_cap), mcqi_reg); + if (err) + return err; + + *max_component_size = MLX5_GET(mcqi_cap, mcqi_reg, max_component_size); + *log_mcda_word_size = MLX5_GET(mcqi_cap, mcqi_reg, log_mcda_word_size); + *mcda_max_write_size = MLX5_GET(mcqi_cap, mcqi_reg, mcda_max_write_size); + + return 0; } struct mlx5_mlxfw_dev { @@ -434,8 +493,13 @@ static int mlx5_component_query(struct mlxfw_dev *mlxfw_dev, container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; - return mlx5_reg_mcqi_query(dev, component_index, p_max_size, - p_align_bits, p_max_write_size); + if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi)) { + mlx5_core_warn(dev, "caps query isn't supported by running FW\n"); + return -EOPNOTSUPP; + } + + return mlx5_reg_mcqi_caps_query(dev, component_index, p_max_size, + p_align_bits, p_max_write_size); } static int mlx5_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle) @@ -552,7 +616,8 @@ static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = { }; int mlx5_firmware_flash(struct mlx5_core_dev *dev, - const struct firmware *firmware) + const struct firmware *firmware, + struct netlink_ext_ack *extack) { struct mlx5_mlxfw_dev mlx5_mlxfw_dev = { .mlxfw_dev = { @@ -571,5 +636,133 @@ int mlx5_firmware_flash(struct mlx5_core_dev *dev, return -EOPNOTSUPP; } - return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, firmware); + return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, + firmware, extack); +} + +static int mlx5_reg_mcqi_version_query(struct mlx5_core_dev *dev, + u16 component_index, bool read_pending, + u32 *mcqi_version_out) +{ + return mlx5_reg_mcqi_query(dev, component_index, read_pending, + MCQI_INFO_TYPE_VERSION, + MLX5_ST_SZ_BYTES(mcqi_version), + mcqi_version_out); +} + +static int mlx5_reg_mcqs_query(struct mlx5_core_dev *dev, u32 *out, + u16 component_index) +{ + u8 out_sz = MLX5_ST_SZ_BYTES(mcqs_reg); + u32 in[MLX5_ST_SZ_DW(mcqs_reg)] = {}; + int err; + + memset(out, 0, out_sz); + + MLX5_SET(mcqs_reg, in, component_index, component_index); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + out_sz, MLX5_REG_MCQS, 0, 0); + return err; +} + +/* scans component index sequentially, to find the boot img index */ +static int mlx5_get_boot_img_component_index(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(mcqs_reg)] = {}; + u16 identifier, component_idx = 0; + bool quit; + int err; + + do { + err = mlx5_reg_mcqs_query(dev, out, component_idx); + if (err) + return err; + + identifier = MLX5_GET(mcqs_reg, out, identifier); + quit = !!MLX5_GET(mcqs_reg, out, last_index_flag); + quit |= identifier == MCQS_IDENTIFIER_BOOT_IMG; + } while (!quit && ++component_idx); + + if (identifier != MCQS_IDENTIFIER_BOOT_IMG) { + mlx5_core_warn(dev, "mcqs: can't find boot_img component ix, last scanned idx %d\n", + component_idx); + return -EOPNOTSUPP; + } + + return component_idx; +} + +static int +mlx5_fw_image_pending(struct mlx5_core_dev *dev, + int component_index, + bool *pending_version_exists) +{ + u32 out[MLX5_ST_SZ_DW(mcqs_reg)]; + u8 component_update_state; + int err; + + err = mlx5_reg_mcqs_query(dev, out, component_index); + if (err) + return err; + + component_update_state = MLX5_GET(mcqs_reg, out, component_update_state); + + if (component_update_state == MCQS_UPDATE_STATE_IDLE) { + *pending_version_exists = false; + } else if (component_update_state == MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET) { + *pending_version_exists = true; + } else { + mlx5_core_warn(dev, + "mcqs: can't read pending fw version while fw state is %d\n", + component_update_state); + return -ENODATA; + } + return 0; +} + +int mlx5_fw_version_query(struct mlx5_core_dev *dev, + u32 *running_ver, u32 *pending_ver) +{ + u32 reg_mcqi_version[MLX5_ST_SZ_DW(mcqi_version)] = {}; + bool pending_version_exists; + int component_index; + int err; + + if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi) || + !MLX5_CAP_MCAM_REG(dev, mcqs)) { + mlx5_core_warn(dev, "fw query isn't supported by the FW\n"); + return -EOPNOTSUPP; + } + + component_index = mlx5_get_boot_img_component_index(dev); + if (component_index < 0) + return component_index; + + err = mlx5_reg_mcqi_version_query(dev, component_index, + MCQI_FW_RUNNING_VERSION, + reg_mcqi_version); + if (err) + return err; + + *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); + + err = mlx5_fw_image_pending(dev, component_index, &pending_version_exists); + if (err) + return err; + + if (!pending_version_exists) { + *pending_ver = 0; + return 0; + } + + err = mlx5_reg_mcqi_version_query(dev, component_index, + MCQI_FW_STORED_VERSION, + reg_mcqi_version); + if (err) + return err; + + *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); + + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index cb9fa3430c53..2fe6923f7ce0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -40,6 +40,8 @@ #include "mlx5_core.h" #include "lib/eq.h" #include "lib/mlx5.h" +#include "lib/pci_vsc.h" +#include "diag/fw_tracer.h" enum { MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, @@ -62,12 +64,20 @@ enum { enum { MLX5_DROP_NEW_HEALTH_WORK, - MLX5_DROP_NEW_RECOVERY_WORK, +}; + +enum { + MLX5_SENSOR_NO_ERR = 0, + MLX5_SENSOR_PCI_COMM_ERR = 1, + MLX5_SENSOR_PCI_ERR = 2, + MLX5_SENSOR_NIC_DISABLED = 3, + MLX5_SENSOR_NIC_SW_RESET = 4, + MLX5_SENSOR_FW_SYND_RFR = 5, }; u8 mlx5_get_nic_state(struct mlx5_core_dev *dev) { - return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; + return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7; } void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) @@ -80,18 +90,105 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) &dev->iseg->cmdq_addr_l_sz); } -static int in_fatal(struct mlx5_core_dev *dev) +static bool sensor_pci_not_working(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; struct health_buffer __iomem *h = health->health; + /* Offline PCI reads return 0xffffffff */ + return (ioread32be(&h->fw_ver) == 0xffffffff); +} + +static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET; + u8 synd = ioread8(&h->synd); + + if (rfr && synd) + mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd); + return rfr && synd; +} + +static u32 check_fatal_sensors(struct mlx5_core_dev *dev) +{ + if (sensor_pci_not_working(dev)) + return MLX5_SENSOR_PCI_COMM_ERR; + if (pci_channel_offline(dev->pdev)) + return MLX5_SENSOR_PCI_ERR; if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) - return 1; + return MLX5_SENSOR_NIC_DISABLED; + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET) + return MLX5_SENSOR_NIC_SW_RESET; + if (sensor_fw_synd_rfr(dev)) + return MLX5_SENSOR_FW_SYND_RFR; - if (ioread32be(&h->fw_ver) == 0xffffffff) - return 1; + return MLX5_SENSOR_NO_ERR; +} - return 0; +static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock) +{ + enum mlx5_vsc_state state; + int ret; + + if (!mlx5_core_is_pf(dev)) + return -EBUSY; + + /* Try to lock GW access, this stage doesn't return + * EBUSY because locked GW does not mean that other PF + * already started the reset. + */ + ret = mlx5_vsc_gw_lock(dev); + if (ret == -EBUSY) + return -EINVAL; + if (ret) + return ret; + + state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK; + /* At this stage, if the return status == EBUSY, then we know + * for sure that another PF started the reset, so don't allow + * another reset. + */ + ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state); + if (ret) + mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + + /* Unlock GW access */ + mlx5_vsc_gw_unlock(dev); + + return ret; +} + +static bool reset_fw_if_needed(struct mlx5_core_dev *dev) +{ + bool supported = (ioread32be(&dev->iseg->initializing) >> + MLX5_FW_RESET_SUPPORTED_OFFSET) & 1; + u32 fatal_error; + + if (!supported) + return false; + + /* The reset only needs to be issued by one PF. The health buffer is + * shared between all functions, and will be cleared during a reset. + * Check again to avoid a redundant 2nd reset. If the fatal erros was + * PCI related a reset won't help. + */ + fatal_error = check_fatal_sensors(dev); + if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR || + fatal_error == MLX5_SENSOR_NIC_DISABLED || + fatal_error == MLX5_SENSOR_NIC_SW_RESET) { + mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help."); + return false; + } + + mlx5_core_warn(dev, "Issuing FW Reset\n"); + /* Write the NIC interface field to initiate the reset, the command + * interface address also resides here, don't overwrite it. + */ + mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET); + + return true; } void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) @@ -99,14 +196,65 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) mutex_lock(&dev->intf_state_mutex); if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) goto unlock; + if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) { + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + goto unlock; + } - mlx5_core_err(dev, "start\n"); - if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { + if (check_fatal_sensors(dev) || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mlx5_cmd_flush(dev); } mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); +unlock: + mutex_unlock(&dev->intf_state_mutex); +} + +#define MLX5_CRDUMP_WAIT_MS 60000 +#define MLX5_FW_RESET_WAIT_MS 1000 +void mlx5_error_sw_reset(struct mlx5_core_dev *dev) +{ + unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS; + int lock = -EBUSY; + + mutex_lock(&dev->intf_state_mutex); + if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto unlock; + + mlx5_core_err(dev, "start\n"); + + if (check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) { + /* Get cr-dump and reset FW semaphore */ + lock = lock_sem_sw_reset(dev, true); + + if (lock == -EBUSY) { + delay_ms = MLX5_CRDUMP_WAIT_MS; + goto recover_from_sw_reset; + } + /* Execute SW reset */ + reset_fw_if_needed(dev); + } + +recover_from_sw_reset: + /* Recover from SW reset */ + end = jiffies + msecs_to_jiffies(delay_ms); + do { + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + break; + + cond_resched(); + } while (!time_after(jiffies, end)); + + if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) { + dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n", + mlx5_get_nic_state(dev), delay_ms); + } + + /* Release FW semaphore if you are the lock owner */ + if (!lock) + lock_sem_sw_reset(dev, false); + mlx5_core_err(dev, "end\n"); unlock: @@ -129,6 +277,20 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) case MLX5_NIC_IFC_NO_DRAM_NIC: mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n"); break; + + case MLX5_NIC_IFC_SW_RESET: + /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases: + * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded + * and this is a VF), this is not recoverable by SW reset. + * Logging of this is handled elsewhere. + * 2. FW reset has been issued by another function, driver can + * be reloaded to recover after the mode switches to + * MLX5_NIC_IFC_DISABLED. + */ + if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR) + mlx5_core_warn(dev, "NIC SW reset in progress\n"); + break; + default: mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n", nic_interface); @@ -137,52 +299,32 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) mlx5_disable_device(dev); } -static void health_recover(struct work_struct *work) -{ - struct mlx5_core_health *health; - struct delayed_work *dwork; - struct mlx5_core_dev *dev; - struct mlx5_priv *priv; - u8 nic_state; - - dwork = container_of(work, struct delayed_work, work); - health = container_of(dwork, struct mlx5_core_health, recover_work); - priv = container_of(health, struct mlx5_priv, health); - dev = container_of(priv, struct mlx5_core_dev, priv); - - nic_state = mlx5_get_nic_state(dev); - if (nic_state == MLX5_NIC_IFC_INVALID) { - dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n"); - return; - } - - dev_err(&dev->pdev->dev, "starting health recovery flow\n"); - mlx5_recover_device(dev); -} - /* How much time to wait until health resetting the driver (in msecs) */ -#define MLX5_RECOVERY_DELAY_MSECS 60000 -static void health_care(struct work_struct *work) +#define MLX5_RECOVERY_WAIT_MSECS 60000 +static int mlx5_health_try_recover(struct mlx5_core_dev *dev) { - unsigned long recover_delay = msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS); - struct mlx5_core_health *health; - struct mlx5_core_dev *dev; - struct mlx5_priv *priv; - unsigned long flags; + unsigned long end; - health = container_of(work, struct mlx5_core_health, work); - priv = container_of(health, struct mlx5_priv, health); - dev = container_of(priv, struct mlx5_core_dev, priv); mlx5_core_warn(dev, "handling bad device here\n"); mlx5_handle_bad_state(dev); + end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS); + while (sensor_pci_not_working(dev)) { + if (time_after(jiffies, end)) { + mlx5_core_err(dev, + "health recovery flow aborted, PCI reads still not working\n"); + return -EIO; + } + msleep(100); + } - spin_lock_irqsave(&health->wq_lock, flags); - if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags)) - schedule_delayed_work(&health->recover_work, recover_delay); - else - dev_err(&dev->pdev->dev, - "new health works are not permitted at this stage\n"); - spin_unlock_irqrestore(&health->wq_lock, flags); + mlx5_core_err(dev, "starting health recovery flow\n"); + mlx5_recover_device(dev); + if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state) || + check_fatal_sensors(dev)) { + mlx5_core_err(dev, "health recovery failed\n"); + return -EIO; + } + return 0; } static const char *hsynd_str(u8 synd) @@ -228,18 +370,298 @@ static void print_health_info(struct mlx5_core_dev *dev) return; for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) - dev_err(&dev->pdev->dev, "assert_var[%d] 0x%08x\n", i, ioread32be(h->assert_var + i)); + mlx5_core_err(dev, "assert_var[%d] 0x%08x\n", i, + ioread32be(h->assert_var + i)); - dev_err(&dev->pdev->dev, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr)); - dev_err(&dev->pdev->dev, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra)); + mlx5_core_err(dev, "assert_exit_ptr 0x%08x\n", + ioread32be(&h->assert_exit_ptr)); + mlx5_core_err(dev, "assert_callra 0x%08x\n", + ioread32be(&h->assert_callra)); sprintf(fw_str, "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev)); - dev_err(&dev->pdev->dev, "fw_ver %s\n", fw_str); - dev_err(&dev->pdev->dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id)); - dev_err(&dev->pdev->dev, "irisc_index %d\n", ioread8(&h->irisc_index)); - dev_err(&dev->pdev->dev, "synd 0x%x: %s\n", ioread8(&h->synd), hsynd_str(ioread8(&h->synd))); - dev_err(&dev->pdev->dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd)); + mlx5_core_err(dev, "fw_ver %s\n", fw_str); + mlx5_core_err(dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id)); + mlx5_core_err(dev, "irisc_index %d\n", ioread8(&h->irisc_index)); + mlx5_core_err(dev, "synd 0x%x: %s\n", ioread8(&h->synd), + hsynd_str(ioread8(&h->synd))); + mlx5_core_err(dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd)); fw = ioread32be(&h->fw_ver); - dev_err(&dev->pdev->dev, "raw fw_ver 0x%08x\n", fw); + mlx5_core_err(dev, "raw fw_ver 0x%08x\n", fw); +} + +static int +mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u8 synd; + int err; + + synd = ioread8(&h->synd); + err = devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); + if (err || !synd) + return err; + return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd)); +} + +struct mlx5_fw_reporter_ctx { + u8 err_synd; + int miss_counter; +}; + +static int +mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg, + struct mlx5_fw_reporter_ctx *fw_reporter_ctx) +{ + int err; + + err = devlink_fmsg_u8_pair_put(fmsg, "syndrome", + fw_reporter_ctx->err_synd); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter", + fw_reporter_ctx->miss_counter); + if (err) + return err; + return 0; +} + +static int +mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + int err; + int i; + + if (!ioread8(&h->synd)) + return 0; + + err = devlink_fmsg_pair_nest_start(fmsg, "health buffer"); + if (err) + return err; + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + err = devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var"); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) { + err = devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i)); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr", + ioread32be(&h->assert_exit_ptr)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "assert_callra", + ioread32be(&h->assert_callra)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id)); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index", + ioread8(&h->irisc_index)); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd", + ioread16be(&h->ext_synd)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver", + ioread32be(&h->fw_ver)); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + return devlink_fmsg_pair_nest_end(fmsg); +} + +static int +mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + int err; + + err = mlx5_fw_tracer_trigger_core_dump_general(dev); + if (err) + return err; + + if (priv_ctx) { + struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; + + err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx); + if (err) + return err; + } + + err = mlx5_fw_reporter_heath_buffer_data_put(dev, fmsg); + if (err) + return err; + return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg); +} + +static void mlx5_fw_reporter_err_work(struct work_struct *work) +{ + struct mlx5_fw_reporter_ctx fw_reporter_ctx; + struct mlx5_core_health *health; + + health = container_of(work, struct mlx5_core_health, report_work); + + if (IS_ERR_OR_NULL(health->fw_reporter)) + return; + + fw_reporter_ctx.err_synd = health->synd; + fw_reporter_ctx.miss_counter = health->miss_counter; + if (fw_reporter_ctx.err_synd) { + devlink_health_report(health->fw_reporter, + "FW syndrom reported", &fw_reporter_ctx); + return; + } + if (fw_reporter_ctx.miss_counter) + devlink_health_report(health->fw_reporter, + "FW miss counter reported", + &fw_reporter_ctx); +} + +static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = { + .name = "fw", + .diagnose = mlx5_fw_reporter_diagnose, + .dump = mlx5_fw_reporter_dump, +}; + +static int +mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + + return mlx5_health_try_recover(dev); +} + +#define MLX5_CR_DUMP_CHUNK_SIZE 256 +static int +mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + u32 crdump_size = dev->priv.health.crdump_size; + u32 *cr_data; + u32 data_size; + u32 offset; + int err; + + if (!mlx5_core_is_pf(dev)) + return -EPERM; + + cr_data = kvmalloc(crdump_size, GFP_KERNEL); + if (!cr_data) + return -ENOMEM; + err = mlx5_crdump_collect(dev, cr_data); + if (err) + return err; + + if (priv_ctx) { + struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; + + err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx); + if (err) + goto free_data; + } + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "crdump_data"); + if (err) + goto free_data; + for (offset = 0; offset < crdump_size; offset += data_size) { + if (crdump_size - offset < MLX5_CR_DUMP_CHUNK_SIZE) + data_size = crdump_size - offset; + else + data_size = MLX5_CR_DUMP_CHUNK_SIZE; + err = devlink_fmsg_binary_put(fmsg, cr_data, data_size); + if (err) + goto free_data; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + +free_data: + kfree(cr_data); + return err; +} + +static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) +{ + struct mlx5_fw_reporter_ctx fw_reporter_ctx; + struct mlx5_core_health *health; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + + health = container_of(work, struct mlx5_core_health, fatal_report_work); + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + + mlx5_enter_error_state(dev, false); + if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { + if (mlx5_health_try_recover(dev)) + mlx5_core_err(dev, "health recovery failed\n"); + return; + } + fw_reporter_ctx.err_synd = health->synd; + fw_reporter_ctx.miss_counter = health->miss_counter; + devlink_health_report(health->fw_fatal_reporter, + "FW fatal error reported", &fw_reporter_ctx); +} + +static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { + .name = "fw_fatal", + .recover = mlx5_fw_fatal_reporter_recover, + .dump = mlx5_fw_fatal_reporter_dump, +}; + +#define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000 +static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct devlink *devlink = priv_to_devlink(dev); + + health->fw_reporter = + devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops, + 0, false, dev); + if (IS_ERR(health->fw_reporter)) + mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", + PTR_ERR(health->fw_reporter)); + + health->fw_fatal_reporter = + devlink_health_reporter_create(devlink, + &mlx5_fw_fatal_reporter_ops, + MLX5_REPORTER_FW_GRACEFUL_PERIOD, + true, dev); + if (IS_ERR(health->fw_fatal_reporter)) + mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", + PTR_ERR(health->fw_fatal_reporter)); +} + +static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (!IS_ERR_OR_NULL(health->fw_reporter)) + devlink_health_reporter_destroy(health->fw_reporter); + + if (!IS_ERR_OR_NULL(health->fw_fatal_reporter)) + devlink_health_reporter_destroy(health->fw_fatal_reporter); } static unsigned long get_next_poll_jiffies(void) @@ -260,10 +682,9 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev) spin_lock_irqsave(&health->wq_lock, flags); if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) - queue_work(health->wq, &health->work); + queue_work(health->wq, &health->fatal_report_work); else - dev_err(&dev->pdev->dev, - "new health works are not permitted at this stage\n"); + mlx5_core_err(dev, "new health works are not permitted at this stage\n"); spin_unlock_irqrestore(&health->wq_lock, flags); } @@ -271,6 +692,9 @@ static void poll_health(struct timer_list *t) { struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u32 fatal_error; + u8 prev_synd; u32 count; if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) @@ -284,12 +708,21 @@ static void poll_health(struct timer_list *t) health->prev = count; if (health->miss_counter == MAX_MISSES) { - dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n"); + mlx5_core_err(dev, "device's health compromised - reached miss count\n"); print_health_info(dev); + queue_work(health->wq, &health->report_work); } - if (in_fatal(dev) && !health->sick) { - health->sick = true; + prev_synd = health->synd; + health->synd = ioread8(&h->synd); + if (health->synd && health->synd != prev_synd) + queue_work(health->wq, &health->report_work); + + fatal_error = check_fatal_sensors(dev); + + if (fatal_error && !health->fatal_error) { + mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); + dev->priv.health.fatal_error = fatal_error; print_health_info(dev); mlx5_trigger_health_work(dev); } @@ -303,9 +736,8 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; timer_setup(&health->timer, poll_health, 0); - health->sick = 0; + health->fatal_error = MLX5_SENSOR_NO_ERR; clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; @@ -321,7 +753,6 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) if (disable_health) { spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); spin_unlock_irqrestore(&health->wq_lock, flags); } @@ -335,21 +766,16 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev) spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); spin_unlock_irqrestore(&health->wq_lock, flags); - cancel_delayed_work_sync(&health->recover_work); - cancel_work_sync(&health->work); + cancel_work_sync(&health->report_work); + cancel_work_sync(&health->fatal_report_work); } -void mlx5_drain_health_recovery(struct mlx5_core_dev *dev) +void mlx5_health_flush(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - unsigned long flags; - spin_lock_irqsave(&health->wq_lock, flags); - set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); - spin_unlock_irqrestore(&health->wq_lock, flags); - cancel_delayed_work_sync(&dev->priv.health.recover_work); + flush_workqueue(health->wq); } void mlx5_health_cleanup(struct mlx5_core_dev *dev) @@ -357,6 +783,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; destroy_workqueue(health->wq); + mlx5_fw_reporters_destroy(dev); } int mlx5_health_init(struct mlx5_core_dev *dev) @@ -364,20 +791,26 @@ int mlx5_health_init(struct mlx5_core_dev *dev) struct mlx5_core_health *health; char *name; + mlx5_fw_reporters_create(dev); + health = &dev->priv.health; name = kmalloc(64, GFP_KERNEL); if (!name) - return -ENOMEM; + goto out_err; strcpy(name, "mlx5_health"); - strcat(name, dev_name(&dev->pdev->dev)); + strcat(name, dev_name(dev->device)); health->wq = create_singlethread_workqueue(name); kfree(name); if (!health->wq) - return -ENOMEM; + goto out_err; spin_lock_init(&health->wq_lock); - INIT_WORK(&health->work, health_care); - INIT_DELAYED_WORK(&health->recover_work, health_recover); + INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work); + INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work); return 0; + +out_err: + mlx5_fw_reporters_destroy(dev); + return -ENOMEM; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile index d8e17110f25d..c78512eed8d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 90cb50fe17fd..ebd81f6b556e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -122,14 +122,6 @@ static int mlx5i_get_ts_info(struct net_device *netdev, return mlx5e_ethtool_get_ts_info(priv, info); } -static int mlx5i_flash_device(struct net_device *netdev, - struct ethtool_flash *flash) -{ - struct mlx5e_priv *priv = mlx5i_epriv(netdev); - - return mlx5e_ethtool_flash_device(priv, flash); -} - enum mlx5_ptys_width { MLX5_PTYS_WIDTH_1X = 1 << 0, MLX5_PTYS_WIDTH_2X = 1 << 1, @@ -241,7 +233,6 @@ const struct ethtool_ops mlx5i_ethtool_ops = { .get_ethtool_stats = mlx5i_get_ethtool_stats, .get_ringparam = mlx5i_get_ringparam, .set_ringparam = mlx5i_set_ringparam, - .flash_device = mlx5i_flash_device, .get_channels = mlx5i_get_channels, .set_channels = mlx5i_set_channels, .get_coalesce = mlx5i_get_coalesce, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 4eac42555c7d..faf197d53743 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -68,6 +68,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, params->lro_en = false; params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; + params->tunneled_offload_en = false; } /* Called directly after IPoIB netdevice was created to initialize SW structs */ @@ -77,17 +78,16 @@ int mlx5i_init(struct mlx5_core_dev *mdev, void *ppriv) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); - u16 max_mtu; int err; err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv); if (err) return err; - mlx5_query_port_max_mtu(mdev, &max_mtu, 1); - netdev->mtu = max_mtu; + mlx5e_set_netdev_mtu_boundaries(priv); + netdev->mtu = netdev->max_mtu; - mlx5e_build_nic_params(mdev, &priv->rss_params, &priv->channels.params, + mlx5e_build_nic_params(mdev, NULL, &priv->rss_params, &priv->channels.params, mlx5e_get_netdev_max_channels(netdev), netdev->mtu); mlx5i_build_nic_params(mdev, &priv->channels.params); @@ -258,6 +258,18 @@ void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp * mlx5_core_destroy_qp(mdev, qp); } +int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn); + + return mlx5e_create_tis(mdev, in, tisn); +} + static int mlx5i_init_tx(struct mlx5e_priv *priv) { struct mlx5i_priv *ipriv = priv->ppriv; @@ -269,7 +281,7 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv) return err; } - err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]); + err = mlx5i_create_tis(priv->mdev, ipriv->qp.qpn, &priv->tisn[0]); if (err) { mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); goto err_destroy_underlay_qp; @@ -365,7 +377,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir); if (err) goto err_destroy_indirect_rqts; @@ -373,7 +385,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir); if (err) goto err_destroy_indirect_tirs; @@ -384,11 +396,11 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) return 0; err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv, true); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -401,9 +413,9 @@ err_destroy_q_counters: static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { mlx5i_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, true); - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@ -418,6 +430,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .cleanup_rx = mlx5i_cleanup_rx, .enable = NULL, /* mlx5i_enable */ .disable = NULL, /* mlx5i_disable */ + .update_rx = mlx5e_update_nic_rx, .update_stats = NULL, /* mlx5i_update_stats */ .update_carrier = NULL, /* no HW update in IB link */ .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, @@ -526,7 +539,7 @@ static int mlx5i_open(struct net_device *netdev) if (err) goto err_remove_fs_underlay_qp; - mlx5e_refresh_tirs(epriv, false); + epriv->profile->update_rx(epriv); mlx5e_activate_priv_channels(epriv); mutex_unlock(&epriv->state_lock); @@ -619,7 +632,7 @@ static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb, struct mlx5_ib_ah *mah = to_mah(address); struct mlx5i_priv *ipriv = epriv->ppriv; - return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey); + return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey, netdev_xmit_more()); } static void mlx5i_set_pkey_index(struct net_device *netdev, int id) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 9165ca567047..c87962cab921 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -59,6 +59,8 @@ struct mlx5i_priv { char *mlx5e_priv[0]; }; +int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn); + /* Underlay QP create/destroy functions */ int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); @@ -119,7 +121,8 @@ static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq, } netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5_av *av, u32 dqpn, u32 dqkey); + struct mlx5_av *av, u32 dqpn, u32 dqkey, + bool xmit_more); void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index b491b8f5fd6b..6e56fa769d2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -210,7 +210,7 @@ static int mlx5i_pkey_open(struct net_device *netdev) goto err_unint_underlay_qp; } - err = mlx5e_create_tis(mdev, 0 /* tc */, ipriv->qp.qpn, &epriv->tisn[0]); + err = mlx5i_create_tis(mdev, ipriv->qp.qpn, &epriv->tisn[0]); if (err) { mlx5_core_warn(mdev, "create child tis failed, %d\n", err); goto err_remove_rx_uderlay_qp; @@ -221,7 +221,7 @@ static int mlx5i_pkey_open(struct net_device *netdev) mlx5_core_warn(mdev, "opening child channels failed, %d\n", err); goto err_clear_state_opened_flag; } - mlx5e_refresh_tirs(epriv, false); + epriv->profile->update_rx(epriv); mlx5e_activate_priv_channels(epriv); mutex_unlock(&epriv->state_lock); @@ -350,6 +350,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = { .cleanup_rx = mlx5i_pkey_cleanup_rx, .enable = NULL, .disable = NULL, + .update_rx = mlx5e_update_nic_rx, .update_stats = NULL, .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 959605559858..c5ef2ff26465 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -305,8 +305,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) !mlx5_sriov_is_enabled(dev1); #ifdef CONFIG_MLX5_ESWITCH - roce_lag &= dev0->priv.eswitch->mode == SRIOV_NONE && - dev1->priv.eswitch->mode == SRIOV_NONE; + roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE && + dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE; #endif if (roce_lag) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 5633f8572800..e69766393990 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include <linux/netdevice.h> +#include <net/nexthop.h> #include "lag.h" #include "lag_mp.h" #include "mlx5_core.h" @@ -110,6 +111,8 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, struct fib_info *fi) { struct lag_mp *mp = &ldev->lag_mp; + struct fib_nh *fib_nh0, *fib_nh1; + unsigned int nhs; /* Handle delete event */ if (event == FIB_EVENT_ENTRY_DEL) { @@ -120,9 +123,11 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } /* Handle add/replace event */ - if (fi->fib_nhs == 1) { + nhs = fib_info_num_path(fi); + if (nhs == 1) { if (__mlx5_lag_is_active(ldev)) { - struct net_device *nh_dev = fi->fib_nh[0].nh_dev; + struct fib_nh *nh = fib_info_nh(fi, 0); + struct net_device *nh_dev = nh->fib_nh_dev; int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev); mlx5_lag_set_port_affinity(ldev, ++i); @@ -130,14 +135,16 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, return; } - if (fi->fib_nhs != 2) + if (nhs != 2) return; /* Verify next hops are ports of the same hca */ - if (!(fi->fib_nh[0].nh_dev == ldev->pf[0].netdev && - fi->fib_nh[1].nh_dev == ldev->pf[1].netdev) && - !(fi->fib_nh[0].nh_dev == ldev->pf[1].netdev && - fi->fib_nh[1].nh_dev == ldev->pf[0].netdev)) { + fib_nh0 = fib_info_nh(fi, 0); + fib_nh1 = fib_info_nh(fi, 1); + if (!(fib_nh0->fib_nh_dev == ldev->pf[0].netdev && + fib_nh1->fib_nh_dev == ldev->pf[1].netdev) && + !(fib_nh0->fib_nh_dev == ldev->pf[1].netdev && + fib_nh1->fib_nh_dev == ldev->pf[0].netdev)) { mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n"); return; } @@ -167,14 +174,14 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, /* nh added/removed */ if (event == FIB_EVENT_NH_DEL) { - int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->nh_dev); + int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev); if (i >= 0) { i = (i + 1) % 2 + 1; /* peer port */ mlx5_lag_set_port_affinity(ldev, i); } } else if (event == FIB_EVENT_NH_ADD && - fi->fib_nhs == 2) { + fib_info_num_path(fi) == 2) { mlx5_lag_set_port_affinity(ldev, 0); } } @@ -238,6 +245,7 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, struct mlx5_fib_event_work *fib_work; struct fib_entry_notifier_info *fen_info; struct fib_nh_notifier_info *fnh_info; + struct net_device *fib_dev; struct fib_info *fi; if (info->family != AF_INET) @@ -254,8 +262,13 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, fen_info = container_of(info, struct fib_entry_notifier_info, info); fi = fen_info->fi; - if (fi->fib_dev != ldev->pf[0].netdev && - fi->fib_dev != ldev->pf[1].netdev) { + if (fi->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } + fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; + if (fib_dev != ldev->pf[0].netdev && + fib_dev != ldev->pf[1].netdev) { return NOTIFY_DONE; } fib_work = mlx5_lag_init_fib_work(ldev, event); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile index d8e17110f25d..c78512eed8d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index ca0ee9916e9e..0059b290e095 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -535,23 +535,16 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) do_div(ns, NSEC_PER_SEC / HZ); clock->overflow_period = ns; - mdev->clock_info_page = alloc_page(GFP_KERNEL); - if (mdev->clock_info_page) { - mdev->clock_info = kmap(mdev->clock_info_page); - if (!mdev->clock_info) { - __free_page(mdev->clock_info_page); - mlx5_core_warn(mdev, "failed to map clock page\n"); - } else { - mdev->clock_info->sign = 0; - mdev->clock_info->nsec = clock->tc.nsec; - mdev->clock_info->cycles = clock->tc.cycle_last; - mdev->clock_info->mask = clock->cycles.mask; - mdev->clock_info->mult = clock->nominal_c_mult; - mdev->clock_info->shift = clock->cycles.shift; - mdev->clock_info->frac = clock->tc.frac; - mdev->clock_info->overflow_period = - clock->overflow_period; - } + mdev->clock_info = + (struct mlx5_ib_clock_info *)get_zeroed_page(GFP_KERNEL); + if (mdev->clock_info) { + mdev->clock_info->nsec = clock->tc.nsec; + mdev->clock_info->cycles = clock->tc.cycle_last; + mdev->clock_info->mask = clock->cycles.mask; + mdev->clock_info->mult = clock->nominal_c_mult; + mdev->clock_info->shift = clock->cycles.shift; + mdev->clock_info->frac = clock->tc.frac; + mdev->clock_info->overflow_period = clock->overflow_period; } INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); @@ -599,8 +592,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) cancel_delayed_work_sync(&clock->overflow_work); if (mdev->clock_info) { - kunmap(mdev->clock_info_page); - __free_page(mdev->clock_info_page); + free_page((unsigned long)mdev->clock_info); mdev->clock_info = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c new file mode 100644 index 000000000000..ea9ee88491e5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include "mlx5_core.h" + +int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, + void *key, u32 sz_bytes, + u32 *p_key_id) +{ + u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u32 sz_bits = sz_bytes * BITS_PER_BYTE; + u8 general_obj_key_size; + u64 general_obj_types; + void *obj, *key_p; + int err; + + obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object); + key_p = MLX5_ADDR_OF(encryption_key_obj, obj, key); + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY)) + return -EINVAL; + + switch (sz_bits) { + case 128: + general_obj_key_size = + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128; + break; + case 256: + general_obj_key_size = + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256; + break; + default: + return -EINVAL; + } + + memcpy(key_p, key, sz_bytes); + + MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size); + MLX5_SET(encryption_key_obj, obj, key_type, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK); + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.pdn); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) + *p_key_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + /* avoid leaking key on the stack */ + memzero_explicit(in, sizeof(in)); + + return err; +} + +void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, key_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index c0fb6d72b695..3dfab91ae5f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -7,7 +7,6 @@ #include <linux/mlx5/eq.h> #include <linux/mlx5/cq.h> -#define MLX5_MAX_IRQ_NAME (32) #define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe)) struct mlx5_eq_tasklet { @@ -36,8 +35,14 @@ struct mlx5_eq { struct mlx5_rsc_debug *dbg; }; +struct mlx5_eq_async { + struct mlx5_eq core; + struct notifier_block irq_nb; +}; + struct mlx5_eq_comp { - struct mlx5_eq core; /* Must be first */ + struct mlx5_eq core; + struct notifier_block irq_nb; struct mlx5_eq_tasklet tasklet_ctx; struct list_head list; }; @@ -70,7 +75,7 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev); void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn); struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); void mlx5_cq_tasklet_cb(unsigned long data); @@ -92,7 +97,4 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); #endif -int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb); -int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb); - #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c new file mode 100644 index 000000000000..23361a9ae4fa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/kernel.h> +#include "mlx5_core.h" +#include "geneve.h" + +struct mlx5_geneve { + struct mlx5_core_dev *mdev; + __be16 opt_class; + u8 opt_type; + u32 obj_id; + struct mutex sync_lock; /* protect GENEVE obj operations */ + u32 refcount; +}; + +static int mlx5_geneve_tlv_option_create(struct mlx5_core_dev *mdev, + __be16 class, + u8 type, + u8 len) +{ + u32 in[MLX5_ST_SZ_DW(create_geneve_tlv_option_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u64 general_obj_types; + void *hdr, *opt; + u16 obj_id; + int err; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT)) + return -EINVAL; + + hdr = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, hdr); + opt = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, geneve_tlv_opt); + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + + MLX5_SET(geneve_tlv_option, opt, option_class, be16_to_cpu(class)); + MLX5_SET(geneve_tlv_option, opt, option_type, type); + MLX5_SET(geneve_tlv_option, opt, option_data_length, len); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return obj_id; +} + +static void mlx5_geneve_tlv_option_destroy(struct mlx5_core_dev *mdev, u16 obj_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) +{ + int res = 0; + + if (IS_ERR_OR_NULL(geneve)) + return -EOPNOTSUPP; + + mutex_lock(&geneve->sync_lock); + + if (geneve->refcount) { + if (geneve->opt_class == opt->opt_class && + geneve->opt_type == opt->type) { + /* We already have TLV options obj allocated */ + geneve->refcount++; + } else { + /* TLV options obj allocated, but its params + * do not match the new request. + * We support only one such object. + */ + mlx5_core_warn(geneve->mdev, + "Won't create Geneve TLV opt object with class:type:len = 0x%x:0x%x:%d (another class:type already exists)\n", + be16_to_cpu(opt->opt_class), + opt->type, + opt->length); + res = -EOPNOTSUPP; + goto unlock; + } + } else { + /* We don't have any TLV options obj allocated */ + + res = mlx5_geneve_tlv_option_create(geneve->mdev, + opt->opt_class, + opt->type, + opt->length); + if (res < 0) { + mlx5_core_warn(geneve->mdev, + "Failed creating Geneve TLV opt object class:type:len = 0x%x:0x%x:%d (err=%d)\n", + be16_to_cpu(opt->opt_class), + opt->type, opt->length, res); + goto unlock; + } + geneve->opt_class = opt->opt_class; + geneve->opt_type = opt->type; + geneve->obj_id = res; + geneve->refcount++; + } + +unlock: + mutex_unlock(&geneve->sync_lock); + return res; +} + +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + mutex_lock(&geneve->sync_lock); + if (--geneve->refcount == 0) { + /* We've just removed the last user of Geneve option. + * Now delete the object in FW. + */ + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + geneve->opt_class = 0; + geneve->opt_type = 0; + geneve->obj_id = 0; + } + mutex_unlock(&geneve->sync_lock); +} + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev) +{ + struct mlx5_geneve *geneve = + kzalloc(sizeof(*geneve), GFP_KERNEL); + + if (!geneve) + return ERR_PTR(-ENOMEM); + geneve->mdev = mdev; + mutex_init(&geneve->sync_lock); + + return geneve; +} + +void mlx5_geneve_destroy(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + /* Lockless since we are unloading */ + if (geneve->refcount) + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + kfree(geneve); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h new file mode 100644 index 000000000000..adee0cbba19c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_GENEVE_H__ +#define __MLX5_GENEVE_H__ + +#include <net/geneve.h> +#include <linux/mlx5/driver.h> + +struct mlx5_geneve; + +#ifdef CONFIG_MLX5_ESWITCH + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev); +void mlx5_geneve_destroy(struct mlx5_geneve *geneve); + +int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt); +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline struct mlx5_geneve +*mlx5_geneve_create(struct mlx5_core_dev *mdev) { return NULL; } +static inline void +mlx5_geneve_destroy(struct mlx5_geneve *geneve) {} +static inline int +mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) { return 0; } +static inline void +mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) {} + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_GENEVE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 397a2847867a..b99d469e4e64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -41,6 +41,9 @@ int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count); void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count); int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index); void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index); +int mlx5_crdump_enable(struct mlx5_core_dev *dev); +void mlx5_crdump_disable(struct mlx5_core_dev *dev); +int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data); /* TODO move to lib/events.h */ @@ -76,4 +79,9 @@ struct mlx5_pme_stats { void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); +/* Crypto */ +int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, + void *key, u32 sz_bytes, u32 *p_key_id); +void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index a71d5b9c7ab2..3118e8d66407 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -67,6 +67,7 @@ static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index) struct l2table_node { struct l2addr_node node; u32 index; /* index in HW l2 table */ + int ref_count; }; struct mlx5_mpfs { @@ -134,8 +135,8 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { struct mlx5_mpfs *mpfs = dev->priv.mpfs; struct l2table_node *l2addr; + int err = 0; u32 index; - int err; if (!MLX5_ESWITCH_MANAGER(dev)) return 0; @@ -144,30 +145,35 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); if (l2addr) { - err = -EEXIST; - goto abort; + l2addr->ref_count++; + goto out; } err = alloc_l2table_index(mpfs, &index); if (err) - goto abort; + goto out; l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL); if (!l2addr) { - free_l2table_index(mpfs, index); err = -ENOMEM; - goto abort; + goto hash_add_err; } - l2addr->index = index; err = set_l2table_entry_cmd(dev, index, mac); - if (err) { - l2addr_hash_del(l2addr); - free_l2table_index(mpfs, index); - } + if (err) + goto set_table_entry_err; + + l2addr->index = index; + l2addr->ref_count = 1; mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index); -abort: + goto out; + +set_table_entry_err: + l2addr_hash_del(l2addr); +hash_add_err: + free_l2table_index(mpfs, index); +out: mutex_unlock(&mpfs->lock); return err; } @@ -190,6 +196,9 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) goto unlock; } + if (--l2addr->ref_count > 0) + goto unlock; + index = l2addr->index; del_l2table_entry_cmd(dev, index); l2addr_hash_del(l2addr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c new file mode 100644 index 000000000000..6b774e0c2766 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <linux/pci.h> +#include "mlx5_core.h" +#include "pci_vsc.h" + +#define MLX5_EXTRACT_C(source, offset, size) \ + ((((u32)(source)) >> (offset)) & MLX5_ONES32(size)) +#define MLX5_EXTRACT(src, start, len) \ + (((len) == 32) ? (src) : MLX5_EXTRACT_C(src, start, len)) +#define MLX5_ONES32(size) \ + ((size) ? (0xffffffff >> (32 - (size))) : 0) +#define MLX5_MASK32(offset, size) \ + (MLX5_ONES32(size) << (offset)) +#define MLX5_MERGE_C(rsrc1, rsrc2, start, len) \ + ((((rsrc2) << (start)) & (MLX5_MASK32((start), (len)))) | \ + ((rsrc1) & (~MLX5_MASK32((start), (len))))) +#define MLX5_MERGE(rsrc1, rsrc2, start, len) \ + (((len) == 32) ? (rsrc2) : MLX5_MERGE_C(rsrc1, rsrc2, start, len)) +#define vsc_read(dev, offset, val) \ + pci_read_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) +#define vsc_write(dev, offset, val) \ + pci_write_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) +#define VSC_MAX_RETRIES 2048 + +enum { + VSC_CTRL_OFFSET = 0x4, + VSC_COUNTER_OFFSET = 0x8, + VSC_SEMAPHORE_OFFSET = 0xc, + VSC_ADDR_OFFSET = 0x10, + VSC_DATA_OFFSET = 0x14, + + VSC_FLAG_BIT_OFFS = 31, + VSC_FLAG_BIT_LEN = 1, + + VSC_SYND_BIT_OFFS = 30, + VSC_SYND_BIT_LEN = 1, + + VSC_ADDR_BIT_OFFS = 0, + VSC_ADDR_BIT_LEN = 30, + + VSC_SPACE_BIT_OFFS = 0, + VSC_SPACE_BIT_LEN = 16, + + VSC_SIZE_VLD_BIT_OFFS = 28, + VSC_SIZE_VLD_BIT_LEN = 1, + + VSC_STATUS_BIT_OFFS = 29, + VSC_STATUS_BIT_LEN = 3, +}; + +void mlx5_pci_vsc_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_pf(dev)) + return; + + dev->vsc_addr = pci_find_capability(dev->pdev, + PCI_CAP_ID_VNDR); + if (!dev->vsc_addr) + mlx5_core_warn(dev, "Failed to get valid vendor specific ID\n"); +} + +int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev) +{ + u32 counter = 0; + int retries = 0; + u32 lock_val; + int ret; + + pci_cfg_access_lock(dev->pdev); + do { + if (retries > VSC_MAX_RETRIES) { + ret = -EBUSY; + goto pci_unlock; + } + + /* Check if semaphore is already locked */ + ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); + if (ret) + goto pci_unlock; + + if (lock_val) { + retries++; + usleep_range(1000, 2000); + continue; + } + + /* Read and write counter value, if written value is + * the same, semaphore was acquired successfully. + */ + ret = vsc_read(dev, VSC_COUNTER_OFFSET, &counter); + if (ret) + goto pci_unlock; + + ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, counter); + if (ret) + goto pci_unlock; + + ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); + if (ret) + goto pci_unlock; + + retries++; + } while (counter != lock_val); + + return 0; + +pci_unlock: + pci_cfg_access_unlock(dev->pdev); + return ret; +} + +int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev) +{ + int ret; + + ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, MLX5_VSC_UNLOCK); + pci_cfg_access_unlock(dev->pdev); + return ret; +} + +int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space, + u32 *ret_space_size) +{ + int ret; + u32 val = 0; + + if (!mlx5_vsc_accessible(dev)) + return -EINVAL; + + if (ret_space_size) + *ret_space_size = 0; + + /* Get a unique val */ + ret = vsc_read(dev, VSC_CTRL_OFFSET, &val); + if (ret) + goto out; + + /* Try to modify the lock */ + val = MLX5_MERGE(val, space, VSC_SPACE_BIT_OFFS, VSC_SPACE_BIT_LEN); + ret = vsc_write(dev, VSC_CTRL_OFFSET, val); + if (ret) + goto out; + + /* Verify lock was modified */ + ret = vsc_read(dev, VSC_CTRL_OFFSET, &val); + if (ret) + goto out; + + if (MLX5_EXTRACT(val, VSC_STATUS_BIT_OFFS, VSC_STATUS_BIT_LEN) == 0) + return -EINVAL; + + /* Get space max address if indicated by size valid bit */ + if (ret_space_size && + MLX5_EXTRACT(val, VSC_SIZE_VLD_BIT_OFFS, VSC_SIZE_VLD_BIT_LEN)) { + ret = vsc_read(dev, VSC_ADDR_OFFSET, &val); + if (ret) { + mlx5_core_warn(dev, "Failed to get max space size\n"); + goto out; + } + *ret_space_size = MLX5_EXTRACT(val, VSC_ADDR_BIT_OFFS, + VSC_ADDR_BIT_LEN); + } + return 0; + +out: + return ret; +} + +static int mlx5_vsc_wait_on_flag(struct mlx5_core_dev *dev, u8 expected_val) +{ + int retries = 0; + u32 flag; + int ret; + + do { + if (retries > VSC_MAX_RETRIES) + return -EBUSY; + + ret = vsc_read(dev, VSC_ADDR_OFFSET, &flag); + if (ret) + return ret; + flag = MLX5_EXTRACT(flag, VSC_FLAG_BIT_OFFS, VSC_FLAG_BIT_LEN); + retries++; + + if ((retries & 0xf) == 0) + usleep_range(1000, 2000); + + } while (flag != expected_val); + + return 0; +} + +static int mlx5_vsc_gw_write(struct mlx5_core_dev *dev, unsigned int address, + u32 data) +{ + int ret; + + if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS, + VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN)) + return -EINVAL; + + /* Set flag to 0x1 */ + address = MLX5_MERGE(address, 1, VSC_FLAG_BIT_OFFS, 1); + ret = vsc_write(dev, VSC_DATA_OFFSET, data); + if (ret) + goto out; + + ret = vsc_write(dev, VSC_ADDR_OFFSET, address); + if (ret) + goto out; + + /* Wait for the flag to be cleared */ + ret = mlx5_vsc_wait_on_flag(dev, 0); + +out: + return ret; +} + +static int mlx5_vsc_gw_read(struct mlx5_core_dev *dev, unsigned int address, + u32 *data) +{ + int ret; + + if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS, + VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN)) + return -EINVAL; + + ret = vsc_write(dev, VSC_ADDR_OFFSET, address); + if (ret) + goto out; + + ret = mlx5_vsc_wait_on_flag(dev, 1); + if (ret) + goto out; + + ret = vsc_read(dev, VSC_DATA_OFFSET, data); +out: + return ret; +} + +static int mlx5_vsc_gw_read_fast(struct mlx5_core_dev *dev, + unsigned int read_addr, + unsigned int *next_read_addr, + u32 *data) +{ + int ret; + + ret = mlx5_vsc_gw_read(dev, read_addr, data); + if (ret) + goto out; + + ret = vsc_read(dev, VSC_ADDR_OFFSET, next_read_addr); + if (ret) + goto out; + + *next_read_addr = MLX5_EXTRACT(*next_read_addr, VSC_ADDR_BIT_OFFS, + VSC_ADDR_BIT_LEN); + + if (*next_read_addr <= read_addr) + ret = -EINVAL; +out: + return ret; +} + +int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, + int length) +{ + unsigned int next_read_addr = 0; + unsigned int read_addr = 0; + + while (read_addr < length) { + if (mlx5_vsc_gw_read_fast(dev, read_addr, &next_read_addr, + &data[(read_addr >> 2)])) + return read_addr; + + read_addr = next_read_addr; + } + return length; +} + +int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space, + enum mlx5_vsc_state state) +{ + u32 data, id = 0; + int ret; + + ret = mlx5_vsc_gw_set_space(dev, MLX5_SEMAPHORE_SPACE_DOMAIN, NULL); + if (ret) { + mlx5_core_warn(dev, "Failed to set gw space %d\n", ret); + return ret; + } + + if (state == MLX5_VSC_LOCK) { + /* Get a unique ID based on the counter */ + ret = vsc_read(dev, VSC_COUNTER_OFFSET, &id); + if (ret) + return ret; + } + + /* Try to modify lock */ + ret = mlx5_vsc_gw_write(dev, space, id); + if (ret) + return ret; + + /* Verify lock was modified */ + ret = mlx5_vsc_gw_read(dev, space, &data); + if (ret) + return -EINVAL; + + if (data != id) + return -EBUSY; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h new file mode 100644 index 000000000000..64272a6d7754 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies */ + +#ifndef __MLX5_PCI_VSC_H__ +#define __MLX5_PCI_VSC_H__ + +enum mlx5_vsc_state { + MLX5_VSC_UNLOCK, + MLX5_VSC_LOCK, +}; + +enum { + MLX5_VSC_SPACE_SCAN_CRSPACE = 0x7, +}; + +void mlx5_pci_vsc_init(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space, + u32 *ret_space_size); +int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, + int length); + +static inline bool mlx5_vsc_accessible(struct mlx5_core_dev *dev) +{ + return !!dev->vsc_addr; +} + +int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space, + enum mlx5_vsc_state state); + +#endif /* __MLX5_PCI_VSC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c index 40f4a19b1ce1..be69c1d7941a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c @@ -80,10 +80,8 @@ void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy, mlx5_query_port_tun_entropy(mdev, &entropy_flags); tun_entropy->num_enabling_entries = 0; tun_entropy->num_disabling_entries = 0; - tun_entropy->enabled = entropy_flags.calc_enabled; - tun_entropy->enabled = - (entropy_flags.calc_supported) ? - entropy_flags.calc_enabled : true; + tun_entropy->enabled = entropy_flags.calc_supported ? + entropy_flags.calc_enabled : true; } static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c index 9a8fd762167b..b9d4f4e19ff9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c @@ -33,6 +33,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/mlx5/driver.h> +#include <net/vxlan.h> #include "mlx5_core.h" #include "vxlan.h" @@ -204,8 +205,8 @@ struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev) spin_lock_init(&vxlan->lock); hash_init(vxlan->htable); - /* Hardware adds 4789 by default */ - mlx5_vxlan_add_port(vxlan, 4789); + /* Hardware adds 4789 (IANA_VXLAN_UDP_PORT) by default */ + mlx5_vxlan_add_port(vxlan, IANA_VXLAN_UDP_PORT); return vxlan; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 76716419370d..b15b27a497fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -56,6 +56,7 @@ #include "fs_core.h" #include "lib/mpfs.h" #include "eswitch.h" +#include "devlink.h" #include "lib/mlx5.h" #include "fpga/core.h" #include "fpga/ipsec.h" @@ -63,7 +64,9 @@ #include "accel/tls.h" #include "lib/clock.h" #include "lib/vxlan.h" +#include "lib/geneve.h" #include "lib/devcom.h" +#include "lib/pci_vsc.h" #include "diag/fw_tracer.h" #include "ecpf.h" @@ -169,18 +172,28 @@ static struct mlx5_profile profile[] = { #define FW_INIT_TIMEOUT_MILI 2000 #define FW_INIT_WAIT_MS 2 -#define FW_PRE_INIT_TIMEOUT_MILI 10000 +#define FW_PRE_INIT_TIMEOUT_MILI 120000 +#define FW_INIT_WARN_MESSAGE_INTERVAL 20000 -static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili) +static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, + u32 warn_time_mili) { + unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili); unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); int err = 0; + BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL); + while (fw_initializing(dev)) { if (time_after(jiffies, end)) { err = -EBUSY; break; } + if (warn_time_mili && time_after(jiffies, warn)) { + mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n", + jiffies_to_msecs(end - warn) / 1000); + warn = jiffies + msecs_to_jiffies(warn_time_mili); + } msleep(FW_INIT_WAIT_MS); } @@ -567,24 +580,23 @@ query_ex: static int set_hca_cap(struct mlx5_core_dev *dev) { - struct pci_dev *pdev = dev->pdev; int err; err = handle_hca_cap(dev); if (err) { - dev_err(&pdev->dev, "handle_hca_cap failed\n"); + mlx5_core_err(dev, "handle_hca_cap failed\n"); goto out; } err = handle_hca_cap_atomic(dev); if (err) { - dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n"); + mlx5_core_err(dev, "handle_hca_cap_atomic failed\n"); goto out; } err = handle_hca_cap_odp(dev); if (err) { - dev_err(&pdev->dev, "handle_hca_cap_odp failed\n"); + mlx5_core_err(dev, "handle_hca_cap_odp failed\n"); goto out; } @@ -716,36 +728,27 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) return -EOPNOTSUPP; } -static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, + const struct pci_device_id *id) { - struct pci_dev *pdev = dev->pdev; + struct mlx5_priv *priv = &dev->priv; int err = 0; + mutex_init(&dev->pci_status_mutex); pci_set_drvdata(dev->pdev, dev); - strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN); - priv->name[MLX5_MAX_NAME_LEN - 1] = 0; - - mutex_init(&priv->pgdir_mutex); - INIT_LIST_HEAD(&priv->pgdir_list); - spin_lock_init(&priv->mkey_lock); - - mutex_init(&priv->alloc_mutex); + dev->bar_addr = pci_resource_start(pdev, 0); priv->numa_node = dev_to_node(&dev->pdev->dev); - if (mlx5_debugfs_root) - priv->dbg_root = - debugfs_create_dir(pci_name(pdev), mlx5_debugfs_root); - err = mlx5_pci_enable_device(dev); if (err) { - dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n"); - goto err_dbg; + mlx5_core_err(dev, "Cannot enable PCI device, aborting\n"); + return err; } err = request_bar(pdev); if (err) { - dev_err(&pdev->dev, "error requesting BARs, aborting\n"); + mlx5_core_err(dev, "error requesting BARs, aborting\n"); goto err_disable; } @@ -753,7 +756,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) err = set_dma_caps(pdev); if (err) { - dev_err(&pdev->dev, "Failed setting DMA capabilities mask, aborting\n"); + mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n"); goto err_clr_master; } @@ -762,14 +765,16 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128)) mlx5_core_dbg(dev, "Enabling pci atomics failed\n"); - dev->iseg_base = pci_resource_start(dev->pdev, 0); + dev->iseg_base = dev->bar_addr; dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg)); if (!dev->iseg) { err = -ENOMEM; - dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n"); + mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n"); goto err_clr_master; } + mlx5_pci_vsc_init(dev); + return 0; err_clr_master: @@ -777,52 +782,53 @@ err_clr_master: release_bar(dev->pdev); err_disable: mlx5_pci_disable_device(dev); - -err_dbg: - debugfs_remove(priv->dbg_root); return err; } -static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +static void mlx5_pci_close(struct mlx5_core_dev *dev) { iounmap(dev->iseg); pci_clear_master(dev->pdev); release_bar(dev->pdev); mlx5_pci_disable_device(dev); - debugfs_remove_recursive(priv->dbg_root); } -static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +static int mlx5_init_once(struct mlx5_core_dev *dev) { - struct pci_dev *pdev = dev->pdev; int err; - priv->devcom = mlx5_devcom_register_device(dev); - if (IS_ERR(priv->devcom)) - dev_err(&pdev->dev, "failed to register with devcom (0x%p)\n", - priv->devcom); + dev->priv.devcom = mlx5_devcom_register_device(dev); + if (IS_ERR(dev->priv.devcom)) + mlx5_core_err(dev, "failed to register with devcom (0x%p)\n", + dev->priv.devcom); err = mlx5_query_board_id(dev); if (err) { - dev_err(&pdev->dev, "query board id failed\n"); + mlx5_core_err(dev, "query board id failed\n"); goto err_devcom; } - err = mlx5_eq_table_init(dev); + err = mlx5_irq_table_init(dev); if (err) { - dev_err(&pdev->dev, "failed to initialize eq\n"); + mlx5_core_err(dev, "failed to initialize irq table\n"); goto err_devcom; } + err = mlx5_eq_table_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize eq\n"); + goto err_irq_cleanup; + } + err = mlx5_events_init(dev); if (err) { - dev_err(&pdev->dev, "failed to initialize events\n"); + mlx5_core_err(dev, "failed to initialize events\n"); goto err_eq_cleanup; } err = mlx5_cq_debugfs_init(dev); if (err) { - dev_err(&pdev->dev, "failed to initialize cq debugfs\n"); + mlx5_core_err(dev, "failed to initialize cq debugfs\n"); goto err_events_cleanup; } @@ -835,50 +841,52 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_clock(dev); dev->vxlan = mlx5_vxlan_create(dev); + dev->geneve = mlx5_geneve_create(dev); err = mlx5_init_rl_table(dev); if (err) { - dev_err(&pdev->dev, "Failed to init rate limiting\n"); + mlx5_core_err(dev, "Failed to init rate limiting\n"); goto err_tables_cleanup; } err = mlx5_mpfs_init(dev); if (err) { - dev_err(&pdev->dev, "Failed to init l2 table %d\n", err); + mlx5_core_err(dev, "Failed to init l2 table %d\n", err); goto err_rl_cleanup; } - err = mlx5_eswitch_init(dev); + err = mlx5_sriov_init(dev); if (err) { - dev_err(&pdev->dev, "Failed to init eswitch %d\n", err); + mlx5_core_err(dev, "Failed to init sriov %d\n", err); goto err_mpfs_cleanup; } - err = mlx5_sriov_init(dev); + err = mlx5_eswitch_init(dev); if (err) { - dev_err(&pdev->dev, "Failed to init sriov %d\n", err); - goto err_eswitch_cleanup; + mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + goto err_sriov_cleanup; } err = mlx5_fpga_init(dev); if (err) { - dev_err(&pdev->dev, "Failed to init fpga device %d\n", err); - goto err_sriov_cleanup; + mlx5_core_err(dev, "Failed to init fpga device %d\n", err); + goto err_eswitch_cleanup; } dev->tracer = mlx5_fw_tracer_create(dev); return 0; -err_sriov_cleanup: - mlx5_sriov_cleanup(dev); err_eswitch_cleanup: mlx5_eswitch_cleanup(dev->priv.eswitch); +err_sriov_cleanup: + mlx5_sriov_cleanup(dev); err_mpfs_cleanup: mlx5_mpfs_cleanup(dev); err_rl_cleanup: mlx5_cleanup_rl_table(dev); err_tables_cleanup: + mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_qp_table(dev); @@ -887,6 +895,8 @@ err_events_cleanup: mlx5_events_cleanup(dev); err_eq_cleanup: mlx5_eq_table_cleanup(dev); +err_irq_cleanup: + mlx5_irq_table_cleanup(dev); err_devcom: mlx5_devcom_unregister_device(dev->priv.devcom); @@ -897,10 +907,11 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { mlx5_fw_tracer_destroy(dev->tracer); mlx5_fpga_cleanup(dev); - mlx5_sriov_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_sriov_cleanup(dev); mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); + mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); @@ -909,96 +920,82 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cq_debugfs_cleanup(dev); mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); + mlx5_irq_table_cleanup(dev); mlx5_devcom_unregister_device(dev->priv.devcom); } -static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, - bool boot) +static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) { - struct pci_dev *pdev = dev->pdev; int err; - dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); - mutex_lock(&dev->intf_state_mutex); - if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { - dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n", - __func__); - goto out; - } - - dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev), - fw_rev_min(dev), fw_rev_sub(dev)); + mlx5_core_info(dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev), + fw_rev_min(dev), fw_rev_sub(dev)); /* Only PFs hold the relevant PCIe information for this query */ if (mlx5_core_is_pf(dev)) pcie_print_link_status(dev->pdev); - /* on load removing any previous indication of internal error, device is - * up - */ - dev->state = MLX5_DEVICE_STATE_UP; - /* wait for firmware to accept initialization segments configurations */ - err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI); + err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL); if (err) { - dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n", - FW_PRE_INIT_TIMEOUT_MILI); - goto out_err; + mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n", + FW_PRE_INIT_TIMEOUT_MILI); + return err; } err = mlx5_cmd_init(dev); if (err) { - dev_err(&pdev->dev, "Failed initializing command interface, aborting\n"); - goto out_err; + mlx5_core_err(dev, "Failed initializing command interface, aborting\n"); + return err; } - err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI); + err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0); if (err) { - dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n", - FW_INIT_TIMEOUT_MILI); + mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n", + FW_INIT_TIMEOUT_MILI); goto err_cmd_cleanup; } err = mlx5_core_enable_hca(dev, 0); if (err) { - dev_err(&pdev->dev, "enable hca failed\n"); + mlx5_core_err(dev, "enable hca failed\n"); goto err_cmd_cleanup; } err = mlx5_core_set_issi(dev); if (err) { - dev_err(&pdev->dev, "failed to set issi\n"); + mlx5_core_err(dev, "failed to set issi\n"); goto err_disable_hca; } err = mlx5_satisfy_startup_pages(dev, 1); if (err) { - dev_err(&pdev->dev, "failed to allocate boot pages\n"); + mlx5_core_err(dev, "failed to allocate boot pages\n"); goto err_disable_hca; } err = set_hca_ctrl(dev); if (err) { - dev_err(&pdev->dev, "set_hca_ctrl failed\n"); + mlx5_core_err(dev, "set_hca_ctrl failed\n"); goto reclaim_boot_pages; } err = set_hca_cap(dev); if (err) { - dev_err(&pdev->dev, "set_hca_cap failed\n"); + mlx5_core_err(dev, "set_hca_cap failed\n"); goto reclaim_boot_pages; } err = mlx5_satisfy_startup_pages(dev, 0); if (err) { - dev_err(&pdev->dev, "failed to allocate init pages\n"); + mlx5_core_err(dev, "failed to allocate init pages\n"); goto reclaim_boot_pages; } err = mlx5_cmd_init_hca(dev, sw_owner_id); if (err) { - dev_err(&pdev->dev, "init hca failed\n"); + mlx5_core_err(dev, "init hca failed\n"); goto reclaim_boot_pages; } @@ -1008,166 +1005,225 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, err = mlx5_query_hca_caps(dev); if (err) { - dev_err(&pdev->dev, "query hca failed\n"); - goto err_stop_poll; + mlx5_core_err(dev, "query hca failed\n"); + goto stop_health; } - if (boot) { - err = mlx5_init_once(dev, priv); - if (err) { - dev_err(&pdev->dev, "sw objs init failed\n"); - goto err_stop_poll; - } + return 0; + +stop_health: + mlx5_stop_health_poll(dev, boot); +reclaim_boot_pages: + mlx5_reclaim_startup_pages(dev); +err_disable_hca: + mlx5_core_disable_hca(dev, 0); +err_cmd_cleanup: + mlx5_cmd_cleanup(dev); + + return err; +} + +static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) +{ + int err; + + mlx5_stop_health_poll(dev, boot); + err = mlx5_cmd_teardown_hca(dev); + if (err) { + mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n"); + return err; } + mlx5_reclaim_startup_pages(dev); + mlx5_core_disable_hca(dev, 0); + mlx5_cmd_cleanup(dev); + + return 0; +} + +static int mlx5_load(struct mlx5_core_dev *dev) +{ + int err; dev->priv.uar = mlx5_get_uars_page(dev); if (IS_ERR(dev->priv.uar)) { - dev_err(&pdev->dev, "Failed allocating uar, aborting\n"); + mlx5_core_err(dev, "Failed allocating uar, aborting\n"); err = PTR_ERR(dev->priv.uar); - goto err_get_uars; + return err; } mlx5_events_start(dev); mlx5_pagealloc_start(dev); + err = mlx5_irq_table_create(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc IRQs\n"); + goto err_irq_table; + } + err = mlx5_eq_table_create(dev); if (err) { - dev_err(&pdev->dev, "Failed to create EQs\n"); + mlx5_core_err(dev, "Failed to create EQs\n"); goto err_eq_table; } err = mlx5_fw_tracer_init(dev->tracer); if (err) { - dev_err(&pdev->dev, "Failed to init FW tracer\n"); + mlx5_core_err(dev, "Failed to init FW tracer\n"); goto err_fw_tracer; } err = mlx5_fpga_device_start(dev); if (err) { - dev_err(&pdev->dev, "fpga device start failed %d\n", err); + mlx5_core_err(dev, "fpga device start failed %d\n", err); goto err_fpga_start; } err = mlx5_accel_ipsec_init(dev); if (err) { - dev_err(&pdev->dev, "IPSec device start failed %d\n", err); + mlx5_core_err(dev, "IPSec device start failed %d\n", err); goto err_ipsec_start; } err = mlx5_accel_tls_init(dev); if (err) { - dev_err(&pdev->dev, "TLS device start failed %d\n", err); + mlx5_core_err(dev, "TLS device start failed %d\n", err); goto err_tls_start; } err = mlx5_init_fs(dev); if (err) { - dev_err(&pdev->dev, "Failed to init flow steering\n"); + mlx5_core_err(dev, "Failed to init flow steering\n"); goto err_fs; } err = mlx5_core_set_hca_defaults(dev); if (err) { - dev_err(&pdev->dev, "Failed to set hca defaults\n"); - goto err_fs; + mlx5_core_err(dev, "Failed to set hca defaults\n"); + goto err_sriov; } err = mlx5_sriov_attach(dev); if (err) { - dev_err(&pdev->dev, "sriov init failed %d\n", err); + mlx5_core_err(dev, "sriov init failed %d\n", err); goto err_sriov; } err = mlx5_ec_init(dev); if (err) { - dev_err(&pdev->dev, "Failed to init embedded CPU\n"); + mlx5_core_err(dev, "Failed to init embedded CPU\n"); goto err_ec; } - if (mlx5_device_registered(dev)) { - mlx5_attach_device(dev); - } else { - err = mlx5_register_device(dev); - if (err) { - dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); - goto err_reg_dev; - } - } - - set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); -out: - mutex_unlock(&dev->intf_state_mutex); - return 0; -err_reg_dev: - mlx5_ec_cleanup(dev); - err_ec: mlx5_sriov_detach(dev); - err_sriov: mlx5_cleanup_fs(dev); - err_fs: mlx5_accel_tls_cleanup(dev); - err_tls_start: mlx5_accel_ipsec_cleanup(dev); - err_ipsec_start: mlx5_fpga_device_stop(dev); - err_fpga_start: mlx5_fw_tracer_cleanup(dev->tracer); - err_fw_tracer: mlx5_eq_table_destroy(dev); - err_eq_table: + mlx5_irq_table_destroy(dev); +err_irq_table: mlx5_pagealloc_stop(dev); mlx5_events_stop(dev); - mlx5_put_uars_page(dev, priv->uar); + mlx5_put_uars_page(dev, dev->priv.uar); + return err; +} -err_get_uars: - if (boot) - mlx5_cleanup_once(dev); +static void mlx5_unload(struct mlx5_core_dev *dev) +{ + mlx5_ec_cleanup(dev); + mlx5_sriov_detach(dev); + mlx5_cleanup_fs(dev); + mlx5_accel_ipsec_cleanup(dev); + mlx5_accel_tls_cleanup(dev); + mlx5_fpga_device_stop(dev); + mlx5_fw_tracer_cleanup(dev->tracer); + mlx5_eq_table_destroy(dev); + mlx5_irq_table_destroy(dev); + mlx5_pagealloc_stop(dev); + mlx5_events_stop(dev); + mlx5_put_uars_page(dev, dev->priv.uar); +} -err_stop_poll: - mlx5_stop_health_poll(dev, boot); - if (mlx5_cmd_teardown_hca(dev)) { - dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); - goto out_err; +static int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) +{ + int err = 0; + + dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); + mutex_lock(&dev->intf_state_mutex); + if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { + mlx5_core_warn(dev, "interface is up, NOP\n"); + goto out; } + /* remove any previous indication of internal error */ + dev->state = MLX5_DEVICE_STATE_UP; -reclaim_boot_pages: - mlx5_reclaim_startup_pages(dev); + err = mlx5_function_setup(dev, boot); + if (err) + goto out; -err_disable_hca: - mlx5_core_disable_hca(dev, 0); + if (boot) { + err = mlx5_init_once(dev); + if (err) { + mlx5_core_err(dev, "sw objs init failed\n"); + goto function_teardown; + } + } -err_cmd_cleanup: - mlx5_cmd_cleanup(dev); + err = mlx5_load(dev); + if (err) + goto err_load; -out_err: + if (mlx5_device_registered(dev)) { + mlx5_attach_device(dev); + } else { + err = mlx5_register_device(dev); + if (err) { + mlx5_core_err(dev, "register device failed %d\n", err); + goto err_reg_dev; + } + } + + set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); +out: + mutex_unlock(&dev->intf_state_mutex); + + return err; + +err_reg_dev: + mlx5_unload(dev); +err_load: + if (boot) + mlx5_cleanup_once(dev); +function_teardown: + mlx5_function_teardown(dev, boot); dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mutex_unlock(&dev->intf_state_mutex); return err; } -static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, - bool cleanup) +static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) { int err = 0; if (cleanup) - mlx5_drain_health_recovery(dev); + mlx5_drain_health_wq(dev); mutex_lock(&dev->intf_state_mutex); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { - dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", - __func__); + mlx5_core_warn(dev, "%s: interface is down, NOP\n", + __func__); if (cleanup) mlx5_cleanup_once(dev); goto out; @@ -1178,73 +1234,26 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, if (mlx5_device_registered(dev)) mlx5_detach_device(dev); - mlx5_ec_cleanup(dev); - mlx5_sriov_detach(dev); - mlx5_cleanup_fs(dev); - mlx5_accel_ipsec_cleanup(dev); - mlx5_accel_tls_cleanup(dev); - mlx5_fpga_device_stop(dev); - mlx5_fw_tracer_cleanup(dev->tracer); - mlx5_eq_table_destroy(dev); - mlx5_pagealloc_stop(dev); - mlx5_events_stop(dev); - mlx5_put_uars_page(dev, priv->uar); + mlx5_unload(dev); + if (cleanup) mlx5_cleanup_once(dev); - mlx5_stop_health_poll(dev, cleanup); - - err = mlx5_cmd_teardown_hca(dev); - if (err) { - dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); - goto out; - } - mlx5_reclaim_startup_pages(dev); - mlx5_core_disable_hca(dev, 0); - mlx5_cmd_cleanup(dev); + mlx5_function_teardown(dev, cleanup); out: mutex_unlock(&dev->intf_state_mutex); return err; } -static const struct devlink_ops mlx5_devlink_ops = { -#ifdef CONFIG_MLX5_ESWITCH - .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, - .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, - .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, - .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, - .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, - .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, -#endif -}; - -#define MLX5_IB_MOD "mlx5_ib" -static int init_one(struct pci_dev *pdev, - const struct pci_device_id *id) +static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) { - struct mlx5_core_dev *dev; - struct devlink *devlink; - struct mlx5_priv *priv; + struct mlx5_priv *priv = &dev->priv; int err; - devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev)); - if (!devlink) { - dev_err(&pdev->dev, "kzalloc failed\n"); - return -ENOMEM; - } - - dev = devlink_priv(devlink); - priv = &dev->priv; - priv->pci_dev_data = id->driver_data; - - pci_set_drvdata(pdev, dev); - - dev->pdev = pdev; - dev->profile = &profile[prof_sel]; + dev->profile = &profile[profile_idx]; INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); - mutex_init(&dev->pci_status_mutex); mutex_init(&dev->intf_state_mutex); mutex_init(&priv->bfregs.reg_head.lock); @@ -1252,47 +1261,103 @@ static int init_one(struct pci_dev *pdev, INIT_LIST_HEAD(&priv->bfregs.reg_head.list); INIT_LIST_HEAD(&priv->bfregs.wc_head.list); - err = mlx5_pci_init(dev, priv); - if (err) { - dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err); - goto clean_dev; + mutex_init(&priv->alloc_mutex); + mutex_init(&priv->pgdir_mutex); + INIT_LIST_HEAD(&priv->pgdir_list); + spin_lock_init(&priv->mkey_lock); + + priv->dbg_root = debugfs_create_dir(dev_name(dev->device), + mlx5_debugfs_root); + if (!priv->dbg_root) { + dev_err(dev->device, "mlx5_core: error, Cannot create debugfs dir, aborting\n"); + return -ENOMEM; } err = mlx5_health_init(dev); - if (err) { - dev_err(&pdev->dev, "mlx5_health_init failed with error code %d\n", err); - goto close_pci; - } + if (err) + goto err_health_init; err = mlx5_pagealloc_init(dev); if (err) goto err_pagealloc_init; - err = mlx5_load_one(dev, priv, true); + return 0; + +err_pagealloc_init: + mlx5_health_cleanup(dev); +err_health_init: + debugfs_remove(dev->priv.dbg_root); + + return err; +} + +static void mlx5_mdev_uninit(struct mlx5_core_dev *dev) +{ + mlx5_pagealloc_cleanup(dev); + mlx5_health_cleanup(dev); + debugfs_remove_recursive(dev->priv.dbg_root); +} + +#define MLX5_IB_MOD "mlx5_ib" +static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct mlx5_core_dev *dev; + struct devlink *devlink; + int err; + + devlink = mlx5_devlink_alloc(); + if (!devlink) { + dev_err(&pdev->dev, "devlink alloc failed\n"); + return -ENOMEM; + } + + dev = devlink_priv(devlink); + dev->device = &pdev->dev; + dev->pdev = pdev; + + dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ? + MLX5_COREDEV_VF : MLX5_COREDEV_PF; + + err = mlx5_mdev_init(dev, prof_sel); + if (err) + goto mdev_init_err; + + err = mlx5_pci_init(dev, pdev, id); + if (err) { + mlx5_core_err(dev, "mlx5_pci_init failed with error code %d\n", + err); + goto pci_init_err; + } + + err = mlx5_load_one(dev, true); if (err) { - dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); + mlx5_core_err(dev, "mlx5_load_one failed with error code %d\n", + err); goto err_load_one; } request_module_nowait(MLX5_IB_MOD); - err = devlink_register(devlink, &pdev->dev); + err = mlx5_devlink_register(devlink, &pdev->dev); if (err) goto clean_load; + err = mlx5_crdump_enable(dev); + if (err) + dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); + pci_save_state(pdev); return 0; clean_load: - mlx5_unload_one(dev, priv, true); + mlx5_unload_one(dev, true); + err_load_one: - mlx5_pagealloc_cleanup(dev); -err_pagealloc_init: - mlx5_health_cleanup(dev); -close_pci: - mlx5_pci_close(dev, priv); -clean_dev: - devlink_free(devlink); + mlx5_pci_close(dev); +pci_init_err: + mlx5_mdev_uninit(dev); +mdev_init_err: + mlx5_devlink_free(devlink); return err; } @@ -1301,38 +1366,34 @@ static void remove_one(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); - struct mlx5_priv *priv = &dev->priv; - devlink_unregister(devlink); + mlx5_crdump_disable(dev); + mlx5_devlink_unregister(devlink); mlx5_unregister_device(dev); - if (mlx5_unload_one(dev, priv, true)) { - dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n"); - mlx5_health_cleanup(dev); + if (mlx5_unload_one(dev, true)) { + mlx5_core_err(dev, "mlx5_unload_one failed\n"); + mlx5_health_flush(dev); return; } - mlx5_pagealloc_cleanup(dev); - mlx5_health_cleanup(dev); - mlx5_pci_close(dev, priv); - devlink_free(devlink); + mlx5_pci_close(dev); + mlx5_mdev_uninit(dev); + mlx5_devlink_free(devlink); } static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_priv *priv = &dev->priv; - dev_info(&pdev->dev, "%s was called\n", __func__); + mlx5_core_info(dev, "%s was called\n", __func__); mlx5_enter_error_state(dev, false); - mlx5_unload_one(dev, priv, false); - /* In case of kernel call drain the health wq */ - if (state) { - mlx5_drain_health_wq(dev); - mlx5_pci_disable_device(dev); - } + mlx5_error_sw_reset(dev); + mlx5_unload_one(dev, false); + mlx5_drain_health_wq(dev); + mlx5_pci_disable_device(dev); return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; @@ -1354,7 +1415,9 @@ static int wait_vital(struct pci_dev *pdev) count = ioread32be(health->health_counter); if (count && count != 0xffffffff) { if (last_count && last_count != count) { - dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i); + mlx5_core_info(dev, + "wait vital counter value 0x%x after %d iterations\n", + count, i); return 0; } last_count = count; @@ -1370,12 +1433,12 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); int err; - dev_info(&pdev->dev, "%s was called\n", __func__); + mlx5_core_info(dev, "%s was called\n", __func__); err = mlx5_pci_enable_device(dev); if (err) { - dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n" - , __func__, err); + mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n", + __func__, err); return PCI_ERS_RESULT_DISCONNECT; } @@ -1384,7 +1447,7 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) pci_save_state(pdev); if (wait_vital(pdev)) { - dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__); + mlx5_core_err(dev, "%s: wait_vital timed out\n", __func__); return PCI_ERS_RESULT_DISCONNECT; } @@ -1394,17 +1457,16 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) static void mlx5_pci_resume(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_priv *priv = &dev->priv; int err; - dev_info(&pdev->dev, "%s was called\n", __func__); + mlx5_core_info(dev, "%s was called\n", __func__); - err = mlx5_load_one(dev, priv, false); + err = mlx5_load_one(dev, false); if (err) - dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n" - , __func__, err); + mlx5_core_err(dev, "%s: mlx5_load_one failed with error code: %d\n", + __func__, err); else - dev_info(&pdev->dev, "%s: device recovered\n", __func__); + mlx5_core_info(dev, "%s: device recovered\n", __func__); } static const struct pci_error_handlers mlx5_err_handler = { @@ -1466,13 +1528,12 @@ succeed: static void shutdown(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_priv *priv = &dev->priv; int err; - dev_info(&pdev->dev, "Shutdown was called\n"); + mlx5_core_info(dev, "Shutdown was called\n"); err = mlx5_try_fast_unload(dev); if (err) - mlx5_unload_one(dev, priv, false); + mlx5_unload_one(dev, false); mlx5_pci_disable_device(dev); } @@ -1500,7 +1561,8 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); void mlx5_disable_device(struct mlx5_core_dev *dev) { - mlx5_pci_err_detected(dev->pdev, 0); + mlx5_error_sw_reset(dev); + mlx5_unload_one(dev, false); } void mlx5_recover_device(struct mlx5_core_dev *dev) @@ -1538,7 +1600,7 @@ static int __init init(void) get_random_bytes(&sw_owner_id, sizeof(sw_owner_id)); mlx5_core_verify_params(); - mlx5_fpga_ipsec_build_fs_cmds(); + mlx5_accel_ipsec_build_fs_cmds(); mlx5_register_debugfs(); err = pci_register_driver(&mlx5_core_driver); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 7b331674622c..471bbc48bc1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -41,6 +41,7 @@ #include <linux/ptp_clock_kernel.h> #include <linux/mlx5/cq.h> #include <linux/mlx5/fs.h> +#include <linux/mlx5/driver.h> #define DRIVER_NAME "mlx5_core" #define DRIVER_VERSION "5.0-0" @@ -48,44 +49,57 @@ extern uint mlx5_core_debug_mask; #define mlx5_core_dbg(__dev, format, ...) \ - dev_dbg(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + dev_dbg((__dev)->device, "%s:%d:(pid %d): " format, \ __func__, __LINE__, current->pid, \ ##__VA_ARGS__) -#define mlx5_core_dbg_once(__dev, format, ...) \ - dev_dbg_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ - __func__, __LINE__, current->pid, \ +#define mlx5_core_dbg_once(__dev, format, ...) \ + dev_dbg_once((__dev)->device, \ + "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ ##__VA_ARGS__) -#define mlx5_core_dbg_mask(__dev, mask, format, ...) \ -do { \ - if ((mask) & mlx5_core_debug_mask) \ - mlx5_core_dbg(__dev, format, ##__VA_ARGS__); \ +#define mlx5_core_dbg_mask(__dev, mask, format, ...) \ +do { \ + if ((mask) & mlx5_core_debug_mask) \ + mlx5_core_dbg(__dev, format, ##__VA_ARGS__); \ } while (0) -#define mlx5_core_err(__dev, format, ...) \ - dev_err(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ - __func__, __LINE__, current->pid, \ +#define mlx5_core_err(__dev, format, ...) \ + dev_err((__dev)->device, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ ##__VA_ARGS__) -#define mlx5_core_err_rl(__dev, format, ...) \ - dev_err_ratelimited(&(__dev)->pdev->dev, \ - "%s:%d:(pid %d): " format, \ - __func__, __LINE__, current->pid, \ - ##__VA_ARGS__) +#define mlx5_core_err_rl(__dev, format, ...) \ + dev_err_ratelimited((__dev)->device, \ + "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) -#define mlx5_core_warn(__dev, format, ...) \ - dev_warn(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ - __func__, __LINE__, current->pid, \ - ##__VA_ARGS__) +#define mlx5_core_warn(__dev, format, ...) \ + dev_warn((__dev)->device, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) #define mlx5_core_warn_once(__dev, format, ...) \ - dev_warn_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + dev_warn_once((__dev)->device, "%s:%d:(pid %d): " format, \ __func__, __LINE__, current->pid, \ ##__VA_ARGS__) -#define mlx5_core_info(__dev, format, ...) \ - dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__) +#define mlx5_core_warn_rl(__dev, format, ...) \ + dev_warn_ratelimited((__dev)->device, \ + "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_info(__dev, format, ...) \ + dev_info((__dev)->device, format, ##__VA_ARGS__) + +#define mlx5_core_info_rl(__dev, format, ...) \ + dev_info_ratelimited((__dev)->device, \ + "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) enum { MLX5_CMD_DATA, /* print command payload only */ @@ -97,6 +111,11 @@ enum { MLX5_DRIVER_SYND = 0xbadd00de, }; +enum mlx5_semaphore_space_address { + MLX5_SEMAPHORE_SPACE_DOMAIN = 0xA, + MLX5_SEMAPHORE_SW_RESET = 0x20, +}; + int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); @@ -104,6 +123,7 @@ int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); +void mlx5_error_sw_reset(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); int mlx5_sriov_init(struct mlx5_core_dev *dev); @@ -111,7 +131,6 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); int mlx5_sriov_attach(struct mlx5_core_dev *dev); void mlx5_sriov_detach(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); -bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, @@ -140,6 +159,19 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +int mlx5_irq_table_init(struct mlx5_core_dev *dev); +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_irq_table_create(struct mlx5_core_dev *dev); +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); +int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb); +int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb); +struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx); +struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table); +int mlx5_irq_get_num_comp(struct mlx5_irq_table *table); + int mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); void mlx5_events_start(struct mlx5_core_dev *dev); @@ -171,11 +203,19 @@ int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj)) -int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw); +int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw, + struct netlink_ext_ack *extack); +int mlx5_fw_version_query(struct mlx5_core_dev *dev, + u32 *running_ver, u32 *stored_ver); void mlx5e_init(void); void mlx5e_cleanup(void); +static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) +{ + return pci_num_vf(dev->pdev) ? true : false; +} + static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) { /* LACP owner conditions: @@ -195,7 +235,7 @@ enum { MLX5_NIC_IFC_FULL = 0, MLX5_NIC_IFC_DISABLED = 1, MLX5_NIC_IFC_NO_DRAM_NIC = 2, - MLX5_NIC_IFC_INVALID = 3 + MLX5_NIC_IFC_SW_RESET = 7 }; u8 mlx5_get_nic_state(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index ea744d8466ea..9231b39d18b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -38,15 +38,12 @@ void mlx5_init_mkey_table(struct mlx5_core_dev *dev) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; - - memset(table, 0, sizeof(*table)); - rwlock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + xa_init_flags(&dev->priv.mkey_table, XA_FLAGS_LOCK_IRQ); } void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) { + WARN_ON(!xa_empty(&dev->priv.mkey_table)); } int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, @@ -56,8 +53,8 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_async_cbk_t callback, struct mlx5_async_work *context) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; + struct xarray *mkeys = &dev->priv.mkey_table; u32 mkey_index; void *mkc; int err; @@ -88,12 +85,10 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", mkey_index, key, mkey->key); - /* connect to mkey tree */ - write_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey); - write_unlock_irq(&table->lock); + err = xa_err(xa_store_irq(mkeys, mlx5_base_mkey(mkey->key), mkey, + GFP_KERNEL)); if (err) { - mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n", + mlx5_core_warn(dev, "failed xarray insert of mkey 0x%x, %d\n", mlx5_base_mkey(mkey->key), err); mlx5_core_destroy_mkey(dev, mkey); } @@ -114,17 +109,17 @@ EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; + struct xarray *mkeys = &dev->priv.mkey_table; struct mlx5_core_mkey *deleted_mkey; unsigned long flags; - write_lock_irqsave(&table->lock, flags); - deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); - write_unlock_irqrestore(&table->lock, flags); + xa_lock_irqsave(mkeys, flags); + deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key)); + xa_unlock_irqrestore(mkeys, flags); if (!deleted_mkey) { - mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n", + mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n", mlx5_base_mkey(mkey->key)); return -ENOENT; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 41025387ff2c..91bd258ecf1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -200,7 +200,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr) rb_erase(&fwp->rb_node, &dev->priv.page_root); if (fwp->free_count != 1) list_del(&fwp->list); - dma_unmap_page(&dev->pdev->dev, addr & MLX5_U64_4K_PAGE_MASK, + dma_unmap_page(dev->device, addr & MLX5_U64_4K_PAGE_MASK, PAGE_SIZE, DMA_BIDIRECTIONAL); __free_page(fwp->page); kfree(fwp); @@ -211,11 +211,12 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr) static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id) { + struct device *device = dev->device; + int nid = dev_to_node(device); struct page *page; u64 zero_addr = 1; u64 addr; int err; - int nid = dev_to_node(&dev->pdev->dev); page = alloc_pages_node(nid, GFP_HIGHUSER, 0); if (!page) { @@ -223,9 +224,8 @@ static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id) return -ENOMEM; } map: - addr = dma_map_page(&dev->pdev->dev, page, 0, - PAGE_SIZE, DMA_BIDIRECTIONAL); - if (dma_mapping_error(&dev->pdev->dev, addr)) { + addr = dma_map_page(device, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(device, addr)) { mlx5_core_warn(dev, "failed dma mapping page\n"); err = -ENOMEM; goto err_mapping; @@ -240,8 +240,7 @@ map: err = insert_page(dev, addr, page, func_id); if (err) { mlx5_core_err(dev, "failed to track allocated page\n"); - dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, - DMA_BIDIRECTIONAL); + dma_unmap_page(device, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); } err_mapping: @@ -249,7 +248,7 @@ err_mapping: __free_page(page); if (zero_addr == 0) - dma_unmap_page(&dev->pdev->dev, zero_addr, PAGE_SIZE, + dma_unmap_page(device, zero_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); return err; @@ -600,8 +599,7 @@ int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages) return 0; } - mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_pages, - dev->priv.name); + mlx5_core_dbg(dev, "Waiting for %d pages\n", prev_pages); while (*pages) { if (time_after(jiffies, end)) { mlx5_core_warn(dev, "aborting while there are %d pending pages\n", *pages); @@ -614,6 +612,6 @@ int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages) msleep(50); } - mlx5_core_dbg(dev, "All pages received from %s\n", dev->priv.name); + mlx5_core_dbg(dev, "All pages received\n"); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c new file mode 100644 index 000000000000..373981a659c7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif + +#define MLX5_MAX_IRQ_NAME (32) + +struct mlx5_irq { + struct atomic_notifier_head nh; + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_NAME]; +}; + +struct mlx5_irq_table { + struct mlx5_irq *irq; + int nvec; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif +}; + +int mlx5_irq_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table; + + irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL); + if (!irq_table) + return -ENOMEM; + + dev->priv.irq_table = irq_table; + return 0; +} + +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) +{ + kvfree(dev->priv.irq_table); +} + +int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) +{ + return table->nvec - MLX5_IRQ_VEC_COMP_BASE; +} + +static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) +{ + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + return &irq_table->irq[vecidx]; +} + +int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb) +{ + struct mlx5_irq *irq; + + irq = &irq_table->irq[vecidx]; + return atomic_notifier_chain_register(&irq->nh, nb); +} + +int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb) +{ + struct mlx5_irq *irq; + + irq = &irq_table->irq[vecidx]; + return atomic_notifier_chain_unregister(&irq->nh, nb); +} + +static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) +{ + atomic_notifier_call_chain(nh, 0, NULL); + return IRQ_HANDLED; +} + +static void irq_set_name(char *name, int vecidx) +{ + if (vecidx == 0) { + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async"); + return; + } + + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", + vecidx - MLX5_IRQ_VEC_COMP_BASE); + return; +} + +static int request_irqs(struct mlx5_core_dev *dev, int nvec) +{ + char name[MLX5_MAX_IRQ_NAME]; + int err; + int i; + + for (i = 0; i < nvec; i++) { + struct mlx5_irq *irq = mlx5_irq_get(dev, i); + int irqn = pci_irq_vector(dev->pdev, i); + + irq_set_name(name, i); + ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh); + snprintf(irq->name, MLX5_MAX_IRQ_NAME, + "%s@pci:%s", name, pci_name(dev->pdev)); + err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name, + &irq->nh); + if (err) { + mlx5_core_err(dev, "Failed to request irq\n"); + goto err_request_irq; + } + } + return 0; + +err_request_irq: + for (; i >= 0; i--) { + struct mlx5_irq *irq = mlx5_irq_get(dev, i); + int irqn = pci_irq_vector(dev->pdev, i); + + free_irq(irqn, &irq->nh); + } + return err; +} + +static void irq_clear_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + free_irq_cpu_rmap(irq_table->rmap); +#endif +} + +static int irq_set_rmap(struct mlx5_core_dev *mdev) +{ + int err = 0; +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = mdev->priv.irq_table; + int num_affinity_vec; + int vecidx; + + num_affinity_vec = mlx5_irq_get_num_comp(irq_table); + irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec); + if (!irq_table->rmap) { + err = -ENOMEM; + mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err); + goto err_out; + } + + vecidx = MLX5_IRQ_VEC_COMP_BASE; + for (; vecidx < irq_table->nvec; vecidx++) { + err = irq_cpu_rmap_add(irq_table->rmap, + pci_irq_vector(mdev->pdev, vecidx)); + if (err) { + mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d", + err); + goto err_irq_cpu_rmap_add; + } + } + return 0; + +err_irq_cpu_rmap_add: + irq_clear_rmap(mdev); +err_out: +#endif + return err; +} + +/* Completion IRQ vectors */ + +static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; + struct mlx5_irq *irq; + int irqn; + + irq = mlx5_irq_get(mdev, vecidx); + irqn = pci_irq_vector(mdev->pdev, vecidx); + if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node), + irq->mask); + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irqn, irq->mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", + irqn); + + return 0; +} + +static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; + struct mlx5_irq *irq; + int irqn; + + irq = mlx5_irq_get(mdev, vecidx); + irqn = pci_irq_vector(mdev->pdev, vecidx); + irq_set_affinity_hint(irqn, NULL); + free_cpumask_var(irq->mask); +} + +static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) +{ + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); + int err; + int i; + + for (i = 0; i < nvec; i++) { + err = set_comp_irq_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + clear_comp_irq_affinity_hint(mdev, i); + + return err; +} + +static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) +{ + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); + int i; + + for (i = 0; i < nvec; i++) + clear_comp_irq_affinity_hint(mdev, i); +} + +struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx) +{ + return irq_table->irq[vecidx].mask; +} + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table) +{ + return irq_table->rmap; +} +#endif + +static void unrequest_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); +} + +int mlx5_irq_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_irq_table *table = priv->irq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? + MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + int nvec; + int err; + + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + + MLX5_IRQ_VEC_COMP_BASE; + nvec = min_t(int, nvec, num_eqs); + if (nvec <= MLX5_IRQ_VEC_COMP_BASE) + return -ENOMEM; + + table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL); + if (!table->irq) + return -ENOMEM; + + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1, + nvec, PCI_IRQ_MSIX); + if (nvec < 0) { + err = nvec; + goto err_free_irq; + } + + table->nvec = nvec; + + err = irq_set_rmap(dev); + if (err) + goto err_set_rmap; + + err = request_irqs(dev, nvec); + if (err) + goto err_request_irqs; + + err = set_comp_irq_affinity_hints(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); + goto err_set_affinity; + } + + return 0; + +err_set_affinity: + unrequest_irqs(dev); +err_request_irqs: + irq_clear_rmap(dev); +err_set_rmap: + pci_free_irq_vectors(dev->pdev); +err_free_irq: + kfree(table->irq); + return err; +} + +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + /* free_irq requires that affinity and rmap will be cleared + * before calling it. This is why there is asymmetry with set_rmap + * which should be called after alloc_irq but before request_irq. + */ + irq_clear_rmap(dev); + clear_comp_irqs_affinity_hints(dev); + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); + pci_free_irq_vectors(dev->pdev); + kfree(table->irq); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 361468e0435d..cc262b30aed5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -293,15 +293,36 @@ static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num) return 0; } +static int mlx5_eeprom_page(int offset) +{ + if (offset < MLX5_EEPROM_PAGE_LENGTH) + /* Addresses between 0-255 - page 00 */ + return 0; + + /* Addresses between 256 - 639 belongs to pages 01, 02 and 03 + * For example, offset = 400 belongs to page 02: + * 1 + ((400 - 256)/128) = 2 + */ + return 1 + ((offset - MLX5_EEPROM_PAGE_LENGTH) / + MLX5_EEPROM_HIGH_PAGE_LENGTH); +} + +static int mlx5_eeprom_high_page_offset(int page_num) +{ + if (!page_num) /* Page 0 always start from low page */ + return 0; + + /* High page */ + return page_num * MLX5_EEPROM_HIGH_PAGE_LENGTH; +} + int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, u16 offset, u16 size, u8 *data) { + int module_num, page_num, status, err; u32 out[MLX5_ST_SZ_DW(mcia_reg)]; u32 in[MLX5_ST_SZ_DW(mcia_reg)]; - int module_num; u16 i2c_addr; - int status; - int err; void *ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); err = mlx5_query_module_num(dev, &module_num); @@ -311,8 +332,15 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, memset(in, 0, sizeof(in)); size = min_t(int, size, MLX5_EEPROM_MAX_BYTES); - if (offset < MLX5_EEPROM_PAGE_LENGTH && - offset + size > MLX5_EEPROM_PAGE_LENGTH) + /* Get the page number related to the given offset */ + page_num = mlx5_eeprom_page(offset); + + /* Set the right offset according to the page number, + * For page_num > 0, relative offset is always >= 128 (high page). + */ + offset -= mlx5_eeprom_high_page_offset(page_num); + + if (offset + size > MLX5_EEPROM_PAGE_LENGTH) /* Cross pages read, read until offset 256 in low page */ size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; @@ -321,7 +349,7 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, MLX5_SET(mcia_reg, in, l, 0); MLX5_SET(mcia_reg, in, module, module_num); MLX5_SET(mcia_reg, in, i2c_device_address, i2c_addr); - MLX5_SET(mcia_reg, in, page_number, 0); + MLX5_SET(mcia_reg, in, page_number, page_num); MLX5_SET(mcia_reg, in, device_address, offset); MLX5_SET(mcia_reg, in, size, size); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c new file mode 100644 index 000000000000..17ce9dd56b13 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <linux/mlx5/vport.h> +#include <rdma/ib_verbs.h> +#include <net/addrconf.h> + +#include "lib/mlx5.h" +#include "eswitch.h" +#include "fs_core.h" +#include "rdma.h" + +static void mlx5_rdma_disable_roce_steering(struct mlx5_core_dev *dev) +{ + struct mlx5_core_roce *roce = &dev->priv.roce; + + if (!roce->ft) + return; + + mlx5_del_flow_rules(roce->allow_rule); + mlx5_destroy_flow_group(roce->fg); + mlx5_destroy_flow_table(roce->ft); +} + +static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_roce *roce = &dev->priv.roce; + struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + void *match_criteria; + u32 *flow_group_in; + void *misc; + int err; + + if (!(MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) && + MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain))) + return -EOPNOTSUPP; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + kvfree(flow_group_in); + return -ENOMEM; + } + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX); + if (!ns) { + mlx5_core_err(dev, "Failed to get RDMA RX namespace"); + err = -EOPNOTSUPP; + goto free; + } + + ft_attr.max_fte = 1; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + mlx5_core_err(dev, "Failed to create RDMA RX flow table"); + err = PTR_ERR(ft); + goto free; + } + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + + fg = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(fg)) { + err = PTR_ERR(fg); + mlx5_core_err(dev, "Failed to create RDMA RX flow group err(%d)\n", err); + goto destroy_flow_table; + } + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, + dev->priv.eswitch->manager_vport); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, NULL, 0); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + mlx5_core_err(dev, "Failed to add RoCE allow rule, err=%d\n", + err); + goto destroy_flow_group; + } + + kvfree(spec); + kvfree(flow_group_in); + roce->ft = ft; + roce->fg = fg; + roce->allow_rule = flow_rule; + + return 0; + +destroy_flow_group: + mlx5_destroy_flow_group(fg); +destroy_flow_table: + mlx5_destroy_flow_table(ft); +free: + kvfree(spec); + kvfree(flow_group_in); + return err; +} + +static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev) +{ + mlx5_core_roce_gid_set(dev, 0, 0, 0, + NULL, NULL, false, 0, 0); +} + +static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *gid) +{ + u8 hw_id[ETH_ALEN]; + + mlx5_query_mac_address(dev, hw_id); + gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + addrconf_addr_eui48(&gid->raw[8], hw_id); +} + +static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev) +{ + union ib_gid gid; + u8 mac[ETH_ALEN]; + + mlx5_rdma_make_default_gid(dev, &gid); + return mlx5_core_roce_gid_set(dev, 0, + MLX5_ROCE_VERSION_1, + 0, gid.raw, mac, + false, 0, 1); +} + +void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) +{ + mlx5_rdma_disable_roce_steering(dev); + mlx5_rdma_del_roce_addr(dev); + mlx5_nic_vport_disable_roce(dev); +} + +void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_nic_vport_enable_roce(dev); + if (err) { + mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err); + return; + } + + err = mlx5_rdma_add_roce_addr(dev); + if (err) { + mlx5_core_err(dev, "Failed to add RoCE address: %d\n", err); + goto disable_roce; + } + + err = mlx5_rdma_enable_roce_steering(dev); + if (err) { + mlx5_core_err(dev, "Failed to enable RoCE steering: %d\n", err); + goto del_roce_addr; + } + + return; + +del_roce_addr: + mlx5_rdma_del_roce_addr(dev); +disable_roce: + mlx5_nic_vport_disable_roce(dev); + return; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h new file mode 100644 index 000000000000..750cff2a71a4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_RDMA_H__ +#define __MLX5_RDMA_H__ + +#include "mlx5_core.h" + +#ifdef CONFIG_MLX5_ESWITCH + +void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); +void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) {} +static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {} + +#endif /* CONFIG_MLX5_ESWITCH */ +#endif /* __MLX5_RDMA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 7b23fa8d2d60..61fcfd8b39b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -36,13 +36,6 @@ #include "mlx5_core.h" #include "eswitch.h" -bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) -{ - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - - return !!sriov->num_vfs; -} - static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -81,17 +74,11 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) int err; int vf; - if (sriov->enabled_vfs) { - mlx5_core_warn(dev, - "failed to enable SRIOV on device, already enabled with %d vfs\n", - sriov->enabled_vfs); - return -EBUSY; - } - if (!MLX5_ESWITCH_MANAGER(dev)) goto enable_vfs_hca; - err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); + mlx5_eswitch_update_num_of_vfs(dev->priv.eswitch, num_vfs); + err = mlx5_eswitch_enable(dev->priv.eswitch, MLX5_ESWITCH_LEGACY); if (err) { mlx5_core_warn(dev, "failed to enable eswitch SRIOV (%d)\n", err); @@ -106,7 +93,6 @@ enable_vfs_hca: continue; } sriov->vfs_ctx[vf].enabled = 1; - sriov->enabled_vfs++; if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { err = sriov_restore_guids(dev, vf); if (err) { @@ -125,13 +111,11 @@ enable_vfs_hca: static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int num_vfs = pci_num_vf(dev->pdev); int err; int vf; - if (!sriov->enabled_vfs) - goto out; - - for (vf = 0; vf < sriov->num_vfs; vf++) { + for (vf = num_vfs - 1; vf >= 0; vf--) { if (!sriov->vfs_ctx[vf].enabled) continue; err = mlx5_core_disable_hca(dev, vf + 1); @@ -140,44 +124,19 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) continue; } sriov->vfs_ctx[vf].enabled = 0; - sriov->enabled_vfs--; } -out: if (MLX5_ESWITCH_MANAGER(dev)) - mlx5_eswitch_disable_sriov(dev->priv.eswitch); + mlx5_eswitch_disable(dev->priv.eswitch); if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } -static int mlx5_pci_enable_sriov(struct pci_dev *pdev, int num_vfs) -{ - struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - int err = 0; - - if (pci_num_vf(pdev)) { - mlx5_core_warn(dev, "Unable to enable pci sriov, already enabled\n"); - return -EBUSY; - } - - err = pci_enable_sriov(pdev, num_vfs); - if (err) - mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); - - return err; -} - -static void mlx5_pci_disable_sriov(struct pci_dev *pdev) -{ - pci_disable_sriov(pdev); -} - static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int err = 0; + int err; err = mlx5_device_enable_sriov(dev, num_vfs); if (err) { @@ -185,54 +144,47 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) return err; } - err = mlx5_pci_enable_sriov(pdev, num_vfs); + err = pci_enable_sriov(pdev, num_vfs); if (err) { - mlx5_core_warn(dev, "mlx5_pci_enable_sriov failed : %d\n", err); + mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); mlx5_device_disable_sriov(dev); - return err; } - - sriov->num_vfs = num_vfs; - - return 0; + return err; } static void mlx5_sriov_disable(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - mlx5_pci_disable_sriov(pdev); + pci_disable_sriov(pdev); mlx5_device_disable_sriov(dev); - sriov->num_vfs = 0; } int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; int err = 0; mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs); - if (!mlx5_core_is_pf(dev)) - return -EPERM; if (num_vfs) err = mlx5_sriov_enable(pdev, num_vfs); else mlx5_sriov_disable(pdev); + if (!err) + sriov->num_vfs = num_vfs; return err ? err : num_vfs; } int mlx5_sriov_attach(struct mlx5_core_dev *dev) { - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - - if (!mlx5_core_is_pf(dev) || !sriov->num_vfs) + if (!mlx5_core_is_pf(dev) || !pci_num_vf(dev->pdev)) return 0; /* If sriov VFs exist in PCI level, enable them in device level */ - return mlx5_device_enable_sriov(dev, sriov->num_vfs); + return mlx5_device_enable_sriov(dev, pci_num_vf(dev->pdev)); } void mlx5_sriov_detach(struct mlx5_core_dev *dev) @@ -243,6 +195,30 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev) mlx5_device_disable_sriov(dev); } +static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) +{ + u16 host_total_vfs; + const u32 *out; + + if (mlx5_core_is_ecpf_esw_manager(dev)) { + out = mlx5_esw_query_functions(dev); + + /* Old FW doesn't support getting total_vfs from esw func + * but supports getting it from pci_sriov. + */ + if (IS_ERR(out)) + goto done; + host_total_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_total_vfs); + kvfree(out); + if (host_total_vfs) + return host_total_vfs; + } + +done: + return pci_sriov_get_totalvfs(dev->pdev); +} + int mlx5_sriov_init(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -253,6 +229,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) return 0; total_vfs = pci_sriov_get_totalvfs(pdev); + sriov->max_vfs = mlx5_get_max_vfs(dev); sriov->num_vfs = pci_num_vf(pdev); sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index c4d4b76096dc..b1068500f1df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -182,16 +182,24 @@ out: } EXPORT_SYMBOL_GPL(mlx5_core_query_sq_state); +int mlx5_core_create_tir_out(struct mlx5_core_dev *dev, + u32 *in, int inlen, + u32 *out, int outlen) +{ + MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR); + + return mlx5_cmd_exec(dev, in, inlen, out, outlen); +} +EXPORT_SYMBOL(mlx5_core_create_tir_out); + int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn) { - u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {}; int err; - MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + err = mlx5_core_create_tir_out(dev, in, inlen, + out, sizeof(out)); if (!err) *tirn = MLX5_GET(create_tir_out, out, tirn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 94464723ff77..0d006224d7b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -79,7 +79,7 @@ static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index) else system_page_index = index; - return (pci_resource_start(mdev->pdev, 0) >> PAGE_SHIFT) + system_page_index; + return (mdev->bar_addr >> PAGE_SHIFT) + system_page_index; } static void up_rel_func(struct kref *kref) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index ef95feca9961..c912d82ca64b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -34,6 +34,7 @@ #include <linux/etherdevice.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/vport.h> +#include <linux/mlx5/eswitch.h> #include "mlx5_core.h" /* Mutex to hold while enabling or disabling RoCE */ @@ -155,11 +156,12 @@ int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, } int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, - u16 vport, u8 *addr) + u16 vport, bool other, u8 *addr) { - u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {}; u8 *out_addr; + u32 *out; int err; out = kvzalloc(outlen, GFP_KERNEL); @@ -169,7 +171,12 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, nic_vport_context.permanent_address); - err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(query_nic_vport_context_in, in, other_vport, other); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); if (!err) ether_addr_copy(addr, &out_addr[2]); @@ -178,6 +185,12 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address); +int mlx5_query_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +{ + return mlx5_query_nic_vport_mac_address(mdev, 0, false, addr); +} +EXPORT_SYMBOL_GPL(mlx5_query_mac_address); + int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr) { @@ -194,9 +207,7 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, MLX5_SET(modify_nic_vport_context_in, in, field_select.permanent_address, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - - if (vport) - MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); @@ -291,9 +302,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); - - if (vport) - MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); if (err) @@ -371,67 +380,6 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); -int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, - u16 vport, - u16 vlans[], - int *size) -{ - u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; - void *nic_vport_ctx; - int req_list_size; - int max_list_size; - int out_sz; - void *out; - int err; - int i; - - req_list_size = *size; - max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); - if (req_list_size > max_list_size) { - mlx5_core_warn(dev, "Requested list size (%d) > (%d) max list size\n", - req_list_size, max_list_size); - req_list_size = max_list_size; - } - - out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + - req_list_size * MLX5_ST_SZ_BYTES(vlan_layout); - - memset(in, 0, sizeof(in)); - out = kzalloc(out_sz, GFP_KERNEL); - if (!out) - return -ENOMEM; - - MLX5_SET(query_nic_vport_context_in, in, opcode, - MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); - MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, - MLX5_NVPRT_LIST_TYPE_VLAN); - MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); - - if (vport) - MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); - - err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); - if (err) - goto out; - - nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, - nic_vport_context); - req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, - allowed_list_size); - - *size = req_list_size; - for (i = 0; i < req_list_size; i++) { - void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, - nic_vport_ctx, - current_uc_mac_address[i]); - vlans[i] = MLX5_GET(vlan_layout, vlan_addr, vlan); - } -out: - kfree(out); - return err; -} -EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_vlans); - int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, u16 vlans[], int list_size) @@ -544,7 +492,7 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, MLX5_SET(modify_nic_vport_context_in, in, field_select.node_guid, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); @@ -1218,3 +1166,17 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) return tmp; } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); + +/** + * mlx5_eswitch_get_total_vports - Get total vports of the eswitch + * + * @dev: Pointer to core device + * + * mlx5_eswitch_get_total_vports returns total number of vports for + * the eswitch. + */ +u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) +{ + return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev); +} +EXPORT_SYMBOL(mlx5_eswitch_get_total_vports); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index ea934a48c90a..f1ec58c9e9e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -243,6 +243,13 @@ static inline void *mlx5_wq_ll_get_wqe(struct mlx5_wq_ll *wq, u16 ix) return mlx5_frag_buf_get_wqe(&wq->fbc, ix); } +static inline u16 mlx5_wq_ll_get_wqe_next_ix(struct mlx5_wq_ll *wq, u16 ix) +{ + struct mlx5_wqe_srq_next_seg *wqe = mlx5_wq_ll_get_wqe(wq, ix); + + return be16_to_cpu(wqe->next_wqe_index); +} + static inline void mlx5_wq_ll_push(struct mlx5_wq_ll *wq, u16 head_next) { wq->head = head_next; |