diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox')
124 files changed, 11061 insertions, 2409 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 2391e3cfb56b..37fef8cd25e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -34,6 +34,7 @@ config MLX5_CORE_EN depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE depends on IPV6=y || IPV6=n || MLX5_CORE=m select PAGE_POOL + select DIMLIB default n ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. @@ -96,26 +97,60 @@ config MLX5_CORE_IPOIB ---help--- MLX5 IPoIB offloads & acceleration support. +config MLX5_FPGA_IPSEC + bool "Mellanox Technologies IPsec Innova support" + depends on MLX5_CORE + depends on MLX5_FPGA + default n + help + Build IPsec support for the Innova family of network cards by Mellanox + Technologies. Innova network cards are comprised of a ConnectX chip + and an FPGA chip on one board. If you select this option, the + mlx5_core driver will include the Innova FPGA core and allow building + sandbox-specific client drivers. + config MLX5_EN_IPSEC bool "IPSec XFRM cryptography-offload accelaration" - depends on MLX5_ACCEL depends on MLX5_CORE_EN depends on XFRM_OFFLOAD depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD + depends on MLX5_FPGA_IPSEC default n - ---help--- + help Build support for IPsec cryptography-offload accelaration in the NIC. Note: Support for hardware with this capability needs to be selected for this option to become available. -config MLX5_EN_TLS - bool "TLS cryptography-offload accelaration" +config MLX5_FPGA_TLS + bool "Mellanox Technologies TLS Innova support" + depends on TLS_DEVICE + depends on TLS=y || MLX5_CORE=m + depends on MLX5_FPGA + default n + help + Build TLS support for the Innova family of network cards by Mellanox + Technologies. Innova network cards are comprised of a ConnectX chip + and an FPGA chip on one board. 
If you select this option, the + mlx5_core driver will include the Innova FPGA core and allow building + sandbox-specific client drivers. + +config MLX5_TLS + bool "Mellanox Technologies TLS Connect-X support" depends on MLX5_CORE_EN depends on TLS_DEVICE depends on TLS=y || MLX5_CORE=m - depends on MLX5_ACCEL + select MLX5_ACCEL default n - ---help--- - Build support for TLS cryptography-offload accelaration in the NIC. - Note: Support for hardware with this capability needs to be selected - for this option to become available. + help + Build TLS support for the Connect-X family of network cards by Mellanox + Technologies. + +config MLX5_EN_TLS + bool "TLS cryptography-offload accelaration" + depends on MLX5_CORE_EN + depends on MLX5_FPGA_TLS || MLX5_TLS + default y + help + Build support for TLS cryptography-offload accelaration in the NIC. + Note: Support for hardware with this capability needs to be selected + for this option to become available. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 243368dc23db..57d2cc666fe3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -13,9 +13,10 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ - transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ - lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o + lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \ + diag/fw_tracer.o diag/crdump.o devlink.o # # Netdev basic @@ -23,7 +24,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ en_selftest.o en/port.o 
en/monitor_stats.o en/reporter_tx.o \ - en/params.o + en/params.o en/xsk/umem.o en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o # # Netdev extra @@ -31,12 +32,15 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \ + lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ + en/tc_tun_geneve.o # # Core extra # -mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o rdma.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ + ecpf.o rdma.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o @@ -49,12 +53,14 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib # # Accelerations & FPGA # -mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o accel/tls.o +mlx5_core-$(CONFIG_MLX5_FPGA_IPSEC) += fpga/ipsec.o +mlx5_core-$(CONFIG_MLX5_FPGA_TLS) += fpga/tls.o +mlx5_core-$(CONFIG_MLX5_ACCEL) += lib/crypto.o accel/tls.o accel/ipsec.o -mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \ - fpga/ipsec.o fpga/tls.o +mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ en_accel/ipsec_stats.o -mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o +mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \ + en_accel/ktls.o en_accel/ktls_tx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c 
b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c index 9f1b1939716a..eddc34e4a762 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c @@ -31,6 +31,8 @@ * */ +#ifdef CONFIG_MLX5_FPGA_IPSEC + #include <linux/mlx5/device.h> #include "accel/ipsec.h" @@ -74,6 +76,11 @@ int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) return mlx5_fpga_ipsec_init(mdev); } +void mlx5_accel_ipsec_build_fs_cmds(void) +{ + mlx5_fpga_ipsec_build_fs_cmds(); +} + void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) { mlx5_fpga_ipsec_cleanup(mdev); @@ -107,3 +114,5 @@ int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, return mlx5_fpga_esp_modify_xfrm(xfrm, attrs); } EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h index 024dbd22a89b..530e428d46ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h @@ -37,7 +37,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/accel.h> -#ifdef CONFIG_MLX5_ACCEL +#ifdef CONFIG_MLX5_FPGA_IPSEC #define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \ MLX5_ACCEL_IPSEC_CAP_DEVICE) @@ -54,6 +54,7 @@ void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, void mlx5_accel_esp_free_hw_context(void *context); int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev); +void mlx5_accel_ipsec_build_fs_cmds(void); void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev); #else @@ -79,6 +80,10 @@ static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) return 0; } +static inline void mlx5_accel_ipsec_build_fs_cmds(void) +{ +} + static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) { } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c index 
da7bd26368f9..cab708af3422 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c @@ -35,6 +35,9 @@ #include "accel/tls.h" #include "mlx5_core.h" +#include "lib/mlx5.h" + +#ifdef CONFIG_MLX5_FPGA_TLS #include "fpga/tls.h" int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, @@ -61,7 +64,8 @@ int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { - return mlx5_fpga_is_tls_device(mdev); + return mlx5_fpga_is_tls_device(mdev) || + mlx5_accel_is_ktls_device(mdev); } u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) @@ -78,3 +82,42 @@ void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { mlx5_fpga_tls_cleanup(mdev); } +#endif + +#ifdef CONFIG_MLX5_TLS +int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id) +{ + u32 sz_bytes; + void *key; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = + (struct tls12_crypto_info_aes_gcm_256 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + default: + return -EINVAL; + } + + return mlx5_create_encryption_key(mdev, key, sz_bytes, p_key_id); +} + +void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) +{ + mlx5_destroy_encryption_key(mdev, key_id); +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h index def4093ebfae..879321b21616 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h @@ -37,8 +37,51 @@ #include <linux/mlx5/driver.h> #include 
<linux/tls.h> -#ifdef CONFIG_MLX5_ACCEL +#ifdef CONFIG_MLX5_TLS +int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id); +void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id); +static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, tls)) + return false; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return false; + + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); +} + +static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info) +{ + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + if (crypto_info->version == TLS_1_2_VERSION) + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); + break; + } + + return false; +} +#else +static inline int +mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id) { return -ENOTSUPP; } +static inline void +mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) {} + +static inline bool +mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; } +static inline bool +mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info) { return false; } +#endif + +#ifdef CONFIG_MLX5_FPGA_TLS enum { MLX5_ACCEL_TLS_TX = BIT(0), MLX5_ACCEL_TLS_RX = BIT(1), @@ -84,11 +127,13 @@ static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, bool direction_sx) { } static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, u64 rcd_sn) { return 0; } -static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; } +static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) +{ + return mlx5_accel_is_ktls_device(mdev); +} static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; } static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; } static inline 
void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { } - #endif #endif /* __MLX5_ACCEL_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index e94686c42000..8cdd7e66f8df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -316,7 +316,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: case MLX5_CMD_OP_DEALLOC_MEMIC: case MLX5_CMD_OP_PAGE_FAULT_RESUME: - case MLX5_CMD_OP_QUERY_HOST_PARAMS: + case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -632,7 +632,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); - MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS); + MLX5_COMMAND_STR_CASE(QUERY_ESW_FUNCTIONS); MLX5_COMMAND_STR_CASE(CREATE_UCTX); MLX5_COMMAND_STR_CASE(DESTROY_UCTX); MLX5_COMMAND_STR_CASE(CREATE_UMEM); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 713a17ee3751..818edc63e428 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -58,7 +58,7 @@ void mlx5_cq_tasklet_cb(unsigned long data) list_for_each_entry_safe(mcq, temp, &ctx->process_list, tasklet_ctx.list) { list_del_init(&mcq->tasklet_ctx.list); - mcq->tasklet_ctx.comp(mcq); + mcq->tasklet_ctx.comp(mcq, NULL); mlx5_cq_put(mcq); if (time_after(jiffies, end)) break; @@ -68,7 +68,8 @@ void mlx5_cq_tasklet_cb(unsigned long data) tasklet_schedule(&ctx->task); } -static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq) +static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, + struct mlx5_eqe *eqe) { unsigned long flags; struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; @@ -87,11 +88,10 @@ static void 
mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq) } int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - u32 *in, int inlen) + u32 *in, int inlen, u32 *out, int outlen) { int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn); u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)]; - u32 out[MLX5_ST_SZ_DW(create_cq_out)]; u32 din[MLX5_ST_SZ_DW(destroy_cq_in)]; struct mlx5_eq_comp *eq; int err; @@ -100,9 +100,9 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, if (IS_ERR(eq)) return PTR_ERR(eq); - memset(out, 0, sizeof(out)); + memset(out, 0, outlen); MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); if (err) return err; @@ -158,13 +158,8 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; int err; - err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); - if (err) - return err; - - err = mlx5_eq_del_cq(&cq->eq->core, cq); - if (err) - return err; + mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); + mlx5_eq_del_cq(&cq->eq->core, cq); MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index f6b1da99e6c2..5bb6a26ea267 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -311,13 +311,20 @@ static u32 mlx5_gen_pci_id(struct mlx5_core_dev *dev) /* Must be called with intf_mutex held */ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) { - u32 pci_id = mlx5_gen_pci_id(dev); struct mlx5_core_dev *res = NULL; struct mlx5_core_dev *tmp_dev; struct mlx5_priv *priv; + u32 pci_id; + if (!mlx5_core_is_pf(dev)) + return NULL; + + pci_id = mlx5_gen_pci_id(dev); list_for_each_entry(priv, &mlx5_dev_list, 
dev_list) { tmp_dev = container_of(priv, struct mlx5_core_dev, priv); + if (!mlx5_core_is_pf(tmp_dev)) + continue; + if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) { res = tmp_dev; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c new file mode 100644 index 000000000000..a400f4430c28 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <devlink.h> + +#include "mlx5_core.h" +#include "eswitch.h" + +static int mlx5_devlink_flash_update(struct devlink *devlink, + const char *file_name, + const char *component, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + const struct firmware *fw; + int err; + + if (component) + return -EOPNOTSUPP; + + err = request_firmware_direct(&fw, file_name, &dev->pdev->dev); + if (err) + return err; + + return mlx5_firmware_flash(dev, fw, extack); +} + +static u8 mlx5_fw_ver_major(u32 version) +{ + return (version >> 24) & 0xff; +} + +static u8 mlx5_fw_ver_minor(u32 version) +{ + return (version >> 16) & 0xff; +} + +static u16 mlx5_fw_ver_subminor(u32 version) +{ + return version & 0xffff; +} + +#define DEVLINK_FW_STRING_LEN 32 + +static int +mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + char version_str[DEVLINK_FW_STRING_LEN]; + u32 running_fw, stored_fw; + int err; + + err = devlink_info_driver_name_put(req, DRIVER_NAME); + if (err) + return err; + + err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id); + if (err) + return err; + + err = mlx5_fw_version_query(dev, &running_fw, &stored_fw); + if (err) + return err; + + snprintf(version_str, sizeof(version_str), "%d.%d.%04d", + mlx5_fw_ver_major(running_fw), 
mlx5_fw_ver_minor(running_fw), + mlx5_fw_ver_subminor(running_fw)); + err = devlink_info_version_running_put(req, "fw.version", version_str); + if (err) + return err; + + /* no pending version, return running (stored) version */ + if (stored_fw == 0) + stored_fw = running_fw; + + snprintf(version_str, sizeof(version_str), "%d.%d.%04d", + mlx5_fw_ver_major(stored_fw), mlx5_fw_ver_minor(stored_fw), + mlx5_fw_ver_subminor(stored_fw)); + err = devlink_info_version_stored_put(req, "fw.version", version_str); + if (err) + return err; + + return 0; +} + +static const struct devlink_ops mlx5_devlink_ops = { +#ifdef CONFIG_MLX5_ESWITCH + .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, + .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, + .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, + .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, + .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, + .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, +#endif + .flash_update = mlx5_devlink_flash_update, + .info_get = mlx5_devlink_info_get, +}; + +struct devlink *mlx5_devlink_alloc(void) +{ + return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev)); +} + +void mlx5_devlink_free(struct devlink *devlink) +{ + devlink_free(devlink); +} + +int mlx5_devlink_register(struct devlink *devlink, struct device *dev) +{ + return devlink_register(devlink, dev); +} + +void mlx5_devlink_unregister(struct devlink *devlink) +{ + devlink_unregister(devlink); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h new file mode 100644 index 000000000000..d0ba03774ddf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef __MLX5_DEVLINK_H__ +#define __MLX5_DEVLINK_H__ + +#include <net/devlink.h> + +struct devlink 
*mlx5_devlink_alloc(void); +void mlx5_devlink_free(struct devlink *devlink); +int mlx5_devlink_register(struct devlink *devlink, struct device *dev); +void mlx5_devlink_unregister(struct devlink *devlink); + +#endif /* __MLX5_DEVLINK_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c new file mode 100644 index 000000000000..28d02749d3c4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include "lib/pci_vsc.h" +#include "lib/mlx5.h" + +#define BAD_ACCESS 0xBADACCE5 +#define MLX5_PROTECTED_CR_SCAN_CRSPACE 0x7 + +static bool mlx5_crdump_enabled(struct mlx5_core_dev *dev) +{ + return !!dev->priv.health.crdump_size; +} + +static int mlx5_crdump_fill(struct mlx5_core_dev *dev, u32 *cr_data) +{ + u32 crdump_size = dev->priv.health.crdump_size; + int i, ret; + + for (i = 0; i < (crdump_size / 4); i++) + cr_data[i] = BAD_ACCESS; + + ret = mlx5_vsc_gw_read_block_fast(dev, cr_data, crdump_size); + if (ret <= 0) { + if (ret == 0) + return -EIO; + return ret; + } + + if (crdump_size != ret) { + mlx5_core_warn(dev, "failed to read full dump, read %d out of %u\n", + ret, crdump_size); + return -EINVAL; + } + + return 0; +} + +int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data) +{ + int ret; + + if (!mlx5_crdump_enabled(dev)) + return -ENODEV; + + ret = mlx5_vsc_gw_lock(dev); + if (ret) { + mlx5_core_warn(dev, "crdump: failed to lock vsc gw err %d\n", + ret); + return ret; + } + /* Verify no other PF is running cr-dump or sw reset */ + ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, + MLX5_VSC_LOCK); + if (ret) { + mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + goto unlock_gw; + } + + ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL); + if (ret) + 
goto unlock_sem; + + ret = mlx5_crdump_fill(dev, cr_data); + +unlock_sem: + mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, MLX5_VSC_UNLOCK); +unlock_gw: + mlx5_vsc_gw_unlock(dev); + return ret; +} + +int mlx5_crdump_enable(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + u32 space_size; + int ret; + + if (!mlx5_core_is_pf(dev) || !mlx5_vsc_accessible(dev) || + mlx5_crdump_enabled(dev)) + return 0; + + ret = mlx5_vsc_gw_lock(dev); + if (ret) + return ret; + + /* Check if space is supported and get space size */ + ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, + &space_size); + if (ret) { + /* Unlock and mask error since space is not supported */ + mlx5_vsc_gw_unlock(dev); + return 0; + } + + if (!space_size) { + mlx5_core_warn(dev, "Invalid Crspace size, zero\n"); + mlx5_vsc_gw_unlock(dev); + return -EINVAL; + } + + ret = mlx5_vsc_gw_unlock(dev); + if (ret) + return ret; + + priv->health.crdump_size = space_size; + return 0; +} + +void mlx5_crdump_disable(struct mlx5_core_dev *dev) +{ + dev->priv.health.crdump_size = 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index a4cf123e3f17..ddf1b87f1bc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -187,6 +187,7 @@ TRACE_EVENT(mlx5_fs_set_fte, __field(u32, index) __field(u32, action) __field(u32, flow_tag) + __field(u32, flow_source) __field(u8, mask_enable) __field(int, new_fte) __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) @@ -204,7 +205,8 @@ TRACE_EVENT(mlx5_fs_set_fte, __entry->index = fte->index; __entry->action = fte->action.action; __entry->mask_enable = __entry->fg->mask.match_criteria_enable; - __entry->flow_tag = fte->action.flow_tag; + __entry->flow_tag = fte->flow_context.flow_tag; + __entry->flow_source = fte->flow_context.flow_source; 
memcpy(__entry->mask_outer, MLX5_ADDR_OF(fte_match_param, &__entry->fg->mask.match_criteria, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 6999f4486e9e..8a4930c8bf62 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -243,6 +243,19 @@ free_strings_db: return -ENOMEM; } +static void +mlx5_fw_tracer_init_saved_traces_array(struct mlx5_fw_tracer *tracer) +{ + tracer->st_arr.saved_traces_index = 0; + mutex_init(&tracer->st_arr.lock); +} + +static void +mlx5_fw_tracer_clean_saved_traces_array(struct mlx5_fw_tracer *tracer) +{ + mutex_destroy(&tracer->st_arr.lock); +} + static void mlx5_tracer_read_strings_db(struct work_struct *work) { struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer, @@ -522,6 +535,24 @@ static void mlx5_fw_tracer_clean_ready_list(struct mlx5_fw_tracer *tracer) list_del(&str_frmt->list); } +static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer, + u64 timestamp, bool lost, + u8 event_id, char *msg) +{ + struct mlx5_fw_trace_data *trace_data; + + mutex_lock(&tracer->st_arr.lock); + trace_data = &tracer->st_arr.straces[tracer->st_arr.saved_traces_index]; + trace_data->timestamp = timestamp; + trace_data->lost = lost; + trace_data->event_id = event_id; + strncpy(trace_data->msg, msg, TRACE_STR_MSG); + + tracer->st_arr.saved_traces_index = + (tracer->st_arr.saved_traces_index + 1) & (SAVED_TRACES_NUM - 1); + mutex_unlock(&tracer->st_arr.lock); +} + static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, struct mlx5_core_dev *dev, u64 trace_timestamp) @@ -540,6 +571,9 @@ static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost, str_frmt->event_id, tmp); + mlx5_fw_tracer_save_trace(dev->tracer, trace_timestamp, + str_frmt->lost, str_frmt->event_id, tmp); + 
/* remove it from hash */ mlx5_tracer_clean_message(str_frmt); } @@ -786,6 +820,109 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work) mlx5_fw_tracer_start(tracer); } +static int mlx5_fw_tracer_set_core_dump_reg(struct mlx5_core_dev *dev, + u32 *in, int size_in) +{ + u32 out[MLX5_ST_SZ_DW(core_dump_reg)] = {}; + + if (!MLX5_CAP_DEBUG(dev, core_dump_general) && + !MLX5_CAP_DEBUG(dev, core_dump_qp)) + return -EOPNOTSUPP; + + return mlx5_core_access_reg(dev, in, size_in, out, sizeof(out), + MLX5_REG_CORE_DUMP, 0, 1); +} + +int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_tracer *tracer = dev->tracer; + u32 in[MLX5_ST_SZ_DW(core_dump_reg)] = {}; + int err; + + if (!MLX5_CAP_DEBUG(dev, core_dump_general) || !tracer) + return -EOPNOTSUPP; + if (!tracer->owner) + return -EPERM; + + MLX5_SET(core_dump_reg, in, core_dump_type, 0x0); + + err = mlx5_fw_tracer_set_core_dump_reg(dev, in, sizeof(in)); + if (err) + return err; + queue_work(tracer->work_queue, &tracer->handle_traces_work); + flush_workqueue(tracer->work_queue); + return 0; +} + +static int +mlx5_devlink_fmsg_fill_trace(struct devlink_fmsg *fmsg, + struct mlx5_fw_trace_data *trace_data) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "timestamp", trace_data->timestamp); + if (err) + return err; + + err = devlink_fmsg_bool_pair_put(fmsg, "lost", trace_data->lost); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "event_id", trace_data->event_id); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "msg", trace_data->msg); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + return 0; +} + +int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer, + struct devlink_fmsg *fmsg) +{ + struct mlx5_fw_trace_data *straces = tracer->st_arr.straces; + u32 index, start_index, end_index; + u32 
saved_traces_index; + int err; + + if (!straces[0].timestamp) + return -ENOMSG; + + mutex_lock(&tracer->st_arr.lock); + saved_traces_index = tracer->st_arr.saved_traces_index; + if (straces[saved_traces_index].timestamp) + start_index = saved_traces_index; + else + start_index = 0; + end_index = (saved_traces_index - 1) & (SAVED_TRACES_NUM - 1); + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "dump fw traces"); + if (err) + goto unlock; + index = start_index; + while (index != end_index) { + err = mlx5_devlink_fmsg_fill_trace(fmsg, &straces[index]); + if (err) + goto unlock; + + index = (index + 1) & (SAVED_TRACES_NUM - 1); + } + + err = devlink_fmsg_arr_pair_nest_end(fmsg); +unlock: + mutex_unlock(&tracer->st_arr.lock); + return err; +} + /* Create software resources (Buffers, etc ..) */ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev) { @@ -833,6 +970,7 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev) goto free_log_buf; } + mlx5_fw_tracer_init_saved_traces_array(tracer); mlx5_core_dbg(dev, "FWTracer: Tracer created\n"); return tracer; @@ -917,6 +1055,7 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) cancel_work_sync(&tracer->read_fw_strings_work); mlx5_fw_tracer_clean_ready_list(tracer); mlx5_fw_tracer_clean_print_hash(tracer); + mlx5_fw_tracer_clean_saved_traces_array(tracer); mlx5_fw_tracer_free_strings_db(tracer); mlx5_fw_tracer_destroy_log_buf(tracer); flush_workqueue(tracer->work_queue); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h index a8b8747f2b61..40601fba80ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -46,6 +46,9 @@ #define TRACER_BLOCK_SIZE_BYTE 256 #define TRACES_PER_BLOCK 32 +#define TRACE_STR_MSG 256 +#define SAVED_TRACES_NUM 8192 + #define TRACER_MAX_PARAMS 7 #define MESSAGE_HASH_BITS 6 #define MESSAGE_HASH_SIZE 
BIT(MESSAGE_HASH_BITS) @@ -53,6 +56,13 @@ #define MASK_52_7 (0x1FFFFFFFFFFF80) #define MASK_6_0 (0x7F) +struct mlx5_fw_trace_data { + u64 timestamp; + bool lost; + u8 event_id; + char msg[TRACE_STR_MSG]; +}; + struct mlx5_fw_tracer { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -83,6 +93,13 @@ struct mlx5_fw_tracer { u32 consumer_index; } buff; + /* Saved Traces Array */ + struct { + struct mlx5_fw_trace_data straces[SAVED_TRACES_NUM]; + u32 saved_traces_index; + struct mutex lock; /* Protect st_arr access */ + } st_arr; + u64 last_timestamp; struct work_struct handle_traces_work; struct hlist_head hash[MESSAGE_HASH_SIZE]; @@ -171,5 +188,8 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev); int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer); +int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev); +int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer, + struct devlink_fmsg *fmsg); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 0ccd6d40baf7..d2228e37450f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -83,30 +83,3 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev) mlx5_peer_pf_cleanup(dev); } - -static int mlx5_query_host_params_context(struct mlx5_core_dev *dev, - u32 *out, int outlen) -{ - u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {}; - - MLX5_SET(query_host_params_in, in, opcode, - MLX5_CMD_OP_QUERY_HOST_PARAMS); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); -} - -int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) -{ - u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {}; - int err; - - err = mlx5_query_host_params_context(dev, out, sizeof(out)); - if (err) - return err; - - *num_vf = 
MLX5_GET(query_host_params_out, out, - host_params_context.host_num_of_vfs); - mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf); - - return 0; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h index 346372df218f..d3d7a00a02ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -16,7 +16,6 @@ enum { bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); int mlx5_ec_init(struct mlx5_core_dev *dev); void mlx5_ec_cleanup(struct mlx5_core_dev *dev); -int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf); #else /* CONFIG_MLX5_ESWITCH */ @@ -24,9 +23,6 @@ static inline bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} -static inline int -mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) -{ return -EOPNOTSUPP; } #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index cc6797e24571..263558875f20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -48,7 +48,7 @@ #include <linux/rhashtable.h> #include <net/switchdev.h> #include <net/xdp.h> -#include <linux/net_dim.h> +#include <linux/dim.h> #include <linux/bits.h> #include "wq.h" #include "mlx5_core.h" @@ -137,6 +137,7 @@ struct page_pool; #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 +#define MLX5E_TX_XSK_POLL_BUDGET 64 #define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ #define MLX5E_UMR_WQE_INLINE_SZ \ @@ -155,6 +156,11 @@ do { \ ##__VA_ARGS__); \ } while (0) +enum mlx5e_rq_group { + MLX5E_RQ_GROUP_REGULAR, + MLX5E_RQ_GROUP_XSK, + 
MLX5E_NUM_RQ_GROUPS /* Keep last. */ +}; static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) { @@ -179,7 +185,8 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) /* Use this function to get max num channels after netdev was created */ static inline int mlx5e_get_netdev_max_channels(struct net_device *netdev) { - return min_t(unsigned int, netdev->num_rx_queues, + return min_t(unsigned int, + netdev->num_rx_queues / MLX5E_NUM_RQ_GROUPS, netdev->num_tx_queues); } @@ -202,7 +209,10 @@ struct mlx5e_umr_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_umr_ctrl_seg uctrl; struct mlx5_mkey_seg mkc; - struct mlx5_mtt inline_mtts[0]; + union { + struct mlx5_mtt inline_mtts[0]; + u8 tls_static_params_ctx[0]; + }; }; extern const char mlx5e_self_tests[][ETH_GSTRING_LEN]; @@ -238,9 +248,9 @@ struct mlx5e_params { u16 num_channels; u8 num_tc; bool rx_cqe_compress_def; - struct net_dim_cq_moder rx_cq_moderation; - struct net_dim_cq_moder tx_cq_moderation; bool tunneled_offload_en; + struct dim_cq_moder rx_cq_moderation; + struct dim_cq_moder tx_cq_moderation; bool lro_en; u8 tx_min_inline_mode; bool vlan_strip_disable; @@ -250,6 +260,7 @@ struct mlx5e_params { u32 lro_timeout; u32 pflags; struct bpf_prog *xdp_prog; + struct mlx5e_xsk *xsk; unsigned int sw_mtu; int hard_mtu; }; @@ -325,6 +336,9 @@ struct mlx5e_tx_wqe_info { u32 num_bytes; u8 num_wqebbs; u8 num_dma; +#ifdef CONFIG_MLX5_EN_TLS + skb_frag_t *resync_dump_frag; +#endif }; enum mlx5e_dma_map_type { @@ -348,6 +362,13 @@ enum { struct mlx5e_sq_wqe_info { u8 opcode; + + /* Auxiliary data for different opcodes. 
*/ + union { + struct { + struct mlx5e_rq *rq; + } umr; + }; }; struct mlx5e_txqsq { @@ -356,7 +377,7 @@ struct mlx5e_txqsq { /* dirtied @completion */ u16 cc; u32 dma_fifo_cc; - struct net_dim dim; /* Adaptive Moderation */ + struct dim dim; /* Adaptive Moderation */ /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; @@ -375,6 +396,7 @@ struct mlx5e_txqsq { void __iomem *uar_map; struct netdev_queue *txq; u32 sqn; + u16 stop_room; u8 min_inline_mode; struct device *pdev; __be32 mkey_be; @@ -392,14 +414,55 @@ struct mlx5e_txqsq { } ____cacheline_aligned_in_smp; struct mlx5e_dma_info { - struct page *page; - dma_addr_t addr; + dma_addr_t addr; + union { + struct page *page; + struct { + u64 handle; + void *data; + } xsk; + }; +}; + +/* XDP packets can be transmitted in different ways. On completion, we need to + * distinguish between them to clean up things in a proper way. + */ +enum mlx5e_xdp_xmit_mode { + /* An xdp_frame was transmitted due to either XDP_REDIRECT from another + * device or XDP_TX from an XSK RQ. The frame has to be unmapped and + * returned. + */ + MLX5E_XDP_XMIT_MODE_FRAME, + + /* The xdp_frame was created in place as a result of XDP_TX from a + * regular RQ. No DMA remapping happened, and the page belongs to us. + */ + MLX5E_XDP_XMIT_MODE_PAGE, + + /* No xdp_frame was created at all, the transmit happened from a UMEM + * page. The UMEM Completion Ring producer pointer has to be increased. 
+ */ + MLX5E_XDP_XMIT_MODE_XSK, }; struct mlx5e_xdp_info { - struct xdp_frame *xdpf; - dma_addr_t dma_addr; - struct mlx5e_dma_info di; + enum mlx5e_xdp_xmit_mode mode; + union { + struct { + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + } frame; + struct { + struct mlx5e_rq *rq; + struct mlx5e_dma_info di; + } page; + }; +}; + +struct mlx5e_xdp_xmit_data { + dma_addr_t dma_addr; + void *data; + u32 len; }; struct mlx5e_xdp_info_fifo { @@ -425,8 +488,12 @@ struct mlx5e_xdp_mpwqe { }; struct mlx5e_xdpsq; -typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*, - struct mlx5e_xdp_info*); +typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *); +typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *, + struct mlx5e_xdp_xmit_data *, + struct mlx5e_xdp_info *, + int); + struct mlx5e_xdpsq { /* data path */ @@ -443,8 +510,10 @@ struct mlx5e_xdpsq { struct mlx5e_cq cq; /* read only */ + struct xdp_umem *umem; struct mlx5_wq_cyc wq; struct mlx5e_xdpsq_stats *stats; + mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check; mlx5e_fp_xmit_xdp_frame xmit_xdp_frame; struct { struct mlx5e_xdp_wqe_info *wqe_info; @@ -487,12 +556,6 @@ struct mlx5e_icosq { struct mlx5e_channel *channel; } ____cacheline_aligned_in_smp; -static inline bool -mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) -{ - return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc); -} - struct mlx5e_wqe_frag_info { struct mlx5e_dma_info *di; u32 offset; @@ -571,9 +634,11 @@ struct mlx5e_rq { u8 log_stride_sz; u8 umr_in_progress; u8 umr_last_bulk; + u8 umr_completed; } mpwqe; }; struct { + u16 umem_headroom; u16 headroom; u8 map_dir; /* dma map direction */ } buff; @@ -596,14 +661,18 @@ struct mlx5e_rq { int ix; unsigned int hw_mtu; - struct net_dim dim; /* Dynamic Interrupt Moderation */ + struct dim dim; /* Dynamic Interrupt Moderation */ /* XDP */ struct bpf_prog *xdp_prog; - struct mlx5e_xdpsq xdpsq; + struct mlx5e_xdpsq *xdpsq; DECLARE_BITMAP(flags, 8); struct 
page_pool *page_pool; + /* AF_XDP zero-copy */ + struct zero_copy_allocator zca; + struct xdp_umem *umem; + /* control */ struct mlx5_wq_ctrl wq_ctrl; __be32 mkey_be; @@ -616,9 +685,15 @@ struct mlx5e_rq { struct xdp_rxq_info xdp_rxq; } ____cacheline_aligned_in_smp; +enum mlx5e_channel_state { + MLX5E_CHANNEL_STATE_XSK, + MLX5E_CHANNEL_NUM_STATES +}; + struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; + struct mlx5e_xdpsq rq_xdpsq; struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_icosq icosq; /* internal control operations */ bool xdp; @@ -631,6 +706,13 @@ struct mlx5e_channel { /* XDP_REDIRECT */ struct mlx5e_xdpsq xdpsq; + /* AF_XDP zero-copy */ + struct mlx5e_rq xskrq; + struct mlx5e_xdpsq xsksq; + struct mlx5e_icosq xskicosq; + /* xskicosq can be accessed from any CPU - the spinlock protects it. */ + spinlock_t xskicosq_lock; + /* data path - accessed per napi poll */ struct irq_desc *irq_desc; struct mlx5e_ch_stats *stats; @@ -639,6 +721,7 @@ struct mlx5e_channel { struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; struct hwtstamp_config *tstamp; + DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; int cpu; cpumask_var_t xps_cpumask; @@ -654,14 +737,17 @@ struct mlx5e_channel_stats { struct mlx5e_ch_stats ch; struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC]; struct mlx5e_rq_stats rq; + struct mlx5e_rq_stats xskrq; struct mlx5e_xdpsq_stats rq_xdpsq; struct mlx5e_xdpsq_stats xdpsq; + struct mlx5e_xdpsq_stats xsksq; } ____cacheline_aligned_in_smp; enum { MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, MLX5E_STATE_XDP_TX_ENABLED, + MLX5E_STATE_XDP_OPEN, }; struct mlx5e_rqt { @@ -694,6 +780,17 @@ struct mlx5e_modify_sq_param { int rl_index; }; +struct mlx5e_xsk { + /* UMEMs are stored separately from channels, because we don't want to + * lose them when channels are recreated. The kernel also stores UMEMs, + * but it doesn't distinguish between zero-copy and non-zero-copy UMEMs, + * so rely on our mechanism. 
+ */ + struct xdp_umem **umems; + u16 refcnt; + bool ever_used; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; @@ -714,6 +811,7 @@ struct mlx5e_priv { struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS]; struct mlx5e_rss_params rss_params; u32 tx_rates[MLX5E_MAX_NUM_SQS]; @@ -750,6 +848,7 @@ struct mlx5e_priv { struct mlx5e_tls *tls; #endif struct devlink_health_reporter *tx_reporter; + struct mlx5e_xsk xsk; }; struct mlx5e_profile { @@ -763,6 +862,7 @@ struct mlx5e_profile { void (*cleanup_tx)(struct mlx5e_priv *priv); void (*enable)(struct mlx5e_priv *priv); void (*disable)(struct mlx5e_priv *priv); + int (*update_rx)(struct mlx5e_priv *priv); void (*update_stats)(struct mlx5e_priv *priv); void (*update_carrier)(struct mlx5e_priv *priv); struct { @@ -781,7 +881,7 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more); void mlx5e_trigger_irq(struct mlx5e_icosq *sq); -void mlx5e_completion_event(struct mlx5_core_cq *mcq); +void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe); void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); @@ -793,11 +893,13 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info); -void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, - bool recycle); +void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info, + bool recycle); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void 
mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); +void mlx5e_poll_ico_cq(struct mlx5e_cq *cq); bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq); void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); @@ -853,6 +955,30 @@ void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params, void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen); struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt); +struct mlx5e_xsk_param; + +struct mlx5e_rq_param; +int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, + struct xdp_umem *umem, struct mlx5e_rq *rq); +int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); +void mlx5e_deactivate_rq(struct mlx5e_rq *rq); +void mlx5e_close_rq(struct mlx5e_rq *rq); + +struct mlx5e_sq_param; +int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq); +void mlx5e_close_icosq(struct mlx5e_icosq *sq); +int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct xdp_umem *umem, + struct mlx5e_xdpsq *sq, bool is_redirect); +void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq); + +struct mlx5e_cq_param; +int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder, + struct mlx5e_cq_param *param, struct mlx5e_cq *cq); +void mlx5e_close_cq(struct mlx5e_cq *cq); + int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); @@ -898,102 +1024,6 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) MLX5_CAP_ETH(mdev, swp_csum) && MLX5_CAP_ETH(mdev, swp_lso); } -struct mlx5e_swp_spec { - __be16 l3_proto; - u8 l4_proto; - u8 is_tun; - __be16 tun_l3_proto; - u8 tun_l4_proto; -}; - -static inline void 
-mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, - struct mlx5e_swp_spec *swp_spec) -{ - /* SWP offsets are in 2-bytes words */ - eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; - if (swp_spec->l3_proto == htons(ETH_P_IPV6)) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; - if (swp_spec->l4_proto) { - eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2; - if (swp_spec->l4_proto == IPPROTO_UDP) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP; - } - - if (swp_spec->is_tun) { - eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; - if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6)) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; - } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */ - eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2; - if (swp_spec->l3_proto == htons(ETH_P_IPV6)) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; - } - switch (swp_spec->tun_l4_proto) { - case IPPROTO_UDP: - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; - /* fall through */ - case IPPROTO_TCP: - eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; - break; - } -} - -static inline void mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, - struct mlx5e_tx_wqe **wqe, - u16 *pi) -{ - struct mlx5_wq_cyc *wq = &sq->wq; - - *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - *wqe = mlx5_wq_cyc_get_wqe(wq, *pi); - memset(*wqe, 0, sizeof(**wqe)); -} - -static inline -struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) -{ - u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - - memset(cseg, 0, sizeof(*cseg)); - - cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); - cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); - - (*pc)++; - - return wqe; -} - -static inline -void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, - void __iomem *uar_map, - struct mlx5_wqe_ctrl_seg *ctrl) -{ - 
ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - /* ensure wqe is visible to device before updating doorbell record */ - dma_wmb(); - - *wq->db = cpu_to_be32(pc); - - /* ensure doorbell record is visible to device before ringing the - * doorbell - */ - wmb(); - - mlx5_write64((__be32 *)ctrl, uar_map); -} - -static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) -{ - struct mlx5_core_cq *mcq; - - mcq = &cq->mcq; - mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc); -} - extern const struct ethtool_ops mlx5e_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; @@ -1023,17 +1053,17 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv); -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); -int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, - u32 underlay_qpn, u32 *tisn); +int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); int mlx5e_create_tises(struct mlx5e_priv *priv); +int mlx5e_update_nic_rx(struct mlx5e_priv *priv); void mlx5e_update_carrier(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); @@ -1075,8 +1105,6 @@ u32 mlx5e_ethtool_get_rxfh_key_size(struct 
mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info); -int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, - struct ethtool_flash *flash); void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, struct ethtool_pauseparam *pauseparam); int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, @@ -1097,6 +1125,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index d3744bffbae3..79301d116667 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -3,65 +3,102 @@ #include "en/params.h" -u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params) +static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - u16 linear_rq_headroom = params->xdp_prog ? 
- XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; - u32 frag_sz; + return params->xdp_prog || xsk; +} + +u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u16 headroom = NET_IP_ALIGN; + + if (mlx5e_rx_is_xdp(params, xsk)) { + headroom += XDP_PACKET_HEADROOM; + if (xsk) + headroom += xsk->headroom; + } else { + headroom += MLX5_RX_HEADROOM; + } + + return headroom; +} + +u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk); + u32 frag_sz = linear_rq_headroom + hw_mtu; - linear_rq_headroom += NET_IP_ALIGN; + /* AF_XDP doesn't build SKBs in place. */ + if (!xsk) + frag_sz = MLX5_SKB_FRAG_SZ(frag_sz); - frag_sz = MLX5_SKB_FRAG_SZ(linear_rq_headroom + hw_mtu); + /* XDP in mlx5e doesn't support multiple packets per page. */ + if (mlx5e_rx_is_xdp(params, xsk)) + frag_sz = max_t(u32, frag_sz, PAGE_SIZE); - if (params->xdp_prog && frag_sz < PAGE_SIZE) - frag_sz = PAGE_SIZE; + /* Even if we can go with a smaller fragment size, we must not put + * multiple packets into a single frame. + */ + if (xsk) + frag_sz = max_t(u32, frag_sz, xsk->chunk_size); return frag_sz; } -u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params) +u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params); + u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk); return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); } -bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params) +bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params); + /* AF_XDP allocates SKBs on XDP_PASS - ensure they don't occupy more + * than one page. For this, check both with and without xsk. 
+ */ + u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk), + mlx5e_rx_get_linear_frag_sz(params, NULL)); - return !params->lro_en && frag_sz <= PAGE_SIZE; + return !params->lro_en && linear_frag_sz <= PAGE_SIZE; } #define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \ MLX5_MPWQE_LOG_STRIDE_SZ_BASE) bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params); + u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk); s8 signed_log_num_strides_param; u8 log_num_strides; - if (!mlx5e_rx_is_linear_skb(params)) + if (!mlx5e_rx_is_linear_skb(params, xsk)) return false; - if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ) + if (order_base_2(linear_frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ) return false; if (MLX5_CAP_GEN(mdev, ext_stride_num_range)) return true; - log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz); + log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); signed_log_num_strides_param = (s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE; return signed_log_num_strides_param >= 0; } -u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params) +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params); + u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params, xsk); /* Numbers are unsigned, don't subtract to avoid underflow. 
*/ if (params->log_rq_mtu_frames < @@ -72,33 +109,30 @@ u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params) } u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) - return order_base_2(mlx5e_rx_get_linear_frag_sz(params)); + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk)); return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); } u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { return MLX5_MPWRQ_LOG_WQE_SZ - - mlx5e_mpwqe_get_log_stride_size(mdev, params); + mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); } u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u16 linear_rq_headroom = params->xdp_prog ? - XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; - bool is_linear_skb; - - linear_rq_headroom += NET_IP_ALIGN; - - is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ? - mlx5e_rx_is_linear_skb(params) : - mlx5e_rx_mpwqe_is_linear_skb(mdev, params); + bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ? + mlx5e_rx_is_linear_skb(params, xsk) : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); - return is_linear_skb ? linear_rq_headroom : 0; + return is_linear_skb ? 
mlx5e_get_linear_rq_headroom(params, xsk) : 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index b106a0236f36..bd882b5ee9a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -6,17 +6,119 @@ #include "en.h" -u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params); -u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params); -bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params); +struct mlx5e_xsk_param { + u16 headroom; + u16 chunk_size; +}; + +struct mlx5e_rq_param { + u32 rqc[MLX5_ST_SZ_DW(rqc)]; + struct mlx5_wq_param wq; + struct mlx5e_rq_frags_info frags_info; +}; + +struct mlx5e_sq_param { + u32 sqc[MLX5_ST_SZ_DW(sqc)]; + struct mlx5_wq_param wq; + bool is_mpw; +}; + +struct mlx5e_cq_param { + u32 cqc[MLX5_ST_SZ_DW(cqc)]; + struct mlx5_wq_param wq; + u16 eq_ix; + u8 cq_period_mode; +}; + +struct mlx5e_channel_param { + struct mlx5e_rq_param rq; + struct mlx5e_sq_param sq; + struct mlx5e_sq_param xdp_sq; + struct mlx5e_sq_param icosq; + struct mlx5e_cq_param rx_cq; + struct mlx5e_cq_param tx_cq; + struct mlx5e_cq_param icosq_cq; +}; + +static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params, + u16 qid, + enum mlx5e_rq_group group, + u16 *ix) +{ + int nch = params->num_channels; + int ch = qid - nch * group; + + if (ch < 0 || ch >= nch) + return false; + + *ix = ch; + return true; +} + +static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params, + u16 qid, + u16 *ix, + enum mlx5e_rq_group *group) +{ + u16 nch = params->num_channels; + + *ix = qid % nch; + *group = qid / nch; +} + +static inline bool mlx5e_qid_validate(struct mlx5e_params *params, u64 qid) +{ + return qid < params->num_channels * MLX5E_NUM_RQ_GROUPS; +} + +/* Parameter calculations */ + +u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u32 
mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); -u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); + +/* Build queue parameters */ + +void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_param *param); +void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param); +void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_cq_param *param); +void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_cq_param *param); +void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_cq_param *param); +void mlx5e_build_icosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param); +void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_sq_param *param); #endif /* __MLX5_EN_PARAMS_H__ */ diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 231e7cdfc6f7..3739646b653f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -3,8 +3,22 @@ #include <net/vxlan.h> #include <net/gre.h> -#include "lib/vxlan.h" +#include <net/geneve.h> #include "en/tc_tun.h" +#include "en_tc.h" + +struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) +{ + if (netif_is_vxlan(tunnel_dev)) + return &vxlan_tunnel; + else if (netif_is_geneve(tunnel_dev)) + return &geneve_tunnel; + else if (netif_is_gretap(tunnel_dev) || + netif_is_ip6gretap(tunnel_dev)) + return &gre_tunnel; + else + return NULL; +} static int get_route_and_out_devs(struct mlx5e_priv *priv, struct net_device *dev, @@ -34,7 +48,8 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, *route_dev = dev; if (is_vlan_dev(*route_dev)) *out_dev = uplink_dev; - else if (mlx5e_eswitch_rep(dev)) + else if (mlx5e_eswitch_rep(dev) && + mlx5e_is_valid_eswitch_fwd_dev(priv, dev)) *out_dev = *route_dev; else return -EOPNOTSUPP; @@ -142,63 +157,15 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, return 0; } -static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key) -{ - __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); - struct udphdr *udp = (struct udphdr *)(buf); - struct vxlanhdr *vxh = (struct vxlanhdr *) - ((char *)udp + sizeof(struct udphdr)); - - udp->dest = tun_key->tp_dst; - vxh->vx_flags = VXLAN_HF_VNI; - vxh->vx_vni = vxlan_vni_field(tun_id); - - return 0; -} - -static int mlx5e_gen_gre_header(char buf[], struct ip_tunnel_key *tun_key) -{ - __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); - int hdr_len; - struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); - - /* the HW does not calculate GRE csum or sequences */ - if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) - return -EOPNOTSUPP; - - greh->protocol = 
htons(ETH_P_TEB); - - /* GRE key */ - hdr_len = gre_calc_hlen(tun_key->tun_flags); - greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); - if (tun_key->tun_flags & TUNNEL_KEY) { - __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - - *ptr = tun_id; - } - - return 0; -} - static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, struct mlx5e_encap_entry *e) { - int err = 0; - struct ip_tunnel_key *key = &e->tun_info.key; - - if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - *ip_proto = IPPROTO_UDP; - err = mlx5e_gen_vxlan_header(buf, key); - } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { - *ip_proto = IPPROTO_GRE; - err = mlx5e_gen_gre_header(buf, key); - } else { - pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n" - , e->tunnel_type); - err = -EOPNOTSUPP; + if (!e->tunnel) { + pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n"); + return -EOPNOTSUPP; } - return err; + return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e); } static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev, @@ -230,7 +197,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - struct ip_tunnel_key *tun_key = &e->tun_info.key; + const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct net_device *out_dev, *route_dev; struct neighbour *n = NULL; struct flowi4 fl4 = {}; @@ -254,7 +221,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, ipv4_encap_size = (is_vlan_dev(route_dev) ? 
VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct iphdr) + - e->tunnel_hlen; + e->tunnel->calc_hlen(e); if (max_encap_size < ipv4_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -346,7 +313,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - struct ip_tunnel_key *tun_key = &e->tun_info.key; + const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct net_device *out_dev, *route_dev; struct neighbour *n = NULL; struct flowi6 fl6 = {}; @@ -370,7 +337,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, ipv6_encap_size = (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct ipv6hdr) + - e->tunnel_hlen; + e->tunnel->calc_hlen(e); if (max_encap_size < ipv6_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -456,27 +423,12 @@ out: return err; } -int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev) -{ - if (netif_is_vxlan(tunnel_dev)) - return MLX5E_TC_TUNNEL_TYPE_VXLAN; - else if (netif_is_gretap(tunnel_dev) || - netif_is_ip6gretap(tunnel_dev)) - return MLX5E_TC_TUNNEL_TYPE_GRETAP; - else - return MLX5E_TC_TUNNEL_TYPE_UNKNOWN; -} - bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev) { - int tunnel_type = mlx5e_tc_tun_get_type(netdev); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev); - if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN && - MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) - return true; - else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP && - MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) + if (tunnel && tunnel->can_offload(priv)) return true; else return false; @@ -487,71 +439,87 @@ int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, struct mlx5e_encap_entry *e, struct netlink_ext_ack *extack) { - e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev); + struct mlx5e_tc_tunnel 
*tunnel = mlx5e_get_tc_tun(tunnel_dev); - if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - int dst_port = be16_to_cpu(e->tun_info.key.tp_dst); - - if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { - NL_SET_ERR_MSG_MOD(extack, - "vxlan udp dport was not registered with the HW"); - netdev_warn(priv->netdev, - "%d isn't an offloaded vxlan udp dport\n", - dst_port); - return -EOPNOTSUPP; - } - e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; - e->tunnel_hlen = VXLAN_HLEN; - } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { - e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; - e->tunnel_hlen = gre_calc_hlen(e->tun_info.key.tun_flags); - } else { + if (!tunnel) { e->reformat_type = -1; - e->tunnel_hlen = -1; return -EOPNOTSUPP; } - return 0; + + return tunnel->init_encap_attr(tunnel_dev, priv, e, extack); } -static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, - void *headers_c, - void *headers_v) +int mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v, u8 *match_level) +{ + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); + int err = 0; + + if (!tunnel) { + netdev_warn(priv->netdev, + "decapsulation offload is not supported for %s net device\n", + mlx5e_netdev_kind(filter_dev)); + err = -EOPNOTSUPP; + goto out; + } + + *match_level = tunnel->match_level; + + if (tunnel->parse_udp_ports) { + err = tunnel->parse_udp_ports(priv, spec, f, + headers_c, headers_v); + if (err) + goto out; + } + + if (tunnel->parse_tunnel) { + err = tunnel->parse_tunnel(priv, spec, f, + headers_c, headers_v); + if (err) + goto out; + } + +out: + return err; +} + +int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) { struct flow_rule *rule = 
tc_cls_flower_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; - void *misc_c = MLX5_ADDR_OF(fte_match_param, - spec->match_criteria, - misc_parameters); - void *misc_v = MLX5_ADDR_OF(fte_match_param, - spec->match_value, - misc_parameters); struct flow_match_ports enc_ports; - flow_rule_match_enc_ports(rule, &enc_ports); - /* Full udp dst port must be given */ - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) || - memchr_inv(&enc_ports.mask->dst, 0xff, sizeof(enc_ports.mask->dst))) { + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { NL_SET_ERR_MSG_MOD(extack, - "VXLAN decap filter must include enc_dst_port condition"); + "UDP tunnel decap filter must include enc_dst_port condition"); netdev_warn(priv->netdev, - "VXLAN decap filter must include enc_dst_port condition\n"); + "UDP tunnel decap filter must include enc_dst_port condition\n"); return -EOPNOTSUPP; } - /* udp dst port must be knonwn as a VXLAN port */ - if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(enc_ports.key->dst))) { + flow_rule_match_enc_ports(rule, &enc_ports); + + if (memchr_inv(&enc_ports.mask->dst, 0xff, + sizeof(enc_ports.mask->dst))) { NL_SET_ERR_MSG_MOD(extack, - "Matched UDP port is not registered as a VXLAN port"); + "UDP tunnel decap filter must match enc_dst_port fully"); netdev_warn(priv->netdev, - "UDP port %d is not registered as a VXLAN port\n", - be16_to_cpu(enc_ports.key->dst)); + "UDP tunnel decap filter must match enc_dst_port fully\n"); return -EOPNOTSUPP; } - /* dst UDP port is valid here */ + /* match on UDP protocol and dst port number */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); @@ -560,92 +528,15 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(enc_ports.key->dst)); + /* UDP src port on outer header is generated by HW, + * so it is probably 
a bad idea to request matching it. + * Nonetheless, it is allowed. + */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(enc_ports.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(enc_ports.key->src)); - /* match on VNI */ - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_match_enc_keyid enc_keyid; - - flow_rule_match_enc_keyid(rule, &enc_keyid); - - MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, - be32_to_cpu(enc_keyid.mask->keyid)); - MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, - be32_to_cpu(enc_keyid.key->keyid)); - } - return 0; -} - -static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, - void *outer_headers_c, - void *outer_headers_v) -{ - void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); - - if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) { - NL_SET_ERR_MSG_MOD(f->common.extack, - "GRE HW offloading is not supported"); - netdev_warn(priv->netdev, "GRE HW offloading is not supported\n"); - return -EOPNOTSUPP; - } - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, - ip_protocol, IPPROTO_GRE); - - /* gre protocol*/ - MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); - MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); - - /* gre key */ - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_match_enc_keyid enc_keyid; - - flow_rule_match_enc_keyid(rule, &enc_keyid); - MLX5_SET(fte_match_set_misc, misc_c, - gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); - MLX5_SET(fte_match_set_misc, misc_v, - gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); - } - return 0; } - -int mlx5e_tc_tun_parse(struct net_device *filter_dev, 
- struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, - void *headers_c, - void *headers_v, u8 *match_level) -{ - int tunnel_type; - int err = 0; - - tunnel_type = mlx5e_tc_tun_get_type(filter_dev); - if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - *match_level = MLX5_MATCH_L4; - err = mlx5e_tc_tun_parse_vxlan(priv, spec, f, - headers_c, headers_v); - } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { - *match_level = MLX5_MATCH_L3; - err = mlx5e_tc_tun_parse_gretap(priv, spec, f, - headers_c, headers_v); - } else { - netdev_warn(priv->netdev, - "decapsulation offload is not supported for %s (kind: \"%s\")\n", - netdev_name(filter_dev), - mlx5e_netdev_kind(filter_dev)); - - return -EOPNOTSUPP; - } - return err; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index b63f15de899d..3c48f7e62505 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -14,9 +14,41 @@ enum { MLX5E_TC_TUNNEL_TYPE_UNKNOWN, MLX5E_TC_TUNNEL_TYPE_VXLAN, - MLX5E_TC_TUNNEL_TYPE_GRETAP + MLX5E_TC_TUNNEL_TYPE_GENEVE, + MLX5E_TC_TUNNEL_TYPE_GRETAP, }; +struct mlx5e_tc_tunnel { + int tunnel_type; + enum mlx5_flow_match_level match_level; + + bool (*can_offload)(struct mlx5e_priv *priv); + int (*calc_hlen)(struct mlx5e_encap_entry *e); + int (*init_encap_attr)(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack); + int (*generate_ip_tun_hdr)(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e); + int (*parse_udp_ports)(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v); + int (*parse_tunnel)(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v); +}; + +extern struct mlx5e_tc_tunnel 
vxlan_tunnel; +extern struct mlx5e_tc_tunnel geneve_tunnel; +extern struct mlx5e_tc_tunnel gre_tunnel; + +struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev); + int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, struct mlx5e_priv *priv, struct mlx5e_encap_entry *e, @@ -30,7 +62,6 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e); -int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev); bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev); @@ -41,4 +72,10 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, void *headers_c, void *headers_v, u8 *match_level); +int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v); + #endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c new file mode 100644 index 000000000000..238ae85d07cc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. 
*/ + +#include <net/geneve.h> +#include "lib/geneve.h" +#include "en/tc_tun.h" + +#define MLX5E_GENEVE_VER 0 + +static bool mlx5e_tc_tun_can_offload_geneve(struct mlx5e_priv *priv) +{ + return !!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_GENEVE); +} + +static int mlx5e_tc_tun_calc_hlen_geneve(struct mlx5e_encap_entry *e) +{ + return sizeof(struct udphdr) + + sizeof(struct genevehdr) + + e->tun_info->options_len; +} + +static int mlx5e_tc_tun_check_udp_dport_geneve(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f) +{ + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) + return -EOPNOTSUPP; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* Currently we support only default GENEVE + * port, so udp dst port must match. + */ + if (be16_to_cpu(enc_ports.key->dst) != GENEVE_UDP_PORT) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP dst port is not registered as a GENEVE port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a GENEVE port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_udp_ports_geneve(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + int err; + + err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); + if (err) + return err; + + return mlx5e_tc_tun_check_udp_dport_geneve(priv, f); +} + +static int mlx5e_tc_tun_init_encap_attr_geneve(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &geneve_tunnel; + + /* Reformat type for GENEVE encap is similar to VXLAN: + * in both cases the HW adds in the same place a + * defined encapsulation header that the SW provides. 
+ */ + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + return 0; +} + +static void mlx5e_tunnel_id_to_vni(__be64 tun_id, __u8 *vni) +{ +#ifdef __BIG_ENDIAN + vni[0] = (__force __u8)(tun_id >> 16); + vni[1] = (__force __u8)(tun_id >> 8); + vni[2] = (__force __u8)tun_id; +#else + vni[0] = (__force __u8)((__force u64)tun_id >> 40); + vni[1] = (__force __u8)((__force u64)tun_id >> 48); + vni[2] = (__force __u8)((__force u64)tun_id >> 56); +#endif +} + +static int mlx5e_gen_ip_tunnel_header_geneve(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_info *tun_info = e->tun_info; + struct udphdr *udp = (struct udphdr *)(buf); + struct genevehdr *geneveh; + + geneveh = (struct genevehdr *)((char *)udp + sizeof(struct udphdr)); + + *ip_proto = IPPROTO_UDP; + + udp->dest = tun_info->key.tp_dst; + + memset(geneveh, 0, sizeof(*geneveh)); + geneveh->ver = MLX5E_GENEVE_VER; + geneveh->opt_len = tun_info->options_len / 4; + geneveh->oam = !!(tun_info->key.tun_flags & TUNNEL_OAM); + geneveh->critical = !!(tun_info->key.tun_flags & TUNNEL_CRIT_OPT); + mlx5e_tunnel_id_to_vni(tun_info->key.tun_id, geneveh->vni); + geneveh->proto_type = htons(ETH_P_TEB); + + if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) { + if (!geneveh->opt_len) + return -EOPNOTSUPP; + ip_tunnel_info_opts_get(geneveh->options, tun_info); + } + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_vni(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f) +{ + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_keyid enc_keyid; + void *misc_c, *misc_v; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return 0; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if 
(!enc_keyid.mask->keyid) + return 0; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_vni)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE VNI is not supported"); + netdev_warn(priv->netdev, "Matching on GENEVE VNI is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, geneve_vni, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, geneve_vni, be32_to_cpu(enc_keyid.key->keyid)); + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f) +{ + u8 max_tlv_option_data_len = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_option_data_len); + u8 max_tlv_options = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_options); + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + void *misc_c, *misc_v, *misc_3_c, *misc_3_v; + struct geneve_opt *option_key, *option_mask; + __be32 opt_data_key = 0, opt_data_mask = 0; + struct flow_match_enc_opts enc_opts; + int res = 0; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + misc_3_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_3); + misc_3_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) + return 0; + + flow_rule_match_enc_opts(rule, &enc_opts); + + if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.geneve_tlv_option_0_data)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options is not supported"); + netdev_warn(priv->netdev, + "Matching on GENEVE options is not supported\n"); + return -EOPNOTSUPP; + } + + /* make sure that we're talking about GENEVE options */ + + if 
(enc_opts.key->dst_opt_type != TUNNEL_GENEVE_OPT) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: option type is not GENEVE"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: option type is not GENEVE\n"); + return -EOPNOTSUPP; + } + + if (enc_opts.mask->len && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_geneve_opt_len)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options len is not supported"); + netdev_warn(priv->netdev, + "Matching on GENEVE options len is not supported\n"); + return -EOPNOTSUPP; + } + + /* max_geneve_tlv_option_data_len comes in multiples of 4 bytes, and it + * doesn't include the TLV option header. 'geneve_opt_len' is a total + * len of all the options, including the headers, also multiples of 4 + * bytes. Len that comes from the dissector is in bytes. + */ + + if ((enc_opts.key->len / 4) > ((max_tlv_option_data_len + 1) * max_tlv_options)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: unsupported options len"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: unsupported options len (len=%d)\n", + enc_opts.key->len); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, geneve_opt_len, enc_opts.mask->len / 4); + MLX5_SET(fte_match_set_misc, misc_v, geneve_opt_len, enc_opts.key->len / 4); + + /* we support matching on one option only, so just get it */ + option_key = (struct geneve_opt *)&enc_opts.key->data[0]; + option_mask = (struct geneve_opt *)&enc_opts.mask->data[0]; + + if (option_key->length > max_tlv_option_data_len) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: unsupported option len"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: unsupported option len (key=%d, mask=%d)\n", + option_key->length, option_mask->length); + return -EOPNOTSUPP; + } + + /* data can't be all 0 - fail to offload such rule */ + if (!memchr_inv(option_key->opt_data, 0, option_key->length * 4)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on 
GENEVE options: can't match on 0 data field"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: can't match on 0 data field\n"); + return -EOPNOTSUPP; + } + + /* add new GENEVE TLV options object */ + res = mlx5_geneve_tlv_option_add(priv->mdev->geneve, option_key); + if (res) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: failed creating TLV opt object"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: failed creating TLV opt object (class:type:len = 0x%x:0x%x:%d)\n", + be16_to_cpu(option_key->opt_class), + option_key->type, option_key->length); + return res; + } + + /* In general, after creating the object, need to query it + * in order to check which option data to set in misc3. + * But we support only geneve_tlv_option_0_data, so no + * point querying at this stage. + */ + + memcpy(&opt_data_key, option_key->opt_data, option_key->length * 4); + memcpy(&opt_data_mask, option_mask->opt_data, option_mask->length * 4); + MLX5_SET(fte_match_set_misc3, misc_3_v, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_key)); + MLX5_SET(fte_match_set_misc3, misc_3_c, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask)); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_params(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct netlink_ext_ack *extack = f->common.extack; + + /* match on OAM - packets with OAM bit on should NOT be offloaded */ + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_oam)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE OAM is not supported"); + netdev_warn(priv->netdev, "Matching on GENEVE OAM is not supported\n"); + return -EOPNOTSUPP; + } + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, 
geneve_oam); + MLX5_SET(fte_match_set_misc, misc_v, geneve_oam, 0); + + /* Match on GENEVE protocol. We support only Transparent Eth Bridge. */ + + if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_geneve_protocol_type)) { + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_protocol_type); + MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ETH_P_TEB); + } + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + int err; + + err = mlx5e_tc_tun_parse_geneve_params(priv, spec, f); + if (err) + return err; + + err = mlx5e_tc_tun_parse_geneve_vni(priv, spec, f); + if (err) + return err; + + return mlx5e_tc_tun_parse_geneve_options(priv, spec, f); +} + +struct mlx5e_tc_tunnel geneve_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GENEVE, + .match_level = MLX5_MATCH_L4, + .can_offload = mlx5e_tc_tun_can_offload_geneve, + .calc_hlen = mlx5e_tc_tun_calc_hlen_geneve, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_geneve, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_geneve, + .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_geneve, + .parse_tunnel = mlx5e_tc_tun_parse_geneve, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c new file mode 100644 index 000000000000..06908441d932 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. 
*/ + +#include <net/gre.h> +#include "en/tc_tun.h" + +static bool mlx5e_tc_tun_can_offload_gretap(struct mlx5e_priv *priv) +{ + return !!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap); +} + +static int mlx5e_tc_tun_calc_hlen_gretap(struct mlx5e_encap_entry *e) +{ + return gre_calc_hlen(e->tun_info->key.tun_flags); +} + +static int mlx5e_tc_tun_init_encap_attr_gretap(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &gre_tunnel; + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; + return 0; +} + +static int mlx5e_gen_ip_tunnel_header_gretap(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + int hdr_len; + + *ip_proto = IPPROTO_GRE; + + /* the HW does not calculate GRE csum or sequences */ + if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) + return -EOPNOTSUPP; + + greh->protocol = htons(ETH_P_TEB); + + /* GRE key */ + hdr_len = mlx5e_tc_tun_calc_hlen_gretap(e); + greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); + if (tun_key->tun_flags & TUNNEL_KEY) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + *ptr = tun_id; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE); + + /* gre protocol */ + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); + 
MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); + + /* gre key */ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + MLX5_SET(fte_match_set_misc, misc_c, + gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, + gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); + } + + return 0; +} + +struct mlx5e_tc_tunnel gre_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GRETAP, + .match_level = MLX5_MATCH_L3, + .can_offload = mlx5e_tc_tun_can_offload_gretap, + .calc_hlen = mlx5e_tc_tun_calc_hlen_gretap, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_gretap, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_gretap, + .parse_udp_ports = NULL, + .parse_tunnel = mlx5e_tc_tun_parse_gretap, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c new file mode 100644 index 000000000000..2857b38527d6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. 
*/ + +#include <net/vxlan.h> +#include "lib/vxlan.h" +#include "en/tc_tun.h" + +static bool mlx5e_tc_tun_can_offload_vxlan(struct mlx5e_priv *priv) +{ + return !!MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap); +} + +static int mlx5e_tc_tun_calc_hlen_vxlan(struct mlx5e_encap_entry *e) +{ + return VXLAN_HLEN; +} + +static int mlx5e_tc_tun_check_udp_dport_vxlan(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f) +{ + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) + return -EOPNOTSUPP; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* check the UDP destination port validity */ + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, + be16_to_cpu(enc_ports.key->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP dst port is not registered as a VXLAN port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a VXLAN port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_udp_ports_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + int err = 0; + + err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); + if (err) + return err; + + return mlx5e_tc_tun_check_udp_dport_vxlan(priv, f); +} + +static int mlx5e_tc_tun_init_encap_attr_vxlan(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + int dst_port = be16_to_cpu(e->tun_info->key.tp_dst); + + e->tunnel = &vxlan_tunnel; + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { + NL_SET_ERR_MSG_MOD(extack, + "vxlan udp dport was not registered with the HW"); + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", + dst_port); + return -EOPNOTSUPP; + } + + e->reformat_type = 
MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + return 0; +} + +static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + struct udphdr *udp = (struct udphdr *)(buf); + struct vxlanhdr *vxh; + + vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); + *ip_proto = IPPROTO_UDP; + + udp->dest = tun_key->tp_dst; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(tun_id); + + return 0; +} + +static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_keyid enc_keyid; + void *misc_c, *misc_v; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return 0; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid) + return 0; + + /* match on VNI is required */ + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_vxlan_vni)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on VXLAN VNI is not supported"); + netdev_warn(priv->netdev, + "Matching on VXLAN VNI is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, + be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, + be32_to_cpu(enc_keyid.key->keyid)); + + return 0; +} + +struct mlx5e_tc_tunnel vxlan_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_VXLAN, + .match_level = MLX5_MATCH_L4, + .can_offload = mlx5e_tc_tun_can_offload_vxlan, + .calc_hlen = mlx5e_tc_tun_calc_hlen_vxlan, + .init_encap_attr = 
mlx5e_tc_tun_init_encap_attr_vxlan, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan, + .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan, + .parse_tunnel = mlx5e_tc_tun_parse_vxlan, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h new file mode 100644 index 000000000000..ddfe19adb3d9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TXRX_H___ +#define __MLX5_EN_TXRX_H___ + +#include "en.h" + +#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS +#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ + MLX5E_SQ_NOPS_ROOM) + +#ifndef CONFIG_MLX5_EN_TLS +#define MLX5E_SQ_TLS_ROOM (0) +#else +/* TLS offload requires additional stop_room for: + * - a resync SKB. + * kTLS offload requires additional stop_room for: + * - static params WQE, + * - progress params WQE, and + * - resync DUMP per frag. 
+ */ +#define MLX5E_SQ_TLS_ROOM \ + (MLX5_SEND_WQE_MAX_WQEBBS + \ + MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + \ + MAX_SKB_FRAGS * MLX5E_KTLS_MAX_DUMP_WQEBBS) +#endif + +#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) + +static inline bool +mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) +{ + return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc); +} + +static inline void * +mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, size_t size, u16 *pi) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + void *wqe; + + *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + wqe = mlx5_wq_cyc_get_wqe(wq, *pi); + memset(wqe, 0, size); + + return wqe; +} + +static inline struct mlx5e_tx_wqe * +mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + + (*pc)++; + + return wqe; +} + +static inline struct mlx5e_tx_wqe * +mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + cseg->fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL; + + (*pc)++; + + return wqe; +} + +static inline void +mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq, + u16 pi, u16 nnops) +{ + struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; + + edge_wi = wi + nnops; + + /* fill sq frag edge with nops to avoid wqe wrapping two pages */ + for (; wi < edge_wi; wi++) { + wi->skb = NULL; + wi->num_wqebbs = 1; + mlx5e_post_nop(wq, 
sq->sqn, &sq->pc); + } + sq->stats->nop += nnops; +} + +static inline void +mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map, + struct mlx5_wqe_ctrl_seg *ctrl) +{ + ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + *wq->db = cpu_to_be32(pc); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + mlx5_write64((__be32 *)ctrl, uar_map); +} + +static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5e_tx_wqe *wqe) +{ + return !!wqe->ctrl.tisn; +} + +static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) +{ + struct mlx5_core_cq *mcq; + + mcq = &cq->mcq; + mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc); +} + +static inline struct mlx5e_sq_dma * +mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) +{ + return &sq->db.dma_fifo[i & sq->dma_fifo_mask]; +} + +static inline void +mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size, + enum mlx5e_dma_map_type map_type) +{ + struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++); + + dma->addr = addr; + dma->size = size; + dma->type = map_type; +} + +static inline void +mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) +{ + switch (dma->type) { + case MLX5E_DMA_MAP_SINGLE: + dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + case MLX5E_DMA_MAP_PAGE: + dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + default: + WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n"); + } +} + +/* SW parser related functions */ + +struct mlx5e_swp_spec { + __be16 l3_proto; + u8 l4_proto; + u8 is_tun; + __be16 tun_l3_proto; + u8 tun_l4_proto; +}; + +static inline void +mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, + struct mlx5e_swp_spec *swp_spec) +{ + /* SWP offsets are in 2-bytes words */ + eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; + if (swp_spec->l3_proto == 
htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; + if (swp_spec->l4_proto) { + eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2; + if (swp_spec->l4_proto == IPPROTO_UDP) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP; + } + + if (swp_spec->is_tun) { + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */ + eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2; + if (swp_spec->l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + } + switch (swp_spec->tun_l4_proto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + /* fall through */ + case IPPROTO_TCP: + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + } +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index eb8ef78e5626..b0b982cf69bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -31,11 +31,13 @@ */ #include <linux/bpf_trace.h> +#include <net/xdp_sock.h> #include "en/xdp.h" +#include "en/params.h" -int mlx5e_xdp_max_mtu(struct mlx5e_params *params) +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM; + int hr = mlx5e_get_linear_rq_headroom(params, xsk); /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). 
* The condition checked in mlx5e_rx_is_linear_skb is: @@ -54,25 +56,70 @@ int mlx5e_xdp_max_mtu(struct mlx5e_params *params) } static inline bool -mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di, - struct xdp_buff *xdp) +mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, + struct mlx5e_dma_info *di, struct xdp_buff *xdp) { + struct mlx5e_xdp_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; + struct xdp_frame *xdpf; + dma_addr_t dma_addr; - xdpi.xdpf = convert_to_xdp_frame(xdp); - if (unlikely(!xdpi.xdpf)) + xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) return false; - xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf); - dma_sync_single_for_device(sq->pdev, xdpi.dma_addr, - xdpi.xdpf->len, PCI_DMA_TODEVICE); - xdpi.di = *di; - return sq->xmit_xdp_frame(sq, &xdpi); + xdptxd.data = xdpf->data; + xdptxd.len = xdpf->len; + + if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) { + /* The xdp_buff was in the UMEM and was copied into a newly + * allocated page. The UMEM page was returned via the ZCA, and + * this new page has to be mapped at this point and has to be + * unmapped and returned via xdp_return_frame on completion. + */ + + /* Prevent double recycling of the UMEM page. Even in case this + * function returns false, the xdp_buff shouldn't be recycled, + * as it was already done in xdp_convert_zc_to_xdp_frame. + */ + __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ + + xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; + + dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len, + DMA_TO_DEVICE); + if (dma_mapping_error(sq->pdev, dma_addr)) { + xdp_return_frame(xdpf); + return false; + } + + xdptxd.dma_addr = dma_addr; + xdpi.frame.xdpf = xdpf; + xdpi.frame.dma_addr = dma_addr; + } else { + /* Driver assumes that convert_to_xdp_frame returns an xdp_frame + * that points to the same memory region as the original + * xdp_buff. It allows to map the memory only once and to use + * the DMA_BIDIRECTIONAL mode. 
+ */ + + xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE; + + dma_addr = di->addr + (xdpf->data - (void *)xdpf); + dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, + DMA_TO_DEVICE); + + xdptxd.dma_addr = dma_addr; + xdpi.page.rq = rq; + xdpi.page.di = *di; + } + + return sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0); } /* returns true if packet was consumed by xdp */ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - void *va, u16 *rx_headroom, u32 *len) + void *va, u16 *rx_headroom, u32 *len, bool xsk) { struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); struct xdp_buff xdp; @@ -86,16 +133,20 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + *len; xdp.data_hard_start = va; + if (xsk) + xdp.handle = di->xsk.handle; xdp.rxq = &rq->xdp_rxq; act = bpf_prog_run_xdp(prog, &xdp); + if (xsk) + xdp.handle += xdp.data - xdp.data_hard_start; switch (act) { case XDP_PASS: *rx_headroom = xdp.data - xdp.data_hard_start; *len = xdp.data_end - xdp.data; return false; case XDP_TX: - if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp))) + if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp))) goto xdp_abort; __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ return true; @@ -106,7 +157,8 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, goto xdp_abort; __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); - mlx5e_page_dma_unmap(rq, di); + if (!xsk) + mlx5e_page_dma_unmap(rq, di); rq->stats->xdp_redirect++; return true; default: @@ -160,7 +212,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) stats->mpwqe++; } -static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) +void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) { struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; @@ -183,32 +235,55 @@ static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq 
*sq) session->wqe = NULL; /* Close session */ } +enum { + MLX5E_XDP_CHECK_OK = 1, + MLX5E_XDP_CHECK_START_MPWQE = 2, +}; + +static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) +{ + if (unlikely(!sq->mpwqe.wqe)) { + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, + MLX5_SEND_WQE_MAX_WQEBBS))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->stats->full++; + return -EBUSY; + } + + return MLX5E_XDP_CHECK_START_MPWQE; + } + + return MLX5E_XDP_CHECK_OK; +} + static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, - struct mlx5e_xdp_info *xdpi) + struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xdp_info *xdpi, + int check_result) { struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; - struct xdp_frame *xdpf = xdpi->xdpf; - - if (unlikely(sq->hw_mtu < xdpf->len)) { + if (unlikely(xdptxd->len > sq->hw_mtu)) { stats->err++; return false; } - if (unlikely(!session->wqe)) { - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - MLX5_SEND_WQE_MAX_WQEBBS))) { - /* SQ is full, ring doorbell */ - mlx5e_xmit_xdp_doorbell(sq); - stats->full++; - return false; - } + if (!check_result) + check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq); + if (unlikely(check_result < 0)) + return false; + if (check_result == MLX5E_XDP_CHECK_START_MPWQE) { + /* Start the session when nothing can fail, so it's guaranteed + * that if there is an active session, it has at least one dseg, + * and it's safe to complete it at any time. 
+ */ mlx5e_xdp_mpwqe_session_start(sq); } - mlx5e_xdp_mpwqe_add_dseg(sq, xdpi, stats); + mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); if (unlikely(session->complete || session->ds_count == session->max_ds_count)) @@ -219,7 +294,22 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, return true; } -static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) +static int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq) +{ + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->stats->full++; + return -EBUSY; + } + + return MLX5E_XDP_CHECK_OK; +} + +static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xdp_info *xdpi, + int check_result) { struct mlx5_wq_cyc *wq = &sq->wq; u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); @@ -229,9 +319,8 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info * struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg = wqe->data; - struct xdp_frame *xdpf = xdpi->xdpf; - dma_addr_t dma_addr = xdpi->dma_addr; - unsigned int dma_len = xdpf->len; + dma_addr_t dma_addr = xdptxd->dma_addr; + u32 dma_len = xdptxd->len; struct mlx5e_xdpsq_stats *stats = sq->stats; @@ -242,18 +331,16 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info * return false; } - if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) { - /* SQ is full, ring doorbell */ - mlx5e_xmit_xdp_doorbell(sq); - stats->full++; + if (!check_result) + check_result = mlx5e_xmit_xdp_frame_check(sq); + if (unlikely(check_result < 0)) return false; - } cseg->fm_ce_se = 0; /* copy the inline part if required */ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE); + memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE); eseg->inline_hdr.sz = 
cpu_to_be16(MLX5E_XDP_MIN_INLINE); dma_len -= MLX5E_XDP_MIN_INLINE; dma_addr += MLX5E_XDP_MIN_INLINE; @@ -277,7 +364,7 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info * static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_wqe_info *wi, - struct mlx5e_rq *rq, + u32 *xsk_frames, bool recycle) { struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; @@ -286,22 +373,32 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, for (i = 0; i < wi->num_pkts; i++) { struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); - if (rq) { - /* XDP_TX */ - mlx5e_page_release(rq, &xdpi.di, recycle); - } else { - /* XDP_REDIRECT */ - dma_unmap_single(sq->pdev, xdpi.dma_addr, - xdpi.xdpf->len, DMA_TO_DEVICE); - xdp_return_frame(xdpi.xdpf); + switch (xdpi.mode) { + case MLX5E_XDP_XMIT_MODE_FRAME: + /* XDP_TX from the XSK RQ and XDP_REDIRECT */ + dma_unmap_single(sq->pdev, xdpi.frame.dma_addr, + xdpi.frame.xdpf->len, DMA_TO_DEVICE); + xdp_return_frame(xdpi.frame.xdpf); + break; + case MLX5E_XDP_XMIT_MODE_PAGE: + /* XDP_TX from the regular RQ */ + mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle); + break; + case MLX5E_XDP_XMIT_MODE_XSK: + /* AF_XDP send */ + (*xsk_frames)++; + break; + default: + WARN_ON_ONCE(true); } } } -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) { struct mlx5e_xdpsq *sq; struct mlx5_cqe64 *cqe; + u32 xsk_frames = 0; u16 sqcc; int i; @@ -343,10 +440,13 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) sqcc += wi->num_wqebbs; - mlx5e_free_xdpsq_desc(sq, wi, rq, true); + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true); } while (!last_wqe); } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + if (xsk_frames) + xsk_umem_complete_tx(sq->umem, xsk_frames); + sq->stats->cqes += i; mlx5_cqwq_update_db_record(&cq->wq); @@ -358,8 +458,10 @@ bool mlx5e_poll_xdpsq_cq(struct 
mlx5e_cq *cq, struct mlx5e_rq *rq) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) { + u32 xsk_frames = 0; + while (sq->cc != sq->pc) { struct mlx5e_xdp_wqe_info *wi; u16 ci; @@ -369,8 +471,11 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) sq->cc += wi->num_wqebbs; - mlx5e_free_xdpsq_desc(sq, wi, rq, false); + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false); } + + if (xsk_frames) + xsk_umem_complete_tx(sq->umem, xsk_frames); } int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -398,21 +503,27 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; + struct mlx5e_xdp_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; - xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) { + xdptxd.data = xdpf->data; + xdptxd.len = xdpf->len; + xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data, + xdptxd.len, DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) { xdp_return_frame_rx_napi(xdpf); drops++; continue; } - xdpi.xdpf = xdpf; + xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; + xdpi.frame.xdpf = xdpf; + xdpi.frame.dma_addr = xdptxd.dma_addr; - if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) { - dma_unmap_single(sq->pdev, xdpi.dma_addr, - xdpf->len, DMA_TO_DEVICE); + if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0))) { + dma_unmap_single(sq->pdev, xdptxd.dma_addr, + xdptxd.len, DMA_TO_DEVICE); xdp_return_frame_rx_napi(xdpf); drops++; } @@ -429,7 +540,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) { - struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; + struct mlx5e_xdpsq *xdpsq = rq->xdpsq; if (xdpsq->mpwqe.wqe) 
mlx5e_xdp_mpwqe_complete(xdpsq); @@ -444,6 +555,8 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw) { + sq->xmit_xdp_frame_check = is_mpw ? + mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check; sq->xmit_xdp_frame = is_mpw ? mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 8b537a4b0840..b90923932668 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -33,17 +33,20 @@ #define __MLX5_EN_XDP_H__ #include "en.h" +#include "en/txrx.h" #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) #define MLX5E_XDP_TX_EMPTY_DS_COUNT \ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) -int mlx5e_xdp_max_mtu(struct mlx5e_params *params); +struct mlx5e_xsk_param; +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - void *va, u16 *rx_headroom, u32 *len); -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq); -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq); + void *va, u16 *rx_headroom, u32 *len, bool xsk); +void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq); +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw); void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq); int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -66,6 +69,21 @@ static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); } +static inline void mlx5e_xdp_set_open(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_XDP_OPEN, &priv->state); +} + 
+static inline void mlx5e_xdp_set_closed(struct mlx5e_priv *priv) +{ + clear_bit(MLX5E_STATE_XDP_OPEN, &priv->state); +} + +static inline bool mlx5e_xdp_is_open(struct mlx5e_priv *priv) +{ + return test_bit(MLX5E_STATE_XDP_OPEN, &priv->state); +} + static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) { if (sq->doorbell_cseg) { @@ -97,15 +115,14 @@ static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq) } static inline void -mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi, +mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_xmit_data *xdptxd, struct mlx5e_xdpsq_stats *stats) { struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; - dma_addr_t dma_addr = xdpi->dma_addr; - struct xdp_frame *xdpf = xdpi->xdpf; struct mlx5_wqe_data_seg *dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count; - u16 dma_len = xdpf->len; + u32 dma_len = xdptxd->len; session->pkt_count++; @@ -124,7 +141,7 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi, } inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG); - memcpy(inline_dseg->data, xdpf->data, dma_len); + memcpy(inline_dseg->data, xdptxd->data, dma_len); session->ds_count += ds_cnt; stats->inlnw++; @@ -132,7 +149,7 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi, } no_inline: - dseg->addr = cpu_to_be64(dma_addr); + dseg->addr = cpu_to_be64(xdptxd->dma_addr); dseg->byte_count = cpu_to_be32(dma_len); dseg->lkey = sq->mkey_be; session->ds_count++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile new file mode 100644 index 000000000000..5ee42991900a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/../.. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c new file mode 100644 index 000000000000..6a55573ec8f2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "rx.h" +#include "en/xdp.h" +#include <net/xdp_sock.h> + +/* RX data path */ + +bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count) +{ + /* Check in advance that we have enough frames, instead of allocating + * one-by-one, failing and moving frames to the Reuse Ring. + */ + return xsk_umem_has_addrs_rq(rq->umem, count); +} + +int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct xdp_umem *umem = rq->umem; + u64 handle; + + if (!xsk_umem_peek_addr_rq(umem, &handle)) + return -ENOMEM; + + dma_info->xsk.handle = handle + rq->buff.umem_headroom; + dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle); + + /* No need to add headroom to the DMA address. In striding RQ case, we + * just provide pages for UMR, and headroom is counted at the setup + * stage when creating a WQE. In non-striding RQ case, headroom is + * accounted in mlx5e_alloc_rx_wqe. + */ + dma_info->addr = xdp_umem_get_dma(umem, handle); + + xsk_umem_discard_addr_rq(umem); + + dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + return 0; +} + +static inline void mlx5e_xsk_recycle_frame(struct mlx5e_rq *rq, u64 handle) +{ + xsk_umem_fq_reuse(rq->umem, handle & rq->umem->chunk_mask); +} + +/* XSKRQ uses pages from UMEM, they must not be released. They are returned to + * the userspace if possible, and if not, this function is called to reuse them + * in the driver. 
+ */ +void mlx5e_xsk_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + mlx5e_xsk_recycle_frame(rq, dma_info->xsk.handle); +} + +/* Return a frame back to the hardware to fill in again. It is used by XDP when + * the XDP program returns XDP_TX or XDP_REDIRECT not to an XSKMAP. + */ +void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle) +{ + struct mlx5e_rq *rq = container_of(zca, struct mlx5e_rq, zca); + + mlx5e_xsk_recycle_frame(rq, handle); +} + +static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data, + u32 cqe_bcnt) +{ + struct sk_buff *skb; + + skb = napi_alloc_skb(rq->cq.napi, cqe_bcnt); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + + skb_put_data(skb, data, cqe_bcnt); + + return skb; +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx) +{ + struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx]; + u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom; + u32 cqe_bcnt32 = cqe_bcnt; + void *va, *data; + u32 frag_size; + bool consumed; + + /* Check packet size. Note LRO doesn't use linear SKB */ + if (unlikely(cqe_bcnt > rq->hw_mtu)) { + rq->stats->oversize_pkts_sw_drop++; + return NULL; + } + + /* head_offset is not used in this function, because di->xsk.data and + * di->addr point directly to the necessary place. Furthermore, in the + * current implementation, one page = one packet = one frame, so + * head_offset should always be 0. 
+ */ + WARN_ON_ONCE(head_offset); + + va = di->xsk.data; + data = va + rx_headroom; + frag_size = rq->buff.headroom + cqe_bcnt32; + + dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL); + prefetch(data); + + rcu_read_lock(); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, true); + rcu_read_unlock(); + + /* Possible flows: + * - XDP_REDIRECT to XSKMAP: + * The page is owned by the userspace from now. + * - XDP_TX and other XDP_REDIRECTs: + * The page was returned by ZCA and recycled. + * - XDP_DROP: + * Recycle the page. + * - XDP_PASS: + * Allocate an SKB, copy the data and recycle the page. + * + * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its + * size is the same as the Driver RX Ring's size, and pages for WQEs are + * allocated first from the Reuse Ring, so it has enough space. + */ + + if (likely(consumed)) { + if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) + __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + return NULL; /* page/packet was consumed by XDP */ + } + + /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the + * frame. On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt32); +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt) +{ + struct mlx5e_dma_info *di = wi->di; + u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom; + void *va, *data; + bool consumed; + u32 frag_size; + + /* wi->offset is not used in this function, because di->xsk.data and + * di->addr point directly to the necessary place. Furthermore, in the + * current implementation, one page = one packet = one frame, so + * wi->offset should always be 0. 
+ */ + WARN_ON_ONCE(wi->offset); + + va = di->xsk.data; + data = va + rx_headroom; + frag_size = rq->buff.headroom + cqe_bcnt; + + dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL); + prefetch(data); + + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { + rq->stats->wqe_err++; + return NULL; + } + + rcu_read_lock(); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, true); + rcu_read_unlock(); + + if (likely(consumed)) + return NULL; /* page/packet was consumed by XDP */ + + /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse + * will be handled by mlx5e_put_rx_frag. + * On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h new file mode 100644 index 000000000000..307b923a1361 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_XSK_RX_H__ +#define __MLX5_EN_XSK_RX_H__ + +#include "en.h" + +/* RX data path */ + +bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count); +int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info); +void mlx5e_xsk_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info); +void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle); +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx); +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt); + +#endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c new file mode 100644 index 000000000000..aaffa6f68dc0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "setup.h" +#include "en/params.h" + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev) +{ + /* AF_XDP doesn't support frames larger than PAGE_SIZE, and the current + * mlx5e XDP implementation doesn't support multiple packets per page. + */ + if (xsk->chunk_size != PAGE_SIZE) + return false; + + /* Current MTU and XSK headroom don't allow packets to fit the frames. */ + if (mlx5e_rx_get_linear_frag_sz(params, xsk) > xsk->chunk_size) + return false; + + /* frag_sz is different for regular and XSK RQs, so ensure that linear + * SKB mode is possible. 
+ */ + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); + default: /* MLX5_WQ_TYPE_CYCLIC */ + return mlx5e_rx_is_linear_skb(params, xsk); + } +} + +static void mlx5e_build_xskicosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); +} + +static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_channel_param *cparam) +{ + const u8 xskicosq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + + mlx5e_build_rq_param(priv, params, xsk, &cparam->rq); + mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); + mlx5e_build_xskicosq_param(priv, xskicosq_size, &cparam->icosq); + mlx5e_build_rx_cq_param(priv, params, xsk, &cparam->rx_cq); + mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq); + mlx5e_build_ico_cq_param(priv, xskicosq_size, &cparam->icosq_cq); +} + +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_channel *c) +{ + struct mlx5e_channel_param cparam = {}; + struct dim_cq_moder icocq_moder = {}; + int err; + + if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) + return -EINVAL; + + mlx5e_build_xsk_cparam(priv, params, xsk, &cparam); + + err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq); + if (unlikely(err)) + return err; + + err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq); + if (unlikely(err)) + goto err_close_rx_cq; + + err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq); + if (unlikely(err)) + goto err_close_rq; + + /* Create a separate SQ, so that when the UMEM is disabled, we could + * close this SQ safely and stop receiving CQEs. 
In other case, e.g., if + * the XDPSQ was used instead, we might run into trouble when the UMEM + * is disabled and then reenabled, but the SQ continues receiving CQEs + * from the old UMEM. + */ + err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true); + if (unlikely(err)) + goto err_close_tx_cq; + + err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq); + if (unlikely(err)) + goto err_close_sq; + + /* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be + * triggered and NAPI to be called on the correct CPU. + */ + err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq); + if (unlikely(err)) + goto err_close_icocq; + + spin_lock_init(&c->xskicosq_lock); + + set_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + + return 0; + +err_close_icocq: + mlx5e_close_cq(&c->xskicosq.cq); + +err_close_sq: + mlx5e_close_xdpsq(&c->xsksq); + +err_close_tx_cq: + mlx5e_close_cq(&c->xsksq.cq); + +err_close_rq: + mlx5e_close_rq(&c->xskrq); + +err_close_rx_cq: + mlx5e_close_cq(&c->xskrq.cq); + + return err; +} + +void mlx5e_close_xsk(struct mlx5e_channel *c) +{ + clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + napi_synchronize(&c->napi); + + mlx5e_close_rq(&c->xskrq); + mlx5e_close_cq(&c->xskrq.cq); + mlx5e_close_icosq(&c->xskicosq); + mlx5e_close_cq(&c->xskicosq.cq); + mlx5e_close_xdpsq(&c->xsksq); + mlx5e_close_cq(&c->xsksq.cq); +} + +void mlx5e_activate_xsk(struct mlx5e_channel *c) +{ + set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + /* TX queue is created active. */ + mlx5e_trigger_irq(&c->xskicosq); +} + +void mlx5e_deactivate_xsk(struct mlx5e_channel *c) +{ + mlx5e_deactivate_rq(&c->xskrq); + /* TX queue is disabled on close. 
*/ +} + +static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn) +{ + struct mlx5e_redirect_rqt_param direct_rrp = { + .is_rss = false, + { + .rqn = rqn, + }, + }; + + u32 rqtn = priv->xsk_tir[ix].rqt.rqtn; + + return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); +} + +int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c) +{ + return mlx5e_redirect_xsk_rqt(priv, c->ix, c->xskrq.rqn); +} + +int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix) +{ + return mlx5e_redirect_xsk_rqt(priv, ix, priv->drop_rq.rqn); +} + +int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + int err, i; + + if (!priv->xsk.refcnt) + return 0; + + for (i = 0; i < chs->num; i++) { + struct mlx5e_channel *c = chs->c[i]; + + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + continue; + + err = mlx5e_xsk_redirect_rqt_to_channel(priv, c); + if (unlikely(err)) + goto err_stop; + } + + return 0; + +err_stop: + for (i--; i >= 0; i--) { + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) + continue; + + mlx5e_xsk_redirect_rqt_to_drop(priv, i); + } + + return err; +} + +void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + int i; + + if (!priv->xsk.refcnt) + return; + + for (i = 0; i < chs->num; i++) { + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) + continue; + + mlx5e_xsk_redirect_rqt_to_drop(priv, i); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h new file mode 100644 index 000000000000..0dd11b81c046 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_XSK_SETUP_H__ +#define __MLX5_EN_XSK_SETUP_H__ + +#include "en.h" + +struct mlx5e_xsk_param; + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev); +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_channel *c); +void mlx5e_close_xsk(struct mlx5e_channel *c); +void mlx5e_activate_xsk(struct mlx5e_channel *c); +void mlx5e_deactivate_xsk(struct mlx5e_channel *c); +int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c); +int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix); +int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs); +void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs); + +#endif /* __MLX5_EN_XSK_SETUP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c new file mode 100644 index 000000000000..35e188cf4ea4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "tx.h" +#include "umem.h" +#include "en/xdp.h" +#include "en/params.h" +#include <net/xdp_sock.h> + +int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_channel *c; + u16 ix; + + if (unlikely(!mlx5e_xdp_is_open(priv))) + return -ENETDOWN; + + if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) + return -EINVAL; + + c = priv->channels.c[ix]; + + if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))) + return -ENXIO; + + if (!napi_if_scheduled_mark_missed(&c->napi)) { + spin_lock(&c->xskicosq_lock); + mlx5e_trigger_irq(&c->xskicosq); + spin_unlock(&c->xskicosq_lock); + } + + return 0; +} + +/* When TX fails (because of the size of the packet), we need to get completions + * in order, so post a NOP to get a CQE. Since AF_XDP doesn't distinguish + * between successful TX and errors, handling in mlx5e_poll_xdpsq_cq is the + * same. + */ +static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_info *xdpi) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; + struct mlx5e_tx_wqe *nopwqe; + + wi->num_wqebbs = 1; + wi->num_pkts = 1; + + nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); + sq->doorbell_cseg = &nopwqe->ctrl; +} + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) +{ + struct xdp_umem *umem = sq->umem; + struct mlx5e_xdp_info xdpi; + struct mlx5e_xdp_xmit_data xdptxd; + bool work_done = true; + bool flush = false; + + xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK; + + for (; budget; budget--) { + int check_result = sq->xmit_xdp_frame_check(sq); + struct xdp_desc desc; + + if (unlikely(check_result < 0)) { + work_done = false; + break; + } + + if (!xsk_umem_consume_tx(umem, &desc)) { + /* TX will get stuck until something wakes it up by + * triggering NAPI. 
Currently it's expected that the + * application calls sendto() if there are consumed, but + * not completed frames. + */ + break; + } + + xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr); + xdptxd.data = xdp_umem_get_data(umem, desc.addr); + xdptxd.len = desc.len; + + dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr, + xdptxd.len, DMA_BIDIRECTIONAL); + + if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, check_result))) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + + mlx5e_xsk_tx_post_err(sq, &xdpi); + } + + flush = true; + } + + if (flush) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + mlx5e_xmit_xdp_doorbell(sq); + + xsk_umem_consume_tx_done(umem); + } + + return !(budget && work_done); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h new file mode 100644 index 000000000000..7add18bf78d8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_TX_H__ +#define __MLX5_EN_XSK_TX_H__ + +#include "en.h" + +/* TX data path */ + +int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid); + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); + +#endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c new file mode 100644 index 000000000000..4baaa5788320 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include <net/xdp_sock.h> +#include "umem.h" +#include "setup.h" +#include "en/params.h" + +static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv, + struct xdp_umem *umem) +{ + struct device *dev = priv->mdev->device; + u32 i; + + for (i = 0; i < umem->npgs; i++) { + dma_addr_t dma = dma_map_page(dev, umem->pgs[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + if (unlikely(dma_mapping_error(dev, dma))) + goto err_unmap; + umem->pages[i].dma = dma; + } + + return 0; + +err_unmap: + while (i--) { + dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); + umem->pages[i].dma = 0; + } + + return -ENOMEM; +} + +static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv, + struct xdp_umem *umem) +{ + struct device *dev = priv->mdev->device; + u32 i; + + for (i = 0; i < umem->npgs; i++) { + dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); + umem->pages[i].dma = 0; + } +} + +static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk) +{ + if (!xsk->umems) { + xsk->umems = kcalloc(MLX5E_MAX_NUM_CHANNELS, + sizeof(*xsk->umems), GFP_KERNEL); + if (unlikely(!xsk->umems)) + return -ENOMEM; + } + + xsk->refcnt++; + xsk->ever_used = true; + + return 0; +} + +static void mlx5e_xsk_put_umems(struct mlx5e_xsk *xsk) +{ + if (!--xsk->refcnt) { + kfree(xsk->umems); + xsk->umems = NULL; + } +} + +static int mlx5e_xsk_add_umem(struct mlx5e_xsk *xsk, struct xdp_umem *umem, u16 ix) +{ + int err; + + err = mlx5e_xsk_get_umems(xsk); + if (unlikely(err)) + return err; + + xsk->umems[ix] = umem; + return 0; +} + +static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix) +{ + xsk->umems[ix] = NULL; + + mlx5e_xsk_put_umems(xsk); +} + +static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem) +{ + return umem->headroom <= 0xffff && umem->chunk_size_nohr <= 0xffff; +} + +void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk) +{ + xsk->headroom = umem->headroom; + xsk->chunk_size = umem->chunk_size_nohr + umem->headroom; +} + +static 
int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, + struct xdp_umem *umem, u16 ix) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + int err; + + if (unlikely(mlx5e_xsk_get_umem(&priv->channels.params, &priv->xsk, ix))) + return -EBUSY; + + if (unlikely(!mlx5e_xsk_is_umem_sane(umem))) + return -EINVAL; + + err = mlx5e_xsk_map_umem(priv, umem); + if (unlikely(err)) + return err; + + err = mlx5e_xsk_add_umem(&priv->xsk, umem, ix); + if (unlikely(err)) + goto err_unmap_umem; + + mlx5e_build_xsk_param(umem, &xsk); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + /* XSK objects will be created on open. */ + goto validate_closed; + } + + if (!params->xdp_prog) { + /* XSK objects will be created when an XDP program is set, + * and the channels are reopened. + */ + goto validate_closed; + } + + c = priv->channels.c[ix]; + + err = mlx5e_open_xsk(priv, params, &xsk, umem, c); + if (unlikely(err)) + goto err_remove_umem; + + mlx5e_activate_xsk(c); + + /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide + * any Fill Ring entries at the setup stage. + */ + + err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]); + if (unlikely(err)) + goto err_deactivate; + + return 0; + +err_deactivate: + mlx5e_deactivate_xsk(c); + mlx5e_close_xsk(c); + +err_remove_umem: + mlx5e_xsk_remove_umem(&priv->xsk, ix); + +err_unmap_umem: + mlx5e_xsk_unmap_umem(priv, umem); + + return err; + +validate_closed: + /* Check the configuration in advance, rather than fail at a later stage + * (in mlx5e_xdp_set or on open) and end up with no channels. 
+ */ + if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) { + err = -EINVAL; + goto err_remove_umem; + } + + return 0; +} + +static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) +{ + struct xdp_umem *umem = mlx5e_xsk_get_umem(&priv->channels.params, + &priv->xsk, ix); + struct mlx5e_channel *c; + + if (unlikely(!umem)) + return -EINVAL; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto remove_umem; + + /* XSK RQ and SQ are only created if XDP program is set. */ + if (!priv->channels.params.xdp_prog) + goto remove_umem; + + c = priv->channels.c[ix]; + mlx5e_xsk_redirect_rqt_to_drop(priv, ix); + mlx5e_deactivate_xsk(c); + mlx5e_close_xsk(c); + +remove_umem: + mlx5e_xsk_remove_umem(&priv->xsk, ix); + mlx5e_xsk_unmap_umem(priv, umem); + + return 0; +} + +static int mlx5e_xsk_enable_umem(struct mlx5e_priv *priv, struct xdp_umem *umem, + u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_enable_locked(priv, umem, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_disable_locked(priv, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + u16 ix; + + if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) + return -EINVAL; + + return umem ? 
mlx5e_xsk_enable_umem(priv, umem, ix) : + mlx5e_xsk_disable_umem(priv, ix); +} + +int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries) +{ + struct xdp_umem_fq_reuse *reuseq; + + reuseq = xsk_reuseq_prepare(nentries); + if (unlikely(!reuseq)) + return -ENOMEM; + xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq)); + + return 0; +} + +u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk) +{ + u16 res = xsk->refcnt ? params->num_channels : 0; + + while (res) { + if (mlx5e_xsk_get_umem(params, xsk, res - 1)) + break; + --res; + } + + return res; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h new file mode 100644 index 000000000000..25b4cbe58b54 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_UMEM_H__ +#define __MLX5_EN_XSK_UMEM_H__ + +#include "en.h" + +static inline struct xdp_umem *mlx5e_xsk_get_umem(struct mlx5e_params *params, + struct mlx5e_xsk *xsk, u16 ix) +{ + if (!xsk || !xsk->umems) + return NULL; + + if (unlikely(ix >= params->num_channels)) + return NULL; + + return xsk->umems[ix]; +} + +struct mlx5e_xsk_param; +void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk); + +/* .ndo_bpf callback. 
*/ +int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid); + +int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries); + +u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk); + +#endif /* __MLX5_EN_XSK_UMEM_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 6da7c88742dc..3022463f2284 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -39,6 +39,7 @@ #include "en_accel/ipsec_rxtx.h" #include "en_accel/tls_rxtx.h" #include "en.h" +#include "en/txrx.h" #if IS_ENABLED(CONFIG_GENEVE) static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index ca47c0540904..db84500b024f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -39,6 +39,7 @@ #include <linux/skbuff.h> #include <net/xfrm.h> #include "en.h" +#include "en/txrx.h" struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, u32 *cqe_bcnt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c new file mode 100644 index 000000000000..d2ff74d52720 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. 
+ +#include "en.h" +#include "en_accel/ktls.h" + +static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, tls_en, 1); + + return mlx5e_create_tis(mdev, in, tisn); +} + +static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_ktls_offload_context_tx *tx_priv; + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (WARN_ON(direction != TLS_OFFLOAD_CTX_DIR_TX)) + return -EINVAL; + + if (WARN_ON(!mlx5e_ktls_type_check(mdev, crypto_info))) + return -EOPNOTSUPP; + + tx_priv = kvzalloc(sizeof(*tx_priv), GFP_KERNEL); + if (!tx_priv) + return -ENOMEM; + + tx_priv->expected_seq = start_offload_tcp_sn; + tx_priv->crypto_info = crypto_info; + mlx5e_set_ktls_tx_priv_ctx(tls_ctx, tx_priv); + + /* tc and underlay_qpn values are not in use for tls tis */ + err = mlx5e_ktls_create_tis(mdev, &tx_priv->tisn); + if (err) + goto create_tis_fail; + + err = mlx5_ktls_create_key(mdev, crypto_info, &tx_priv->key_id); + if (err) + goto encryption_key_create_fail; + + mlx5e_ktls_tx_offload_set_pending(tx_priv); + + return 0; + +encryption_key_create_fail: + mlx5e_destroy_tis(priv->mdev, tx_priv->tisn); +create_tis_fail: + kvfree(tx_priv); + return err; +} + +static void mlx5e_ktls_del(struct net_device *netdev, + struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_ktls_offload_context_tx *tx_priv = + mlx5e_get_ktls_tx_priv_ctx(tls_ctx); + + mlx5_ktls_destroy_key(priv->mdev, tx_priv->key_id); + mlx5e_destroy_tis(priv->mdev, tx_priv->tisn); + kvfree(tx_priv); +} + +static const struct tlsdev_ops mlx5e_ktls_ops = { + 
.tls_dev_add = mlx5e_ktls_add, + .tls_dev_del = mlx5e_ktls_del, +}; + +void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + + if (!mlx5_accel_is_ktls_device(priv->mdev)) + return; + + netdev->hw_features |= NETIF_F_HW_TLS_TX; + netdev->features |= NETIF_F_HW_TLS_TX; + + netdev->tlsdev_ops = &mlx5e_ktls_ops; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h new file mode 100644 index 000000000000..407da83474ef --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5E_KTLS_H__ +#define __MLX5E_KTLS_H__ + +#include "en.h" + +#ifdef CONFIG_MLX5_EN_TLS +#include <net/tls.h> +#include "accel/tls.h" + +#define MLX5E_KTLS_STATIC_UMR_WQE_SZ \ + (sizeof(struct mlx5e_umr_wqe) + MLX5_ST_SZ_BYTES(tls_static_params)) +#define MLX5E_KTLS_STATIC_WQEBBS \ + (DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_BB)) + +#define MLX5E_KTLS_PROGRESS_WQE_SZ \ + (sizeof(struct mlx5e_tx_wqe) + MLX5_ST_SZ_BYTES(tls_progress_params)) +#define MLX5E_KTLS_PROGRESS_WQEBBS \ + (DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_BB)) +#define MLX5E_KTLS_MAX_DUMP_WQEBBS 2 + +enum { + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0, + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_OFFLOAD = 1, + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION = 2, +}; + +enum { + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START = 0, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 1, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 2, +}; + +struct mlx5e_ktls_offload_context_tx { + struct tls_offload_context_tx *tx_ctx; + struct tls_crypto_info *crypto_info; + u32 expected_seq; + u32 tisn; + u32 key_id; + bool ctx_post_pending; +}; + +struct mlx5e_ktls_offload_context_tx_shadow { + struct 
tls_offload_context_tx tx_ctx;
+	struct mlx5e_ktls_offload_context_tx *priv_tx;
+};
+/*
+ * Link the driver's private TX context to the TLS core's TX offload context.
+ * The core context returned by tls_offload_ctx_tx() is viewed through the
+ * shadow layout, so priv_tx is stored right after the core fields, and
+ * priv_tx->tx_ctx is pointed back at the core context.
+ */
+static inline void
+mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx,
+			   struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+	struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx);
+	struct mlx5e_ktls_offload_context_tx_shadow *shadow;
+
+	/* NOTE(review): presumably TLS_OFFLOAD_CONTEXT_SIZE_TX is the space the
+	 * TLS core reserves for the TX offload context - confirm in net/tls.h.
+	 */
+	BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX);
+
+	shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx;
+
+	shadow->priv_tx = priv_tx;
+	priv_tx->tx_ctx = tx_ctx;
+}
+/*
+ * Return the driver's private TX context previously stored by
+ * mlx5e_set_ktls_tx_priv_ctx(), read back through the same shadow layout.
+ */
+static inline struct mlx5e_ktls_offload_context_tx *
+mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx)
+{
+	struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx);
+	struct mlx5e_ktls_offload_context_tx_shadow *shadow;
+
+	BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX);
+
+	shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx;
+
+	return shadow->priv_tx;
+}
+
+void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
+void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx);
+
+struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
+					 struct mlx5e_txqsq *sq,
+					 struct sk_buff *skb,
+					 struct mlx5e_tx_wqe **wqe, u16 *pi);
+void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+					   struct mlx5e_tx_wqe_info *wi,
+					   struct mlx5e_sq_dma *dma);
+
+#else /* !CONFIG_MLX5_EN_TLS */
+
+/* No-op stub used when the TLS offload support is compiled out. */
+static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
+{
+}
+
+#endif /* CONFIG_MLX5_EN_TLS */
+
+#endif /* __MLX5E_KTLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
new file mode 100644
index 000000000000..3f5f4317a22b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+ +#include <linux/tls.h> +#include "en.h" +#include "en/txrx.h" +#include "en_accel/ktls.h" + +enum { + MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2 = 0x2, +}; + +enum { + MLX5E_ENCRYPTION_STANDARD_TLS = 0x1, +}; + +#define EXTRACT_INFO_FIELDS do { \ + salt = info->salt; \ + rec_seq = info->rec_seq; \ + salt_sz = sizeof(info->salt); \ + rec_seq_sz = sizeof(info->rec_seq); \ +} while (0) + +static void +fill_static_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + struct tls_crypto_info *crypto_info = priv_tx->crypto_info; + char *initial_rn, *gcm_iv; + u16 salt_sz, rec_seq_sz; + char *salt, *rec_seq; + u8 tls_version; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + + EXTRACT_INFO_FIELDS; + break; + } + default: + WARN_ON(1); + return; + } + + gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv); + initial_rn = MLX5_ADDR_OF(tls_static_params, ctx, initial_record_number); + + memcpy(gcm_iv, salt, salt_sz); + memcpy(initial_rn, rec_seq, rec_seq_sz); + + tls_version = MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2; + + MLX5_SET(tls_static_params, ctx, tls_version, tls_version); + MLX5_SET(tls_static_params, ctx, const_1, 1); + MLX5_SET(tls_static_params, ctx, const_2, 2); + MLX5_SET(tls_static_params, ctx, encryption_standard, + MLX5E_ENCRYPTION_STANDARD_TLS); + MLX5_SET(tls_static_params, ctx, dek_index, priv_tx->key_id); +} + +static void +build_static_params(struct mlx5e_umr_wqe *wqe, u16 pc, u32 sqn, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + +#define STATIC_PARAMS_DS_CNT \ + DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_DS) + + cseg->opmod_idx_opcode = cpu_to_be32((pc << 8) | MLX5_OPCODE_UMR | + (MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS << 24)); + cseg->qpn_ds = cpu_to_be32((sqn << 
MLX5_WQE_CTRL_QPN_SHIFT) | + STATIC_PARAMS_DS_CNT); + cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + cseg->imm = cpu_to_be32(priv_tx->tisn); + + ucseg->flags = MLX5_UMR_INLINE; + ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16); + + fill_static_params_ctx(wqe->tls_static_params_ctx, priv_tx); +} + +static void +fill_progress_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + MLX5_SET(tls_progress_params, ctx, pd, priv_tx->tisn); + MLX5_SET(tls_progress_params, ctx, record_tracker_state, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START); + MLX5_SET(tls_progress_params, ctx, auth_state, + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD); +} + +static void +build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + +#define PROGRESS_PARAMS_DS_CNT \ + DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_DS) + + cseg->opmod_idx_opcode = + cpu_to_be32((pc << 8) | MLX5_OPCODE_SET_PSV | + (MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS << 24)); + cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + PROGRESS_PARAMS_DS_CNT); + cseg->fm_ce_se = fence ? 
MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + + fill_progress_params_ctx(wqe->data, priv_tx); +} + +static void tx_fill_wi(struct mlx5e_txqsq *sq, + u16 pi, u8 num_wqebbs, + skb_frag_t *resync_dump_frag) +{ + struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; + + wi->skb = NULL; + wi->num_wqebbs = num_wqebbs; + wi->resync_dump_frag = resync_dump_frag; +} + +void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + priv_tx->ctx_post_pending = true; +} + +static bool +mlx5e_ktls_tx_offload_test_and_clear_pending(struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + bool ret = priv_tx->ctx_post_pending; + + priv_tx->ctx_post_pending = false; + + return ret; +} + +static void +post_static_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5e_umr_wqe *umr_wqe; + u16 pi; + + umr_wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_STATIC_UMR_WQE_SZ, &pi); + build_static_params(umr_wqe, sq->pc, sq->sqn, priv_tx, fence); + tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, NULL); + sq->pc += MLX5E_KTLS_STATIC_WQEBBS; +} + +static void +post_progress_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5e_tx_wqe *wqe; + u16 pi; + + wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_PROGRESS_WQE_SZ, &pi); + build_progress_params(wqe, sq->pc, sq->sqn, priv_tx, fence); + tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, NULL); + sq->pc += MLX5E_KTLS_PROGRESS_WQEBBS; +} + +static void +mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool skip_static_post, bool fence_first_post) +{ + bool progress_fence = skip_static_post || !fence_first_post; + + if (!skip_static_post) + post_static_params(sq, priv_tx, fence_first_post); + + post_progress_params(sq, priv_tx, progress_fence); +} + +struct tx_sync_info { + u64 rcd_sn; + s32 sync_len; + int nr_frags; + skb_frag_t *frags[MAX_SKB_FRAGS]; +}; + +static bool 
tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
+		 u32 tcp_seq, struct tx_sync_info *info)
+{
+	struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx;
+	struct tls_record_info *record;
+	int remaining, i = 0;
+	unsigned long flags;
+	bool ret = true;
+
+	spin_lock_irqsave(&tx_ctx->lock, flags);
+	record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn);
+
+	if (unlikely(!record)) {
+		ret = false;
+		goto out;
+	}
+
+	if (unlikely(tcp_seq < tls_record_start_seq(record))) {
+		if (!tls_record_is_start_marker(record))
+			ret = false;
+		goto out;
+	}
+
+	info->sync_len = tcp_seq - tls_record_start_seq(record);
+	remaining = info->sync_len;
+	while (remaining > 0) {
+		skb_frag_t *frag = &record->frags[i];
+
+		__skb_frag_ref(frag);
+		remaining -= skb_frag_size(frag);
+		info->frags[i++] = frag;
+	}
+	/* reduce the part which will be sent with the original SKB */
+	if (remaining < 0)
+		skb_frag_size_add(info->frags[i - 1], remaining);
+	info->nr_frags = i;
+out:
+	spin_unlock_irqrestore(&tx_ctx->lock, flags);
+	return ret;
+}
+
+/*
+ * Post the TLS parameter WQEs needed to resync the context to record number
+ * @rcd_sn.
+ *
+ * The static-params WQE is skipped when the record number cached in the
+ * crypto info already equals @rcd_sn (big-endian compare); otherwise the
+ * cached value is updated before posting. The first WQE posted is fenced.
+ *
+ * Nothing is posted for a cipher type other than AES-GCM-128.
+ */
+static void
+tx_post_resync_params(struct mlx5e_txqsq *sq,
+		      struct mlx5e_ktls_offload_context_tx *priv_tx,
+		      u64 rcd_sn)
+{
+	struct tls_crypto_info *crypto_info = priv_tx->crypto_info;
+	__be64 rn_be = cpu_to_be64(rcd_sn);
+	bool skip_static_post;
+	u16 rec_seq_sz;
+	char *rec_seq;
+
+	switch (crypto_info->cipher_type) {
+	case TLS_CIPHER_AES_GCM_128: {
+		struct tls12_crypto_info_aes_gcm_128 *info =
+			(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+
+		rec_seq = info->rec_seq;
+		rec_seq_sz = sizeof(info->rec_seq);
+		break;
+	}
+	default:
+		/* Bail out: rec_seq and rec_seq_sz are only set for a known
+		 * cipher, and must not reach memcmp()/memcpy() uninitialized.
+		 */
+		WARN_ON(1);
+		return;
+	}
+
+	skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz);
+	if (!skip_static_post)
+		memcpy(rec_seq, &rn_be, rec_seq_sz);
+
+	mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, skip_static_post, true);
+}
+
+static int
+tx_post_resync_dump(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+		    skb_frag_t *frag, u32 tisn, bool first)
+{
+	struct mlx5_wqe_ctrl_seg *cseg;
+	struct
mlx5_wqe_eth_seg *eseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5e_tx_wqe *wqe; + dma_addr_t dma_addr = 0; + u16 ds_cnt, ds_cnt_inl; + u8 num_wqebbs; + u16 pi, ihs; + int fsz; + + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + ihs = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb)); + ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS); + ds_cnt += ds_cnt_inl; + ds_cnt += 1; /* one frag */ + + wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi); + + num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + + cseg = &wqe->ctrl; + eseg = &wqe->eth; + dseg = wqe->data; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + cseg->imm = cpu_to_be32(tisn); + cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + + eseg->inline_hdr.sz = cpu_to_be16(ihs); + memcpy(eseg->inline_hdr.start, skb->data, ihs); + dseg += ds_cnt_inl; + + fsz = skb_frag_size(frag); + dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) + return -ENOMEM; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; + dseg->byte_count = cpu_to_be32(fsz); + mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); + + tx_fill_wi(sq, pi, num_wqebbs, frag); + sq->pc += num_wqebbs; + + WARN(num_wqebbs > MLX5E_KTLS_MAX_DUMP_WQEBBS, + "unexpected DUMP num_wqebbs, %d > %d", + num_wqebbs, MLX5E_KTLS_MAX_DUMP_WQEBBS); + + return 0; +} + +void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + struct mlx5e_sq_dma *dma) +{ + struct mlx5e_sq_stats *stats = sq->stats; + + mlx5e_tx_dma_unmap(sq->pdev, dma); + __skb_frag_unref(wi->resync_dump_frag); + stats->tls_dump_packets++; + stats->tls_dump_bytes += wi->num_bytes; +} + +static void tx_post_fence_nop(struct mlx5e_txqsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + + tx_fill_wi(sq, pi, 1, 
NULL);
+
+	mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc);
+}
+
+/*
+ * Handle a TLS TX skb whose TCP sequence number @seq differs from the one the
+ * offload context expects.
+ *
+ * Looks up the TLS record that covers @seq, re-posts the TLS parameter WQEs
+ * for that record, and then posts one DUMP WQE per record fragment that
+ * precedes @seq (or a fence NOP when there is none).
+ *
+ * Returns @skb when the caller should go on transmitting it, or NULL after
+ * @skb has been freed.
+ */
+static struct sk_buff *
+mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
+			 struct mlx5e_txqsq *sq,
+			 struct sk_buff *skb,
+			 u32 seq)
+{
+	struct mlx5e_sq_stats *stats = sq->stats;
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	struct tx_sync_info info = {};
+	u16 contig_wqebbs_room, pi;
+	u8 num_wqebbs;
+	int i;
+
+	if (!tx_sync_info_get(priv_tx, seq, &info)) {
+		/* We might get here if a retransmission reaches the driver
+		 * after the relevant record is acked.
+		 * It should be safe to drop the packet in this case
+		 */
+		stats->tls_drop_no_sync_data++;
+		goto err_out;
+	}
+
+	if (unlikely(info.sync_len < 0)) {
+		u32 payload;
+		int headln;
+
+		headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
+		payload = skb->len - headln;
+		if (likely(payload <= -info.sync_len))
+			return skb;
+
+		stats->tls_drop_bypass_req++;
+		goto err_out;
+	}
+
+	stats->tls_ooo++;
+
+	num_wqebbs = MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS +
+		(info.nr_frags ? info.nr_frags * MLX5E_KTLS_MAX_DUMP_WQEBBS : 1);
+	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs_room < num_wqebbs))
+		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+
+	tx_post_resync_params(sq, priv_tx, info.rcd_sn);
+
+	for (i = 0; i < info.nr_frags; i++) {
+		if (!tx_post_resync_dump(sq, skb, info.frags[i],
+					 priv_tx->tisn, !i))
+			continue;
+
+		/* Fragments [i, nr_frags) were never attached to a DUMP WQE,
+		 * so no completion will drop the reference taken on them in
+		 * tx_sync_info_get(). Drop it here to avoid leaking the pages.
+		 * Fragments already posted are released on their completion.
+		 */
+		for (; i < info.nr_frags; i++)
+			__skb_frag_unref(info.frags[i]);
+		goto err_out;
+	}
+
+	/* If no dump WQE was sent, we need to have a fence NOP WQE before the
+	 * actual data xmit.
+ */ + if (!info.nr_frags) + tx_post_fence_nop(sq); + + return skb; + +err_out: + dev_kfree_skb_any(skb); + return NULL; +} + +struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, + struct mlx5e_txqsq *sq, + struct sk_buff *skb, + struct mlx5e_tx_wqe **wqe, u16 *pi) +{ + struct mlx5e_ktls_offload_context_tx *priv_tx; + struct mlx5e_sq_stats *stats = sq->stats; + struct mlx5_wqe_ctrl_seg *cseg; + struct tls_context *tls_ctx; + int datalen; + u32 seq; + + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) + goto out; + + datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + if (!datalen) + goto out; + + tls_ctx = tls_get_ctx(skb->sk); + if (unlikely(tls_ctx->netdev != netdev)) + goto err_out; + + priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); + + if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) { + mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false); + *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); + stats->tls_ctx++; + } + + seq = ntohl(tcp_hdr(skb)->seq); + if (unlikely(priv_tx->expected_seq != seq)) { + skb = mlx5e_ktls_tx_handle_ooo(priv_tx, sq, skb, seq); + if (unlikely(!skb)) + goto out; + *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); + } + + priv_tx->expected_seq = seq + datalen; + + cseg = &(*wqe)->ctrl; + cseg->imm = cpu_to_be32(priv_tx->tisn); + + stats->tls_encrypted_packets += skb_is_gso(skb) ? 
skb_shinfo(skb)->gso_segs : 1; + stats->tls_encrypted_bytes += datalen; + +out: + return skb; + +err_out: + dev_kfree_skb_any(skb); + return NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c index e88340e196f7..f8b93b62a7d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c @@ -160,13 +160,17 @@ static void mlx5e_tls_del(struct net_device *netdev, direction == TLS_OFFLOAD_CTX_DIR_TX); } -static void mlx5e_tls_resync_rx(struct net_device *netdev, struct sock *sk, - u32 seq, u64 rcd_sn) +static void mlx5e_tls_resync(struct net_device *netdev, struct sock *sk, + u32 seq, u8 *rcd_sn_data, + enum tls_offload_ctx_dir direction) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_tls_offload_context_rx *rx_ctx; + u64 rcd_sn = *(u64 *)rcd_sn_data; + if (WARN_ON_ONCE(direction != TLS_OFFLOAD_CTX_DIR_RX)) + return; rx_ctx = mlx5e_get_tls_rx_context(tls_ctx); netdev_info(netdev, "resyncing seq %d rcd %lld\n", seq, @@ -178,7 +182,7 @@ static void mlx5e_tls_resync_rx(struct net_device *netdev, struct sock *sk, static const struct tlsdev_ops mlx5e_tls_ops = { .tls_dev_add = mlx5e_tls_add, .tls_dev_del = mlx5e_tls_del, - .tls_dev_resync_rx = mlx5e_tls_resync_rx, + .tls_dev_resync = mlx5e_tls_resync, }; void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) @@ -186,6 +190,11 @@ void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) struct net_device *netdev = priv->netdev; u32 caps; + if (mlx5_accel_is_ktls_device(priv->mdev)) { + mlx5e_ktls_build_netdev(priv); + return; + } + if (!mlx5_accel_is_tls_device(priv->mdev)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h index 3f5d72163b56..9015f3f7792d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h @@ -33,8 +33,10 @@ #ifndef __MLX5E_TLS_H__ #define __MLX5E_TLS_H__ -#ifdef CONFIG_MLX5_EN_TLS +#include "accel/tls.h" +#include "en_accel/ktls.h" +#ifdef CONFIG_MLX5_EN_TLS #include <net/tls.h> #include "en.h" @@ -94,7 +96,12 @@ int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data); #else -static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) { } +static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) +{ + if (mlx5_accel_is_ktls_device(priv->mdev)) + mlx5e_ktls_build_netdev(priv); +} + static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { } static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index 439bf5953885..71384ad1a443 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -248,7 +248,7 @@ mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context, mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln, cpu_to_be64(info.rcd_sn)); mlx5e_sq_xmit(sq, nskb, *wqe, *pi, true); - mlx5e_sq_fetch_wqe(sq, wqe, pi); + *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); return skb; err_out: @@ -269,6 +269,11 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev, int datalen; u32 skb_seq; + if (MLX5_CAP_GEN(sq->channel->mdev, tls)) { + skb = mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi); + goto out; + } + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h index 311667ec71b8..90bc1f2384c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h @@ -38,6 +38,7 @@ #include <linux/skbuff.h> #include "en.h" +#include "en/txrx.h" struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 554672edf8c3..8dd31b5c740c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -680,7 +680,7 @@ static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev, memset(perm_addr, 0xff, MAX_ADDR_LEN); - mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr); + mlx5_query_mac_address(priv->mdev, perm_addr); } static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index d67adf70a97b..ca9cfbf57d8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -30,22 +30,22 @@ * SOFTWARE. 
*/ -#include <linux/net_dim.h> +#include <linux/dim.h> #include "en.h" static void -mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder, +mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder, struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) { mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts); - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } void mlx5e_rx_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim); - struct net_dim_cq_moder cur_moder = + struct dim_cq_moder cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq); @@ -53,9 +53,9 @@ void mlx5e_rx_dim_work(struct work_struct *work) void mlx5e_tx_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim); - struct net_dim_cq_moder cur_moder = + struct dim_cq_moder cur_moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix); mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index dd764e0471f2..126ec4181286 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -32,6 +32,7 @@ #include "en.h" #include "en/port.h" +#include "en/xsk/umem.h" #include "lib/clock.h" void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, @@ -46,7 +47,7 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); - strlcpy(drvinfo->bus_info, 
pci_name(mdev->pdev), + strlcpy(drvinfo->bus_info, dev_name(mdev->device), sizeof(drvinfo->bus_info)); } @@ -388,8 +389,17 @@ static int mlx5e_set_ringparam(struct net_device *dev, void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch) { + mutex_lock(&priv->state_lock); + ch->max_combined = mlx5e_get_netdev_max_channels(priv->netdev); ch->combined_count = priv->channels.params.num_channels; + if (priv->xsk.refcnt) { + /* The upper half are XSK queues. */ + ch->max_combined *= 2; + ch->combined_count *= 2; + } + + mutex_unlock(&priv->state_lock); } static void mlx5e_get_channels(struct net_device *dev, @@ -403,6 +413,7 @@ static void mlx5e_get_channels(struct net_device *dev, int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch) { + struct mlx5e_params *cur_params = &priv->channels.params; unsigned int count = ch->combined_count; struct mlx5e_channels new_channels = {}; bool arfs_enabled; @@ -414,16 +425,26 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, return -EINVAL; } - if (priv->channels.params.num_channels == count) + if (cur_params->num_channels == count) return 0; mutex_lock(&priv->state_lock); + /* Don't allow changing the number of channels if there is an active + * XSK, because the numeration of the XSK and regular RQs will change. 
+ */ + if (priv->xsk.refcnt) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: AF_XDP is active, cannot change the number of channels\n", + __func__); + goto out; + } + new_channels.params = priv->channels.params; new_channels.params.num_channels = count; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; + *cur_params = new_channels.params; if (!netif_is_rxfh_configured(priv->netdev)) mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, count); @@ -466,7 +487,7 @@ static int mlx5e_set_channels(struct net_device *dev, int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct net_dim_cq_moder *rx_moder, *tx_moder; + struct dim_cq_moder *rx_moder, *tx_moder; if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -EOPNOTSUPP; @@ -521,7 +542,7 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct net_dim_cq_moder *rx_moder, *tx_moder; + struct dim_cq_moder *rx_moder, *tx_moder; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; int err = 0; @@ -1867,40 +1888,6 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) return priv->channels.params.pflags; } -int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, - struct ethtool_flash *flash) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct net_device *dev = priv->netdev; - const struct firmware *fw; - int err; - - if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) - return -EOPNOTSUPP; - - err = request_firmware_direct(&fw, flash->data, &dev->dev); - if (err) - return err; - - dev_hold(dev); - rtnl_unlock(); - - err = mlx5_firmware_flash(mdev, fw); - release_firmware(fw); - - rtnl_lock(); - dev_put(dev); - return err; -} - -static int mlx5e_flash_device(struct net_device *dev, - struct ethtool_flash *flash) -{ - struct mlx5e_priv *priv = 
netdev_priv(dev); - - return mlx5e_ethtool_flash_device(priv, flash); -} - #ifndef CONFIG_MLX5_EN_RXNFC /* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS * otherwise this function will be defined from en_fs_ethtool.c @@ -1939,7 +1926,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { #ifdef CONFIG_MLX5_EN_RXNFC .set_rxnfc = mlx5e_set_rxnfc, #endif - .flash_device = mlx5e_flash_device, .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, .get_pauseparam = mlx5e_get_pauseparam, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 4421c10f58ae..ea3a490b569a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -32,6 +32,8 @@ #include <linux/mlx5/fs.h> #include "en.h" +#include "en/params.h" +#include "en/xsk/umem.h" struct mlx5e_ethtool_rule { struct list_head list; @@ -414,6 +416,14 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, if (fs->ring_cookie == RX_CLS_FLOW_DISC) { flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; } else { + struct mlx5e_params *params = &priv->channels.params; + enum mlx5e_rq_group group; + struct mlx5e_tir *tir; + u16 ix; + + mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group); + tir = group == MLX5E_RQ_GROUP_XSK ? priv->xsk_tir : priv->direct_tir; + dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (!dst) { err = -ENOMEM; @@ -421,12 +431,12 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, } dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn; + dst->tir_num = tir[ix].tirn; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); - flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + spec->flow_context.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 
1 : 0); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -600,9 +610,9 @@ static int validate_flow(struct mlx5e_priv *priv, if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES) return -ENOSPC; - if (fs->ring_cookie >= priv->channels.params.num_channels && - fs->ring_cookie != RX_CLS_FLOW_DISC) - return -EINVAL; + if (fs->ring_cookie != RX_CLS_FLOW_DISC) + if (!mlx5e_qid_validate(&priv->channels.params, fs->ring_cookie)) + return -EINVAL; switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { case ETHER_FLOW: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a8e8350b38aa..10efd69de7ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -38,8 +38,10 @@ #include <linux/bpf.h> #include <linux/if_bridge.h> #include <net/page_pool.h> +#include <net/xdp_sock.h> #include "eswitch.h" #include "en.h" +#include "en/txrx.h" #include "en_tc.h" #include "en_rep.h" #include "en_accel/ipsec.h" @@ -56,35 +58,11 @@ #include "en/monitor_stats.h" #include "en/reporter.h" #include "en/params.h" +#include "en/xsk/umem.h" +#include "en/xsk/setup.h" +#include "en/xsk/rx.h" +#include "en/xsk/tx.h" -struct mlx5e_rq_param { - u32 rqc[MLX5_ST_SZ_DW(rqc)]; - struct mlx5_wq_param wq; - struct mlx5e_rq_frags_info frags_info; -}; - -struct mlx5e_sq_param { - u32 sqc[MLX5_ST_SZ_DW(sqc)]; - struct mlx5_wq_param wq; - bool is_mpw; -}; - -struct mlx5e_cq_param { - u32 cqc[MLX5_ST_SZ_DW(cqc)]; - struct mlx5_wq_param wq; - u16 eq_ix; - u8 cq_period_mode; -}; - -struct mlx5e_channel_param { - struct mlx5e_rq_param rq; - struct mlx5e_sq_param sq; - struct mlx5e_sq_param xdp_sq; - struct mlx5e_sq_param icosq; - struct mlx5e_cq_param rx_cq; - struct mlx5e_cq_param tx_cq; - struct mlx5e_cq_param icosq_cq; -}; bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { @@ -114,18 +92,31 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, mlx5_core_info(mdev, 
"MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? - BIT(mlx5e_mpwqe_get_log_rq_size(params)) : + BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) : BIT(params->log_rq_mtu_frames), - BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)), + BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)), MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { - return mlx5e_check_fragmented_striding_rq_cap(mdev) && - !MLX5_IPSEC_DEV(mdev) && - !(params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params)); + if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) + return false; + + if (MLX5_IPSEC_DEV(mdev)) + return false; + + if (params->xdp_prog) { + /* XSK params are not considered here. If striding RQ is in use, + * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will + * be called with the known XSK params. 
+ */ + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) + return false; + } + + return true; } void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) @@ -394,6 +385,8 @@ static void mlx5e_free_di_list(struct mlx5e_rq *rq) static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct xdp_umem *umem, struct mlx5e_rq_param *rqp, struct mlx5e_rq *rq) { @@ -401,6 +394,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = c->mdev; void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 num_xsk_frames = 0; + u32 rq_xdp_ix; u32 pool_size; int wq_sz; int err; @@ -417,7 +412,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->ix = c->ix; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - rq->stats = &c->priv->channel_stats[c->ix].rq; + rq->xdpsq = &c->rq_xdpsq; + rq->umem = umem; + + if (rq->umem) + rq->stats = &c->priv->channel_stats[c->ix].xskrq; + else + rq->stats = &c->priv->channel_stats[c->ix].rq; rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL; if (IS_ERR(rq->xdp_prog)) { @@ -426,12 +427,16 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix); + rq_xdp_ix = rq->ix; + if (xsk) + rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK; + err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix); if (err < 0) goto err_rq_wq_destroy; rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; - rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params); + rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); + rq->buff.umem_headroom = xsk ? 
xsk->headroom : 0; pool_size = 1 << params->log_rq_mtu_frames; switch (rq->wq_type) { @@ -445,7 +450,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); - pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params); + if (xsk) + num_xsk_frames = wq_sz << + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + + pool_size = MLX5_MPWRQ_PAGES_PER_WQE << + mlx5e_mpwqe_get_log_rq_size(params, xsk); rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; @@ -464,12 +474,15 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->mpwqe.skb_from_cqe_mpwrq = - mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ? - mlx5e_skb_from_cqe_mpwrq_linear : - mlx5e_skb_from_cqe_mpwrq_nonlinear; - rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params); - rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params)); + rq->mpwqe.skb_from_cqe_mpwrq = xsk ? + mlx5e_xsk_skb_from_cqe_mpwrq_linear : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ? 
+ mlx5e_skb_from_cqe_mpwrq_linear : + mlx5e_skb_from_cqe_mpwrq_nonlinear; + + rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + rq->mpwqe.num_strides = + BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) @@ -490,6 +503,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); + if (xsk) + num_xsk_frames = wq_sz << rq->wqe.info.log_num_frags; + rq->wqe.info = rqp->frags_info; rq->wqe.frags = kvzalloc_node(array_size(sizeof(*rq->wqe.frags), @@ -503,6 +519,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = mlx5e_init_di_list(rq, wq_sz, c->cpu); if (err) goto err_free; + rq->post_wqes = mlx5e_post_rx_wqes; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; @@ -518,33 +535,49 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_free; } - rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(params) ? - mlx5e_skb_from_cqe_linear : - mlx5e_skb_from_cqe_nonlinear; + rq->wqe.skb_from_cqe = xsk ? + mlx5e_xsk_skb_from_cqe_linear : + mlx5e_rx_is_linear_skb(params, NULL) ? + mlx5e_skb_from_cqe_linear : + mlx5e_skb_from_cqe_nonlinear; rq->mkey_be = c->mkey_be; } - /* Create a page_pool and register it with rxq */ - pp_params.order = 0; - pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ - pp_params.pool_size = pool_size; - pp_params.nid = cpu_to_node(c->cpu); - pp_params.dev = c->pdev; - pp_params.dma_dir = rq->buff.map_dir; - - /* page_pool can be used even when there is no rq->xdp_prog, - * given page_pool does not handle DMA mapping there is no - * required state to clear. And page_pool gracefully handle - * elevated refcnt. 
- */ - rq->page_pool = page_pool_create(&pp_params); - if (IS_ERR(rq->page_pool)) { - err = PTR_ERR(rq->page_pool); - rq->page_pool = NULL; - goto err_free; + if (xsk) { + err = mlx5e_xsk_resize_reuseq(umem, num_xsk_frames); + if (unlikely(err)) { + mlx5_core_err(mdev, "Unable to allocate the Reuse Ring for %u frames\n", + num_xsk_frames); + goto err_free; + } + + rq->zca.free = mlx5e_xsk_zca_free; + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_ZERO_COPY, + &rq->zca); + } else { + /* Create a page_pool and register it with rxq */ + pp_params.order = 0; + pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ + pp_params.pool_size = pool_size; + pp_params.nid = cpu_to_node(c->cpu); + pp_params.dev = c->pdev; + pp_params.dma_dir = rq->buff.map_dir; + + /* page_pool can be used even when there is no rq->xdp_prog, + * given page_pool does not handle DMA mapping there is no + * required state to clear. And page_pool gracefully handle + * elevated refcnt. + */ + rq->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rq->page_pool)) { + err = PTR_ERR(rq->page_pool); + rq->page_pool = NULL; + goto err_free; + } + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_PAGE_POOL, rq->page_pool); } - err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, - MEM_TYPE_PAGE_POOL, rq->page_pool); if (err) goto err_free; @@ -584,11 +617,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, switch (params->rx_cq_moderation.cq_period_mode) { case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: - rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE; + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; break; case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: default: - rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } rq->page_cache.head = 0; @@ -611,8 +644,7 @@ err_rq_wq_destroy: if (rq->xdp_prog) bpf_prog_put(rq->xdp_prog); xdp_rxq_info_unreg(&rq->xdp_rxq); - if (rq->page_pool) - page_pool_destroy(rq->page_pool); + 
page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); return err; @@ -625,10 +657,6 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) if (rq->xdp_prog) bpf_prog_put(rq->xdp_prog); - xdp_rxq_info_unreg(&rq->xdp_rxq); - if (rq->page_pool) - page_pool_destroy(rq->page_pool); - switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: kvfree(rq->mpwqe.info); @@ -643,8 +671,15 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) { struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i]; - mlx5e_page_release(rq, dma_info, false); + /* With AF_XDP, page_cache is not used, so this loop is not + * entered, and it's safe to call mlx5e_page_release_dynamic + * directly. + */ + mlx5e_page_release_dynamic(rq, dma_info, false); } + + xdp_rxq_info_unreg(&rq->xdp_rxq); + page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); } @@ -778,7 +813,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) mlx5_core_destroy_rq(rq->mdev, rq->rqn); } -static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) +int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) { unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time); struct mlx5e_channel *c = rq->channel; @@ -836,14 +871,13 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) } -static int mlx5e_open_rq(struct mlx5e_channel *c, - struct mlx5e_params *params, - struct mlx5e_rq_param *param, - struct mlx5e_rq *rq) +int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, + struct xdp_umem *umem, struct mlx5e_rq *rq) { int err; - err = mlx5e_alloc_rq(c, params, param, rq); + err = mlx5e_alloc_rq(c, params, xsk, umem, param, rq); if (err) return err; @@ -881,13 +915,13 @@ static void mlx5e_activate_rq(struct mlx5e_rq *rq) mlx5e_trigger_irq(&rq->channel->icosq); } -static void mlx5e_deactivate_rq(struct mlx5e_rq *rq) +void mlx5e_deactivate_rq(struct mlx5e_rq 
*rq) { clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ } -static void mlx5e_close_rq(struct mlx5e_rq *rq) +void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); mlx5e_destroy_rq(rq); @@ -940,6 +974,7 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct xdp_umem *umem, struct mlx5e_sq_param *param, struct mlx5e_xdpsq *sq, bool is_redirect) @@ -955,9 +990,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - sq->stats = is_redirect ? - &c->priv->channel_stats[c->ix].xdpsq : - &c->priv->channel_stats[c->ix].rq_xdpsq; + sq->umem = umem; + + sq->stats = sq->umem ? + &c->priv->channel_stats[c->ix].xsksq : + is_redirect ? + &c->priv->channel_stats[c->ix].xdpsq : + &c->priv->channel_stats[c->ix].rq_xdpsq; param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1087,11 +1126,14 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; + sq->stop_room = MLX5E_SQ_STOP_ROOM; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); - if (mlx5_accel_is_tls_device(c->priv->mdev)) + if (mlx5_accel_is_tls_device(c->priv->mdev)) { set_bit(MLX5E_SQ_STATE_TLS, &sq->state); + sq->stop_room += MLX5E_SQ_TLS_ROOM; + } param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1337,10 +1379,8 @@ static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) mlx5e_tx_reporter_err_cqe(sq); } -static int 
mlx5e_open_icosq(struct mlx5e_channel *c, - struct mlx5e_params *params, - struct mlx5e_sq_param *param, - struct mlx5e_icosq *sq) +int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq) { struct mlx5e_create_sq_param csp = {}; int err; @@ -1366,7 +1406,7 @@ err_free_icosq: return err; } -static void mlx5e_close_icosq(struct mlx5e_icosq *sq) +void mlx5e_close_icosq(struct mlx5e_icosq *sq) { struct mlx5e_channel *c = sq->channel; @@ -1377,16 +1417,14 @@ static void mlx5e_close_icosq(struct mlx5e_icosq *sq) mlx5e_free_icosq(sq); } -static int mlx5e_open_xdpsq(struct mlx5e_channel *c, - struct mlx5e_params *params, - struct mlx5e_sq_param *param, - struct mlx5e_xdpsq *sq, - bool is_redirect) +int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct xdp_umem *umem, + struct mlx5e_xdpsq *sq, bool is_redirect) { struct mlx5e_create_sq_param csp = {}; int err; - err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect); + err = mlx5e_alloc_xdpsq(c, params, umem, param, sq, is_redirect); if (err) return err; @@ -1440,7 +1478,7 @@ err_free_xdpsq: return err; } -static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) +void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) { struct mlx5e_channel *c = sq->channel; @@ -1448,7 +1486,7 @@ static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) napi_synchronize(&c->napi); mlx5e_destroy_sq(c->mdev, sq->sqn); - mlx5e_free_xdpsq_descs(sq, rq); + mlx5e_free_xdpsq_descs(sq); mlx5e_free_xdpsq(sq); } @@ -1518,6 +1556,7 @@ static void mlx5e_free_cq(struct mlx5e_cq *cq) static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) { + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; struct mlx5_core_dev *mdev = cq->mdev; struct mlx5_core_cq *mcq = &cq->mcq; @@ -1552,7 +1591,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) 
MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); - err = mlx5_core_create_cq(mdev, mcq, in, inlen); + err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out)); kvfree(in); @@ -1569,10 +1608,8 @@ static void mlx5e_destroy_cq(struct mlx5e_cq *cq) mlx5_core_destroy_cq(cq->mdev, &cq->mcq); } -static int mlx5e_open_cq(struct mlx5e_channel *c, - struct net_dim_cq_moder moder, - struct mlx5e_cq_param *param, - struct mlx5e_cq *cq) +int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder, + struct mlx5e_cq_param *param, struct mlx5e_cq *cq) { struct mlx5_core_dev *mdev = c->mdev; int err; @@ -1595,7 +1632,7 @@ err_free_cq: return err; } -static void mlx5e_close_cq(struct mlx5e_cq *cq) +void mlx5e_close_cq(struct mlx5e_cq *cq) { mlx5e_destroy_cq(cq); mlx5e_free_cq(cq); @@ -1769,49 +1806,16 @@ static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c) free_cpumask_var(c->xps_cpumask); } -static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, - struct mlx5e_params *params, - struct mlx5e_channel_param *cparam, - struct mlx5e_channel **cp) +static int mlx5e_open_queues(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam) { - int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); - struct net_dim_cq_moder icocq_moder = {0, 0}; - struct net_device *netdev = priv->netdev; - struct mlx5e_channel *c; - unsigned int irq; + struct dim_cq_moder icocq_moder = {0, 0}; int err; - int eqn; - - err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); - if (err) - return err; - - c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); - if (!c) - return -ENOMEM; - - c->priv = priv; - c->mdev = priv->mdev; - c->tstamp = &priv->tstamp; - c->ix = ix; - c->cpu = cpu; - c->pdev = priv->mdev->device; - c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); - c->num_tc = params->num_tc; - c->xdp = !!params->xdp_prog; - c->stats = 
&priv->channel_stats[ix].ch; - c->irq_desc = irq_to_desc(irq); - - err = mlx5e_alloc_xps_cpumask(c, params); - if (err) - goto err_free_channel; - - netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); if (err) - goto err_napi_del; + return err; err = mlx5e_open_tx_cqs(c, params, cparam); if (err) @@ -1827,7 +1831,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, /* XDP SQ CQ params are same as normal TXQ sq CQ params */ err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation, - &cparam->tx_cq, &c->rq.xdpsq.cq) : 0; + &cparam->tx_cq, &c->rq_xdpsq.cq) : 0; if (err) goto err_close_rx_cq; @@ -1841,20 +1845,21 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (err) goto err_close_icosq; - err = c->xdp ? mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq, false) : 0; - if (err) - goto err_close_sqs; + if (c->xdp) { + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, + &c->rq_xdpsq, false); + if (err) + goto err_close_sqs; + } - err = mlx5e_open_rq(c, params, &cparam->rq, &c->rq); + err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq); if (err) goto err_close_xdp_sq; - err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->xdpsq, true); + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true); if (err) goto err_close_rq; - *cp = c; - return 0; err_close_rq: @@ -1862,7 +1867,7 @@ err_close_rq: err_close_xdp_sq: if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); + mlx5e_close_xdpsq(&c->rq_xdpsq); err_close_sqs: mlx5e_close_sqs(c); @@ -1872,8 +1877,9 @@ err_close_icosq: err_disable_napi: napi_disable(&c->napi); + if (c->xdp) - mlx5e_close_cq(&c->rq.xdpsq.cq); + mlx5e_close_cq(&c->rq_xdpsq.cq); err_close_rx_cq: mlx5e_close_cq(&c->rq.cq); @@ -1887,6 +1893,85 @@ err_close_tx_cqs: err_close_icosq_cq: mlx5e_close_cq(&c->icosq.cq); + return err; +} + +static void mlx5e_close_queues(struct mlx5e_channel *c) +{ + 
mlx5e_close_xdpsq(&c->xdpsq); + mlx5e_close_rq(&c->rq); + if (c->xdp) + mlx5e_close_xdpsq(&c->rq_xdpsq); + mlx5e_close_sqs(c); + mlx5e_close_icosq(&c->icosq); + napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->rq_xdpsq.cq); + mlx5e_close_cq(&c->rq.cq); + mlx5e_close_cq(&c->xdpsq.cq); + mlx5e_close_tx_cqs(c); + mlx5e_close_cq(&c->icosq.cq); +} + +static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam, + struct xdp_umem *umem, + struct mlx5e_channel **cp) +{ + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); + struct net_device *netdev = priv->netdev; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + unsigned int irq; + int err; + int eqn; + + err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + if (err) + return err; + + c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); + if (!c) + return -ENOMEM; + + c->priv = priv; + c->mdev = priv->mdev; + c->tstamp = &priv->tstamp; + c->ix = ix; + c->cpu = cpu; + c->pdev = priv->mdev->device; + c->netdev = priv->netdev; + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); + c->num_tc = params->num_tc; + c->xdp = !!params->xdp_prog; + c->stats = &priv->channel_stats[ix].ch; + c->irq_desc = irq_to_desc(irq); + + err = mlx5e_alloc_xps_cpumask(c, params); + if (err) + goto err_free_channel; + + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); + + err = mlx5e_open_queues(c, params, cparam); + if (unlikely(err)) + goto err_napi_del; + + if (umem) { + mlx5e_build_xsk_param(umem, &xsk); + err = mlx5e_open_xsk(priv, params, &xsk, umem, c); + if (unlikely(err)) + goto err_close_queues; + } + + *cp = c; + + return 0; + +err_close_queues: + mlx5e_close_queues(c); + err_napi_del: netif_napi_del(&c->napi); mlx5e_free_xps_cpumask(c); @@ -1905,12 +1990,18 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_rq(&c->rq); 
netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix); + + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_activate_xsk(c); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) { int tc; + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_deactivate_xsk(c); + mlx5e_deactivate_rq(&c->rq); for (tc = 0; tc < c->num_tc; tc++) mlx5e_deactivate_txqsq(&c->sq[tc]); @@ -1918,19 +2009,9 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) static void mlx5e_close_channel(struct mlx5e_channel *c) { - mlx5e_close_xdpsq(&c->xdpsq, NULL); - mlx5e_close_rq(&c->rq); - if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); - mlx5e_close_sqs(c); - mlx5e_close_icosq(&c->icosq); - napi_disable(&c->napi); - if (c->xdp) - mlx5e_close_cq(&c->rq.xdpsq.cq); - mlx5e_close_cq(&c->rq.cq); - mlx5e_close_cq(&c->xdpsq.cq); - mlx5e_close_tx_cqs(c); - mlx5e_close_cq(&c->icosq.cq); + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_close_xsk(c); + mlx5e_close_queues(c); netif_napi_del(&c->napi); mlx5e_free_xps_cpumask(c); @@ -1941,6 +2022,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct mlx5e_rq_frags_info *info) { u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); @@ -1953,10 +2035,10 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, byte_count += MLX5E_METADATA_ETHER_LEN; #endif - if (mlx5e_rx_is_linear_skb(params)) { + if (mlx5e_rx_is_linear_skb(params, xsk)) { int frag_stride; - frag_stride = mlx5e_rx_get_linear_frag_sz(params); + frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk); frag_stride = roundup_pow_of_two(frag_stride); info->arr[0].frag_size = byte_count; @@ -2014,9 +2096,10 @@ static u8 mlx5e_get_rq_log_wq_sz(void *rqc) return MLX5_GET(wq, wq, log_wq_sz); } -static void mlx5e_build_rq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct 
mlx5e_rq_param *param) +void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_param *param) { struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; @@ -2026,16 +2109,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: MLX5_SET(wq, wq, log_wqe_num_of_strides, - mlx5e_mpwqe_get_log_num_strides(mdev, params) - + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); MLX5_SET(wq, wq, log_wqe_stride_size, - mlx5e_mpwqe_get_log_stride_size(mdev, params) - + mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); - MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params)); + MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk)); break; default: /* MLX5_WQ_TYPE_CYCLIC */ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - mlx5e_build_rq_frags_info(mdev, params, &param->frags_info); + mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info); ndsegs = param->frags_info.num_frags; } @@ -2066,8 +2149,8 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv, param->wq.buf_numa_node = dev_to_node(mdev->device); } -static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, - struct mlx5e_sq_param *param) +void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2103,9 +2186,10 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); } -static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_cq_param *param) +void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_cq_param *param) { struct mlx5_core_dev *mdev = priv->mdev; 
void *cqc = param->cqc; @@ -2113,8 +2197,8 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - log_cq_size = mlx5e_mpwqe_get_log_rq_size(params) + - mlx5e_mpwqe_get_log_num_strides(mdev, params); + log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); break; default: /* MLX5_WQ_TYPE_CYCLIC */ log_cq_size = params->log_rq_mtu_frames; @@ -2130,9 +2214,9 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; } -static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_cq_param *param) +void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_cq_param *param) { void *cqc = param->cqc; @@ -2142,9 +2226,9 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, param->cq_period_mode = params->tx_cq_moderation.cq_period_mode; } -static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_cq_param *param) +void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_cq_param *param) { void *cqc = param->cqc; @@ -2152,12 +2236,12 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } -static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_sq_param *param) +void mlx5e_build_icosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2168,9 +2252,9 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); } -static void 
mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_sq_param *param) +void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2198,14 +2282,14 @@ static void mlx5e_build_channel_param(struct mlx5e_priv *priv, { u8 icosq_log_wq_sz; - mlx5e_build_rq_param(priv, params, &cparam->rq); + mlx5e_build_rq_param(priv, params, NULL, &cparam->rq); icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq); mlx5e_build_sq_param(priv, params, &cparam->sq); mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq); - mlx5e_build_rx_cq_param(priv, params, &cparam->rx_cq); + mlx5e_build_rx_cq_param(priv, params, NULL, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq); mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq); } @@ -2226,7 +2310,12 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, mlx5e_build_channel_param(priv, &chs->params, cparam); for (i = 0; i < chs->num; i++) { - err = mlx5e_open_channel(priv, i, &chs->params, cparam, &chs->c[i]); + struct xdp_umem *umem = NULL; + + if (chs->params.xdp_prog) + umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, i); + + err = mlx5e_open_channel(priv, i, &chs->params, cparam, umem, &chs->c[i]); if (err) goto err_close_channels; } @@ -2268,6 +2357,10 @@ static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs) int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT; err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout); + + /* Don't wait on the XSK RQ, because the newer xdpsock sample + * doesn't provide any Fill Ring entries at the setup stage. + */ } return err ? 
-ETIMEDOUT : 0; @@ -2340,35 +2433,35 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv) return err; } -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { - struct mlx5e_rqt *rqt; + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int err; int ix; - for (ix = 0; ix < mlx5e_get_netdev_max_channels(priv->netdev); ix++) { - rqt = &priv->direct_tir[ix].rqt; - err = mlx5e_create_rqt(priv, 1 /*size */, rqt); - if (err) + for (ix = 0; ix < max_nch; ix++) { + err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt); + if (unlikely(err)) goto err_destroy_rqts; } return 0; err_destroy_rqts: - mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err); + mlx5_core_warn(priv->mdev, "create rqts failed, %d\n", err); for (ix--; ix >= 0; ix--) - mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt); + mlx5e_destroy_rqt(priv, &tirs[ix].rqt); return err; } -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv) +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int i; - for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + for (i = 0; i < max_nch; i++) + mlx5e_destroy_rqt(priv, &tirs[i].rqt); } static int mlx5e_rx_hash_fn(int hfunc) @@ -2788,11 +2881,12 @@ static void mlx5e_build_tx2sq_maps(struct mlx5e_priv *priv) void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) { int num_txqs = priv->channels.num * priv->channels.params.num_tc; + int num_rxqs = priv->channels.num * MLX5E_NUM_RQ_GROUPS; struct net_device *netdev = priv->netdev; mlx5e_netdev_set_tcs(netdev); netif_set_real_num_tx_queues(netdev, num_txqs); - netif_set_real_num_rx_queues(netdev, priv->channels.num); + netif_set_real_num_rx_queues(netdev, num_rxqs); mlx5e_build_tx2sq_maps(priv); mlx5e_activate_channels(&priv->channels); @@ -2804,10 
+2898,14 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_wait_channels_min_rx_wqes(&priv->channels); mlx5e_redirect_rqts_to_channels(priv, &priv->channels); + + mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels); } void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { + mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels); + mlx5e_redirect_rqts_to_drop(priv); if (mlx5e_is_vport_rep(priv)) @@ -2847,7 +2945,7 @@ static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, if (hw_modify) hw_modify(priv); - mlx5e_refresh_tirs(priv, false); + priv->profile->update_rx(priv); mlx5e_activate_priv_channels(priv); /* return carrier back if needed */ @@ -2886,15 +2984,18 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + bool is_xdp = priv->channels.params.xdp_prog; int err; set_bit(MLX5E_STATE_OPENED, &priv->state); + if (is_xdp) + mlx5e_xdp_set_open(priv); err = mlx5e_open_channels(priv, &priv->channels); if (err) goto err_clear_state_opened_flag; - mlx5e_refresh_tirs(priv, false); + priv->profile->update_rx(priv); mlx5e_activate_priv_channels(priv); if (priv->profile->update_carrier) priv->profile->update_carrier(priv); @@ -2903,6 +3004,8 @@ int mlx5e_open_locked(struct net_device *netdev) return 0; err_clear_state_opened_flag: + if (is_xdp) + mlx5e_xdp_set_closed(priv); clear_bit(MLX5E_STATE_OPENED, &priv->state); return err; } @@ -2934,6 +3037,8 @@ int mlx5e_close_locked(struct net_device *netdev) if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; + if (priv->channels.params.xdp_prog) + mlx5e_xdp_set_closed(priv); clear_bit(MLX5E_STATE_OPENED, &priv->state); netif_carrier_off(priv->netdev); @@ -3045,20 +3150,19 @@ void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq) mlx5e_free_cq(&drop_rq->cq); } -int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, - u32 underlay_qpn, u32 *tisn) +int mlx5e_create_tis(struct 
mlx5_core_dev *mdev, void *in, u32 *tisn) { - u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); - MLX5_SET(tisc, tisc, prio, tc << 1); - MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn); MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); + if (MLX5_GET(tisc, tisc, tls_en)) + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn); + if (mlx5_lag_is_lacp_owner(mdev)) MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); - return mlx5_core_create_tis(mdev, in, sizeof(in), tisn); + return mlx5_core_create_tis(mdev, in, MLX5_ST_SZ_BYTES(create_tis_in), tisn); } void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn) @@ -3072,7 +3176,14 @@ int mlx5e_create_tises(struct mlx5e_priv *priv) int tc; for (tc = 0; tc < priv->profile->max_tc; tc++) { - err = mlx5e_create_tis(priv->mdev, tc, 0, &priv->tisn[tc]); + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, prio, tc << 1); + + err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[tc]); if (err) goto err_close_tises; } @@ -3190,13 +3301,13 @@ err_destroy_inner_tirs: return err; } -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { - int nch = mlx5e_get_netdev_max_channels(priv->netdev); + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); struct mlx5e_tir *tir; void *tirc; int inlen; - int err; + int err = 0; u32 *in; int ix; @@ -3205,25 +3316,24 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) if (!in) return -ENOMEM; - for (ix = 0; ix < nch; ix++) { + for (ix = 0; ix < max_nch; ix++) { memset(in, 0, inlen); - tir = &priv->direct_tir[ix]; + tir = &tirs[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_direct_tir_ctx(priv, priv->direct_tir[ix].rqt.rqtn, tirc); + mlx5e_build_direct_tir_ctx(priv, tir->rqt.rqtn, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); - if (err) 
+ if (unlikely(err)) goto err_destroy_ch_tirs; } - kvfree(in); - - return 0; + goto out; err_destroy_ch_tirs: - mlx5_core_warn(priv->mdev, "create direct tirs failed, %d\n", err); + mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err); for (ix--; ix >= 0; ix--) - mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]); + mlx5e_destroy_tir(priv->mdev, &tirs[ix]); +out: kvfree(in); return err; @@ -3243,13 +3353,13 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc) mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); } -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { - int nch = mlx5e_get_netdev_max_channels(priv->netdev); + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int i; - for (i = 0; i < nch; i++) - mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]); + for (i = 0; i < max_nch; i++) + mlx5e_destroy_tir(priv->mdev, &tirs[i]); } static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) @@ -3391,11 +3501,12 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) { struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i]; + struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; int j; - s->rx_packets += rq_stats->packets; - s->rx_bytes += rq_stats->bytes; + s->rx_packets += rq_stats->packets + xskrq_stats->packets; + s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes; for (j = 0; j < priv->max_opened_tc; j++) { struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; @@ -3494,6 +3605,13 @@ static int set_feature_lro(struct net_device *netdev, bool enable) mutex_lock(&priv->state_lock); + if (enable && priv->xsk.refcnt) { + netdev_warn(netdev, "LRO is incompatible with AF_XDP (%hu XSKs are active)\n", + priv->xsk.refcnt); + err = 
-EINVAL; + goto out; + } + old_params = &priv->channels.params; if (enable && !MLX5E_GET_PFLAG(old_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { netdev_warn(netdev, "can't set LRO with legacy RQ\n"); @@ -3507,8 +3625,8 @@ static int set_feature_lro(struct net_device *netdev, bool enable) new_channels.params.lro_en = enable; if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { - if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params) == - mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params)) + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params, NULL) == + mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL)) reset = false; } @@ -3698,6 +3816,43 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, return features; } +static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, + struct mlx5e_channels *chs, + struct mlx5e_params *new_params, + struct mlx5_core_dev *mdev) +{ + u16 ix; + + for (ix = 0; ix < chs->params.num_channels; ix++) { + struct xdp_umem *umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, ix); + struct mlx5e_xsk_param xsk; + + if (!umem) + continue; + + mlx5e_build_xsk_param(umem, &xsk); + + if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) { + u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk); + int max_mtu_frame, max_mtu_page, max_mtu; + + /* Two criteria must be met: + * 1. HW MTU + all headrooms <= XSK frame size. + * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE. + */ + max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr); + max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk); + max_mtu = min(max_mtu_frame, max_mtu_page); + + netdev_err(netdev, "MTU %d is too big for an XSK running on channel %hu. 
Try MTU <= %d\n", + new_params->sw_mtu, ix, max_mtu); + return false; + } + } + + return true; +} + int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, change_hw_mtu_cb set_mtu_cb) { @@ -3718,18 +3873,31 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, new_channels.params.sw_mtu = new_mtu; if (params->xdp_prog && - !mlx5e_rx_is_linear_skb(&new_channels.params)) { + !mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n", - new_mtu, mlx5e_xdp_max_mtu(params)); + new_mtu, mlx5e_xdp_max_mtu(params, NULL)); + err = -EINVAL; + goto out; + } + + if (priv->xsk.refcnt && + !mlx5e_xsk_validate_mtu(netdev, &priv->channels, + &new_channels.params, priv->mdev)) { err = -EINVAL; goto out; } if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { - bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, &new_channels.params); - u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params); - u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params); + bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, + &new_channels.params, + NULL); + u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL); + u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params, NULL); + /* If XSK is active, XSK RQs are linear. */ + is_linear |= priv->xsk.refcnt; + + /* Always reset in linear mode - hw_mtu is used in data path. */ reset = reset && (is_linear || (ppw_old != ppw_new)); } @@ -4162,16 +4330,29 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) new_channels.params = priv->channels.params; new_channels.params.xdp_prog = prog; - if (!mlx5e_rx_is_linear_skb(&new_channels.params)) { + /* No XSK params: AF_XDP can't be enabled yet at the point of setting + * the XDP program. 
+ */ + if (!mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n", new_channels.params.sw_mtu, - mlx5e_xdp_max_mtu(&new_channels.params)); + mlx5e_xdp_max_mtu(&new_channels.params, NULL)); return -EINVAL; } return 0; } +static int mlx5e_xdp_update_state(struct mlx5e_priv *priv) +{ + if (priv->channels.params.xdp_prog) + mlx5e_xdp_set_open(priv); + else + mlx5e_xdp_set_closed(priv); + + return 0; +} + static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4192,8 +4373,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) /* no need for full reset when exchanging programs */ reset = (!priv->channels.params.xdp_prog || !prog); - if (was_opened && reset) - mlx5e_close_locked(netdev); if (was_opened && !reset) { /* num_channels is invariant here, so we can take the * batched reference right upfront. @@ -4205,20 +4384,31 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) } } - /* exchange programs, extra prog reference we got from caller - * as long as we don't fail from this point onwards. - */ - old_prog = xchg(&priv->channels.params.xdp_prog, prog); + if (was_opened && reset) { + struct mlx5e_channels new_channels = {}; + + new_channels.params = priv->channels.params; + new_channels.params.xdp_prog = prog; + mlx5e_set_rq_type(priv->mdev, &new_channels.params); + old_prog = priv->channels.params.xdp_prog; + + err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_xdp_update_state); + if (err) + goto unlock; + } else { + /* exchange programs, extra prog reference we got from caller + * as long as we don't fail from this point onwards. 
+ */ + old_prog = xchg(&priv->channels.params.xdp_prog, prog); + } + if (old_prog) bpf_prog_put(old_prog); - if (reset) /* change RQ type according to priv->xdp_prog */ + if (!was_opened && reset) /* change RQ type according to priv->xdp_prog */ mlx5e_set_rq_type(priv->mdev, &priv->channels.params); - if (was_opened && reset) - err = mlx5e_open_locked(netdev); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) + if (!was_opened || reset) goto unlock; /* exchanging programs w/o reset, we update ref counts on behalf @@ -4226,19 +4416,29 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) */ for (i = 0; i < priv->channels.num; i++) { struct mlx5e_channel *c = priv->channels.c[i]; + bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); + if (xsk_open) + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); napi_synchronize(&c->napi); /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ old_prog = xchg(&c->rq.xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + if (xsk_open) { + old_prog = xchg(&c->xskrq.xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + } set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); + if (xsk_open) + set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); /* napi_schedule in case we have missed anything */ napi_schedule(&c->napi); - - if (old_prog) - bpf_prog_put(old_prog); } unlock: @@ -4269,6 +4469,9 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) case XDP_QUERY_PROG: xdp->prog_id = mlx5e_xdp_query(dev); return 0; + case XDP_SETUP_XSK_UMEM: + return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem, + xdp->xsk.queue_id); default: return -EINVAL; } @@ -4351,6 +4554,7 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_tx_timeout = mlx5e_tx_timeout, .ndo_bpf = mlx5e_xdp, .ndo_xdp_xmit = mlx5e_xdp_xmit, + .ndo_xsk_async_xmit = mlx5e_xsk_async_xmit, #ifdef CONFIG_MLX5_EN_ARFS .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif 
@@ -4420,9 +4624,9 @@ static bool slow_pci_heuristic(struct mlx5_core_dev *mdev) link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; } -static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) +static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) { - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; moder.cq_period_mode = cq_period_mode; moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; @@ -4433,9 +4637,9 @@ static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) return moder; } -static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) +static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) { - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; moder.cq_period_mode = cq_period_mode; moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; @@ -4449,8 +4653,8 @@ static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) { return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? - NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE : - NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + DIM_CQ_PERIOD_MODE_START_FROM_CQE : + DIM_CQ_PERIOD_MODE_START_FROM_EQE; } void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) @@ -4502,11 +4706,13 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, * - Striding RQ configuration is not possible/supported. * - Slow PCI heuristic. * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. + * + * No XSK params: checking the availability of striding RQ in general. 
*/ if (!slow_pci_heuristic(mdev) && mlx5e_striding_rq_possible(mdev, params) && - (mlx5e_rx_mpwqe_is_linear_skb(mdev, params) || - !mlx5e_rx_is_linear_skb(params))) + (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || + !mlx5e_rx_is_linear_skb(params, NULL))) MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); mlx5e_set_rq_type(mdev, params); mlx5e_init_rq_type_params(mdev, params); @@ -4528,6 +4734,7 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, } void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu) @@ -4563,9 +4770,11 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* HW LRO */ /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ - if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + /* No XSK params: checking the availability of striding RQ in general. 
*/ + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) params->lro_en = !slow_pci_heuristic(mdev); + } params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ @@ -4584,13 +4793,16 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, mlx5e_build_rss_params(rss_params, params->num_channels); params->tunneled_offload_en = mlx5e_tunnel_inner_ft_supported(mdev); + + /* AF_XDP */ + params->xsk = xsk; } static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr); + mlx5_query_mac_address(priv->mdev, netdev->dev_addr); if (is_zero_ether_addr(netdev->dev_addr) && !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) { eth_hw_addr_random(netdev); @@ -4619,14 +4831,18 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->ethtool_ops = &mlx5e_ethtool_ops; netdev->vlan_features |= NETIF_F_SG; - netdev->vlan_features |= NETIF_F_IP_CSUM; - netdev->vlan_features |= NETIF_F_IPV6_CSUM; + netdev->vlan_features |= NETIF_F_HW_CSUM; netdev->vlan_features |= NETIF_F_GRO; netdev->vlan_features |= NETIF_F_TSO; netdev->vlan_features |= NETIF_F_TSO6; netdev->vlan_features |= NETIF_F_RXCSUM; netdev->vlan_features |= NETIF_F_RXHASH; + netdev->mpls_features |= NETIF_F_SG; + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->mpls_features |= NETIF_F_TSO; + netdev->mpls_features |= NETIF_F_TSO6; + netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX; @@ -4642,8 +4858,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { - netdev->hw_enc_features |= NETIF_F_IP_CSUM; - netdev->hw_enc_features |= NETIF_F_IPV6_CSUM; + netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; netdev->hw_enc_features |= 
NETIF_F_TSO6; netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; @@ -4756,7 +4971,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (err) return err; - mlx5e_build_nic_params(mdev, rss, &priv->channels.params, + mlx5e_build_nic_params(mdev, &priv->xsk, rss, &priv->channels.params, mlx5e_get_netdev_max_channels(netdev), netdev->mtu); @@ -4798,7 +5013,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir); if (err) goto err_destroy_indirect_rqts; @@ -4806,14 +5021,22 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir); if (err) goto err_destroy_indirect_tirs; + err = mlx5e_create_direct_rqts(priv, priv->xsk_tir); + if (unlikely(err)) + goto err_destroy_direct_tirs; + + err = mlx5e_create_direct_tirs(priv, priv->xsk_tir); + if (unlikely(err)) + goto err_destroy_xsk_rqts; + err = mlx5e_create_flow_steering(priv); if (err) { mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); - goto err_destroy_direct_tirs; + goto err_destroy_xsk_tirs; } err = mlx5e_tc_nic_init(priv); @@ -4824,12 +5047,16 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) err_destroy_flow_steering: mlx5e_destroy_flow_steering(priv); +err_destroy_xsk_tirs: + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); +err_destroy_xsk_rqts: + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv, true); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -4843,9 +5070,11 @@ static void 
mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { mlx5e_tc_nic_cleanup(priv); mlx5e_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, true); - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@ -4927,6 +5156,11 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5_lag_remove(mdev); } +int mlx5e_update_nic_rx(struct mlx5e_priv *priv) +{ + return mlx5e_refresh_tirs(priv, false); +} + static const struct mlx5e_profile mlx5e_nic_profile = { .init = mlx5e_nic_init, .cleanup = mlx5e_nic_cleanup, @@ -4936,6 +5170,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .cleanup_tx = mlx5e_cleanup_nic_tx, .enable = mlx5e_nic_enable, .disable = mlx5e_nic_disable, + .update_rx = mlx5e_update_nic_rx, .update_stats = mlx5e_update_ndo_stats, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe, @@ -4995,7 +5230,7 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), nch * profile->max_tc, - nch); + nch * MLX5E_NUM_RQ_GROUPS); if (!netdev) { mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); return NULL; @@ -5133,7 +5368,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) #ifdef CONFIG_MLX5_ESWITCH if (MLX5_ESWITCH_MANAGER(mdev) && - mlx5_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { + mlx5_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) { mlx5e_rep_register_vport_reps(mdev); return mdev; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 2f406b161bcf..529f8e4b32c6 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -128,7 +128,7 @@ static void mlx5e_rep_get_strings(struct net_device *dev, } } -static void mlx5e_vf_rep_update_hw_counters(struct mlx5e_priv *priv) +static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -166,17 +166,6 @@ static void mlx5e_uplink_rep_update_hw_counters(struct mlx5e_priv *priv) vport_stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); } -static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; - - if (rep->vport == MLX5_VPORT_UPLINK) - mlx5e_uplink_rep_update_hw_counters(priv); - else - mlx5e_vf_rep_update_hw_counters(priv); -} - static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) { struct mlx5e_sw_stats *s = &priv->stats.sw; @@ -203,7 +192,7 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, mutex_lock(&priv->state_lock); mlx5e_rep_update_sw_counters(priv); - mlx5e_rep_update_hw_counters(priv); + priv->profile->update_stats(priv); mutex_unlock(&priv->state_lock); for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) @@ -363,7 +352,7 @@ static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev, return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); } -static const struct ethtool_ops mlx5e_vf_rep_ethtool_ops = { +static const struct ethtool_ops mlx5e_rep_ethtool_ops = { .get_drvinfo = mlx5e_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, @@ -402,30 +391,19 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { static int mlx5e_rep_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid) { - struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct 
net_device *uplink_upper = NULL; - struct mlx5e_priv *uplink_priv = NULL; - struct net_device *uplink_dev; - - if (esw->mode == SRIOV_NONE) - return -EOPNOTSUPP; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + u64 parent_id; - uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - if (uplink_dev) { - uplink_upper = netdev_master_upper_dev_get(uplink_dev); - uplink_priv = netdev_priv(uplink_dev); - } + priv = netdev_priv(dev); + esw = priv->mdev->priv.eswitch; - ppid->id_len = ETH_ALEN; - if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) { - ether_addr_copy(ppid->id, uplink_upper->dev_addr); - } else { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; + if (esw->mode == MLX5_ESWITCH_NONE) + return -EOPNOTSUPP; - ether_addr_copy(ppid->id, rep->hw_id); - } + parent_id = mlx5_query_nic_system_image_guid(priv->mdev); + ppid->id_len = sizeof(parent_id); + memcpy(ppid->id, &parent_id, sizeof(parent_id)); return 0; } @@ -436,7 +414,7 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, struct mlx5e_rep_sq *rep_sq, *tmp; struct mlx5e_rep_priv *rpriv; - if (esw->mode != SRIOV_OFFLOADS) + if (esw->mode != MLX5_ESWITCH_OFFLOADS) return; rpriv = mlx5e_rep_to_rep_priv(rep); @@ -457,7 +435,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, int err; int i; - if (esw->mode != SRIOV_OFFLOADS) + if (esw->mode != MLX5_ESWITCH_OFFLOADS) return 0; rpriv = mlx5e_rep_to_rep_priv(rep); @@ -1101,7 +1079,7 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type); } -static int mlx5e_vf_rep_open(struct net_device *dev) +static int mlx5e_rep_open(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1124,7 +1102,7 @@ unlock: return err; } -static int mlx5e_vf_rep_close(struct net_device *dev) +static int mlx5e_rep_close(struct net_device *dev) { struct mlx5e_priv *priv = 
netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1156,6 +1134,8 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, if (rep->vport == MLX5_VPORT_UPLINK) ret = snprintf(buf, len, "p%d", fn); + else if (rep->vport == MLX5_VPORT_PF) + ret = snprintf(buf, len, "pf%d", fn); else ret = snprintf(buf, len, "pf%dvf%d", fn, rep->vport - 1); @@ -1276,7 +1256,7 @@ static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev } static void -mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -1285,7 +1265,7 @@ mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); } -static int mlx5e_vf_rep_change_mtu(struct net_device *netdev, int new_mtu) +static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu) { return mlx5e_change_mtu(netdev, new_mtu, NULL); } @@ -1318,16 +1298,16 @@ static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan return 0; } -static const struct net_device_ops mlx5e_netdev_ops_vf_rep = { - .ndo_open = mlx5e_vf_rep_open, - .ndo_stop = mlx5e_vf_rep_close, +static const struct net_device_ops mlx5e_netdev_ops_rep = { + .ndo_open = mlx5e_rep_open, + .ndo_stop = mlx5e_rep_close, .ndo_start_xmit = mlx5e_xmit, .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, .ndo_setup_tc = mlx5e_rep_setup_tc, - .ndo_get_stats64 = mlx5e_vf_rep_get_stats, + .ndo_get_stats64 = mlx5e_rep_get_stats, .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, - .ndo_change_mtu = mlx5e_vf_rep_change_mtu, + .ndo_change_mtu = mlx5e_rep_change_mtu, .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, }; @@ -1356,7 +1336,7 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { bool mlx5e_eswitch_rep(struct 
net_device *netdev) { - if (netdev->netdev_ops == &mlx5e_netdev_ops_vf_rep || + if (netdev->netdev_ops == &mlx5e_netdev_ops_rep || netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep) return true; @@ -1412,16 +1392,16 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) SET_NETDEV_DEV(netdev, mdev->device); netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep; /* we want a persistent mac for the uplink rep */ - mlx5_query_nic_vport_mac_address(mdev, 0, netdev->dev_addr); + mlx5_query_mac_address(mdev, netdev->dev_addr); netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB if (MLX5_CAP_GEN(mdev, qos)) netdev->dcbnl_ops = &mlx5e_dcbnl_ops; #endif } else { - netdev->netdev_ops = &mlx5e_netdev_ops_vf_rep; + netdev->netdev_ops = &mlx5e_netdev_ops_rep; eth_hw_addr_random(netdev); - netdev->ethtool_ops = &mlx5e_vf_rep_ethtool_ops; + netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; } netdev->watchdog_timeo = 15 * HZ; @@ -1530,7 +1510,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir); if (err) goto err_destroy_indirect_rqts; @@ -1538,7 +1518,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir); if (err) goto err_destroy_indirect_tirs; @@ -1555,11 +1535,11 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv, false); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ 
-1573,9 +1553,9 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) mlx5_del_flow_rules(rpriv->vport_rx_rule); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, false); - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); } @@ -1642,11 +1622,16 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) } } -static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv) +static void mlx5e_rep_enable(struct mlx5e_priv *priv) { mlx5e_set_netdev_mtu_boundaries(priv); } +static int mlx5e_update_rep_rx(struct mlx5e_priv *priv) +{ + return 0; +} + static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data) { struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); @@ -1714,15 +1699,16 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) mlx5_lag_remove(mdev); } -static const struct mlx5e_profile mlx5e_vf_rep_profile = { +static const struct mlx5e_profile mlx5e_rep_profile = { .init = mlx5e_init_rep, .cleanup = mlx5e_cleanup_rep, .init_rx = mlx5e_init_rep_rx, .cleanup_rx = mlx5e_cleanup_rep_rx, .init_tx = mlx5e_init_rep_tx, .cleanup_tx = mlx5e_cleanup_rep_tx, - .enable = mlx5e_vf_rep_enable, - .update_stats = mlx5e_vf_rep_update_hw_counters, + .enable = mlx5e_rep_enable, + .update_rx = mlx5e_update_rep_rx, + .update_stats = mlx5e_rep_update_hw_counters, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = 1, @@ -1737,6 +1723,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .cleanup_tx = mlx5e_cleanup_rep_tx, .enable = mlx5e_uplink_rep_enable, .disable = mlx5e_uplink_rep_disable, + .update_rx = mlx5e_update_rep_rx, .update_stats = mlx5e_uplink_rep_update_hw_counters, .update_carrier = 
mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, @@ -1761,7 +1748,8 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) rpriv->rep = rep; nch = mlx5e_get_max_num_channels(dev); - profile = (rep->vport == MLX5_VPORT_UPLINK) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; + profile = (rep->vport == MLX5_VPORT_UPLINK) ? + &mlx5e_uplink_rep_profile : &mlx5e_rep_profile; netdev = mlx5e_create_netdev(dev, profile, nch, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", @@ -1771,7 +1759,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) } rpriv->netdev = netdev; - rep->rep_if[REP_ETH].priv = rpriv; + rep->rep_data[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); if (rep->vport == MLX5_VPORT_UPLINK) { @@ -1845,16 +1833,17 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) return rpriv->netdev; } +static const struct mlx5_eswitch_rep_ops rep_ops = { + .load = mlx5e_vport_rep_load, + .unload = mlx5e_vport_rep_unload, + .get_proto_dev = mlx5e_vport_rep_get_proto_dev +}; + void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep_if rep_if = {}; - - rep_if.load = mlx5e_vport_rep_load; - rep_if.unload = mlx5e_vport_rep_unload; - rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH); + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_ETH); } void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 83b573b1abac..d4585f3b8cb2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -91,7 +91,7 @@ struct mlx5e_rep_priv { static inline struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct 
mlx5_eswitch_rep *rep) { - return (struct mlx5e_rep_priv *)rep->rep_if[REP_ETH].priv; + return rep->rep_data[REP_ETH].priv; } struct mlx5e_neigh { @@ -150,13 +150,12 @@ struct mlx5e_encap_entry { struct hlist_node encap_hlist; struct list_head flows; u32 encap_id; - struct ip_tunnel_info tun_info; + const struct ip_tunnel_info *tun_info; unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; struct net_device *route_dev; - int tunnel_type; - int tunnel_hlen; + struct mlx5e_tc_tunnel *tunnel; int reformat_type; u8 flags; char *encap_header; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 13133e7f088e..56a2f4666c47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -34,6 +34,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> +#include <linux/indirect_call_wrapper.h> #include <net/ip6_checksum.h> #include <net/page_pool.h> #include <net/inet_ecn.h> @@ -46,6 +47,7 @@ #include "en_accel/tls_rxtx.h" #include "lib/clock.h" #include "en/xdp.h" +#include "en/xsk/rx.h" static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) { @@ -234,8 +236,8 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, return true; } -static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) +static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) { if (mlx5e_rx_cache_get(rq, dma_info)) return 0; @@ -247,7 +249,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0, PAGE_SIZE, rq->buff.map_dir); if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { - put_page(dma_info->page); + page_pool_recycle_direct(rq->page_pool, dma_info->page); dma_info->page = NULL; return -ENOMEM; } @@ -255,13 +257,23 @@ static inline int 
mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, return 0; } +static inline int mlx5e_page_alloc(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + if (rq->umem) + return mlx5e_xsk_page_alloc_umem(rq, dma_info); + else + return mlx5e_page_alloc_pool(rq, dma_info); +} + void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir); } -void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, - bool recycle) +void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info, + bool recycle) { if (likely(recycle)) { if (mlx5e_rx_cache_put(rq, dma_info)) @@ -271,10 +283,25 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, page_pool_recycle_direct(rq->page_pool, dma_info->page); } else { mlx5e_page_dma_unmap(rq, dma_info); + page_pool_release_page(rq->page_pool, dma_info->page); put_page(dma_info->page); } } +static inline void mlx5e_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info, + bool recycle) +{ + if (rq->umem) + /* The `recycle` parameter is ignored, and the page is always + * put into the Reuse Ring, because there is no way to return + * the page to the userspace when the interface goes down. + */ + mlx5e_xsk_page_release(rq, dma_info); + else + mlx5e_page_release_dynamic(rq, dma_info, recycle); +} + static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *frag) { @@ -286,7 +313,7 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, * offset) should just use the new one without replenishing again * by themselves. 
*/ - err = mlx5e_page_alloc_mapped(rq, frag->di); + err = mlx5e_page_alloc(rq, frag->di); return err; } @@ -352,6 +379,13 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk) int err; int i; + if (rq->umem) { + int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags; + + if (unlikely(!mlx5e_xsk_pages_enough_umem(rq, pages_desired))) + return -ENOMEM; + } + for (i = 0; i < wqe_bulk; i++) { struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i); @@ -399,11 +433,17 @@ mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, static void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle) { - const bool no_xdp_xmit = - bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); + bool no_xdp_xmit; struct mlx5e_dma_info *dma_info = wi->umr.dma_info; int i; + /* A common case for AF_XDP. */ + if (bitmap_full(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE)) + return; + + no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, + MLX5_MPWRQ_PAGES_PER_WQE); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) mlx5e_page_release(rq, &dma_info[i], recycle); @@ -425,11 +465,6 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n) mlx5_wq_ll_update_db_record(wq); } -static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq) -{ - return mlx5_wq_cyc_get_ctr_wrap_cnt(&sq->wq, sq->pc); -} - static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq, struct mlx5_wq_cyc *wq, u16 pi, u16 nnops) @@ -457,6 +492,12 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) int err; int i; + if (rq->umem && + unlikely(!mlx5e_xsk_pages_enough_umem(rq, MLX5_MPWRQ_PAGES_PER_WQE))) { + err = -ENOMEM; + goto err; + } + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) { @@ -465,12 +506,10 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) } umr_wqe 
= mlx5_wq_cyc_get_wqe(wq, pi); - if (unlikely(mlx5e_icosq_wrap_cnt(sq) < 2)) - memcpy(umr_wqe, &rq->mpwqe.umr_wqe, - offsetof(struct mlx5e_umr_wqe, inline_mtts)); + memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts)); for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { - err = mlx5e_page_alloc_mapped(rq, dma_info); + err = mlx5e_page_alloc(rq, dma_info); if (unlikely(err)) goto err_unmap; umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR); @@ -485,6 +524,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset); sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; + sq->db.ico_wqe[pi].umr.rq = rq; sq->pc += MLX5E_UMR_WQEBBS; sq->doorbell_cseg = &umr_wqe->ctrl; @@ -496,6 +536,8 @@ err_unmap: dma_info--; mlx5e_page_release(rq, dma_info, true); } + +err: rq->stats->buff_alloc_err++; return err; @@ -542,11 +584,10 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) return !!err; } -static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) +void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) { struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); struct mlx5_cqe64 *cqe; - u8 completed_umr = 0; u16 sqcc; int i; @@ -587,7 +628,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) if (likely(wi->opcode == MLX5_OPCODE_UMR)) { sqcc += MLX5E_UMR_WQEBBS; - completed_umr++; + wi->umr.rq->mpwqe.umr_completed++; } else if (likely(wi->opcode == MLX5_OPCODE_NOP)) { sqcc++; } else { @@ -603,24 +644,25 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) sq->cc = sqcc; mlx5_cqwq_update_db_record(&cq->wq); - - if (likely(completed_umr)) { - mlx5e_post_rx_mpwqe(rq, completed_umr); - rq->mpwqe.umr_in_progress -= completed_umr; - } } bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) { struct mlx5e_icosq *sq = &rq->channel->icosq; struct mlx5_wq_ll *wq = &rq->mpwqe.wq; + u8 umr_completed = rq->mpwqe.umr_completed; + int 
alloc_err = 0; u8 missing, i; u16 head; if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return false; - mlx5e_poll_ico_cq(&sq->cq, rq); + if (umr_completed) { + mlx5e_post_rx_mpwqe(rq, umr_completed); + rq->mpwqe.umr_in_progress -= umr_completed; + rq->mpwqe.umr_completed = 0; + } missing = mlx5_wq_ll_missing(wq) - rq->mpwqe.umr_in_progress; @@ -634,7 +676,9 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) head = rq->mpwqe.actual_wq_head; i = missing; do { - if (unlikely(mlx5e_alloc_rx_mpwqe(rq, head))) + alloc_err = mlx5e_alloc_rx_mpwqe(rq, head); + + if (unlikely(alloc_err)) break; head = mlx5_wq_ll_get_wqe_next_ix(wq, head); } while (--i); @@ -648,6 +692,12 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk; rq->mpwqe.actual_wq_head = head; + /* If XSK Fill Ring doesn't have enough frames, busy poll by + * rescheduling the NAPI poll. + */ + if (unlikely(alloc_err == -ENOMEM && rq->umem)) + return true; + return false; } @@ -1016,7 +1066,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, } rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, false); rcu_read_unlock(); if (consumed) return NULL; /* page/packet was consumed by XDP */ @@ -1092,7 +1142,10 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, cqe, wi, cqe_bcnt); if (!skb) { /* probably for XDP */ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { @@ -1230,7 +1283,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, prefetch(data); rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32); + consumed = 
mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, false); rcu_read_unlock(); if (consumed) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) @@ -1279,8 +1332,10 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); - skb = rq->mpwqe.skb_from_cqe_mpwrq(rq, wi, cqe_bcnt, head_offset, - page_idx); + skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq, + mlx5e_skb_from_cqe_mpwrq_linear, + mlx5e_skb_from_cqe_mpwrq_nonlinear, + rq, wi, cqe_bcnt, head_offset, page_idx); if (!skb) goto mpwrq_cqe_out; @@ -1327,7 +1382,8 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) mlx5_cqwq_pop(cqwq); - rq->handle_rx_cqe(rq, cqe); + INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe, rq, cqe); } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq))); out: @@ -1437,7 +1493,10 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, cqe, wi, cqe_bcnt); if (!skb) goto wq_free_wqe; @@ -1469,7 +1528,10 @@ void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, cqe, wi, cqe_bcnt); if (unlikely(!skb)) { /* a DROP, save the page-reuse checks */ mlx5e_free_rx_wqe(rq, wi, true); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 4382ef85488c..840ec945ccba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -64,7 +64,7 @@ static 
int mlx5e_test_health_info(struct mlx5e_priv *priv) { struct mlx5_core_health *health = &priv->mdev->priv.health; - return health->sick ? 1 : 0; + return health->fatal_error ? 1 : 0; } static int mlx5e_test_link_state(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 483d321d2151..539b4d3656da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -48,8 +48,15 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) }, #ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_no_sync_data) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_bypass_req) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) }, #endif { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, @@ -104,7 +111,33 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_aff_change) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_force_irq) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary) 
}, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_ecn_mark) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_removed_vlan_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_redirect) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_strides) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_congst_umr) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_arfs_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_xmit) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_mpwqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_cqes) }, }; #define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) @@ -144,6 +177,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) &priv->channel_stats[i]; struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq; struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats->rq_xdpsq; + struct mlx5e_xdpsq_stats *xsksq_stats = &channel_stats->xsksq; + struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; struct mlx5e_ch_stats *ch_stats = &channel_stats->ch; int j; @@ 
-186,6 +221,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->ch_poll += ch_stats->poll; s->ch_arm += ch_stats->arm; s->ch_aff_change += ch_stats->aff_change; + s->ch_force_irq += ch_stats->force_irq; s->ch_eq_rearm += ch_stats->eq_rearm; /* xdp redirect */ s->tx_xdp_xmit += xdpsq_red_stats->xmit; @@ -194,6 +230,32 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_xdp_full += xdpsq_red_stats->full; s->tx_xdp_err += xdpsq_red_stats->err; s->tx_xdp_cqes += xdpsq_red_stats->cqes; + /* AF_XDP zero-copy */ + s->rx_xsk_packets += xskrq_stats->packets; + s->rx_xsk_bytes += xskrq_stats->bytes; + s->rx_xsk_csum_complete += xskrq_stats->csum_complete; + s->rx_xsk_csum_unnecessary += xskrq_stats->csum_unnecessary; + s->rx_xsk_csum_unnecessary_inner += xskrq_stats->csum_unnecessary_inner; + s->rx_xsk_csum_none += xskrq_stats->csum_none; + s->rx_xsk_ecn_mark += xskrq_stats->ecn_mark; + s->rx_xsk_removed_vlan_packets += xskrq_stats->removed_vlan_packets; + s->rx_xsk_xdp_drop += xskrq_stats->xdp_drop; + s->rx_xsk_xdp_redirect += xskrq_stats->xdp_redirect; + s->rx_xsk_wqe_err += xskrq_stats->wqe_err; + s->rx_xsk_mpwqe_filler_cqes += xskrq_stats->mpwqe_filler_cqes; + s->rx_xsk_mpwqe_filler_strides += xskrq_stats->mpwqe_filler_strides; + s->rx_xsk_oversize_pkts_sw_drop += xskrq_stats->oversize_pkts_sw_drop; + s->rx_xsk_buff_alloc_err += xskrq_stats->buff_alloc_err; + s->rx_xsk_cqe_compress_blks += xskrq_stats->cqe_compress_blks; + s->rx_xsk_cqe_compress_pkts += xskrq_stats->cqe_compress_pkts; + s->rx_xsk_congst_umr += xskrq_stats->congst_umr; + s->rx_xsk_arfs_err += xskrq_stats->arfs_err; + s->tx_xsk_xmit += xsksq_stats->xmit; + s->tx_xsk_mpwqe += xsksq_stats->mpwqe; + s->tx_xsk_inlnw += xsksq_stats->inlnw; + s->tx_xsk_full += xsksq_stats->full; + s->tx_xsk_err += xsksq_stats->err; + s->tx_xsk_cqes += xsksq_stats->cqes; for (j = 0; j < priv->max_opened_tc; j++) { struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; @@ -216,8 +278,15 @@ 
static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_csum_none += sq_stats->csum_none; s->tx_csum_partial += sq_stats->csum_partial; #ifdef CONFIG_MLX5_EN_TLS - s->tx_tls_ooo += sq_stats->tls_ooo; - s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes; + s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets; + s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes; + s->tx_tls_ctx += sq_stats->tls_ctx; + s->tx_tls_ooo += sq_stats->tls_ooo; + s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes; + s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data; + s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req; + s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes; + s->tx_tls_dump_packets += sq_stats->tls_dump_packets; #endif s->tx_cqes += sq_stats->cqes; } @@ -1238,6 +1307,16 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, +#endif { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, @@ -1266,11 +1345,43 @@ static const struct counter_desc xdpsq_stats_desc[] = { { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, }; +static 
const struct counter_desc xskrq_stats_desc[] = { + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, congst_umr) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, arfs_err) }, +}; + +static const struct counter_desc xsksq_stats_desc[] = { + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, full) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, err) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, +}; + static const struct counter_desc ch_stats_desc[] = { { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, events) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, 
poll) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, arm) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, aff_change) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, force_irq) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) }, }; @@ -1278,6 +1389,8 @@ static const struct counter_desc ch_stats_desc[] = { #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) #define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc) #define NUM_RQ_XDPSQ_STATS ARRAY_SIZE(rq_xdpsq_stats_desc) +#define NUM_XSKRQ_STATS ARRAY_SIZE(xskrq_stats_desc) +#define NUM_XSKSQ_STATS ARRAY_SIZE(xsksq_stats_desc) #define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc) static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) @@ -1288,13 +1401,16 @@ static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) (NUM_CH_STATS * max_nch) + (NUM_SQ_STATS * max_nch * priv->max_opened_tc) + (NUM_RQ_XDPSQ_STATS * max_nch) + - (NUM_XDPSQ_STATS * max_nch); + (NUM_XDPSQ_STATS * max_nch) + + (NUM_XSKRQ_STATS * max_nch * priv->xsk.ever_used) + + (NUM_XSKSQ_STATS * max_nch * priv->xsk.ever_used); } static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) { int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); + bool is_xsk = priv->xsk.ever_used; int i, j, tc; for (i = 0; i < max_nch; i++) @@ -1306,6 +1422,9 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, for (j = 0; j < NUM_RQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_stats_desc[j].format, i); + for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xskrq_stats_desc[j].format, i); for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_xdpsq_stats_desc[j].format, i); @@ -1318,10 +1437,14 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, sq_stats_desc[j].format, priv->channel_tc2txq[i][tc]); - for (i = 0; i < max_nch; i++) + for (i = 0; i < max_nch; i++) { + for (j = 
0; j < NUM_XSKSQ_STATS * is_xsk; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xsksq_stats_desc[j].format, i); for (j = 0; j < NUM_XDPSQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, xdpsq_stats_desc[j].format, i); + } return idx; } @@ -1330,6 +1453,7 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) { int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); + bool is_xsk = priv->xsk.ever_used; int i, j, tc; for (i = 0; i < max_nch; i++) @@ -1343,6 +1467,10 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq, rq_stats_desc, j); + for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xskrq, + xskrq_stats_desc, j); for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq, @@ -1356,11 +1484,16 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc], sq_stats_desc, j); - for (i = 0; i < max_nch; i++) + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xsksq, + xsksq_stats_desc, j); for (j = 0; j < NUM_XDPSQ_STATS; j++) data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq, xdpsq_stats_desc, j); + } return idx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index cdddcc46971b..76ac111e14d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -46,6 +46,8 @@ #define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_XDPSQ_STAT(type, fld) "tx%d_xdp_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_RQ_XDPSQ_STAT(type, fld) "rx%d_xdp_tx_"#fld, offsetof(type, fld) +#define 
MLX5E_DECLARE_XSKRQ_STAT(type, fld) "rx%d_xsk_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XSKSQ_STAT(type, fld) "tx%d_xsk_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld) struct counter_desc { @@ -116,12 +118,46 @@ struct mlx5e_sw_stats { u64 ch_poll; u64 ch_arm; u64 ch_aff_change; + u64 ch_force_irq; u64 ch_eq_rearm; #ifdef CONFIG_MLX5_EN_TLS + u64 tx_tls_encrypted_packets; + u64 tx_tls_encrypted_bytes; + u64 tx_tls_ctx; u64 tx_tls_ooo; u64 tx_tls_resync_bytes; + u64 tx_tls_drop_no_sync_data; + u64 tx_tls_drop_bypass_req; + u64 tx_tls_dump_packets; + u64 tx_tls_dump_bytes; #endif + + u64 rx_xsk_packets; + u64 rx_xsk_bytes; + u64 rx_xsk_csum_complete; + u64 rx_xsk_csum_unnecessary; + u64 rx_xsk_csum_unnecessary_inner; + u64 rx_xsk_csum_none; + u64 rx_xsk_ecn_mark; + u64 rx_xsk_removed_vlan_packets; + u64 rx_xsk_xdp_drop; + u64 rx_xsk_xdp_redirect; + u64 rx_xsk_wqe_err; + u64 rx_xsk_mpwqe_filler_cqes; + u64 rx_xsk_mpwqe_filler_strides; + u64 rx_xsk_oversize_pkts_sw_drop; + u64 rx_xsk_buff_alloc_err; + u64 rx_xsk_cqe_compress_blks; + u64 rx_xsk_cqe_compress_pkts; + u64 rx_xsk_congst_umr; + u64 rx_xsk_arfs_err; + u64 tx_xsk_xmit; + u64 tx_xsk_mpwqe; + u64 tx_xsk_inlnw; + u64 tx_xsk_full; + u64 tx_xsk_err; + u64 tx_xsk_cqes; }; struct mlx5e_qcounter_stats { @@ -227,8 +263,15 @@ struct mlx5e_sq_stats { u64 added_vlan_packets; u64 nop; #ifdef CONFIG_MLX5_EN_TLS + u64 tls_encrypted_packets; + u64 tls_encrypted_bytes; + u64 tls_ctx; u64 tls_ooo; u64 tls_resync_bytes; + u64 tls_drop_no_sync_data; + u64 tls_drop_bypass_req; + u64 tls_dump_packets; + u64 tls_dump_bytes; #endif /* less likely accessed in data path */ u64 csum_none; @@ -256,6 +299,7 @@ struct mlx5e_ch_stats { u64 poll; u64 arm; u64 aff_change; + u64 force_irq; u64 eq_rearm; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e40c60d1631f..3ac9b1e423ee 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -53,6 +53,7 @@ #include "en/port.h" #include "en/tc_tun.h" #include "lib/devcom.h" +#include "lib/geneve.h" struct mlx5_nic_flow_attr { u32 action; @@ -126,7 +127,7 @@ struct mlx5e_tc_flow { }; struct mlx5e_tc_flow_parse_attr { - struct ip_tunnel_info tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; + const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; int num_mod_hdr_actions; @@ -716,19 +717,22 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context; struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, - .flow_tag = attr->flow_tag, .reformat_id = 0, - .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND, + .flags = FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; bool table_created = false; int err, dest_ix = 0; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = attr->flow_tag; + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); if (err) { @@ -799,7 +803,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } if (attr->match_level != MLX5_MATCH_NONE) - parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, &flow_act, dest, dest_ix); @@ -1063,6 +1067,19 @@ err_max_prio_chain: return err; } +static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) +{ + struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec; + void *headers_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + misc_parameters_3); + 
u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3, + headers_v, + geneve_tlv_option_0_data); + + return !!geneve_tlv_opt_0_data; +} + static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { @@ -1084,6 +1101,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); } + if (mlx5_flow_has_geneve_opt(flow)) + mlx5_geneve_tlv_option_del(priv->mdev->geneve); + mlx5_eswitch_del_vlan_action(esw, attr); for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) @@ -1339,7 +1359,6 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); - struct flow_match_control enc_control; int err; err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, @@ -1350,9 +1369,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return err; } - flow_rule_match_enc_control(rule, &enc_control); - - if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { struct flow_match_ipv4_addrs match; flow_rule_match_enc_ipv4_addrs(rule, &match); @@ -1372,7 +1389,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); - } else if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { struct flow_match_ipv6_addrs match; flow_rule_match_enc_ipv6_addrs(rule, &match); @@ -1497,29 +1514,21 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | BIT(FLOW_DISSECTOR_KEY_TCP) | BIT(FLOW_DISSECTOR_KEY_IP) | - BIT(FLOW_DISSECTOR_KEY_ENC_IP))) { + BIT(FLOW_DISSECTOR_KEY_ENC_IP) | + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) { 
NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", dissector->used_keys); return -EOPNOTSUPP; } - if ((flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) && - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { - struct flow_match_control match; - - flow_rule_match_enc_control(rule, &match); - switch (match.key->addr_type) { - case FLOW_DISSECTOR_KEY_IPV4_ADDRS: - case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) - return -EOPNOTSUPP; - break; - default: + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) return -EOPNOTSUPP; - } /* In decap flow, header pointers should point to the inner * headers, outer header were already set by parse_tunnel_attr @@ -2581,21 +2590,21 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, } struct encap_key { - struct ip_tunnel_key *ip_tun_key; - int tunnel_type; + const struct ip_tunnel_key *ip_tun_key; + struct mlx5e_tc_tunnel *tc_tunnel; }; static inline int cmp_encap_info(struct encap_key *a, struct encap_key *b) { return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || - a->tunnel_type != b->tunnel_type; + a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; } static inline int hash_encap_info(struct encap_key *key) { return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), - key->tunnel_type); + key->tc_tunnel->tunnel_type); } @@ -2625,7 +2634,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_eswitch 
*esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct ip_tunnel_info *tun_info; + const struct ip_tunnel_info *tun_info; struct encap_key key, e_key; struct mlx5e_encap_entry *e; unsigned short family; @@ -2634,17 +2643,17 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, int err = 0; parse_attr = attr->parse_attr; - tun_info = &parse_attr->tun_info[out_index]; + tun_info = parse_attr->tun_info[out_index]; family = ip_tunnel_info_af(tun_info); key.ip_tun_key = &tun_info->key; - key.tunnel_type = mlx5e_tc_tun_get_type(mirred_dev); + key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); hash_key = hash_encap_info(&key); hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { - e_key.ip_tun_key = &e->tun_info.key; - e_key.tunnel_type = e->tunnel_type; + e_key.ip_tun_key = &e->tun_info->key; + e_key.tc_tunnel = e->tunnel; if (!cmp_encap_info(&e_key, &key)) { found = true; break; @@ -2659,7 +2668,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, if (!e) return -ENOMEM; - e->tun_info = *tun_info; + e->tun_info = tun_info; err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); if (err) goto out_err; @@ -2793,6 +2802,16 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv, return err; } +bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, + struct net_device *out_dev) +{ + if (is_merged_eswitch_dev(priv, out_dev)) + return true; + + return mlx5e_eswitch_rep(out_dev) && + same_hw_devs(priv, netdev_priv(out_dev)); +} + static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, @@ -2858,9 +2877,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - if (netdev_port_same_parent_id(priv->netdev, - out_dev) || - is_merged_eswitch_dev(priv, out_dev)) { + if (netdev_port_same_parent_id(priv->netdev, 
out_dev)) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev); @@ -2877,6 +2894,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (err) return err; } + if (is_vlan_dev(parse_attr->filter_dev)) { err = add_vlan_pop_action(priv, attr, &action); @@ -2884,8 +2902,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return err; } - if (!mlx5e_eswitch_rep(out_dev)) + if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not on same switch HW, can't offload forwarding"); + pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", + priv->netdev->name, out_dev->name); return -EOPNOTSUPP; + } out_priv = netdev_priv(out_dev); rpriv = out_priv->ppriv; @@ -2895,7 +2918,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } else if (encap) { parse_attr->mirred_ifindex[attr->out_count] = out_dev->ifindex; - parse_attr->tun_info[attr->out_count] = *info; + parse_attr->tun_info[attr->out_count] = info; encap = false; attr->dests[attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; @@ -3349,7 +3372,7 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, if (!tc_can_offload_extack(priv->netdev, f->common.extack)) return -EOPNOTSUPP; - if (esw && esw->mode == SRIOV_OFFLOADS) + if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) err = mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, flow); else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index f62e81902d27..8f288cc53cee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -74,6 +74,9 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags); void mlx5e_tc_reoffload_flows_work(struct work_struct *work); +bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, + 
struct net_device *out_dev); + #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 701e5dc75bb0..600e92cb629a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -35,55 +35,12 @@ #include <net/geneve.h> #include <net/dsfield.h> #include "en.h" +#include "en/txrx.h" #include "ipoib/ipoib.h" #include "en_accel/en_accel.h" +#include "en_accel/ktls.h" #include "lib/clock.h" -#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS - -#ifndef CONFIG_MLX5_EN_TLS -#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ - MLX5E_SQ_NOPS_ROOM) -#else -/* TLS offload requires MLX5E_SQ_STOP_ROOM to have - * enough room for a resync SKB, a normal SKB and a NOP - */ -#define MLX5E_SQ_STOP_ROOM (2 * MLX5_SEND_WQE_MAX_WQEBBS +\ - MLX5E_SQ_NOPS_ROOM) -#endif - -static inline void mlx5e_tx_dma_unmap(struct device *pdev, - struct mlx5e_sq_dma *dma) -{ - switch (dma->type) { - case MLX5E_DMA_MAP_SINGLE: - dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); - break; - case MLX5E_DMA_MAP_PAGE: - dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); - break; - default: - WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n"); - } -} - -static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) -{ - return &sq->db.dma_fifo[i & sq->dma_fifo_mask]; -} - -static inline void mlx5e_dma_push(struct mlx5e_txqsq *sq, - dma_addr_t addr, - u32 size, - enum mlx5e_dma_map_type map_type) -{ - struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++); - - dma->addr = addr; - dma->size = size; - dma->type = map_type; -} - static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) { int i; @@ -277,23 +234,6 @@ dma_unmap_wqe_err: return -ENOMEM; } 
-static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, - struct mlx5_wq_cyc *wq, - u16 pi, u16 nnops) -{ - struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; - - edge_wi = wi + nnops; - - /* fill sq frag edge with nops to avoid wqe wrapping two pages */ - for (; wi < edge_wi; wi++) { - wi->skb = NULL; - wi->num_wqebbs = 1; - mlx5e_post_nop(wq, sq->sqn, &sq->pc); - } - sq->stats->nop += nnops; -} - static inline void mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma, @@ -301,6 +241,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more) { struct mlx5_wq_cyc *wq = &sq->wq; + bool send_doorbell; wi->num_bytes = num_bytes; wi->num_dma = num_dma; @@ -310,23 +251,21 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - netdev_tx_sent_queue(sq->txq, num_bytes); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; sq->pc += wi->num_wqebbs; - if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM))) { + if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, sq->stop_room))) { netif_tx_stop_queue(sq->txq); sq->stats->stopped++; } - if (!xmit_more || netif_xmit_stopped(sq->txq)) + send_doorbell = __netdev_tx_sent_queue(sq->txq, num_bytes, + xmit_more); + if (send_doorbell) mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); } -#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) - netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more) { @@ -353,9 +292,12 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; stats->packets += skb_shinfo(skb)->gso_segs; } else { 
+ u8 mode = mlx5e_transport_inline_tx_wqe(wqe) ? + MLX5_INLINE_MODE_TCP_UDP : sq->min_inline_mode; + opcode = MLX5_OPCODE_SEND; mss = 0; - ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + ihs = mlx5e_calc_min_inline(mode, skb); num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); stats->packets++; } @@ -380,11 +322,17 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, #ifdef CONFIG_MLX5_EN_IPSEC struct mlx5_wqe_eth_seg cur_eth = wqe->eth; #endif +#ifdef CONFIG_MLX5_EN_TLS + struct mlx5_wqe_ctrl_seg cur_ctrl = wqe->ctrl; +#endif mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); - mlx5e_sq_fetch_wqe(sq, &wqe, &pi); + wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi); #ifdef CONFIG_MLX5_EN_IPSEC wqe->eth = cur_eth; #endif +#ifdef CONFIG_MLX5_EN_TLS + wqe->ctrl = cur_ctrl; +#endif } /* fill wqe */ @@ -443,7 +391,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) u16 pi; sq = priv->txq2sq[skb_get_queue_mapping(skb)]; - mlx5e_sq_fetch_wqe(sq, &wqe, &pi); + wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi); /* might send skbs and update wqe and pi */ skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi); @@ -531,8 +479,16 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wi = &sq->db.wqe_info[ci]; skb = wi->skb; - if (unlikely(!skb)) { /* nop */ - sqcc++; + if (unlikely(!skb)) { +#ifdef CONFIG_MLX5_EN_TLS + if (wi->resync_dump_frag) { + struct mlx5e_sq_dma *dma = + mlx5e_dma_get(sq, dma_fifo_cc++); + + mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma); + } +#endif + sqcc += wi->num_wqebbs; continue; } @@ -574,8 +530,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) netdev_tx_completed_queue(sq->txq, npkts, nbytes); if (netif_tx_queue_stopped(sq->txq) && - mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - MLX5E_SQ_STOP_ROOM) && + mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) && !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { netif_tx_wake_queue(sq->txq); stats->wake++; 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index f9862bf75491..c50b6f0769c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -33,6 +33,7 @@ #include <linux/irq.h> #include "en.h" #include "en/xdp.h" +#include "en/xsk/tx.h" static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) { @@ -48,26 +49,24 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) { struct mlx5e_sq_stats *stats = sq->stats; - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state))) return; - net_dim_sample(sq->cq.event_ctr, stats->packets, stats->bytes, - &dim_sample); + dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); net_dim(&sq->dim, dim_sample); } static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) { struct mlx5e_rq_stats *stats = rq->stats; - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state))) return; - net_dim_sample(rq->cq.event_ctr, stats->packets, stats->bytes, - &dim_sample); + dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); net_dim(&rq->dim, dim_sample); } @@ -87,7 +86,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, napi); struct mlx5e_ch_stats *ch_stats = c->stats; + struct mlx5e_xdpsq *xsksq = &c->xsksq; + struct mlx5e_rq *xskrq = &c->xskrq; struct mlx5e_rq *rq = &c->rq; + bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + bool aff_change = false; + bool busy_xsk = false; bool busy = false; int work_done = 0; int i; @@ -97,22 +101,38 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= 
mlx5e_poll_tx_cq(&c->sq[i].cq, budget); - busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq, NULL); + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); if (c->xdp) - busy |= mlx5e_poll_xdpsq_cq(&rq->xdpsq.cq, rq); + busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq); if (likely(budget)) { /* budget=0 means: don't poll rx rings */ - work_done = mlx5e_poll_rx_cq(&rq->cq, budget); + if (xsk_open) + work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget); + + if (likely(budget - work_done)) + work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done); + busy |= work_done == budget; } - busy |= c->rq.post_wqes(rq); + mlx5e_poll_ico_cq(&c->icosq.cq); + + busy |= rq->post_wqes(rq); + if (xsk_open) { + mlx5e_poll_ico_cq(&c->xskicosq.cq); + busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq); + busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); + busy_xsk |= xskrq->post_wqes(xskrq); + } + + busy |= busy_xsk; if (busy) { if (likely(mlx5e_channel_no_affinity_change(c))) return budget; ch_stats->aff_change++; + aff_change = true; if (budget && work_done == budget) work_done--; } @@ -133,10 +153,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_cq_arm(&c->icosq.cq); mlx5e_cq_arm(&c->xdpsq.cq); + if (xsk_open) { + mlx5e_handle_rx_dim(xskrq); + mlx5e_cq_arm(&c->xskicosq.cq); + mlx5e_cq_arm(&xsksq->cq); + mlx5e_cq_arm(&xskrq->cq); + } + + if (unlikely(aff_change && busy_xsk)) { + mlx5e_trigger_irq(&c->icosq); + ch_stats->force_irq++; + } + return work_done; } -void mlx5e_completion_event(struct mlx5_core_cq *mcq) +void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) { struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 23883d1fa22f..41f25ea2e8d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -61,17 +61,21 @@ enum { MLX5_EQ_DOORBEL_OFFSET = 0x40, }; -struct mlx5_irq_info { - cpumask_var_t 
mask; - char name[MLX5_MAX_IRQ_NAME]; - void *context; /* dev_id provided to request_irq */ +/* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update + * the ci before we polled all the entries in the EQ. MLX5_NUM_SPARE_EQE is + * used to set the EQ size, budget must be smaller than the EQ size. + */ +enum { + MLX5_EQ_POLLING_BUDGET = 128, }; +static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); + struct mlx5_eq_table { struct list_head comp_eqs_list; - struct mlx5_eq pages_eq; - struct mlx5_eq cmd_eq; - struct mlx5_eq async_eq; + struct mlx5_eq_async pages_eq; + struct mlx5_eq_async cmd_eq; + struct mlx5_eq_async async_eq; struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; @@ -79,11 +83,8 @@ struct mlx5_eq_table { struct mlx5_nb cq_err_nb; struct mutex lock; /* sync async eqs creations */ - int num_comp_vectors; - struct mlx5_irq_info *irq_info; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif + int num_comp_eqs; + struct mlx5_irq_table *irq_table; }; #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@ -124,16 +125,24 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) return cq; } -static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) +static int mlx5_eq_comp_int(struct notifier_block *nb, + __always_unused unsigned long action, + __always_unused void *data) { - struct mlx5_eq_comp *eq_comp = eq_ptr; - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_comp *eq_comp = + container_of(nb, struct mlx5_eq_comp, irq_nb); + struct mlx5_eq *eq = &eq_comp->core; struct mlx5_eqe *eqe; - int set_ci = 0; + int num_eqes = 0; u32 cqn = -1; - while ((eqe = next_eqe_sw(eq))) { + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { struct mlx5_core_cq *cq; + /* Make sure we read EQ entry contents after we've * checked the ownership bit. 
*/ @@ -144,33 +153,23 @@ static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) cq = mlx5_eq_cq_get(eq, cqn); if (likely(cq)) { ++cq->arm_sn; - cq->comp(cq); + cq->comp(cq, eqe); mlx5_cq_put(cq); } else { mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); } ++eq->cons_index; - ++set_ci; - /* The HCA will think the queue has overflowed if we - * don't tell it we've been processing events. We - * create our EQs with MLX5_NUM_SPARE_EQE extra - * entries, so we must update our consumer index at - * least that often. - */ - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); - set_ci = 0; - } - } + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); +out: eq_update_ci(eq, 1); if (cqn != -1) tasklet_schedule(&eq_comp->tasklet_ctx.task); - return IRQ_HANDLED; + return 0; } /* Some architectures don't latch interrupts when they are disabled, so using @@ -184,25 +183,32 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) disable_irq(eq->core.irqn); count_eqe = eq->core.cons_index; - mlx5_eq_comp_int(eq->core.irqn, eq); + mlx5_eq_comp_int(&eq->irq_nb, 0, NULL); count_eqe = eq->core.cons_index - count_eqe; enable_irq(eq->core.irqn); return count_eqe; } -static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) +static int mlx5_eq_async_int(struct notifier_block *nb, + unsigned long action, void *data) { - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_async *eq_async = + container_of(nb, struct mlx5_eq_async, irq_nb); + struct mlx5_eq *eq = &eq_async->core; struct mlx5_eq_table *eqt; struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; - int set_ci = 0; + int num_eqes = 0; dev = eq->dev; eqt = dev->priv.eq_table; - while ((eqe = next_eqe_sw(eq))) { + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { /* * Make sure we read EQ entry contents after we've * checked the ownership bit. 
@@ -217,23 +223,13 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); ++eq->cons_index; - ++set_ci; - /* The HCA will think the queue has overflowed if we - * don't tell it we've been processing events. We - * create our EQs with MLX5_NUM_SPARE_EQE extra - * entries, so we must update our consumer index at - * least that often. - */ - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); - set_ci = 0; - } - } + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); +out: eq_update_ci(eq, 1); - return IRQ_HANDLED; + return 0; } static void init_eq_buf(struct mlx5_eq *eq) @@ -248,22 +244,19 @@ static void init_eq_buf(struct mlx5_eq *eq) } static int -create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, +create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - u8 vecidx = param->index; + u8 vecidx = param->irq_index; __be64 *pas; void *eqc; int inlen; u32 *in; int err; - - if (eq_table->irq_info[vecidx].context) - return -EEXIST; + int i; /* Init CQ table */ memset(cq_table, 0, sizeof(*cq_table)); @@ -291,10 +284,12 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, mlx5_fill_page_array(&eq->buf, pas); MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); - if (!param->mask && MLX5_CAP_GEN(dev, log_max_uctx)) + if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx)) MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID); - MLX5_SET64(create_eq_in, in, event_bitmask, param->mask); + for (i = 0; i < 4; i++) + MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i, + param->mask[i]); eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); MLX5_SET(eqc, eqc, 
log_eq_size, ilog2(eq->nent)); @@ -307,34 +302,19 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, if (err) goto err_in; - snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", - name, pci_name(dev->pdev)); - eq_table->irq_info[vecidx].context = param->context; - eq->vecidx = vecidx; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(eq->irqn, param->handler, 0, - eq_table->irq_info[vecidx].name, param->context); - if (err) - goto err_eq; err = mlx5_debug_eq_add(dev, eq); if (err) - goto err_irq; - - /* EQs are created in ARMED state - */ - eq_update_ci(eq, 1); + goto err_eq; kvfree(in); return 0; -err_irq: - free_irq(eq->irqn, eq); - err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -346,18 +326,48 @@ err_buf: return err; } -static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +/** + * mlx5_eq_enable - Enable EQ for receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to enable + * @nb - notifier call block + * mlx5_eq_enable - must be called after EQ is created in device. + */ +int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; - struct mlx5_irq_info *irq_info; int err; - irq_info = &eq_table->irq_info[eq->vecidx]; + err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb); + if (!err) + eq_update_ci(eq, 1); - mlx5_debug_eq_remove(dev, eq); + return err; +} +EXPORT_SYMBOL(mlx5_eq_enable); + +/** + * mlx5_eq_disable - Enable EQ for receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to disable + * @nb - notifier call block + * mlx5_eq_disable - must be called before EQ is destroyed. 
+ */ +void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + + mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb); +} +EXPORT_SYMBOL(mlx5_eq_disable); + +static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; - free_irq(eq->irqn, irq_info->context); - irq_info->context = NULL; + mlx5_debug_eq_remove(dev, eq); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) @@ -382,7 +392,7 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) return err; } -int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) +void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) { struct mlx5_cq_table *table = &eq->cq_table; struct mlx5_core_cq *tmp; @@ -392,16 +402,14 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) spin_unlock(&table->lock); if (!tmp) { - mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn); - return -ENOENT; - } - - if (tmp != cq) { - mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn); - return -EINVAL; + mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", + eq->eqn, cq->cqn); + return; } - return 0; + if (tmp != cq) + mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", + eq->eqn, cq->cqn); } int mlx5_eq_table_init(struct mlx5_core_dev *dev) @@ -423,6 +431,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); + eq_table->irq_table = dev->priv.irq_table; return 0; kvfree_eq_table: @@ -439,19 +448,20 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) /* Async EQs */ -static int create_async_eq(struct mlx5_core_dev *dev, const char *name, +static int create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; 
mutex_lock(&eq_table->lock); - if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) { - err = -ENOSPC; + /* Async EQs must share irq index 0 */ + if (param->irq_index != 0) { + err = -EINVAL; goto unlock; } - err = create_map_eq(dev, eq, name, param); + err = create_map_eq(dev, eq, param); unlock: mutex_unlock(&eq_table->lock); return err; @@ -480,7 +490,7 @@ static int cq_err_event_notifier(struct notifier_block *nb, /* type == MLX5_EVENT_TYPE_CQ_ERROR */ eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb); - eq = &eqt->async_eq; + eq = &eqt->async_eq.core; eqe = data; cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; @@ -493,14 +503,31 @@ static int cq_err_event_notifier(struct notifier_block *nb, return NOTIFY_OK; } - cq->event(cq, type); + if (cq->event) + cq->event(cq, type); mlx5_cq_put(cq); return NOTIFY_OK; } -static u64 gather_async_events_mask(struct mlx5_core_dev *dev) +static void gather_user_async_events(struct mlx5_core_dev *dev, u64 mask[4]) +{ + __be64 *user_unaffiliated_events; + __be64 *user_affiliated_events; + int i; + + user_affiliated_events = + MLX5_CAP_DEV_EVENT(dev, user_affiliated_events); + user_unaffiliated_events = + MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events); + + for (i = 0; i < 4; i++) + mask[i] |= be64_to_cpu(user_affiliated_events[i] | + user_unaffiliated_events[i]); +} + +static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4]) { u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; @@ -533,10 +560,14 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); - if (mlx5_core_is_ecpf_esw_manager(dev)) - async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE); + if (mlx5_eswitch_is_funcs_handler(dev)) + async_event_mask |= + (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED); - return async_event_mask; + mask[0] = async_event_mask; + + if (MLX5_CAP_GEN(dev, event_cap)) + 
gather_user_async_events(dev, mask); } static int create_async_eqs(struct mlx5_core_dev *dev) @@ -548,55 +579,76 @@ static int create_async_eqs(struct mlx5_core_dev *dev) MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); mlx5_eq_notifier_register(dev, &table->cq_err_nb); + table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_CMD_IDX, - .mask = 1ull << MLX5_EVENT_TYPE_CMD, + .irq_index = 0, .nent = MLX5_NUM_CMD_EQE, - .context = &table->cmd_eq, - .handler = mlx5_eq_async_int, }; - err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, ¶m); + + param.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD; + err = create_async_eq(dev, &table->cmd_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); goto err0; } - + err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err); + goto err1; + } mlx5_cmd_use_events(dev); + table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_ASYNC_IDX, - .mask = gather_async_events_mask(dev), + .irq_index = 0, .nent = MLX5_NUM_ASYNC_EQE, - .context = &table->async_eq, - .handler = mlx5_eq_async_int, }; - err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, ¶m); + + gather_async_events_mask(dev, param.mask); + err = create_async_eq(dev, &table->async_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); - goto err1; + goto err2; + } + err = mlx5_eq_enable(dev, &table->async_eq.core, + &table->async_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable async EQ %d\n", err); + goto err3; } + table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_PAGEREQ_IDX, - .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, + .irq_index = 0, .nent = /* TODO: sriov max_vf + */ 1, - .context = &table->pages_eq, - .handler = 
mlx5_eq_async_int, }; - err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param); + + param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST; + err = create_async_eq(dev, &table->pages_eq.core, &param); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); - goto err2; + goto err4; + } + err = mlx5_eq_enable(dev, &table->pages_eq.core, + &table->pages_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err); + goto err5; } return err; +err5: + destroy_async_eq(dev, &table->pages_eq.core); +err4: + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); +err3: + destroy_async_eq(dev, &table->async_eq.core); err2: - destroy_async_eq(dev, &table->async_eq); - -err1: mlx5_cmd_use_polling(dev); - destroy_async_eq(dev, &table->cmd_eq); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); +err1: + destroy_async_eq(dev, &table->cmd_eq.core); err0: mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; @@ -607,19 +659,22 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; int err; - err = destroy_async_eq(dev, &table->pages_eq); + mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb); + err = destroy_async_eq(dev, &table->pages_eq.core); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); - err = destroy_async_eq(dev, &table->async_eq); + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); + err = destroy_async_eq(dev, &table->async_eq.core); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); mlx5_cmd_use_polling(dev); - err = destroy_async_eq(dev, &table->cmd_eq); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + err = destroy_async_eq(dev, &table->cmd_eq.core); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); @@ -629,24 +684,24 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq
*mlx5_get_async_eq(struct mlx5_core_dev *dev) { - return &dev->priv.eq_table->async_eq; + return &dev->priv.eq_table->async_eq.core; } void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->async_eq.irqn); + synchronize_irq(dev->priv.eq_table->async_eq.core.irqn); } void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->cmd_eq.irqn); + synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn); } /* Generic EQ API for mlx5_core consumers * Needed For RDMA ODP EQ for now */ struct mlx5_eq * -mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, +mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param) { struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); @@ -655,7 +710,7 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, if (!eq) return ERR_PTR(-ENOMEM); - err = create_async_eq(dev, name, eq, param); + err = create_async_eq(dev, eq, param); if (err) { kvfree(eq); eq = ERR_PTR(err); @@ -713,84 +768,14 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) } EXPORT_SYMBOL(mlx5_eq_update_ci); -/* Completion EQs */ - -static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; - - if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { - mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); - return -ENOMEM; - } - - cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - irq_info->mask); - - if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, irq_info->mask)) - mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); - - return 0; -} - -static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_priv *priv = 
&mdev->priv; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; - - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(irq_info->mask); -} - -static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) -{ - int err; - int i; - - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) { - err = set_comp_irq_affinity_hint(mdev, i); - if (err) - goto err_out; - } - - return 0; - -err_out: - for (i--; i >= 0; i--) - clear_comp_irq_affinity_hint(mdev, i); - - return err; -} - -static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) -{ - int i; - - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) - clear_comp_irq_affinity_hint(mdev, i); -} - static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq, *n; - clear_comp_irqs_affinity_hints(dev); - -#ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; - } -#endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); if (destroy_unmap_eq(dev, &eq->core)) mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", eq->core.eqn); @@ -802,23 +787,17 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - char name[MLX5_MAX_IRQ_NAME]; struct mlx5_eq_comp *eq; - int ncomp_vec; + int ncomp_eqs; int nent; int err; int i; INIT_LIST_HEAD(&table->comp_eqs_list); - ncomp_vec = table->num_comp_vectors; + ncomp_eqs = table->num_comp_eqs; nent = MLX5_COMP_EQ_SIZE; -#ifdef CONFIG_RFS_ACCEL - table->rmap = alloc_irq_cpu_rmap(ncomp_vec); - if (!table->rmap) - return -ENOMEM; -#endif - for (i = 0; i < ncomp_vec; i++) { - int vecidx = i + MLX5_EQ_VEC_COMP_BASE; + for (i = 0; i < ncomp_eqs; i++) { + int vecidx = i + MLX5_IRQ_VEC_COMP_BASE; 
struct mlx5_eq_param param = {}; eq = kzalloc(sizeof(*eq), GFP_KERNEL); @@ -833,33 +812,28 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, (unsigned long)&eq->tasklet_ctx); -#ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); -#endif - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); + eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { - .index = vecidx, - .mask = 0, + .irq_index = vecidx, .nent = nent, - .context = &eq->core, - .handler = mlx5_eq_comp_int }; - err = create_map_eq(dev, &eq->core, name, &param); + err = create_map_eq(dev, &eq->core, &param); + if (err) { + kfree(eq); + goto clean; + } + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); if (err) { + destroy_unmap_eq(dev, &eq->core); kfree(eq); goto clean; } + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ list_add_tail(&eq->list, &table->comp_eqs_list); } - err = set_comp_irq_affinity_hints(dev); - if (err) { - mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); - goto clean; - } - return 0; clean: @@ -890,22 +864,24 @@ EXPORT_SYMBOL(mlx5_vector2eqn); unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->num_comp_vectors; + return dev->priv.eq_table->num_comp_eqs; } EXPORT_SYMBOL(mlx5_comp_vectors_count); struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { - /* TODO: consider irq_get_affinity_mask(irq) */ - return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; + int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE; + + return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table, + vecidx); } EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->rmap; + return
mlx5_irq_get_rmap(dev->priv.eq_table->irq_table); } #endif @@ -926,82 +902,19 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - int i, max_eqs; - - clear_comp_irqs_affinity_hints(dev); - -#ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; - } -#endif mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ - max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; - for (i = max_eqs - 1; i >= 0; i--) { - if (!table->irq_info[i].context) - continue; - free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context); - table->irq_info[i].context = NULL; - } + mlx5_irq_table_destroy(dev); mutex_unlock(&table->lock); - pci_free_irq_vectors(dev->pdev); -} - -static int alloc_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *table = priv->eq_table; - int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? 
- MLX5_CAP_GEN(dev, max_num_eqs) : - 1 << MLX5_CAP_GEN(dev, log_max_eq); - int nvec; - int err; - - nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + - MLX5_EQ_VEC_COMP_BASE; - nvec = min_t(int, nvec, num_eqs); - if (nvec <= MLX5_EQ_VEC_COMP_BASE) - return -ENOMEM; - - table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); - if (!table->irq_info) - return -ENOMEM; - - nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, - nvec, PCI_IRQ_MSIX); - if (nvec < 0) { - err = nvec; - goto err_free_irq_info; - } - - table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; - - return 0; - -err_free_irq_info: - kfree(table->irq_info); - return err; -} - -static void free_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - - pci_free_irq_vectors(dev->pdev); - kfree(priv->eq_table->irq_info); } int mlx5_eq_table_create(struct mlx5_core_dev *dev) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; - err = alloc_irq_vectors(dev); - if (err) { - mlx5_core_err(dev, "alloc irq vectors failed\n"); - return err; - } + eq_table->num_comp_eqs = + mlx5_irq_get_num_comp(eq_table->irq_table); err = create_async_eqs(dev); if (err) { @@ -1019,7 +932,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) err_comp_eqs: destroy_async_eqs(dev); err_async_eqs: - free_irq_vectors(dev); return err; } @@ -1027,7 +939,6 @@ void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) { destroy_comp_eqs(dev); destroy_async_eqs(dev); - free_irq_vectors(dev); } int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) @@ -1039,6 +950,7 @@ int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb); } +EXPORT_SYMBOL(mlx5_eq_notifier_register); int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) { @@ -1049,3 +961,4 @@ int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) 
return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb); } +EXPORT_SYMBOL(mlx5_eq_notifier_unregister); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 6a921e24cd5e..7281f8d6cba6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -134,6 +134,30 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *in, int inlen) +{ + return modify_esw_vport_context_cmd(esw->dev, vport, in, inlen); +} + +static int query_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, + void *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {}; + + MLX5_SET(query_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *out, int outlen) +{ + return query_esw_vport_context_cmd(esw->dev, vport, out, outlen); +} + static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, u16 vlan, u8 qos, u8 set_flags) { @@ -473,7 +497,7 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) fdb_add: /* SRIOV is enabled: Forward UC MAC to vport */ - if (esw->fdb_table.legacy.fdb && esw->mode == SRIOV_LEGACY) + if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY) vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n", @@ -873,7 +897,7 @@ static void esw_vport_change_handle_locked(struct mlx5_vport *vport) struct mlx5_eswitch *esw = 
dev->priv.eswitch; u8 mac[ETH_ALEN]; - mlx5_query_nic_vport_mac_address(dev, vport->vport, mac); + mlx5_query_nic_vport_mac_address(dev, vport->vport, true, mac); esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", vport->vport, mac); @@ -939,7 +963,7 @@ int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size)); root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS, - vport->vport); + mlx5_eswitch_vport_num_to_index(esw, vport->vport)); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport); return -EOPNOTSUPP; @@ -1057,7 +1081,7 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, - vport->vport); + mlx5_eswitch_vport_num_to_index(esw, vport->vport)); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport); return -EOPNOTSUPP; @@ -1168,6 +1192,8 @@ void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, vport->ingress.drop_rule = NULL; vport->ingress.allow_rule = NULL; + + esw_vport_del_ingress_acl_modify_metadata(esw, vport); } void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, @@ -1527,6 +1553,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { u16 vport_num = vport->vport; + int flags; if (esw->manager_vport == vport_num) return; @@ -1544,11 +1571,13 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, vport->info.node_guid); } + flags = (vport->info.vlan || vport->info.qos) ? 
+ SET_VLAN_STRIP | SET_VLAN_INSERT : 0; modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, - (vport->info.vlan || vport->info.qos)); + flags); /* Only legacy mode needs ACLs */ - if (esw->mode == SRIOV_LEGACY) { + if (esw->mode == MLX5_ESWITCH_LEGACY) { esw_vport_ingress_config(esw, vport); esw_vport_egress_config(esw, vport); } @@ -1600,7 +1629,7 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); /* Create steering drop counters for ingress and egress ACLs */ - if (vport_num && esw->mode == SRIOV_LEGACY) + if (vport_num && esw->mode == MLX5_ESWITCH_LEGACY) esw_vport_create_drop_counters(vport); /* Restore old vport configuration */ @@ -1654,7 +1683,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, vport->enabled_events = 0; esw_vport_disable_qos(esw, vport); if (esw->manager_vport != vport_num && - esw->mode == SRIOV_LEGACY) { + esw->mode == MLX5_ESWITCH_LEGACY) { mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, vport_num, 1, @@ -1686,54 +1715,91 @@ static int eswitch_vport_event(struct notifier_block *nb, return NOTIFY_OK; } +/** + * mlx5_esw_query_functions - Returns raw output about functions state + * @dev: Pointer to device to query + * + * mlx5_esw_query_functions() allocates and returns functions changed + * raw output memory pointer from device on success. Otherwise returns ERR_PTR. + * Caller must free the memory using kvfree() when valid pointer is returned. 
+ */ +const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) +{ + int outlen = MLX5_ST_SZ_BYTES(query_esw_functions_out); + u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {}; + u32 *out; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return ERR_PTR(-ENOMEM); + + MLX5_SET(query_esw_functions_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_FUNCTIONS); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); + if (!err) + return out; + + kvfree(out); + return ERR_PTR(err); +} + +static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw) +{ + MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->nb); + + if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) { + MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler, + ESW_FUNCTIONS_CHANGED); + mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb); + } +} + +static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw) +{ + if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) + mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); + + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + + flush_workqueue(esw->work_queue); +} + /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { - int vf_nvports = 0, total_nvports = 0; struct mlx5_vport *vport; int err; int i, enabled_events; if (!ESW_ALLOWED(esw) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { - esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); + esw_warn(esw->dev, "FDB is not supported, aborting ...\n"); return -EOPNOTSUPP; } if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support)) - esw_warn(esw->dev, "E-Switch ingress ACL is not supported by FW\n"); + esw_warn(esw->dev, "ingress ACL is not 
supported by FW\n"); if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) - esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); - - esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); - - if (mode == SRIOV_OFFLOADS) { - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports); - if (err) - return err; - total_nvports = esw->total_vports; - } else { - vf_nvports = nvfs; - total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev); - } - } + esw_warn(esw->dev, "engress ACL is not supported by FW\n"); esw->mode = mode; mlx5_lag_update(esw->dev); - if (mode == SRIOV_LEGACY) { + if (mode == MLX5_ESWITCH_LEGACY) { err = esw_create_legacy_table(esw); if (err) goto abort; } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, vf_nvports, total_nvports); + err = esw_offloads_init(esw); } if (err) @@ -1743,11 +1809,8 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (err) esw_warn(esw->dev, "Failed to create eswitch TSAR"); - /* Don't enable vport events when in SRIOV_OFFLOADS mode, since: - * 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode - * 2. FDB/Eswitch is programmed by user space tools - */ - enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0; + enabled_events = (mode == MLX5_ESWITCH_LEGACY) ? 
SRIOV_VPORT_EVENTS : + UC_ADDR_CHANGE; /* Enable PF vport */ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); @@ -1760,22 +1823,21 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } /* Enable VF vports */ - mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) esw_enable_vport(esw, vport, enabled_events); - if (mode == SRIOV_LEGACY) { - MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); - mlx5_eq_notifier_register(esw->dev, &esw->nb); - } + mlx5_eswitch_event_handlers_register(esw); + + esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n", + mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->enabled_vports); - esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", - esw->enabled_vports); return 0; abort: - esw->mode = SRIOV_NONE; + esw->mode = MLX5_ESWITCH_NONE; - if (mode == SRIOV_OFFLOADS) { + if (mode == MLX5_ESWITCH_OFFLOADS) { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); } @@ -1783,23 +1845,22 @@ abort: return err; } -void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) +void mlx5_eswitch_disable(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; struct mlx5_vport *vport; int old_mode; int i; - if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE) + if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE) return; - esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", - esw->enabled_vports, esw->mode); + esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", + esw->mode == MLX5_ESWITCH_LEGACY ? 
"LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->enabled_vports); mc_promisc = &esw->mc_promisc; - - if (esw->mode == SRIOV_LEGACY) - mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + mlx5_eswitch_event_handlers_unregister(esw); mlx5_esw_for_all_vports(esw, i, vport) esw_disable_vport(esw, vport); @@ -1809,17 +1870,17 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_destroy_tsar(esw); - if (esw->mode == SRIOV_LEGACY) + if (esw->mode == MLX5_ESWITCH_LEGACY) esw_destroy_legacy_table(esw); - else if (esw->mode == SRIOV_OFFLOADS) + else if (esw->mode == MLX5_ESWITCH_OFFLOADS) esw_offloads_cleanup(esw); old_mode = esw->mode; - esw->mode = SRIOV_NONE; + esw->mode = MLX5_ESWITCH_NONE; mlx5_lag_update(esw->dev); - if (old_mode == SRIOV_OFFLOADS) { + if (old_mode == MLX5_ESWITCH_OFFLOADS) { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); } @@ -1827,14 +1888,16 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) int mlx5_eswitch_init(struct mlx5_core_dev *dev) { - int total_vports = MLX5_TOTAL_VPORTS(dev); struct mlx5_eswitch *esw; struct mlx5_vport *vport; + int total_vports; int err, i; if (!MLX5_VPORT_MANAGER(dev)) return 0; + total_vports = mlx5_eswitch_get_total_vports(dev); + esw_info(dev, "Total vports %d, per vport: max uc(%d) max mc(%d)\n", total_vports, @@ -1847,6 +1910,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->dev = dev; esw->manager_vport = mlx5_eswitch_manager_vport(dev); + esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev); esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { @@ -1880,7 +1944,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) } esw->enabled_vports = 0; - esw->mode = SRIOV_NONE; + esw->mode = MLX5_ESWITCH_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) @@ -1950,7 +2014,7 @@ int 
mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, ether_addr_copy(evport->info.mac, mac); evport->info.node_guid = node_guid; - if (evport->enabled && esw->mode == SRIOV_LEGACY) + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) err = esw_vport_ingress_config(esw, evport); unlock: @@ -2034,7 +2098,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, evport->info.vlan = vlan; evport->info.qos = qos; - if (evport->enabled && esw->mode == SRIOV_LEGACY) { + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) { err = esw_vport_ingress_config(esw, evport); if (err) goto unlock; @@ -2076,7 +2140,7 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, mlx5_core_warn(esw->dev, "Spoofchk in set while MAC is invalid, vport(%d)\n", evport->vport); - if (evport->enabled && esw->mode == SRIOV_LEGACY) + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) err = esw_vport_ingress_config(esw, evport); if (err) evport->info.spoofchk = pschk; @@ -2172,7 +2236,7 @@ int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting) return -EPERM; mutex_lock(&esw->state_lock); - if (esw->mode != SRIOV_LEGACY) { + if (esw->mode != MLX5_ESWITCH_LEGACY) { err = -EOPNOTSUPP; goto out; } @@ -2195,7 +2259,7 @@ int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) return -EPERM; mutex_lock(&esw->state_lock); - if (esw->mode != SRIOV_LEGACY) { + if (esw->mode != MLX5_ESWITCH_LEGACY) { err = -EOPNOTSUPP; goto out; } @@ -2338,7 +2402,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, u64 bytes = 0; int err = 0; - if (!vport->enabled || esw->mode != SRIOV_LEGACY) + if (!vport->enabled || esw->mode != MLX5_ESWITCH_LEGACY) return 0; if (vport->egress.drop_counter) @@ -2448,16 +2512,27 @@ free_out: u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) { - return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE; + return ESW_ALLOWED(esw) ? 
esw->mode : MLX5_ESWITCH_NONE; } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); +enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw; + + esw = dev->priv.eswitch; + return ESW_ALLOWED(esw) ? esw->offloads.encap : + DEVLINK_ESWITCH_ENCAP_MODE_NONE; +} +EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); + bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { - if ((dev0->priv.eswitch->mode == SRIOV_NONE && - dev1->priv.eswitch->mode == SRIOV_NONE) || - (dev0->priv.eswitch->mode == SRIOV_OFFLOADS && - dev1->priv.eswitch->mode == SRIOV_OFFLOADS)) + if ((dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE && + dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE) || + (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS && + dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS)) return true; return false; @@ -2466,6 +2541,26 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { - return (dev0->priv.eswitch->mode == SRIOV_OFFLOADS && - dev1->priv.eswitch->mode == SRIOV_OFFLOADS); + return (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS && + dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS); +} + +void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) +{ + const u32 *out; + + WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE); + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) { + esw->esw_funcs.num_vfs = num_vfs; + return; + } + + out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(out)) + return; + + esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + kvfree(out); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index d043d6f9797d..a38e8a3c7c9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -68,6 +68,8 @@ struct vport_ingress { struct mlx5_flow_group *allow_spoofchk_only_grp; struct mlx5_flow_group *allow_untagged_only_grp; struct mlx5_flow_group *drop_grp; + int modify_metadata_id; + struct mlx5_flow_handle *modify_metadata_rule; struct mlx5_flow_handle *allow_rule; struct mlx5_flow_handle *drop_rule; struct mlx5_fc *drop_counter; @@ -173,9 +175,12 @@ struct mlx5_esw_offload { struct mutex peer_mutex; DECLARE_HASHTABLE(encap_tbl, 8); DECLARE_HASHTABLE(mod_hdr_tbl, 8); + DECLARE_HASHTABLE(termtbl_tbl, 8); + struct mutex termtbl_mutex; /* protects termtbl hash */ + const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; u64 num_flows; - u8 encap; + enum devlink_eswitch_encap_mode encap; }; /* E-Switch MC FDB table hash node */ @@ -190,11 +195,15 @@ struct mlx5_host_work { struct mlx5_eswitch *esw; }; -struct mlx5_host_info { +struct mlx5_esw_functions { struct mlx5_nb nb; u16 num_vfs; }; +enum { + MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -202,6 +211,7 @@ struct mlx5_eswitch { struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct *work_queue; struct mlx5_vport *vports; + u32 flags; int total_vports; int enabled_vports; /* Synchronize between vport change events @@ -219,12 +229,12 @@ struct mlx5_eswitch { int mode; int nvports; u16 manager_vport; - struct mlx5_host_info host_info; + u16 first_host_vport; + struct mlx5_esw_functions esw_funcs; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw); -int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, - int total_nvports); +int esw_offloads_init(struct mlx5_eswitch *esw); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, @@ -239,12 +249,14 @@ void esw_vport_disable_egress_acl(struct mlx5_eswitch 
*esw, struct mlx5_vport *vport); void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); -void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode); +void mlx5_eswitch_disable(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u16 vport, u8 mac[ETH_ALEN]); int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, @@ -266,8 +278,32 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct ifla_vf_stats *vf_stats); void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule); +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *in, int inlen); +int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *out, int outlen); + struct mlx5_flow_spec; struct mlx5_esw_flow_attr; +struct mlx5_termtbl_handle; + +bool +mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec); + +struct mlx5_flow_handle * +mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest); + +void +mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, + struct mlx5_termtbl_handle *tt); struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, @@ -338,6 +374,7 @@ struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *rep; struct mlx5_core_dev *mdev; u32 encap_id; + struct mlx5_termtbl_handle *termtbl; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; 
u32 mod_hdr_id; u8 match_level; @@ -355,10 +392,12 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); -int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode); -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, +int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode); +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack); -int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap); +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, @@ -386,6 +425,8 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1); +const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(__dev, format, ...) \ @@ -404,6 +445,24 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) MLX5_VPORT_ECPF : MLX5_VPORT_PF; } +static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF; +} + +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) +{ + /* Ideally device should have the functions changed supported + * capability regardless of it being ECPF or PF wherever such + * event should be processed such as on eswitch manager device. + * However, some ECPF based device might not have this capability + * set. Hence OR for ECPF check to cover such device. 
+ */ + return MLX5_CAP_ESW(dev, esw_functions_changed) || + mlx5_core_is_ecpf_esw_manager(dev); +} + static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) { /* Uplink always locate at the last element of the array.*/ @@ -488,16 +547,47 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); #define mlx5_esw_for_each_vf_vport_num_reverse(esw, vport, nvfs) \ for ((vport) = (nvfs); (vport) >= MLX5_VPORT_FIRST_VF; (vport)--) +/* Includes host PF (vport 0) if it's not esw manager. */ +#define mlx5_esw_for_each_host_func_rep(esw, i, rep, nvfs) \ + for ((i) = (esw)->first_host_vport; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) <= (nvfs); (i)++) + +#define mlx5_esw_for_each_host_func_rep_reverse(esw, i, rep, nvfs) \ + for ((i) = (nvfs); \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) >= (esw)->first_host_vport; (i)--) + +#define mlx5_esw_for_each_host_func_vport(esw, vport, nvfs) \ + for ((vport) = (esw)->first_host_vport; \ + (vport) <= (nvfs); (vport)++) + +#define mlx5_esw_for_each_host_func_vport_reverse(esw, vport, nvfs) \ + for ((vport) = (nvfs); \ + (vport) >= (esw)->first_host_vport; (vport)--) + struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); +bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num); + +void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs); +int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} -static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } -static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} +static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { return 0; } +static inline void 
mlx5_eswitch_disable(struct mlx5_eswitch *esw) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } +static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {} #define FDB_MAX_CHAIN 1 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 47b446d30f71..8ed4497929b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -41,7 +41,6 @@ #include "en.h" #include "fs_core.h" #include "lib/devcom.h" -#include "ecpf.h" #include "lib/eq.h" /* There are two match-all miss flows, one for unicast dst mac and @@ -89,6 +88,53 @@ u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw) return 1; } +static void +mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) +{ + void *misc2; + void *misc; + + /* Use metadata matching because vport is not represented by single + * VHCA in dual-port RoCE mode, and matching on source vport may fail. 
+ */ + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch, + attr->in_rep->vport)); + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc))) + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); + + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } + + if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) && + attr->in_rep->vport == MLX5_VPORT_UPLINK) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; +} + struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, @@ -100,9 +146,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; int j, i = 0; - void *misc; - if (esw->mode != SRIOV_OFFLOADS) + if (esw->mode != MLX5_ESWITCH_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); flow_act.action = attr->action; @@ -160,21 +205,8 @@ 
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, i++; } - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); - - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + mlx5_eswitch_set_rule_source_port(esw, spec, attr); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); - - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { if (attr->tunnel_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; @@ -193,7 +225,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, goto err_esw_get; } - rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); + if (mlx5_eswitch_termtbl_required(esw, &flow_act, spec)) + rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr, + &flow_act, dest, i); + else + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); if (IS_ERR(rule)) goto err_add_rule; else @@ -220,7 +256,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_table *fast_fdb; struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_handle *rule; - void *misc; int i; fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0); @@ -252,25 +287,11 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, dest[i].ft = fwd_fdb, i++; - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); - - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + 
mlx5_eswitch_set_rule_source_port(esw, spec, attr); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); - - if (attr->match_level == MLX5_MATCH_NONE) - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; - else - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | - MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + if (attr->match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); @@ -295,8 +316,16 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, bool fwd_rule) { bool split = (attr->split_count > 0); + int i; mlx5_del_flow_rules(rule); + + /* unref the term table */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (attr->dests[i].termtbl) + mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); + } + esw->offloads.num_flows--; if (fwd_rule) { @@ -328,12 +357,11 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) { struct mlx5_eswitch_rep *rep; - int vf_vport, err = 0; + int i, err = 0; esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? 
"pop" : "none"); - for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { - rep = &esw->offloads.vport_reps[vf_vport]; - if (atomic_read(&rep->rep_if[REP_ETH].state) != REP_LOADED) + mlx5_esw_for_each_host_func_rep(esw, i, rep, esw->esw_funcs.num_vfs) { + if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) continue; err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); @@ -559,23 +587,87 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } -static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, +static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + u8 fdb_to_vport_reg_c_id; + int err; + + err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, + out, sizeof(out)); + if (err) + return err; + + fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + + fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.fdb_to_vport_reg_c_id, 1); + + return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, + in, sizeof(in)); +} + +static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + u8 fdb_to_vport_reg_c_id; + int err; + + err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, + out, sizeof(out)); + if (err) + return err; + + fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + + fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; + + MLX5_SET(modify_esw_vport_context_in, in, 
+ esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.fdb_to_vport_reg_c_id, 1); + + return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, + in, sizeof(in)); +} + +static void peer_miss_rules_setup(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev, struct mlx5_flow_spec *spec, struct mlx5_flow_destination *dest) { - void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); + void *misc; - MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(peer_dev, vhca_id)); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(peer_dev, vhca_id)); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + } dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest->vport.num = peer_dev->priv.eswitch->manager_vport; @@ -583,6 +675,26 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; } +static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw, + struct 
mlx5_flow_spec *spec, + u16 vport) +{ + void *misc; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(peer_esw, + vport)); + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + } +} + static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_core_dev *peer_dev) { @@ -600,7 +712,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, if (!spec) return -ENOMEM; - peer_miss_rules_setup(peer_dev, spec, &dest); + peer_miss_rules_setup(esw, peer_dev, spec, &dest); flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL); if (!flows) { @@ -613,7 +725,9 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc_parameters); if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF); + esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, + spec, MLX5_VPORT_PF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { @@ -635,7 +749,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, } mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) { - MLX5_SET(fte_match_set_misc, misc, source_port, i); + esw_set_peer_miss_rule_source_port(esw, + peer_dev->priv.eswitch, + spec, i); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { @@ -919,6 +1036,30 @@ static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw) #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 +static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, + u32 *flow_group_in) +{ + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + 
match_criteria); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0); + } else { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + } +} + static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -1016,19 +1157,21 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) /* create peer esw miss group */ memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, - match_criteria); + esw_set_flow_group_source_port(esw, flow_group_in); + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_port); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } - MLX5_SET(create_flow_group_in, flow_group_in, - source_eswitch_owner_vhca_id_valid, 1); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + esw->total_vports - 1); @@ -1142,7 +1285,6 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *g; u32 
*flow_group_in; - void *match_criteria, *misc; int err = 0; nvports = nvports + MLX5_ESW_MISS_FLOWS; @@ -1152,12 +1294,8 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) /* create vport rx group */ memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + esw_set_flow_group_source_port(esw, flow_group_in); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); @@ -1196,13 +1334,24 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, goto out; } - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, vport); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport)); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = 
MLX5_MATCH_MISC_PARAMETERS; + } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, @@ -1220,21 +1369,22 @@ out: static int esw_offloads_start(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { - int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1; - if (esw->mode != SRIOV_LEGACY && + if (esw->mode != MLX5_ESWITCH_LEGACY && !mlx5_core_is_ecpf_esw_manager(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, "Can't set offloads mode, SRIOV legacy not enabled"); return -EINVAL; } - mlx5_eswitch_disable_sriov(esw); - err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + mlx5_eswitch_disable(esw); + mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs); + err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to offloads"); - err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to legacy"); @@ -1242,7 +1392,6 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, } if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) { if (mlx5_eswitch_inline_mode_get(esw, - num_vfs, &esw->offloads.inline_mode)) { esw->offloads.inline_mode = MLX5_INLINE_MODE_L2; NL_SET_ERR_MSG_MOD(extack, @@ -1259,11 +1408,11 @@ void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw) int esw_offloads_init_reps(struct mlx5_eswitch *esw) { - int total_vports = MLX5_TOTAL_VPORTS(esw->dev); + int total_vports = esw->total_vports; struct mlx5_core_dev *dev = esw->dev; struct mlx5_eswitch_rep *rep; u8 hw_id[ETH_ALEN], rep_type; - int vport; + int vport_index; esw->offloads.vport_reps = kcalloc(total_vports, sizeof(struct mlx5_eswitch_rep), @@ -1271,14 +1420,15 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) if (!esw->offloads.vport_reps) return -ENOMEM; - mlx5_query_nic_vport_mac_address(dev, 0, 
hw_id); + mlx5_query_mac_address(dev, hw_id); - mlx5_esw_for_all_reps(esw, vport, rep) { - rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport); + mlx5_esw_for_all_reps(esw, vport_index, rep) { + rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index); + rep->vport_index = vport_index; ether_addr_copy(rep->hw_id, hw_id); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) - atomic_set(&rep->rep_if[rep_type].state, + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); } @@ -1288,9 +1438,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { - if (atomic_cmpxchg(&rep->rep_if[rep_type].state, + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, REP_LOADED, REP_REGISTERED) == REP_LOADED) - rep->rep_if[rep_type].unload(rep); + esw->offloads.rep_ops[rep_type]->unload(rep); } static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) @@ -1329,21 +1479,20 @@ static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports) __unload_reps_vf_vport(esw, nvports, rep_type); } -static void __unload_reps_all_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) { - __unload_reps_vf_vport(esw, nvports, rep_type); + __unload_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type); /* Special vports must be the last to unload. 
*/ __unload_reps_special_vport(esw, rep_type); } -static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw, int nvports) +static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw) { u8 rep_type = NUM_REP_TYPES; while (rep_type-- > 0) - __unload_reps_all_vport(esw, nvports, rep_type); + __unload_reps_all_vport(esw, rep_type); } static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, @@ -1351,11 +1500,11 @@ static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, { int err = 0; - if (atomic_cmpxchg(&rep->rep_if[rep_type].state, + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { - err = rep->rep_if[rep_type].load(esw->dev, rep); + err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); if (err) - atomic_set(&rep->rep_if[rep_type].state, + atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED); } @@ -1419,6 +1568,26 @@ err_vf: return err; } +static int __load_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) +{ + int err; + + /* Special vports must be loaded first, uplink rep creates mdev resource. */ + err = __load_reps_special_vport(esw, rep_type); + if (err) + return err; + + err = __load_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type); + if (err) + goto err_vfs; + + return 0; + +err_vfs: + __unload_reps_special_vport(esw, rep_type); + return err; +} + static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports) { u8 rep_type = 0; @@ -1438,34 +1607,13 @@ err_reps: return err; } -static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) -{ - int err; - - /* Special vports must be loaded first. 
*/ - err = __load_reps_special_vport(esw, rep_type); - if (err) - return err; - - err = __load_reps_vf_vport(esw, nvports, rep_type); - if (err) - goto err_vfs; - - return 0; - -err_vfs: - __unload_reps_special_vport(esw, rep_type); - return err; -} - -static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw) { u8 rep_type = 0; int err; for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = __load_reps_all_vport(esw, nvports, rep_type); + err = __load_reps_all_vport(esw, rep_type); if (err) goto err_reps; } @@ -1474,7 +1622,7 @@ static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) err_reps: while (rep_type-- > 0) - __unload_reps_all_vport(esw, nvports, rep_type); + __unload_reps_all_vport(esw, rep_type); return err; } @@ -1510,6 +1658,10 @@ static int mlx5_esw_offloads_devcom_event(int event, switch (event) { case ESW_OFFLOADS_DEVCOM_PAIR: + if (mlx5_eswitch_vport_match_metadata_enabled(esw) != + mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) + break; + err = mlx5_esw_offloads_pair(esw, peer_esw); if (err) goto err_out; @@ -1578,32 +1730,16 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; int err = 0; /* For prio tag mode, there is only 1 FTEs: - * 1) Untagged packets - push prio tag VLAN, allow + * 1) Untagged packets - push prio tag VLAN and modify metadata if + * required, allow * Unmatched traffic is allowed by default */ - if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) - return -EOPNOTSUPP; - - esw_vport_cleanup_ingress_rules(esw, vport); - - err = esw_vport_enable_ingress_acl(esw, vport); - if (err) { - mlx5_core_warn(esw->dev, - "failed to enable prio tag ingress acl (%d) on vport[%d]\n", - err, vport->vport); - 
return err; - } - - esw_debug(esw->dev, - "vport[%d] configure ingress rules\n", vport->vport); - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { err = -ENOMEM; @@ -1619,6 +1755,12 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, flow_act.vlan[0].ethtype = ETH_P_8021Q; flow_act.vlan[0].vid = 0; flow_act.vlan[0].prio = 0; + + if (vport->ingress.modify_metadata_rule) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_id = vport->ingress.modify_metadata_id; + } + vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, &flow_act, NULL, 0); @@ -1639,6 +1781,58 @@ out_no_mem: return err; } +static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec spec = {}; + int err = 0; + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); + MLX5_SET(set_action_in, action, data, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); + + err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + 1, action, &vport->ingress.modify_metadata_id); + if (err) { + esw_warn(esw->dev, + "failed to alloc modify header for vport %d ingress acl (%d)\n", + vport->vport, err); + return err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_act.modify_id = vport->ingress.modify_metadata_id; + vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, + &spec, &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.modify_metadata_rule)) { + err = PTR_ERR(vport->ingress.modify_metadata_rule); + esw_warn(esw->dev, + "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n", + vport->vport, err); + vport->ingress.modify_metadata_rule = NULL; + goto 
out; + } + +out: + if (err) + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + return err; +} + +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (vport->ingress.modify_metadata_rule) { + mlx5_del_flow_rules(vport->ingress.modify_metadata_rule); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + + vport->ingress.modify_metadata_rule = NULL; + } +} + static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -1646,6 +1840,9 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec; int err = 0; + if (!MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + /* For prio tag mode, there is only 1 FTEs: * 1) prio tag packets - pop the prio tag VLAN, allow * Unmatched traffic is allowed by default @@ -1699,27 +1896,98 @@ out_no_mem: return err; } -static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports) +static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport = NULL; - int i, j; int err; - mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) { + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && + !MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + + esw_vport_cleanup_ingress_rules(esw, vport); + + err = esw_vport_enable_ingress_acl(esw, vport); + if (err) { + esw_warn(esw->dev, + "failed to enable ingress acl (%d) on vport[%d]\n", + err, vport->vport); + return err; + } + + esw_debug(esw->dev, + "vport[%d] configure ingress rules\n", vport->vport); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = esw_vport_add_ingress_acl_modify_metadata(esw, vport); + if (err) + goto out; + } + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + mlx5_eswitch_is_vf_vport(esw, vport->vport)) { err = esw_vport_ingress_prio_tag_config(esw, vport); if (err) - goto err_ingress; - err = 
esw_vport_egress_prio_tag_config(esw, vport); + goto out; + } + +out: + if (err) + esw_vport_disable_ingress_acl(esw, vport); + return err; +} + +static bool +esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) +{ + if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl)) + return false; + + if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) & + MLX5_FDB_TO_VPORT_REG_C_0)) + return false; + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) + return false; + + if (mlx5_core_is_ecpf_esw_manager(esw->dev) || + mlx5_ecpf_vport_exists(esw->dev)) + return false; + + return true; +} + +static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int i, j; + int err; + + if (esw_check_vport_match_metadata_supported(esw)) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + + mlx5_esw_for_all_vports(esw, i, vport) { + err = esw_vport_ingress_common_config(esw, vport); if (err) - goto err_egress; + goto err_ingress; + + if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { + err = esw_vport_egress_prio_tag_config(esw, vport); + if (err) + goto err_egress; + } } + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + esw_info(esw->dev, "Use metadata reg_c as source vport to match\n"); + return 0; err_egress: esw_vport_disable_ingress_acl(esw, vport); err_ingress: - mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) { + for (j = MLX5_VPORT_PF; j < i; j++) { + vport = &esw->vports[j]; esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } @@ -1727,40 +1995,46 @@ err_ingress: return err; } -static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; int i; - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->dev->priv.sriov.num_vfs) { + mlx5_esw_for_all_vports(esw, i, vport) { esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } + + esw->flags 
&= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; } -static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int vf_nvports, - int nvports) +static int esw_offloads_steering_init(struct mlx5_eswitch *esw) { + int num_vfs = esw->esw_funcs.num_vfs; + int total_vports; int err; + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + total_vports = esw->total_vports; + else + total_vports = num_vfs + MLX5_SPECIAL_VPORTS(esw->dev); + memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); - if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) { - err = esw_prio_tag_acls_config(esw, vf_nvports); - if (err) - return err; - } - - err = esw_create_offloads_fdb_tables(esw, nvports); + err = esw_create_offloads_acl_tables(esw); if (err) return err; - err = esw_create_offloads_table(esw, nvports); + err = esw_create_offloads_fdb_tables(esw, total_vports); + if (err) + goto create_fdb_err; + + err = esw_create_offloads_table(esw, total_vports); if (err) goto create_ft_err; - err = esw_create_vport_rx_group(esw, nvports); + err = esw_create_vport_rx_group(esw, total_vports); if (err) goto create_fg_err; @@ -1772,6 +2046,9 @@ create_fg_err: create_ft_err: esw_destroy_offloads_fdb_tables(esw); +create_fdb_err: + esw_destroy_offloads_acl_tables(esw); + return err; } @@ -1780,88 +2057,105 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_vport_rx_group(esw); esw_destroy_offloads_table(esw); esw_destroy_offloads_fdb_tables(esw); - if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) - esw_prio_tag_acls_cleanup(esw); + esw_destroy_offloads_acl_tables(esw); } -static void esw_host_params_event_handler(struct work_struct *work) +static void +esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) { - struct mlx5_host_work *host_work; - struct mlx5_eswitch *esw; - int err, num_vf = 0; + bool host_pf_disabled; + u16 new_num_vfs; - host_work = container_of(work, struct mlx5_host_work, work); - esw = 
host_work->esw; + new_num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + host_pf_disabled = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_pf_disabled); - err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf); - if (err || num_vf == esw->host_info.num_vfs) - goto out; + if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled) + return; /* Number of VFs can only change from "0 to x" or "x to 0". */ - if (esw->host_info.num_vfs > 0) { - esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs); + if (esw->esw_funcs.num_vfs > 0) { + esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs); } else { - err = esw_offloads_load_vf_reps(esw, num_vf); + int err; + err = esw_offloads_load_vf_reps(esw, new_num_vfs); if (err) - goto out; + return; } + esw->esw_funcs.num_vfs = new_num_vfs; +} + +static void esw_functions_changed_event_handler(struct work_struct *work) +{ + struct mlx5_host_work *host_work; + struct mlx5_eswitch *esw; + const u32 *out; - esw->host_info.num_vfs = num_vf; + host_work = container_of(work, struct mlx5_host_work, work); + esw = host_work->esw; + out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(out)) + goto out; + + esw_vfs_changed_event_handler(esw, out); + kvfree(out); out: kfree(host_work); } -static int esw_host_params_event(struct notifier_block *nb, - unsigned long type, void *data) +int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data) { + struct mlx5_esw_functions *esw_funcs; struct mlx5_host_work *host_work; - struct mlx5_host_info *host_info; struct mlx5_eswitch *esw; host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC); if (!host_work) return NOTIFY_DONE; - host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb); - esw = container_of(host_info, struct mlx5_eswitch, host_info); + esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb); + esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs); host_work->esw = esw; - 
INIT_WORK(&host_work->work, esw_host_params_event_handler); + INIT_WORK(&host_work->work, esw_functions_changed_event_handler); queue_work(esw->work_queue, &host_work->work); return NOTIFY_OK; } -int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, - int total_nvports) +int esw_offloads_init(struct mlx5_eswitch *esw) { int err; - err = esw_offloads_steering_init(esw, vf_nvports, total_nvports); + err = esw_offloads_steering_init(esw); if (err) return err; - err = esw_offloads_load_all_reps(esw, vf_nvports); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = mlx5_eswitch_enable_passing_vport_metadata(esw); + if (err) + goto err_vport_metadata; + } + + err = esw_offloads_load_all_reps(esw); if (err) goto err_reps; esw_offloads_devcom_init(esw); - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event, - HOST_PARAMS_CHANGE); - mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb); - esw->host_info.num_vfs = vf_nvports; - } + mutex_init(&esw->offloads.termtbl_mutex); mlx5_rdma_enable_roce(esw->dev); return 0; err_reps: + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + mlx5_eswitch_disable_passing_vport_metadata(esw); +err_vport_metadata: esw_offloads_steering_cleanup(esw); return err; } @@ -1869,13 +2163,13 @@ err_reps: static int esw_offloads_stop(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { - int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1; - mlx5_eswitch_disable_sriov(esw); - err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + mlx5_eswitch_disable(esw); + err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); - err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to offloads"); @@ -1887,19 +2181,11 @@ static int 
esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_cleanup(struct mlx5_eswitch *esw) { - u16 num_vfs; - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb); - flush_workqueue(esw->work_queue); - num_vfs = esw->host_info.num_vfs; - } else { - num_vfs = esw->dev->priv.sriov.num_vfs; - } - mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_all_reps(esw, num_vfs); + esw_offloads_unload_all_reps(esw); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + mlx5_eswitch_disable_passing_vport_metadata(esw); esw_offloads_steering_cleanup(esw); } @@ -1907,10 +2193,10 @@ static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) { switch (mode) { case DEVLINK_ESWITCH_MODE_LEGACY: - *mlx5_mode = SRIOV_LEGACY; + *mlx5_mode = MLX5_ESWITCH_LEGACY; break; case DEVLINK_ESWITCH_MODE_SWITCHDEV: - *mlx5_mode = SRIOV_OFFLOADS; + *mlx5_mode = MLX5_ESWITCH_OFFLOADS; break; default: return -EINVAL; @@ -1922,10 +2208,10 @@ static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode) { switch (mlx5_mode) { - case SRIOV_LEGACY: + case MLX5_ESWITCH_LEGACY: *mode = DEVLINK_ESWITCH_MODE_LEGACY; break; - case SRIOV_OFFLOADS: + case MLX5_ESWITCH_OFFLOADS: *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; break; default: @@ -1989,7 +2275,7 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink) if(!MLX5_ESWITCH_MANAGER(dev)) return -EPERM; - if (dev->priv.eswitch->mode == SRIOV_NONE && + if (dev->priv.eswitch->mode == MLX5_ESWITCH_NONE && !mlx5_core_is_ecpf_esw_manager(dev)) return -EOPNOTSUPP; @@ -2040,7 +2326,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; - int err, vport; + int err, vport, num_vport; u8 mlx5_mode; err = mlx5_devlink_eswitch_check(devlink); @@ -2069,7 +2355,7 @@ int 
mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (err) goto out; - for (vport = 1; vport < esw->enabled_vports; vport++) { + mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) { err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode); if (err) { NL_SET_ERR_MSG_MOD(extack, @@ -2082,7 +2368,8 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, return 0; revert_inline_mode: - while (--vport > 0) + num_vport = --vport; + mlx5_esw_for_each_host_func_vport_reverse(esw, vport, num_vport) mlx5_modify_nic_vport_min_inline(dev, vport, esw->offloads.inline_mode); @@ -2103,7 +2390,7 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); } -int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) +int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode) { u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; struct mlx5_core_dev *dev = esw->dev; @@ -2112,7 +2399,7 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) if (!MLX5_CAP_GEN(dev, vport_group_manager)) return -EOPNOTSUPP; - if (esw->mode == SRIOV_NONE) + if (esw->mode == MLX5_ESWITCH_NONE) return -EOPNOTSUPP; switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { @@ -2127,9 +2414,10 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) } query_vports: - for (vport = 1; vport <= nvfs; vport++) { + mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode); + mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) { mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode); - if (vport > 1 && prev_mlx5_mode != mlx5_mode) + if (prev_mlx5_mode != mlx5_mode) return -EINVAL; prev_mlx5_mode = mlx5_mode; } @@ -2139,7 +2427,8 @@ out: return 0; } -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, +int 
mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); @@ -2158,7 +2447,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) return -EOPNOTSUPP; - if (esw->mode == SRIOV_LEGACY) { + if (esw->mode == MLX5_ESWITCH_LEGACY) { esw->offloads.encap = encap; return 0; } @@ -2188,7 +2477,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, return err; } -int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; @@ -2203,36 +2493,31 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) } void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep_if *__rep_if, + const struct mlx5_eswitch_rep_ops *ops, u8 rep_type) { - struct mlx5_eswitch_rep_if *rep_if; + struct mlx5_eswitch_rep_data *rep_data; struct mlx5_eswitch_rep *rep; int i; + esw->offloads.rep_ops[rep_type] = ops; mlx5_esw_for_all_reps(esw, i, rep) { - rep_if = &rep->rep_if[rep_type]; - rep_if->load = __rep_if->load; - rep_if->unload = __rep_if->unload; - rep_if->get_proto_dev = __rep_if->get_proto_dev; - rep_if->priv = __rep_if->priv; - - atomic_set(&rep_if->state, REP_REGISTERED); + rep_data = &rep->rep_data[rep_type]; + atomic_set(&rep_data->state, REP_REGISTERED); } } EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) { - u16 max_vf = mlx5_core_max_vfs(esw->dev); struct mlx5_eswitch_rep *rep; int i; - if (esw->mode == SRIOV_OFFLOADS) - __unload_reps_all_vport(esw, max_vf, rep_type); + if (esw->mode == MLX5_ESWITCH_OFFLOADS) + 
__unload_reps_all_vport(esw, rep_type); mlx5_esw_for_all_reps(esw, i, rep) - atomic_set(&rep->rep_if[rep_type].state, REP_UNREGISTERED); + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); } EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); @@ -2241,7 +2526,7 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) struct mlx5_eswitch_rep *rep; rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - return rep->rep_if[rep_type].priv; + return rep->rep_data[rep_type].priv; } void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, @@ -2252,9 +2537,9 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, rep = mlx5_eswitch_get_rep(esw, vport); - if (atomic_read(&rep->rep_if[rep_type].state) == REP_LOADED && - rep->rep_if[rep_type].get_proto_dev) - return rep->rep_if[rep_type].get_proto_dev(rep); + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + esw->offloads.rep_ops[rep_type]->get_proto_dev) + return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep); return NULL; } EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); @@ -2271,3 +2556,22 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, return mlx5_eswitch_get_rep(esw, vport); } EXPORT_SYMBOL(mlx5_eswitch_vport_rep); + +bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num >= MLX5_VPORT_FIRST_VF && + vport_num <= esw->dev->priv.sriov.max_vfs; +} + +bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) +{ + return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA); +} +EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled); + +u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + u16 vport_num) +{ + return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num; +} +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c new file mode 100644 index 000000000000..1d55a324a17e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include <linux/mlx5/fs.h> +#include "eswitch.h" + +struct mlx5_termtbl_handle { + struct hlist_node termtbl_hlist; + + struct mlx5_flow_table *termtbl; + struct mlx5_flow_act flow_act; + struct mlx5_flow_destination dest; + + struct mlx5_flow_handle *rule; + int ref_count; +}; + +static u32 +mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest) +{ + u32 hash; + + hash = jhash_1word(flow_act->action, 0); + hash = jhash((const void *)&flow_act->vlan, + sizeof(flow_act->vlan), hash); + hash = jhash((const void *)&dest->vport.num, + sizeof(dest->vport.num), hash); + hash = jhash((const void *)&dest->vport.vhca_id, + sizeof(dest->vport.num), hash); + return hash; +} + +static int +mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, + struct mlx5_flow_destination *dest1, + struct mlx5_flow_act *flow_act2, + struct mlx5_flow_destination *dest2) +{ + return flow_act1->action != flow_act2->action || + dest1->vport.num != dest2->vport.num || + dest1->vport.vhca_id != dest2->vport.vhca_id || + memcmp(&flow_act1->vlan, &flow_act2->vlan, + sizeof(flow_act1->vlan)); +} + +static int +mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, + struct mlx5_termtbl_handle *tt, + struct mlx5_flow_act *flow_act) +{ + static const struct mlx5_flow_spec spec = {}; + struct mlx5_flow_namespace *root_ns; + int prio, flags; + int err; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + /* As this is the terminating action then the termination table is the + * same prio as the slow path + */ + prio = 
FDB_SLOW_PATH; + flags = MLX5_FLOW_TABLE_TERMINATION; + tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, prio, 1, 1, + 0, flags); + if (IS_ERR(tt->termtbl)) { + esw_warn(dev, "Failed to create termination table\n"); + return -EOPNOTSUPP; + } + + tt->rule = mlx5_add_flow_rules(tt->termtbl, &spec, flow_act, + &tt->dest, 1); + + if (IS_ERR(tt->rule)) { + esw_warn(dev, "Failed to create termination table rule\n"); + goto add_flow_err; + } + return 0; + +add_flow_err: + err = mlx5_destroy_flow_table(tt->termtbl); + if (err) + esw_warn(dev, "Failed to destroy termination table\n"); + + return -EOPNOTSUPP; +} + +static struct mlx5_termtbl_handle * +mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest) +{ + struct mlx5_termtbl_handle *tt; + bool found = false; + u32 hash_key; + int err; + + mutex_lock(&esw->offloads.termtbl_mutex); + + hash_key = mlx5_eswitch_termtbl_hash(flow_act, dest); + hash_for_each_possible(esw->offloads.termtbl_tbl, tt, + termtbl_hlist, hash_key) { + if (!mlx5_eswitch_termtbl_cmp(&tt->flow_act, &tt->dest, + flow_act, dest)) { + found = true; + break; + } + } + if (found) + goto tt_add_ref; + + tt = kzalloc(sizeof(*tt), GFP_KERNEL); + if (!tt) { + err = -ENOMEM; + goto tt_create_err; + } + + tt->dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + tt->dest.vport.num = dest->vport.num; + tt->dest.vport.vhca_id = dest->vport.vhca_id; + memcpy(&tt->flow_act, flow_act, sizeof(*flow_act)); + + err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act); + if (err) { + esw_warn(esw->dev, "Failed to create termination table\n"); + goto tt_create_err; + } + hash_add(esw->offloads.termtbl_tbl, &tt->termtbl_hlist, hash_key); +tt_add_ref: + tt->ref_count++; + mutex_unlock(&esw->offloads.termtbl_mutex); + return tt; +tt_create_err: + kfree(tt); + mutex_unlock(&esw->offloads.termtbl_mutex); + return ERR_PTR(err); +} + +void +mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, + struct 
mlx5_termtbl_handle *tt) +{ + mutex_lock(&esw->offloads.termtbl_mutex); + if (--tt->ref_count == 0) + hash_del(&tt->termtbl_hlist); + mutex_unlock(&esw->offloads.termtbl_mutex); + + if (!tt->ref_count) { + mlx5_del_flow_rules(tt->rule); + mlx5_destroy_flow_table(tt->termtbl); + kfree(tt); + } +} + +static void +mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, + struct mlx5_flow_act *dst) +{ + if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)) + return; + + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0])); + memset(&src->vlan[0], 0, sizeof(src->vlan[0])); + + if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2)) + return; + + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1])); + memset(&src->vlan[1], 0, sizeof(src->vlan[1])); +} + +bool +mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec) +{ + u32 port_mask = MLX5_GET(fte_match_param, spec->match_criteria, + misc_parameters.source_port); + u32 port_value = MLX5_GET(fte_match_param, spec->match_value, + misc_parameters.source_port); + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table)) + return false; + + /* push vlan on RX */ + return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) && + ((port_mask & port_value) == MLX5_VPORT_UPLINK); +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest) +{ + struct mlx5_flow_act term_tbl_act = {}; + struct mlx5_flow_handle *rule = NULL; + bool term_table_created = false; + int num_vport_dests = 0; + int i, curr_dest; + + 
mlx5_eswitch_termtbl_actions_move(flow_act, &term_tbl_act); + term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + for (i = 0; i < num_dest; i++) { + struct mlx5_termtbl_handle *tt; + + /* only vport destinations can be terminated */ + if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) + continue; + + /* get the terminating table for the action list */ + tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, + &dest[i]); + if (IS_ERR(tt)) { + esw_warn(esw->dev, "Failed to create termination table\n"); + goto revert_changes; + } + attr->dests[num_vport_dests].termtbl = tt; + num_vport_dests++; + + /* link the destination with the termination table */ + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = tt->termtbl; + term_table_created = true; + } + + /* at least one destination should reference a termination table */ + if (!term_table_created) + goto revert_changes; + + /* create the FTE */ + rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); + if (IS_ERR(rule)) + goto revert_changes; + + goto out; + +revert_changes: + /* revert the changes that were made to the original flow_act + * and fall-back to the original rule actions + */ + mlx5_eswitch_termtbl_actions_move(&term_tbl_act, flow_act); + + for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) { + struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl; + + /* search for the destination associated with the + * current term table + */ + for (i = 0; i < num_dest; i++) { + if (dest[i].ft != tt->termtbl) + continue; + + memset(&dest[i], 0, sizeof(dest[i])); + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[i].vport.num = tt->dest.vport.num; + dest[i].vport.vhca_id = tt->dest.vport.vhca_id; + mlx5_eswitch_termtbl_put(esw, tt); + break; + } + } + rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); +out: + return rule; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c 
index a81e8d2168d8..8bcf3426b9c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -108,8 +108,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_STALL_EVENT"; case MLX5_EVENT_TYPE_CMD: return "MLX5_EVENT_TYPE_CMD"; - case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE: - return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE"; + case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED: + return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED"; case MLX5_EVENT_TYPE_PAGE_REQUEST: return "MLX5_EVENT_TYPE_PAGE_REQUEST"; case MLX5_EVENT_TYPE_PAGE_FAULT: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index ca2296a2f9ee..4c50efe4e7f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -414,7 +414,8 @@ static void mlx5_fpga_conn_cq_tasklet(unsigned long data) mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET); } -static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq) +static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq, + struct mlx5_eqe *eqe) { struct mlx5_fpga_conn *conn; @@ -429,6 +430,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) struct mlx5_fpga_device *fdev = conn->fdev; struct mlx5_core_dev *mdev = fdev->mdev; u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; struct mlx5_wq_param wqp; struct mlx5_cqe64 *cqe; int inlen, err, eqn; @@ -476,7 +478,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas); - err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen); + err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen, out, sizeof(out)); kvfree(in); if (err) @@ -867,7 +869,7 @@ struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, conn->cb_arg 
= attr->cb_arg; remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32); - err = mlx5_query_nic_vport_mac_address(fdev->mdev, 0, remote_mac); + err = mlx5_query_mac_address(fdev->mdev, remote_mac); if (err) { mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err); ret = ERR_PTR(err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 52c47d3dd5a5..c76da309506b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -636,7 +636,8 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, u8 match_criteria_enable, const u32 *match_c, const u32 *match_v, - struct mlx5_flow_act *flow_act) + struct mlx5_flow_act *flow_act, + struct mlx5_flow_context *flow_context) { const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); @@ -655,7 +656,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, (match_criteria_enable & ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || - (flow_act->flags & FLOW_ACT_HAS_TAG)) + (flow_context->flags & FLOW_CONTEXT_HAS_TAG)) return false; return true; @@ -767,7 +768,8 @@ mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev, fg->mask.match_criteria_enable, fg->mask.match_criteria, fte->val, - &fte->action)) + &fte->action, + &fte->flow_context)) return ERR_PTR(-EINVAL); else if (!mlx5_is_fpga_ipsec_rule(mdev, fg->mask.match_criteria_enable, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h index 2b5e63b0d4d6..382985e65b48 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h @@ -37,8 +37,6 @@ #include "accel/ipsec.h" #include "fs_cmd.h" -#ifdef CONFIG_MLX5_FPGA - u32 
mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev); unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev); int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, @@ -66,77 +64,4 @@ int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, const struct mlx5_flow_cmds * mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type); -#else - -static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) -{ - return 0; -} - -static inline unsigned int -mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev) -{ - return 0; -} - -static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, - u64 *counters) -{ - return 0; -} - -static inline void * -mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *accel_xfrm, - const __be32 saddr[4], - const __be32 daddr[4], - const __be32 spi, bool is_ipv6) -{ - return NULL; -} - -static inline void mlx5_fpga_ipsec_delete_sa_ctx(void *context) -{ -} - -static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) -{ - return 0; -} - -static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) -{ -} - -static inline void mlx5_fpga_ipsec_build_fs_cmds(void) -{ -} - -static inline struct mlx5_accel_esp_xfrm * -mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) -{ -} - -static inline int -mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs) -{ - return -EOPNOTSUPP; -} - -static inline const struct mlx5_flow_cmds * -mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) -{ - return mlx5_fs_cmd_get_default(type); -} - -#endif /* CONFIG_MLX5_FPGA */ - #endif /* __MLX5_FPGA_SADB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 013b1ca4a791..7ac1249eadc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -147,6 +147,7 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, { int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); int en_decap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + int term = !!(ft->flags & MLX5_FLOW_TABLE_TERMINATION); u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; struct mlx5_core_dev *dev = ns->dev; @@ -167,6 +168,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, en_decap); MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, en_encap); + MLX5_SET(create_flow_table_in, in, flow_table_context.termination_table, + term); switch (ft->op_mod) { case FS_FT_OP_MOD_NORMAL: @@ -393,7 +396,11 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); MLX5_SET(flow_context, in_flow_context, group_id, group_id); - MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_tag, + fte->flow_context.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_source, + fte->flow_context.flow_source); + MLX5_SET(flow_context, in_flow_context, extended_destination, extended_dest); if (extended_dest) { @@ -768,6 +775,10 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); table_type = FS_FT_NIC_TX; break; + case MLX5_FLOW_NAMESPACE_ESW_INGRESS: + max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions); + table_type = FS_FT_ESW_INGRESS_ACL; + break; default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 
fe76c6fd6d80..3e99799bdb40 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -584,7 +584,7 @@ err_ida_remove: } static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, - u32 *match_value, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act) { struct mlx5_flow_steering *steering = get_steering(&ft->node); @@ -594,9 +594,10 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, if (!fte) return ERR_PTR(-ENOMEM); - memcpy(fte->val, match_value, sizeof(fte->val)); + memcpy(fte->val, &spec->match_value, sizeof(fte->val)); fte->node.type = FS_TYPE_FLOW_ENTRY; fte->action = *flow_act; + fte->flow_context = spec->flow_context; tree_init_node(&fte->node, NULL, del_sw_fte); @@ -612,7 +613,7 @@ static void dealloc_flow_group(struct mlx5_flow_steering *steering, static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering, u8 match_criteria_enable, - void *match_criteria, + const void *match_criteria, int start_index, int end_index) { @@ -642,7 +643,7 @@ static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steer static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft, u8 match_criteria_enable, - void *match_criteria, + const void *match_criteria, int start_index, int end_index, struct list_head *prev) @@ -1285,7 +1286,7 @@ free_handle: } static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec) + const struct mlx5_flow_spec *spec) { struct list_head *prev = &ft->node.children; struct mlx5_flow_group *fg; @@ -1430,7 +1431,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2) return false; } -static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act) +static int check_conflicting_ftes(struct fs_fte *fte, + const struct mlx5_flow_context *flow_context, + const struct mlx5_flow_act *flow_act) { if 
(check_conflicting_actions(flow_act->action, fte->action.action)) { mlx5_core_warn(get_dev(&fte->node), @@ -1438,12 +1441,12 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act return -EEXIST; } - if ((flow_act->flags & FLOW_ACT_HAS_TAG) && - fte->action.flow_tag != flow_act->flow_tag) { + if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) && + fte->flow_context.flow_tag != flow_context->flow_tag) { mlx5_core_warn(get_dev(&fte->node), "FTE flow tag %u already exists with different flow tag %u\n", - fte->action.flow_tag, - flow_act->flow_tag); + fte->flow_context.flow_tag, + flow_context->flow_tag); return -EEXIST; } @@ -1451,7 +1454,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act } static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, - u32 *match_value, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num, @@ -1462,7 +1465,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, int i; int ret; - ret = check_conflicting_ftes(fte, flow_act); + ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act); if (ret) return ERR_PTR(ret); @@ -1536,7 +1539,7 @@ static void free_match_list(struct match_list_head *head) static int build_match_list(struct match_list_head *match_head, struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec) + const struct mlx5_flow_spec *spec) { struct rhlist_head *tmp, *list; struct mlx5_flow_group *g; @@ -1589,7 +1592,7 @@ static u64 matched_fgs_get_version(struct list_head *match_head) static struct fs_fte * lookup_fte_locked(struct mlx5_flow_group *g, - u32 *match_value, + const u32 *match_value, bool take_write) { struct fs_fte *fte_tmp; @@ -1622,7 +1625,7 @@ out: static struct mlx5_flow_handle * try_add_to_existing_fg(struct mlx5_flow_table *ft, struct list_head *match_head, - struct mlx5_flow_spec *spec, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act 
*flow_act, struct mlx5_flow_destination *dest, int dest_num, @@ -1637,7 +1640,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, u64 version; int err; - fte = alloc_fte(ft, spec->match_value, flow_act); + fte = alloc_fte(ft, spec, flow_act); if (IS_ERR(fte)) return ERR_PTR(-ENOMEM); @@ -1653,8 +1656,7 @@ search_again_locked: fte_tmp = lookup_fte_locked(g, spec->match_value, take_write); if (!fte_tmp) continue; - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte_tmp); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp); up_write_ref_node(&fte_tmp->node, false); tree_put_node(&fte_tmp->node, false); kmem_cache_free(steering->ftes_cache, fte); @@ -1701,8 +1703,7 @@ skip_search: nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); return rule; @@ -1715,7 +1716,7 @@ out: static struct mlx5_flow_handle * _mlx5_add_flow_rules(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num) @@ -1788,7 +1789,7 @@ search_again_locked: if (err) goto err_release_fg; - fte = alloc_fte(ft, spec->match_value, flow_act); + fte = alloc_fte(ft, spec, flow_act); if (IS_ERR(fte)) { err = PTR_ERR(fte); goto err_release_fg; @@ -1802,8 +1803,7 @@ search_again_locked: nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); - rule = add_rule_fg(g, spec->match_value, flow_act, dest, - dest_num, fte); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); tree_put_node(&g->node, false); @@ -1823,7 +1823,7 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) struct 
mlx5_flow_handle * mlx5_add_flow_rules(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int num_dest) @@ -2092,7 +2092,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d { struct mlx5_flow_steering *steering = dev->priv.steering; - if (!steering || vport >= MLX5_TOTAL_VPORTS(dev)) + if (!steering || vport >= mlx5_eswitch_get_total_vports(dev)) return NULL; switch (type) { @@ -2423,7 +2423,7 @@ static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev) if (!steering->esw_egress_root_ns) return; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) + for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++) cleanup_root_ns(steering->esw_egress_root_ns[i]); kfree(steering->esw_egress_root_ns); @@ -2438,7 +2438,7 @@ static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev) if (!steering->esw_ingress_root_ns) return; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) + for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++) cleanup_root_ns(steering->esw_ingress_root_ns[i]); kfree(steering->esw_ingress_root_ns); @@ -2606,16 +2606,18 @@ static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vpo static int init_egress_acls_root_ns(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; + int total_vports = mlx5_eswitch_get_total_vports(dev); int err; int i; - steering->esw_egress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev), - sizeof(*steering->esw_egress_root_ns), - GFP_KERNEL); + steering->esw_egress_root_ns = + kcalloc(total_vports, + sizeof(*steering->esw_egress_root_ns), + GFP_KERNEL); if (!steering->esw_egress_root_ns) return -ENOMEM; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) { + for (i = 0; i < total_vports; i++) { err = init_egress_acl_root_ns(steering, i); if (err) goto cleanup_root_ns; @@ -2634,16 +2636,18 @@ cleanup_root_ns: static int 
init_ingress_acls_root_ns(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; + int total_vports = mlx5_eswitch_get_total_vports(dev); int err; int i; - steering->esw_ingress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev), - sizeof(*steering->esw_ingress_root_ns), - GFP_KERNEL); + steering->esw_ingress_root_ns = + kcalloc(total_vports, + sizeof(*steering->esw_ingress_root_ns), + GFP_KERNEL); if (!steering->esw_ingress_root_ns) return -ENOMEM; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) { + for (i = 0; i < total_vports; i++) { err = init_ingress_acl_root_ns(steering, i); if (err) goto cleanup_root_ns; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index a08c3d09a50f..c48c382f926f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -170,6 +170,7 @@ struct fs_fte { u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; u32 index; + struct mlx5_flow_context flow_context; struct mlx5_flow_act action; enum fs_fte_status status; struct mlx5_fc *counter; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 1ab6f7e3bec6..a19790dee7b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -37,6 +37,37 @@ #include "mlx5_core.h" #include "../../mlxfw/mlxfw.h" +enum { + MCQS_IDENTIFIER_BOOT_IMG = 0x1, + MCQS_IDENTIFIER_OEM_NVCONFIG = 0x4, + MCQS_IDENTIFIER_MLNX_NVCONFIG = 0x5, + MCQS_IDENTIFIER_CS_TOKEN = 0x6, + MCQS_IDENTIFIER_DBG_TOKEN = 0x7, + MCQS_IDENTIFIER_GEARBOX = 0xA, +}; + +enum { + MCQS_UPDATE_STATE_IDLE, + MCQS_UPDATE_STATE_IN_PROGRESS, + MCQS_UPDATE_STATE_APPLIED, + MCQS_UPDATE_STATE_ACTIVE, + MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET, + MCQS_UPDATE_STATE_FAILED, + MCQS_UPDATE_STATE_CANCELED, + MCQS_UPDATE_STATE_BUSY, +}; + +enum { + MCQI_INFO_TYPE_CAPABILITIES = 0x0, + 
MCQI_INFO_TYPE_VERSION = 0x1, + MCQI_INFO_TYPE_ACTIVATION_METHOD = 0x5, +}; + +enum { + MCQI_FW_RUNNING_VERSION = 0, + MCQI_FW_STORED_VERSION = 1, +}; + static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, int outlen) { @@ -202,6 +233,18 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, event_cap)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_EVENT); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, tls)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_TLS); + if (err) + return err; + } + return 0; } @@ -392,33 +435,49 @@ static int mlx5_reg_mcda_set(struct mlx5_core_dev *dev, } static int mlx5_reg_mcqi_query(struct mlx5_core_dev *dev, - u16 component_index, - u32 *max_component_size, - u8 *log_mcda_word_size, - u16 *mcda_max_write_size) + u16 component_index, bool read_pending, + u8 info_type, u16 data_size, void *mcqi_data) { - u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_ST_SZ_DW(mcqi_cap)]; - int offset = MLX5_ST_SZ_DW(mcqi_reg); - u32 in[MLX5_ST_SZ_DW(mcqi_reg)]; + u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_UN_SZ_DW(mcqi_reg_data)] = {}; + u32 in[MLX5_ST_SZ_DW(mcqi_reg)] = {}; + void *data; int err; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(mcqi_reg, in, component_index, component_index); - MLX5_SET(mcqi_reg, in, data_size, MLX5_ST_SZ_BYTES(mcqi_cap)); + MLX5_SET(mcqi_reg, in, read_pending_component, read_pending); + MLX5_SET(mcqi_reg, in, info_type, info_type); + MLX5_SET(mcqi_reg, in, data_size, data_size); err = mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_MCQI, 0, 0); + MLX5_ST_SZ_BYTES(mcqi_reg) + data_size, + MLX5_REG_MCQI, 0, 0); if (err) - goto out; + return err; - *max_component_size = MLX5_GET(mcqi_cap, out + offset, max_component_size); - *log_mcda_word_size = MLX5_GET(mcqi_cap, out + offset, log_mcda_word_size); - *mcda_max_write_size = MLX5_GET(mcqi_cap, out + offset, mcda_max_write_size); + data = MLX5_ADDR_OF(mcqi_reg, out, data); + 
memcpy(mcqi_data, data, data_size); -out: - return err; + return 0; +} + +static int mlx5_reg_mcqi_caps_query(struct mlx5_core_dev *dev, u16 component_index, + u32 *max_component_size, u8 *log_mcda_word_size, + u16 *mcda_max_write_size) +{ + u32 mcqi_reg[MLX5_ST_SZ_DW(mcqi_cap)] = {}; + int err; + + err = mlx5_reg_mcqi_query(dev, component_index, 0, + MCQI_INFO_TYPE_CAPABILITIES, + MLX5_ST_SZ_BYTES(mcqi_cap), mcqi_reg); + if (err) + return err; + + *max_component_size = MLX5_GET(mcqi_cap, mcqi_reg, max_component_size); + *log_mcda_word_size = MLX5_GET(mcqi_cap, mcqi_reg, log_mcda_word_size); + *mcda_max_write_size = MLX5_GET(mcqi_cap, mcqi_reg, mcda_max_write_size); + + return 0; } struct mlx5_mlxfw_dev { @@ -434,8 +493,13 @@ static int mlx5_component_query(struct mlxfw_dev *mlxfw_dev, container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; - return mlx5_reg_mcqi_query(dev, component_index, p_max_size, - p_align_bits, p_max_write_size); + if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi)) { + mlx5_core_warn(dev, "caps query isn't supported by running FW\n"); + return -EOPNOTSUPP; + } + + return mlx5_reg_mcqi_caps_query(dev, component_index, p_max_size, + p_align_bits, p_max_write_size); } static int mlx5_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle) @@ -552,7 +616,8 @@ static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = { }; int mlx5_firmware_flash(struct mlx5_core_dev *dev, - const struct firmware *firmware) + const struct firmware *firmware, + struct netlink_ext_ack *extack) { struct mlx5_mlxfw_dev mlx5_mlxfw_dev = { .mlxfw_dev = { @@ -571,5 +636,133 @@ int mlx5_firmware_flash(struct mlx5_core_dev *dev, return -EOPNOTSUPP; } - return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, firmware); + return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, + firmware, extack); +} + +static int mlx5_reg_mcqi_version_query(struct mlx5_core_dev *dev, + u16 component_index, bool 
read_pending, + u32 *mcqi_version_out) +{ + return mlx5_reg_mcqi_query(dev, component_index, read_pending, + MCQI_INFO_TYPE_VERSION, + MLX5_ST_SZ_BYTES(mcqi_version), + mcqi_version_out); +} + +static int mlx5_reg_mcqs_query(struct mlx5_core_dev *dev, u32 *out, + u16 component_index) +{ + u8 out_sz = MLX5_ST_SZ_BYTES(mcqs_reg); + u32 in[MLX5_ST_SZ_DW(mcqs_reg)] = {}; + int err; + + memset(out, 0, out_sz); + + MLX5_SET(mcqs_reg, in, component_index, component_index); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + out_sz, MLX5_REG_MCQS, 0, 0); + return err; +} + +/* scans component index sequentially, to find the boot img index */ +static int mlx5_get_boot_img_component_index(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(mcqs_reg)] = {}; + u16 identifier, component_idx = 0; + bool quit; + int err; + + do { + err = mlx5_reg_mcqs_query(dev, out, component_idx); + if (err) + return err; + + identifier = MLX5_GET(mcqs_reg, out, identifier); + quit = !!MLX5_GET(mcqs_reg, out, last_index_flag); + quit |= identifier == MCQS_IDENTIFIER_BOOT_IMG; + } while (!quit && ++component_idx); + + if (identifier != MCQS_IDENTIFIER_BOOT_IMG) { + mlx5_core_warn(dev, "mcqs: can't find boot_img component ix, last scanned idx %d\n", + component_idx); + return -EOPNOTSUPP; + } + + return component_idx; +} + +static int +mlx5_fw_image_pending(struct mlx5_core_dev *dev, + int component_index, + bool *pending_version_exists) +{ + u32 out[MLX5_ST_SZ_DW(mcqs_reg)]; + u8 component_update_state; + int err; + + err = mlx5_reg_mcqs_query(dev, out, component_index); + if (err) + return err; + + component_update_state = MLX5_GET(mcqs_reg, out, component_update_state); + + if (component_update_state == MCQS_UPDATE_STATE_IDLE) { + *pending_version_exists = false; + } else if (component_update_state == MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET) { + *pending_version_exists = true; + } else { + mlx5_core_warn(dev, + "mcqs: can't read pending fw version while fw state is %d\n", + 
component_update_state); + return -ENODATA; + } + return 0; +} + +int mlx5_fw_version_query(struct mlx5_core_dev *dev, + u32 *running_ver, u32 *pending_ver) +{ + u32 reg_mcqi_version[MLX5_ST_SZ_DW(mcqi_version)] = {}; + bool pending_version_exists; + int component_index; + int err; + + if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi) || + !MLX5_CAP_MCAM_REG(dev, mcqs)) { + mlx5_core_warn(dev, "fw query isn't supported by the FW\n"); + return -EOPNOTSUPP; + } + + component_index = mlx5_get_boot_img_component_index(dev); + if (component_index < 0) + return component_index; + + err = mlx5_reg_mcqi_version_query(dev, component_index, + MCQI_FW_RUNNING_VERSION, + reg_mcqi_version); + if (err) + return err; + + *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); + + err = mlx5_fw_image_pending(dev, component_index, &pending_version_exists); + if (err) + return err; + + if (!pending_version_exists) { + *pending_ver = 0; + return 0; + } + + err = mlx5_reg_mcqi_version_query(dev, component_index, + MCQI_FW_STORED_VERSION, + reg_mcqi_version); + if (err) + return err; + + *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); + + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index a2656f4008d9..2fe6923f7ce0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -40,6 +40,8 @@ #include "mlx5_core.h" #include "lib/eq.h" #include "lib/mlx5.h" +#include "lib/pci_vsc.h" +#include "diag/fw_tracer.h" enum { MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, @@ -62,12 +64,20 @@ enum { enum { MLX5_DROP_NEW_HEALTH_WORK, - MLX5_DROP_NEW_RECOVERY_WORK, +}; + +enum { + MLX5_SENSOR_NO_ERR = 0, + MLX5_SENSOR_PCI_COMM_ERR = 1, + MLX5_SENSOR_PCI_ERR = 2, + MLX5_SENSOR_NIC_DISABLED = 3, + MLX5_SENSOR_NIC_SW_RESET = 4, + MLX5_SENSOR_FW_SYND_RFR = 5, }; u8 mlx5_get_nic_state(struct mlx5_core_dev *dev) { - return 
(ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; + return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7; } void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) @@ -80,18 +90,105 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) &dev->iseg->cmdq_addr_l_sz); } -static int in_fatal(struct mlx5_core_dev *dev) +static bool sensor_pci_not_working(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; struct health_buffer __iomem *h = health->health; + /* Offline PCI reads return 0xffffffff */ + return (ioread32be(&h->fw_ver) == 0xffffffff); +} + +static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET; + u8 synd = ioread8(&h->synd); + + if (rfr && synd) + mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd); + return rfr && synd; +} + +static u32 check_fatal_sensors(struct mlx5_core_dev *dev) +{ + if (sensor_pci_not_working(dev)) + return MLX5_SENSOR_PCI_COMM_ERR; + if (pci_channel_offline(dev->pdev)) + return MLX5_SENSOR_PCI_ERR; if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) - return 1; + return MLX5_SENSOR_NIC_DISABLED; + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET) + return MLX5_SENSOR_NIC_SW_RESET; + if (sensor_fw_synd_rfr(dev)) + return MLX5_SENSOR_FW_SYND_RFR; - if (ioread32be(&h->fw_ver) == 0xffffffff) - return 1; + return MLX5_SENSOR_NO_ERR; +} - return 0; +static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock) +{ + enum mlx5_vsc_state state; + int ret; + + if (!mlx5_core_is_pf(dev)) + return -EBUSY; + + /* Try to lock GW access, this stage doesn't return + * EBUSY because locked GW does not mean that other PF + * already started the reset. + */ + ret = mlx5_vsc_gw_lock(dev); + if (ret == -EBUSY) + return -EINVAL; + if (ret) + return ret; + + state = lock ? 
MLX5_VSC_LOCK : MLX5_VSC_UNLOCK; + /* At this stage, if the return status == EBUSY, then we know + * for sure that another PF started the reset, so don't allow + * another reset. + */ + ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state); + if (ret) + mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + + /* Unlock GW access */ + mlx5_vsc_gw_unlock(dev); + + return ret; +} + +static bool reset_fw_if_needed(struct mlx5_core_dev *dev) +{ + bool supported = (ioread32be(&dev->iseg->initializing) >> + MLX5_FW_RESET_SUPPORTED_OFFSET) & 1; + u32 fatal_error; + + if (!supported) + return false; + + /* The reset only needs to be issued by one PF. The health buffer is + * shared between all functions, and will be cleared during a reset. + * Check again to avoid a redundant 2nd reset. If the fatal error was + * PCI related, a reset won't help. + */ + fatal_error = check_fatal_sensors(dev); + if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR || + fatal_error == MLX5_SENSOR_NIC_DISABLED || + fatal_error == MLX5_SENSOR_NIC_SW_RESET) { + mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help."); + return false; + } + + mlx5_core_warn(dev, "Issuing FW Reset\n"); + /* Write the NIC interface field to initiate the reset, the command + * interface address also resides here, don't overwrite it. 
+ */ + mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET); + + return true; } void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) @@ -99,14 +196,65 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) mutex_lock(&dev->intf_state_mutex); if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) goto unlock; + if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) { + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + goto unlock; + } - mlx5_core_err(dev, "start\n"); - if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { + if (check_fatal_sensors(dev) || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mlx5_cmd_flush(dev); } mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); +unlock: + mutex_unlock(&dev->intf_state_mutex); +} + +#define MLX5_CRDUMP_WAIT_MS 60000 +#define MLX5_FW_RESET_WAIT_MS 1000 +void mlx5_error_sw_reset(struct mlx5_core_dev *dev) +{ + unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS; + int lock = -EBUSY; + + mutex_lock(&dev->intf_state_mutex); + if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto unlock; + + mlx5_core_err(dev, "start\n"); + + if (check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) { + /* Get cr-dump and reset FW semaphore */ + lock = lock_sem_sw_reset(dev, true); + + if (lock == -EBUSY) { + delay_ms = MLX5_CRDUMP_WAIT_MS; + goto recover_from_sw_reset; + } + /* Execute SW reset */ + reset_fw_if_needed(dev); + } + +recover_from_sw_reset: + /* Recover from SW reset */ + end = jiffies + msecs_to_jiffies(delay_ms); + do { + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + break; + + cond_resched(); + } while (!time_after(jiffies, end)); + + if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) { + dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n", + mlx5_get_nic_state(dev), delay_ms); + } + + /* Release FW semaphore if you are the lock owner */ + if (!lock) + lock_sem_sw_reset(dev, false); + mlx5_core_err(dev, "end\n"); unlock: @@ -129,6 +277,20 
@@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) case MLX5_NIC_IFC_NO_DRAM_NIC: mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n"); break; + + case MLX5_NIC_IFC_SW_RESET: + /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases: + * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded + * and this is a VF), this is not recoverable by SW reset. + * Logging of this is handled elsewhere. + * 2. FW reset has been issued by another function, driver can + * be reloaded to recover after the mode switches to + * MLX5_NIC_IFC_DISABLED. + */ + if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR) + mlx5_core_warn(dev, "NIC SW reset in progress\n"); + break; + default: mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n", nic_interface); @@ -137,52 +299,32 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) mlx5_disable_device(dev); } -static void health_recover(struct work_struct *work) -{ - struct mlx5_core_health *health; - struct delayed_work *dwork; - struct mlx5_core_dev *dev; - struct mlx5_priv *priv; - u8 nic_state; - - dwork = container_of(work, struct delayed_work, work); - health = container_of(dwork, struct mlx5_core_health, recover_work); - priv = container_of(health, struct mlx5_priv, health); - dev = container_of(priv, struct mlx5_core_dev, priv); - - nic_state = mlx5_get_nic_state(dev); - if (nic_state == MLX5_NIC_IFC_INVALID) { - mlx5_core_err(dev, "health recovery flow aborted since the nic state is invalid\n"); - return; - } - - mlx5_core_err(dev, "starting health recovery flow\n"); - mlx5_recover_device(dev); -} - /* How much time to wait until health resetting the driver (in msecs) */ -#define MLX5_RECOVERY_DELAY_MSECS 60000 -static void health_care(struct work_struct *work) +#define MLX5_RECOVERY_WAIT_MSECS 60000 +static int mlx5_health_try_recover(struct mlx5_core_dev *dev) { - unsigned long recover_delay = 
msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS); - struct mlx5_core_health *health; - struct mlx5_core_dev *dev; - struct mlx5_priv *priv; - unsigned long flags; + unsigned long end; - health = container_of(work, struct mlx5_core_health, work); - priv = container_of(health, struct mlx5_priv, health); - dev = container_of(priv, struct mlx5_core_dev, priv); mlx5_core_warn(dev, "handling bad device here\n"); mlx5_handle_bad_state(dev); + end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS); + while (sensor_pci_not_working(dev)) { + if (time_after(jiffies, end)) { + mlx5_core_err(dev, + "health recovery flow aborted, PCI reads still not working\n"); + return -EIO; + } + msleep(100); + } - spin_lock_irqsave(&health->wq_lock, flags); - if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags)) - schedule_delayed_work(&health->recover_work, recover_delay); - else - mlx5_core_err(dev, - "new health works are not permitted at this stage\n"); - spin_unlock_irqrestore(&health->wq_lock, flags); + mlx5_core_err(dev, "starting health recovery flow\n"); + mlx5_recover_device(dev); + if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state) || + check_fatal_sensors(dev)) { + mlx5_core_err(dev, "health recovery failed\n"); + return -EIO; + } + return 0; } static const char *hsynd_str(u8 synd) @@ -246,6 +388,282 @@ static void print_health_info(struct mlx5_core_dev *dev) mlx5_core_err(dev, "raw fw_ver 0x%08x\n", fw); } +static int +mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u8 synd; + int err; + + synd = ioread8(&h->synd); + err = devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); + if (err || !synd) + return err; + return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd)); +} + +struct mlx5_fw_reporter_ctx { + u8 err_synd; + int 
miss_counter; +}; + +static int +mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg, + struct mlx5_fw_reporter_ctx *fw_reporter_ctx) +{ + int err; + + err = devlink_fmsg_u8_pair_put(fmsg, "syndrome", + fw_reporter_ctx->err_synd); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter", + fw_reporter_ctx->miss_counter); + if (err) + return err; + return 0; +} + +static int +mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + int err; + int i; + + if (!ioread8(&h->synd)) + return 0; + + err = devlink_fmsg_pair_nest_start(fmsg, "health buffer"); + if (err) + return err; + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + err = devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var"); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) { + err = devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i)); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr", + ioread32be(&h->assert_exit_ptr)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "assert_callra", + ioread32be(&h->assert_callra)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id)); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index", + ioread8(&h->irisc_index)); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd", + ioread16be(&h->ext_synd)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver", + ioread32be(&h->fw_ver)); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + return devlink_fmsg_pair_nest_end(fmsg); +} + 
+static int
+mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
+		      struct devlink_fmsg *fmsg, void *priv_ctx)
+{
+	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+	int err;
+
+	err = mlx5_fw_tracer_trigger_core_dump_general(dev);
+	if (err)
+		return err;
+
+	if (priv_ctx) {
+		struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
+
+		err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
+		if (err)
+			return err;
+	}
+
+	err = mlx5_fw_reporter_heath_buffer_data_put(dev, fmsg);
+	if (err)
+		return err;
+	return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg);
+}
+
+static void mlx5_fw_reporter_err_work(struct work_struct *work)
+{
+	struct mlx5_fw_reporter_ctx fw_reporter_ctx;
+	struct mlx5_core_health *health;
+
+	health = container_of(work, struct mlx5_core_health, report_work);
+
+	if (IS_ERR_OR_NULL(health->fw_reporter))
+		return;
+
+	fw_reporter_ctx.err_synd = health->synd;
+	fw_reporter_ctx.miss_counter = health->miss_counter;
+	if (fw_reporter_ctx.err_synd) {
+		devlink_health_report(health->fw_reporter,
+				      "FW syndrom reported", &fw_reporter_ctx);
+		return;
+	}
+	if (fw_reporter_ctx.miss_counter)
+		devlink_health_report(health->fw_reporter,
+				      "FW miss counter reported",
+				      &fw_reporter_ctx);
+}
+
+static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
+		.name = "fw",
+		.diagnose = mlx5_fw_reporter_diagnose,
+		.dump = mlx5_fw_reporter_dump,
+};
+
+static int
+mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
+			       void *priv_ctx)
+{
+	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+
+	return mlx5_health_try_recover(dev);
+}
+
+#define MLX5_CR_DUMP_CHUNK_SIZE 256
+/* Dump callback of the "fw_fatal" devlink health reporter.
+ *
+ * On a PF, collects the cr-dump into a temporary buffer of
+ * health.crdump_size bytes and emits it into @fmsg as the "crdump_data"
+ * array, at most MLX5_CR_DUMP_CHUNK_SIZE bytes per entry. When @priv_ctx is
+ * given it is treated as a struct mlx5_fw_reporter_ctx and its pairs are
+ * emitted before the array.
+ *
+ * Return: 0 on success, -EPERM when @dev is not a PF, -ENOMEM when the
+ * buffer cannot be allocated, otherwise the error of the failing helper.
+ * The temporary buffer is released on every path.
+ */
+static int
+mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
+			    struct devlink_fmsg *fmsg, void *priv_ctx)
+{
+	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+	u32 crdump_size = dev->priv.health.crdump_size;
+	u32 *cr_data;
+	u32 data_size;
+	u32 offset;
+	int err;
+
+	if (!mlx5_core_is_pf(dev))
+		return -EPERM;
+
+	cr_data = kvmalloc(crdump_size, GFP_KERNEL);
+	if (!cr_data)
+		return -ENOMEM;
+	err = mlx5_crdump_collect(dev, cr_data);
+	if (err)
+		goto free_data; /* cr_data is already allocated here */
+
+	if (priv_ctx) {
+		struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
+
+		err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
+		if (err)
+			goto free_data;
+	}
+
+	err = devlink_fmsg_arr_pair_nest_start(fmsg, "crdump_data");
+	if (err)
+		goto free_data;
+	for (offset = 0; offset < crdump_size; offset += data_size) {
+		if (crdump_size - offset < MLX5_CR_DUMP_CHUNK_SIZE)
+			data_size = crdump_size - offset;
+		else
+			data_size = MLX5_CR_DUMP_CHUNK_SIZE;
+		/* offset is bounded by crdump_size (the allocation size), so
+		 * step through the u32 buffer in bytes to reach this chunk.
+		 */
+		err = devlink_fmsg_binary_put(fmsg, (char *)cr_data + offset,
+					      data_size);
+		if (err)
+			goto free_data;
+	}
+	err = devlink_fmsg_arr_pair_nest_end(fmsg);
+
+free_data:
+	/* Allocated with kvmalloc(), so it must be released with kvfree() */
+	kvfree(cr_data);
+	return err;
+}
+
+static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
+{
+	struct mlx5_fw_reporter_ctx fw_reporter_ctx;
+	struct mlx5_core_health *health;
+	struct mlx5_core_dev *dev;
+	struct mlx5_priv *priv;
+
+	health = container_of(work, struct mlx5_core_health, fatal_report_work);
+	priv = container_of(health, struct mlx5_priv, health);
+	dev = container_of(priv, struct mlx5_core_dev, priv);
+
+	mlx5_enter_error_state(dev, false);
+	if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
+		if (mlx5_health_try_recover(dev))
+			mlx5_core_err(dev, "health recovery failed\n");
+		return;
+	}
+	fw_reporter_ctx.err_synd = health->synd;
+	fw_reporter_ctx.miss_counter = health->miss_counter;
+	devlink_health_report(health->fw_fatal_reporter,
+			      "FW fatal error reported", &fw_reporter_ctx);
+}
+
+static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
+		.name = "fw_fatal",
+		.recover = mlx5_fw_fatal_reporter_recover,
+		.dump = mlx5_fw_fatal_reporter_dump,
+};
+
+#define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000
+static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health
*health = &dev->priv.health; + struct devlink *devlink = priv_to_devlink(dev); + + health->fw_reporter = + devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops, + 0, false, dev); + if (IS_ERR(health->fw_reporter)) + mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", + PTR_ERR(health->fw_reporter)); + + health->fw_fatal_reporter = + devlink_health_reporter_create(devlink, + &mlx5_fw_fatal_reporter_ops, + MLX5_REPORTER_FW_GRACEFUL_PERIOD, + true, dev); + if (IS_ERR(health->fw_fatal_reporter)) + mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", + PTR_ERR(health->fw_fatal_reporter)); +} + +static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (!IS_ERR_OR_NULL(health->fw_reporter)) + devlink_health_reporter_destroy(health->fw_reporter); + + if (!IS_ERR_OR_NULL(health->fw_fatal_reporter)) + devlink_health_reporter_destroy(health->fw_fatal_reporter); +} + static unsigned long get_next_poll_jiffies(void) { unsigned long next; @@ -264,7 +682,7 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev) spin_lock_irqsave(&health->wq_lock, flags); if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) - queue_work(health->wq, &health->work); + queue_work(health->wq, &health->fatal_report_work); else mlx5_core_err(dev, "new health works are not permitted at this stage\n"); spin_unlock_irqrestore(&health->wq_lock, flags); @@ -274,6 +692,9 @@ static void poll_health(struct timer_list *t) { struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u32 fatal_error; + u8 prev_synd; u32 count; if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) @@ -289,10 +710,19 @@ static void poll_health(struct timer_list *t) if (health->miss_counter == MAX_MISSES) { mlx5_core_err(dev, "device's health compromised - reached miss count\n"); print_health_info(dev); 
+ queue_work(health->wq, &health->report_work); } - if (in_fatal(dev) && !health->sick) { - health->sick = true; + prev_synd = health->synd; + health->synd = ioread8(&h->synd); + if (health->synd && health->synd != prev_synd) + queue_work(health->wq, &health->report_work); + + fatal_error = check_fatal_sensors(dev); + + if (fatal_error && !health->fatal_error) { + mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); + dev->priv.health.fatal_error = fatal_error; print_health_info(dev); mlx5_trigger_health_work(dev); } @@ -306,9 +736,8 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; timer_setup(&health->timer, poll_health, 0); - health->sick = 0; + health->fatal_error = MLX5_SENSOR_NO_ERR; clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; @@ -324,7 +753,6 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) if (disable_health) { spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); spin_unlock_irqrestore(&health->wq_lock, flags); } @@ -338,21 +766,9 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev) spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); spin_unlock_irqrestore(&health->wq_lock, flags); - cancel_delayed_work_sync(&health->recover_work); - cancel_work_sync(&health->work); -} - -void mlx5_drain_health_recovery(struct mlx5_core_dev *dev) -{ - struct mlx5_core_health *health = &dev->priv.health; - unsigned long flags; - - spin_lock_irqsave(&health->wq_lock, flags); - set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); - spin_unlock_irqrestore(&health->wq_lock, flags); - 
cancel_delayed_work_sync(&dev->priv.health.recover_work); + cancel_work_sync(&health->report_work); + cancel_work_sync(&health->fatal_report_work); } void mlx5_health_flush(struct mlx5_core_dev *dev) @@ -367,6 +783,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; destroy_workqueue(health->wq); + mlx5_fw_reporters_destroy(dev); } int mlx5_health_init(struct mlx5_core_dev *dev) @@ -374,20 +791,26 @@ int mlx5_health_init(struct mlx5_core_dev *dev) struct mlx5_core_health *health; char *name; + mlx5_fw_reporters_create(dev); + health = &dev->priv.health; name = kmalloc(64, GFP_KERNEL); if (!name) - return -ENOMEM; + goto out_err; strcpy(name, "mlx5_health"); strcat(name, dev_name(dev->device)); health->wq = create_singlethread_workqueue(name); kfree(name); if (!health->wq) - return -ENOMEM; + goto out_err; spin_lock_init(&health->wq_lock); - INIT_WORK(&health->work, health_care); - INIT_DELAYED_WORK(&health->recover_work, health_recover); + INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work); + INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work); return 0; + +out_err: + mlx5_fw_reporters_destroy(dev); + return -ENOMEM; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 90cb50fe17fd..ebd81f6b556e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -122,14 +122,6 @@ static int mlx5i_get_ts_info(struct net_device *netdev, return mlx5e_ethtool_get_ts_info(priv, info); } -static int mlx5i_flash_device(struct net_device *netdev, - struct ethtool_flash *flash) -{ - struct mlx5e_priv *priv = mlx5i_epriv(netdev); - - return mlx5e_ethtool_flash_device(priv, flash); -} - enum mlx5_ptys_width { MLX5_PTYS_WIDTH_1X = 1 << 0, MLX5_PTYS_WIDTH_2X = 1 << 1, @@ -241,7 +233,6 @@ const struct ethtool_ops mlx5i_ethtool_ops = { 
.get_ethtool_stats = mlx5i_get_ethtool_stats, .get_ringparam = mlx5i_get_ringparam, .set_ringparam = mlx5i_set_ringparam, - .flash_device = mlx5i_flash_device, .get_channels = mlx5i_get_channels, .set_channels = mlx5i_set_channels, .get_coalesce = mlx5i_get_coalesce, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 9ca492b430d8..faf197d53743 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -87,7 +87,7 @@ int mlx5i_init(struct mlx5_core_dev *mdev, mlx5e_set_netdev_mtu_boundaries(priv); netdev->mtu = netdev->max_mtu; - mlx5e_build_nic_params(mdev, &priv->rss_params, &priv->channels.params, + mlx5e_build_nic_params(mdev, NULL, &priv->rss_params, &priv->channels.params, mlx5e_get_netdev_max_channels(netdev), netdev->mtu); mlx5i_build_nic_params(mdev, &priv->channels.params); @@ -258,6 +258,18 @@ void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp * mlx5_core_destroy_qp(mdev, qp); } +int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn); + + return mlx5e_create_tis(mdev, in, tisn); +} + static int mlx5i_init_tx(struct mlx5e_priv *priv) { struct mlx5i_priv *ipriv = priv->ppriv; @@ -269,7 +281,7 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv) return err; } - err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]); + err = mlx5i_create_tis(priv->mdev, ipriv->qp.qpn, &priv->tisn[0]); if (err) { mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); goto err_destroy_underlay_qp; @@ -365,7 +377,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); + err = mlx5e_create_direct_rqts(priv, 
priv->direct_tir); if (err) goto err_destroy_indirect_rqts; @@ -373,7 +385,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir); if (err) goto err_destroy_indirect_tirs; @@ -384,11 +396,11 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) return 0; err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv, true); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -401,9 +413,9 @@ err_destroy_q_counters: static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { mlx5i_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, true); - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@ -418,6 +430,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .cleanup_rx = mlx5i_cleanup_rx, .enable = NULL, /* mlx5i_enable */ .disable = NULL, /* mlx5i_disable */ + .update_rx = mlx5e_update_nic_rx, .update_stats = NULL, /* mlx5i_update_stats */ .update_carrier = NULL, /* no HW update in IB link */ .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, @@ -526,7 +539,7 @@ static int mlx5i_open(struct net_device *netdev) if (err) goto err_remove_fs_underlay_qp; - mlx5e_refresh_tirs(epriv, false); + epriv->profile->update_rx(epriv); mlx5e_activate_priv_channels(epriv); mutex_unlock(&epriv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 
e19ba3fcd1b7..c87962cab921 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -59,6 +59,8 @@ struct mlx5i_priv { char *mlx5e_priv[0]; }; +int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn); + /* Underlay QP create/destroy functions */ int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index b491b8f5fd6b..6e56fa769d2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -210,7 +210,7 @@ static int mlx5i_pkey_open(struct net_device *netdev) goto err_unint_underlay_qp; } - err = mlx5e_create_tis(mdev, 0 /* tc */, ipriv->qp.qpn, &epriv->tisn[0]); + err = mlx5i_create_tis(mdev, ipriv->qp.qpn, &epriv->tisn[0]); if (err) { mlx5_core_warn(mdev, "create child tis failed, %d\n", err); goto err_remove_rx_uderlay_qp; @@ -221,7 +221,7 @@ static int mlx5i_pkey_open(struct net_device *netdev) mlx5_core_warn(mdev, "opening child channels failed, %d\n", err); goto err_clear_state_opened_flag; } - mlx5e_refresh_tirs(epriv, false); + epriv->profile->update_rx(epriv); mlx5e_activate_priv_channels(epriv); mutex_unlock(&epriv->state_lock); @@ -350,6 +350,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = { .cleanup_rx = mlx5i_pkey_cleanup_rx, .enable = NULL, .disable = NULL, + .update_rx = mlx5e_update_nic_rx, .update_stats = NULL, .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 959605559858..c5ef2ff26465 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -305,8 +305,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) !mlx5_sriov_is_enabled(dev1); #ifdef CONFIG_MLX5_ESWITCH - roce_lag &= dev0->priv.eswitch->mode == SRIOV_NONE && - dev1->priv.eswitch->mode == SRIOV_NONE; + roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE && + dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE; #endif if (roce_lag) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 8212bfd05733..e69766393990 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include <linux/netdevice.h> +#include <net/nexthop.h> #include "lag.h" #include "lag_mp.h" #include "mlx5_core.h" @@ -110,6 +111,8 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, struct fib_info *fi) { struct lag_mp *mp = &ldev->lag_mp; + struct fib_nh *fib_nh0, *fib_nh1; + unsigned int nhs; /* Handle delete event */ if (event == FIB_EVENT_ENTRY_DEL) { @@ -120,9 +123,11 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } /* Handle add/replace event */ - if (fi->fib_nhs == 1) { + nhs = fib_info_num_path(fi); + if (nhs == 1) { if (__mlx5_lag_is_active(ldev)) { - struct net_device *nh_dev = fi->fib_nh[0].fib_nh_dev; + struct fib_nh *nh = fib_info_nh(fi, 0); + struct net_device *nh_dev = nh->fib_nh_dev; int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev); mlx5_lag_set_port_affinity(ldev, ++i); @@ -130,14 +135,16 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, return; } - if (fi->fib_nhs != 2) + if (nhs != 2) return; /* Verify next hops are ports of the same hca */ - if (!(fi->fib_nh[0].fib_nh_dev == ldev->pf[0].netdev && - fi->fib_nh[1].fib_nh_dev == ldev->pf[1].netdev) && - !(fi->fib_nh[0].fib_nh_dev == ldev->pf[1].netdev && - fi->fib_nh[1].fib_nh_dev == ldev->pf[0].netdev)) { + fib_nh0 = fib_info_nh(fi, 0); + 
fib_nh1 = fib_info_nh(fi, 1); + if (!(fib_nh0->fib_nh_dev == ldev->pf[0].netdev && + fib_nh1->fib_nh_dev == ldev->pf[1].netdev) && + !(fib_nh0->fib_nh_dev == ldev->pf[1].netdev && + fib_nh1->fib_nh_dev == ldev->pf[0].netdev)) { mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n"); return; } @@ -174,7 +181,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, mlx5_lag_set_port_affinity(ldev, i); } } else if (event == FIB_EVENT_NH_ADD && - fi->fib_nhs == 2) { + fib_info_num_path(fi) == 2) { mlx5_lag_set_port_affinity(ldev, 0); } } @@ -238,6 +245,7 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, struct mlx5_fib_event_work *fib_work; struct fib_entry_notifier_info *fen_info; struct fib_nh_notifier_info *fnh_info; + struct net_device *fib_dev; struct fib_info *fi; if (info->family != AF_INET) @@ -254,8 +262,13 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, fen_info = container_of(info, struct fib_entry_notifier_info, info); fi = fen_info->fi; - if (fi->fib_dev != ldev->pf[0].netdev && - fi->fib_dev != ldev->pf[1].netdev) { + if (fi->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } + fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; + if (fib_dev != ldev->pf[0].netdev && + fib_dev != ldev->pf[1].netdev) { return NOTIFY_DONE; } fib_work = mlx5_lag_init_fib_work(ldev, event); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c new file mode 100644 index 000000000000..ea9ee88491e5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. 
+ +#include "mlx5_core.h" + +int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, + void *key, u32 sz_bytes, + u32 *p_key_id) +{ + u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u32 sz_bits = sz_bytes * BITS_PER_BYTE; + u8 general_obj_key_size; + u64 general_obj_types; + void *obj, *key_p; + int err; + + obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object); + key_p = MLX5_ADDR_OF(encryption_key_obj, obj, key); + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY)) + return -EINVAL; + + switch (sz_bits) { + case 128: + general_obj_key_size = + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128; + break; + case 256: + general_obj_key_size = + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256; + break; + default: + return -EINVAL; + } + + memcpy(key_p, key, sz_bytes); + + MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size); + MLX5_SET(encryption_key_obj, obj, key_type, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK); + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.pdn); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) + *p_key_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + /* avoid leaking key on the stack */ + memzero_explicit(in, sizeof(in)); + + return err; +} + +void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + 
MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, key_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index c0fb6d72b695..3dfab91ae5f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -7,7 +7,6 @@ #include <linux/mlx5/eq.h> #include <linux/mlx5/cq.h> -#define MLX5_MAX_IRQ_NAME (32) #define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe)) struct mlx5_eq_tasklet { @@ -36,8 +35,14 @@ struct mlx5_eq { struct mlx5_rsc_debug *dbg; }; +struct mlx5_eq_async { + struct mlx5_eq core; + struct notifier_block irq_nb; +}; + struct mlx5_eq_comp { - struct mlx5_eq core; /* Must be first */ + struct mlx5_eq core; + struct notifier_block irq_nb; struct mlx5_eq_tasklet tasklet_ctx; struct list_head list; }; @@ -70,7 +75,7 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev); void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn); struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); void mlx5_cq_tasklet_cb(unsigned long data); @@ -92,7 +97,4 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); #endif -int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb); -int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb); - #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c new file mode 100644 index 000000000000..23361a9ae4fa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c @@ -0,0 +1,157 @@ +// 
SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/kernel.h> +#include "mlx5_core.h" +#include "geneve.h" + +struct mlx5_geneve { + struct mlx5_core_dev *mdev; + __be16 opt_class; + u8 opt_type; + u32 obj_id; + struct mutex sync_lock; /* protect GENEVE obj operations */ + u32 refcount; +}; + +static int mlx5_geneve_tlv_option_create(struct mlx5_core_dev *mdev, + __be16 class, + u8 type, + u8 len) +{ + u32 in[MLX5_ST_SZ_DW(create_geneve_tlv_option_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u64 general_obj_types; + void *hdr, *opt; + u16 obj_id; + int err; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT)) + return -EINVAL; + + hdr = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, hdr); + opt = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, geneve_tlv_opt); + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + + MLX5_SET(geneve_tlv_option, opt, option_class, be16_to_cpu(class)); + MLX5_SET(geneve_tlv_option, opt, option_type, type); + MLX5_SET(geneve_tlv_option, opt, option_data_length, len); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return obj_id; +} + +static void mlx5_geneve_tlv_option_destroy(struct mlx5_core_dev *mdev, u16 obj_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_geneve_tlv_option_add(struct 
mlx5_geneve *geneve, struct geneve_opt *opt) +{ + int res = 0; + + if (IS_ERR_OR_NULL(geneve)) + return -EOPNOTSUPP; + + mutex_lock(&geneve->sync_lock); + + if (geneve->refcount) { + if (geneve->opt_class == opt->opt_class && + geneve->opt_type == opt->type) { + /* We already have TLV options obj allocated */ + geneve->refcount++; + } else { + /* TLV options obj allocated, but its params + * do not match the new request. + * We support only one such object. + */ + mlx5_core_warn(geneve->mdev, + "Won't create Geneve TLV opt object with class:type:len = 0x%x:0x%x:%d (another class:type already exists)\n", + be16_to_cpu(opt->opt_class), + opt->type, + opt->length); + res = -EOPNOTSUPP; + goto unlock; + } + } else { + /* We don't have any TLV options obj allocated */ + + res = mlx5_geneve_tlv_option_create(geneve->mdev, + opt->opt_class, + opt->type, + opt->length); + if (res < 0) { + mlx5_core_warn(geneve->mdev, + "Failed creating Geneve TLV opt object class:type:len = 0x%x:0x%x:%d (err=%d)\n", + be16_to_cpu(opt->opt_class), + opt->type, opt->length, res); + goto unlock; + } + geneve->opt_class = opt->opt_class; + geneve->opt_type = opt->type; + geneve->obj_id = res; + geneve->refcount++; + } + +unlock: + mutex_unlock(&geneve->sync_lock); + return res; +} + +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + mutex_lock(&geneve->sync_lock); + if (--geneve->refcount == 0) { + /* We've just removed the last user of Geneve option. + * Now delete the object in FW. 
+ */ + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + geneve->opt_class = 0; + geneve->opt_type = 0; + geneve->obj_id = 0; + } + mutex_unlock(&geneve->sync_lock); +} + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev) +{ + struct mlx5_geneve *geneve = + kzalloc(sizeof(*geneve), GFP_KERNEL); + + if (!geneve) + return ERR_PTR(-ENOMEM); + geneve->mdev = mdev; + mutex_init(&geneve->sync_lock); + + return geneve; +} + +void mlx5_geneve_destroy(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + /* Lockless since we are unloading */ + if (geneve->refcount) + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + kfree(geneve); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h new file mode 100644 index 000000000000..adee0cbba19c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_GENEVE_H__ +#define __MLX5_GENEVE_H__ + +#include <net/geneve.h> +#include <linux/mlx5/driver.h> + +struct mlx5_geneve; + +#ifdef CONFIG_MLX5_ESWITCH + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev); +void mlx5_geneve_destroy(struct mlx5_geneve *geneve); + +int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt); +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline struct mlx5_geneve +*mlx5_geneve_create(struct mlx5_core_dev *mdev) { return NULL; } +static inline void +mlx5_geneve_destroy(struct mlx5_geneve *geneve) {} +static inline int +mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) { return 0; } +static inline void +mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) {} + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_GENEVE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 397a2847867a..b99d469e4e64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -41,6 +41,9 @@ int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count); void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count); int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index); void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index); +int mlx5_crdump_enable(struct mlx5_core_dev *dev); +void mlx5_crdump_disable(struct mlx5_core_dev *dev); +int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data); /* TODO move to lib/events.h */ @@ -76,4 +79,9 @@ struct mlx5_pme_stats { void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); +/* Crypto */ +int mlx5_create_encryption_key(struct mlx5_core_dev 
*mdev, + void *key, u32 sz_bytes, u32 *p_key_id); +void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index a71d5b9c7ab2..3118e8d66407 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -67,6 +67,7 @@ static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index) struct l2table_node { struct l2addr_node node; u32 index; /* index in HW l2 table */ + int ref_count; }; struct mlx5_mpfs { @@ -134,8 +135,8 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { struct mlx5_mpfs *mpfs = dev->priv.mpfs; struct l2table_node *l2addr; + int err = 0; u32 index; - int err; if (!MLX5_ESWITCH_MANAGER(dev)) return 0; @@ -144,30 +145,35 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); if (l2addr) { - err = -EEXIST; - goto abort; + l2addr->ref_count++; + goto out; } err = alloc_l2table_index(mpfs, &index); if (err) - goto abort; + goto out; l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL); if (!l2addr) { - free_l2table_index(mpfs, index); err = -ENOMEM; - goto abort; + goto hash_add_err; } - l2addr->index = index; err = set_l2table_entry_cmd(dev, index, mac); - if (err) { - l2addr_hash_del(l2addr); - free_l2table_index(mpfs, index); - } + if (err) + goto set_table_entry_err; + + l2addr->index = index; + l2addr->ref_count = 1; mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index); -abort: + goto out; + +set_table_entry_err: + l2addr_hash_del(l2addr); +hash_add_err: + free_l2table_index(mpfs, index); +out: mutex_unlock(&mpfs->lock); return err; } @@ -190,6 +196,9 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) goto unlock; } + if (--l2addr->ref_count > 0) + goto unlock; + index = l2addr->index; del_l2table_entry_cmd(dev, 
index); l2addr_hash_del(l2addr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c new file mode 100644 index 000000000000..6b774e0c2766 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <linux/pci.h> +#include "mlx5_core.h" +#include "pci_vsc.h" + +#define MLX5_EXTRACT_C(source, offset, size) \ + ((((u32)(source)) >> (offset)) & MLX5_ONES32(size)) +#define MLX5_EXTRACT(src, start, len) \ + (((len) == 32) ? (src) : MLX5_EXTRACT_C(src, start, len)) +#define MLX5_ONES32(size) \ + ((size) ? (0xffffffff >> (32 - (size))) : 0) +#define MLX5_MASK32(offset, size) \ + (MLX5_ONES32(size) << (offset)) +#define MLX5_MERGE_C(rsrc1, rsrc2, start, len) \ + ((((rsrc2) << (start)) & (MLX5_MASK32((start), (len)))) | \ + ((rsrc1) & (~MLX5_MASK32((start), (len))))) +#define MLX5_MERGE(rsrc1, rsrc2, start, len) \ + (((len) == 32) ? 
(rsrc2) : MLX5_MERGE_C(rsrc1, rsrc2, start, len)) +#define vsc_read(dev, offset, val) \ + pci_read_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) +#define vsc_write(dev, offset, val) \ + pci_write_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) +#define VSC_MAX_RETRIES 2048 + +enum { + VSC_CTRL_OFFSET = 0x4, + VSC_COUNTER_OFFSET = 0x8, + VSC_SEMAPHORE_OFFSET = 0xc, + VSC_ADDR_OFFSET = 0x10, + VSC_DATA_OFFSET = 0x14, + + VSC_FLAG_BIT_OFFS = 31, + VSC_FLAG_BIT_LEN = 1, + + VSC_SYND_BIT_OFFS = 30, + VSC_SYND_BIT_LEN = 1, + + VSC_ADDR_BIT_OFFS = 0, + VSC_ADDR_BIT_LEN = 30, + + VSC_SPACE_BIT_OFFS = 0, + VSC_SPACE_BIT_LEN = 16, + + VSC_SIZE_VLD_BIT_OFFS = 28, + VSC_SIZE_VLD_BIT_LEN = 1, + + VSC_STATUS_BIT_OFFS = 29, + VSC_STATUS_BIT_LEN = 3, +}; + +void mlx5_pci_vsc_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_pf(dev)) + return; + + dev->vsc_addr = pci_find_capability(dev->pdev, + PCI_CAP_ID_VNDR); + if (!dev->vsc_addr) + mlx5_core_warn(dev, "Failed to get valid vendor specific ID\n"); +} + +int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev) +{ + u32 counter = 0; + int retries = 0; + u32 lock_val; + int ret; + + pci_cfg_access_lock(dev->pdev); + do { + if (retries > VSC_MAX_RETRIES) { + ret = -EBUSY; + goto pci_unlock; + } + + /* Check if semaphore is already locked */ + ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); + if (ret) + goto pci_unlock; + + if (lock_val) { + retries++; + usleep_range(1000, 2000); + continue; + } + + /* Read and write counter value, if written value is + * the same, semaphore was acquired successfully. 
+ */ + ret = vsc_read(dev, VSC_COUNTER_OFFSET, &counter); + if (ret) + goto pci_unlock; + + ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, counter); + if (ret) + goto pci_unlock; + + ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); + if (ret) + goto pci_unlock; + + retries++; + } while (counter != lock_val); + + return 0; + +pci_unlock: + pci_cfg_access_unlock(dev->pdev); + return ret; +} + +int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev) +{ + int ret; + + ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, MLX5_VSC_UNLOCK); + pci_cfg_access_unlock(dev->pdev); + return ret; +} + +int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space, + u32 *ret_space_size) +{ + int ret; + u32 val = 0; + + if (!mlx5_vsc_accessible(dev)) + return -EINVAL; + + if (ret_space_size) + *ret_space_size = 0; + + /* Get a unique val */ + ret = vsc_read(dev, VSC_CTRL_OFFSET, &val); + if (ret) + goto out; + + /* Try to modify the lock */ + val = MLX5_MERGE(val, space, VSC_SPACE_BIT_OFFS, VSC_SPACE_BIT_LEN); + ret = vsc_write(dev, VSC_CTRL_OFFSET, val); + if (ret) + goto out; + + /* Verify lock was modified */ + ret = vsc_read(dev, VSC_CTRL_OFFSET, &val); + if (ret) + goto out; + + if (MLX5_EXTRACT(val, VSC_STATUS_BIT_OFFS, VSC_STATUS_BIT_LEN) == 0) + return -EINVAL; + + /* Get space max address if indicated by size valid bit */ + if (ret_space_size && + MLX5_EXTRACT(val, VSC_SIZE_VLD_BIT_OFFS, VSC_SIZE_VLD_BIT_LEN)) { + ret = vsc_read(dev, VSC_ADDR_OFFSET, &val); + if (ret) { + mlx5_core_warn(dev, "Failed to get max space size\n"); + goto out; + } + *ret_space_size = MLX5_EXTRACT(val, VSC_ADDR_BIT_OFFS, + VSC_ADDR_BIT_LEN); + } + return 0; + +out: + return ret; +} + +static int mlx5_vsc_wait_on_flag(struct mlx5_core_dev *dev, u8 expected_val) +{ + int retries = 0; + u32 flag; + int ret; + + do { + if (retries > VSC_MAX_RETRIES) + return -EBUSY; + + ret = vsc_read(dev, VSC_ADDR_OFFSET, &flag); + if (ret) + return ret; + flag = MLX5_EXTRACT(flag, VSC_FLAG_BIT_OFFS, VSC_FLAG_BIT_LEN); + 
retries++; + + if ((retries & 0xf) == 0) + usleep_range(1000, 2000); + + } while (flag != expected_val); + + return 0; +} + +static int mlx5_vsc_gw_write(struct mlx5_core_dev *dev, unsigned int address, + u32 data) +{ + int ret; + + if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS, + VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN)) + return -EINVAL; + + /* Set flag to 0x1 */ + address = MLX5_MERGE(address, 1, VSC_FLAG_BIT_OFFS, 1); + ret = vsc_write(dev, VSC_DATA_OFFSET, data); + if (ret) + goto out; + + ret = vsc_write(dev, VSC_ADDR_OFFSET, address); + if (ret) + goto out; + + /* Wait for the flag to be cleared */ + ret = mlx5_vsc_wait_on_flag(dev, 0); + +out: + return ret; +} + +static int mlx5_vsc_gw_read(struct mlx5_core_dev *dev, unsigned int address, + u32 *data) +{ + int ret; + + if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS, + VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN)) + return -EINVAL; + + ret = vsc_write(dev, VSC_ADDR_OFFSET, address); + if (ret) + goto out; + + ret = mlx5_vsc_wait_on_flag(dev, 1); + if (ret) + goto out; + + ret = vsc_read(dev, VSC_DATA_OFFSET, data); +out: + return ret; +} + +static int mlx5_vsc_gw_read_fast(struct mlx5_core_dev *dev, + unsigned int read_addr, + unsigned int *next_read_addr, + u32 *data) +{ + int ret; + + ret = mlx5_vsc_gw_read(dev, read_addr, data); + if (ret) + goto out; + + ret = vsc_read(dev, VSC_ADDR_OFFSET, next_read_addr); + if (ret) + goto out; + + *next_read_addr = MLX5_EXTRACT(*next_read_addr, VSC_ADDR_BIT_OFFS, + VSC_ADDR_BIT_LEN); + + if (*next_read_addr <= read_addr) + ret = -EINVAL; +out: + return ret; +} + +int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, + int length) +{ + unsigned int next_read_addr = 0; + unsigned int read_addr = 0; + + while (read_addr < length) { + if (mlx5_vsc_gw_read_fast(dev, read_addr, &next_read_addr, + &data[(read_addr >> 2)])) + return read_addr; + + read_addr = next_read_addr; + } + return length; +} + +int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space, + enum 
mlx5_vsc_state state) +{ + u32 data, id = 0; + int ret; + + ret = mlx5_vsc_gw_set_space(dev, MLX5_SEMAPHORE_SPACE_DOMAIN, NULL); + if (ret) { + mlx5_core_warn(dev, "Failed to set gw space %d\n", ret); + return ret; + } + + if (state == MLX5_VSC_LOCK) { + /* Get a unique ID based on the counter */ + ret = vsc_read(dev, VSC_COUNTER_OFFSET, &id); + if (ret) + return ret; + } + + /* Try to modify lock */ + ret = mlx5_vsc_gw_write(dev, space, id); + if (ret) + return ret; + + /* Verify lock was modified */ + ret = mlx5_vsc_gw_read(dev, space, &data); + if (ret) + return -EINVAL; + + if (data != id) + return -EBUSY; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h new file mode 100644 index 000000000000..64272a6d7754 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies */ + +#ifndef __MLX5_PCI_VSC_H__ +#define __MLX5_PCI_VSC_H__ + +enum mlx5_vsc_state { + MLX5_VSC_UNLOCK, + MLX5_VSC_LOCK, +}; + +enum { + MLX5_VSC_SPACE_SCAN_CRSPACE = 0x7, +}; + +void mlx5_pci_vsc_init(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space, + u32 *ret_space_size); +int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, + int length); + +static inline bool mlx5_vsc_accessible(struct mlx5_core_dev *dev) +{ + return !!dev->vsc_addr; +} + +int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space, + enum mlx5_vsc_state state); + +#endif /* __MLX5_PCI_VSC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 23d53163ce15..b15b27a497fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -56,6 +56,7 @@ #include "fs_core.h" #include "lib/mpfs.h" #include "eswitch.h" +#include "devlink.h" #include "lib/mlx5.h" #include "fpga/core.h" #include "fpga/ipsec.h" @@ -63,7 +64,9 @@ #include "accel/tls.h" #include "lib/clock.h" #include "lib/vxlan.h" +#include "lib/geneve.h" #include "lib/devcom.h" +#include "lib/pci_vsc.h" #include "diag/fw_tracer.h" #include "ecpf.h" @@ -169,18 +172,28 @@ static struct mlx5_profile profile[] = { #define FW_INIT_TIMEOUT_MILI 2000 #define FW_INIT_WAIT_MS 2 -#define FW_PRE_INIT_TIMEOUT_MILI 10000 +#define FW_PRE_INIT_TIMEOUT_MILI 120000 +#define FW_INIT_WARN_MESSAGE_INTERVAL 20000 -static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili) +static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, + u32 warn_time_mili) { + unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili); unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); int err = 0; + BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL); + while (fw_initializing(dev)) { if (time_after(jiffies, end)) { err = -EBUSY; break; } + if (warn_time_mili && time_after(jiffies, warn)) { + mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n", + jiffies_to_msecs(end - warn) / 1000); + warn = jiffies + msecs_to_jiffies(warn_time_mili); + } msleep(FW_INIT_WAIT_MS); } @@ -721,8 +734,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, struct mlx5_priv *priv = &dev->priv; int err = 0; - priv->pci_dev_data = id->driver_data; - + mutex_init(&dev->pci_status_mutex); pci_set_drvdata(dev->pdev, dev); dev->bar_addr = pci_resource_start(pdev, 0); @@ -761,6 +773,8 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, goto err_clr_master; } + mlx5_pci_vsc_init(dev); + return 0; err_clr_master: @@ -794,10 +808,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_devcom; } + err = 
mlx5_irq_table_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize irq table\n"); + goto err_devcom; + } + err = mlx5_eq_table_init(dev); if (err) { mlx5_core_err(dev, "failed to initialize eq\n"); - goto err_devcom; + goto err_irq_cleanup; } err = mlx5_events_init(dev); @@ -821,6 +841,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) mlx5_init_clock(dev); dev->vxlan = mlx5_vxlan_create(dev); + dev->geneve = mlx5_geneve_create(dev); err = mlx5_init_rl_table(dev); if (err) { @@ -834,37 +855,38 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_rl_cleanup; } - err = mlx5_eswitch_init(dev); + err = mlx5_sriov_init(dev); if (err) { - mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + mlx5_core_err(dev, "Failed to init sriov %d\n", err); goto err_mpfs_cleanup; } - err = mlx5_sriov_init(dev); + err = mlx5_eswitch_init(dev); if (err) { - mlx5_core_err(dev, "Failed to init sriov %d\n", err); - goto err_eswitch_cleanup; + mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + goto err_sriov_cleanup; } err = mlx5_fpga_init(dev); if (err) { mlx5_core_err(dev, "Failed to init fpga device %d\n", err); - goto err_sriov_cleanup; + goto err_eswitch_cleanup; } dev->tracer = mlx5_fw_tracer_create(dev); return 0; -err_sriov_cleanup: - mlx5_sriov_cleanup(dev); err_eswitch_cleanup: mlx5_eswitch_cleanup(dev->priv.eswitch); +err_sriov_cleanup: + mlx5_sriov_cleanup(dev); err_mpfs_cleanup: mlx5_mpfs_cleanup(dev); err_rl_cleanup: mlx5_cleanup_rl_table(dev); err_tables_cleanup: + mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_qp_table(dev); @@ -873,6 +895,8 @@ err_events_cleanup: mlx5_events_cleanup(dev); err_eq_cleanup: mlx5_eq_table_cleanup(dev); +err_irq_cleanup: + mlx5_irq_table_cleanup(dev); err_devcom: mlx5_devcom_unregister_device(dev->priv.devcom); @@ -883,10 +907,11 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { mlx5_fw_tracer_destroy(dev->tracer); 
mlx5_fpga_cleanup(dev); - mlx5_sriov_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_sriov_cleanup(dev); mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); + mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); @@ -895,6 +920,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cq_debugfs_cleanup(dev); mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); + mlx5_irq_table_cleanup(dev); mlx5_devcom_unregister_device(dev->priv.devcom); } @@ -911,7 +937,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) /* wait for firmware to accept initialization segments configurations */ - err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI); + err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL); if (err) { mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n", FW_PRE_INIT_TIMEOUT_MILI); @@ -924,7 +950,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) return err; } - err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI); + err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0); if (err) { mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n", FW_INIT_TIMEOUT_MILI); @@ -1028,6 +1054,12 @@ static int mlx5_load(struct mlx5_core_dev *dev) mlx5_events_start(dev); mlx5_pagealloc_start(dev); + err = mlx5_irq_table_create(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc IRQs\n"); + goto err_irq_table; + } + err = mlx5_eq_table_create(dev); if (err) { mlx5_core_err(dev, "Failed to create EQs\n"); @@ -1099,6 +1131,8 @@ err_fpga_start: err_fw_tracer: mlx5_eq_table_destroy(dev); err_eq_table: + mlx5_irq_table_destroy(dev); +err_irq_table: mlx5_pagealloc_stop(dev); mlx5_events_stop(dev); mlx5_put_uars_page(dev, dev->priv.uar); @@ -1115,6 +1149,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_fpga_device_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); 
mlx5_eq_table_destroy(dev); + mlx5_irq_table_destroy(dev); mlx5_pagealloc_stop(dev); mlx5_events_stop(dev); mlx5_put_uars_page(dev, dev->priv.uar); @@ -1183,7 +1218,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) int err = 0; if (cleanup) - mlx5_drain_health_recovery(dev); + mlx5_drain_health_wq(dev); mutex_lock(&dev->intf_state_mutex); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { @@ -1210,17 +1245,6 @@ out: return err; } -static const struct devlink_ops mlx5_devlink_ops = { -#ifdef CONFIG_MLX5_ESWITCH - .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, - .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, - .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, - .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, - .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, - .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, -#endif -}; - static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) { struct mlx5_priv *priv = &dev->priv; @@ -1230,7 +1254,6 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); - mutex_init(&dev->pci_status_mutex); mutex_init(&dev->intf_state_mutex); mutex_init(&priv->bfregs.reg_head.lock); @@ -1282,9 +1305,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) struct devlink *devlink; int err; - devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev)); + devlink = mlx5_devlink_alloc(); if (!devlink) { - dev_err(&pdev->dev, "kzalloc failed\n"); + dev_err(&pdev->dev, "devlink alloc failed\n"); return -ENOMEM; } @@ -1292,6 +1315,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) dev->device = &pdev->dev; dev->pdev = pdev; + dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ? 
+ MLX5_COREDEV_VF : MLX5_COREDEV_PF; + err = mlx5_mdev_init(dev, prof_sel); if (err) goto mdev_init_err; @@ -1312,10 +1338,14 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) request_module_nowait(MLX5_IB_MOD); - err = devlink_register(devlink, &pdev->dev); + err = mlx5_devlink_register(devlink, &pdev->dev); if (err) goto clean_load; + err = mlx5_crdump_enable(dev); + if (err) + dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); + pci_save_state(pdev); return 0; @@ -1327,7 +1357,7 @@ err_load_one: pci_init_err: mlx5_mdev_uninit(dev); mdev_init_err: - devlink_free(devlink); + mlx5_devlink_free(devlink); return err; } @@ -1337,7 +1367,8 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); - devlink_unregister(devlink); + mlx5_crdump_disable(dev); + mlx5_devlink_unregister(devlink); mlx5_unregister_device(dev); if (mlx5_unload_one(dev, true)) { @@ -1348,7 +1379,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_pci_close(dev); mlx5_mdev_uninit(dev); - devlink_free(devlink); + mlx5_devlink_free(devlink); } static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, @@ -1359,12 +1390,10 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_core_info(dev, "%s was called\n", __func__); mlx5_enter_error_state(dev, false); + mlx5_error_sw_reset(dev); mlx5_unload_one(dev, false); - /* In case of kernel call drain the health wq */ - if (state) { - mlx5_drain_health_wq(dev); - mlx5_pci_disable_device(dev); - } + mlx5_drain_health_wq(dev); + mlx5_pci_disable_device(dev); return state == pci_channel_io_perm_failure ? 
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; @@ -1532,7 +1561,8 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); void mlx5_disable_device(struct mlx5_core_dev *dev) { - mlx5_pci_err_detected(dev->pdev, 0); + mlx5_error_sw_reset(dev); + mlx5_unload_one(dev, false); } void mlx5_recover_device(struct mlx5_core_dev *dev) @@ -1570,7 +1600,7 @@ static int __init init(void) get_random_bytes(&sw_owner_id, sizeof(sw_owner_id)); mlx5_core_verify_params(); - mlx5_fpga_ipsec_build_fs_cmds(); + mlx5_accel_ipsec_build_fs_cmds(); mlx5_register_debugfs(); err = pci_register_driver(&mlx5_core_driver); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 22e69d4813e4..471bbc48bc1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -111,6 +111,11 @@ enum { MLX5_DRIVER_SYND = 0xbadd00de, }; +enum mlx5_semaphore_space_address { + MLX5_SEMAPHORE_SPACE_DOMAIN = 0xA, + MLX5_SEMAPHORE_SW_RESET = 0x20, +}; + int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); @@ -118,6 +123,7 @@ int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); +void mlx5_error_sw_reset(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); int mlx5_sriov_init(struct mlx5_core_dev *dev); @@ -153,6 +159,19 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +int mlx5_irq_table_init(struct mlx5_core_dev *dev); +void mlx5_irq_table_cleanup(struct 
mlx5_core_dev *dev); +int mlx5_irq_table_create(struct mlx5_core_dev *dev); +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); +int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb); +int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb); +struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx); +struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table); +int mlx5_irq_get_num_comp(struct mlx5_irq_table *table); + int mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); void mlx5_events_start(struct mlx5_core_dev *dev); @@ -184,7 +203,10 @@ int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj)) -int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw); +int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw, + struct netlink_ext_ack *extack); +int mlx5_fw_version_query(struct mlx5_core_dev *dev, + u32 *running_ver, u32 *stored_ver); void mlx5e_init(void); void mlx5e_cleanup(void); @@ -213,7 +235,7 @@ enum { MLX5_NIC_IFC_FULL = 0, MLX5_NIC_IFC_DISABLED = 1, MLX5_NIC_IFC_NO_DRAM_NIC = 2, - MLX5_NIC_IFC_INVALID = 3 + MLX5_NIC_IFC_SW_RESET = 7 }; u8 mlx5_get_nic_state(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index ea744d8466ea..9231b39d18b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -38,15 +38,12 @@ void mlx5_init_mkey_table(struct mlx5_core_dev *dev) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; - - memset(table, 0, sizeof(*table)); - rwlock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + xa_init_flags(&dev->priv.mkey_table, XA_FLAGS_LOCK_IRQ); 
} void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) { + WARN_ON(!xa_empty(&dev->priv.mkey_table)); } int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, @@ -56,8 +53,8 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_async_cbk_t callback, struct mlx5_async_work *context) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; + struct xarray *mkeys = &dev->priv.mkey_table; u32 mkey_index; void *mkc; int err; @@ -88,12 +85,10 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", mkey_index, key, mkey->key); - /* connect to mkey tree */ - write_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey); - write_unlock_irq(&table->lock); + err = xa_err(xa_store_irq(mkeys, mlx5_base_mkey(mkey->key), mkey, + GFP_KERNEL)); if (err) { - mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n", + mlx5_core_warn(dev, "failed xarray insert of mkey 0x%x, %d\n", mlx5_base_mkey(mkey->key), err); mlx5_core_destroy_mkey(dev, mkey); } @@ -114,17 +109,17 @@ EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; + struct xarray *mkeys = &dev->priv.mkey_table; struct mlx5_core_mkey *deleted_mkey; unsigned long flags; - write_lock_irqsave(&table->lock, flags); - deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); - write_unlock_irqrestore(&table->lock, flags); + xa_lock_irqsave(mkeys, flags); + deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key)); + xa_unlock_irqrestore(mkeys, flags); if (!deleted_mkey) { - mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n", + mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n", mlx5_base_mkey(mkey->key)); return 
-ENOENT; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c new file mode 100644 index 000000000000..373981a659c7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif + +#define MLX5_MAX_IRQ_NAME (32) + +struct mlx5_irq { + struct atomic_notifier_head nh; + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_NAME]; +}; + +struct mlx5_irq_table { + struct mlx5_irq *irq; + int nvec; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif +}; + +int mlx5_irq_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table; + + irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL); + if (!irq_table) + return -ENOMEM; + + dev->priv.irq_table = irq_table; + return 0; +} + +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) +{ + kvfree(dev->priv.irq_table); +} + +int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) +{ + return table->nvec - MLX5_IRQ_VEC_COMP_BASE; +} + +static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) +{ + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + return &irq_table->irq[vecidx]; +} + +int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb) +{ + struct mlx5_irq *irq; + + irq = &irq_table->irq[vecidx]; + return atomic_notifier_chain_register(&irq->nh, nb); +} + +int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb) +{ + struct mlx5_irq *irq; + + irq = &irq_table->irq[vecidx]; + return atomic_notifier_chain_unregister(&irq->nh, nb); +} + +static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) +{ + 
atomic_notifier_call_chain(nh, 0, NULL); + return IRQ_HANDLED; +} + +static void irq_set_name(char *name, int vecidx) +{ + if (vecidx == 0) { + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async"); + return; + } + + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", + vecidx - MLX5_IRQ_VEC_COMP_BASE); + return; +} + +static int request_irqs(struct mlx5_core_dev *dev, int nvec) +{ + char name[MLX5_MAX_IRQ_NAME]; + int err; + int i; + + for (i = 0; i < nvec; i++) { + struct mlx5_irq *irq = mlx5_irq_get(dev, i); + int irqn = pci_irq_vector(dev->pdev, i); + + irq_set_name(name, i); + ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh); + snprintf(irq->name, MLX5_MAX_IRQ_NAME, + "%s@pci:%s", name, pci_name(dev->pdev)); + err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name, + &irq->nh); + if (err) { + mlx5_core_err(dev, "Failed to request irq\n"); + goto err_request_irq; + } + } + return 0; + +err_request_irq: + for (; i >= 0; i--) { + struct mlx5_irq *irq = mlx5_irq_get(dev, i); + int irqn = pci_irq_vector(dev->pdev, i); + + free_irq(irqn, &irq->nh); + } + return err; +} + +static void irq_clear_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + free_irq_cpu_rmap(irq_table->rmap); +#endif +} + +static int irq_set_rmap(struct mlx5_core_dev *mdev) +{ + int err = 0; +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = mdev->priv.irq_table; + int num_affinity_vec; + int vecidx; + + num_affinity_vec = mlx5_irq_get_num_comp(irq_table); + irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec); + if (!irq_table->rmap) { + err = -ENOMEM; + mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err); + goto err_out; + } + + vecidx = MLX5_IRQ_VEC_COMP_BASE; + for (; vecidx < irq_table->nvec; vecidx++) { + err = irq_cpu_rmap_add(irq_table->rmap, + pci_irq_vector(mdev->pdev, vecidx)); + if (err) { + mlx5_core_err(mdev, "irq_cpu_rmap_add failed. 
err %d", + err); + goto err_irq_cpu_rmap_add; + } + } + return 0; + +err_irq_cpu_rmap_add: + irq_clear_rmap(mdev); +err_out: +#endif + return err; +} + +/* Completion IRQ vectors */ + +static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; + struct mlx5_irq *irq; + int irqn; + + irq = mlx5_irq_get(mdev, vecidx); + irqn = pci_irq_vector(mdev->pdev, vecidx); + if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node), + irq->mask); + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irqn, irq->mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", + irqn); + + return 0; +} + +static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; + struct mlx5_irq *irq; + int irqn; + + irq = mlx5_irq_get(mdev, vecidx); + irqn = pci_irq_vector(mdev->pdev, vecidx); + irq_set_affinity_hint(irqn, NULL); + free_cpumask_var(irq->mask); +} + +static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) +{ + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); + int err; + int i; + + for (i = 0; i < nvec; i++) { + err = set_comp_irq_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + clear_comp_irq_affinity_hint(mdev, i); + + return err; +} + +static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) +{ + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); + int i; + + for (i = 0; i < nvec; i++) + clear_comp_irq_affinity_hint(mdev, i); +} + +struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx) +{ + return irq_table->irq[vecidx].mask; +} + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table) +{ + return irq_table->rmap; +} +#endif + +static void 
unrequest_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); +} + +int mlx5_irq_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_irq_table *table = priv->irq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? + MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + int nvec; + int err; + + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + + MLX5_IRQ_VEC_COMP_BASE; + nvec = min_t(int, nvec, num_eqs); + if (nvec <= MLX5_IRQ_VEC_COMP_BASE) + return -ENOMEM; + + table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL); + if (!table->irq) + return -ENOMEM; + + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1, + nvec, PCI_IRQ_MSIX); + if (nvec < 0) { + err = nvec; + goto err_free_irq; + } + + table->nvec = nvec; + + err = irq_set_rmap(dev); + if (err) + goto err_set_rmap; + + err = request_irqs(dev, nvec); + if (err) + goto err_request_irqs; + + err = set_comp_irq_affinity_hints(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); + goto err_set_affinity; + } + + return 0; + +err_set_affinity: + unrequest_irqs(dev); +err_request_irqs: + irq_clear_rmap(dev); +err_set_rmap: + pci_free_irq_vectors(dev->pdev); +err_free_irq: + kfree(table->irq); + return err; +} + +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + /* free_irq requires that affinity and rmap will be cleared + * before calling it. This is why there is asymmetry with set_rmap + * which should be called after alloc_irq but before request_irq. 
+ */ + irq_clear_rmap(dev); + clear_comp_irqs_affinity_hints(dev); + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); + pci_free_irq_vectors(dev->pdev); + kfree(table->irq); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 86f77456f873..17ce9dd56b13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -106,10 +106,10 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) return 0; -destroy_flow_table: - mlx5_destroy_flow_table(ft); destroy_flow_group: mlx5_destroy_flow_group(fg); +destroy_flow_table: + mlx5_destroy_flow_table(ft); free: kvfree(spec); kvfree(flow_group_in); @@ -126,7 +126,7 @@ static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid * { u8 hw_id[ETH_ALEN]; - mlx5_query_nic_vport_mac_address(dev, 0, hw_id); + mlx5_query_mac_address(dev, hw_id); gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); addrconf_addr_eui48(&gid->raw[8], hw_id); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index a249b3c3843d..61fcfd8b39b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -74,17 +74,11 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) int err; int vf; - if (sriov->enabled_vfs) { - mlx5_core_warn(dev, - "failed to enable SRIOV on device, already enabled with %d vfs\n", - sriov->enabled_vfs); - return -EBUSY; - } - if (!MLX5_ESWITCH_MANAGER(dev)) goto enable_vfs_hca; - err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); + mlx5_eswitch_update_num_of_vfs(dev->priv.eswitch, num_vfs); + err = mlx5_eswitch_enable(dev->priv.eswitch, MLX5_ESWITCH_LEGACY); if (err) { mlx5_core_warn(dev, "failed to enable eswitch SRIOV (%d)\n", err); @@ -99,7 +93,6 @@ 
enable_vfs_hca: continue; } sriov->vfs_ctx[vf].enabled = 1; - sriov->enabled_vfs++; if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { err = sriov_restore_guids(dev, vf); if (err) { @@ -118,13 +111,11 @@ enable_vfs_hca: static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int num_vfs = pci_num_vf(dev->pdev); int err; int vf; - if (!sriov->enabled_vfs) - goto out; - - for (vf = 0; vf < sriov->num_vfs; vf++) { + for (vf = num_vfs - 1; vf >= 0; vf--) { if (!sriov->vfs_ctx[vf].enabled) continue; err = mlx5_core_disable_hca(dev, vf + 1); @@ -133,12 +124,10 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) continue; } sriov->vfs_ctx[vf].enabled = 0; - sriov->enabled_vfs--; } -out: if (MLX5_ESWITCH_MANAGER(dev)) - mlx5_eswitch_disable_sriov(dev->priv.eswitch); + mlx5_eswitch_disable(dev->priv.eswitch); if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); @@ -191,13 +180,11 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) int mlx5_sriov_attach(struct mlx5_core_dev *dev) { - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - - if (!mlx5_core_is_pf(dev) || !sriov->num_vfs) + if (!mlx5_core_is_pf(dev) || !pci_num_vf(dev->pdev)) return 0; /* If sriov VFs exist in PCI level, enable them in device level */ - return mlx5_device_enable_sriov(dev, sriov->num_vfs); + return mlx5_device_enable_sriov(dev, pci_num_vf(dev->pdev)); } void mlx5_sriov_detach(struct mlx5_core_dev *dev) @@ -208,6 +195,30 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev) mlx5_device_disable_sriov(dev); } +static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) +{ + u16 host_total_vfs; + const u32 *out; + + if (mlx5_core_is_ecpf_esw_manager(dev)) { + out = mlx5_esw_query_functions(dev); + + /* Old FW doesn't support getting total_vfs from esw func + * but supports getting it from pci_sriov. 
+ */ + if (IS_ERR(out)) + goto done; + host_total_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_total_vfs); + kvfree(out); + if (host_total_vfs) + return host_total_vfs; + } + +done: + return pci_sriov_get_totalvfs(dev->pdev); +} + int mlx5_sriov_init(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -218,6 +229,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) return 0; total_vfs = pci_sriov_get_totalvfs(pdev); + sriov->max_vfs = mlx5_get_max_vfs(dev); sriov->num_vfs = pci_num_vf(pdev); sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 95cdc8cbcba4..c912d82ca64b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -34,6 +34,7 @@ #include <linux/etherdevice.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/vport.h> +#include <linux/mlx5/eswitch.h> #include "mlx5_core.h" /* Mutex to hold while enabling or disabling RoCE */ @@ -155,11 +156,12 @@ int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, } int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, - u16 vport, u8 *addr) + u16 vport, bool other, u8 *addr) { - u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {}; u8 *out_addr; + u32 *out; int err; out = kvzalloc(outlen, GFP_KERNEL); @@ -169,7 +171,12 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, nic_vport_context.permanent_address); - err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(query_nic_vport_context_in, in, other_vport, 
other); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); if (!err) ether_addr_copy(addr, &out_addr[2]); @@ -178,6 +185,12 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address); +int mlx5_query_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +{ + return mlx5_query_nic_vport_mac_address(mdev, 0, false, addr); +} +EXPORT_SYMBOL_GPL(mlx5_query_mac_address); + int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr) { @@ -194,9 +207,7 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, MLX5_SET(modify_nic_vport_context_in, in, field_select.permanent_address, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - - if (vport) - MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); @@ -291,9 +302,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); - - if (vport) - MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); if (err) @@ -483,7 +492,7 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, MLX5_SET(modify_nic_vport_context_in, in, field_select.node_guid, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); @@ -1157,3 +1166,17 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) return tmp; } 
EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); + +/** + * mlx5_eswitch_get_total_vports - Get total vports of the eswitch + * + * @dev: Pointer to core device + * + * mlx5_eswitch_get_total_vports returns total number of vports for + * the eswitch. + */ +u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) +{ + return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev); +} +EXPORT_SYMBOL(mlx5_eswitch_get_total_vports); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index 1f87cce421e0..f1ec58c9e9e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -134,11 +134,6 @@ static inline void mlx5_wq_cyc_update_db_record(struct mlx5_wq_cyc *wq) *wq->db = cpu_to_be32(wq->wqe_ctr); } -static inline u16 mlx5_wq_cyc_get_ctr_wrap_cnt(struct mlx5_wq_cyc *wq, u16 ctr) -{ - return ctr >> wq->fbc.log_sz; -} - static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr) { return ctr & wq->fbc.sz_m1; diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h index 14c0c62f8e73..c50e74ab02c4 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h @@ -5,6 +5,7 @@ #define _MLXFW_H #include <linux/firmware.h> +#include <linux/netlink.h> enum mlxfw_fsm_state { MLXFW_FSM_STATE_IDLE, @@ -57,6 +58,10 @@ struct mlxfw_dev_ops { void (*fsm_cancel)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); void (*fsm_release)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); + + void (*status_notify)(struct mlxfw_dev *mlxfw_dev, + const char *msg, const char *comp_name, + u32 done_bytes, u32 total_bytes); }; struct mlxfw_dev { @@ -67,11 +72,13 @@ struct mlxfw_dev { #if IS_REACHABLE(CONFIG_MLXFW) int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, - const struct firmware *firmware); + const struct firmware *firmware, + struct netlink_ext_ack *extack); #else static inline int 
mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, - const struct firmware *firmware) + const struct firmware *firmware, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c index 240c027e5f07..67990406cba2 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c @@ -39,8 +39,19 @@ static const char * const mlxfw_fsm_state_err_str[] = { "unknown error" }; +static void mlxfw_status_notify(struct mlxfw_dev *mlxfw_dev, + const char *msg, const char *comp_name, + u32 done_bytes, u32 total_bytes) +{ + if (!mlxfw_dev->ops->status_notify) + return; + mlxfw_dev->ops->status_notify(mlxfw_dev, msg, comp_name, + done_bytes, total_bytes); +} + static int mlxfw_fsm_state_wait(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, - enum mlxfw_fsm_state fsm_state) + enum mlxfw_fsm_state fsm_state, + struct netlink_ext_ack *extack) { enum mlxfw_fsm_state_err fsm_state_err; enum mlxfw_fsm_state curr_fsm_state; @@ -57,11 +68,13 @@ retry: if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) { pr_err("Firmware flash failed: %s\n", mlxfw_fsm_state_err_str[fsm_state_err]); + NL_SET_ERR_MSG_MOD(extack, "Firmware flash failed"); return -EINVAL; } if (curr_fsm_state != fsm_state) { if (--times == 0) { pr_err("Timeout reached on FSM state change"); + NL_SET_ERR_MSG_MOD(extack, "Timeout reached on FSM state change"); return -ETIMEDOUT; } msleep(MLXFW_FSM_STATE_WAIT_CYCLE_MS); @@ -76,16 +89,20 @@ retry: static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, - struct mlxfw_mfa2_component *comp) + struct mlxfw_mfa2_component *comp, + struct netlink_ext_ack *extack) { u16 comp_max_write_size; u8 comp_align_bits; u32 comp_max_size; + char comp_name[8]; u16 block_size; u8 *block_ptr; u32 offset; int err; + sprintf(comp_name, "%u", comp->index); + err = mlxfw_dev->ops->component_query(mlxfw_dev, comp->index, &comp_max_size, 
&comp_align_bits, &comp_max_write_size); @@ -96,6 +113,7 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, if (comp->data_size > comp_max_size) { pr_err("Component %d is of size %d which is bigger than limit %d\n", comp->index, comp->data_size, comp_max_size); + NL_SET_ERR_MSG_MOD(extack, "Component is bigger than limit"); return -EINVAL; } @@ -103,6 +121,7 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, comp_align_bits); pr_debug("Component update\n"); + mlxfw_status_notify(mlxfw_dev, "Updating component", comp_name, 0, 0); err = mlxfw_dev->ops->fsm_component_update(mlxfw_dev, fwhandle, comp->index, comp->data_size); @@ -110,11 +129,13 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, return err; err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, - MLXFW_FSM_STATE_DOWNLOAD); + MLXFW_FSM_STATE_DOWNLOAD, extack); if (err) goto err_out; pr_debug("Component download\n"); + mlxfw_status_notify(mlxfw_dev, "Downloading component", + comp_name, 0, comp->data_size); for (offset = 0; offset < MLXFW_ALIGN_UP(comp->data_size, comp_align_bits); offset += comp_max_write_size) { @@ -126,15 +147,20 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, offset); if (err) goto err_out; + mlxfw_status_notify(mlxfw_dev, "Downloading component", + comp_name, offset + block_size, + comp->data_size); } pr_debug("Component verify\n"); + mlxfw_status_notify(mlxfw_dev, "Verifying component", comp_name, 0, 0); err = mlxfw_dev->ops->fsm_component_verify(mlxfw_dev, fwhandle, comp->index); if (err) goto err_out; - err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED); + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, + MLXFW_FSM_STATE_LOCKED, extack); if (err) goto err_out; return 0; @@ -145,7 +171,8 @@ err_out: } static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, - struct mlxfw_mfa2_file *mfa2_file) + struct mlxfw_mfa2_file *mfa2_file, + struct netlink_ext_ack *extack) { u32 component_count; int 
err; @@ -156,6 +183,7 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, &component_count); if (err) { pr_err("Could not find device PSID in MFA2 file\n"); + NL_SET_ERR_MSG_MOD(extack, "Could not find device PSID in MFA2 file"); return err; } @@ -168,7 +196,7 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, return PTR_ERR(comp); pr_info("Flashing component type %d\n", comp->index); - err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp); + err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp, extack); mlxfw_mfa2_file_component_put(comp); if (err) return err; @@ -177,7 +205,8 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, } int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, - const struct firmware *firmware) + const struct firmware *firmware, + struct netlink_ext_ack *extack) { struct mlxfw_mfa2_file *mfa2_file; u32 fwhandle; @@ -185,6 +214,7 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, if (!mlxfw_mfa2_check(firmware)) { pr_err("Firmware file is not MFA2\n"); + NL_SET_ERR_MSG_MOD(extack, "Firmware file is not MFA2"); return -EINVAL; } @@ -193,29 +223,35 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, return PTR_ERR(mfa2_file); pr_info("Initialize firmware flash process\n"); + mlxfw_status_notify(mlxfw_dev, "Initializing firmware flash process", + NULL, 0, 0); err = mlxfw_dev->ops->fsm_lock(mlxfw_dev, &fwhandle); if (err) { pr_err("Could not lock the firmware FSM\n"); + NL_SET_ERR_MSG_MOD(extack, "Could not lock the firmware FSM"); goto err_fsm_lock; } err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, - MLXFW_FSM_STATE_LOCKED); + MLXFW_FSM_STATE_LOCKED, extack); if (err) goto err_state_wait_idle_to_locked; - err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file); + err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file, extack); if (err) goto err_flash_components; pr_debug("Activate image\n"); + mlxfw_status_notify(mlxfw_dev, "Activating 
image", NULL, 0, 0); err = mlxfw_dev->ops->fsm_activate(mlxfw_dev, fwhandle); if (err) { pr_err("Could not activate the downloaded image\n"); + NL_SET_ERR_MSG_MOD(extack, "Could not activate the downloaded image"); goto err_fsm_activate; } - err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED); + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, + MLXFW_FSM_STATE_LOCKED, extack); if (err) goto err_state_wait_activate_to_locked; @@ -223,6 +259,7 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, mlxfw_dev->ops->fsm_release(mlxfw_dev, fwhandle); pr_info("Firmware flash done.\n"); + mlxfw_status_notify(mlxfw_dev, "Firmware flash done", NULL, 0, 0); mlxfw_mfa2_file_fini(mfa2_file); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 11ded0bc7d98..06c80343d9ed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -83,6 +83,8 @@ config MLXSW_SPECTRUM select PARMAN select OBJAGG select MLXFW + imply PTP_1588_CLOCK + select NET_PTP_CLASSIFY if PTP_1588_CLOCK default m ---help--- This driver supports Mellanox Technologies Spectrum Ethernet diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index c4dc72e1ce63..171b36bd8a4e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -31,5 +31,6 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_nve.o spectrum_nve_vxlan.o \ spectrum_dpipe.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o +mlxsw_spectrum-$(CONFIG_PTP_1588_CLOCK) += spectrum_ptp.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o mlxsw_minimal-objs := minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index 0772e4339b33..5ffdfb532cb7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ 
b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -317,6 +317,18 @@ MLXSW_ITEM64(cmd_mbox, query_fw, doorbell_page_offset, 0x40, 0, 64); */ MLXSW_ITEM32(cmd_mbox, query_fw, doorbell_page_bar, 0x48, 30, 2); +/* cmd_mbox_query_fw_free_running_clock_offset + * The offset of the free running clock page + */ +MLXSW_ITEM64(cmd_mbox, query_fw, free_running_clock_offset, 0x50, 0, 64); + +/* cmd_mbox_query_fw_fr_rn_clk_bar + * PCI base address register (BAR) of the free running clock page + * 0: BAR 0 + * 1: 64 bit BAR + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fr_rn_clk_bar, 0x58, 30, 2); + /* QUERY_BOARDINFO - Query Board Information * ----------------------------------------- * OpMod == 0 (N/A), INMmod == 0 (N/A) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 6ee6de7f0160..17ceac7505e5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1003,6 +1003,20 @@ static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink, return err; } +static int mlxsw_devlink_flash_update(struct devlink *devlink, + const char *file_name, + const char *component, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->flash_update) + return -EOPNOTSUPP; + return mlxsw_driver->flash_update(mlxsw_core, file_name, + component, extack); +} + static const struct devlink_ops mlxsw_devlink_ops = { .reload = mlxsw_devlink_core_bus_device_reload, .port_type_set = mlxsw_devlink_port_type_set, @@ -1019,6 +1033,7 @@ static const struct devlink_ops mlxsw_devlink_ops = { .sb_occ_port_pool_get = mlxsw_devlink_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_devlink_sb_occ_tc_port_bind_get, .info_get = mlxsw_devlink_info_get, + .flash_update = mlxsw_devlink_flash_update, }; static int @@ -1098,6 +1113,12 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info 
*mlxsw_bus_info, goto err_register_params; } + if (mlxsw_driver->init) { + err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info); + if (err) + goto err_driver_init; + } + err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon); if (err) goto err_hwmon_init; @@ -1107,22 +1128,17 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_thermal_init; - if (mlxsw_driver->init) { - err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info); - if (err) - goto err_driver_init; - } - if (mlxsw_driver->params_register && !reload) devlink_params_publish(devlink); return 0; -err_driver_init: - mlxsw_thermal_fini(mlxsw_core->thermal); err_thermal_init: mlxsw_hwmon_fini(mlxsw_core->hwmon); err_hwmon_init: + if (mlxsw_core->driver->fini) + mlxsw_core->driver->fini(mlxsw_core); +err_driver_init: if (mlxsw_driver->params_unregister && !reload) mlxsw_driver->params_unregister(mlxsw_core); err_register_params: @@ -1187,10 +1203,10 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, if (mlxsw_core->driver->params_unregister && !reload) devlink_params_unpublish(devlink); - if (mlxsw_core->driver->fini) - mlxsw_core->driver->fini(mlxsw_core); mlxsw_thermal_fini(mlxsw_core->thermal); mlxsw_hwmon_fini(mlxsw_core->hwmon); + if (mlxsw_core->driver->fini) + mlxsw_core->driver->fini(mlxsw_core); if (mlxsw_core->driver->params_unregister && !reload) mlxsw_core->driver->params_unregister(mlxsw_core); if (!reload) @@ -1229,6 +1245,15 @@ int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, } EXPORT_SYMBOL(mlxsw_core_skb_transmit); +void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port) +{ + if (mlxsw_core->driver->ptp_transmitted) + mlxsw_core->driver->ptp_transmitted(mlxsw_core, skb, + local_port); +} +EXPORT_SYMBOL(mlxsw_core_ptp_transmitted); + static bool __is_rx_listener_equal(const struct mlxsw_rx_listener *rxl_a, const struct mlxsw_rx_listener 
*rxl_b) { @@ -2010,6 +2035,18 @@ int mlxsw_core_resources_query(struct mlxsw_core *mlxsw_core, char *mbox, } EXPORT_SYMBOL(mlxsw_core_resources_query); +u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->bus->read_frc_h(mlxsw_core->bus_priv); +} +EXPORT_SYMBOL(mlxsw_core_read_frc_h); + +u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->bus->read_frc_l(mlxsw_core->bus_priv); +} +EXPORT_SYMBOL(mlxsw_core_read_frc_l); + static int __init mlxsw_core_module_init(void) { int err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index e3832cb5bdda..8efcff4b59cb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -48,6 +48,8 @@ bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core, const struct mlxsw_tx_info *tx_info); int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); +void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port); struct mlxsw_rx_listener { void (*func)(struct sk_buff *skb, u8 local_port, void *priv); @@ -284,6 +286,9 @@ struct mlxsw_driver { unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, u32 *p_cur, u32 *p_max); + int (*flash_update)(struct mlxsw_core *mlxsw_core, + const char *file_name, const char *component, + struct netlink_ext_ack *extack); void (*txhdr_construct)(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); int (*resources_register)(struct mlxsw_core *mlxsw_core); @@ -293,6 +298,13 @@ struct mlxsw_driver { u64 *p_linear_size); int (*params_register)(struct mlxsw_core *mlxsw_core); void (*params_unregister)(struct mlxsw_core *mlxsw_core); + + /* Notify a driver that a timestamped packet was transmitted. Driver + * is responsible for freeing the passed-in SKB. 
+ */ + void (*ptp_transmitted)(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port); + u8 txhdr_len; const struct mlxsw_config_profile *profile; bool res_query_enabled; @@ -306,6 +318,9 @@ int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core); void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core); +u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core); +u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core); + bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core, enum mlxsw_res_id res_id); @@ -336,6 +351,8 @@ struct mlxsw_bus { char *in_mbox, size_t in_mbox_size, char *out_mbox, size_t out_mbox_size, u8 *p_status); + u32 (*read_frc_h)(void *bus_priv); + u32 (*read_frc_l)(void *bus_priv); u8 features; }; @@ -353,7 +370,8 @@ struct mlxsw_bus_info { struct mlxsw_fw_rev fw_rev; u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN]; u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN]; - u8 low_frequency; + u8 low_frequency:1, + read_frc_capable:1; }; struct mlxsw_hwmon; @@ -409,4 +427,14 @@ enum mlxsw_devlink_param_id { MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, }; +struct mlxsw_skb_cb { + struct mlxsw_tx_info tx_info; +}; + +static inline struct mlxsw_skb_cb *mlxsw_skb_cb(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(mlxsw_skb_cb) > sizeof(skb->cb)); + return (struct mlxsw_skb_cb *) skb->cb; +} + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c index cb3e663b1d37..feb4672a5ac0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c @@ -30,8 +30,9 @@ static bool mlxsw_afk_blocks_check(struct mlxsw_afk *mlxsw_afk) elinst = &block->instances[j]; if (elinst->type != elinst->info->type || - elinst->item.size.bits != - elinst->info->item.size.bits) + (!elinst->avoid_size_check && + elinst->item.size.bits != + elinst->info->item.size.bits)) 
return false; } } @@ -385,12 +386,12 @@ EXPORT_SYMBOL(mlxsw_afk_values_add_buf); static void mlxsw_sp_afk_encode_u32(const struct mlxsw_item *storage_item, const struct mlxsw_item *output_item, - char *storage, char *output) + char *storage, char *output, int diff) { u32 value; value = __mlxsw_item_get32(storage, storage_item, 0); - __mlxsw_item_set32(output, output_item, 0, value); + __mlxsw_item_set32(output, output_item, 0, value + diff); } static void mlxsw_sp_afk_encode_buf(const struct mlxsw_item *storage_item, @@ -406,14 +407,14 @@ static void mlxsw_sp_afk_encode_buf(const struct mlxsw_item *storage_item, static void mlxsw_sp_afk_encode_one(const struct mlxsw_afk_element_inst *elinst, - char *output, char *storage) + char *output, char *storage, int u32_diff) { const struct mlxsw_item *storage_item = &elinst->info->item; const struct mlxsw_item *output_item = &elinst->item; if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32) mlxsw_sp_afk_encode_u32(storage_item, output_item, - storage, output); + storage, output, u32_diff); else if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_BUF) mlxsw_sp_afk_encode_buf(storage_item, output_item, storage, output); @@ -446,9 +447,10 @@ void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk, continue; mlxsw_sp_afk_encode_one(elinst, block_key, - values->storage.key); + values->storage.key, + elinst->u32_key_diff); mlxsw_sp_afk_encode_one(elinst, block_mask, - values->storage.mask); + values->storage.mask, 0); } mlxsw_afk->ops->encode_block(key, i, block_key); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index 4a625cdf3e7c..cb229b55ecc4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -74,7 +74,7 @@ struct mlxsw_afk_element_info { * define an internal storage geometry. 
*/ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { - MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 8), + MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 16), MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_32_47, 0x04, 2), MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_0_31, 0x06, 4), MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_32_47, 0x0A, 2), @@ -107,9 +107,14 @@ struct mlxsw_afk_element_inst { /* element instance in actual block */ const struct mlxsw_afk_element_info *info; enum mlxsw_afk_element_type type; struct mlxsw_item item; /* element geometry in block */ + int u32_key_diff; /* in case value needs to be adjusted before write + * this diff is here to handle that + */ + bool avoid_size_check; }; -#define MLXSW_AFK_ELEMENT_INST(_type, _element, _offset, _shift, _size) \ +#define MLXSW_AFK_ELEMENT_INST(_type, _element, _offset, \ + _shift, _size, _u32_key_diff, _avoid_size_check) \ { \ .info = &mlxsw_afk_element_infos[MLXSW_AFK_ELEMENT_##_element], \ .type = _type, \ @@ -119,15 +124,24 @@ struct mlxsw_afk_element_inst { /* element instance in actual block */ .size = {.bits = _size}, \ .name = #_element, \ }, \ + .u32_key_diff = _u32_key_diff, \ + .avoid_size_check = _avoid_size_check, \ } #define MLXSW_AFK_ELEMENT_INST_U32(_element, _offset, _shift, _size) \ MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_U32, \ - _element, _offset, _shift, _size) + _element, _offset, _shift, _size, 0, false) + +#define MLXSW_AFK_ELEMENT_INST_EXT_U32(_element, _offset, \ + _shift, _size, _key_diff, \ + _avoid_size_check) \ + MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_U32, \ + _element, _offset, _shift, _size, \ + _key_diff, _avoid_size_check) #define MLXSW_AFK_ELEMENT_INST_BUF(_element, _offset, _size) \ MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_BUF, \ - _element, _offset, 0, _size) + _element, _offset, 0, _size, 0, false) struct mlxsw_afk_block { u16 encoding; /* block ID */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c 
b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 72539a9a3847..d2c7ce67c300 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -92,33 +92,20 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, u16 temp; } temp_thresh; char mcia_pl[MLXSW_REG_MCIA_LEN] = {0}; - char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; - u16 module_temp; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + unsigned int module_temp; bool qsfp; int err; - mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, - 1); - err = mlxsw_reg_query(core, MLXSW_REG(mtbr), mtbr_pl); + mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, + false, false); + err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl); if (err) return err; - - /* Don't read temperature thresholds for module with no valid info. */ - mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &module_temp, NULL); - switch (module_temp) { - case MLXSW_REG_MTBR_BAD_SENS_INFO: /* fall-through */ - case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ - case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ - case MLXSW_REG_MTBR_INDEX_NA: + mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, NULL); + if (!module_temp) { *temp = 0; return 0; - default: - /* Do not consider thresholds for zero temperature. 
*/ - if (MLXSW_REG_MTMP_TEMP_TO_MC(module_temp) == 0) { - *temp = 0; - return 0; - } - break; } /* Read Free Side Device Temperature Thresholds from page 03h diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 496dc904c5ed..5b00726c4346 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -23,6 +23,14 @@ struct mlxsw_hwmon_attr { char name[32]; }; +static int mlxsw_hwmon_get_attr_index(int index, int count) +{ + if (index >= count) + return index % count + MLXSW_REG_MTMP_GBOX_INDEX_MIN; + + return index; +} + struct mlxsw_hwmon { struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; @@ -33,6 +41,7 @@ struct mlxsw_hwmon { struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT]; unsigned int attrs_count; u8 sensor_count; + u8 module_sensor_count; }; static ssize_t mlxsw_hwmon_temp_show(struct device *dev, @@ -43,18 +52,19 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp; + int temp, index; int err; - mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, - false, false); + index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, + mlxsw_hwmon->module_sensor_count); + mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); - return sprintf(buf, "%u\n", temp); + return sprintf(buf, "%d\n", temp); } static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, @@ -65,18 +75,19 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = 
mlwsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp_max; + int temp_max, index; int err; - mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, - false, false); + index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, + mlxsw_hwmon->module_sensor_count); + mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL); - return sprintf(buf, "%u\n", temp_max); + return sprintf(buf, "%d\n", temp_max); } static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, @@ -88,6 +99,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; unsigned long val; + int index; int err; err = kstrtoul(buf, 10, &val); @@ -96,7 +108,9 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, if (val != 1) return -EINVAL; - mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, true, true); + index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, + mlxsw_hwmon->module_sensor_count); + mlxsw_reg_mtmp_pack(mtmp_pl, index, true, true); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to reset temp sensor history\n"); @@ -198,40 +212,20 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev, struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; - char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; - u16 temp; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; u8 module; + int temp; int err; module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; - mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, - 1); - err = 
mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl); - if (err) { - dev_err(dev, "Failed to query module temperature sensor\n"); + mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, + false, false); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) return err; - } - - mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); - /* Update status and temperature cache. */ - switch (temp) { - case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ - case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ - case MLXSW_REG_MTBR_INDEX_NA: - temp = 0; - break; - case MLXSW_REG_MTBR_BAD_SENS_INFO: - /* Untrusted cable is connected. Reading temperature from its - * sensor is faulty. - */ - temp = 0; - break; - default: - temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); - break; - } + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); - return sprintf(buf, "%u\n", temp); + return sprintf(buf, "%d\n", temp); } static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev, @@ -333,6 +327,20 @@ mlxsw_hwmon_module_temp_label_show(struct device *dev, mlwsw_hwmon_attr->type_index); } +static ssize_t +mlxsw_hwmon_gbox_temp_label_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + int index = mlwsw_hwmon_attr->type_index - + mlxsw_hwmon->module_sensor_count + 1; + + return sprintf(buf, "gearbox %03u\n", index); +} + enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP, MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, @@ -345,6 +353,7 @@ enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, + MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL, }; static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, @@ -428,6 +437,13 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon 
*mlxsw_hwmon, snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), "temp%u_label", num + 1); break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_gbox_temp_label_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_label", num + 1); + break; default: WARN_ON(1); } @@ -556,6 +572,54 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) index, index); index++; } + mlxsw_hwmon->module_sensor_count = index; + + return 0; +} + +static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon) +{ + int index, max_index, sensor_index; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + u8 gbox_num; + int err; + + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL); + if (!gbox_num) + return 0; + + index = mlxsw_hwmon->module_sensor_count; + max_index = mlxsw_hwmon->module_sensor_count + gbox_num; + while (index < max_index) { + sensor_index = index % mlxsw_hwmon->module_sensor_count + + MLXSW_REG_MTMP_GBOX_INDEX_MIN; + mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, true, true); + err = mlxsw_reg_write(mlxsw_hwmon->core, + MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to setup temp sensor number %d\n", + sensor_index); + return err; + } + mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP, + index, index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, index, + index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_RST, index, + index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL, + index, index); + index++; + } return 0; } @@ -586,6 +650,10 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, if (err) goto err_temp_module_init; + err = 
mlxsw_hwmon_gearbox_init(mlxsw_hwmon); + if (err) + goto err_temp_gearbox_init; + mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group; mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs; @@ -602,6 +670,7 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, return 0; err_hwmon_register: +err_temp_gearbox_init: err_temp_module_init: err_fans_init: err_temp_init: diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index d3e851e7ca72..35a1dc89c28a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -23,6 +23,7 @@ #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ #define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) #define MLXSW_THERMAL_ZONE_MAX_NAME 16 +#define MLXSW_THERMAL_TEMP_SCORE_MAX GENMASK(31, 0) #define MLXSW_THERMAL_MAX_STATE 10 #define MLXSW_THERMAL_MAX_DUTY 255 /* Minimum and maximum fan allowed speed in percent: from 20% to 100%. 
Values @@ -98,7 +99,7 @@ struct mlxsw_thermal_module { struct thermal_zone_device *tzdev; struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; - int module; + int module; /* Module or gearbox number */ }; struct mlxsw_thermal { @@ -111,6 +112,10 @@ struct mlxsw_thermal { struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; struct mlxsw_thermal_module *tz_module_arr; + struct mlxsw_thermal_module *tz_gearbox_arr; + u8 tz_gearbox_num; + unsigned int tz_highest_score; + struct thermal_zone_device *tz_highest_dev; }; static inline u8 mlxsw_state_to_duty(int state) @@ -195,6 +200,34 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, return 0; } +static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal, + struct thermal_zone_device *tzdev, + struct mlxsw_thermal_trip *trips, + int temp) +{ + struct mlxsw_thermal_trip *trip = trips; + unsigned int score, delta, i, shift = 1; + + /* Calculate thermal zone score, if temperature is above the critical + * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX. 
+ */ + score = MLXSW_THERMAL_TEMP_SCORE_MAX; + for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS; + i++, trip++) { + if (temp < trip->temp) { + delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp); + score = delta * shift; + break; + } + shift *= 256; + } + + if (score > thermal->tz_highest_score) { + thermal->tz_highest_score = score; + thermal->tz_highest_dev = tzdev; + } +} + static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev, struct thermal_cooling_device *cdev) { @@ -279,7 +312,7 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, struct mlxsw_thermal *thermal = tzdev->devdata; struct device *dev = thermal->bus_info->dev; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp; + int temp; int err; mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false); @@ -290,8 +323,11 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + if (temp > 0) + mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips, + temp); - *p_temp = (int) temp; + *p_temp = temp; return 0; } @@ -351,6 +387,22 @@ static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev, return 0; } +static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev, + int trip, enum thermal_trend *trend) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) + return -EINVAL; + + if (tzdev == thermal->tz_highest_dev) + return 1; + + *trend = THERMAL_TREND_STABLE; + return 0; +} + static struct thermal_zone_device_ops mlxsw_thermal_ops = { .bind = mlxsw_thermal_bind, .unbind = mlxsw_thermal_unbind, @@ -362,6 +414,7 @@ static struct thermal_zone_device_ops mlxsw_thermal_ops = { .set_trip_temp = mlxsw_thermal_set_trip_temp, .get_trip_hyst = mlxsw_thermal_get_trip_hyst, .set_trip_hyst = mlxsw_thermal_set_trip_hyst, + .get_trend = mlxsw_thermal_trend_get, }; static int 
mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev, @@ -449,39 +502,33 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, struct mlxsw_thermal_module *tz = tzdev->devdata; struct mlxsw_thermal *thermal = tz->parent; struct device *dev = thermal->bus_info->dev; - char mtbr_pl[MLXSW_REG_MTBR_LEN]; - u16 temp; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + int temp; int err; /* Read module temperature. */ - mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + - tz->module, 1); - err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtbr), mtbr_pl); - if (err) - return err; - - mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); - /* Update temperature. */ - switch (temp) { - case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ - case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ - case MLXSW_REG_MTBR_INDEX_NA: /* fall-through */ - case MLXSW_REG_MTBR_BAD_SENS_INFO: + mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + + tz->module, false, false); + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + /* Do not return error - in case of broken module's sensor + * it will cause error message flooding. + */ temp = 0; - break; - default: - temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); - /* Reset all trip point. */ - mlxsw_thermal_module_trips_reset(tz); - /* Update trip points. */ - err = mlxsw_thermal_module_trips_update(dev, thermal->core, - tz); - if (err) - return err; - break; + *p_temp = (int) temp; + return 0; } + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + *p_temp = temp; + + if (!temp) + return 0; + + /* Update trip points. 
*/ + err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz); + if (!err && temp > 0) + mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); - *p_temp = (int) temp; return 0; } @@ -545,10 +592,6 @@ mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip, return 0; } -static struct thermal_zone_params mlxsw_thermal_module_params = { - .governor_name = "user_space", -}; - static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { .bind = mlxsw_thermal_module_bind, .unbind = mlxsw_thermal_module_unbind, @@ -560,6 +603,46 @@ static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { .set_trip_temp = mlxsw_thermal_module_trip_temp_set, .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, + .get_trend = mlxsw_thermal_trend_get, +}; + +static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev, + int *p_temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + u16 index; + int temp; + int err; + + index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module; + mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); + + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) + return err; + + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + if (temp > 0) + mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); + + *p_temp = temp; + return 0; +} + +static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = { + .bind = mlxsw_thermal_module_bind, + .unbind = mlxsw_thermal_module_unbind, + .get_mode = mlxsw_thermal_module_mode_get, + .set_mode = mlxsw_thermal_module_mode_set, + .get_temp = mlxsw_thermal_gearbox_temp_get, + .get_trip_type = mlxsw_thermal_module_trip_type_get, + .get_trip_temp = mlxsw_thermal_module_trip_temp_get, + .set_trip_temp = mlxsw_thermal_module_trip_temp_set, + .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, + 
.set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, + .get_trend = mlxsw_thermal_trend_get, }; static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev, @@ -675,13 +758,13 @@ mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz) MLXSW_THERMAL_TRIP_MASK, module_tz, &mlxsw_thermal_module_ops, - &mlxsw_thermal_module_params, - 0, 0); + NULL, 0, 0); if (IS_ERR(module_tz->tzdev)) { err = PTR_ERR(module_tz->tzdev); return err; } + module_tz->mode = THERMAL_DEVICE_ENABLED; return 0; } @@ -787,6 +870,92 @@ mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal) kfree(thermal->tz_module_arr); } +static int +mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz) +{ + char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME]; + + snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d", + gearbox_tz->module + 1); + gearbox_tz->tzdev = thermal_zone_device_register(tz_name, + MLXSW_THERMAL_NUM_TRIPS, + MLXSW_THERMAL_TRIP_MASK, + gearbox_tz, + &mlxsw_thermal_gearbox_ops, + NULL, 0, 0); + if (IS_ERR(gearbox_tz->tzdev)) + return PTR_ERR(gearbox_tz->tzdev); + + gearbox_tz->mode = THERMAL_DEVICE_ENABLED; + return 0; +} + +static void +mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz) +{ + thermal_zone_device_unregister(gearbox_tz->tzdev); +} + +static int +mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, + struct mlxsw_thermal *thermal) +{ + struct mlxsw_thermal_module *gearbox_tz; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + int i; + int err; + + if (!mlxsw_core_res_query_enabled(core)) + return 0; + + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL); + if (!thermal->tz_gearbox_num) + return 0; + + thermal->tz_gearbox_arr = kcalloc(thermal->tz_gearbox_num, + sizeof(*thermal->tz_gearbox_arr), + GFP_KERNEL); + if (!thermal->tz_gearbox_arr) + return -ENOMEM; + + for (i = 0; 
i < thermal->tz_gearbox_num; i++) { + gearbox_tz = &thermal->tz_gearbox_arr[i]; + memcpy(gearbox_tz->trips, default_thermal_trips, + sizeof(thermal->trips)); + gearbox_tz->module = i; + gearbox_tz->parent = thermal; + err = mlxsw_thermal_gearbox_tz_init(gearbox_tz); + if (err) + goto err_unreg_tz_gearbox; + } + + return 0; + +err_unreg_tz_gearbox: + for (i--; i >= 0; i--) + mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]); + kfree(thermal->tz_gearbox_arr); + return err; +} + +static void +mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal) +{ + int i; + + if (!mlxsw_core_res_query_enabled(thermal->core)) + return; + + for (i = thermal->tz_gearbox_num - 1; i >= 0; i--) + mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]); + kfree(thermal->tz_gearbox_arr); +} + int mlxsw_thermal_init(struct mlxsw_core *core, const struct mlxsw_bus_info *bus_info, struct mlxsw_thermal **p_thermal) @@ -877,10 +1046,16 @@ int mlxsw_thermal_init(struct mlxsw_core *core, if (err) goto err_unreg_tzdev; + err = mlxsw_thermal_gearboxes_init(dev, core, thermal); + if (err) + goto err_unreg_modules_tzdev; + thermal->mode = THERMAL_DEVICE_ENABLED; *p_thermal = thermal; return 0; +err_unreg_modules_tzdev: + mlxsw_thermal_modules_fini(thermal); err_unreg_tzdev: if (thermal->tzdev) { thermal_zone_device_unregister(thermal->tzdev); @@ -899,6 +1074,7 @@ void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) { int i; + mlxsw_thermal_gearboxes_fini(thermal); mlxsw_thermal_modules_fini(thermal); if (thermal->tzdev) { thermal_zone_device_unregister(thermal->tzdev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 06aea1999518..95f408d0e103 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -43,11 +43,10 @@ #define MLXSW_I2C_PREP_SIZE (MLXSW_I2C_ADDR_WIDTH + 28) #define MLXSW_I2C_MBOX_SIZE 20 #define MLXSW_I2C_MBOX_OUT_PARAM_OFF 12 -#define MLXSW_I2C_MAX_BUFF_SIZE 32 
#define MLXSW_I2C_MBOX_OFFSET_BITS 20 #define MLXSW_I2C_MBOX_SIZE_BITS 12 #define MLXSW_I2C_ADDR_BUF_SIZE 4 -#define MLXSW_I2C_BLK_MAX 32 +#define MLXSW_I2C_BLK_DEF 32 #define MLXSW_I2C_RETRY 5 #define MLXSW_I2C_TIMEOUT_MSECS 5000 #define MLXSW_I2C_MAX_DATA_SIZE 256 @@ -62,6 +61,7 @@ * @dev: I2C device; * @core: switch core pointer; * @bus_info: bus info block; + * @block_size: maximum block size allowed to pass to under layer; */ struct mlxsw_i2c { struct { @@ -74,6 +74,7 @@ struct mlxsw_i2c { struct device *dev; struct mlxsw_core *core; struct mlxsw_bus_info bus_info; + u16 block_size; }; #define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) { \ @@ -315,20 +316,26 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num, struct i2c_client *client = to_i2c_client(dev); struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client); unsigned long timeout = msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS); - u8 tran_buf[MLXSW_I2C_MAX_BUFF_SIZE + MLXSW_I2C_ADDR_BUF_SIZE]; int off = mlxsw_i2c->cmd.mb_off_in, chunk_size, i, j; unsigned long end; + u8 *tran_buf; struct i2c_msg write_tran = - MLXSW_I2C_WRITE_MSG(client, tran_buf, MLXSW_I2C_PUSH_CMD_SIZE); + MLXSW_I2C_WRITE_MSG(client, NULL, MLXSW_I2C_PUSH_CMD_SIZE); int err; + tran_buf = kmalloc(mlxsw_i2c->block_size + MLXSW_I2C_ADDR_BUF_SIZE, + GFP_KERNEL); + if (!tran_buf) + return -ENOMEM; + + write_tran.buf = tran_buf; for (i = 0; i < num; i++) { - chunk_size = (in_mbox_size > MLXSW_I2C_BLK_MAX) ? - MLXSW_I2C_BLK_MAX : in_mbox_size; + chunk_size = (in_mbox_size > mlxsw_i2c->block_size) ? 
+ mlxsw_i2c->block_size : in_mbox_size; write_tran.len = MLXSW_I2C_ADDR_WIDTH + chunk_size; mlxsw_i2c_set_slave_addr(tran_buf, off); memcpy(&tran_buf[MLXSW_I2C_ADDR_BUF_SIZE], in_mbox + - MLXSW_I2C_BLK_MAX * i, chunk_size); + mlxsw_i2c->block_size * i, chunk_size); j = 0; end = jiffies + timeout; @@ -342,9 +349,10 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num, (j++ < MLXSW_I2C_RETRY)); if (err != 1) { - if (!err) + if (!err) { err = -EIO; - return err; + goto mlxsw_i2c_write_exit; + } } off += chunk_size; @@ -355,24 +363,27 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num, err = mlxsw_i2c_write_cmd(client, mlxsw_i2c, 0); if (err) { dev_err(&client->dev, "Could not start transaction"); - return -EIO; + err = -EIO; + goto mlxsw_i2c_write_exit; } /* Wait until go bit is cleared. */ err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, p_status); if (err) { dev_err(&client->dev, "HW semaphore is not released"); - return err; + goto mlxsw_i2c_write_exit; } /* Validate transaction completion status. */ if (*p_status) { dev_err(&client->dev, "Bad transaction completion status %x\n", *p_status); - return -EIO; + err = -EIO; } - return 0; +mlxsw_i2c_write_exit: + kfree(tran_buf); + return err; } /* Routine executes I2C command. */ @@ -395,8 +406,8 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size, if (in_mbox) { reg_size = mlxsw_i2c_get_reg_size(in_mbox); - num = reg_size / MLXSW_I2C_BLK_MAX; - if (reg_size % MLXSW_I2C_BLK_MAX) + num = reg_size / mlxsw_i2c->block_size; + if (reg_size % mlxsw_i2c->block_size) num++; if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) { @@ -416,7 +427,7 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size, } else { /* No input mailbox is case of initialization query command. 
*/ reg_size = MLXSW_I2C_MAX_DATA_SIZE; - num = reg_size / MLXSW_I2C_BLK_MAX; + num = reg_size / mlxsw_i2c->block_size; if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) { dev_err(&client->dev, "Could not acquire lock"); @@ -432,8 +443,8 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size, /* Send read transaction to get output mailbox content. */ read_tran[1].buf = out_mbox; for (i = 0; i < num; i++) { - chunk_size = (reg_size > MLXSW_I2C_BLK_MAX) ? - MLXSW_I2C_BLK_MAX : reg_size; + chunk_size = (reg_size > mlxsw_i2c->block_size) ? + mlxsw_i2c->block_size : reg_size; read_tran[1].len = chunk_size; mlxsw_i2c_set_slave_addr(tran_buf, off); @@ -509,8 +520,20 @@ mlxsw_i2c_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (!mbox) return -ENOMEM; + err = mlxsw_cmd_query_fw(mlxsw_core, mbox); + if (err) + goto mbox_put; + + mlxsw_i2c->bus_info.fw_rev.major = + mlxsw_cmd_mbox_query_fw_fw_rev_major_get(mbox); + mlxsw_i2c->bus_info.fw_rev.minor = + mlxsw_cmd_mbox_query_fw_fw_rev_minor_get(mbox); + mlxsw_i2c->bus_info.fw_rev.subminor = + mlxsw_cmd_mbox_query_fw_fw_rev_subminor_get(mbox); + err = mlxsw_core_resources_query(mlxsw_core, mbox, res); +mbox_put: mlxsw_cmd_mbox_free(mbox); return err; } @@ -534,6 +557,7 @@ static const struct mlxsw_bus mlxsw_i2c_bus = { static int mlxsw_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { + const struct i2c_adapter_quirks *quirks = client->adapter->quirks; struct mlxsw_i2c *mlxsw_i2c; u8 status; int err; @@ -542,6 +566,22 @@ static int mlxsw_i2c_probe(struct i2c_client *client, if (!mlxsw_i2c) return -ENOMEM; + if (quirks) { + if ((quirks->max_read_len && + quirks->max_read_len < MLXSW_I2C_BLK_DEF) || + (quirks->max_write_len && + quirks->max_write_len < MLXSW_I2C_BLK_DEF)) { + dev_err(&client->dev, "Insufficient transaction buffer length\n"); + return -EOPNOTSUPP; + } + + mlxsw_i2c->block_size = max_t(u16, MLXSW_I2C_BLK_DEF, + min_t(u16, quirks->max_read_len, + 
quirks->max_write_len)); + } else { + mlxsw_i2c->block_size = MLXSW_I2C_BLK_DEF; + } + i2c_set_clientdata(client, mlxsw_i2c); mutex_init(&mlxsw_i2c->cmd.lock); diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index cf2114273b72..471b0ca6d69a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -67,6 +67,23 @@ static const struct net_device_ops mlxsw_m_port_netdev_ops = { .ndo_get_devlink_port = mlxsw_m_port_get_devlink_port, }; +static void mlxsw_m_module_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(dev); + struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m; + + strlcpy(drvinfo->driver, mlxsw_m->bus_info->device_kind, + sizeof(drvinfo->driver)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d", + mlxsw_m->bus_info->fw_rev.major, + mlxsw_m->bus_info->fw_rev.minor, + mlxsw_m->bus_info->fw_rev.subminor); + strlcpy(drvinfo->bus_info, mlxsw_m->bus_info->device_name, + sizeof(drvinfo->bus_info)); +} + static int mlxsw_m_get_module_info(struct net_device *netdev, struct ethtool_modinfo *modinfo) { @@ -88,6 +105,7 @@ mlxsw_m_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, } static const struct ethtool_ops mlxsw_m_port_ethtool_ops = { + .get_drvinfo = mlxsw_m_module_get_drvinfo, .get_module_info = mlxsw_m_get_module_info, .get_module_eeprom = mlxsw_m_get_module_eeprom, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index b40455f8293d..051b19388a81 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -102,6 +102,7 @@ struct mlxsw_pci_queue_type_group { struct mlxsw_pci { struct pci_dev *pdev; u8 __iomem *hw_addr; + u64 free_running_clock_offset; struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT]; u32 
doorbell_offset; struct mlxsw_core *core; @@ -507,17 +508,28 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci, { struct pci_dev *pdev = mlxsw_pci->pdev; struct mlxsw_pci_queue_elem_info *elem_info; + struct mlxsw_tx_info tx_info; char *wqe; struct sk_buff *skb; int i; spin_lock(&q->lock); elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); + tx_info = mlxsw_skb_cb(elem_info->u.sdq.skb)->tx_info; skb = elem_info->u.sdq.skb; wqe = elem_info->elem; for (i = 0; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE); - dev_kfree_skb_any(skb); + + if (unlikely(!tx_info.is_emad && + skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + mlxsw_core_ptp_transmitted(mlxsw_pci->core, skb, + tx_info.local_port); + skb = NULL; + } + + if (skb) + dev_kfree_skb_any(skb); elem_info->u.sdq.skb = NULL; if (q->consumer_counter++ != consumer_counter_limit) @@ -1414,6 +1426,15 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, mlxsw_pci->doorbell_offset = mlxsw_cmd_mbox_query_fw_doorbell_page_offset_get(mbox); + if (mlxsw_cmd_mbox_query_fw_fr_rn_clk_bar_get(mbox) != 0) { + dev_err(&pdev->dev, "Unsupported free running clock BAR queried from hw\n"); + err = -EINVAL; + goto err_fr_rn_clk_bar; + } + + mlxsw_pci->free_running_clock_offset = + mlxsw_cmd_mbox_query_fw_free_running_clock_offset_get(mbox); + num_pages = mlxsw_cmd_mbox_query_fw_fw_pages_get(mbox); err = mlxsw_pci_fw_area_init(mlxsw_pci, mbox, num_pages); if (err) @@ -1469,6 +1490,7 @@ err_query_resources: err_boardinfo: mlxsw_pci_fw_area_fini(mlxsw_pci); err_fw_area_init: +err_fr_rn_clk_bar: err_doorbell_page_bar: err_iface_rev: err_query_fw: @@ -1537,6 +1559,7 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, err = -EAGAIN; goto unlock; } + mlxsw_skb_cb(skb)->tx_info = *tx_info; elem_info->u.sdq.skb = skb; wqe = elem_info->elem; @@ -1560,6 +1583,9 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, 
goto unmap_frags; } + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + /* Set unused sq entries byte count to zero. */ for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) mlxsw_pci_wqe_byte_count_set(wqe, i, 0); @@ -1672,6 +1698,24 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod, return err; } +static u32 mlxsw_pci_read_frc_h(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + u64 frc_offset; + + frc_offset = mlxsw_pci->free_running_clock_offset; + return mlxsw_pci_read32(mlxsw_pci, FREE_RUNNING_CLOCK_H(frc_offset)); +} + +static u32 mlxsw_pci_read_frc_l(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + u64 frc_offset; + + frc_offset = mlxsw_pci->free_running_clock_offset; + return mlxsw_pci_read32(mlxsw_pci, FREE_RUNNING_CLOCK_L(frc_offset)); +} + static const struct mlxsw_bus mlxsw_pci_bus = { .kind = "pci", .init = mlxsw_pci_init, @@ -1679,6 +1723,8 @@ static const struct mlxsw_bus mlxsw_pci_bus = { .skb_transmit_busy = mlxsw_pci_skb_transmit_busy, .skb_transmit = mlxsw_pci_skb_transmit, .cmd_exec = mlxsw_pci_cmd_exec, + .read_frc_h = mlxsw_pci_read_frc_h, + .read_frc_l = mlxsw_pci_read_frc_l, .features = MLXSW_BUS_F_TXRX | MLXSW_BUS_F_RESET, }; @@ -1740,6 +1786,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mlxsw_pci->bus_info.device_kind = driver_name; mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev); mlxsw_pci->bus_info.dev = &pdev->dev; + mlxsw_pci->bus_info.read_frc_capable = true; mlxsw_pci->id = id; err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 8648ca171254..e57e42e2d2b2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -43,6 +43,9 @@ #define MLXSW_PCI_DOORBELL(offset, type_offset, num) \ ((offset) + (type_offset) + 
(num) * 4) +#define MLXSW_PCI_FREE_RUNNING_CLOCK_H(offset) (offset) +#define MLXSW_PCI_FREE_RUNNING_CLOCK_L(offset) ((offset) + 4) + #define MLXSW_PCI_CQS_MAX 96 #define MLXSW_PCI_EQS_COUNT 2 #define MLXSW_PCI_EQ_ASYNC_NUM 0 diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 7ed63ed657c7..ead36702549a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3515,6 +3515,18 @@ MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8); */ MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1); +/* reg_qeec_ptps + * PTP shaper + * 0: regular shaper mode + * 1: PTP oriented shaper + * Allowed only for hierarchy 0 + * Not supported for CPU port + * Note that ptps mode may affect the shaper rates of all hierarchies + * Supported only on Spectrum-1 + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, ptps, 0x0C, 29, 1); + enum { MLXSW_REG_QEEC_BYTES_MODE, MLXSW_REG_QEEC_PACKETS_MODE, @@ -3601,6 +3613,16 @@ static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, mlxsw_reg_qeec_next_element_index_set(payload, next_index); } +static inline void mlxsw_reg_qeec_ptps_pack(char *payload, u8 local_port, + bool ptps) +{ + MLXSW_REG_ZERO(qeec, payload); + mlxsw_reg_qeec_local_port_set(payload, local_port); + mlxsw_reg_qeec_element_hierarchy_set(payload, + MLXSW_REG_QEEC_HIERARCY_PORT); + mlxsw_reg_qeec_ptps_set(payload, ptps); +} + /* QRWE - QoS ReWrite Enable * ------------------------- * This register configures the rewrite enable per receive port. @@ -3814,6 +3836,112 @@ mlxsw_reg_qtctm_pack(char *payload, u8 local_port, bool mc) mlxsw_reg_qtctm_mc_set(payload, mc); } +/* QPSC - QoS PTP Shaper Configuration Register + * -------------------------------------------- + * The QPSC allows advanced configuration of the shapers when QEEC.ptps=1. + * Supported only on Spectrum-1. 
+ */ +#define MLXSW_REG_QPSC_ID 0x401B +#define MLXSW_REG_QPSC_LEN 0x28 + +MLXSW_REG_DEFINE(qpsc, MLXSW_REG_QPSC_ID, MLXSW_REG_QPSC_LEN); + +enum mlxsw_reg_qpsc_port_speed { + MLXSW_REG_QPSC_PORT_SPEED_100M, + MLXSW_REG_QPSC_PORT_SPEED_1G, + MLXSW_REG_QPSC_PORT_SPEED_10G, + MLXSW_REG_QPSC_PORT_SPEED_25G, +}; + +/* reg_qpsc_port_speed + * Port speed. + * Access: Index + */ +MLXSW_ITEM32(reg, qpsc, port_speed, 0x00, 0, 4); + +/* reg_qpsc_shaper_time_exp + * The base-time-interval for updating the shapers tokens (for all hierarchies). + * shaper_update_rate = 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32nSec + * shaper_rate = 64bit * shaper_inc / shaper_update_rate + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_time_exp, 0x04, 16, 4); + +/* reg_qpsc_shaper_time_mantissa + * The base-time-interval for updating the shapers tokens (for all hierarchies). + * shaper_update_rate = 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32nSec + * shaper_rate = 64bit * shaper_inc / shaper_update_rate + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_time_mantissa, 0x04, 0, 5); + +/* reg_qpsc_shaper_inc + * Number of tokens added to shaper on each update. + * Units of 8B. + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_inc, 0x08, 0, 5); + +/* reg_qpsc_shaper_bs + * Max shaper Burst size. + * Burst size is 2 ^ max_shaper_bs * 512 [bits] + * Range is: 5..25 (from 2KB..2GB) + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_bs, 0x0C, 0, 6); + +/* reg_qpsc_ptsc_we + * Write enable to port_to_shaper_credits. + * Access: WO + */ +MLXSW_ITEM32(reg, qpsc, ptsc_we, 0x10, 31, 1); + +/* reg_qpsc_port_to_shaper_credits + * For split ports: range 1..57 + * For non-split ports: range 1..112 + * Written only when ptsc_we is set. + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, port_to_shaper_credits, 0x10, 0, 8); + +/* reg_qpsc_ing_timestamp_inc + * Ingress timestamp increment. + * 2's complement. + * The timestamp of MTPPTR at ingress will be incremented by this value. 
Global + * value for all ports. + * Same units as used by MTPPTR. + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, ing_timestamp_inc, 0x20, 0, 32); + +/* reg_qpsc_egr_timestamp_inc + * Egress timestamp increment. + * 2's complement. + * The timestamp of MTPPTR at egress will be incremented by this value. Global + * value for all ports. + * Same units as used by MTPPTR. + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, egr_timestamp_inc, 0x24, 0, 32); + +static inline void +mlxsw_reg_qpsc_pack(char *payload, enum mlxsw_reg_qpsc_port_speed port_speed, + u8 shaper_time_exp, u8 shaper_time_mantissa, u8 shaper_inc, + u8 shaper_bs, u8 port_to_shaper_credits, + int ing_timestamp_inc, int egr_timestamp_inc) +{ + MLXSW_REG_ZERO(qpsc, payload); + mlxsw_reg_qpsc_port_speed_set(payload, port_speed); + mlxsw_reg_qpsc_shaper_time_exp_set(payload, shaper_time_exp); + mlxsw_reg_qpsc_shaper_time_mantissa_set(payload, shaper_time_mantissa); + mlxsw_reg_qpsc_shaper_inc_set(payload, shaper_inc); + mlxsw_reg_qpsc_shaper_bs_set(payload, shaper_bs); + mlxsw_reg_qpsc_ptsc_we_set(payload, true); + mlxsw_reg_qpsc_port_to_shaper_credits_set(payload, port_to_shaper_credits); + mlxsw_reg_qpsc_ing_timestamp_inc_set(payload, ing_timestamp_inc); + mlxsw_reg_qpsc_egr_timestamp_inc_set(payload, egr_timestamp_inc); +} + /* PMLP - Ports Module to Local Port Register * ------------------------------------------ * Configures the assignment of modules to local ports. @@ -5292,6 +5420,8 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR, + MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0, + MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1, }; /* reg_htgt_trap_group @@ -8039,16 +8169,21 @@ MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7); MLXSW_REG_DEFINE(mtmp, MLXSW_REG_MTMP_ID, MLXSW_REG_MTMP_LEN); +#define MLXSW_REG_MTMP_MODULE_INDEX_MIN 64 +#define MLXSW_REG_MTMP_GBOX_INDEX_MIN 256 /* reg_mtmp_sensor_index * Sensors index to access. 
* 64-127 of sensor_index are mapped to the SFP+/QSFP modules sequentially * (module 0 is mapped to sensor_index 64). * Access: Index */ -MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 7); +MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 12); /* Convert to milli degrees Celsius */ -#define MLXSW_REG_MTMP_TEMP_TO_MC(val) (val * 125) +#define MLXSW_REG_MTMP_TEMP_TO_MC(val) ({ typeof(val) v_ = (val); \ + ((v_) >= 0) ? ((v_) * 125) : \ + ((s16)((GENMASK(15, 0) + (v_) + 1) \ + * 125)); }) /* reg_mtmp_temperature * Temperature reading from the sensor. Reading is in 0.125 Celsius @@ -8107,7 +8242,7 @@ MLXSW_ITEM32(reg, mtmp, temperature_threshold_lo, 0x10, 0, 16); */ MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE); -static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index, +static inline void mlxsw_reg_mtmp_pack(char *payload, u16 sensor_index, bool max_temp_enable, bool max_temp_reset) { @@ -8119,11 +8254,10 @@ static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index, MLXSW_REG_MTMP_THRESH_HI); } -static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, - unsigned int *p_max_temp, - char *sensor_name) +static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp, + int *p_max_temp, char *sensor_name) { - u16 temp; + s16 temp; if (p_temp) { temp = mlxsw_reg_mtmp_temperature_get(payload); @@ -8156,7 +8290,7 @@ MLXSW_REG_DEFINE(mtbr, MLXSW_REG_MTBR_ID, MLXSW_REG_MTBR_LEN); * 64-127 are mapped to the SFP+/QSFP modules sequentially). 
* Access: Index */ -MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 7); +MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 12); /* reg_mtbr_num_rec * Request: Number of records to read @@ -8183,7 +8317,7 @@ MLXSW_ITEM32_INDEXED(reg, mtbr, rec_max_temp, MLXSW_REG_MTBR_BASE_LEN, 16, MLXSW_ITEM32_INDEXED(reg, mtbr, rec_temp, MLXSW_REG_MTBR_BASE_LEN, 0, 16, MLXSW_REG_MTBR_REC_LEN, 0x00, false); -static inline void mlxsw_reg_mtbr_pack(char *payload, u8 base_sensor_index, +static inline void mlxsw_reg_mtbr_pack(char *payload, u16 base_sensor_index, u8 num_rec) { MLXSW_REG_ZERO(mtbr, payload); @@ -8689,6 +8823,107 @@ static inline void mlxsw_reg_mlcr_pack(char *payload, u8 local_port, MLXSW_REG_MLCR_DURATION_MAX : 0); } +/* MTPPS - Management Pulse Per Second Register + * -------------------------------------------- + * This register provides the device PPS capabilities, configure the PPS in and + * out modules and holds the PPS in time stamp. + */ +#define MLXSW_REG_MTPPS_ID 0x9053 +#define MLXSW_REG_MTPPS_LEN 0x3C + +MLXSW_REG_DEFINE(mtpps, MLXSW_REG_MTPPS_ID, MLXSW_REG_MTPPS_LEN); + +/* reg_mtpps_enable + * Enables the PPS functionality the specific pin. + * A boolean variable. + * Access: RW + */ +MLXSW_ITEM32(reg, mtpps, enable, 0x20, 31, 1); + +enum mlxsw_reg_mtpps_pin_mode { + MLXSW_REG_MTPPS_PIN_MODE_VIRTUAL_PIN = 0x2, +}; + +/* reg_mtpps_pin_mode + * Pin mode to be used. The mode must comply with the supported modes of the + * requested pin. + * Access: RW + */ +MLXSW_ITEM32(reg, mtpps, pin_mode, 0x20, 8, 4); + +#define MLXSW_REG_MTPPS_PIN_SP_VIRTUAL_PIN 7 + +/* reg_mtpps_pin + * Pin to be configured or queried out of the supported pins. + * Access: Index + */ +MLXSW_ITEM32(reg, mtpps, pin, 0x20, 0, 8); + +/* reg_mtpps_time_stamp + * When pin_mode = pps_in, the latched device time when it was triggered from + * the external GPIO pin. + * When pin_mode = pps_out or virtual_pin or pps_out_and_virtual_pin, the target + * time to generate next output signal. 
+ * Time is in units of device clock. + * Access: RW + */ +MLXSW_ITEM64(reg, mtpps, time_stamp, 0x28, 0, 64); + +static inline void +mlxsw_reg_mtpps_vpin_pack(char *payload, u64 time_stamp) +{ + MLXSW_REG_ZERO(mtpps, payload); + mlxsw_reg_mtpps_pin_set(payload, MLXSW_REG_MTPPS_PIN_SP_VIRTUAL_PIN); + mlxsw_reg_mtpps_pin_mode_set(payload, + MLXSW_REG_MTPPS_PIN_MODE_VIRTUAL_PIN); + mlxsw_reg_mtpps_enable_set(payload, true); + mlxsw_reg_mtpps_time_stamp_set(payload, time_stamp); +} + +/* MTUTC - Management UTC Register + * ------------------------------- + * Configures the HW UTC counter. + */ +#define MLXSW_REG_MTUTC_ID 0x9055 +#define MLXSW_REG_MTUTC_LEN 0x1C + +MLXSW_REG_DEFINE(mtutc, MLXSW_REG_MTUTC_ID, MLXSW_REG_MTUTC_LEN); + +enum mlxsw_reg_mtutc_operation { + MLXSW_REG_MTUTC_OPERATION_SET_TIME_AT_NEXT_SEC = 0, + MLXSW_REG_MTUTC_OPERATION_ADJUST_FREQ = 3, +}; + +/* reg_mtutc_operation + * Operation. + * Access: OP + */ +MLXSW_ITEM32(reg, mtutc, operation, 0x00, 0, 4); + +/* reg_mtutc_freq_adjustment + * Frequency adjustment: Every PPS the HW frequency will be + * adjusted by this value. Units of HW clock, where HW counts + * 10^9 HW clocks for 1 HW second. + * Access: RW + */ +MLXSW_ITEM32(reg, mtutc, freq_adjustment, 0x04, 0, 32); + +/* reg_mtutc_utc_sec + * UTC seconds. + * Access: WO + */ +MLXSW_ITEM32(reg, mtutc, utc_sec, 0x10, 0, 32); + +static inline void +mlxsw_reg_mtutc_pack(char *payload, enum mlxsw_reg_mtutc_operation oper, + u32 freq_adj, u32 utc_sec) +{ + MLXSW_REG_ZERO(mtutc, payload); + mlxsw_reg_mtutc_operation_set(payload, oper); + mlxsw_reg_mtutc_freq_adjustment_set(payload, freq_adj); + mlxsw_reg_mtutc_utc_sec_set(payload, utc_sec); +} + /* MCQI - Management Component Query Information * --------------------------------------------- * This register allows querying information about firmware components. 
@@ -9043,6 +9278,267 @@ static inline void mlxsw_reg_mprs_pack(char *payload, u16 parsing_depth, mlxsw_reg_mprs_vxlan_udp_dport_set(payload, vxlan_udp_dport); } +/* MOGCR - Monitoring Global Configuration Register + * ------------------------------------------------ + */ +#define MLXSW_REG_MOGCR_ID 0x9086 +#define MLXSW_REG_MOGCR_LEN 0x20 + +MLXSW_REG_DEFINE(mogcr, MLXSW_REG_MOGCR_ID, MLXSW_REG_MOGCR_LEN); + +/* reg_mogcr_ptp_iftc + * PTP Ingress FIFO Trap Clear + * The PTP_ING_FIFO trap provides MTPPTR with clr according + * to this value. Default 0. + * Reserved when IB switches and when SwitchX/-2, Spectrum-2 + * Access: RW + */ +MLXSW_ITEM32(reg, mogcr, ptp_iftc, 0x00, 1, 1); + +/* reg_mogcr_ptp_eftc + * PTP Egress FIFO Trap Clear + * The PTP_EGR_FIFO trap provides MTPPTR with clr according + * to this value. Default 0. + * Reserved when IB switches and when SwitchX/-2, Spectrum-2 + * Access: RW + */ +MLXSW_ITEM32(reg, mogcr, ptp_eftc, 0x00, 0, 1); + +/* MTPPPC - Time Precision Packet Port Configuration + * ------------------------------------------------- + * This register serves for configuration of which PTP messages should be + * timestamped. This is a global configuration, despite the register name. + * + * Reserved when Spectrum-2. + */ +#define MLXSW_REG_MTPPPC_ID 0x9090 +#define MLXSW_REG_MTPPPC_LEN 0x28 + +MLXSW_REG_DEFINE(mtpppc, MLXSW_REG_MTPPPC_ID, MLXSW_REG_MTPPPC_LEN); + +/* reg_mtpppc_ing_timestamp_message_type + * Bitwise vector of PTP message types to timestamp at ingress. + * MessageType field as defined by IEEE 1588 + * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req) + * Default all 0 + * Access: RW + */ +MLXSW_ITEM32(reg, mtpppc, ing_timestamp_message_type, 0x08, 0, 16); + +/* reg_mtpppc_egr_timestamp_message_type + * Bitwise vector of PTP message types to timestamp at egress. + * MessageType field as defined by IEEE 1588 + * Each bit corresponds to a value (e.g. 
Bit0: Sync, Bit1: Delay_Req) + * Default all 0 + * Access: RW + */ +MLXSW_ITEM32(reg, mtpppc, egr_timestamp_message_type, 0x0C, 0, 16); + +static inline void mlxsw_reg_mtpppc_pack(char *payload, u16 ing, u16 egr) +{ + MLXSW_REG_ZERO(mtpppc, payload); + mlxsw_reg_mtpppc_ing_timestamp_message_type_set(payload, ing); + mlxsw_reg_mtpppc_egr_timestamp_message_type_set(payload, egr); +} + +/* MTPPTR - Time Precision Packet Timestamping Reading + * --------------------------------------------------- + * The MTPPTR is used for reading the per port PTP timestamp FIFO. + * There is a trap for packets which are latched to the timestamp FIFO, thus the + * SW knows which FIFO to read. Note that packets enter the FIFO before been + * trapped. The sequence number is used to synchronize the timestamp FIFO + * entries and the trapped packets. + * Reserved when Spectrum-2. + */ + +#define MLXSW_REG_MTPPTR_ID 0x9091 +#define MLXSW_REG_MTPPTR_BASE_LEN 0x10 /* base length, without records */ +#define MLXSW_REG_MTPPTR_REC_LEN 0x10 /* record length */ +#define MLXSW_REG_MTPPTR_REC_MAX_COUNT 4 +#define MLXSW_REG_MTPPTR_LEN (MLXSW_REG_MTPPTR_BASE_LEN + \ + MLXSW_REG_MTPPTR_REC_LEN * MLXSW_REG_MTPPTR_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(mtpptr, MLXSW_REG_MTPPTR_ID, MLXSW_REG_MTPPTR_LEN); + +/* reg_mtpptr_local_port + * Not supported for CPU port. + * Access: Index + */ +MLXSW_ITEM32(reg, mtpptr, local_port, 0x00, 16, 8); + +enum mlxsw_reg_mtpptr_dir { + MLXSW_REG_MTPPTR_DIR_INGRESS, + MLXSW_REG_MTPPTR_DIR_EGRESS, +}; + +/* reg_mtpptr_dir + * Direction. + * Access: Index + */ +MLXSW_ITEM32(reg, mtpptr, dir, 0x00, 0, 1); + +/* reg_mtpptr_clr + * Clear the records. + * Access: OP + */ +MLXSW_ITEM32(reg, mtpptr, clr, 0x04, 31, 1); + +/* reg_mtpptr_num_rec + * Number of valid records in the response + * Range 0.. 
cap_ptp_timestamp_fifo + * Access: RO + */ +MLXSW_ITEM32(reg, mtpptr, num_rec, 0x08, 0, 4); + +/* reg_mtpptr_rec_message_type + * MessageType field as defined by IEEE 1588 Each bit corresponds to a value + * (e.g. Bit0: Sync, Bit1: Delay_Req) + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_message_type, + MLXSW_REG_MTPPTR_BASE_LEN, 8, 4, + MLXSW_REG_MTPPTR_REC_LEN, 0, false); + +/* reg_mtpptr_rec_domain_number + * DomainNumber field as defined by IEEE 1588 + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_domain_number, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 8, + MLXSW_REG_MTPPTR_REC_LEN, 0, false); + +/* reg_mtpptr_rec_sequence_id + * SequenceId field as defined by IEEE 1588 + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_sequence_id, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 16, + MLXSW_REG_MTPPTR_REC_LEN, 0x4, false); + +/* reg_mtpptr_rec_timestamp_high + * Timestamp of when the PTP packet has passed through the port Units of PLL + * clock time. + * For Spectrum-1 the PLL clock is 156.25Mhz and PLL clock time is 6.4nSec. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_high, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 32, + MLXSW_REG_MTPPTR_REC_LEN, 0x8, false); + +/* reg_mtpptr_rec_timestamp_low + * See rec_timestamp_high. 
+ * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_low, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 32, + MLXSW_REG_MTPPTR_REC_LEN, 0xC, false); + +static inline void mlxsw_reg_mtpptr_unpack(const char *payload, + unsigned int rec, + u8 *p_message_type, + u8 *p_domain_number, + u16 *p_sequence_id, + u64 *p_timestamp) +{ + u32 timestamp_high, timestamp_low; + + *p_message_type = mlxsw_reg_mtpptr_rec_message_type_get(payload, rec); + *p_domain_number = mlxsw_reg_mtpptr_rec_domain_number_get(payload, rec); + *p_sequence_id = mlxsw_reg_mtpptr_rec_sequence_id_get(payload, rec); + timestamp_high = mlxsw_reg_mtpptr_rec_timestamp_high_get(payload, rec); + timestamp_low = mlxsw_reg_mtpptr_rec_timestamp_low_get(payload, rec); + *p_timestamp = (u64)timestamp_high << 32 | timestamp_low; +} + +/* MTPTPT - Monitoring Precision Time Protocol Trap Register + * --------------------------------------------------------- + * This register is used for configuring under which trap to deliver PTP + * packets depending on type of the packet. + */ +#define MLXSW_REG_MTPTPT_ID 0x9092 +#define MLXSW_REG_MTPTPT_LEN 0x08 + +MLXSW_REG_DEFINE(mtptpt, MLXSW_REG_MTPTPT_ID, MLXSW_REG_MTPTPT_LEN); + +enum mlxsw_reg_mtptpt_trap_id { + MLXSW_REG_MTPTPT_TRAP_ID_PTP0, + MLXSW_REG_MTPTPT_TRAP_ID_PTP1, +}; + +/* reg_mtptpt_trap_id + * Trap id. + * Access: Index + */ +MLXSW_ITEM32(reg, mtptpt, trap_id, 0x00, 0, 4); + +/* reg_mtptpt_message_type + * Bitwise vector of PTP message types to trap. This is a necessary but + * non-sufficient condition since need to enable also per port. See MTPPPC. + * Message types are defined by IEEE 1588 Each bit corresponds to a value (e.g. 
+ * Bit0: Sync, Bit1: Delay_Req) + */ +MLXSW_ITEM32(reg, mtptpt, message_type, 0x04, 0, 16); + +static inline void mlxsw_reg_mtptptp_pack(char *payload, + enum mlxsw_reg_mtptpt_trap_id trap_id, + u16 message_type) +{ + MLXSW_REG_ZERO(mtptpt, payload); + mlxsw_reg_mtptpt_trap_id_set(payload, trap_id); + mlxsw_reg_mtptpt_message_type_set(payload, message_type); +} + +/* MGPIR - Management General Peripheral Information Register + * ---------------------------------------------------------- + * MGPIR register allows software to query the hardware and + * firmware general information of peripheral entities. + */ +#define MLXSW_REG_MGPIR_ID 0x9100 +#define MLXSW_REG_MGPIR_LEN 0xA0 + +MLXSW_REG_DEFINE(mgpir, MLXSW_REG_MGPIR_ID, MLXSW_REG_MGPIR_LEN); + +enum mlxsw_reg_mgpir_device_type { + MLXSW_REG_MGPIR_DEVICE_TYPE_NONE, + MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE, +}; + +/* device_type + * Access: RO + */ +MLXSW_ITEM32(reg, mgpir, device_type, 0x00, 24, 4); + +/* devices_per_flash + * Number of devices of device_type per flash (can be shared by few devices). + * Access: RO + */ +MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8); + +/* num_of_devices + * Number of devices of device_type. + * Access: RO + */ +MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8); + +static inline void mlxsw_reg_mgpir_pack(char *payload) +{ + MLXSW_REG_ZERO(mgpir, payload); +} + +static inline void +mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices, + enum mlxsw_reg_mgpir_device_type *device_type, + u8 *devices_per_flash) +{ + if (num_of_devices) + *num_of_devices = mlxsw_reg_mgpir_num_of_devices_get(payload); + if (device_type) + *device_type = mlxsw_reg_mgpir_device_type_get(payload); + if (devices_per_flash) + *devices_per_flash = + mlxsw_reg_mgpir_devices_per_flash_get(payload); +} + /* TNGCR - Tunneling NVE General Configuration Register * ---------------------------------------------------- * The TNGCR register is used for setting up the NVE Tunneling configuration. 
@@ -10006,6 +10502,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(qpdsm), MLXSW_REG(qpdpm), MLXSW_REG(qtctm), + MLXSW_REG(qpsc), MLXSW_REG(pmlp), MLXSW_REG(pmtu), MLXSW_REG(ptys), @@ -10052,12 +10549,19 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mgir), MLXSW_REG(mrsr), MLXSW_REG(mlcr), + MLXSW_REG(mtpps), + MLXSW_REG(mtutc), MLXSW_REG(mpsc), MLXSW_REG(mcqi), MLXSW_REG(mcc), MLXSW_REG(mcda), MLXSW_REG(mgpc), MLXSW_REG(mprs), + MLXSW_REG(mogcr), + MLXSW_REG(mtpppc), + MLXSW_REG(mtpptr), + MLXSW_REG(mtptpt), + MLXSW_REG(mgpir), MLXSW_REG(tngcr), MLXSW_REG(tnumt), MLXSW_REG(tnqcr), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 23204356ad88..ce285fbeebd3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -41,6 +41,7 @@ #include "spectrum_dpipe.h" #include "spectrum_acl_flex_actions.h" #include "spectrum_span.h" +#include "spectrum_ptp.h" #include "../mlxfw/mlxfw.h" #define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100) @@ -146,6 +147,35 @@ struct mlxsw_sp_mlxfw_dev { struct mlxsw_sp *mlxsw_sp; }; +struct mlxsw_sp_ptp_ops { + struct mlxsw_sp_ptp_clock * + (*clock_init)(struct mlxsw_sp *mlxsw_sp, struct device *dev); + void (*clock_fini)(struct mlxsw_sp_ptp_clock *clock); + + struct mlxsw_sp_ptp_state *(*init)(struct mlxsw_sp *mlxsw_sp); + void (*fini)(struct mlxsw_sp_ptp_state *ptp_state); + + /* Notify a driver that a packet that might be PTP was received. Driver + * is responsible for freeing the passed-in SKB. + */ + void (*receive)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port); + + /* Notify a driver that a timestamped packet was transmitted. Driver + * is responsible for freeing the passed-in SKB. 
+ */ + void (*transmitted)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port); + + int (*hwtstamp_get)(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + int (*hwtstamp_set)(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + void (*shaper_work)(struct work_struct *work); + int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info); +}; + static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev, u16 component_index, u32 *p_max_size, u8 *p_align_bits, u16 *p_max_write_size) @@ -294,6 +324,19 @@ static void mlxsw_sp_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); } +static void mlxsw_sp_status_notify(struct mlxfw_dev *mlxfw_dev, + const char *msg, const char *comp_name, + u32 done_bytes, u32 total_bytes) +{ + struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = + container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; + + devlink_flash_update_status_notify(priv_to_devlink(mlxsw_sp->core), + msg, comp_name, + done_bytes, total_bytes); +} + static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = { .component_query = mlxsw_sp_component_query, .fsm_lock = mlxsw_sp_fsm_lock, @@ -303,11 +346,13 @@ static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = { .fsm_activate = mlxsw_sp_fsm_activate, .fsm_query_state = mlxsw_sp_fsm_query_state, .fsm_cancel = mlxsw_sp_fsm_cancel, - .fsm_release = mlxsw_sp_fsm_release + .fsm_release = mlxsw_sp_fsm_release, + .status_notify = mlxsw_sp_status_notify, }; static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, - const struct firmware *firmware) + const struct firmware *firmware, + struct netlink_ext_ack *extack) { struct mlxsw_sp_mlxfw_dev mlxsw_sp_mlxfw_dev = { .mlxfw_dev = { @@ -320,7 +365,10 @@ static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, int err; mlxsw_core_fw_flash_start(mlxsw_sp->core); - err = 
mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, firmware); + devlink_flash_update_begin_notify(priv_to_devlink(mlxsw_sp->core)); + err = mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, + firmware, extack); + devlink_flash_update_end_notify(priv_to_devlink(mlxsw_sp->core)); mlxsw_core_fw_flash_end(mlxsw_sp->core); return err; @@ -374,7 +422,7 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) return err; } - err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware); + err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware, NULL); release_firmware(firmware); if (err) dev_err(mlxsw_sp->bus_info->dev, "Could not upgrade firmware\n"); @@ -388,6 +436,27 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) return 0; } +static int mlxsw_sp_flash_update(struct mlxsw_core *mlxsw_core, + const char *file_name, const char *component, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + const struct firmware *firmware; + int err; + + if (component) + return -EOPNOTSUPP; + + err = request_firmware_direct(&firmware, file_name, + mlxsw_sp->bus_info->dev); + if (err) + return err; + err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware, extack); + release_firmware(firmware); + + return err; +} + int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index, u64 *packets, u64 *bytes) @@ -738,6 +807,8 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, u64 len; int err; + memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb)); + if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info)) return NETDEV_TX_BUSY; @@ -1745,6 +1816,65 @@ mlxsw_sp_port_get_devlink_port(struct net_device *dev) mlxsw_sp_port->local_port); } +static int mlxsw_sp_port_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + err = 
mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, + &config); + if (err) + return err; + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + + return 0; +} + +static int mlxsw_sp_port_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port, + &config); + if (err) + return err; + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + + return 0; +} + +static inline void mlxsw_sp_port_ptp_clear(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct hwtstamp_config config = {0}; + + mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config); +} + +static int +mlxsw_sp_port_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + switch (cmd) { + case SIOCSHWTSTAMP: + return mlxsw_sp_port_hwtstamp_set(mlxsw_sp_port, ifr); + case SIOCGHWTSTAMP: + return mlxsw_sp_port_hwtstamp_get(mlxsw_sp_port, ifr); + default: + return -EOPNOTSUPP; + } +} + static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_open = mlxsw_sp_port_open, .ndo_stop = mlxsw_sp_port_stop, @@ -1760,6 +1890,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, .ndo_set_features = mlxsw_sp_set_features, .ndo_get_devlink_port = mlxsw_sp_port_get_devlink_port, + .ndo_do_ioctl = mlxsw_sp_port_ioctl, }; static void mlxsw_sp_port_get_drvinfo(struct net_device *dev, @@ -2525,28 +2656,33 @@ mlxsw_sp1_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, } } +static u32 +mlxsw_sp1_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto) +{ + int i; + + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) + return mlxsw_sp1_port_link_mode[i].speed; + } + + return SPEED_UNKNOWN; +} + static void 
mlxsw_sp1_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd) { - u32 speed = SPEED_UNKNOWN; - u8 duplex = DUPLEX_UNKNOWN; - int i; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; if (!carrier_ok) - goto out; + return; - for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { - if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) { - speed = mlxsw_sp1_port_link_mode[i].speed; - duplex = DUPLEX_FULL; - break; - } - } -out: - cmd->base.speed = speed; - cmd->base.duplex = duplex; + cmd->base.speed = mlxsw_sp1_from_ptys_speed(mlxsw_sp, ptys_eth_proto); + if (cmd->base.speed != SPEED_UNKNOWN) + cmd->base.duplex = DUPLEX_FULL; } static u32 @@ -2617,6 +2753,7 @@ static const struct mlxsw_sp_port_type_speed_ops mlxsw_sp1_port_type_speed_ops = { .from_ptys_supported_port = mlxsw_sp1_from_ptys_supported_port, .from_ptys_link = mlxsw_sp1_from_ptys_link, + .from_ptys_speed = mlxsw_sp1_from_ptys_speed, .from_ptys_speed_duplex = mlxsw_sp1_from_ptys_speed_duplex, .to_ptys_advert_link = mlxsw_sp1_to_ptys_advert_link, .to_ptys_speed = mlxsw_sp1_to_ptys_speed, @@ -2867,28 +3004,33 @@ mlxsw_sp2_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, } } +static u32 +mlxsw_sp2_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto) +{ + int i; + + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) + return mlxsw_sp2_port_link_mode[i].speed; + } + + return SPEED_UNKNOWN; +} + static void mlxsw_sp2_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd) { - u32 speed = SPEED_UNKNOWN; - u8 duplex = DUPLEX_UNKNOWN; - int i; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; if (!carrier_ok) - goto out; + return; - for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { - if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) { - speed = 
mlxsw_sp2_port_link_mode[i].speed; - duplex = DUPLEX_FULL; - break; - } - } -out: - cmd->base.speed = speed; - cmd->base.duplex = duplex; + cmd->base.speed = mlxsw_sp2_from_ptys_speed(mlxsw_sp, ptys_eth_proto); + if (cmd->base.speed != SPEED_UNKNOWN) + cmd->base.duplex = DUPLEX_FULL; } static bool @@ -2999,6 +3141,7 @@ static const struct mlxsw_sp_port_type_speed_ops mlxsw_sp2_port_type_speed_ops = { .from_ptys_supported_port = mlxsw_sp2_from_ptys_supported_port, .from_ptys_link = mlxsw_sp2_from_ptys_link, + .from_ptys_speed = mlxsw_sp2_from_ptys_speed, .from_ptys_speed_duplex = mlxsw_sp2_from_ptys_speed_duplex, .to_ptys_advert_link = mlxsw_sp2_to_ptys_advert_link, .to_ptys_speed = mlxsw_sp2_to_ptys_speed, @@ -3159,31 +3302,6 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, return 0; } -static int mlxsw_sp_flash_device(struct net_device *dev, - struct ethtool_flash *flash) -{ - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - const struct firmware *firmware; - int err; - - if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) - return -EOPNOTSUPP; - - dev_hold(dev); - rtnl_unlock(); - - err = request_firmware_direct(&firmware, flash->data, &dev->dev); - if (err) - goto out; - err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware); - release_firmware(firmware); -out: - rtnl_lock(); - dev_put(dev); - return err; -} - static int mlxsw_sp_get_module_info(struct net_device *netdev, struct ethtool_modinfo *modinfo) { @@ -3213,6 +3331,15 @@ static int mlxsw_sp_get_module_eeprom(struct net_device *netdev, return err; } +static int +mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + return mlxsw_sp->ptp_ops->get_ts_info(mlxsw_sp, info); +} + static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_drvinfo = mlxsw_sp_port_get_drvinfo, .get_link = 
ethtool_op_get_link, @@ -3224,9 +3351,9 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_sset_count = mlxsw_sp_port_get_sset_count, .get_link_ksettings = mlxsw_sp_port_get_link_ksettings, .set_link_ksettings = mlxsw_sp_port_set_link_ksettings, - .flash_device = mlxsw_sp_flash_device, .get_module_info = mlxsw_sp_get_module_info, .get_module_eeprom = mlxsw_sp_get_module_eeprom, + .get_ts_info = mlxsw_sp_get_ts_info, }; static int @@ -3343,8 +3470,9 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) return err; } - /* Make sure the max shaper is disabled in all hierarchies that - * support it. + /* Make sure the max shaper is disabled in all hierarchies that support + * it. Note that this disables ptps (PTP shaper), but that is intended + * for the initial configuration. */ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0, @@ -3589,6 +3717,9 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, } mlxsw_sp_port->default_vlan = mlxsw_sp_port_vlan; + INIT_DELAYED_WORK(&mlxsw_sp_port->ptp.shaper_dw, + mlxsw_sp->ptp_ops->shaper_work); + mlxsw_sp->ports[local_port] = mlxsw_sp_port; err = register_netdev(dev); if (err) { @@ -3643,6 +3774,8 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw); + cancel_delayed_work_sync(&mlxsw_sp_port->ptp.shaper_dw); + mlxsw_sp_port_ptp_clear(mlxsw_sp_port); mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp->ports[local_port] = NULL; @@ -3927,14 +4060,55 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, if (status == MLXSW_PORT_OPER_STATUS_UP) { netdev_info(mlxsw_sp_port->dev, "link up\n"); netif_carrier_on(mlxsw_sp_port->dev); + 
mlxsw_core_schedule_dw(&mlxsw_sp_port->ptp.shaper_dw, 0); } else { netdev_info(mlxsw_sp_port->dev, "link down\n"); netif_carrier_off(mlxsw_sp_port->dev); } } -static void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, - u8 local_port, void *priv) +static void mlxsw_sp1_ptp_fifo_event_func(struct mlxsw_sp *mlxsw_sp, + char *mtpptr_pl, bool ingress) +{ + u8 local_port; + u8 num_rec; + int i; + + local_port = mlxsw_reg_mtpptr_local_port_get(mtpptr_pl); + num_rec = mlxsw_reg_mtpptr_num_rec_get(mtpptr_pl); + for (i = 0; i < num_rec; i++) { + u8 domain_number; + u8 message_type; + u16 sequence_id; + u64 timestamp; + + mlxsw_reg_mtpptr_unpack(mtpptr_pl, i, &message_type, + &domain_number, &sequence_id, + &timestamp); + mlxsw_sp1_ptp_got_timestamp(mlxsw_sp, ingress, local_port, + message_type, domain_number, + sequence_id, timestamp); + } +} + +static void mlxsw_sp1_ptp_ing_fifo_event_func(const struct mlxsw_reg_info *reg, + char *mtpptr_pl, void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, true); +} + +static void mlxsw_sp1_ptp_egr_fifo_event_func(const struct mlxsw_reg_info *reg, + char *mtpptr_pl, void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, false); +} + +void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, + u8 local_port, void *priv) { struct mlxsw_sp *mlxsw_sp = priv; struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; @@ -4008,6 +4182,14 @@ out: consume_skb(skb); } +static void mlxsw_sp_rx_listener_ptp(struct sk_buff *skb, u8 local_port, + void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port); +} + #define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) @@ -4029,7 +4211,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { /* L2 traps */ 
MLXSW_SP_RXL_NO_MARK(STP, TRAP_TO_CPU, STP, true), MLXSW_SP_RXL_NO_MARK(LACP, TRAP_TO_CPU, LACP, true), - MLXSW_SP_RXL_NO_MARK(LLDP, TRAP_TO_CPU, LLDP, true), + MLXSW_RXL(mlxsw_sp_rx_listener_ptp, LLDP, TRAP_TO_CPU, + false, SP_LLDP, DISCARD), MLXSW_SP_RXL_MARK(DHCP, MIRROR_TO_CPU, DHCP, false), MLXSW_SP_RXL_MARK(IGMP_QUERY, MIRROR_TO_CPU, IGMP, false), MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, TRAP_TO_CPU, IGMP, false), @@ -4098,6 +4281,16 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { /* NVE traps */ MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false), MLXSW_SP_RXL_NO_MARK(NVE_DECAP_ARP, TRAP_TO_CPU, ARP, false), + /* PTP traps */ + MLXSW_RXL(mlxsw_sp_rx_listener_ptp, PTP0, TRAP_TO_CPU, + false, SP_PTP0, DISCARD), + MLXSW_SP_RXL_NO_MARK(PTP1, TRAP_TO_CPU, PTP1, false), +}; + +static const struct mlxsw_listener mlxsw_sp1_listener[] = { + /* Events */ + MLXSW_EVENTL(mlxsw_sp1_ptp_egr_fifo_event_func, PTP_EGR_FIFO, SP_PTP0), + MLXSW_EVENTL(mlxsw_sp1_ptp_ing_fifo_event_func, PTP_ING_FIFO, SP_PTP0), }; static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) @@ -4149,6 +4342,14 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) rate = 1024; burst_size = 7; break; + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0: + rate = 24 * 1024; + burst_size = 12; + break; + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1: + rate = 19 * 1024; + burst_size = 12; + break; default: continue; } @@ -4187,6 +4388,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0: priority = 5; tc = 5; break; @@ -4204,6 +4406,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1: priority = 2; tc = 2; break; @@ -4237,22 
+4440,16 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) return 0; } -static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_traps_register(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_listener listeners[], + size_t listeners_count) { int i; int err; - err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core); - if (err) - return err; - - err = mlxsw_sp_trap_groups_set(mlxsw_sp->core); - if (err) - return err; - - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) { + for (i = 0; i < listeners_count; i++) { err = mlxsw_core_trap_register(mlxsw_sp->core, - &mlxsw_sp_listener[i], + &listeners[i], mlxsw_sp); if (err) goto err_listener_register; @@ -4263,23 +4460,63 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) err_listener_register: for (i--; i >= 0; i--) { mlxsw_core_trap_unregister(mlxsw_sp->core, - &mlxsw_sp_listener[i], + &listeners[i], mlxsw_sp); } return err; } -static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_traps_unregister(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_listener listeners[], + size_t listeners_count) { int i; - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) { + for (i = 0; i < listeners_count; i++) { mlxsw_core_trap_unregister(mlxsw_sp->core, - &mlxsw_sp_listener[i], + &listeners[i], mlxsw_sp); } } +static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core); + if (err) + return err; + + err = mlxsw_sp_trap_groups_set(mlxsw_sp->core); + if (err) + return err; + + err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener)); + if (err) + return err; + + err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp->listeners, + mlxsw_sp->listeners_count); + if (err) + goto err_extra_traps_init; + + return 0; + +err_extra_traps_init: + mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener)); + return err; +} + +static void 
mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp->listeners, + mlxsw_sp->listeners_count); + mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener)); +} + #define MLXSW_SP_LAG_SEED_INIT 0xcafecafe static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) @@ -4332,6 +4569,32 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } +static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = { + .clock_init = mlxsw_sp1_ptp_clock_init, + .clock_fini = mlxsw_sp1_ptp_clock_fini, + .init = mlxsw_sp1_ptp_init, + .fini = mlxsw_sp1_ptp_fini, + .receive = mlxsw_sp1_ptp_receive, + .transmitted = mlxsw_sp1_ptp_transmitted, + .hwtstamp_get = mlxsw_sp1_ptp_hwtstamp_get, + .hwtstamp_set = mlxsw_sp1_ptp_hwtstamp_set, + .shaper_work = mlxsw_sp1_ptp_shaper_work, + .get_ts_info = mlxsw_sp1_ptp_get_ts_info, +}; + +static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { + .clock_init = mlxsw_sp2_ptp_clock_init, + .clock_fini = mlxsw_sp2_ptp_clock_fini, + .init = mlxsw_sp2_ptp_init, + .fini = mlxsw_sp2_ptp_fini, + .receive = mlxsw_sp2_ptp_receive, + .transmitted = mlxsw_sp2_ptp_transmitted, + .hwtstamp_get = mlxsw_sp2_ptp_hwtstamp_get, + .hwtstamp_set = mlxsw_sp2_ptp_hwtstamp_set, + .shaper_work = mlxsw_sp2_ptp_shaper_work, + .get_ts_info = mlxsw_sp2_ptp_get_ts_info, +}; + static int mlxsw_sp_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr); @@ -4429,6 +4692,28 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_router_init; } + if (mlxsw_sp->bus_info->read_frc_capable) { + /* NULL is a valid return value from clock_init */ + mlxsw_sp->clock = + mlxsw_sp->ptp_ops->clock_init(mlxsw_sp, + mlxsw_sp->bus_info->dev); + if (IS_ERR(mlxsw_sp->clock)) { + err = PTR_ERR(mlxsw_sp->clock); + dev_err(mlxsw_sp->bus_info->dev, "Failed to init ptp clock\n"); + goto err_ptp_clock_init; + } + } + + if 
(mlxsw_sp->clock) { + /* NULL is a valid return value from ptp_ops->init */ + mlxsw_sp->ptp_state = mlxsw_sp->ptp_ops->init(mlxsw_sp); + if (IS_ERR(mlxsw_sp->ptp_state)) { + err = PTR_ERR(mlxsw_sp->ptp_state); + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize PTP\n"); + goto err_ptp_init; + } + } + /* Initialize netdevice notifier after router and SPAN is initialized, * so that the event handler can use router structures and call SPAN * respin. @@ -4459,6 +4744,12 @@ err_ports_create: err_dpipe_init: unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); err_netdev_notifier: + if (mlxsw_sp->clock) + mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); +err_ptp_init: + if (mlxsw_sp->clock) + mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock); +err_ptp_clock_init: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: mlxsw_sp_acl_fini(mlxsw_sp); @@ -4502,6 +4793,9 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr; mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals; mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops; + mlxsw_sp->ptp_ops = &mlxsw_sp1_ptp_ops; + mlxsw_sp->listeners = mlxsw_sp1_listener; + mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener); return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -4521,6 +4815,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr; mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; + mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -4532,6 +4827,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_ports_remove(mlxsw_sp); mlxsw_sp_dpipe_fini(mlxsw_sp); unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); + if (mlxsw_sp->clock) { + mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); + mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock); + } mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_acl_fini(mlxsw_sp); 
mlxsw_sp_nve_fini(mlxsw_sp); @@ -4874,6 +5173,15 @@ static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core) mlxsw_sp_params_unregister(mlxsw_core); } +static void mlxsw_sp_ptp_transmitted(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + skb_pull(skb, MLXSW_TXHDR_LEN); + mlxsw_sp->ptp_ops->transmitted(mlxsw_sp, skb, local_port); +} + static struct mlxsw_driver mlxsw_sp1_driver = { .kind = mlxsw_sp1_driver_name, .priv_size = sizeof(struct mlxsw_sp), @@ -4892,11 +5200,13 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, + .flash_update = mlxsw_sp_flash_update, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp1_resources_register, .kvd_sizes_get = mlxsw_sp_kvd_sizes_get, .params_register = mlxsw_sp_params_register, .params_unregister = mlxsw_sp_params_unregister, + .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp1_config_profile, .res_query_enabled = true, @@ -4920,10 +5230,12 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, + .flash_update = mlxsw_sp_flash_update, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, .params_register = mlxsw_sp2_params_register, .params_unregister = mlxsw_sp2_params_unregister, + .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, .res_query_enabled = true, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 8601b3041acd..abbb563db440 100644 --- 
a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -136,6 +136,8 @@ struct mlxsw_sp_acl_tcam_ops; struct mlxsw_sp_nve_ops; struct mlxsw_sp_sb_vals; struct mlxsw_sp_port_type_speed_ops; +struct mlxsw_sp_ptp_state; +struct mlxsw_sp_ptp_ops; struct mlxsw_sp { struct mlxsw_sp_port **ports; @@ -155,6 +157,8 @@ struct mlxsw_sp { struct mlxsw_sp_kvdl *kvdl; struct mlxsw_sp_nve *nve; struct notifier_block netdevice_nb; + struct mlxsw_sp_ptp_clock *clock; + struct mlxsw_sp_ptp_state *ptp_state; struct mlxsw_sp_counter_pool *counter_pool; struct { @@ -172,6 +176,9 @@ struct mlxsw_sp { const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_sb_vals *sb_vals; const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; + const struct mlxsw_sp_ptp_ops *ptp_ops; + const struct mlxsw_listener *listeners; + size_t listeners_count; }; static inline struct mlxsw_sp_upper * @@ -259,6 +266,12 @@ struct mlxsw_sp_port { unsigned acl_rule_count; struct mlxsw_sp_acl_block *ing_acl_block; struct mlxsw_sp_acl_block *eg_acl_block; + struct { + struct delayed_work shaper_dw; + struct hwtstamp_config hwtstamp_config; + u16 ing_types; + u16 egr_types; + } ptp; }; struct mlxsw_sp_port_type_speed_ops { @@ -267,6 +280,7 @@ struct mlxsw_sp_port_type_speed_ops { struct ethtool_link_ksettings *cmd); void (*from_ptys_link)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, unsigned long *mode); + u32 (*from_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto); void (*from_ptys_speed_duplex)(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd); @@ -435,6 +449,8 @@ struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, extern struct notifier_block mlxsw_sp_switchdev_notifier; /* spectrum.c */ +void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, + u8 local_port, void *priv); int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, 
u8 index, u8 next_index, bool dwrr, u8 dwrr_weight); @@ -620,6 +636,15 @@ enum mlxsw_sp_acl_profile { MLXSW_SP_ACL_PROFILE_MR, }; +struct mlxsw_sp_acl_block { + struct list_head binding_list; + struct mlxsw_sp_acl_ruleset *ruleset_zero; + struct mlxsw_sp *mlxsw_sp; + unsigned int rule_count; + unsigned int disable_count; + struct net *net; +}; + struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block); unsigned int mlxsw_sp_acl_block_rule_count(struct mlxsw_sp_acl_block *block); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index a146a44634e9..e8ac90564dbe 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -45,14 +45,6 @@ struct mlxsw_sp_acl_block_binding { bool ingress; }; -struct mlxsw_sp_acl_block { - struct list_head binding_list; - struct mlxsw_sp_acl_ruleset *ruleset_zero; - struct mlxsw_sp *mlxsw_sp; - unsigned int rule_count; - unsigned int disable_count; -}; - struct mlxsw_sp_acl_ruleset_ht_key { struct mlxsw_sp_acl_block *block; u32 chain_index; @@ -221,6 +213,7 @@ struct mlxsw_sp_acl_block *mlxsw_sp_acl_block_create(struct mlxsw_sp *mlxsw_sp, return NULL; INIT_LIST_HEAD(&block->binding_list); block->mlxsw_sp = mlxsw_sp; + block->net = net; return block; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c index 2a998dea4f39..279c241f76f0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c @@ -12,7 +12,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = { MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x02, 4), MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3), MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12), - 
MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = { @@ -20,7 +20,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = { MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x02, 4), MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3), MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12), - MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = { @@ -32,13 +32,13 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_sip[] = { MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x00, 4), MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8), - MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = { MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x00, 4), MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8), - MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = { @@ -149,7 +149,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_4[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5[] = { MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 16, 12), - MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x04, 0, 8), /* RX_ACL_SYSTEM_PORT */ + MLXSW_AFK_ELEMENT_INST_EXT_U32(SRC_SYS_PORT, 0x04, 0, 8, -1, true), /* RX_ACL_SYSTEM_PORT */ }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_0[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 
96b23c856f4d..a83e1a986ef1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -120,6 +120,49 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return 0; } +static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei, + struct tc_cls_flower_offload *f, + struct mlxsw_sp_acl_block *block) +{ + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct mlxsw_sp_port *mlxsw_sp_port; + struct net_device *ingress_dev; + struct flow_match_meta match; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) + return 0; + + flow_rule_match_meta(rule, &match); + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported ingress ifindex mask"); + return -EINVAL; + } + + ingress_dev = __dev_get_by_index(block->net, + match.key->ingress_ifindex); + if (!ingress_dev) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't find specified ingress port to match on"); + return -EINVAL; + } + + if (!mlxsw_sp_port_dev_check(ingress_dev)) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on non-mlxsw ingress port"); + return -EINVAL; + } + + mlxsw_sp_port = netdev_priv(ingress_dev); + if (mlxsw_sp_port->mlxsw_sp != block->mlxsw_sp) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on a port from different device"); + return -EINVAL; + } + + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_SRC_SYS_PORT, + mlxsw_sp_port->local_port, + 0xFFFFFFFF); + return 0; +} + static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei, struct tc_cls_flower_offload *f) { @@ -267,7 +310,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, int err; if (dissector->used_keys & - ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + ~(BIT(FLOW_DISSECTOR_KEY_META) | + BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | @@ -283,6 
+327,10 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_rulei_priority(rulei, f->common.prio); + err = mlxsw_sp_flower_parse_meta(rulei, f, block); + if (err) + return err; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_match_control match; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c new file mode 100644 index 000000000000..bd9c2bc2d5d6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -0,0 +1,1111 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */ + +#include <linux/ptp_clock_kernel.h> +#include <linux/clocksource.h> +#include <linux/timecounter.h> +#include <linux/spinlock.h> +#include <linux/device.h> +#include <linux/rhashtable.h> +#include <linux/ptp_classify.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/net_tstamp.h> + +#include "spectrum.h" +#include "spectrum_ptp.h" +#include "core.h" + +#define MLXSW_SP1_PTP_CLOCK_CYCLES_SHIFT 29 +#define MLXSW_SP1_PTP_CLOCK_FREQ_KHZ 156257 /* 6.4nSec */ +#define MLXSW_SP1_PTP_CLOCK_MASK 64 + +#define MLXSW_SP1_PTP_HT_GC_INTERVAL 500 /* ms */ + +/* How long, approximately, should the unmatched entries stay in the hash table + * before they are collected. Should be evenly divisible by the GC interval. 
+ */ +#define MLXSW_SP1_PTP_HT_GC_TIMEOUT 1000 /* ms */ + +struct mlxsw_sp_ptp_state { + struct mlxsw_sp *mlxsw_sp; + struct rhashtable unmatched_ht; + spinlock_t unmatched_lock; /* protects the HT */ + struct delayed_work ht_gc_dw; + u32 gc_cycle; +}; + +struct mlxsw_sp1_ptp_key { + u8 local_port; + u8 message_type; + u16 sequence_id; + u8 domain_number; + bool ingress; +}; + +struct mlxsw_sp1_ptp_unmatched { + struct mlxsw_sp1_ptp_key key; + struct rhash_head ht_node; + struct rcu_head rcu; + struct sk_buff *skb; + u64 timestamp; + u32 gc_cycle; +}; + +static const struct rhashtable_params mlxsw_sp1_ptp_unmatched_ht_params = { + .key_len = sizeof_field(struct mlxsw_sp1_ptp_unmatched, key), + .key_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, key), + .head_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, ht_node), +}; + +struct mlxsw_sp_ptp_clock { + struct mlxsw_core *core; + spinlock_t lock; /* protect this structure */ + struct cyclecounter cycles; + struct timecounter tc; + u32 nominal_c_mult; + struct ptp_clock *ptp; + struct ptp_clock_info ptp_info; + unsigned long overflow_period; + struct delayed_work overflow_work; +}; + +static u64 __mlxsw_sp1_ptp_read_frc(struct mlxsw_sp_ptp_clock *clock, + struct ptp_system_timestamp *sts) +{ + struct mlxsw_core *mlxsw_core = clock->core; + u32 frc_h1, frc_h2, frc_l; + + frc_h1 = mlxsw_core_read_frc_h(mlxsw_core); + ptp_read_system_prets(sts); + frc_l = mlxsw_core_read_frc_l(mlxsw_core); + ptp_read_system_postts(sts); + frc_h2 = mlxsw_core_read_frc_h(mlxsw_core); + + if (frc_h1 != frc_h2) { + /* wrap around */ + ptp_read_system_prets(sts); + frc_l = mlxsw_core_read_frc_l(mlxsw_core); + ptp_read_system_postts(sts); + } + + return (u64) frc_l | (u64) frc_h2 << 32; +} + +static u64 mlxsw_sp1_ptp_read_frc(const struct cyclecounter *cc) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(cc, struct mlxsw_sp_ptp_clock, cycles); + + return __mlxsw_sp1_ptp_read_frc(clock, NULL) & cc->mask; +} + +static int 
+mlxsw_sp1_ptp_phc_adjfreq(struct mlxsw_sp_ptp_clock *clock, int freq_adj) +{ + struct mlxsw_core *mlxsw_core = clock->core; + char mtutc_pl[MLXSW_REG_MTUTC_LEN]; + + mlxsw_reg_mtutc_pack(mtutc_pl, MLXSW_REG_MTUTC_OPERATION_ADJUST_FREQ, + freq_adj, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl); +} + +static u64 mlxsw_sp1_ptp_ns2cycles(const struct timecounter *tc, u64 nsec) +{ + u64 cycles = (u64) nsec; + + cycles <<= tc->cc->shift; + cycles = div_u64(cycles, tc->cc->mult); + + return cycles; +} + +static int +mlxsw_sp1_ptp_phc_settime(struct mlxsw_sp_ptp_clock *clock, u64 nsec) +{ + struct mlxsw_core *mlxsw_core = clock->core; + u64 next_sec, next_sec_in_nsec, cycles; + char mtutc_pl[MLXSW_REG_MTUTC_LEN]; + char mtpps_pl[MLXSW_REG_MTPPS_LEN]; + int err; + + next_sec = div_u64(nsec, NSEC_PER_SEC) + 1; + next_sec_in_nsec = next_sec * NSEC_PER_SEC; + + spin_lock_bh(&clock->lock); + cycles = mlxsw_sp1_ptp_ns2cycles(&clock->tc, next_sec_in_nsec); + spin_unlock_bh(&clock->lock); + + mlxsw_reg_mtpps_vpin_pack(mtpps_pl, cycles); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtpps), mtpps_pl); + if (err) + return err; + + mlxsw_reg_mtutc_pack(mtutc_pl, + MLXSW_REG_MTUTC_OPERATION_SET_TIME_AT_NEXT_SEC, + 0, next_sec); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl); +} + +static int mlxsw_sp1_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + int neg_adj = 0; + u32 diff; + u64 adj; + s32 ppb; + + ppb = scaled_ppm_to_ppb(scaled_ppm); + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + + adj = clock->nominal_c_mult; + adj *= ppb; + diff = div_u64(adj, NSEC_PER_SEC); + + spin_lock_bh(&clock->lock); + timecounter_read(&clock->tc); + clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff : + clock->nominal_c_mult + diff; + spin_unlock_bh(&clock->lock); + + return mlxsw_sp1_ptp_phc_adjfreq(clock, neg_adj ? 
-ppb : ppb); +} + +static int mlxsw_sp1_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + u64 nsec; + + spin_lock_bh(&clock->lock); + timecounter_adjtime(&clock->tc, delta); + nsec = timecounter_read(&clock->tc); + spin_unlock_bh(&clock->lock); + + return mlxsw_sp1_ptp_phc_settime(clock, nsec); +} + +static int mlxsw_sp1_ptp_gettimex(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + u64 cycles, nsec; + + spin_lock_bh(&clock->lock); + cycles = __mlxsw_sp1_ptp_read_frc(clock, sts); + nsec = timecounter_cyc2time(&clock->tc, cycles); + spin_unlock_bh(&clock->lock); + + *ts = ns_to_timespec64(nsec); + + return 0; +} + +static int mlxsw_sp1_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + u64 nsec = timespec64_to_ns(ts); + + spin_lock_bh(&clock->lock); + timecounter_init(&clock->tc, &clock->cycles, nsec); + nsec = timecounter_read(&clock->tc); + spin_unlock_bh(&clock->lock); + + return mlxsw_sp1_ptp_phc_settime(clock, nsec); +} + +static const struct ptp_clock_info mlxsw_sp1_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "mlxsw_sp_clock", + .max_adj = 100000000, + .adjfine = mlxsw_sp1_ptp_adjfine, + .adjtime = mlxsw_sp1_ptp_adjtime, + .gettimex64 = mlxsw_sp1_ptp_gettimex, + .settime64 = mlxsw_sp1_ptp_settime, +}; + +static void mlxsw_sp1_ptp_clock_overflow(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlxsw_sp_ptp_clock *clock; + + clock = container_of(dwork, struct mlxsw_sp_ptp_clock, overflow_work); + + spin_lock_bh(&clock->lock); + timecounter_read(&clock->tc); + spin_unlock_bh(&clock->lock); + mlxsw_core_schedule_dw(&clock->overflow_work, 
clock->overflow_period); +} + +struct mlxsw_sp_ptp_clock * +mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) +{ + u64 overflow_cycles, nsec, frac = 0; + struct mlxsw_sp_ptp_clock *clock; + int err; + + clock = kzalloc(sizeof(*clock), GFP_KERNEL); + if (!clock) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&clock->lock); + clock->cycles.read = mlxsw_sp1_ptp_read_frc; + clock->cycles.shift = MLXSW_SP1_PTP_CLOCK_CYCLES_SHIFT; + clock->cycles.mult = clocksource_khz2mult(MLXSW_SP1_PTP_CLOCK_FREQ_KHZ, + clock->cycles.shift); + clock->nominal_c_mult = clock->cycles.mult; + clock->cycles.mask = CLOCKSOURCE_MASK(MLXSW_SP1_PTP_CLOCK_MASK); + clock->core = mlxsw_sp->core; + + timecounter_init(&clock->tc, &clock->cycles, + ktime_to_ns(ktime_get_real())); + + /* Calculate period in seconds to call the overflow watchdog - to make + * sure counter is checked at least twice every wrap around. + * The period is calculated as the minimum between max HW cycles count + * (The clock source mask) and max amount of cycles that can be + * multiplied by clock multiplier where the result doesn't exceed + * 64bits. 
+ */ + overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult); + overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3)); + + nsec = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles, 0, &frac); + clock->overflow_period = nsecs_to_jiffies(nsec); + + INIT_DELAYED_WORK(&clock->overflow_work, mlxsw_sp1_ptp_clock_overflow); + mlxsw_core_schedule_dw(&clock->overflow_work, 0); + + clock->ptp_info = mlxsw_sp1_ptp_clock_info; + clock->ptp = ptp_clock_register(&clock->ptp_info, dev); + if (IS_ERR(clock->ptp)) { + err = PTR_ERR(clock->ptp); + dev_err(dev, "ptp_clock_register failed %d\n", err); + goto err_ptp_clock_register; + } + + return clock; + +err_ptp_clock_register: + cancel_delayed_work_sync(&clock->overflow_work); + kfree(clock); + return ERR_PTR(err); +} + +void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) +{ + ptp_clock_unregister(clock->ptp); + cancel_delayed_work_sync(&clock->overflow_work); + kfree(clock); +} + +static int mlxsw_sp_ptp_parse(struct sk_buff *skb, + u8 *p_domain_number, + u8 *p_message_type, + u16 *p_sequence_id) +{ + unsigned int offset = 0; + unsigned int ptp_class; + u8 *data; + + data = skb_mac_header(skb); + ptp_class = ptp_classify_raw(skb); + + switch (ptp_class & PTP_CLASS_VMASK) { + case PTP_CLASS_V1: + case PTP_CLASS_V2: + break; + default: + return -ERANGE; + } + + if (ptp_class & PTP_CLASS_VLAN) + offset += VLAN_HLEN; + + switch (ptp_class & PTP_CLASS_PMASK) { + case PTP_CLASS_IPV4: + offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN; + break; + case PTP_CLASS_IPV6: + offset += ETH_HLEN + IP6_HLEN + UDP_HLEN; + break; + case PTP_CLASS_L2: + offset += ETH_HLEN; + break; + default: + return -ERANGE; + } + + /* PTP header is 34 bytes. 
*/ + if (skb->len < offset + 34) + return -EINVAL; + + *p_message_type = data[offset] & 0x0f; + *p_domain_number = data[offset + 4]; + *p_sequence_id = (u16)(data[offset + 30]) << 8 | data[offset + 31]; + return 0; +} + +/* Returns NULL on successful insertion, a pointer on conflict, or an ERR_PTR on + * error. + */ +static struct mlxsw_sp1_ptp_unmatched * +mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, + struct sk_buff *skb, + u64 timestamp) +{ + int cycles = MLXSW_SP1_PTP_HT_GC_TIMEOUT / MLXSW_SP1_PTP_HT_GC_INTERVAL; + struct mlxsw_sp_ptp_state *ptp_state = mlxsw_sp->ptp_state; + struct mlxsw_sp1_ptp_unmatched *unmatched; + struct mlxsw_sp1_ptp_unmatched *conflict; + + unmatched = kzalloc(sizeof(*unmatched), GFP_ATOMIC); + if (!unmatched) + return ERR_PTR(-ENOMEM); + + unmatched->key = key; + unmatched->skb = skb; + unmatched->timestamp = timestamp; + unmatched->gc_cycle = mlxsw_sp->ptp_state->gc_cycle + cycles; + + conflict = rhashtable_lookup_get_insert_fast(&ptp_state->unmatched_ht, + &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); + if (conflict) + kfree(unmatched); + + return conflict; +} + +static struct mlxsw_sp1_ptp_unmatched * +mlxsw_sp1_ptp_unmatched_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key) +{ + return rhashtable_lookup(&mlxsw_sp->ptp_state->unmatched_ht, &key, + mlxsw_sp1_ptp_unmatched_ht_params); +} + +static int +mlxsw_sp1_ptp_unmatched_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_unmatched *unmatched) +{ + return rhashtable_remove_fast(&mlxsw_sp->ptp_state->unmatched_ht, + &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); +} + +/* This function is called in the following scenarios: + * + * 1) When a packet is matched with its timestamp. + * 2) In several situations when it is necessary to immediately pass on + * an SKB without a timestamp. + * 3) From GC indirectly through mlxsw_sp1_ptp_unmatched_finish(). + * This case is similar to 2) above.
+ */ +static void mlxsw_sp1_ptp_packet_finish(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port, + bool ingress, + struct skb_shared_hwtstamps *hwtstamps) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + /* Between capturing the packet and finishing it, there is a window of + * opportunity for the originating port to go away (e.g. due to a + * split). Also make sure the SKB device reference is still valid. + */ + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!(mlxsw_sp_port && (!skb->dev || skb->dev == mlxsw_sp_port->dev))) { + dev_kfree_skb_any(skb); + return; + } + + if (ingress) { + if (hwtstamps) + *skb_hwtstamps(skb) = *hwtstamps; + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); + } else { + /* skb_tstamp_tx() allows hwtstamps to be NULL. */ + skb_tstamp_tx(skb, hwtstamps); + dev_kfree_skb_any(skb); + } +} + +static void mlxsw_sp1_packet_timestamp(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, + struct sk_buff *skb, + u64 timestamp) +{ + struct skb_shared_hwtstamps hwtstamps; + u64 nsec; + + spin_lock_bh(&mlxsw_sp->clock->lock); + nsec = timecounter_cyc2time(&mlxsw_sp->clock->tc, timestamp); + spin_unlock_bh(&mlxsw_sp->clock->lock); + + hwtstamps.hwtstamp = ns_to_ktime(nsec); + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, key.ingress, &hwtstamps); +} + +static void +mlxsw_sp1_ptp_unmatched_finish(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_unmatched *unmatched) +{ + if (unmatched->skb && unmatched->timestamp) + mlxsw_sp1_packet_timestamp(mlxsw_sp, unmatched->key, + unmatched->skb, + unmatched->timestamp); + else if (unmatched->skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, unmatched->skb, + unmatched->key.local_port, + unmatched->key.ingress, NULL); + kfree_rcu(unmatched, rcu); +} + +static void mlxsw_sp1_ptp_unmatched_free_fn(void *ptr, void *arg) +{ + struct mlxsw_sp1_ptp_unmatched *unmatched = ptr; + + /* This is invoked at a point where the ports are gone already. 
Nothing + * to do with whatever is left in the HT but to free it. + */ + if (unmatched->skb) + dev_kfree_skb_any(unmatched->skb); + kfree_rcu(unmatched, rcu); +} + +static void mlxsw_sp1_ptp_got_piece(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, + struct sk_buff *skb, u64 timestamp) +{ + struct mlxsw_sp1_ptp_unmatched *unmatched, *conflict; + int err; + + rcu_read_lock(); + + unmatched = mlxsw_sp1_ptp_unmatched_lookup(mlxsw_sp, key); + + spin_lock(&mlxsw_sp->ptp_state->unmatched_lock); + + if (unmatched) { + /* There was an unmatched entry when we looked, but it may have + * been removed before we took the lock. + */ + err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, unmatched); + if (err) + unmatched = NULL; + } + + if (!unmatched) { + /* We have no unmatched entry, but one may have been added after + * we looked, but before we took the lock. + */ + unmatched = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key, + skb, timestamp); + if (IS_ERR(unmatched)) { + if (skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, + key.ingress, NULL); + unmatched = NULL; + } else if (unmatched) { + /* Save just told us, under lock, that the entry is + * there, so this has to work. + */ + err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, + unmatched); + WARN_ON_ONCE(err); + } + } + + /* If unmatched is non-NULL here, it comes either from the lookup, or + * from the save attempt above. In either case the entry was removed + * from the hash table. If unmatched is NULL, a new unmatched entry was + * added to the hash table, and there was no conflict. + */ + + if (skb && unmatched && unmatched->timestamp) { + unmatched->skb = skb; + } else if (timestamp && unmatched && unmatched->skb) { + unmatched->timestamp = timestamp; + } else if (unmatched) { + /* unmatched holds an older entry of the same type: either an + * skb if we are handling skb, or a timestamp if we are handling + * timestamp. We can't match that up, so save what we have. 
+ */ + conflict = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key, + skb, timestamp); + if (IS_ERR(conflict)) { + if (skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, + key.ingress, NULL); + } else { + /* Above, we removed an object with this key from the + * hash table, under lock, so conflict can not be a + * valid pointer. + */ + WARN_ON_ONCE(conflict); + } + } + + spin_unlock(&mlxsw_sp->ptp_state->unmatched_lock); + + if (unmatched) + mlxsw_sp1_ptp_unmatched_finish(mlxsw_sp, unmatched); + + rcu_read_unlock(); +} + +static void mlxsw_sp1_ptp_got_packet(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port, + bool ingress) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp1_ptp_key key; + u8 types; + int err; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + goto immediate; + + types = ingress ? mlxsw_sp_port->ptp.ing_types : + mlxsw_sp_port->ptp.egr_types; + if (!types) + goto immediate; + + memset(&key, 0, sizeof(key)); + key.local_port = local_port; + key.ingress = ingress; + + err = mlxsw_sp_ptp_parse(skb, &key.domain_number, &key.message_type, + &key.sequence_id); + if (err) + goto immediate; + + /* For packets whose timestamping was not enabled on this port, don't + * bother trying to match the timestamp. + */ + if (!((1 << key.message_type) & types)) + goto immediate; + + mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, skb, 0); + return; + +immediate: + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, local_port, ingress, NULL); +} + +void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, + u8 local_port, u8 message_type, + u8 domain_number, u16 sequence_id, + u64 timestamp) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp1_ptp_key key; + u8 types; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + return; + + types = ingress ? 
mlxsw_sp_port->ptp.ing_types : + mlxsw_sp_port->ptp.egr_types; + + /* For message types whose timestamping was not enabled on this port, + * don't bother with the timestamp. + */ + if (!((1 << message_type) & types)) + return; + + memset(&key, 0, sizeof(key)); + key.local_port = local_port; + key.domain_number = domain_number; + key.message_type = message_type; + key.sequence_id = sequence_id; + key.ingress = ingress; + + mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, NULL, timestamp); +} + +void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port) +{ + skb_reset_mac_header(skb); + mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, true); +} + +void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, false); +} + +static void +mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp_ptp_state *ptp_state, + struct mlxsw_sp1_ptp_unmatched *unmatched) +{ + int err; + + /* If an unmatched entry has an SKB, it has to be handed over to the + * networking stack. This is usually done from a trap handler, which is + * invoked in a softirq context. Here we are going to do it in process + * context. If that were to be interrupted by a softirq, it could cause + * a deadlock when an attempt is made to take an already-taken lock + * somewhere along the sending path. Disable softirqs to prevent this. + */ + local_bh_disable(); + + spin_lock(&ptp_state->unmatched_lock); + err = rhashtable_remove_fast(&ptp_state->unmatched_ht, + &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); + spin_unlock(&ptp_state->unmatched_lock); + + if (err) + /* The packet was matched with timestamp during the walk. */ + goto out; + + /* mlxsw_sp1_ptp_unmatched_finish() invokes netif_receive_skb(). 
While + * the comment at that function states that it can only be called in + * soft IRQ context, this pattern of local_bh_disable() + + * netif_receive_skb(), in process context, is seen elsewhere in the + * kernel, notably in pktgen. + */ + mlxsw_sp1_ptp_unmatched_finish(ptp_state->mlxsw_sp, unmatched); + +out: + local_bh_enable(); +} + +static void mlxsw_sp1_ptp_ht_gc(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlxsw_sp1_ptp_unmatched *unmatched; + struct mlxsw_sp_ptp_state *ptp_state; + struct rhashtable_iter iter; + u32 gc_cycle; + void *obj; + + ptp_state = container_of(dwork, struct mlxsw_sp_ptp_state, ht_gc_dw); + gc_cycle = ptp_state->gc_cycle++; + + rhashtable_walk_enter(&ptp_state->unmatched_ht, &iter); + rhashtable_walk_start(&iter); + while ((obj = rhashtable_walk_next(&iter))) { + if (IS_ERR(obj)) + continue; + + unmatched = obj; + if (unmatched->gc_cycle <= gc_cycle) + mlxsw_sp1_ptp_ht_gc_collect(ptp_state, unmatched); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + + mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw, + MLXSW_SP1_PTP_HT_GC_INTERVAL); +} + +static int mlxsw_sp_ptp_mtptpt_set(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_mtptpt_trap_id trap_id, + u16 message_type) +{ + char mtptpt_pl[MLXSW_REG_MTPTPT_LEN]; + + mlxsw_reg_mtptptp_pack(mtptpt_pl, trap_id, message_type); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtptpt), mtptpt_pl); +} + +static int mlxsw_sp1_ptp_set_fifo_clr_on_trap(struct mlxsw_sp *mlxsw_sp, + bool clr) +{ + char mogcr_pl[MLXSW_REG_MOGCR_LEN] = {0}; + int err; + + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl); + if (err) + return err; + + mlxsw_reg_mogcr_ptp_iftc_set(mogcr_pl, clr); + mlxsw_reg_mogcr_ptp_eftc_set(mogcr_pl, clr); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl); +} + +static int mlxsw_sp1_ptp_mtpppc_set(struct mlxsw_sp *mlxsw_sp, + u16 ing_types, u16 egr_types) +{ + char 
mtpppc_pl[MLXSW_REG_MTPPPC_LEN]; + + mlxsw_reg_mtpppc_pack(mtpppc_pl, ing_types, egr_types); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtpppc), mtpppc_pl); +} + +struct mlxsw_sp1_ptp_shaper_params { + u32 ethtool_speed; + enum mlxsw_reg_qpsc_port_speed port_speed; + u8 shaper_time_exp; + u8 shaper_time_mantissa; + u8 shaper_inc; + u8 shaper_bs; + u8 port_to_shaper_credits; + int ing_timestamp_inc; + int egr_timestamp_inc; +}; + +static const struct mlxsw_sp1_ptp_shaper_params +mlxsw_sp1_ptp_shaper_params[] = { + { + .ethtool_speed = SPEED_100, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_100M, + .shaper_time_exp = 4, + .shaper_time_mantissa = 12, + .shaper_inc = 9, + .shaper_bs = 1, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -313, + .egr_timestamp_inc = 313, + }, + { + .ethtool_speed = SPEED_1000, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_1G, + .shaper_time_exp = 0, + .shaper_time_mantissa = 12, + .shaper_inc = 6, + .shaper_bs = 0, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -35, + .egr_timestamp_inc = 35, + }, + { + .ethtool_speed = SPEED_10000, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_10G, + .shaper_time_exp = 0, + .shaper_time_mantissa = 2, + .shaper_inc = 14, + .shaper_bs = 1, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -11, + .egr_timestamp_inc = 11, + }, + { + .ethtool_speed = SPEED_25000, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_25G, + .shaper_time_exp = 0, + .shaper_time_mantissa = 0, + .shaper_inc = 11, + .shaper_bs = 1, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -14, + .egr_timestamp_inc = 14, + }, +}; + +#define MLXSW_SP1_PTP_SHAPER_PARAMS_LEN ARRAY_SIZE(mlxsw_sp1_ptp_shaper_params) + +static int mlxsw_sp1_ptp_shaper_params_set(struct mlxsw_sp *mlxsw_sp) +{ + const struct mlxsw_sp1_ptp_shaper_params *params; + char qpsc_pl[MLXSW_REG_QPSC_LEN]; + int i, err; + + for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) { + params = &mlxsw_sp1_ptp_shaper_params[i]; + mlxsw_reg_qpsc_pack(qpsc_pl, 
params->port_speed, + params->shaper_time_exp, + params->shaper_time_mantissa, + params->shaper_inc, params->shaper_bs, + params->port_to_shaper_credits, + params->ing_timestamp_inc, + params->egr_timestamp_inc); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpsc), qpsc_pl); + if (err) + return err; + } + + return 0; +} + +struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_ptp_state *ptp_state; + u16 message_type; + int err; + + err = mlxsw_sp1_ptp_shaper_params_set(mlxsw_sp); + if (err) + return ERR_PTR(err); + + ptp_state = kzalloc(sizeof(*ptp_state), GFP_KERNEL); + if (!ptp_state) + return ERR_PTR(-ENOMEM); + ptp_state->mlxsw_sp = mlxsw_sp; + + spin_lock_init(&ptp_state->unmatched_lock); + + err = rhashtable_init(&ptp_state->unmatched_ht, + &mlxsw_sp1_ptp_unmatched_ht_params); + if (err) + goto err_hashtable_init; + + /* Deliver these message types as PTP0. */ + message_type = BIT(MLXSW_SP_PTP_MESSAGE_TYPE_SYNC) | + BIT(MLXSW_SP_PTP_MESSAGE_TYPE_DELAY_REQ) | + BIT(MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_REQ) | + BIT(MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_RESP); + err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, + message_type); + if (err) + goto err_mtptpt_set; + + /* Everything else is PTP1. 
*/ + message_type = ~message_type; + err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, + message_type); + if (err) + goto err_mtptpt1_set; + + err = mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, true); + if (err) + goto err_fifo_clr; + + INIT_DELAYED_WORK(&ptp_state->ht_gc_dw, mlxsw_sp1_ptp_ht_gc); + mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw, + MLXSW_SP1_PTP_HT_GC_INTERVAL); + return ptp_state; + +err_fifo_clr: + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0); +err_mtptpt1_set: + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); +err_mtptpt_set: + rhashtable_destroy(&ptp_state->unmatched_ht); +err_hashtable_init: + kfree(ptp_state); + return ERR_PTR(err); +} + +void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) +{ + struct mlxsw_sp *mlxsw_sp = ptp_state->mlxsw_sp; + + cancel_delayed_work_sync(&ptp_state->ht_gc_dw); + mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp, 0, 0); + mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, false); + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0); + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); + rhashtable_free_and_destroy(&ptp_state->unmatched_ht, + &mlxsw_sp1_ptp_unmatched_free_fn, NULL); + kfree(ptp_state); +} + +int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + *config = mlxsw_sp_port->ptp.hwtstamp_config; + return 0; +} + +static int mlxsw_sp_ptp_get_message_types(const struct hwtstamp_config *config, + u16 *p_ing_types, u16 *p_egr_types, + enum hwtstamp_rx_filters *p_rx_filter) +{ + enum hwtstamp_rx_filters rx_filter = config->rx_filter; + enum hwtstamp_tx_types tx_type = config->tx_type; + u16 ing_types = 0x00; + u16 egr_types = 0x00; + + switch (tx_type) { + case HWTSTAMP_TX_OFF: + egr_types = 0x00; + break; + case HWTSTAMP_TX_ON: + egr_types = 0xff; + break; + case HWTSTAMP_TX_ONESTEP_SYNC: + return -ERANGE; + } + + switch (rx_filter) { + case HWTSTAMP_FILTER_NONE: + 
ing_types = 0x00; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + ing_types = 0x01; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + ing_types = 0x02; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + ing_types = 0x0f; + break; + case HWTSTAMP_FILTER_ALL: + ing_types = 0xff; + break; + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_NTP_ALL: + return -ERANGE; + } + + *p_ing_types = ing_types; + *p_egr_types = egr_types; + *p_rx_filter = rx_filter; + return 0; +} + +static int mlxsw_sp1_ptp_mtpppc_update(struct mlxsw_sp_port *mlxsw_sp_port, + u16 ing_types, u16 egr_types) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port *tmp; + int i; + + /* MTPPPC configures timestamping globally, not per port. Find the + * configuration that contains all configured timestamping requests. 
+ */ + for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) { + tmp = mlxsw_sp->ports[i]; + if (tmp && tmp != mlxsw_sp_port) { + ing_types |= tmp->ptp.ing_types; + egr_types |= tmp->ptp.egr_types; + } + } + + return mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp_port->mlxsw_sp, + ing_types, egr_types); +} + +static bool mlxsw_sp1_ptp_hwtstamp_enabled(struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_port->ptp.ing_types || mlxsw_sp_port->ptp.egr_types; +} + +static int +mlxsw_sp1_ptp_port_shaper_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qeec_pl[MLXSW_REG_QEEC_LEN]; + + mlxsw_reg_qeec_ptps_pack(qeec_pl, mlxsw_sp_port->local_port, enable); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); +} + +static int mlxsw_sp1_ptp_port_shaper_check(struct mlxsw_sp_port *mlxsw_sp_port) +{ + const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 eth_proto_oper, speed; + bool ptps = false; + int err, i; + + if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port)) + return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, false); + + port_type_speed_ops = mlxsw_sp->port_type_speed_ops; + port_type_speed_ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, + mlxsw_sp_port->local_port, 0, + false); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + port_type_speed_ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, NULL, NULL, + &eth_proto_oper); + + speed = port_type_speed_ops->from_ptys_speed(mlxsw_sp, eth_proto_oper); + for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) { + if (mlxsw_sp1_ptp_shaper_params[i].ethtool_speed == speed) { + ptps = true; + break; + } + } + + return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, ptps); +} + +void mlxsw_sp1_ptp_shaper_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlxsw_sp_port 
*mlxsw_sp_port; + int err; + + mlxsw_sp_port = container_of(dwork, struct mlxsw_sp_port, + ptp.shaper_dw); + + if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port)) + return; + + err = mlxsw_sp1_ptp_port_shaper_check(mlxsw_sp_port); + if (err) + netdev_err(mlxsw_sp_port->dev, "Failed to set up PTP shaper\n"); +} + +int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + enum hwtstamp_rx_filters rx_filter; + u16 ing_types; + u16 egr_types; + int err; + + err = mlxsw_sp_ptp_get_message_types(config, &ing_types, &egr_types, + &rx_filter); + if (err) + return err; + + err = mlxsw_sp1_ptp_mtpppc_update(mlxsw_sp_port, ing_types, egr_types); + if (err) + return err; + + mlxsw_sp_port->ptp.hwtstamp_config = *config; + mlxsw_sp_port->ptp.ing_types = ing_types; + mlxsw_sp_port->ptp.egr_types = egr_types; + + err = mlxsw_sp1_ptp_port_shaper_check(mlxsw_sp_port); + if (err) + return err; + + /* Notify the ioctl caller what we are actually timestamping. */ + config->rx_filter = rx_filter; + + return 0; +} + +int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + info->phc_index = ptp_clock_index(mlxsw_sp->clock->ptp); + + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h new file mode 100644 index 000000000000..72e55f6926b9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. 
All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_PTP_H +#define _MLXSW_SPECTRUM_PTP_H + +#include <linux/device.h> +#include <linux/rhashtable.h> + +struct mlxsw_sp; +struct mlxsw_sp_port; +struct mlxsw_sp_ptp_clock; + +enum { + MLXSW_SP_PTP_MESSAGE_TYPE_SYNC, + MLXSW_SP_PTP_MESSAGE_TYPE_DELAY_REQ, + MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_REQ, + MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_RESP, +}; + +static inline int mlxsw_sp_ptp_get_ts_info_noptp(struct ethtool_ts_info *info) +{ + info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + info->phc_index = -1; + return 0; +} + +#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) + +struct mlxsw_sp_ptp_clock * +mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev); + +void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock); + +struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp); + +void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state); + +void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port); + +void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port); + +void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, + u8 local_port, u8 message_type, + u8 domain_number, u16 sequence_id, + u64 timestamp); + +int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + +int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + +void mlxsw_sp1_ptp_shaper_work(struct work_struct *work); + +int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info); + +#else + +static inline struct mlxsw_sp_ptp_clock * +mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) +{ + return NULL; +} + +static inline void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) +{ +} + +static inline struct mlxsw_sp_ptp_state * +mlxsw_sp1_ptp_init(struct mlxsw_sp 
*mlxsw_sp) +{ + return NULL; +} + +static inline void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) +{ +} + +static inline void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); +} + +static inline void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + dev_kfree_skb_any(skb); +} + +static inline void +mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, + u8 local_port, u8 message_type, + u8 domain_number, + u16 sequence_id, u64 timestamp) +{ +} + +static inline int +mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int +mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline void mlxsw_sp1_ptp_shaper_work(struct work_struct *work) +{ +} + +static inline int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + return mlxsw_sp_ptp_get_ts_info_noptp(info); +} + +#endif + +static inline struct mlxsw_sp_ptp_clock * +mlxsw_sp2_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) +{ + return NULL; +} + +static inline void mlxsw_sp2_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) +{ +} + +static inline struct mlxsw_sp_ptp_state * +mlxsw_sp2_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + return NULL; +} + +static inline void mlxsw_sp2_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) +{ +} + +static inline void mlxsw_sp2_ptp_receive(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); +} + +static inline void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + dev_kfree_skb_any(skb); +} + +static inline int +mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port 
*mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int +mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline void mlxsw_sp2_ptp_shaper_work(struct work_struct *work) +{ +} + +static inline int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + return mlxsw_sp_ptp_get_ts_info_noptp(info); +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index ef554739dd54..e618be7ce6c6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -21,6 +21,7 @@ #include <net/arp.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> +#include <net/nexthop.h> #include <net/fib_rules.h> #include <net/ip_tunnels.h> #include <net/l3mdev.h> @@ -2887,7 +2888,7 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp, return false; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh; + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; struct in6_addr *gw; int ifindex, weight; @@ -2959,7 +2960,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed) struct net_device *dev; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - dev = mlxsw_sp_rt6->rt->fib6_nh.fib_nh_dev; + dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev; val ^= dev->ifindex; } @@ -3883,23 +3884,25 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, } static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, - const struct fib_info *fi) + struct fib_info *fi) { - return fi->fib_nh->fib_nh_scope == RT_SCOPE_LINK || - mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL); + const struct fib_nh *nh = fib_info_nh(fi, 0); + + return nh->fib_nh_scope == 
RT_SCOPE_LINK || + mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL); } static struct mlxsw_sp_nexthop_group * mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) { + unsigned int nhs = fib_info_num_path(fi); struct mlxsw_sp_nexthop_group *nh_grp; struct mlxsw_sp_nexthop *nh; struct fib_nh *fib_nh; int i; int err; - nh_grp = kzalloc(struct_size(nh_grp, nexthops, fi->fib_nhs), - GFP_KERNEL); + nh_grp = kzalloc(struct_size(nh_grp, nexthops, nhs), GFP_KERNEL); if (!nh_grp) return ERR_PTR(-ENOMEM); nh_grp->priv = fi; @@ -3907,11 +3910,11 @@ mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) nh_grp->neigh_tbl = &arp_tbl; nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi); - nh_grp->count = fi->fib_nhs; + nh_grp->count = nhs; fib_info_hold(fi); for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; - fib_nh = &fi->fib_nh[i]; + fib_nh = fib_info_nh(fi, i); err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh); if (err) goto err_nexthop4_init; @@ -4027,9 +4030,9 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; struct fib6_info *rt = mlxsw_sp_rt6->rt; - if (nh->rif && nh->rif->dev == rt->fib6_nh.fib_nh_dev && + if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev && ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, - &rt->fib6_nh.fib_nh_gw6)) + &rt->fib6_nh->fib_nh_gw6)) return nh; continue; } @@ -4089,13 +4092,13 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) { list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, - list)->rt->fib6_nh.fib_nh_flags |= RTNH_F_OFFLOAD; + list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; return; } list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; - struct fib6_nh *fib6_nh = 
&mlxsw_sp_rt6->rt->fib6_nh; + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; struct mlxsw_sp_nexthop *nh; nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); @@ -4117,7 +4120,7 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { struct fib6_info *rt = mlxsw_sp_rt6->rt; - rt->fib6_nh.fib_nh_flags &= ~RTNH_F_OFFLOAD; + rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; } } @@ -4349,9 +4352,9 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info, struct mlxsw_sp_fib_entry *fib_entry) { + struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id); - struct net_device *dev = fen_info->fi->fib_dev; struct mlxsw_sp_ipip_entry *ipip_entry; struct fib_info *fi = fen_info->fi; @@ -4995,7 +4998,8 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt) { /* RTF_CACHE routes are ignored */ - return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_gw_family; + return !(rt->fib6_flags & RTF_ADDRCONF) && + rt->fib6_nh->fib_nh_gw_family; } static struct fib6_info * @@ -5054,8 +5058,8 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt, enum mlxsw_sp_ipip_type *ret) { - return rt->fib6_nh.fib_nh_dev && - mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.fib_nh_dev, ret); + return rt->fib6_nh->fib_nh_dev && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret); } static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, @@ -5065,7 +5069,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_ipip_ops *ipip_ops; struct mlxsw_sp_ipip_entry *ipip_entry; - struct net_device *dev = rt->fib6_nh.fib_nh_dev; + struct net_device *dev = 
rt->fib6_nh->fib_nh_dev; struct mlxsw_sp_rif *rif; int err; @@ -5108,11 +5112,11 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, const struct fib6_info *rt) { - struct net_device *dev = rt->fib6_nh.fib_nh_dev; + struct net_device *dev = rt->fib6_nh->fib_nh_dev; nh->nh_grp = nh_grp; - nh->nh_weight = rt->fib6_nh.fib_nh_weight; - memcpy(&nh->gw_addr, &rt->fib6_nh.fib_nh_gw6, sizeof(nh->gw_addr)); + nh->nh_weight = rt->fib6_nh->fib_nh_weight; + memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr)); mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); @@ -5135,7 +5139,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt) { - return rt->fib6_nh.fib_nh_gw_family || + return rt->fib6_nh->fib_nh_gw_family || mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); } @@ -5274,17 +5278,21 @@ err_nexthop6_group_get: static int mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct fib6_info *rt) + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; - int err; + int err, i; - mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); - if (IS_ERR(mlxsw_sp_rt6)) - return PTR_ERR(mlxsw_sp_rt6); + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_create; + } - list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); - fib6_entry->nrt6++; + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; + } err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); if (err) @@ -5293,27 +5301,38 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, return 0; err_nexthop6_group_update: - fib6_entry->nrt6--; - list_del(&mlxsw_sp_rt6->list); - 
mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + i = nrt6; +err_rt6_create: + for (i--; i >= 0; i--) { + fib6_entry->nrt6--; + mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } return err; } static void mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct fib6_info *rt) + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int i; - mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt); - if (WARN_ON(!mlxsw_sp_rt6)) - return; + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, + rt_arr[i]); + if (WARN_ON_ONCE(!mlxsw_sp_rt6)) + continue; + + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } - fib6_entry->nrt6--; - list_del(&mlxsw_sp_rt6->list); mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); - mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); } static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, @@ -5354,29 +5373,32 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry) static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, - struct fib6_info *rt) + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; - int err; + int err, i; fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL); if (!fib6_entry) return ERR_PTR(-ENOMEM); fib_entry = &fib6_entry->common; - mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); - if (IS_ERR(mlxsw_sp_rt6)) { - err = PTR_ERR(mlxsw_sp_rt6); - goto err_rt6_create; + INIT_LIST_HEAD(&fib6_entry->rt6_list); + + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_create; + } + 
list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; } - mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt); + mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]); - INIT_LIST_HEAD(&fib6_entry->rt6_list); - list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); - fib6_entry->nrt6 = 1; err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); if (err) goto err_nexthop6_group_get; @@ -5386,9 +5408,15 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, return fib6_entry; err_nexthop6_group_get: - list_del(&mlxsw_sp_rt6->list); - mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + i = nrt6; err_rt6_create: + for (i--; i >= 0; i--) { + fib6_entry->nrt6--; + mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } kfree(fib6_entry); return ERR_PTR(err); } @@ -5431,16 +5459,16 @@ mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, static int mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, - bool replace) + bool *p_replace) { struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); struct mlxsw_sp_fib6_entry *fib6_entry; - fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace); + fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, *p_replace); - if (replace && WARN_ON(!fib6_entry)) - return -EINVAL; + if (*p_replace && !fib6_entry) + *p_replace = false; if (fib6_entry) { list_add_tail(&new6_entry->common.list, @@ -5475,11 +5503,11 @@ mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry) static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - bool replace) + bool *p_replace) { int err; - err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace); + err = mlxsw_sp_fib6_node_list_insert(fib6_entry, p_replace); if (err) return err; 
@@ -5552,10 +5580,12 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, - struct fib6_info *rt, bool replace) + struct fib6_info **rt_arr, + unsigned int nrt6, bool replace) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; int err; if (mlxsw_sp->router->aborted) @@ -5580,19 +5610,21 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, */ fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace); if (fib6_entry) { - err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt); + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, + rt_arr, nrt6); if (err) goto err_fib6_entry_nexthop_add; return 0; } - fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt); + fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr, + nrt6); if (IS_ERR(fib6_entry)) { err = PTR_ERR(fib6_entry); goto err_fib6_entry_create; } - err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace); + err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, &replace); if (err) goto err_fib6_node_entry_link; @@ -5609,10 +5641,12 @@ err_fib6_entry_nexthop_add: } static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, - struct fib6_info *rt) + struct fib6_info **rt_arr, + unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; if (mlxsw_sp->router->aborted) return; @@ -5624,11 +5658,12 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, if (WARN_ON(!fib6_entry)) return; - /* If route is part of a multipath entry, but not the last one - * removed, then only reduce its nexthop group. + /* If not all the nexthops are deleted, then only reduce the nexthop + * group. 
*/ - if (!list_is_singular(&fib6_entry->rt6_list)) { - mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt); + if (nrt6 != fib6_entry->nrt6) { + mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr, + nrt6); return; } @@ -5889,10 +5924,15 @@ static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); } +struct mlxsw_sp_fib6_event_work { + struct fib6_info **rt_arr; + unsigned int nrt6; +}; + struct mlxsw_sp_fib_event_work { struct work_struct work; union { - struct fib6_entry_notifier_info fen6_info; + struct mlxsw_sp_fib6_event_work fib6_work; struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; @@ -5903,6 +5943,54 @@ struct mlxsw_sp_fib_event_work { unsigned long event; }; +static int +mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work, + struct fib6_entry_notifier_info *fen6_info) +{ + struct fib6_info *rt = fen6_info->rt; + struct fib6_info **rt_arr; + struct fib6_info *iter; + unsigned int nrt6; + int i = 0; + + nrt6 = fen6_info->nsiblings + 1; + + rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC); + if (!rt_arr) + return -ENOMEM; + + fib6_work->rt_arr = rt_arr; + fib6_work->nrt6 = nrt6; + + rt_arr[0] = rt; + fib6_info_hold(rt); + + if (!fen6_info->nsiblings) + return 0; + + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { + if (i == fen6_info->nsiblings) + break; + + rt_arr[i + 1] = iter; + fib6_info_hold(iter); + i++; + } + WARN_ON_ONCE(i != fen6_info->nsiblings); + + return 0; +} + +static void +mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work) +{ + int i; + + for (i = 0; i < fib6_work->nrt6; i++) + mlxsw_sp_rt6_release(fib6_work->rt_arr[i]); + kfree(fib6_work->rt_arr); +} + static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) { struct mlxsw_sp_fib_event_work *fib_work = @@ -5961,18 +6049,21 @@ static void 
mlxsw_sp_router_fib6_event_work(struct work_struct *work) switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_ADD: replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; err = mlxsw_sp_router_fib6_add(mlxsw_sp, - fib_work->fen6_info.rt, replace); + fib_work->fib6_work.rt_arr, + fib_work->fib6_work.nrt6, + replace); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); - mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); break; case FIB_EVENT_ENTRY_DEL: - mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt); - mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + mlxsw_sp_router_fib6_del(mlxsw_sp, + fib_work->fib6_work.rt_arr, + fib_work->fib6_work.nrt6); + mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); break; case FIB_EVENT_RULE_ADD: /* if we get here, a rule was added that we do not support. @@ -6061,22 +6152,26 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, } } -static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, - struct fib_notifier_info *info) +static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) { struct fib6_entry_notifier_info *fen6_info; + int err; switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen6_info = container_of(info, struct fib6_entry_notifier_info, info); - fib_work->fen6_info = *fen6_info; - fib6_info_hold(fib_work->fen6_info.rt); + err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work, + fen6_info); + if (err) + return err; break; } + + return 0; } static void @@ -6185,6 +6280,20 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported"); 
return notifier_from_errno(-EINVAL); } + if (fen_info->fi->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } + } else if (info->family == AF_INET6) { + struct fib6_entry_notifier_info *fen6_info; + + fen6_info = container_of(info, + struct fib6_entry_notifier_info, + info); + if (fen6_info->rt->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } } break; } @@ -6203,7 +6312,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, break; case AF_INET6: INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); - mlxsw_sp_router_fib6_event(fib_work, info); + err = mlxsw_sp_router_fib6_event(fib_work, info); + if (err) + goto err_fib_event; break; case RTNL_FAMILY_IP6MR: case RTNL_FAMILY_IPMR: @@ -6215,6 +6326,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, mlxsw_core_schedule_work(&fib_work->work); return NOTIFY_DONE; + +err_fib_event: + kfree(fib_work); + return NOTIFY_BAD; } struct mlxsw_sp_rif * diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index fc4f19167262..bdab96f5bc70 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -299,6 +299,8 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, u64 len; int err; + memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb)); + if (mlxsw_core_skb_transmit_busy(mlxsw_sx->core, &tx_info)) return NETDEV_TX_BUSY; diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 451216dd7f6b..19202bdb5105 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -17,6 +17,8 @@ enum { MLXSW_TRAP_ID_MVRP = 0x15, MLXSW_TRAP_ID_RPVST = 0x16, MLXSW_TRAP_ID_DHCP = 0x19, + MLXSW_TRAP_ID_PTP0 = 0x28, + 
MLXSW_TRAP_ID_PTP1 = 0x29, MLXSW_TRAP_ID_IGMP_QUERY = 0x30, MLXSW_TRAP_ID_IGMP_V1_REPORT = 0x31, MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32, @@ -76,6 +78,10 @@ enum { enum mlxsw_event_trap_id { /* Port Up/Down event generated by hardware */ MLXSW_TRAP_ID_PUDE = 0x8, + /* PTP Ingress FIFO has a new entry */ + MLXSW_TRAP_ID_PTP_ING_FIFO = 0x2D, + /* PTP Egress FIFO has a new entry */ + MLXSW_TRAP_ID_PTP_EGR_FIFO = 0x2E, }; #endif /* _MLXSW_TRAP_H */ |