diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5')
252 files changed, 24108 insertions, 12734 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 92056452a9e3..26685fd0fdaa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -16,13 +16,9 @@ config MLX5_CORE Core driver for low level functionality of the ConnectX-4 and Connect-IB cards by Mellanox Technologies. -config MLX5_ACCEL - bool - config MLX5_FPGA bool "Mellanox Technologies Innova support" depends on MLX5_CORE - select MLX5_ACCEL help Build support for the Innova family of network cards by Mellanox Technologies. Innova network cards are comprised of a ConnectX chip @@ -115,6 +111,7 @@ config MLX5_TC_CT config MLX5_TC_SAMPLE bool "MLX5 TC sample offload support" depends on MLX5_CLS_ACT + depends on PSAMPLE=y || PSAMPLE=n || MLX5_CORE=m default y help Say Y here if you want to support offloading sample rules via tc @@ -142,71 +139,29 @@ config MLX5_CORE_IPOIB help MLX5 IPoIB offloads & acceleration support. -config MLX5_FPGA_IPSEC - bool "Mellanox Technologies IPsec Innova support" - depends on MLX5_CORE - depends on MLX5_FPGA - help - Build IPsec support for the Innova family of network cards by Mellanox - Technologies. Innova network cards are comprised of a ConnectX chip - and an FPGA chip on one board. If you select this option, the - mlx5_core driver will include the Innova FPGA core and allow building - sandbox-specific client drivers. - -config MLX5_IPSEC - bool "Mellanox Technologies IPsec Connect-X support" +config MLX5_EN_MACSEC + bool "Connect-X support for MACSec offload" depends on MLX5_CORE_EN - depends on XFRM_OFFLOAD - depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD - select MLX5_ACCEL + depends on MACSEC + default n help - Build IPsec support for the Connect-X family of network cards by Mellanox - Technologies. - Note: If you select this option, the mlx5_core driver will include - IPsec support for the Connect-X family. + Build support for MACsec cryptography-offload acceleration in the NIC. config MLX5_EN_IPSEC - bool "IPSec XFRM cryptography-offload acceleration" + bool "Mellanox Technologies IPsec Connect-X support" depends on MLX5_CORE_EN depends on XFRM_OFFLOAD depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD - depends on MLX5_FPGA_IPSEC || MLX5_IPSEC help Build support for IPsec cryptography-offload acceleration in the NIC. - Note: Support for hardware with this capability needs to be selected - for this option to become available. -config MLX5_FPGA_TLS - bool "Mellanox Technologies TLS Innova support" - depends on TLS_DEVICE - depends on TLS=y || MLX5_CORE=m - depends on MLX5_CORE_EN - depends on MLX5_FPGA - select MLX5_EN_TLS - help - Build TLS support for the Innova family of network cards by Mellanox - Technologies. Innova network cards are comprised of a ConnectX chip - and an FPGA chip on one board. If you select this option, the - mlx5_core driver will include the Innova FPGA core and allow building - sandbox-specific client drivers. - -config MLX5_TLS +config MLX5_EN_TLS bool "Mellanox Technologies TLS Connect-X support" depends on TLS_DEVICE depends on TLS=y || MLX5_CORE=m depends on MLX5_CORE_EN - select MLX5_ACCEL - select MLX5_EN_TLS - help - Build TLS support for the Connect-X family of network cards by Mellanox - Technologies. - -config MLX5_EN_TLS - bool help Build support for TLS cryptography-offload acceleration in the NIC. - Note: Support for hardware with this capability needs to be selected - for this option to become available. config MLX5_SW_STEERING bool "Mellanox Technologies software-managed steering" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index e63bb9ceb9c0..a22c32aabf11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -14,10 +14,10 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o port.o mr.o pd.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ - fs_counters.o fs_ft_pool.o rl.o lag/lag.o dev.o events.o wq.o lib/gid.o \ + fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ - fw_reset.o qos.o lib/tout.o + fw_reset.o qos.o lib/tout.o lib/aso.o # # Netdev basic @@ -28,7 +28,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ - en/qos.o en/trap.o en/fs_tt_redirect.o + en/qos.o en/htb.o en/trap.o en/fs_tt_redirect.o en/selq.o \ + lib/crypto.o # # Netdev extra @@ -39,14 +40,28 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag/mp.o lag/port_sel.o lib/geneve.o lib/port_tun.o \ en_rep.o en/rep/bond.o en/mod_hdr.o \ - en/mapping.o + en/mapping.o lag/mpesw.o mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ lib/fs_chains.o en/tc_tun.o \ esw/indir_table.o en/tc_tun_encap.o \ en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \ en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \ - en/tc/post_act.o en/tc/int_port.o -mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o + en/tc/post_act.o en/tc/int_port.o en/tc/meter.o \ + en/tc/post_meter.o + +mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en/tc/act/act.o en/tc/act/drop.o en/tc/act/trap.o \ + en/tc/act/accept.o en/tc/act/mark.o en/tc/act/goto.o \ + en/tc/act/tun.o en/tc/act/csum.o en/tc/act/pedit.o \ + en/tc/act/vlan.o en/tc/act/vlan_mangle.o en/tc/act/mpls.o \ + en/tc/act/mirred.o en/tc/act/mirred_nic.o \ + en/tc/act/ct.o en/tc/act/sample.o en/tc/act/ptype.o \ + en/tc/act/redirect_ingress.o en/tc/act/police.o + +ifneq ($(CONFIG_MLX5_TC_CT),) + mlx5_core-y += en/tc_ct.o en/tc/ct_fs_dmfs.o + mlx5_core-$(CONFIG_MLX5_SW_STEERING) += en/tc/ct_fs_smfs.o +endif + mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o # @@ -54,7 +69,7 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ ecpf.o rdma.o esw/legacy.o \ - esw/devlink_port.o esw/vporttbl.o esw/qos.o + esw/debugfs.o esw/devlink_port.o esw/vporttbl.o esw/qos.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \ @@ -75,17 +90,16 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib # # Accelerations & FPGA # -mlx5_core-$(CONFIG_MLX5_IPSEC) += accel/ipsec_offload.o -mlx5_core-$(CONFIG_MLX5_FPGA_IPSEC) += fpga/ipsec.o -mlx5_core-$(CONFIG_MLX5_FPGA_TLS) += fpga/tls.o -mlx5_core-$(CONFIG_MLX5_ACCEL) += lib/crypto.o accel/tls.o accel/ipsec.o - mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o +mlx5_core-$(CONFIG_MLX5_EN_MACSEC) += en_accel/macsec.o en_accel/macsec_fs.o \ + en_accel/macsec_stats.o + mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ - en_accel/ipsec_stats.o en_accel/ipsec_fs.o + en_accel/ipsec_stats.o en_accel/ipsec_fs.o \ + en_accel/ipsec_offload.o -mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \ +mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/ktls_stats.o \ en_accel/fs_tcp.o en_accel/ktls.o en_accel/ktls_txrx.o \ en_accel/ktls_tx.o en_accel/ktls_rx.o @@ -94,12 +108,14 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o steering/dr_icm_pool.o steering/dr_buddy.o \ steering/dr_ste.o steering/dr_send.o \ steering/dr_ste_v0.o steering/dr_ste_v1.o \ + steering/dr_ste_v2.o \ steering/dr_cmd.o steering/dr_fw.o \ - steering/dr_action.o steering/fs_dr.o + steering/dr_action.o steering/fs_dr.o \ + steering/dr_dbg.o lib/smfs.o # # SF device # -mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o +mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_affinity.o # # SF manager diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h deleted file mode 100644 index 82b185121edb..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef __MLX5E_ACCEL_H__ -#define __MLX5E_ACCEL_H__ - -#ifdef CONFIG_MLX5_ACCEL - -#include <linux/skbuff.h> -#include <linux/netdevice.h> - -static inline bool is_metadata_hdr_valid(struct sk_buff *skb) -{ - __be16 *ethtype; - - if (unlikely(skb->len < ETH_HLEN + MLX5E_METADATA_ETHER_LEN)) - return false; - ethtype = (__be16 *)(skb->data + ETH_ALEN * 2); - if (*ethtype != cpu_to_be16(MLX5E_METADATA_ETHER_TYPE)) - return false; - return true; -} - -static inline void remove_metadata_hdr(struct sk_buff *skb) -{ - struct ethhdr *old_eth; - struct ethhdr *new_eth; - - /* Remove the metadata from the buffer */ - old_eth = (struct ethhdr *)skb->data; - new_eth = (struct ethhdr *)(skb->data + MLX5E_METADATA_ETHER_LEN); - memmove(new_eth, old_eth, 2 * ETH_ALEN); - /* Ethertype is already in its new place */ - skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN); -} - -#endif /* CONFIG_MLX5_ACCEL */ - -#endif /* __MLX5E_EN_ACCEL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c deleted file mode 100644 index 09f5ce97af46..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/mlx5/device.h> - -#include "accel/ipsec.h" -#include "mlx5_core.h" -#include "fpga/ipsec.h" -#include "accel/ipsec_offload.h" - -void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops; - int err = 0; - - ipsec_ops = (mlx5_ipsec_offload_ops(mdev)) ? - mlx5_ipsec_offload_ops(mdev) : - mlx5_fpga_ipsec_ops(mdev); - - if (!ipsec_ops || !ipsec_ops->init) { - mlx5_core_dbg(mdev, "IPsec ops is not supported\n"); - return; - } - - err = ipsec_ops->init(mdev); - if (err) { - mlx5_core_warn_once(mdev, "Failed to start IPsec device, err = %d\n", err); - return; - } - - mdev->ipsec_ops = ipsec_ops; -} - -void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->cleanup) - return; - - ipsec_ops->cleanup(mdev); -} - -u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->device_caps) - return 0; - - return ipsec_ops->device_caps(mdev); -} -EXPORT_SYMBOL_GPL(mlx5_accel_ipsec_device_caps); - -unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->counters_count) - return -EOPNOTSUPP; - - return ipsec_ops->counters_count(mdev); -} - -int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, - unsigned int count) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->counters_read) - return -EOPNOTSUPP; - - return ipsec_ops->counters_read(mdev, counters, count); -} - -void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *xfrm, - u32 *sa_handle) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - __be32 saddr[4] = {}, daddr[4] = {}; - - if (!ipsec_ops || !ipsec_ops->create_hw_context) - return ERR_PTR(-EOPNOTSUPP); - - if (!xfrm->attrs.is_ipv6) { - saddr[3] = xfrm->attrs.saddr.a4; - daddr[3] = xfrm->attrs.daddr.a4; - } else { - memcpy(saddr, xfrm->attrs.saddr.a6, sizeof(saddr)); - memcpy(daddr, xfrm->attrs.daddr.a6, sizeof(daddr)); - } - - return ipsec_ops->create_hw_context(mdev, xfrm, saddr, daddr, xfrm->attrs.spi, - xfrm->attrs.is_ipv6, sa_handle); -} - -void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->free_hw_context) - return; - - ipsec_ops->free_hw_context(context); -} - -struct mlx5_accel_esp_xfrm * -mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - struct mlx5_accel_esp_xfrm *xfrm; - - if (!ipsec_ops || !ipsec_ops->esp_create_xfrm) - return ERR_PTR(-EOPNOTSUPP); - - xfrm = ipsec_ops->esp_create_xfrm(mdev, attrs, flags); - if (IS_ERR(xfrm)) - return xfrm; - - xfrm->mdev = mdev; - return xfrm; -} -EXPORT_SYMBOL_GPL(mlx5_accel_esp_create_xfrm); - -void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = xfrm->mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->esp_destroy_xfrm) - return; - - ipsec_ops->esp_destroy_xfrm(xfrm); -} -EXPORT_SYMBOL_GPL(mlx5_accel_esp_destroy_xfrm); - -int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = xfrm->mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->esp_modify_xfrm) - return -EOPNOTSUPP; - - return ipsec_ops->esp_modify_xfrm(xfrm, attrs); -} -EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h deleted file mode 100644 index fbb9c5415d53..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef __MLX5_ACCEL_IPSEC_H__ -#define __MLX5_ACCEL_IPSEC_H__ - -#include <linux/mlx5/driver.h> -#include <linux/mlx5/accel.h> - -#ifdef CONFIG_MLX5_ACCEL - -#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \ - MLX5_ACCEL_IPSEC_CAP_DEVICE) - -unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev); -int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, - unsigned int count); - -void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *xfrm, - u32 *sa_handle); -void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context); - -void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev); -void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev); - -struct mlx5_accel_ipsec_ops { - u32 (*device_caps)(struct mlx5_core_dev *mdev); - unsigned int (*counters_count)(struct mlx5_core_dev *mdev); - int (*counters_read)(struct mlx5_core_dev *mdev, u64 *counters, unsigned int count); - void* (*create_hw_context)(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *xfrm, - const __be32 saddr[4], const __be32 daddr[4], - const __be32 spi, bool is_ipv6, u32 *sa_handle); - void (*free_hw_context)(void *context); - int (*init)(struct mlx5_core_dev *mdev); - void (*cleanup)(struct mlx5_core_dev *mdev); - struct mlx5_accel_esp_xfrm* (*esp_create_xfrm)(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags); - int (*esp_modify_xfrm)(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs); - void (*esp_destroy_xfrm)(struct mlx5_accel_esp_xfrm *xfrm); -}; - -#else - -#define MLX5_IPSEC_DEV(mdev) false - -static inline void * -mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *xfrm, - u32 *sa_handle) -{ - return NULL; -} - -static inline void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context) {} - -static inline void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) {} - -static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) {} - -#endif /* CONFIG_MLX5_ACCEL */ - -#endif /* __MLX5_ACCEL_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c deleted file mode 100644 index d6667d38e1de..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c +++ /dev/null @@ -1,385 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ - -#include "mlx5_core.h" -#include "ipsec_offload.h" -#include "lib/mlx5.h" -#include "en_accel/ipsec_fs.h" - -#define MLX5_IPSEC_DEV_BASIC_CAPS (MLX5_ACCEL_IPSEC_CAP_DEVICE | MLX5_ACCEL_IPSEC_CAP_IPV6 | \ - MLX5_ACCEL_IPSEC_CAP_LSO) - -struct mlx5_ipsec_sa_ctx { - struct rhash_head hash; - u32 enc_key_id; - u32 ipsec_obj_id; - /* hw ctx */ - struct mlx5_core_dev *dev; - struct mlx5_ipsec_esp_xfrm *mxfrm; -}; - -struct mlx5_ipsec_esp_xfrm { - /* reference counter of SA ctx */ - struct mlx5_ipsec_sa_ctx *sa_ctx; - struct mutex lock; /* protects mlx5_ipsec_esp_xfrm */ - struct mlx5_accel_esp_xfrm accel_xfrm; -}; - -static u32 mlx5_ipsec_offload_device_caps(struct mlx5_core_dev *mdev) -{ - u32 caps = MLX5_IPSEC_DEV_BASIC_CAPS; - - if (!mlx5_is_ipsec_device(mdev)) - return 0; - - if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ipsec_encrypt) || - !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ipsec_decrypt)) - return 0; - - if (MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_encrypt) && - MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_decrypt)) - caps |= MLX5_ACCEL_IPSEC_CAP_ESP; - - if (MLX5_CAP_IPSEC(mdev, ipsec_esn)) { - caps |= MLX5_ACCEL_IPSEC_CAP_ESN; - caps |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN; - } - - /* We can accommodate up to 2^24 different IPsec objects - * because we use up to 24 bit in flow table metadata - * to hold the IPsec Object unique handle. - */ - WARN_ON_ONCE(MLX5_CAP_IPSEC(mdev, log_max_ipsec_offload) > 24); - return caps; -} - -static int -mlx5_ipsec_offload_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs) -{ - if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) { - mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay (replay_type = %d)\n", - attrs->replay_type); - return -EOPNOTSUPP; - } - - if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) { - mlx5_core_err(mdev, "Only aes gcm keymat is supported (keymat_type = %d)\n", - attrs->keymat_type); - return -EOPNOTSUPP; - } - - if (attrs->keymat.aes_gcm.iv_algo != - MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) { - mlx5_core_err(mdev, "Only iv sequence algo is supported (iv_algo = %d)\n", - attrs->keymat.aes_gcm.iv_algo); - return -EOPNOTSUPP; - } - - if (attrs->keymat.aes_gcm.key_len != 128 && - attrs->keymat.aes_gcm.key_len != 256) { - mlx5_core_err(mdev, "Cannot offload xfrm states with key length other than 128/256 bit (key length = %d)\n", - attrs->keymat.aes_gcm.key_len); - return -EOPNOTSUPP; - } - - if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) && - !MLX5_CAP_IPSEC(mdev, ipsec_esn)) { - mlx5_core_err(mdev, "Cannot offload xfrm states with ESN triggered\n"); - return -EOPNOTSUPP; - } - - return 0; -} - -static struct mlx5_accel_esp_xfrm * -mlx5_ipsec_offload_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) -{ - struct mlx5_ipsec_esp_xfrm *mxfrm; - int err = 0; - - err = mlx5_ipsec_offload_esp_validate_xfrm_attrs(mdev, attrs); - if (err) - return ERR_PTR(err); - - mxfrm = kzalloc(sizeof(*mxfrm), GFP_KERNEL); - if (!mxfrm) - return ERR_PTR(-ENOMEM); - - mutex_init(&mxfrm->lock); - memcpy(&mxfrm->accel_xfrm.attrs, attrs, - sizeof(mxfrm->accel_xfrm.attrs)); - - return &mxfrm->accel_xfrm; -} - -static void mlx5_ipsec_offload_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) -{ - struct mlx5_ipsec_esp_xfrm *mxfrm = container_of(xfrm, struct mlx5_ipsec_esp_xfrm, - accel_xfrm); - - /* assuming no sa_ctx are connected to this xfrm_ctx */ - WARN_ON(mxfrm->sa_ctx); - kfree(mxfrm); -} - -struct mlx5_ipsec_obj_attrs { - const struct aes_gcm_keymat *aes_gcm; - u32 accel_flags; - u32 esn_msb; - u32 enc_key_id; -}; - -static int mlx5_create_ipsec_obj(struct mlx5_core_dev *mdev, - struct mlx5_ipsec_obj_attrs *attrs, - u32 *ipsec_id) -{ - const struct aes_gcm_keymat *aes_gcm = attrs->aes_gcm; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; - u32 in[MLX5_ST_SZ_DW(create_ipsec_obj_in)] = {}; - void *obj, *salt_p, *salt_iv_p; - int err; - - obj = MLX5_ADDR_OF(create_ipsec_obj_in, in, ipsec_object); - - /* salt and seq_iv */ - salt_p = MLX5_ADDR_OF(ipsec_obj, obj, salt); - memcpy(salt_p, &aes_gcm->salt, sizeof(aes_gcm->salt)); - - switch (aes_gcm->icv_len) { - case 64: - MLX5_SET(ipsec_obj, obj, icv_length, - MLX5_IPSEC_OBJECT_ICV_LEN_8B); - break; - case 96: - MLX5_SET(ipsec_obj, obj, icv_length, - MLX5_IPSEC_OBJECT_ICV_LEN_12B); - break; - case 128: - MLX5_SET(ipsec_obj, obj, icv_length, - MLX5_IPSEC_OBJECT_ICV_LEN_16B); - break; - default: - return -EINVAL; - } - salt_iv_p = MLX5_ADDR_OF(ipsec_obj, obj, implicit_iv); - memcpy(salt_iv_p, &aes_gcm->seq_iv, sizeof(aes_gcm->seq_iv)); - /* esn */ - if (attrs->accel_flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) { - MLX5_SET(ipsec_obj, obj, esn_en, 1); - MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn_msb); - if (attrs->accel_flags & MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) - MLX5_SET(ipsec_obj, obj, esn_overlap, 1); - } - - MLX5_SET(ipsec_obj, obj, dekn, attrs->enc_key_id); - - /* general object fields set */ - MLX5_SET(general_obj_in_cmd_hdr, in, opcode, - MLX5_CMD_OP_CREATE_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, - MLX5_GENERAL_OBJECT_TYPES_IPSEC); - - err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); - if (!err) - *ipsec_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); - - return err; -} - -static void mlx5_destroy_ipsec_obj(struct mlx5_core_dev *mdev, u32 ipsec_id) -{ - u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; - - MLX5_SET(general_obj_in_cmd_hdr, in, opcode, - MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, - MLX5_GENERAL_OBJECT_TYPES_IPSEC); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, ipsec_id); - - mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); -} - -static void *mlx5_ipsec_offload_create_sa_ctx(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *accel_xfrm, - const __be32 saddr[4], const __be32 daddr[4], - const __be32 spi, bool is_ipv6, u32 *hw_handle) -{ - struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs = &accel_xfrm->attrs; - struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm; - struct mlx5_ipsec_obj_attrs ipsec_attrs = {}; - struct mlx5_ipsec_esp_xfrm *mxfrm; - struct mlx5_ipsec_sa_ctx *sa_ctx; - int err; - - /* alloc SA context */ - sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL); - if (!sa_ctx) - return ERR_PTR(-ENOMEM); - - sa_ctx->dev = mdev; - - mxfrm = container_of(accel_xfrm, struct mlx5_ipsec_esp_xfrm, accel_xfrm); - mutex_lock(&mxfrm->lock); - sa_ctx->mxfrm = mxfrm; - - /* key */ - err = mlx5_create_encryption_key(mdev, aes_gcm->aes_key, - aes_gcm->key_len / BITS_PER_BYTE, - MLX5_ACCEL_OBJ_IPSEC_KEY, - &sa_ctx->enc_key_id); - if (err) { - mlx5_core_dbg(mdev, "Failed to create encryption key (err = %d)\n", err); - goto err_sa_ctx; - } - - ipsec_attrs.aes_gcm = aes_gcm; - ipsec_attrs.accel_flags = accel_xfrm->attrs.flags; - ipsec_attrs.esn_msb = accel_xfrm->attrs.esn; - ipsec_attrs.enc_key_id = sa_ctx->enc_key_id; - err = mlx5_create_ipsec_obj(mdev, &ipsec_attrs, - &sa_ctx->ipsec_obj_id); - if (err) { - mlx5_core_dbg(mdev, "Failed to create IPsec object (err = %d)\n", err); - goto err_enc_key; - } - - *hw_handle = sa_ctx->ipsec_obj_id; - mxfrm->sa_ctx = sa_ctx; - mutex_unlock(&mxfrm->lock); - - return sa_ctx; - -err_enc_key: - mlx5_destroy_encryption_key(mdev, sa_ctx->enc_key_id); -err_sa_ctx: - mutex_unlock(&mxfrm->lock); - kfree(sa_ctx); - return ERR_PTR(err); -} - -static void mlx5_ipsec_offload_delete_sa_ctx(void *context) -{ - struct mlx5_ipsec_sa_ctx *sa_ctx = (struct mlx5_ipsec_sa_ctx *)context; - struct mlx5_ipsec_esp_xfrm *mxfrm = sa_ctx->mxfrm; - - mutex_lock(&mxfrm->lock); - mlx5_destroy_ipsec_obj(sa_ctx->dev, sa_ctx->ipsec_obj_id); - mlx5_destroy_encryption_key(sa_ctx->dev, sa_ctx->enc_key_id); - kfree(sa_ctx); - mxfrm->sa_ctx = NULL; - mutex_unlock(&mxfrm->lock); -} - -static int mlx5_ipsec_offload_init(struct mlx5_core_dev *mdev) -{ - return 0; -} - -static int mlx5_modify_ipsec_obj(struct mlx5_core_dev *mdev, - struct mlx5_ipsec_obj_attrs *attrs, - u32 ipsec_id) -{ - u32 in[MLX5_ST_SZ_DW(modify_ipsec_obj_in)] = {}; - u32 out[MLX5_ST_SZ_DW(query_ipsec_obj_out)]; - u64 modify_field_select = 0; - u64 general_obj_types; - void *obj; - int err; - - if (!(attrs->accel_flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED)) - return 0; - - general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); - if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC)) - return -EINVAL; - - /* general object fields set */ - MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_IPSEC); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, ipsec_id); - err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); - if (err) { - mlx5_core_err(mdev, "Query IPsec object failed (Object id %d), err = %d\n", - ipsec_id, err); - return err; - } - - obj = MLX5_ADDR_OF(query_ipsec_obj_out, out, ipsec_object); - modify_field_select = MLX5_GET64(ipsec_obj, obj, modify_field_select); - - /* esn */ - if (!(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP) || - !(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB)) - return -EOPNOTSUPP; - - obj = MLX5_ADDR_OF(modify_ipsec_obj_in, in, ipsec_object); - MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn_msb); - if (attrs->accel_flags & MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) - MLX5_SET(ipsec_obj, obj, esn_overlap, 1); - - /* general object fields set */ - MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); - - return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); -} - -static int mlx5_ipsec_offload_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs) -{ - struct mlx5_ipsec_obj_attrs ipsec_attrs = {}; - struct mlx5_core_dev *mdev = xfrm->mdev; - struct mlx5_ipsec_esp_xfrm *mxfrm; - - int err = 0; - - if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs))) - return 0; - - if (mlx5_ipsec_offload_esp_validate_xfrm_attrs(mdev, attrs)) - return -EOPNOTSUPP; - - mxfrm = container_of(xfrm, struct mlx5_ipsec_esp_xfrm, accel_xfrm); - - mutex_lock(&mxfrm->lock); - - if (!mxfrm->sa_ctx) - /* Not bound xfrm, change only sw attrs */ - goto change_sw_xfrm_attrs; - - /* need to add find and replace in ipsec_rhash_sa the sa_ctx */ - /* modify device with new hw_sa */ - ipsec_attrs.accel_flags = attrs->flags; - ipsec_attrs.esn_msb = attrs->esn; - err = mlx5_modify_ipsec_obj(mdev, - &ipsec_attrs, - mxfrm->sa_ctx->ipsec_obj_id); - -change_sw_xfrm_attrs: - if (!err) - memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs)); - - mutex_unlock(&mxfrm->lock); - return err; -} - -static const struct mlx5_accel_ipsec_ops ipsec_offload_ops = { - .device_caps = mlx5_ipsec_offload_device_caps, - .create_hw_context = mlx5_ipsec_offload_create_sa_ctx, - .free_hw_context = mlx5_ipsec_offload_delete_sa_ctx, - .init = mlx5_ipsec_offload_init, - .esp_create_xfrm = mlx5_ipsec_offload_esp_create_xfrm, - .esp_destroy_xfrm = mlx5_ipsec_offload_esp_destroy_xfrm, - .esp_modify_xfrm = mlx5_ipsec_offload_esp_modify_xfrm, -}; - -const struct mlx5_accel_ipsec_ops *mlx5_ipsec_offload_ops(struct mlx5_core_dev *mdev) -{ - if (!mlx5_ipsec_offload_device_caps(mdev)) - return NULL; - - return &ipsec_offload_ops; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h deleted file mode 100644 index 970c66d19c1d..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ - -#ifndef __MLX5_IPSEC_OFFLOAD_H__ -#define __MLX5_IPSEC_OFFLOAD_H__ - -#include <linux/mlx5/driver.h> -#include "accel/ipsec.h" - -#ifdef CONFIG_MLX5_IPSEC - -const struct mlx5_accel_ipsec_ops *mlx5_ipsec_offload_ops(struct mlx5_core_dev *mdev); -static inline bool mlx5_is_ipsec_device(struct mlx5_core_dev *mdev) -{ - if (!MLX5_CAP_GEN(mdev, ipsec_offload)) - return false; - - if (!MLX5_CAP_GEN(mdev, log_max_dek)) - return false; - - if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC)) - return false; - - return MLX5_CAP_IPSEC(mdev, ipsec_crypto_offload) && - MLX5_CAP_ETH(mdev, insert_trailer); -} - -#else -static inline const struct mlx5_accel_ipsec_ops * -mlx5_ipsec_offload_ops(struct mlx5_core_dev *mdev) { return NULL; } -static inline bool mlx5_is_ipsec_device(struct mlx5_core_dev *mdev) -{ - return false; -} - -#endif /* CONFIG_MLX5_IPSEC */ -#endif /* __MLX5_IPSEC_OFFLOAD_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c deleted file mode 100644 index 6c2b86a26863..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/mlx5/device.h> - -#include "accel/tls.h" -#include "mlx5_core.h" -#include "lib/mlx5.h" - -#ifdef CONFIG_MLX5_FPGA_TLS -#include "fpga/tls.h" - -int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn, u32 *p_swid, - bool direction_sx) -{ - return mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, - start_offload_tcp_sn, p_swid, - direction_sx); -} - -void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, - bool direction_sx) -{ - mlx5_fpga_tls_del_flow(mdev, swid, GFP_KERNEL, direction_sx); -} - -int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, - u32 seq, __be64 rcd_sn) -{ - return mlx5_fpga_tls_resync_rx(mdev, handle, seq, rcd_sn); -} - -bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) -{ - return mlx5_fpga_is_tls_device(mdev) || - mlx5_accel_is_ktls_device(mdev); -} - -u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) -{ - return mlx5_fpga_tls_device_caps(mdev); -} - -int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) -{ - return mlx5_fpga_tls_init(mdev); -} - -void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) -{ - mlx5_fpga_tls_cleanup(mdev); -} -#endif - -#ifdef CONFIG_MLX5_TLS -int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, - struct tls_crypto_info *crypto_info, - u32 *p_key_id) -{ - u32 sz_bytes; - void *key; - - switch (crypto_info->cipher_type) { - case TLS_CIPHER_AES_GCM_128: { - struct tls12_crypto_info_aes_gcm_128 *info = - (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; - - key = info->key; - sz_bytes = sizeof(info->key); - break; - } - case TLS_CIPHER_AES_GCM_256: { - struct tls12_crypto_info_aes_gcm_256 *info = - (struct tls12_crypto_info_aes_gcm_256 *)crypto_info; - - key = info->key; - sz_bytes = sizeof(info->key); - break; - } - default: - return -EINVAL; - } - - return mlx5_create_encryption_key(mdev, key, sz_bytes, - MLX5_ACCEL_OBJ_TLS_KEY, - p_key_id); -} - -void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) -{ - mlx5_destroy_encryption_key(mdev, key_id); -} -#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h deleted file mode 100644 index fd874f0c380a..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef __MLX5_ACCEL_TLS_H__ -#define __MLX5_ACCEL_TLS_H__ - -#include <linux/mlx5/driver.h> -#include <linux/tls.h> - -#ifdef CONFIG_MLX5_TLS -int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, - struct tls_crypto_info *crypto_info, - u32 *p_key_id); -void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id); - -static inline bool mlx5_accel_is_ktls_tx(struct mlx5_core_dev *mdev) -{ - return MLX5_CAP_GEN(mdev, tls_tx); -} - -static inline bool mlx5_accel_is_ktls_rx(struct mlx5_core_dev *mdev) -{ - return MLX5_CAP_GEN(mdev, tls_rx); -} - -static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) -{ - if (!mlx5_accel_is_ktls_tx(mdev) && - !mlx5_accel_is_ktls_rx(mdev)) - return false; - - if (!MLX5_CAP_GEN(mdev, log_max_dek)) - return false; - - return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); -} - -static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, - struct tls_crypto_info *crypto_info) -{ - switch (crypto_info->cipher_type) { - case TLS_CIPHER_AES_GCM_128: - if (crypto_info->version == TLS_1_2_VERSION) - return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); - break; - } - - return false; -} -#else -static inline bool mlx5_accel_is_ktls_tx(struct mlx5_core_dev *mdev) -{ return false; } - -static inline bool mlx5_accel_is_ktls_rx(struct mlx5_core_dev *mdev) -{ return false; } - -static inline int -mlx5_ktls_create_key(struct mlx5_core_dev *mdev, - struct tls_crypto_info *crypto_info, - u32 *p_key_id) { return -ENOTSUPP; } -static inline void -mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) {} - -static inline bool -mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; } -static inline bool -mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, - struct tls_crypto_info *crypto_info) { return false; } -#endif - -enum { - MLX5_ACCEL_TLS_TX = BIT(0), - MLX5_ACCEL_TLS_RX = BIT(1), - MLX5_ACCEL_TLS_V12 = BIT(2), - MLX5_ACCEL_TLS_V13 = BIT(3), - MLX5_ACCEL_TLS_LRO = BIT(4), - MLX5_ACCEL_TLS_IPV6 = BIT(5), - MLX5_ACCEL_TLS_AES_GCM128 = BIT(30), - MLX5_ACCEL_TLS_AES_GCM256 = BIT(31), -}; - -struct mlx5_ifc_tls_flow_bits { - u8 src_port[0x10]; - u8 dst_port[0x10]; - union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6; - union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6; - u8 ipv6[0x1]; - u8 direction_sx[0x1]; - u8 reserved_at_2[0x1e]; -}; - -#ifdef CONFIG_MLX5_FPGA_TLS -int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn, u32 *p_swid, - bool direction_sx); -void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, - bool direction_sx); -int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, - u32 seq, __be64 rcd_sn); -bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev); -u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev); -int mlx5_accel_tls_init(struct mlx5_core_dev *mdev); -void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev); - -#else - -static inline int -mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn, u32 *p_swid, - bool direction_sx) { return -ENOTSUPP; } -static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, - bool direction_sx) { } -static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, - u32 seq, __be64 rcd_sn) { return 0; } -static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) -{ - return mlx5_accel_is_ktls_device(mdev); -} -static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; } -static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; } -static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { } -#endif - -#endif /* __MLX5_ACCEL_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 291e427e9e4f..6aca004e88cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -71,53 +71,6 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, return cpu_handle; } -static int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, - struct mlx5_frag_buf *buf, int node) -{ - dma_addr_t t; - - buf->size = size; - buf->npages = 1; - buf->page_shift = (u8)get_order(size) + PAGE_SHIFT; - - buf->frags = kzalloc(sizeof(*buf->frags), GFP_KERNEL); - if (!buf->frags) - return -ENOMEM; - - buf->frags->buf = mlx5_dma_zalloc_coherent_node(dev, size, - &t, node); - if (!buf->frags->buf) - goto err_out; - - buf->frags->map = t; - - while (t & ((1 << buf->page_shift) - 1)) { - --buf->page_shift; - buf->npages *= 2; - } - - return 0; -err_out: - kfree(buf->frags); - return -ENOMEM; -} - -int mlx5_buf_alloc(struct mlx5_core_dev *dev, - int size, struct mlx5_frag_buf *buf) -{ - return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node); -} -EXPORT_SYMBOL(mlx5_buf_alloc); - -void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf) -{ - dma_free_coherent(mlx5_core_dma_dev(dev), buf->size, buf->frags->buf, - buf->frags->map); - - kfree(buf->frags); -} -EXPORT_SYMBOL_GPL(mlx5_buf_free); - int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_frag_buf *buf, int node) { @@ -183,11 +136,11 @@ static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev, u32 db_per_page = PAGE_SIZE / cache_line_size(); struct mlx5_db_pgdir *pgdir; - pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL); + pgdir = kzalloc_node(sizeof(*pgdir), GFP_KERNEL, node); if (!pgdir) return NULL; - pgdir->bitmap = bitmap_zalloc(db_per_page, GFP_KERNEL); + pgdir->bitmap = bitmap_zalloc_node(db_per_page, GFP_KERNEL, node); if (!pgdir->bitmap) { kfree(pgdir); return NULL; @@ -260,12 +213,6 @@ out: } EXPORT_SYMBOL_GPL(mlx5_db_alloc_node); -int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db) -{ - return mlx5_db_alloc_node(dev, db, dev->priv.numa_node); -} -EXPORT_SYMBOL_GPL(mlx5_db_alloc); - void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) { u32 db_per_page = PAGE_SIZE / cache_line_size(); @@ -286,19 +233,6 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) } EXPORT_SYMBOL_GPL(mlx5_db_free); -void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas) -{ - u64 addr; - int i; - - for (i = 0; i < buf->npages; i++) { - addr = buf->frags->map + (i << buf->page_shift); - - pas[i] = cpu_to_be64(addr); - } -} -EXPORT_SYMBOL_GPL(mlx5_fill_page_array); - void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm) { int i; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a46284ca5172..2e0d59ca62b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -31,7 +31,6 @@ */ #include <linux/highmem.h> -#include <linux/module.h> #include <linux/errno.h> #include <linux/pci.h> #include <linux/dma-mapping.h> @@ -131,11 +130,8 @@ static int cmd_alloc_index(struct mlx5_cmd *cmd) static void cmd_free_index(struct mlx5_cmd *cmd, int idx) { - unsigned long flags; - - spin_lock_irqsave(&cmd->alloc_lock, flags); + lockdep_assert_held(&cmd->alloc_lock); set_bit(idx, &cmd->bitmask); - spin_unlock_irqrestore(&cmd->alloc_lock, flags); } static void cmd_ent_get(struct mlx5_cmd_work_ent *ent) @@ -145,13 +141,21 @@ static void cmd_ent_get(struct mlx5_cmd_work_ent *ent) static void cmd_ent_put(struct mlx5_cmd_work_ent *ent) { + struct mlx5_cmd *cmd = ent->cmd; + unsigned long flags; + + spin_lock_irqsave(&cmd->alloc_lock, flags); if (!refcount_dec_and_test(&ent->refcnt)) - return; + goto out; - if (ent->idx >= 0) - cmd_free_index(ent->cmd, ent->idx); + if (ent->idx >= 0) { + cmd_free_index(cmd, ent->idx); + up(ent->page_queue ? &cmd->pages_sem : &cmd->sem); + } cmd_free_ent(ent); +out: + spin_unlock_irqrestore(&cmd->alloc_lock, flags); } static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) @@ -186,10 +190,10 @@ static int verify_block_sig(struct mlx5_cmd_prot_block *block) int xor_len = sizeof(*block) - sizeof(block->data) - 1; if (xor8_buf(block, rsvd0_off, xor_len) != 0xff) - return -EINVAL; + return -EHWPOISON; if (xor8_buf(block, 0, sizeof(*block)) != 0xff) - return -EINVAL; + return -EHWPOISON; return 0; } @@ -255,12 +259,12 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent) sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay)); if (sig != 0xff) - return -EINVAL; + return -EHWPOISON; for (i = 0; i < n && next; i++) { err = verify_block_sig(next->buf); if (err) - return err; + return -EHWPOISON; next = next->next; } @@ -473,9 +477,14 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_VHCA_STATE: case MLX5_CMD_OP_MODIFY_VHCA_STATE: case MLX5_CMD_OP_ALLOC_SF: + case MLX5_CMD_OP_SUSPEND_VHCA: + case MLX5_CMD_OP_RESUME_VHCA: + case MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE: + case MLX5_CMD_OP_SAVE_VHCA_STATE: + case MLX5_CMD_OP_LOAD_VHCA_STATE: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; - return -EIO; + return -ENOLINK; default: mlx5_core_err(dev, "Unknown FW command (%d)\n", op); return -EINVAL; @@ -670,6 +679,11 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(MODIFY_VHCA_STATE); MLX5_COMMAND_STR_CASE(ALLOC_SF); MLX5_COMMAND_STR_CASE(DEALLOC_SF); + MLX5_COMMAND_STR_CASE(SUSPEND_VHCA); + MLX5_COMMAND_STR_CASE(RESUME_VHCA); + MLX5_COMMAND_STR_CASE(QUERY_VHCA_MIGRATION_STATE); + MLX5_COMMAND_STR_CASE(SAVE_VHCA_STATE); + MLX5_COMMAND_STR_CASE(LOAD_VHCA_STATE); default: return "unknown command opcode"; } } @@ -756,44 +770,72 @@ struct mlx5_ifc_mbox_in_bits { u8 reserved_at_40[0x40]; }; -void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome) +void mlx5_cmd_out_err(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out) { - *status = MLX5_GET(mbox_out, out, status); - *syndrome = MLX5_GET(mbox_out, out, syndrome); + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); + u8 status = MLX5_GET(mbox_out, out, status); + + mlx5_core_err_rl(dev, + "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n", + mlx5_command_str(opcode), opcode, op_mod, + cmd_status_str(status), status, syndrome, cmd_status_to_err(status)); } +EXPORT_SYMBOL(mlx5_cmd_out_err); -static int mlx5_cmd_check(struct mlx5_core_dev *dev, void *in, void *out) +static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) { + u16 opcode, op_mod; u32 syndrome; u8 status; - u16 opcode; - u16 op_mod; u16 uid; + int err; - mlx5_cmd_mbox_status(out, &status, &syndrome); - if (!status) - return 0; + syndrome = MLX5_GET(mbox_out, out, syndrome); + status = MLX5_GET(mbox_out, out, status); opcode = MLX5_GET(mbox_in, in, opcode); op_mod = MLX5_GET(mbox_in, in, op_mod); uid = MLX5_GET(mbox_in, in, uid); + err = cmd_status_to_err(status); + if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY) - mlx5_core_err_rl(dev, - "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n", - mlx5_command_str(opcode), opcode, op_mod, - cmd_status_str(status), status, syndrome); + mlx5_cmd_out_err(dev, opcode, op_mod, out); else mlx5_core_dbg(dev, - "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n", - mlx5_command_str(opcode), - opcode, op_mod, - cmd_status_str(status), - status, - syndrome); + "%s(0x%x) op_mod(0x%x) uid(%d) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n", + mlx5_command_str(opcode), opcode, op_mod, uid, + cmd_status_str(status), status, syndrome, err); +} + +int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out) +{ + /* aborted due to PCI error or via reset flow mlx5_cmd_trigger_completions() */ + if (err == -ENXIO) { + u16 opcode = MLX5_GET(mbox_in, in, opcode); + u32 syndrome; + u8 status; + + /* PCI Error, emulate command return status, for smooth reset */ + err = mlx5_internal_err_ret_value(dev, opcode, &syndrome, &status); + MLX5_SET(mbox_out, out, status, status); + MLX5_SET(mbox_out, out, syndrome, syndrome); + if (!err) + return 0; + } + + /* driver or FW delivery error */ + if (err != -EREMOTEIO && err) + return err; - return cmd_status_to_err(status); + /* check outbox status */ + err = cmd_status_to_err(MLX5_GET(mbox_out, out, status)); + if (err) + cmd_status_print(dev, in, out); + + return err; } +EXPORT_SYMBOL(mlx5_cmd_check); static void dump_command(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent, int input) @@ -900,25 +942,6 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) return cmd->allowed_opcode == opcode; } -static int cmd_alloc_index_retry(struct mlx5_cmd *cmd) -{ - unsigned long alloc_end = jiffies + msecs_to_jiffies(1000); - int idx; - -retry: - idx = cmd_alloc_index(cmd); - if (idx < 0 && time_before(jiffies, alloc_end)) { - /* Index allocation can fail on heavy load of commands. This is a temporary - * situation as the current command already holds the semaphore, meaning that - * another command completion is being handled and it is expected to release - * the entry index soon. - */ - cpu_relax(); - goto retry; - } - return idx; -} - bool mlx5_cmd_is_down(struct mlx5_core_dev *dev) { return pci_channel_offline(dev->pdev) || @@ -946,7 +969,7 @@ static void cmd_work_handler(struct work_struct *work) sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { - alloc_ret = cmd_alloc_index_retry(cmd); + alloc_ret = cmd_alloc_index(cmd); if (alloc_ret < 0) { mlx5_core_err_rl(dev, "failed to allocate command entry\n"); if (ent->callback) { @@ -995,13 +1018,7 @@ static void cmd_work_handler(struct work_struct *work) /* Skip sending command to fw if internal error */ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) { - u8 status = 0; - u32 drv_synd; - - ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status); - MLX5_SET(mbox_out, ent->out, status, status); - MLX5_SET(mbox_out, ent->out, syndrome, drv_synd); - + ent->ret = -ENXIO; mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); return; } @@ -1020,6 +1037,31 @@ static void cmd_work_handler(struct work_struct *work) } } +static int deliv_status_to_err(u8 status) +{ + switch (status) { + case MLX5_CMD_DELIVERY_STAT_OK: + case MLX5_DRIVER_STATUS_ABORTED: + return 0; + case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: + case MLX5_CMD_DELIVERY_STAT_TOK_ERR: + return -EBADR; + case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: + case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: + case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: + return -EFAULT; /* Bad address */ + case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: + case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: + case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: + case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: + return -ENOMSG; + case MLX5_CMD_DELIVERY_STAT_FW_ERR: + return -EIO; + default: + return -EINVAL; + } +} + static const char *deliv_status_to_str(u8 status) { switch (status) { @@ -1116,16 +1158,27 @@ out_err: /* Notes: * 1. Callback functions may not sleep * 2. page queue commands do not support asynchrous completion + * + * return value in case (!callback): + * ret < 0 : Command execution couldn't be submitted by driver + * ret > 0 : Command execution couldn't be performed by firmware + * ret == 0: Command was executed by FW, Caller must check FW outbox status. + * + * return value in case (callback): + * ret < 0 : Command execution couldn't be submitted by driver + * ret == 0: Command will be submitted to FW for execution + * and the callback will be called for further status updates */ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, struct mlx5_cmd_msg *out, void *uout, int uout_size, mlx5_cmd_cbk_t callback, - void *context, int page_queue, u8 *status, + void *context, int page_queue, u8 token, bool force_polling) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; struct mlx5_cmd_stats *stats; + u8 status = 0; int err = 0; s64 ds; u16 op; @@ -1156,12 +1209,12 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, cmd_work_handler(&ent->work); } else if (!queue_work(cmd->wq, &ent->work)) { mlx5_core_warn(dev, "failed to queue work\n"); - err = -ENOMEM; + err = -EALREADY; goto out_free; } if (callback) - goto out; /* mlx5_cmd_comp_handler() will put(ent) */ + return 0; /* mlx5_cmd_comp_handler() will put(ent) */ err = wait_func(dev, ent); if (err == -ETIMEDOUT || err == -ECANCELED) @@ -1179,12 +1232,11 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, "fw exec time for %s is %lld nsec\n", mlx5_command_str(op), ds); - *status = ent->status; out_free: + status = ent->status; cmd_ent_put(ent); -out: - return err; + return err ? : status; } static ssize_t dbg_write(struct file *filp, const char __user *buf, @@ -1501,7 +1553,7 @@ static void create_debugfs_files(struct mlx5_core_dev *dev) { struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; - dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root); + dbg->dbg_root = debugfs_create_dir("cmd", mlx5_debugfs_get_dev_root(dev)); debugfs_create_file("in", 0400, dbg->dbg_root, dev, &dfops); debugfs_create_file("out", 0200, dbg->dbg_root, dev, &dfops); @@ -1602,8 +1654,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force vector = vec & 0xffffffff; for (i = 0; i < (1 << cmd->log_sz); i++) { if (test_bit(i, &vector)) { - struct semaphore *sem; - ent = cmd->ent_arr[i]; /* if we already completed the command, ignore it */ @@ -1626,22 +1676,18 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) cmd_ent_put(ent); - if (ent->page_queue) - sem = &cmd->pages_sem; - else - sem = &cmd->sem; ent->ts2 = ktime_get_ns(); memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out)); dump_command(dev, ent, 0); - if (!ent->ret) { + + if (vec & MLX5_TRIGGERED_CMD_COMP) + ent->ret = -ENXIO; + + if (!ent->ret) { /* Command completed by FW */ if (!cmd->checksum_disabled) ent->ret = verify_signature(ent); - else - ent->ret = 0; - if (vec & MLX5_TRIGGERED_CMD_COMP) - ent->status = MLX5_DRIVER_STATUS_ABORTED; - else - ent->status = ent->lay->status_own >> 1; + + ent->status = ent->lay->status_own >> 1; mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n", ent->ret, deliv_status_to_str(ent->status), ent->status); @@ -1659,21 +1705,18 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force callback = ent->callback; context = ent->context; - err = ent->ret; - if (!err) { + err = ent->ret ? : ent->status; + if (err > 0) /* Failed in FW, command didn't execute */ + err = deliv_status_to_err(err); + + if (!err) err = mlx5_copy_from_msg(ent->uout, ent->out, ent->uout_size); - err = err ? err : mlx5_cmd_check(dev, - ent->in->first.data, - ent->uout); - } - mlx5_free_cmd_msg(dev, ent->out); free_msg(dev, ent->in); - err = err ? err : ent->status; /* final consumer is done, release ent */ cmd_ent_put(ent); callback(err, context); @@ -1683,12 +1726,11 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force */ complete(&ent->done); } - up(sem); } } } -void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev) +static void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev) { struct mlx5_cmd *cmd = &dev->cmd; unsigned long bitmask; @@ -1728,12 +1770,17 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev) struct mlx5_cmd *cmd = &dev->cmd; int i; - for (i = 0; i < cmd->max_reg_cmds; i++) - while (down_trylock(&cmd->sem)) + for (i = 0; i < cmd->max_reg_cmds; i++) { + while (down_trylock(&cmd->sem)) { mlx5_cmd_trigger_completions(dev); + cond_resched(); + } + } - while (down_trylock(&cmd->pages_sem)) + while (down_trylock(&cmd->pages_sem)) { mlx5_cmd_trigger_completions(dev); + cond_resched(); + } /* Unlock cmdif */ up(&cmd->pages_sem); @@ -1741,31 +1788,6 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev) up(&cmd->sem); } -static int status_to_err(u8 status) -{ - switch (status) { - case MLX5_CMD_DELIVERY_STAT_OK: - case MLX5_DRIVER_STATUS_ABORTED: - return 0; - case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: - case MLX5_CMD_DELIVERY_STAT_TOK_ERR: - return -EBADR; - case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: - case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: - case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: - return -EFAULT; /* Bad address */ - case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: - case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: - case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: - case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: - return -ENOMSG; - case MLX5_CMD_DELIVERY_STAT_FW_ERR: - return -EIO; - default: - return -EINVAL; - } -} - static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, gfp_t gfp) { @@ -1809,27 +1831,23 @@ static int is_manage_pages(void *in) return MLX5_GET(mbox_in, in, opcode) == MLX5_CMD_OP_MANAGE_PAGES; } +/* Notes: + * 1. Callback functions may not sleep + * 2. Page queue commands do not support asynchrous completion + */ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size, mlx5_cmd_cbk_t callback, void *context, bool force_polling) { - struct mlx5_cmd_msg *inb; - struct mlx5_cmd_msg *outb; + u16 opcode = MLX5_GET(mbox_in, in, opcode); + struct mlx5_cmd_msg *inb, *outb; int pages_queue; gfp_t gfp; - int err; - u8 status = 0; - u32 drv_synd; - u16 opcode; u8 token; + int err; - opcode = MLX5_GET(mbox_in, in, opcode); - if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) { - err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status); - MLX5_SET(mbox_out, out, status, status); - MLX5_SET(mbox_out, out, syndrome, drv_synd); - return err; - } + if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) + return -ENXIO; pages_queue = is_manage_pages(in); gfp = callback ? GFP_ATOMIC : GFP_KERNEL; @@ -1855,46 +1873,143 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, } err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, - pages_queue, &status, token, force_polling); + pages_queue, token, force_polling); + if (callback) + return err; + + if (err > 0) /* Failed in FW, command didn't execute */ + err = deliv_status_to_err(err); + if (err) goto out_out; - mlx5_core_dbg(dev, "err %d, status %d\n", err, status); - if (status) { - err = status_to_err(status); - goto out_out; + /* command completed by FW */ + err = mlx5_copy_from_msg(out, outb, out_size); +out_out: + mlx5_free_cmd_msg(dev, outb); +out_in: + free_msg(dev, inb); + return err; +} + +static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, + u32 syndrome, int err) +{ + struct mlx5_cmd_stats *stats; + + if (!err) + return; + + stats = &dev->cmd.stats[opcode]; + spin_lock_irq(&stats->lock); + stats->failed++; + if (err < 0) + stats->last_failed_errno = -err; + if (err == -EREMOTEIO) { + stats->failed_mbox_status++; + stats->last_failed_mbox_status = status; + stats->last_failed_syndrome = syndrome; } + spin_unlock_irq(&stats->lock); +} - if (!callback) - err = mlx5_copy_from_msg(out, outb, out_size); +/* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */ +static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out) +{ + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); + u8 status = MLX5_GET(mbox_out, out, status); -out_out: - if (!callback) - mlx5_free_cmd_msg(dev, outb); + if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */ + err = -EIO; -out_in: - if (!callback) - free_msg(dev, inb); + if (!err && status != MLX5_CMD_STAT_OK) + err = -EREMOTEIO; + + cmd_status_log(dev, opcode, status, syndrome, err); + return err; +} + +/** + * mlx5_cmd_do - Executes a fw command, wait for completion. + * Unlike mlx5_cmd_exec, this function will not translate or intercept + * outbox.status and will return -EREMOTEIO when + * outbox.status != MLX5_CMD_STAT_OK + * + * @dev: mlx5 core device + * @in: inbox mlx5_ifc command buffer + * @in_size: inbox buffer size + * @out: outbox mlx5_ifc buffer + * @out_size: outbox size + * + * @return: + * -EREMOTEIO : Command executed by FW, outbox.status != MLX5_CMD_STAT_OK. + * Caller must check FW outbox status. + * 0 : Command execution successful, outbox.status == MLX5_CMD_STAT_OK. + * < 0 : Command execution couldn't be performed by firmware or driver + */ +int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) +{ + int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); + u16 opcode = MLX5_GET(mbox_in, in, opcode); + + err = cmd_status_err(dev, err, opcode, out); return err; } +EXPORT_SYMBOL(mlx5_cmd_do); +/** + * mlx5_cmd_exec - Executes a fw command, wait for completion + * + * @dev: mlx5 core device + * @in: inbox mlx5_ifc command buffer + * @in_size: inbox buffer size + * @out: outbox mlx5_ifc buffer + * @out_size: outbox size + * + * @return: 0 if no error, FW command execution was successful + * and outbox status is ok. + */ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) { - int err; + int err = mlx5_cmd_do(dev, in, in_size, out, out_size); - err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); - return err ? : mlx5_cmd_check(dev, in, out); + return mlx5_cmd_check(dev, err, in, out); } EXPORT_SYMBOL(mlx5_cmd_exec); +/** + * mlx5_cmd_exec_polling - Executes a fw command, poll for completion + * Needed for driver force teardown, when command completion EQ + * will not be available to complete the command + * + * @dev: mlx5 core device + * @in: inbox mlx5_ifc command buffer + * @in_size: inbox buffer size + * @out: outbox mlx5_ifc buffer + * @out_size: outbox size + * + * @return: 0 if no error, FW command execution was successful + * and outbox status is ok. + */ +int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, + void *out, int out_size) +{ + int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); + u16 opcode = MLX5_GET(mbox_in, in, opcode); + + err = cmd_status_err(dev, err, opcode, out); + return mlx5_cmd_check(dev, err, in, out); +} +EXPORT_SYMBOL(mlx5_cmd_exec_polling); + void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, struct mlx5_async_ctx *ctx) { ctx->dev = dev; /* Starts at 1 to avoid doing wake_up if we are not cleaning up */ atomic_set(&ctx->num_inflight, 1); - init_waitqueue_head(&ctx->wait); + init_completion(&ctx->inflight_done); } EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); @@ -1908,19 +2023,21 @@ EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); */ void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx) { - atomic_dec(&ctx->num_inflight); - wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0); + if (!atomic_dec_and_test(&ctx->num_inflight)) + wait_for_completion(&ctx->inflight_done); } EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); static void mlx5_cmd_exec_cb_handler(int status, void *_work) { struct mlx5_async_work *work = _work; - struct mlx5_async_ctx *ctx = work->ctx; + struct mlx5_async_ctx *ctx; + ctx = work->ctx; + status = cmd_status_err(ctx->dev, status, work->opcode, work->out); work->user_callback(status, work); if (atomic_dec_and_test(&ctx->num_inflight)) - wake_up(&ctx->wait); + complete(&ctx->inflight_done); } int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, @@ -1931,28 +2048,19 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, work->ctx = ctx; work->user_callback = callback; + work->opcode = MLX5_GET(mbox_in, in, opcode); + work->out = out; if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) return -EIO; ret = cmd_exec(ctx->dev, in, in_size, out, out_size, mlx5_cmd_exec_cb_handler, work, false); if (ret && atomic_dec_and_test(&ctx->num_inflight)) - wake_up(&ctx->wait); + complete(&ctx->inflight_done); return ret; } EXPORT_SYMBOL(mlx5_cmd_exec_cb); -int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size) -{ - int err; - - err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); - - return err ? : mlx5_cmd_check(dev, in, out); -} -EXPORT_SYMBOL(mlx5_cmd_exec_polling); - static void destroy_msg_cache(struct mlx5_core_dev *dev) { struct cmd_msg_cache *ch; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 5371ad0a12eb..4caa1b6f40ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -31,7 +31,6 @@ */ #include <linux/kernel.h> -#include <linux/module.h> #include <linux/hardirq.h> #include <linux/mlx5/driver.h> #include <rdma/ib_verbs.h> @@ -86,8 +85,9 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, spin_unlock_irqrestore(&tasklet_ctx->lock, flags); } -int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - u32 *in, int inlen, u32 *out, int outlen) +/* Callers must verify outbox status in case of err */ +int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen, u32 *out, int outlen) { int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn_or_apu_element); @@ -101,7 +101,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, memset(out, 0, outlen); MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ); - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + err = mlx5_cmd_do(dev, in, inlen, out, outlen); if (err) return err; @@ -148,6 +148,16 @@ err_cmd: mlx5_cmd_exec_in(dev, destroy_cq, din); return err; } +EXPORT_SYMBOL(mlx5_create_cq); + +/* oubox is checked and err val is normalized */ +int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen, u32 *out, int outlen) +{ + int err = mlx5_create_cq(dev, cq, in, inlen, out, outlen); + + return mlx5_cmd_check(dev, err, in, out); +} EXPORT_SYMBOL(mlx5_core_create_cq); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 10d195042ab5..3e232a65a0c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -30,7 +30,6 @@ * SOFTWARE. */ -#include <linux/module.h> #include <linux/debugfs.h> #include <linux/mlx5/qp.h> #include <linux/mlx5/cq.h> @@ -99,26 +98,32 @@ void mlx5_unregister_debugfs(void) debugfs_remove(mlx5_debugfs_root); } +struct dentry *mlx5_debugfs_get_dev_root(struct mlx5_core_dev *dev) +{ + return dev->priv.dbg.dbg_root; +} +EXPORT_SYMBOL(mlx5_debugfs_get_dev_root); + void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev) { - dev->priv.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg_root); + dev->priv.dbg.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg.dbg_root); } EXPORT_SYMBOL(mlx5_qp_debugfs_init); void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev) { - debugfs_remove_recursive(dev->priv.qp_debugfs); + debugfs_remove_recursive(dev->priv.dbg.qp_debugfs); } EXPORT_SYMBOL(mlx5_qp_debugfs_cleanup); void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev) { - dev->priv.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg_root); + dev->priv.dbg.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg.dbg_root); } void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev) { - debugfs_remove_recursive(dev->priv.eq_debugfs); + debugfs_remove_recursive(dev->priv.dbg.eq_debugfs); } static ssize_t average_read(struct file *filp, char __user *buf, size_t count, @@ -161,6 +166,28 @@ static const struct file_operations stats_fops = { .write = average_write, }; +static ssize_t slots_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_cmd *cmd; + char tbuf[6]; + int weight; + int field; + int ret; + + cmd = filp->private_data; + weight = bitmap_weight(&cmd->bitmask, cmd->max_reg_cmds); + field = cmd->max_reg_cmds - weight; + ret = snprintf(tbuf, sizeof(tbuf), "%d\n", field); + return simple_read_from_buffer(buf, count, pos, tbuf, ret); +} + +static const struct file_operations slots_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = slots_read, +}; + void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev) { struct mlx5_cmd_stats *stats; @@ -168,8 +195,10 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev) const char *namep; int i; - cmd = &dev->priv.cmdif_debugfs; - *cmd = debugfs_create_dir("commands", dev->priv.dbg_root); + cmd = &dev->priv.dbg.cmdif_debugfs; + *cmd = debugfs_create_dir("commands", dev->priv.dbg.dbg_root); + + debugfs_create_file("slots_inuse", 0400, *cmd, &dev->cmd, &slots_fops); for (i = 0; i < MLX5_CMD_OP_MAX; i++) { stats = &dev->cmd.stats[i]; @@ -180,23 +209,53 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev) debugfs_create_file("average", 0400, stats->root, stats, &stats_fops); debugfs_create_u64("n", 0400, stats->root, &stats->n); + debugfs_create_u64("failed", 0400, stats->root, &stats->failed); + debugfs_create_u64("failed_mbox_status", 0400, stats->root, + &stats->failed_mbox_status); + debugfs_create_u32("last_failed_errno", 0400, stats->root, + &stats->last_failed_errno); + debugfs_create_u8("last_failed_mbox_status", 0400, stats->root, + &stats->last_failed_mbox_status); + debugfs_create_x32("last_failed_syndrome", 0400, stats->root, + &stats->last_failed_syndrome); } } } void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev) { - debugfs_remove_recursive(dev->priv.cmdif_debugfs); + debugfs_remove_recursive(dev->priv.dbg.cmdif_debugfs); } void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev) { - dev->priv.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg_root); + dev->priv.dbg.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg.dbg_root); } void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev) { - debugfs_remove_recursive(dev->priv.cq_debugfs); + debugfs_remove_recursive(dev->priv.dbg.cq_debugfs); +} + +void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev) +{ + struct dentry *pages; + + dev->priv.dbg.pages_debugfs = debugfs_create_dir("pages", dev->priv.dbg.dbg_root); + pages = dev->priv.dbg.pages_debugfs; + + debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages); + debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.vfs_pages); + debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.host_pf_pages); + debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed); + debugfs_create_u32("fw_pages_give_dropped", 0400, pages, &dev->priv.give_pages_dropped); + debugfs_create_u32("fw_pages_reclaim_discard", 0400, pages, + &dev->priv.reclaim_pages_discard); +} + +void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + debugfs_remove_recursive(dev->priv.dbg.pages_debugfs); } static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, @@ -441,7 +500,7 @@ int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) if (!mlx5_debugfs_root) return 0; - err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.qp_debugfs, + err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.dbg.qp_debugfs, &qp->dbg, qp->qpn, qp_fields, ARRAY_SIZE(qp_fields), qp); if (err) @@ -468,7 +527,7 @@ int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq) if (!mlx5_debugfs_root) return 0; - err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.eq_debugfs, + err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.dbg.eq_debugfs, &eq->dbg, eq->eqn, eq_fields, ARRAY_SIZE(eq_fields), eq); if (err) @@ -493,7 +552,7 @@ int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) if (!mlx5_debugfs_root) return 0; - err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.cq_debugfs, + err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.dbg.cq_debugfs, &cq->dbg, cq->cqn, cq_fields, ARRAY_SIZE(cq_fields), cq); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index a8b84d53dfb0..0571e40c6ee5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -340,8 +340,10 @@ int mlx5_attach_device(struct mlx5_core_dev *dev) struct auxiliary_driver *adrv; int ret = 0, i; + devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&mlx5_intf_mutex); priv->flags &= ~MLX5_PRIV_FLAGS_DETACH; + priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) { if (!priv->adev[i]) { bool is_supported = false; @@ -389,6 +391,7 @@ int mlx5_attach_device(struct mlx5_core_dev *dev) break; } } + priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; mutex_unlock(&mlx5_intf_mutex); return ret; } @@ -401,7 +404,9 @@ void mlx5_detach_device(struct mlx5_core_dev *dev) pm_message_t pm = {}; int i; + devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&mlx5_intf_mutex); + priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) { if (!priv->adev[i]) continue; @@ -430,6 +435,7 @@ skip_suspend: del_adev(&priv->adev[i]->adev); priv->adev[i] = NULL; } + priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; priv->flags |= MLX5_PRIV_FLAGS_DETACH; mutex_unlock(&mlx5_intf_mutex); } @@ -438,6 +444,7 @@ int mlx5_register_device(struct mlx5_core_dev *dev) { int ret; + devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&mlx5_intf_mutex); dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; ret = mlx5_rescan_drivers_locked(dev); @@ -450,6 +457,7 @@ int mlx5_register_device(struct mlx5_core_dev *dev) void mlx5_unregister_device(struct mlx5_core_dev *dev) { + devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&mlx5_intf_mutex); dev->priv.flags = MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; mlx5_rescan_drivers_locked(dev); @@ -526,19 +534,25 @@ del_adev: int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; + int err = 0; lockdep_assert_held(&mlx5_intf_mutex); if (priv->flags & MLX5_PRIV_FLAGS_DETACH) return 0; + priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; delete_drivers(dev); if (priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) - return 0; + goto out; + + err = add_drivers(dev); - return add_drivers(dev); +out: + priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; + return err; } -static bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev) +bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev) { u64 fsystem_guid, psystem_guid; @@ -555,12 +569,9 @@ static u32 mlx5_gen_pci_id(const struct mlx5_core_dev *dev) PCI_SLOT(dev->pdev->devfn)); } -static int next_phys_dev(struct device *dev, const void *data) +static int _next_phys_dev(struct mlx5_core_dev *mdev, + const struct mlx5_core_dev *curr) { - struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev); - struct mlx5_core_dev *mdev = madev->mdev; - const struct mlx5_core_dev *curr = data; - if (!mlx5_core_is_pf(mdev)) return 0; @@ -574,22 +585,52 @@ static int next_phys_dev(struct device *dev, const void *data) return 1; } -/* Must be called with intf_mutex held */ -struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) +static void *pci_get_other_drvdata(struct device *this, struct device *other) { - struct auxiliary_device *adev; - struct mlx5_adev *madev; + if (this->driver != other->driver) + return NULL; + + return pci_get_drvdata(to_pci_dev(other)); +} + +static int next_phys_dev_lag(struct device *dev, const void *data) +{ + struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data; + + mdev = pci_get_other_drvdata(this->device, dev); + if (!mdev) + return 0; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager) || + !MLX5_CAP_GEN(mdev, lag_master) || + (MLX5_CAP_GEN(mdev, num_lag_ports) > MLX5_MAX_PORTS || + MLX5_CAP_GEN(mdev, num_lag_ports) <= 1)) + return 0; + + return _next_phys_dev(mdev, data); +} + +static struct mlx5_core_dev *mlx5_get_next_dev(struct mlx5_core_dev *dev, + int (*match)(struct device *dev, const void *data)) +{ + struct device *next; if (!mlx5_core_is_pf(dev)) return NULL; - adev = auxiliary_find_device(NULL, dev, &next_phys_dev); - if (!adev) + next = bus_find_device(&pci_bus_type, NULL, dev, match); + if (!next) return NULL; - madev = container_of(adev, struct mlx5_adev, adev); - put_device(&adev->dev); - return madev->mdev; + put_device(next); + return pci_get_drvdata(to_pci_dev(next)); +} + +/* Must be called with intf_mutex held */ +struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev) +{ + lockdep_assert_held(&mlx5_intf_mutex); + return mlx5_get_next_dev(dev, &next_phys_dev_lag); } void mlx5_dev_list_lock(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 1c98652b244a..66c6a7017695 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -100,14 +100,19 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli } net_port_alive = !!(reset_type & MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE); - err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive); + err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive, extack); if (err) - goto out; + return err; err = mlx5_fw_reset_wait_reset_done(dev); -out: if (err) - NL_SET_ERR_MSG_MOD(extack, "FW activate command failed"); + return err; + + mlx5_unload_one_devl_locked(dev); + err = mlx5_health_wait_pci_up(dev); + if (err) + NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset"); + return err; } @@ -138,6 +143,7 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, struct mlx5_core_dev *dev = devlink_priv(devlink); struct pci_dev *pdev = dev->pdev; bool sf_dev_allocated; + int ret = 0; sf_dev_allocated = mlx5_sf_dev_allocated(dev); if (sf_dev_allocated) { @@ -160,17 +166,21 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: - mlx5_unload_one(dev); - return 0; + mlx5_unload_one_devl_locked(dev); + break; case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) - return mlx5_devlink_trigger_fw_live_patch(devlink, extack); - return mlx5_devlink_reload_fw_activate(devlink, extack); + ret = mlx5_devlink_trigger_fw_live_patch(devlink, extack); + else + ret = mlx5_devlink_reload_fw_activate(devlink, extack); + break; default: /* Unsupported action should not get to this function */ WARN_ON(1); - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; } + + return ret; } static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action, @@ -178,24 +188,27 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); + int ret = 0; *actions_performed = BIT(action); switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: - return mlx5_load_one(dev); + ret = mlx5_load_one_devl_locked(dev, false); + break; case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) break; /* On fw_activate action, also driver is reloaded and reinit performed */ *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); - return mlx5_load_one(dev); + ret = mlx5_load_one_devl_locked(dev, false); + break; default: /* Unsupported action should not get to this function */ WARN_ON(1); - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; } - return 0; + return ret; } static struct mlx5_devlink_trap *mlx5_find_trap_by_id(struct mlx5_core_dev *dev, int trap_id) @@ -546,6 +559,13 @@ static int mlx5_devlink_enable_remote_dev_reset_get(struct devlink *devlink, u32 return 0; } +static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + return (val.vu16 >= 64 && val.vu16 <= 4096) ? 0 : -EINVAL; +} + static const struct devlink_param mlx5_devlink_params[] = { DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, @@ -570,6 +590,10 @@ static const struct devlink_param mlx5_devlink_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME), mlx5_devlink_enable_remote_dev_reset_get, mlx5_devlink_enable_remote_dev_reset_set, NULL), + DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_eq_depth_validate), + DEVLINK_PARAM_GENERIC(EVENT_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_eq_depth_validate), }; static void mlx5_devlink_set_params_init_values(struct devlink *devlink) @@ -577,14 +601,6 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) struct mlx5_core_dev *dev = devlink_priv(devlink); union devlink_param_value value; - if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS) - strcpy(value.vstr, "dmfs"); - else - strcpy(value.vstr, "smfs"); - devlink_param_driverinit_value_set(devlink, - MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, - value); - value.vbool = MLX5_CAP_GEN(dev, roce); devlink_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, @@ -595,19 +611,17 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) devlink_param_driverinit_value_set(devlink, MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, value); - - if (MLX5_ESWITCH_MANAGER(dev)) { - if (mlx5_esw_vport_match_metadata_supported(dev->priv.eswitch)) { - dev->priv.eswitch->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; - value.vbool = true; - } else { - value.vbool = false; - } - devlink_param_driverinit_value_set(devlink, - MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, - value); - } #endif + + value.vu32 = MLX5_COMP_EQ_SIZE; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + value); + + value.vu32 = MLX5_NUM_ASYNC_EQE; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, + value); } static const struct devlink_param enable_eth_param = @@ -752,6 +766,66 @@ static void mlx5_devlink_auxdev_params_unregister(struct devlink *devlink) mlx5_devlink_eth_param_unregister(devlink); } +static int mlx5_devlink_max_uc_list_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (val.vu32 == 0) { + NL_SET_ERR_MSG_MOD(extack, "max_macs value must be greater than 0"); + return -EINVAL; + } + + if (!is_power_of_2(val.vu32)) { + NL_SET_ERR_MSG_MOD(extack, "Only power of 2 values are supported for max_macs"); + return -EINVAL; + } + + if (ilog2(val.vu32) > + MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list)) { + NL_SET_ERR_MSG_MOD(extack, "max_macs value is out of the supported range"); + return -EINVAL; + } + + return 0; +} + +static const struct devlink_param max_uc_list_param = + DEVLINK_PARAM_GENERIC(MAX_MACS, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_max_uc_list_validate); + +static int mlx5_devlink_max_uc_list_param_register(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + int err; + + if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported)) + return 0; + + err = devlink_param_register(devlink, &max_uc_list_param); + if (err) + return err; + + value.vu32 = 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + value); + return 0; +} + +static void +mlx5_devlink_max_uc_list_param_unregister(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported)) + return; + + devlink_param_unregister(devlink, &max_uc_list_param); +} + #define MLX5_TRAP_DROP(_id, _group_id) \ DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ @@ -771,28 +845,28 @@ static int mlx5_devlink_traps_register(struct devlink *devlink) struct mlx5_core_dev *core_dev = devlink_priv(devlink); int err; - err = devlink_trap_groups_register(devlink, mlx5_trap_groups_arr, - ARRAY_SIZE(mlx5_trap_groups_arr)); + err = devl_trap_groups_register(devlink, mlx5_trap_groups_arr, + ARRAY_SIZE(mlx5_trap_groups_arr)); if (err) return err; - err = devlink_traps_register(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr), - &core_dev->priv); + err = devl_traps_register(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr), + &core_dev->priv); if (err) goto err_trap_group; return 0; err_trap_group: - devlink_trap_groups_unregister(devlink, mlx5_trap_groups_arr, - ARRAY_SIZE(mlx5_trap_groups_arr)); + devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr, + ARRAY_SIZE(mlx5_trap_groups_arr)); return err; } static void mlx5_devlink_traps_unregister(struct devlink *devlink) { - devlink_traps_unregister(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr)); - devlink_trap_groups_unregister(devlink, mlx5_trap_groups_arr, - ARRAY_SIZE(mlx5_trap_groups_arr)); + devl_traps_unregister(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr)); + devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr, + ARRAY_SIZE(mlx5_trap_groups_arr)); } int mlx5_devlink_register(struct devlink *devlink) @@ -811,6 +885,10 @@ int mlx5_devlink_register(struct devlink *devlink) if (err) goto auxdev_reg_err; + err = mlx5_devlink_max_uc_list_param_register(devlink); + if (err) + goto max_uc_list_err; + err = mlx5_devlink_traps_register(devlink); if (err) goto traps_reg_err; @@ -821,6 +899,8 @@ int mlx5_devlink_register(struct devlink *devlink) return 0; traps_reg_err: + mlx5_devlink_max_uc_list_param_unregister(devlink); +max_uc_list_err: mlx5_devlink_auxdev_params_unregister(devlink); auxdev_reg_err: devlink_params_unregister(devlink, mlx5_devlink_params, @@ -831,6 +911,7 @@ auxdev_reg_err: void mlx5_devlink_unregister(struct devlink *devlink) { mlx5_devlink_traps_unregister(devlink); + mlx5_devlink_max_uc_list_param_unregister(devlink); mlx5_devlink_auxdev_params_unregister(devlink); devlink_params_unregister(devlink, mlx5_devlink_params, ARRAY_SIZE(mlx5_devlink_params)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index 7841ef6c193c..c5bb79a4fa57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -259,6 +259,9 @@ const char *parse_fs_dst(struct trace_seq *p, case MLX5_FLOW_DESTINATION_TYPE_PORT: trace_seq_printf(p, "port\n"); break; + case MLX5_FLOW_DESTINATION_TYPE_NONE: + trace_seq_printf(p, "none\n"); + break; } trace_seq_putc(p, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index eae9aa9c0811..978a2bb8e122 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -675,6 +675,9 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work) if (!tracer->owner) return; + if (unlikely(!tracer->str_db.loaded)) + goto arm; + block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE; start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE; @@ -732,6 +735,7 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work) &tmp_trace_block[TRACES_PER_BLOCK - 1]); } +arm: mlx5_fw_tracer_arm(dev); } @@ -1136,8 +1140,7 @@ static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void queue_work(tracer->work_queue, &tracer->ownership_change_work); break; case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE: - if (likely(tracer->str_db.loaded)) - queue_work(tracer->work_queue, &tracer->handle_traces_work); + queue_work(tracer->work_queue, &tracer->handle_traces_work); break; default: mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c index 538adab6878b..c5b560a8b026 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c @@ -31,6 +31,7 @@ static const char *const mlx5_rsc_sgmt_name[] = { struct mlx5_rsc_dump { u32 pdn; u32 mkey; + u32 number_of_menu_items; u16 fw_segment_type[MLX5_SGMT_TYPE_NUM]; }; @@ -50,21 +51,37 @@ static int mlx5_rsc_dump_sgmt_get_by_name(char *name) return -EINVAL; } -static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page) +#define MLX5_RSC_DUMP_MENU_HEADER_SIZE (MLX5_ST_SZ_BYTES(resource_dump_info_segment) + \ + MLX5_ST_SZ_BYTES(resource_dump_command_segment) + \ + MLX5_ST_SZ_BYTES(resource_dump_menu_segment)) + +static int mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page, + int read_size, int start_idx) { void *data = page_address(page); enum mlx5_sgmt_type sgmt_idx; int num_of_items; char *sgmt_name; void *member; + int size = 0; void *menu; int i; - menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu); - num_of_items = MLX5_GET(resource_dump_menu_segment, menu, num_of_records); + if (!start_idx) { + menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu); + rsc_dump->number_of_menu_items = MLX5_GET(resource_dump_menu_segment, menu, + num_of_records); + size = MLX5_RSC_DUMP_MENU_HEADER_SIZE; + data += size; + } + num_of_items = rsc_dump->number_of_menu_items; + + for (i = 0; start_idx + i < num_of_items; i++) { + size += MLX5_ST_SZ_BYTES(resource_dump_menu_record); + if (size >= read_size) + return start_idx + i; - for (i = 0; i < num_of_items; i++) { - member = MLX5_ADDR_OF(resource_dump_menu_segment, menu, record[i]); + member = data + MLX5_ST_SZ_BYTES(resource_dump_menu_record) * i; sgmt_name = MLX5_ADDR_OF(resource_dump_menu_record, member, segment_name); sgmt_idx = mlx5_rsc_dump_sgmt_get_by_name(sgmt_name); if (sgmt_idx == -EINVAL) @@ -72,6 +89,7 @@ static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct rsc_dump->fw_segment_type[sgmt_idx] = MLX5_GET(resource_dump_menu_record, member, segment_type); } + return 0; } static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, @@ -168,6 +186,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev) struct mlx5_rsc_dump_cmd *cmd = NULL; struct mlx5_rsc_key key = {}; struct page *page; + int start_idx = 0; int size; int err; @@ -189,7 +208,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev) if (err < 0) goto destroy_cmd; - mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page); + start_idx = mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page, size, start_idx); } while (err > 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f0ac6b0d9653..26a23047f1f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -59,6 +59,7 @@ #include "lib/hv_vhca.h" #include "lib/clock.h" #include "en/rx_res.h" +#include "en/selq.h" extern const struct net_device_ops mlx5e_netdev_ops; struct page_pool; @@ -92,29 +93,26 @@ struct page_pool; #define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \ MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD)) -#define MLX5_MPWRQ_LOG_WQE_SZ 18 -#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ - MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) -#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) - -#define MLX5_ALIGN_MTTS(mtts) (ALIGN(mtts, 8)) -#define MLX5_ALIGNED_MTTS_OCTW(mtts) ((mtts) / 2) -#define MLX5_MTT_OCTW(mtts) (MLX5_ALIGNED_MTTS_OCTW(MLX5_ALIGN_MTTS(mtts))) -/* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between - * WQEs, This page will absorb write overflow by the hardware, when - * receiving packets larger than MTU. These oversize packets are - * dropped by the driver at a later stage. +#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18 + +/* Keep in sync with mlx5e_mpwrq_log_wqe_sz. + * These are theoretical maximums, which can be further restricted by + * capabilities. These values are used for static resource allocations and + * sanity checks. + * MLX5_SEND_WQE_MAX_SIZE is a bit bigger than the maximum cacheline-aligned WQE + * size actually used at runtime, but it's not a problem when calculating static + * array sizes. */ -#define MLX5E_REQUIRED_WQE_MTTS (MLX5_ALIGN_MTTS(MLX5_MPWRQ_PAGES_PER_WQE + 1)) -#define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS) +#define MLX5_UMR_MAX_MTT_SPACE \ + (ALIGN_DOWN(MLX5_SEND_WQE_MAX_SIZE - sizeof(struct mlx5e_umr_wqe), \ + MLX5_UMR_MTT_ALIGNMENT)) +#define MLX5_MPWRQ_MAX_PAGES_PER_WQE \ + rounddown_pow_of_two(MLX5_UMR_MAX_MTT_SPACE / sizeof(struct mlx5_mtt)) + #define MLX5E_MAX_RQ_NUM_MTTS \ - ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */ + (ALIGN_DOWN(U16_MAX, 4) * 2) /* Fits into u16 and aligned by WQEBB. */ +#define MLX5E_MAX_RQ_NUM_KSMS (U16_MAX - 1) /* So that num_ksms fits into u16. */ #define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024)) -#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW \ - (ilog2(MLX5E_MAX_RQ_NUM_MTTS / MLX5E_REQUIRED_WQE_MTTS)) -#define MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW \ - (MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW + \ - (MLX5_MPWRQ_LOG_WQE_SZ - MLX5E_ORDER2_MAX_PACKET_MTU)) #define MLX5E_MIN_SKB_FRAG_SZ (MLX5_SKB_FRAG_SZ(MLX5_RX_HEADROOM)) #define MLX5E_LOG_MAX_RX_WQE_BULK \ @@ -126,8 +124,7 @@ struct page_pool; #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE (1 + MLX5E_LOG_MAX_RX_WQE_BULK) #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa -#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE min_t(u8, 0xd, \ - MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW) +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 @@ -145,18 +142,10 @@ struct page_pool; #define MLX5E_MIN_NUM_CHANNELS 0x1 #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2) -#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_TX_XSK_POLL_BUDGET 64 #define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ -#define MLX5E_UMR_WQE_INLINE_SZ \ - (sizeof(struct mlx5e_umr_wqe) + \ - ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(struct mlx5_mtt), \ - MLX5_UMR_MTT_ALIGNMENT)) -#define MLX5E_UMR_WQEBBS \ - (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB)) - #define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\ (sizeof(struct mlx5e_umr_wqe) +\ (sizeof(struct mlx5_klm) * (sgl_len))) @@ -174,7 +163,7 @@ struct page_pool; ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT) #define MLX5E_MAX_KLM_PER_WQE(mdev) \ - MLX5E_KLM_ENTRIES_PER_WQE(MLX5E_TX_MPW_MAX_NUM_DS << MLX5_MKEY_BSF_OCTO_SIZE) + MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev)) #define MLX5E_MSG_LEVEL NETIF_MSG_LINK @@ -188,12 +177,6 @@ do { \ #define mlx5e_state_dereference(priv, p) \ rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock)) -enum mlx5e_rq_group { - MLX5E_RQ_GROUP_REGULAR, - MLX5E_RQ_GROUP_XSK, -#define MLX5E_NUM_RQ_GROUPS(g) (1 + MLX5E_RQ_GROUP_##g) -}; - static inline u8 mlx5e_get_num_lag_ports(struct mlx5_core_dev *mdev) { if (mlx5_lag_is_lacp_owner(mdev)) @@ -222,10 +205,40 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS); } +/* The maximum WQE size can be retrieved by max_wqe_sz_sq in + * bytes units. Driver hardens the limitation to 1KB (16 + * WQEBBs), unless firmware capability is stricter. + */ +static inline u8 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev) +{ + BUILD_BUG_ON(MLX5_SEND_WQE_MAX_WQEBBS > U8_MAX); + + return (u8)min_t(u16, MLX5_SEND_WQE_MAX_WQEBBS, + MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB); +} + +static inline u8 mlx5e_get_max_sq_aligned_wqebbs(struct mlx5_core_dev *mdev) +{ +/* The return value will be multiplied by MLX5_SEND_WQEBB_NUM_DS. + * Since max_sq_wqebbs may be up to MLX5_SEND_WQE_MAX_WQEBBS == 16, + * see mlx5e_get_max_sq_wqebbs(), the multiplication (16 * 4 == 64) + * overflows the 6-bit DS field of Ctrl Segment. Use a bound lower + * than MLX5_SEND_WQE_MAX_WQEBBS to let a full-session WQE be + * cache-aligned. + */ + u8 wqebbs = mlx5e_get_max_sq_wqebbs(mdev); + + wqebbs = min_t(u8, wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1); +#if L1_CACHE_BYTES >= 128 + wqebbs = ALIGN_DOWN(wqebbs, 2); +#endif + return wqebbs; +} + struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; - struct mlx5_wqe_data_seg data[0]; + struct mlx5_wqe_data_seg data[]; }; struct mlx5e_rx_wqe_ll { @@ -242,8 +255,9 @@ struct mlx5e_umr_wqe { struct mlx5_wqe_umr_ctrl_seg uctrl; struct mlx5_mkey_seg mkc; union { - struct mlx5_mtt inline_mtts[0]; - struct mlx5_klm inline_klms[0]; + DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts); + DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms); + DECLARE_FLEX_ARRAY(struct mlx5_ksm, inline_ksms); }; }; @@ -294,7 +308,8 @@ struct mlx5e_params { u8 num_tc; struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; struct { - struct mlx5e_mqprio_rl *rl; + u64 max_rate[TC_MAX_QUEUE]; + u32 hw_id[TC_MAX_QUEUE]; } channel; } mqprio; bool rx_cqe_compress_def; @@ -327,7 +342,6 @@ enum { MLX5E_RQ_STATE_AM, MLX5E_RQ_STATE_NO_CSUM_COMPLETE, MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */ - MLX5E_RQ_STATE_FPGA_TLS, /* FPGA TLS enabled */ MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */ MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */ }; @@ -378,6 +392,7 @@ enum { MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, MLX5E_SQ_STATE_PENDING_XSK_TX, MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, + MLX5E_SQ_STATE_XDP_MULTIBUF, }; struct mlx5e_tx_mpwqe { @@ -428,12 +443,12 @@ struct mlx5e_txqsq { struct netdev_queue *txq; u32 sqn; u16 stop_room; + u8 max_sq_mpw_wqebbs; u8 min_inline_mode; struct device *pdev; __be32 mkey_be; unsigned long state; unsigned int hw_mtu; - struct hwtstamp_config *tstamp; struct mlx5_clock *clock; struct net_device *netdev; struct mlx5_core_dev *mdev; @@ -449,12 +464,9 @@ struct mlx5e_txqsq { cqe_ts_to_ns ptp_cyc2time; } ____cacheline_aligned_in_smp; -struct mlx5e_dma_info { - dma_addr_t addr; - union { - struct page *page; - struct xdp_buff *xsk; - }; +union mlx5e_alloc_unit { + struct page *page; + struct xdp_buff *xsk; }; /* XDP packets can be transmitted in different ways. On completion, we need to @@ -487,7 +499,7 @@ struct mlx5e_xdp_info { } frame; struct { struct mlx5e_rq *rq; - struct mlx5e_dma_info di; + struct page *page; } page; }; }; @@ -509,7 +521,7 @@ struct mlx5e_xdpsq; typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *); typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *, struct mlx5e_xmit_data *, - struct mlx5e_xdp_info *, + struct skb_shared_info *, int); struct mlx5e_xdpsq { @@ -541,6 +553,8 @@ struct mlx5e_xdpsq { u32 sqn; struct device *pdev; __be32 mkey_be; + u16 stop_room; + u8 max_sq_mpw_wqebbs; u8 min_inline_mode; unsigned long state; unsigned int hw_mtu; @@ -581,19 +595,15 @@ struct mlx5e_icosq { } ____cacheline_aligned_in_smp; struct mlx5e_wqe_frag_info { - struct mlx5e_dma_info *di; + union mlx5e_alloc_unit *au; u32 offset; bool last_in_page; }; -struct mlx5e_umr_dma_info { - struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE]; -}; - struct mlx5e_mpw_info { - struct mlx5e_umr_dma_info umr; u16 consumed_strides; - DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); + DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE); + union mlx5e_alloc_unit alloc_units[]; }; #define MLX5E_MAX_RX_FRAGS 4 @@ -601,13 +611,13 @@ struct mlx5e_mpw_info { /* a single cache unit is capable to serve one napi call (for non-striding rq) * or a MPWQE (for striding rq). */ -#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ - MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT) +#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ + MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT) #define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) struct mlx5e_page_cache { u32 head; u32 tail; - struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE]; + struct page *page_cache[MLX5E_CACHE_SIZE]; }; struct mlx5e_rq; @@ -616,8 +626,8 @@ typedef struct sk_buff * (*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, u16 cqe_bcnt, u32 head_offset, u32 page_idx); typedef struct sk_buff * -(*mlx5e_fp_skb_from_cqe)(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, - struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); +(*mlx5e_fp_skb_from_cqe)(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt); typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq); typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); typedef void (*mlx5e_fp_shampo_dealloc_hd)(struct mlx5e_rq*, u16, u16, bool); @@ -640,6 +650,12 @@ struct mlx5e_rq_frags_info { u8 num_frags; u8 log_num_frags; u8 wqe_bulk; + u8 wqe_index_mask; +}; + +struct mlx5e_dma_info { + dma_addr_t addr; + struct page *page; }; struct mlx5e_shampo_hd { @@ -661,13 +677,20 @@ struct mlx5e_hw_gro_data { int second_ip_id; }; +enum mlx5e_mpwrq_umr_mode { + MLX5E_MPWRQ_UMR_MODE_ALIGNED, + MLX5E_MPWRQ_UMR_MODE_UNALIGNED, + MLX5E_MPWRQ_UMR_MODE_OVERSIZED, + MLX5E_MPWRQ_UMR_MODE_TRIPLE, +}; + struct mlx5e_rq { /* data path */ union { struct { struct mlx5_wq_cyc wq; struct mlx5e_wqe_frag_info *frags; - struct mlx5e_dma_info *di; + union mlx5e_alloc_unit *alloc_units; struct mlx5e_rq_frags_info info; mlx5e_fp_skb_from_cqe skb_from_cqe; } wqe; @@ -676,12 +699,19 @@ struct mlx5e_rq { struct mlx5e_umr_wqe umr_wqe; struct mlx5e_mpw_info *info; mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq; + __be32 umr_mkey_be; u16 num_strides; u16 actual_wq_head; u8 log_stride_sz; u8 umr_in_progress; u8 umr_last_bulk; u8 umr_completed; + u8 min_wqe_bulk; + u8 page_shift; + u8 pages_per_wqe; + u8 umr_wqebbs; + u8 mtts_per_wqe; + u8 umr_mode; struct mlx5e_shampo_hd *shampo; } mpwqe; }; @@ -731,7 +761,7 @@ struct mlx5e_rq { u8 wq_type; u32 rqn; struct mlx5_core_dev *mdev; - u32 umr_mkey; + struct mlx5e_channel *channel; struct mlx5e_dma_info wqe_overflow; /* XDP read-mostly */ @@ -783,6 +813,8 @@ struct mlx5e_channel { DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; int cpu; + /* Sync between icosq recovery and XSK enable/disable. */ + struct mutex icosq_recovery_lock; }; struct mlx5e_ptp; @@ -818,11 +850,6 @@ enum { MLX5E_STATE_XDP_ACTIVE, }; -enum { - MLX5E_TC_PRIO = 0, - MLX5E_NIC_PRIO -}; - struct mlx5e_modify_sq_param { int curr_state; int next_state; @@ -862,24 +889,13 @@ struct mlx5e_scratchpad { cpumask_var_t cpumask; }; -struct mlx5e_htb { - DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES)); - DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES); - struct mlx5e_sq_stats **qos_sq_stats; - u16 max_qos_sqs; - u16 maj_id; - u16 defcls; -}; - struct mlx5e_trap; +struct mlx5e_htb; struct mlx5e_priv { /* priv data path fields - start */ - /* +1 for port ptp ts */ - struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC + - MLX5E_QOS_MAX_LEAF_NODES]; - int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; - int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC]; + struct mlx5e_selq selq; + struct mlx5e_txqsq **txq2sq; #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx_dp dcbx_dp; #endif @@ -893,9 +909,9 @@ struct mlx5e_priv { struct mlx5e_channels channels; u32 tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC]; struct mlx5e_rx_res *rx_res; - u32 tx_rates[MLX5E_MAX_NUM_SQS]; + u32 *tx_rates; - struct mlx5e_flow_steering fs; + struct mlx5e_flow_steering *fs; struct workqueue_struct *wq; struct work_struct update_carrier_work; @@ -909,9 +925,11 @@ struct mlx5e_priv { struct net_device *netdev; struct mlx5e_trap *en_trap; struct mlx5e_stats stats; - struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_channel_stats **channel_stats; struct mlx5e_channel_stats trap_stats; struct mlx5e_ptp_stats ptp_stats; + struct mlx5e_sq_stats **htb_qos_sq_stats; + u16 htb_max_qos_sqs; u16 stats_nch; u16 max_nch; u8 max_opened_tc; @@ -922,7 +940,6 @@ struct mlx5e_priv { u16 drop_rq_q_counter; struct notifier_block events_nb; struct notifier_block blocking_events_nb; - int num_tc_x_num_ch; struct udp_tunnel_nic_info nic_info; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -931,6 +948,9 @@ struct mlx5e_priv { const struct mlx5e_profile *profile; void *ppriv; +#ifdef CONFIG_MLX5_EN_MACSEC + struct mlx5e_macsec *macsec; +#endif #ifdef CONFIG_MLX5_EN_IPSEC struct mlx5e_ipsec *ipsec; #endif @@ -944,7 +964,7 @@ struct mlx5e_priv { struct mlx5e_hv_vhca_stats_agent stats_agent; #endif struct mlx5e_scratchpad scratchpad; - struct mlx5e_htb htb; + struct mlx5e_htb *htb; struct mlx5e_mqprio_rl *mqprio_rl; }; @@ -956,6 +976,14 @@ struct mlx5e_rx_handlers { extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic; +enum mlx5e_profile_feature { + MLX5E_PROFILE_FEATURE_PTP_RX, + MLX5E_PROFILE_FEATURE_PTP_TX, + MLX5E_PROFILE_FEATURE_QOS_HTB, + MLX5E_PROFILE_FEATURE_FS_VLAN, + MLX5E_PROFILE_FEATURE_FS_TC, +}; + struct mlx5e_profile { int (*init)(struct mlx5_core_dev *mdev, struct net_device *netdev); @@ -969,23 +997,26 @@ struct mlx5e_profile { int (*update_rx)(struct mlx5e_priv *priv); void (*update_stats)(struct mlx5e_priv *priv); void (*update_carrier)(struct mlx5e_priv *priv); + int (*max_nch_limit)(struct mlx5_core_dev *mdev); unsigned int (*stats_grps_num)(struct mlx5e_priv *priv); mlx5e_stats_grp_t *stats_grps; const struct mlx5e_rx_handlers *rx_handlers; int max_tc; - u8 rq_groups; - bool rx_ptp_support; + u32 features; }; +#define mlx5e_profile_feature_cap(profile, feature) \ + ((profile)->features & BIT(MLX5E_PROFILE_FEATURE_##feature)) + void mlx5e_build_ptys2ethtool_map(void); -bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev); +bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close); void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); -void mlx5e_init_l2_addr(struct mlx5e_priv *priv); int mlx5e_self_test_num(struct mlx5e_priv *priv); int mlx5e_self_test_fill_strings(struct mlx5e_priv *priv, u8 *data); void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest, @@ -1008,15 +1039,13 @@ struct mlx5e_rq_param; int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, int node, struct mlx5e_rq *rq); +#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); void mlx5e_close_rq(struct mlx5e_rq *rq); int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param); void mlx5e_destroy_rq(struct mlx5e_rq *rq); struct mlx5e_sq_param; -int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct mlx5e_icosq *sq); -void mlx5e_close_icosq(struct mlx5e_icosq *sq); int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, struct mlx5e_xdpsq *sq, bool is_redirect); @@ -1038,6 +1067,9 @@ void mlx5e_close_cq(struct mlx5e_cq *cq); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); +void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c); +void mlx5e_trigger_napi_sched(struct napi_struct *napi); + int mlx5e_open_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs); void mlx5e_close_channels(struct mlx5e_channels *chs); @@ -1057,13 +1089,12 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv, mlx5e_fp_preactivate preactivate, void *context, bool reset); int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv); -int mlx5e_num_channels_changed(struct mlx5e_priv *priv); int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx); -int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state); +int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state); void mlx5e_activate_rq(struct mlx5e_rq *rq); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); void mlx5e_activate_icosq(struct mlx5e_icosq *icosq); @@ -1098,6 +1129,7 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) extern const struct ethtool_ops mlx5e_ethtool_ops; +int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey); int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, @@ -1110,8 +1142,6 @@ void mlx5e_destroy_q_counters(struct mlx5e_priv *priv); int mlx5e_open_drop_rq(struct mlx5e_priv *priv, struct mlx5e_rq *drop_rq); void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq); -int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node); -void mlx5e_free_di_list(struct mlx5e_rq *rq); int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); @@ -1140,7 +1170,8 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset); void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, struct ethtool_stats *stats, u64 *data); void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, - struct ethtool_ringparam *param); + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param); int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, struct ethtool_ringparam *param); void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, @@ -1148,9 +1179,12 @@ void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch); int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, - struct ethtool_coalesce *coal); + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal); int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, - struct ethtool_coalesce *coal); + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack); int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, struct ethtool_link_ksettings *link_ksettings); int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, @@ -1180,14 +1214,14 @@ mlx5e_tx_mpwqe_supported(struct mlx5_core_dev *mdev) MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe); } +int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev); int mlx5e_priv_init(struct mlx5e_priv *priv, const struct mlx5e_profile *profile, struct net_device *netdev, struct mlx5_core_dev *mdev); void mlx5e_priv_cleanup(struct mlx5e_priv *priv); struct net_device * -mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, - unsigned int txqs, unsigned int rxqs); +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile); int mlx5e_attach_netdev(struct mlx5e_priv *priv); void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c index e7c14c0de0a7..48581ea3adcb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c @@ -10,28 +10,33 @@ unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs) return chs->num; } -void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) +static struct mlx5e_channel *mlx5e_channels_get(struct mlx5e_channels *chs, unsigned int ix) { - struct mlx5e_channel *c; + WARN_ON_ONCE(ix >= mlx5e_channels_get_num(chs)); + return chs->c[ix]; +} - WARN_ON(ix >= mlx5e_channels_get_num(chs)); - c = chs->c[ix]; +bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix) +{ + struct mlx5e_channel *c = mlx5e_channels_get(chs, ix); - *rqn = c->rq.rqn; + return test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); } -bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) +void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) { - struct mlx5e_channel *c; + struct mlx5e_channel *c = mlx5e_channels_get(chs, ix); - WARN_ON(ix >= mlx5e_channels_get_num(chs)); - c = chs->c[ix]; + *rqn = c->rq.rqn; +} - if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) - return false; +void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) +{ + struct mlx5e_channel *c = mlx5e_channels_get(chs, ix); + + WARN_ON_ONCE(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)); *rqn = c->xskrq.rqn; - return true; } bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h index ca00cbc827cb..637ca90daaa8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h @@ -9,8 +9,9 @@ struct mlx5e_channels; unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs); +bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix); void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn); -bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn); +void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn); bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn); #endif /* __MLX5_EN_CHANNELS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h index 9976de8b9047..b59aee75de94 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h @@ -40,13 +40,11 @@ struct mlx5e_dcbx_dp { }; void mlx5e_dcbnl_build_netdev(struct net_device *netdev); -void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev); void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv); void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv); void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv); #else static inline void mlx5e_dcbnl_build_netdev(struct net_device *netdev) {} -static inline void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev) {} static inline void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) {} static inline void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) {} static inline void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) {} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c index ae52e7f38306..b69f9d10ccbd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -21,6 +21,7 @@ int mlx5e_devlink_port_register(struct mlx5e_priv *priv) struct netdev_phys_item_id ppid = {}; struct devlink_port *dl_port; unsigned int dl_port_index; + int ret; if (mlx5_core_is_pf(priv->mdev)) { attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; @@ -41,7 +42,13 @@ int mlx5e_devlink_port_register(struct mlx5e_priv *priv) memset(dl_port, 0, sizeof(*dl_port)); devlink_port_attrs_set(dl_port, &attrs); - return devlink_port_register(devlink, dl_port, dl_port_index); + if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) + devl_lock(devlink); + ret = devl_port_register(devlink, dl_port, dl_port_index); + if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) + devl_unlock(devlink); + + return ret; } void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv) @@ -54,8 +61,13 @@ void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv) void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv) { struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); + struct devlink *devlink = priv_to_devlink(priv->mdev); - devlink_port_unregister(dl_port); + if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) + devl_lock(devlink); + devl_port_unregister(dl_port); + if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) + devl_unlock(devlink); } struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 678ffbb48a25..bf2741eb7f9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -8,32 +8,17 @@ #include "lib/fs_ttc.h" struct mlx5e_post_act; +struct mlx5e_tc_table; enum { MLX5E_TC_FT_LEVEL = 0, MLX5E_TC_TTC_FT_LEVEL, + MLX5E_TC_MISS_LEVEL, }; -struct mlx5e_tc_table { - /* Protects the dynamic assignment of the t parameter - * which is the nic tc root table. - */ - struct mutex t_lock; - struct mlx5_flow_table *t; - struct mlx5_fs_chains *chains; - struct mlx5e_post_act *post_act; - - struct rhashtable ht; - - struct mod_hdr_tbl mod_hdr; - struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */ - DECLARE_HASHTABLE(hairpin_tbl, 8); - - struct notifier_block netdevice_nb; - struct netdev_net_notifier netdevice_nn; - - struct mlx5_tc_ct_priv *ct; - struct mapping_ctx *mapping; +enum { + MLX5E_TC_PRIO = 0, + MLX5E_NIC_PRIO }; struct mlx5e_flow_table { @@ -104,108 +89,116 @@ enum { #endif }; -struct mlx5e_priv; - -#ifdef CONFIG_MLX5_EN_RXNFC - -struct mlx5e_ethtool_table { - struct mlx5_flow_table *ft; - int num_rules; -}; - -#define ETHTOOL_NUM_L3_L4_FTS 7 -#define ETHTOOL_NUM_L2_FTS 4 - -struct mlx5e_ethtool_steering { - struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS]; - struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS]; - struct list_head rules; - int tot_num_rules; -}; - -void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv); -void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv); -int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd); -int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, - struct ethtool_rxnfc *info, u32 *rule_locs); -#else -static inline void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) { } -static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) { } -static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd) -{ return -EOPNOTSUPP; } -static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, - struct ethtool_rxnfc *info, u32 *rule_locs) -{ return -EOPNOTSUPP; } -#endif /* CONFIG_MLX5_EN_RXNFC */ +struct mlx5e_flow_steering; +struct mlx5e_rx_res; #ifdef CONFIG_MLX5_EN_ARFS struct mlx5e_arfs_tables; -int mlx5e_arfs_create_tables(struct mlx5e_priv *priv); -void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv); -int mlx5e_arfs_enable(struct mlx5e_priv *priv); -int mlx5e_arfs_disable(struct mlx5e_priv *priv); +int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, bool ntuple); +void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple); +int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs); +int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs); int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id); #else -static inline int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) { return 0; } -static inline void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) {} -static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; } -static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; } +static inline int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, bool ntuple) +{ return 0; } +static inline void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple) {} +static inline int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs) +{ return -EOPNOTSUPP; } +static inline int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs) +{ return -EOPNOTSUPP; } #endif #ifdef CONFIG_MLX5_EN_TLS struct mlx5e_accel_fs_tcp; #endif +struct mlx5e_profile; struct mlx5e_fs_udp; struct mlx5e_fs_any; struct mlx5e_ptp_fs; -struct mlx5e_flow_steering { - struct mlx5_flow_namespace *ns; - struct mlx5_flow_namespace *egress_ns; +void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + struct ttc_params *ttc_params, bool tunnel); + +void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs); +int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res); + +void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); + +void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc); +void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc); + +int mlx5e_create_flow_steering(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + const struct mlx5e_profile *profile, + struct net_device *netdev); +void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple, + const struct mlx5e_profile *profile); + +struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile, + struct mlx5_core_dev *mdev, + bool state_destroy); +void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs); +struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_tc(struct mlx5e_flow_steering *fs, struct mlx5e_tc_table *tc); +struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs); +struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs); +struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress); +void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress); #ifdef CONFIG_MLX5_EN_RXNFC - struct mlx5e_ethtool_steering ethtool; +struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs); #endif - struct mlx5e_tc_table tc; - struct mlx5e_promisc_table promisc; - struct mlx5e_vlan_table *vlan; - struct mlx5e_l2_table l2; - struct mlx5_ttc_table *ttc; - struct mlx5_ttc_table *inner_ttc; +struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner); +void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner); #ifdef CONFIG_MLX5_EN_ARFS - struct mlx5e_arfs_tables *arfs; +struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs); #endif +struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs); +struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any); +struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp); #ifdef CONFIG_MLX5_EN_TLS - struct mlx5e_accel_fs_tcp *accel_tcp; +struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp); #endif - struct mlx5e_fs_udp *udp; - struct mlx5e_fs_any *any; - struct mlx5e_ptp_fs *ptp_fs; -}; - -void mlx5e_set_ttc_params(struct mlx5e_priv *priv, - struct ttc_params *ttc_params, bool tunnel); - -void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv); -int mlx5e_create_ttc_table(struct mlx5e_priv *priv); - -void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); - -void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv); -void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv); - -int mlx5e_create_flow_steering(struct mlx5e_priv *priv); -void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); - -int mlx5e_fs_init(struct mlx5e_priv *priv); -void mlx5e_fs_cleanup(struct mlx5e_priv *priv); - -int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num); -void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv); -int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num); -void mlx5e_remove_mac_trap(struct mlx5e_priv *priv); +void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy); +void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs, bool vlan_strip_disable); + +struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs); +int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num); +void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs); +int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num); +void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, struct net_device *netdev); +int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs, + struct net_device *netdev, + __be16 proto, u16 vid); +int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs, + struct net_device *netdev, + __be16 proto, u16 vid); +void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev); + +#define fs_err(fs, fmt, ...) \ + mlx5_core_err(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__) + +#define fs_dbg(fs, fmt, ...) \ + mlx5_core_dbg(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__) + +#define fs_warn(fs, fmt, ...) \ + mlx5_core_warn(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__) + +#define fs_warn_once(fs, fmt, ...) \ + mlx5_core_warn_once(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__) #endif /* __MLX5E_FLOW_STEER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h new file mode 100644 index 000000000000..9e276fd3c0cf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __MLX5E_FS_ETHTOOL_H__ +#define __MLX5E_FS_ETHTOOL_H__ + +struct mlx5e_priv; +struct mlx5e_ethtool_steering; +#ifdef CONFIG_MLX5_EN_RXNFC +int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool); +void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool); +void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs); +void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs); +int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd); +int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, u32 *rule_locs); +#else +static inline int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool) +{ return 0; } +static inline void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool) { } +static inline void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs) { } +static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs) { } +static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd) +{ return -EOPNOTSUPP; } +static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, u32 *rule_locs) +{ return -EOPNOTSUPP; } +#endif +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c index 7aa25a5e29d7..03cb79adf912 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ -#include <linux/netdevice.h> #include "en/fs_tt_redirect.h" #include "fs_core.h" +#include "mlx5_core.h" enum fs_udp_type { FS_IPV4_UDP, @@ -74,17 +74,17 @@ static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type } struct mlx5_flow_handle * -mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv, +mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs, enum mlx5_traffic_types ttc_type, u32 tir_num, u16 d_port) { + struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs); enum fs_udp_type type = tt2fs_udp(ttc_type); struct mlx5_flow_destination dest = {}; struct mlx5_flow_table *ft = NULL; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; - struct mlx5e_fs_udp *fs_udp; int err; if (type == FS_UDP_NUM_TYPES) @@ -94,7 +94,6 @@ mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv, if (!spec) return ERR_PTR(-ENOMEM); - fs_udp = priv->fs.udp; ft = fs_udp->tables[type].t; fs_udp_set_dport_flow(spec, type, d_port); @@ -106,31 +105,30 @@ mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv, if (IS_ERR(rule)) { err = PTR_ERR(rule); - netdev_err(priv->netdev, "%s: add %s rule failed, err %d\n", - __func__, fs_udp_type2str(type), err); + fs_err(fs, "%s: add %s rule failed, err %d\n", + __func__, fs_udp_type2str(type), err); } return rule; } -static int fs_udp_add_default_rule(struct mlx5e_priv *priv, enum fs_udp_type type) +static int fs_udp_add_default_rule(struct mlx5e_flow_steering *fs, enum fs_udp_type type) { + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs); struct mlx5e_flow_table *fs_udp_t; struct mlx5_flow_destination dest; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; - struct mlx5e_fs_udp *fs_udp; int err; - fs_udp = priv->fs.udp; fs_udp_t = &fs_udp->tables[type]; - dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_udp2tt(type)); + dest = mlx5_ttc_get_default_dest(ttc, fs_udp2tt(type)); rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); - netdev_err(priv->netdev, - "%s: add default rule failed, fs type=%d, err %d\n", - __func__, type, err); + fs_err(fs, "%s: add default rule failed, fs type=%d, err %d\n", + __func__, type, err); return err; } @@ -206,33 +204,36 @@ out: return err; } -static int fs_udp_create_table(struct mlx5e_priv *priv, enum fs_udp_type type) +static int fs_udp_create_table(struct mlx5e_flow_steering *fs, enum fs_udp_type type) { - struct mlx5e_flow_table *ft = &priv->fs.udp->tables[type]; + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false); + struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs); struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_flow_table *ft; int err; + ft = &fs_udp->tables[type]; ft->num_groups = 0; ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE; ft_attr.level = MLX5E_FS_TT_UDP_FT_LEVEL; ft_attr.prio = MLX5E_NIC_PRIO; - ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + ft->t = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; return err; } - netdev_dbg(priv->netdev, "Created fs %s table id %u level %u\n", - fs_udp_type2str(type), ft->t->id, ft->t->level); + mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs %s table id %u level %u\n", + fs_udp_type2str(type), ft->t->id, ft->t->level); err = fs_udp_create_groups(ft, type); if (err) goto err; - err = fs_udp_add_default_rule(priv, type); + err = fs_udp_add_default_rule(fs, type); if (err) goto err; @@ -253,17 +254,17 @@ static void fs_udp_destroy_table(struct mlx5e_fs_udp *fs_udp, int i) fs_udp->tables[i].t = NULL; } -static int fs_udp_disable(struct mlx5e_priv *priv) +static int fs_udp_disable(struct mlx5e_flow_steering *fs) { + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); int err, i; for (i = 0; i < FS_UDP_NUM_TYPES; i++) { /* Modify ttc rules destination to point back to the indir TIRs */ - err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_udp2tt(i)); + err = mlx5_ttc_fwd_default_dest(ttc, fs_udp2tt(i)); if (err) { - netdev_err(priv->netdev, - "%s: modify ttc[%d] default destination failed, err(%d)\n", - __func__, fs_udp2tt(i), err); + fs_err(fs, "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, fs_udp2tt(i), err); return err; } } @@ -271,30 +272,31 @@ static int fs_udp_disable(struct mlx5e_priv *priv) return 0; } -static int fs_udp_enable(struct mlx5e_priv *priv) +static int fs_udp_enable(struct mlx5e_flow_steering *fs) { + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs); struct mlx5_flow_destination dest = {}; int err, i; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; for (i = 0; i < FS_UDP_NUM_TYPES; i++) { - dest.ft = priv->fs.udp->tables[i].t; + dest.ft = udp->tables[i].t; /* Modify ttc rules destination to point on the accel_fs FTs */ - err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_udp2tt(i), &dest); + err = mlx5_ttc_fwd_dest(ttc, fs_udp2tt(i), &dest); if (err) { - netdev_err(priv->netdev, - "%s: modify ttc[%d] destination to accel failed, err(%d)\n", - __func__, fs_udp2tt(i), err); + fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n", + __func__, fs_udp2tt(i), err); return err; } } return 0; } -void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv) +void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs) { - struct mlx5e_fs_udp *fs_udp = priv->fs.udp; + struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs); int i; if (!fs_udp) @@ -303,48 +305,50 @@ void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv) if (--fs_udp->ref_cnt) return; - fs_udp_disable(priv); + fs_udp_disable(fs); for (i = 0; i < FS_UDP_NUM_TYPES; i++) fs_udp_destroy_table(fs_udp, i); kfree(fs_udp); - priv->fs.udp = NULL; + mlx5e_fs_set_udp(fs, NULL); } -int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv) +int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs) { + struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs); int i, err; - if (priv->fs.udp) { - priv->fs.udp->ref_cnt++; + if (udp) { + udp->ref_cnt++; return 0; } - priv->fs.udp = kzalloc(sizeof(*priv->fs.udp), GFP_KERNEL); - if (!priv->fs.udp) + udp = kzalloc(sizeof(*udp), GFP_KERNEL); + if (!udp) return -ENOMEM; + mlx5e_fs_set_udp(fs, udp); for (i = 0; i < FS_UDP_NUM_TYPES; i++) { - err = fs_udp_create_table(priv, i); + err = fs_udp_create_table(fs, i); if (err) goto err_destroy_tables; } - err = fs_udp_enable(priv); + err = fs_udp_enable(fs); if (err) goto err_destroy_tables; - priv->fs.udp->ref_cnt = 1; + udp->ref_cnt = 1; return 0; err_destroy_tables: while (--i >= 0) - fs_udp_destroy_table(priv->fs.udp, i); + fs_udp_destroy_table(udp, i); - kfree(priv->fs.udp); - priv->fs.udp = NULL; + kfree(udp); + mlx5e_fs_set_udp(fs, NULL); return err; } @@ -356,22 +360,21 @@ static void fs_any_set_ethertype_flow(struct mlx5_flow_spec *spec, u16 ether_typ } struct mlx5_flow_handle * -mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv, +mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs, u32 tir_num, u16 ether_type) { + struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs); struct mlx5_flow_destination dest = {}; struct mlx5_flow_table *ft = NULL; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; - struct mlx5e_fs_any *fs_any; int err; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return ERR_PTR(-ENOMEM); - fs_any = priv->fs.any; ft = fs_any->table.t; fs_any_set_ethertype_flow(spec, ether_type); @@ -383,31 +386,29 @@ mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv, if (IS_ERR(rule)) { err = PTR_ERR(rule); - netdev_err(priv->netdev, "%s: add ANY rule failed, err %d\n", - __func__, err); + fs_err(fs, "%s: add ANY rule failed, err %d\n", + __func__, err); } return rule; } -static int fs_any_add_default_rule(struct mlx5e_priv *priv) +static int fs_any_add_default_rule(struct mlx5e_flow_steering *fs) { + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs); struct mlx5e_flow_table *fs_any_t; struct mlx5_flow_destination dest; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; - struct mlx5e_fs_any *fs_any; int err; - fs_any = priv->fs.any; fs_any_t = &fs_any->table; - - dest = mlx5_ttc_get_default_dest(priv->fs.ttc, MLX5_TT_ANY); + dest = mlx5_ttc_get_default_dest(ttc, MLX5_TT_ANY); rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); - netdev_err(priv->netdev, - "%s: add default rule failed, fs type=ANY, err %d\n", - __func__, err); + fs_err(fs, "%s: add default rule failed, fs type=ANY, err %d\n", + __func__, err); return err; } @@ -472,9 +473,11 @@ err: return err; } -static int fs_any_create_table(struct mlx5e_priv *priv) +static int fs_any_create_table(struct mlx5e_flow_steering *fs) { - struct mlx5e_flow_table *ft = &priv->fs.any->table; + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false); + struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs); + struct mlx5e_flow_table *ft = &fs_any->table; struct mlx5_flow_table_attr ft_attr = {}; int err; @@ -484,21 +487,21 @@ static int fs_any_create_table(struct mlx5e_priv *priv) ft_attr.level = MLX5E_FS_TT_ANY_FT_LEVEL; ft_attr.prio = MLX5E_NIC_PRIO; - ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + ft->t = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; return err; } - netdev_dbg(priv->netdev, "Created fs ANY table id %u level %u\n", - ft->t->id, ft->t->level); + mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs ANY table id %u level %u\n", + ft->t->id, ft->t->level); err = fs_any_create_groups(ft); if (err) goto err; - err = fs_any_add_default_rule(priv); + err = fs_any_add_default_rule(fs); if (err) goto err; @@ -509,35 +512,38 @@ err: return err; } -static int fs_any_disable(struct mlx5e_priv *priv) +static int fs_any_disable(struct mlx5e_flow_steering *fs) { + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); int err; /* Modify ttc rules destination to point back to the indir TIRs */ - err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, MLX5_TT_ANY); + err = mlx5_ttc_fwd_default_dest(ttc, MLX5_TT_ANY); if (err) { - netdev_err(priv->netdev, - "%s: modify ttc[%d] default destination failed, err(%d)\n", - __func__, MLX5_TT_ANY, err); + fs_err(fs, + "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, MLX5_TT_ANY, err); return err; } return 0; } -static int fs_any_enable(struct mlx5e_priv *priv) +static int fs_any_enable(struct mlx5e_flow_steering *fs) { + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_fs_any *any = mlx5e_fs_get_any(fs); struct mlx5_flow_destination dest = {}; int err; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = priv->fs.any->table.t; + dest.ft = any->table.t; /* Modify ttc rules destination to point on the accel_fs FTs */ - err = mlx5_ttc_fwd_dest(priv->fs.ttc, MLX5_TT_ANY, &dest); + err = mlx5_ttc_fwd_dest(ttc, MLX5_TT_ANY, &dest); if (err) { - netdev_err(priv->netdev, - "%s: modify ttc[%d] destination to accel failed, err(%d)\n", - __func__, MLX5_TT_ANY, err); + fs_err(fs, + "%s: modify ttc[%d] destination to accel failed, err(%d)\n", + __func__, MLX5_TT_ANY, err); return err; } return 0; @@ -553,9 +559,9 @@ static void fs_any_destroy_table(struct mlx5e_fs_any *fs_any) fs_any->table.t = NULL; } -void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv) +void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs) { - struct mlx5e_fs_any *fs_any = priv->fs.any; + struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs); if (!fs_any) return; @@ -563,43 +569,45 @@ void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv) if (--fs_any->ref_cnt) return; - fs_any_disable(priv); + fs_any_disable(fs); fs_any_destroy_table(fs_any); kfree(fs_any); - priv->fs.any = NULL; + mlx5e_fs_set_any(fs, NULL); } -int mlx5e_fs_tt_redirect_any_create(struct mlx5e_priv *priv) +int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs) { + struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs); int err; - if (priv->fs.any) { - priv->fs.any->ref_cnt++; + if (fs_any) { + fs_any->ref_cnt++; return 0; } - priv->fs.any = kzalloc(sizeof(*priv->fs.any), GFP_KERNEL); - if (!priv->fs.any) + fs_any = kzalloc(sizeof(*fs_any), GFP_KERNEL); + if (!fs_any) return -ENOMEM; + mlx5e_fs_set_any(fs, fs_any); - err = fs_any_create_table(priv); + err = fs_any_create_table(fs); if (err) return err; - err = fs_any_enable(priv); + err = fs_any_enable(fs); if (err) goto err_destroy_table; - priv->fs.any->ref_cnt = 1; + fs_any->ref_cnt = 1; return 0; err_destroy_table: - fs_any_destroy_table(priv->fs.any); + fs_any_destroy_table(fs_any); - kfree(priv->fs.any); - priv->fs.any = NULL; + kfree(fs_any); + mlx5e_fs_set_any(fs, NULL); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h index 7a70c4f38fda..5780fd7ad507 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h @@ -4,23 +4,22 @@ #ifndef __MLX5E_FS_TT_REDIRECT_H__ #define __MLX5E_FS_TT_REDIRECT_H__ -#include "en.h" #include "en/fs.h" void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule); /* UDP traffic type redirect */ struct mlx5_flow_handle * -mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv, +mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs, enum mlx5_traffic_types ttc_type, u32 tir_num, u16 d_port); -void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv); -int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv); +void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs); +int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs); /* ANY traffic type redirect*/ struct mlx5_flow_handle * -mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv, +mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs, u32 tir_num, u16 ether_type); -void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv); -int mlx5e_fs_tt_redirect_any_create(struct mlx5e_priv *priv); +void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs); +int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index d5b7110a4265..0107e4e73bb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -30,6 +30,8 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv); void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq); void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq); void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c); +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c); #define MLX5E_REPORTER_PER_Q_MAX_LEN 256 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c new file mode 100644 index 000000000000..6dac76fa58a3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c @@ -0,0 +1,722 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include <net/pkt_cls.h> +#include "htb.h" +#include "en.h" +#include "../qos.h" + +struct mlx5e_qos_node { + struct hlist_node hnode; + struct mlx5e_qos_node *parent; + u64 rate; + u32 bw_share; + u32 max_average_bw; + u32 hw_id; + u32 classid; /* 16-bit, except root. */ + u16 qid; +}; + +struct mlx5e_htb { + DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES)); + DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES); + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5e_priv *priv; + struct mlx5e_selq *selq; +}; + +#define MLX5E_QOS_QID_INNER 0xffff +#define MLX5E_HTB_CLASSID_ROOT 0xffffffff + +/* Software representation of the QoS tree */ + +int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data) +{ + struct mlx5e_qos_node *node = NULL; + int bkt, err; + + hash_for_each(htb->qos_tc2node, bkt, node, hnode) { + if (node->qid == MLX5E_QOS_QID_INNER) + continue; + err = callback(data, node->qid, node->hw_id); + if (err) + return err; + } + return 0; +} + +int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb) +{ + int last; + + last = find_last_bit(htb->qos_used_qids, mlx5e_qos_max_leaf_nodes(htb->mdev)); + return last == mlx5e_qos_max_leaf_nodes(htb->mdev) ? 0 : last + 1; +} + +static int mlx5e_htb_find_unused_qos_qid(struct mlx5e_htb *htb) +{ + int size = mlx5e_qos_max_leaf_nodes(htb->mdev); + struct mlx5e_priv *priv = htb->priv; + int res; + + WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__); + res = find_first_zero_bit(htb->qos_used_qids, size); + + return res == size ? -ENOSPC : res; +} + +static struct mlx5e_qos_node * +mlx5e_htb_node_create_leaf(struct mlx5e_htb *htb, u16 classid, u16 qid, + struct mlx5e_qos_node *parent) +{ + struct mlx5e_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->parent = parent; + + node->qid = qid; + __set_bit(qid, htb->qos_used_qids); + + node->classid = classid; + hash_add_rcu(htb->qos_tc2node, &node->hnode, classid); + + mlx5e_update_tx_netdev_queues(htb->priv); + + return node; +} + +static struct mlx5e_qos_node *mlx5e_htb_node_create_root(struct mlx5e_htb *htb) +{ + struct mlx5e_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->qid = MLX5E_QOS_QID_INNER; + node->classid = MLX5E_HTB_CLASSID_ROOT; + hash_add_rcu(htb->qos_tc2node, &node->hnode, node->classid); + + return node; +} + +static struct mlx5e_qos_node *mlx5e_htb_node_find(struct mlx5e_htb *htb, u32 classid) +{ + struct mlx5e_qos_node *node = NULL; + + hash_for_each_possible(htb->qos_tc2node, node, hnode, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static struct mlx5e_qos_node *mlx5e_htb_node_find_rcu(struct mlx5e_htb *htb, u32 classid) +{ + struct mlx5e_qos_node *node = NULL; + + hash_for_each_possible_rcu(htb->qos_tc2node, node, hnode, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static void mlx5e_htb_node_delete(struct mlx5e_htb *htb, struct mlx5e_qos_node *node) +{ + hash_del_rcu(&node->hnode); + if (node->qid != MLX5E_QOS_QID_INNER) { + __clear_bit(node->qid, htb->qos_used_qids); + mlx5e_update_tx_netdev_queues(htb->priv); + } + /* Make sure this qid is no longer selected by mlx5e_select_queue, so + * that mlx5e_reactivate_qos_sq can safely restart the netdev TX queue. + */ + synchronize_net(); + kfree(node); +} + +/* TX datapath API */ + +int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid) +{ + struct mlx5e_qos_node *node; + u16 qid; + int res; + + rcu_read_lock(); + + node = mlx5e_htb_node_find_rcu(htb, classid); + if (!node) { + res = -ENOENT; + goto out; + } + qid = READ_ONCE(node->qid); + if (qid == MLX5E_QOS_QID_INNER) { + res = -EINVAL; + goto out; + } + res = mlx5e_qid_from_qos(&htb->priv->channels, qid); + +out: + rcu_read_unlock(); + return res; +} + +/* HTB TC handlers */ + +static int +mlx5e_htb_root_add(struct mlx5e_htb *htb, u16 htb_maj_id, u16 htb_defcls, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = htb->priv; + struct mlx5e_qos_node *root; + bool opened; + int err; + + qos_dbg(htb->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls); + + mlx5e_selq_prepare_htb(htb->selq, htb_maj_id, htb_defcls); + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (opened) { + err = mlx5e_qos_alloc_queues(priv, &priv->channels); + if (err) + goto err_cancel_selq; + } + + root = mlx5e_htb_node_create_root(htb); + if (IS_ERR(root)) { + err = PTR_ERR(root); + goto err_free_queues; + } + + err = mlx5_qos_create_root_node(htb->mdev, &root->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware."); + goto err_sw_node_delete; + } + + mlx5e_selq_apply(htb->selq); + + return 0; + +err_sw_node_delete: + mlx5e_htb_node_delete(htb, root); + +err_free_queues: + if (opened) + mlx5e_qos_close_all_queues(&priv->channels); +err_cancel_selq: + mlx5e_selq_cancel(htb->selq); + return err; +} + +static int mlx5e_htb_root_del(struct mlx5e_htb *htb) +{ + struct mlx5e_priv *priv = htb->priv; + struct mlx5e_qos_node *root; + int err; + + qos_dbg(htb->mdev, "TC_HTB_DESTROY\n"); + + /* Wait until real_num_tx_queues is updated for mlx5e_select_queue, + * so that we can safely switch to its non-HTB non-PTP fastpath. + */ + synchronize_net(); + + mlx5e_selq_prepare_htb(htb->selq, 0, 0); + mlx5e_selq_apply(htb->selq); + + root = mlx5e_htb_node_find(htb, MLX5E_HTB_CLASSID_ROOT); + if (!root) { + qos_err(htb->mdev, "Failed to find the root node in the QoS tree\n"); + return -ENOENT; + } + err = mlx5_qos_destroy_node(htb->mdev, root->hw_id); + if (err) + qos_err(htb->mdev, "Failed to destroy root node %u, err = %d\n", + root->hw_id, err); + mlx5e_htb_node_delete(htb, root); + + mlx5e_qos_deactivate_all_queues(&priv->channels); + mlx5e_qos_close_all_queues(&priv->channels); + + return err; +} + +static int mlx5e_htb_convert_rate(struct mlx5e_htb *htb, u64 rate, + struct mlx5e_qos_node *parent, u32 *bw_share) +{ + u64 share = 0; + + while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw) + parent = parent->parent; + + if (parent->max_average_bw) + share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT), + parent->max_average_bw); + else + share = 101; + + *bw_share = share == 0 ? 1 : share > 100 ? 0 : share; + + qos_dbg(htb->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n", + rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share); + + return 0; +} + +static void mlx5e_htb_convert_ceil(struct mlx5e_htb *htb, u64 ceil, u32 *max_average_bw) +{ + /* Hardware treats 0 as "unlimited", set at least 1. */ + *max_average_bw = max_t(u32, div_u64(ceil, BYTES_IN_MBIT), 1); + + qos_dbg(htb->mdev, "Convert: ceil %llu -> max_average_bw %u\n", + ceil, *max_average_bw); +} + +int +mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *parent; + struct mlx5e_priv *priv = htb->priv; + int qid; + int err; + + qos_dbg(htb->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n", + classid, parent_classid, rate, ceil); + + qid = mlx5e_htb_find_unused_qos_qid(htb); + if (qid < 0) { + NL_SET_ERR_MSG_MOD(extack, "Maximum amount of leaf classes is reached."); + return qid; + } + + parent = mlx5e_htb_node_find(htb, parent_classid); + if (!parent) + return -EINVAL; + + node = mlx5e_htb_node_create_leaf(htb, classid, qid, parent); + if (IS_ERR(node)) + return PTR_ERR(node); + + node->rate = rate; + mlx5e_htb_convert_rate(htb, rate, node->parent, &node->bw_share); + mlx5e_htb_convert_ceil(htb, ceil, &node->max_average_bw); + + err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->hw_id, + node->bw_share, node->max_average_bw, + &node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + mlx5e_htb_node_delete(htb, node); + return err; + } + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, node->qid, node->hw_id); + } + } + + return mlx5e_qid_from_qos(&priv->channels, node->qid); +} + +int +mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid, + u64 rate, u64 ceil, struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *child; + struct mlx5e_priv *priv = htb->priv; + int err, tmp_err; + u32 new_hw_id; + u16 qid; + + qos_dbg(htb->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n", + classid, child_classid, rate, ceil); + + node = mlx5e_htb_node_find(htb, classid); + if (!node) + return -ENOENT; + + err = mlx5_qos_create_inner_node(htb->mdev, node->parent->hw_id, + node->bw_share, node->max_average_bw, + &new_hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node."); + qos_err(htb->mdev, "Failed to create an inner node (class %04x), err = %d\n", + classid, err); + return err; + } + + /* Intentionally reuse the qid for the upcoming first child. */ + child = mlx5e_htb_node_create_leaf(htb, child_classid, node->qid, node); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto err_destroy_hw_node; + } + + child->rate = rate; + mlx5e_htb_convert_rate(htb, rate, node, &child->bw_share); + mlx5e_htb_convert_ceil(htb, ceil, &child->max_average_bw); + + err = mlx5_qos_create_leaf_node(htb->mdev, new_hw_id, child->bw_share, + child->max_average_bw, &child->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + goto err_delete_sw_node; + } + + /* No fail point. */ + + qid = node->qid; + /* Pairs with mlx5e_htb_get_txq_by_classid. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + err = mlx5_qos_destroy_node(htb->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + node->hw_id = new_hw_id; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, child->qid, child->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, child->qid, child->hw_id); + } + } + + return 0; + +err_delete_sw_node: + child->qid = MLX5E_QOS_QID_INNER; + mlx5e_htb_node_delete(htb, child); + +err_destroy_hw_node: + tmp_err = mlx5_qos_destroy_node(htb->mdev, new_hw_id); + if (tmp_err) /* Not fatal. */ + qos_warn(htb->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n", + new_hw_id, classid, tmp_err); + return err; +} + +static struct mlx5e_qos_node *mlx5e_htb_node_find_by_qid(struct mlx5e_htb *htb, u16 qid) +{ + struct mlx5e_qos_node *node = NULL; + int bkt; + + hash_for_each(htb->qos_tc2node, bkt, node, hnode) + if (node->qid == qid) + break; + + return node; +} + +int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = htb->priv; + struct mlx5e_qos_node *node; + struct netdev_queue *txq; + u16 qid, moved_qid; + bool opened; + int err; + + qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid); + + node = mlx5e_htb_node_find(htb, *classid); + if (!node) + return -ENOENT; + + /* Store qid for reuse. */ + qid = node->qid; + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (opened) { + txq = netdev_get_tx_queue(htb->netdev, + mlx5e_qid_from_qos(&priv->channels, qid)); + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + err = mlx5_qos_destroy_node(htb->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, *classid, err); + + mlx5e_htb_node_delete(htb, node); + + moved_qid = mlx5e_htb_cur_leaf_nodes(htb); + + if (moved_qid == 0) { + /* The last QoS SQ was just destroyed. */ + if (opened) + mlx5e_reactivate_qos_sq(priv, qid, txq); + return 0; + } + moved_qid--; + + if (moved_qid < qid) { + /* The highest QoS SQ was just destroyed. */ + WARN(moved_qid != qid - 1, "Gaps in queue numeration: destroyed queue %u, the highest queue is %u", + qid, moved_qid); + if (opened) + mlx5e_reactivate_qos_sq(priv, qid, txq); + return 0; + } + + WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid); + qos_dbg(htb->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid); + + node = mlx5e_htb_node_find_by_qid(htb, moved_qid); + WARN(!node, "Could not find a node with qid %u to move to queue %u", + moved_qid, qid); + + /* Stop traffic to the old queue. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + __clear_bit(moved_qid, priv->htb->qos_used_qids); + + if (opened) { + txq = netdev_get_tx_queue(htb->netdev, + mlx5e_qid_from_qos(&priv->channels, moved_qid)); + mlx5e_deactivate_qos_sq(priv, moved_qid); + mlx5e_close_qos_sq(priv, moved_qid); + } + + /* Prevent packets from the old class from getting into the new one. */ + mlx5e_reset_qdisc(htb->netdev, moved_qid); + + __set_bit(qid, htb->qos_used_qids); + WRITE_ONCE(node->qid, qid); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n", + node->classid, moved_qid, qid, err); + } else { + mlx5e_activate_qos_sq(priv, node->qid, node->hw_id); + } + } + + mlx5e_update_tx_netdev_queues(priv); + if (opened) + mlx5e_reactivate_qos_sq(priv, moved_qid, txq); + + *classid = node->classid; + return 0; +} + +int +mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *parent; + struct mlx5e_priv *priv = htb->priv; + u32 old_hw_id, new_hw_id; + int err, saved_err = 0; + u16 qid; + + qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n", + force ? "_FORCE" : "", classid); + + node = mlx5e_htb_node_find(htb, classid); + if (!node) + return -ENOENT; + + err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->parent->hw_id, + node->parent->bw_share, + node->parent->max_average_bw, + &new_hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + if (!force) + return err; + saved_err = err; + } + + /* Store qid for reuse and prevent clearing the bit. */ + qid = node->qid; + /* Pairs with mlx5e_htb_get_txq_by_classid. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + /* Prevent packets from the old class from getting into the new one. */ + mlx5e_reset_qdisc(htb->netdev, qid); + + err = mlx5_qos_destroy_node(htb->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + parent = node->parent; + mlx5e_htb_node_delete(htb, node); + + node = parent; + WRITE_ONCE(node->qid, qid); + + /* Early return on error in force mode. Parent will still be an inner + * node to be deleted by a following delete operation. + */ + if (saved_err) + return saved_err; + + old_hw_id = node->hw_id; + node->hw_id = new_hw_id; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, node->qid, node->hw_id); + } + } + + err = mlx5_qos_destroy_node(htb->mdev, old_hw_id); + if (err) /* Not fatal. */ + qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + return 0; +} + +static int +mlx5e_htb_update_children(struct mlx5e_htb *htb, struct mlx5e_qos_node *node, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *child; + int err = 0; + int bkt; + + hash_for_each(htb->qos_tc2node, bkt, child, hnode) { + u32 old_bw_share = child->bw_share; + int err_one; + + if (child->parent != node) + continue; + + mlx5e_htb_convert_rate(htb, child->rate, node, &child->bw_share); + if (child->bw_share == old_bw_share) + continue; + + err_one = mlx5_qos_update_node(htb->mdev, child->hw_id, child->bw_share, + child->max_average_bw, child->hw_id); + if (!err && err_one) { + err = err_one; + + NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node."); + qos_err(htb->mdev, "Failed to modify a child node (class %04x), err = %d\n", + node->classid, err); + } + } + + return err; +} + +int +mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack) +{ + u32 bw_share, max_average_bw; + struct mlx5e_qos_node *node; + bool ceil_changed = false; + int err; + + qos_dbg(htb->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n", + classid, rate, ceil); + + node = mlx5e_htb_node_find(htb, classid); + if (!node) + return -ENOENT; + + node->rate = rate; + mlx5e_htb_convert_rate(htb, rate, node->parent, &bw_share); + mlx5e_htb_convert_ceil(htb, ceil, &max_average_bw); + + err = mlx5_qos_update_node(htb->mdev, node->parent->hw_id, bw_share, + max_average_bw, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node."); + qos_err(htb->mdev, "Failed to modify a node (class %04x), err = %d\n", + classid, err); + return err; + } + + if (max_average_bw != node->max_average_bw) + ceil_changed = true; + + node->bw_share = bw_share; + node->max_average_bw = max_average_bw; + + if (ceil_changed) + err = mlx5e_htb_update_children(htb, node, extack); + + return err; +} + +struct mlx5e_htb *mlx5e_htb_alloc(void) +{ + return kvzalloc(sizeof(struct mlx5e_htb), GFP_KERNEL); +} + +void mlx5e_htb_free(struct mlx5e_htb *htb) +{ + kvfree(htb); +} + +int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt, + struct net_device *netdev, struct mlx5_core_dev *mdev, + struct mlx5e_selq *selq, struct mlx5e_priv *priv) +{ + htb->mdev = mdev; + htb->netdev = netdev; + htb->selq = selq; + htb->priv = priv; + hash_init(htb->qos_tc2node); + return mlx5e_htb_root_add(htb, htb_qopt->parent_classid, htb_qopt->classid, + htb_qopt->extack); +} + +void mlx5e_htb_cleanup(struct mlx5e_htb *htb) +{ + mlx5e_htb_root_del(htb); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h new file mode 100644 index 000000000000..8386f1ea4559 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5E_EN_HTB_H_ +#define __MLX5E_EN_HTB_H_ + +#include "qos.h" + +#define MLX5E_QOS_MAX_LEAF_NODES 256 + +struct mlx5e_selq; +struct mlx5e_htb; + +typedef int (*mlx5e_fp_htb_enumerate)(void *data, u16 qid, u32 hw_id); +int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data); + +int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb); + +/* TX datapath API */ +int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid); + +/* HTB TC handlers */ + +int +mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack); +int +mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid, + u64 rate, u64 ceil, struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid, + struct netlink_ext_ack *extack); +int +mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force, + struct netlink_ext_ack *extack); +int +mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack); +struct mlx5e_htb *mlx5e_htb_alloc(void); +void mlx5e_htb_free(struct mlx5e_htb *htb); +int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt, + struct net_device *netdev, struct mlx5_core_dev *mdev, + struct mlx5e_selq *selq, struct mlx5e_priv *priv); +void mlx5e_htb_cleanup(struct mlx5e_htb *htb); +#endif + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c index d290d7276b8d..b4f3bd7d346e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c @@ -20,7 +20,7 @@ mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch, struct mlx5e_channel_stats *stats; int tc; - stats = &priv->channel_stats[ch]; + stats = priv->channel_stats[ch]; data->rx_packets = stats->rq.packets; data->rx_bytes = stats->rq.bytes; @@ -120,14 +120,14 @@ static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent) cancel_delayed_work_sync(&priv->stats_agent.work); } -int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) +void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) { int buf_len = mlx5e_hv_vhca_stats_buf_size(priv); struct mlx5_hv_vhca_agent *agent; priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL); if (!priv->stats_agent.buf) - return -ENOMEM; + return; agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca, MLX5_HV_VHCA_AGENT_STATS, @@ -142,13 +142,11 @@ int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) PTR_ERR(agent)); kvfree(priv->stats_agent.buf); - return IS_ERR_OR_NULL(agent); + return; } priv->stats_agent.agent = agent; INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work); - - return 0; } void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h index 664463faf77b..29c8c6d3260f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h @@ -7,19 +7,12 @@ #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) -int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv); +void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv); void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv); #else - -static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) -{ - return 0; -} - -static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) -{ -} +static inline void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) {} +static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) {} #endif #endif /* __MLX5_EN_STATS_VHCA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c index 7edde4d536fd..17325c5d6516 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c @@ -155,3 +155,61 @@ struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh) return mh->modify_hdr; } +char * +mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + int new_num_actions, max_hw_actions; + size_t new_sz, old_sz; + void *ret; + + if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions) + goto out; + + max_hw_actions = mlx5e_mod_hdr_max_actions(mdev, namespace); + new_num_actions = min(max_hw_actions, + mod_hdr_acts->actions ? + mod_hdr_acts->max_actions * 2 : 1); + if (mod_hdr_acts->max_actions == new_num_actions) + return ERR_PTR(-ENOSPC); + + new_sz = MLX5_MH_ACT_SZ * new_num_actions; + old_sz = mod_hdr_acts->max_actions * MLX5_MH_ACT_SZ; + + if (mod_hdr_acts->is_static) { + ret = kzalloc(new_sz, GFP_KERNEL); + if (ret) { + memcpy(ret, mod_hdr_acts->actions, old_sz); + mod_hdr_acts->is_static = false; + } + } else { + ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL); + if (ret) + memset(ret + old_sz, 0, new_sz - old_sz); + } + if (!ret) + return ERR_PTR(-ENOMEM); + + mod_hdr_acts->actions = ret; + mod_hdr_acts->max_actions = new_num_actions; + +out: + return mod_hdr_acts->actions + (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ); +} + +void +mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + if (!mod_hdr_acts->is_static) + kfree(mod_hdr_acts->actions); + + mod_hdr_acts->actions = NULL; + mod_hdr_acts->num_actions = 0; + mod_hdr_acts->max_actions = 0; +} + +char * +mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos) +{ + return mod_hdr_acts->actions + (pos * MLX5_MH_ACT_SZ); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h index 33b23d8f9182..b8dac418d0a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h @@ -7,14 +7,32 @@ #include <linux/hashtable.h> #include <linux/mlx5/fs.h> +#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) + struct mlx5e_mod_hdr_handle; struct mlx5e_tc_mod_hdr_acts { int num_actions; int max_actions; + bool is_static; void *actions; }; +#define DECLARE_MOD_HDR_ACTS_ACTIONS(name, len) \ + u8 name[len][MLX5_MH_ACT_SZ] = {} + +#define DECLARE_MOD_HDR_ACTS(name, acts_arr) \ + struct mlx5e_tc_mod_hdr_acts name = { \ + .max_actions = ARRAY_SIZE(acts_arr), \ + .is_static = true, \ + .actions = acts_arr, \ + } + +char *mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +void mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +char *mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos); + struct mlx5e_mod_hdr_handle * mlx5e_mod_hdr_attach(struct mlx5_core_dev *mdev, struct mod_hdr_tbl *tbl, @@ -28,4 +46,12 @@ struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh); void mlx5e_mod_hdr_tbl_init(struct mod_hdr_tbl *tbl); void mlx5e_mod_hdr_tbl_destroy(struct mod_hdr_tbl *tbl); +static inline int mlx5e_mod_hdr_max_actions(struct mlx5_core_dev *mdev, int namespace) +{ + if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ + return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions); + else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */ + return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions); +} + #endif /* __MLX5E_EN_MOD_HDR_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index f8c29022dbb2..29dd3a04c154 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -5,13 +5,213 @@ #include "en/txrx.h" #include "en/port.h" #include "en_accel/en_accel.h" -#include "accel/ipsec.h" -#include "fpga/ipsec.h" +#include "en_accel/ipsec.h" +#include <net/xdp_sock_drv.h> -static bool mlx5e_rx_is_xdp(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) +static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev) +{ + u8 min_page_shift = MLX5_CAP_GEN_2(mdev, log_min_mkey_entity_size); + + return min_page_shift ? : 12; +} + +u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) +{ + u8 req_page_shift = xsk ? order_base_2(xsk->chunk_size) : PAGE_SHIFT; + u8 min_page_shift = mlx5e_mpwrq_min_page_shift(mdev); + + /* Regular RQ uses order-0 pages, the NIC must be able to map them. */ + if (WARN_ON_ONCE(!xsk && req_page_shift < min_page_shift)) + min_page_shift = req_page_shift; + + return max(req_page_shift, min_page_shift); +} + +enum mlx5e_mpwrq_umr_mode +mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) +{ + /* Different memory management schemes use different mechanisms to map + * user-mode memory. The stricter guarantees we have, the faster + * mechanisms we use: + * 1. MTT - direct mapping in page granularity. + * 2. KSM - indirect mapping to another MKey to arbitrary addresses, but + * all mappings have the same size. + * 3. KLM - indirect mapping to another MKey to arbitrary addresses, and + * mappings can have different sizes. + */ + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + bool unaligned = xsk ? xsk->unaligned : false; + bool oversized = false; + + if (xsk) { + oversized = xsk->chunk_size < (1 << page_shift); + WARN_ON_ONCE(xsk->chunk_size > (1 << page_shift)); + } + + /* XSK frame size doesn't match the UMR page size, either because the + * frame size is not a power of two, or it's smaller than the minimal + * page size supported by the firmware. + * It's possible to receive packets bigger than MTU in certain setups. + * To avoid writing over the XSK frame boundary, the top region of each + * stride is mapped to a garbage page, resulting in two mappings of + * different sizes per frame. + */ + if (oversized) { + /* An optimization for frame sizes equal to 3 * power_of_two. + * 3 KSMs point to the frame, and one KSM points to the garbage + * page, which works faster than KLM. + */ + if (xsk->chunk_size % 3 == 0 && is_power_of_2(xsk->chunk_size / 3)) + return MLX5E_MPWRQ_UMR_MODE_TRIPLE; + + return MLX5E_MPWRQ_UMR_MODE_OVERSIZED; + } + + /* XSK frames can start at arbitrary unaligned locations, but they all + * have the same size which is a power of two. It allows to optimize to + * one KSM per frame. + */ + if (unaligned) + return MLX5E_MPWRQ_UMR_MODE_UNALIGNED; + + /* XSK: frames are naturally aligned, MTT can be used. + * Non-XSK: Allocations happen in units of CPU pages, therefore, the + * mappings are naturally aligned. + */ + return MLX5E_MPWRQ_UMR_MODE_ALIGNED; +} + +u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode) +{ + switch (mode) { + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + return sizeof(struct mlx5_mtt); + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + return sizeof(struct mlx5_ksm); + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + return sizeof(struct mlx5_klm) * 2; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + return sizeof(struct mlx5_ksm) * 4; + } + WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode); + return 0; +} + +u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); + u8 max_pages_per_wqe, max_log_mpwqe_size; + u16 max_wqe_size; + + /* Keep in sync with MLX5_MPWRQ_MAX_PAGES_PER_WQE. */ + max_wqe_size = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB; + max_pages_per_wqe = ALIGN_DOWN(max_wqe_size - sizeof(struct mlx5e_umr_wqe), + MLX5_UMR_MTT_ALIGNMENT) / umr_entry_size; + max_log_mpwqe_size = ilog2(max_pages_per_wqe) + page_shift; + + WARN_ON_ONCE(max_log_mpwqe_size < MLX5E_ORDER2_MAX_PACKET_MTU); + + return min_t(u8, max_log_mpwqe_size, MLX5_MPWRQ_MAX_LOG_WQE_SZ); +} + +u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode); + u8 pages_per_wqe; + + pages_per_wqe = log_wqe_sz > page_shift ? (1 << (log_wqe_sz - page_shift)) : 1; + + /* Two MTTs are needed to form an octword. The number of MTTs is encoded + * in octwords in a UMR WQE, so we need at least two to avoid mapping + * garbage addresses. + */ + if (WARN_ON_ONCE(pages_per_wqe < 2 && umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) + pages_per_wqe = 2; + + /* Sanity check for further calculations to succeed. */ + BUILD_BUG_ON(MLX5_MPWRQ_MAX_PAGES_PER_WQE > 64); + if (WARN_ON_ONCE(pages_per_wqe > MLX5_MPWRQ_MAX_PAGES_PER_WQE)) + return MLX5_MPWRQ_MAX_PAGES_PER_WQE; + + return pages_per_wqe; +} + +u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode); + u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); + u16 umr_wqe_sz; + + umr_wqe_sz = sizeof(struct mlx5e_umr_wqe) + + ALIGN(pages_per_wqe * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT); + + WARN_ON_ONCE(DIV_ROUND_UP(umr_wqe_sz, MLX5_SEND_WQE_DS) > MLX5_WQE_CTRL_DS_MASK); + + return umr_wqe_sz; +} + +u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + return DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(mdev, page_shift, umr_mode), + MLX5_SEND_WQE_BB); +} + +u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode); + + /* Add another page as a buffer between WQEs. This page will absorb + * write overflow by the hardware, when receiving packets larger than + * MTU. These oversize packets are dropped by the driver at a later + * stage. + */ + return ALIGN(pages_per_wqe + 1, + MLX5_SEND_WQE_BB / mlx5e_mpwrq_umr_entry_size(umr_mode)); +} + +u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, + enum mlx5e_mpwrq_umr_mode umr_mode) { - return params->xdp_prog || xsk; + /* Same limits apply to KSMs and KLMs. */ + u32 klm_limit = min(MLX5E_MAX_RQ_NUM_KSMS, + 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size)); + + switch (umr_mode) { + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + return MLX5E_MAX_RQ_NUM_MTTS; + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + return klm_limit; + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + /* Each entry is two KLMs. */ + return klm_limit / 2; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + /* Each entry is four KSMs. */ + return klm_limit / 4; + } + WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); + return 0; +} + +static u8 mlx5e_mpwrq_max_log_rq_size(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 mtts_per_wqe = mlx5e_mpwrq_mtts_per_wqe(mdev, page_shift, umr_mode); + u32 max_entries = mlx5e_mpwrq_max_num_entries(mdev, umr_mode); + + return ilog2(max_entries / mtts_per_wqe); +} + +u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + return mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode) + + mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) - + MLX5E_ORDER2_MAX_PACKET_MTU; } u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, @@ -23,7 +223,7 @@ u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, return xsk->headroom; headroom = NET_IP_ALIGN; - if (mlx5e_rx_is_xdp(params, xsk)) + if (params->xdp_prog) headroom += XDP_PACKET_HEADROOM; else headroom += MLX5_RX_HEADROOM; @@ -31,70 +231,80 @@ u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, return headroom; } -u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) +static u32 mlx5e_rx_get_linear_sz_xsk(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk); - return linear_rq_headroom + hw_mtu; + return xsk->headroom + hw_mtu; } -static u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) +static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool xsk) { - u32 frag_sz = mlx5e_rx_get_min_frag_sz(params, xsk); - - /* AF_XDP doesn't build SKBs in place. */ - if (!xsk) - frag_sz = MLX5_SKB_FRAG_SZ(frag_sz); + /* SKBs built on XDP_PASS on XSK RQs don't have headroom. */ + u16 headroom = xsk ? 0 : mlx5e_get_linear_rq_headroom(params, NULL); + u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - /* XDP in mlx5e doesn't support multiple packets per page. AF_XDP is a - * special case. It can run with frames smaller than a page, as it - * doesn't allocate pages dynamically. However, here we pretend that - * fragments are page-sized: it allows to treat XSK frames like pages - * by redirecting alloc and free operations to XSK rings and by using - * the fact there are no multiple packets per "page" (which is a frame). - * The latter is important, because frames may come in a random order, - * and we will have trouble assemblying a real page of multiple frames. - */ - if (mlx5e_rx_is_xdp(params, xsk)) - frag_sz = max_t(u32, frag_sz, PAGE_SIZE); + return MLX5_SKB_FRAG_SZ(headroom + hw_mtu); +} - /* Even if we can go with a smaller fragment size, we must not put - * multiple packets into a single frame. +static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + bool mpwqe) +{ + /* XSK frames are mapped as individual pages, because frames may come in + * an arbitrary order from random locations in the UMEM. */ if (xsk) - frag_sz = max_t(u32, frag_sz, xsk->chunk_size); + return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE; - return frag_sz; + /* XDP in mlx5e doesn't support multiple packets per page. */ + if (params->xdp_prog) + return PAGE_SIZE; + + return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false)); } -u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) +static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk); + u32 linear_stride_sz = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); - return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); + return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) - + order_base_2(linear_stride_sz); } -bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, +bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - /* AF_XDP allocates SKBs on XDP_PASS - ensure they don't occupy more - * than one page. For this, check both with and without xsk. + if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) + return false; + + /* Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data + * must fit into a CPU page. */ - u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk), - mlx5e_rx_get_linear_frag_sz(params, NULL)); + if (mlx5e_rx_get_linear_sz_skb(params, xsk) > PAGE_SIZE) + return false; + + /* XSK frames must be big enough to hold the packet data. */ + if (xsk && mlx5e_rx_get_linear_sz_xsk(params, xsk) > xsk->chunk_size) + return false; - return params->packet_merge.type == MLX5E_PACKET_MERGE_NONE && - linear_frag_sz <= PAGE_SIZE; + return true; } -bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, - u8 log_stride_sz, u8 log_num_strides) +static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, + u8 log_stride_sz, u8 log_num_strides, + u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) { - if (log_stride_sz + log_num_strides != MLX5_MPWRQ_LOG_WQE_SZ) + if (log_stride_sz + log_num_strides != + mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode)) return false; if (log_stride_sz < MLX5_MPWQE_LOG_STRIDE_SZ_BASE || @@ -114,28 +324,53 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - s8 log_num_strides; + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + u8 log_num_strides; u8 log_stride_sz; + u8 log_wqe_sz; + + if (!mlx5e_rx_is_linear_skb(mdev, params, xsk)) + return false; + + log_stride_sz = order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true)); + log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode); - if (!mlx5e_rx_is_linear_skb(params, xsk)) + if (log_wqe_sz < log_stride_sz) return false; - log_stride_sz = order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk)); - log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - log_stride_sz; + log_num_strides = log_wqe_sz - log_stride_sz; - return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz, log_num_strides); + return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz, + log_num_strides, page_shift, + umr_mode); } -u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params, xsk); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 log_pkts_per_wqe, page_shift, max_log_rq_size; + + log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(mdev, params, xsk); + page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + max_log_rq_size = mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode); /* Numbers are unsigned, don't subtract to avoid underflow. */ if (params->log_rq_mtu_frames < log_pkts_per_wqe + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW) return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW; + /* Ethtool's rx_max_pending is calculated for regular RQ, that uses + * pages of PAGE_SIZE. Max length of an XSK RQ might differ if it uses a + * frame size not equal to PAGE_SIZE. + * A stricter condition is checked in mlx5e_mpwrq_validate_xsk, WARN on + * unexpected failure. + */ + if (WARN_ON_ONCE(params->log_rq_mtu_frames > log_pkts_per_wqe + max_log_rq_size)) + return max_log_rq_size; + return params->log_rq_mtu_frames - log_pkts_per_wqe; } @@ -165,7 +400,7 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) { if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) - return order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk)); + return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true)); return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); } @@ -174,20 +409,35 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - return MLX5_MPWRQ_LOG_WQE_SZ - + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + + return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) - mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); } +u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz) +{ +#define UMR_WQE_BULK (2) + return min_t(unsigned int, UMR_WQE_BULK, wq_sz / 2 - 1); +} + u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ? - mlx5e_rx_is_linear_skb(params, xsk) : - mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); + u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, xsk); + + if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) + return linear_headroom; + + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return linear_headroom; + + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) + return linear_headroom; - return is_linear_skb || params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO ? - mlx5e_get_linear_rq_headroom(params, xsk) : 0; + return 0; } u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) @@ -195,14 +445,14 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); u16 stop_room; - stop_room = mlx5e_tls_get_stop_room(mdev, params); - stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + stop_room = mlx5e_ktls_get_stop_room(mdev, params); + stop_room += mlx5e_stop_room_for_max_wqe(mdev); if (is_mpwqe) - /* A MPWQE can take up to the maximum-sized WQE + all the normal - * stop room can be taken if a new packet breaks the active - * MPWQE session and allocates its WQEs right away. + /* A MPWQE can take up to the maximum cacheline-aligned WQE + + * all the normal stop room can be taken if a new packet breaks + * the active MPWQE session and allocates its WQEs right away. */ - stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + stop_room += mlx5e_stop_room_for_mpwqe(mdev); return stop_room; } @@ -309,25 +559,46 @@ bool slow_pci_heuristic(struct mlx5_core_dev *mdev) link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; } -bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) +int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { - if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) - return false; + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, NULL); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, NULL); - if (mlx5_fpga_is_ipsec_device(mdev)) - return false; + if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode)) + return -EOPNOTSUPP; - if (params->xdp_prog) { - /* XSK params are not considered here. If striding RQ is in use, - * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will - * be called with the known XSK params. - */ - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) - return false; + if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) + return -EINVAL; + + return 0; +} + +int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + bool unaligned = xsk ? xsk->unaligned : false; + u16 max_mtu_pkts; + + if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode)) + return -EOPNOTSUPP; + + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return -EINVAL; + + /* Current RQ length is too big for the given frame size, the + * needed number of WQEs exceeds the maximum. + */ + max_mtu_pkts = min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE, + mlx5e_mpwrq_max_log_rq_pkts(mdev, page_shift, unaligned)); + if (params->log_rq_mtu_frames > max_mtu_pkts) { + mlx5_core_err(mdev, "Current RQ length %d is too big for XSK with given frame size %u\n", + 1 << params->log_rq_mtu_frames, xsk->chunk_size); + return -EINVAL; } - return true; + return 0; } void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, @@ -340,7 +611,7 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? - BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) : + BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, NULL)) : BIT(params->log_rq_mtu_frames), BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)), MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); @@ -348,8 +619,7 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { - params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) && - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ? + params->rq_wq_type = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ? MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : MLX5_WQ_TYPE_CYCLIC; } @@ -359,15 +629,16 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, { /* Prefer Striding RQ, unless any of the following holds: * - Striding RQ configuration is not possible/supported. - * - Slow PCI heuristic. + * - CQE compression is ON, and stride_index mini_cqe layout is not supported. * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. * * No XSK params: checking the availability of striding RQ in general. */ - if (!slow_pci_heuristic(mdev) && - mlx5e_striding_rq_possible(mdev, params) && + if ((!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) || + MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) && + !mlx5e_mpwrq_validate_regular(mdev, params) && (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || - !mlx5e_rx_is_linear_skb(params, NULL))) + !mlx5e_rx_is_linear_skb(mdev, params, NULL))) MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); mlx5e_set_rq_type(mdev, params); mlx5e_init_rq_type_params(mdev, params); @@ -385,58 +656,132 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e }; } +static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp) +{ + if (xdp) + /* XDP requires all fragments to be of the same size. */ + return first_frag_size + (MLX5E_MAX_RX_FRAGS - 1) * frag_size; + + /* Optimization for small packets: the last fragment is bigger than the others. */ + return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE; +} + #define DEFAULT_FRAG_SIZE (2048) -static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_rq_frags_info *info) +static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_frags_info *info) { u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); int frag_size_max = DEFAULT_FRAG_SIZE; + int first_frag_size_max; u32 buf_size = 0; + u16 headroom; + int max_mtu; int i; - if (mlx5_fpga_is_ipsec_device(mdev)) - byte_count += MLX5E_METADATA_ETHER_LEN; - - if (mlx5e_rx_is_linear_skb(params, xsk)) { + if (mlx5e_rx_is_linear_skb(mdev, params, xsk)) { int frag_stride; - frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk); - frag_stride = roundup_pow_of_two(frag_stride); + frag_stride = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, false); info->arr[0].frag_size = byte_count; info->arr[0].frag_stride = frag_stride; info->num_frags = 1; - info->wqe_bulk = PAGE_SIZE / frag_stride; + + /* N WQEs share the same page, N = PAGE_SIZE / frag_stride. The + * first WQE in the page is responsible for allocation of this + * page, this WQE's index is k*N. If WQEs [k*N+1; k*N+N-1] are + * still not completed, the allocation must stop before k*N. + */ + info->wqe_index_mask = (PAGE_SIZE / frag_stride) - 1; + goto out; } - if (byte_count > PAGE_SIZE + - (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max) + headroom = mlx5e_get_linear_rq_headroom(params, xsk); + first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom); + + max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max, + params->xdp_prog); + if (byte_count > max_mtu || params->xdp_prog) { frag_size_max = PAGE_SIZE; + first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom); + + max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max, + params->xdp_prog); + if (byte_count > max_mtu) { + mlx5_core_err(mdev, "MTU %u is too big for non-linear legacy RQ (max %d)\n", + params->sw_mtu, max_mtu); + return -EINVAL; + } + } i = 0; while (buf_size < byte_count) { int frag_size = byte_count - buf_size; - if (i < MLX5E_MAX_RX_FRAGS - 1) + if (i == 0) + frag_size = min(frag_size, first_frag_size_max); + else if (i < MLX5E_MAX_RX_FRAGS - 1) frag_size = min(frag_size, frag_size_max); info->arr[i].frag_size = frag_size; - info->arr[i].frag_stride = roundup_pow_of_two(frag_size); - buf_size += frag_size; + + if (params->xdp_prog) { + /* XDP multi buffer expects fragments of the same size. */ + info->arr[i].frag_stride = frag_size_max; + } else { + if (i == 0) { + /* Ensure that headroom and tailroom are included. */ + frag_size += headroom; + frag_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + } + info->arr[i].frag_stride = roundup_pow_of_two(frag_size); + } + i++; } info->num_frags = i; - /* number of different wqes sharing a page */ - info->wqe_bulk = 1 + (info->num_frags % 2); + + /* The last fragment of WQE with index 2*N may share the page with the + * first fragment of WQE with index 2*N+1 in certain cases. If WQE 2*N+1 + * is not completed yet, WQE 2*N must not be allocated, as it's + * responsible for allocating a new page. + */ + if (frag_size_max == PAGE_SIZE) { + /* No WQE can start in the middle of a page. */ + info->wqe_index_mask = 0; + } else { + /* PAGE_SIZEs starting from 8192 don't use 2K-sized fragments, + * because there would be more than MLX5E_MAX_RX_FRAGS of them. + */ + WARN_ON(PAGE_SIZE != 2 * DEFAULT_FRAG_SIZE); + + /* Odd number of fragments allows to pack the last fragment of + * the previous WQE and the first fragment of the next WQE into + * the same page. + * As long as DEFAULT_FRAG_SIZE is 2048, and MLX5E_MAX_RX_FRAGS + * is 4, the last fragment can be bigger than the rest only if + * it's the fourth one, so WQEs consisting of 3 fragments will + * always share a page. + * When a page is shared, WQE bulk size is 2, otherwise just 1. + */ + info->wqe_index_mask = info->num_frags % 2; + } out: - info->wqe_bulk = max_t(u8, info->wqe_bulk, 8); + /* Bulking optimization to skip allocation until at least 8 WQEs can be + * allocated in a row. At the same time, never start allocation when + * the page is still used by older WQEs. + */ + info->wqe_bulk = max_t(u8, info->wqe_index_mask + 1, 8); + info->log_num_frags = order_base_2(info->num_frags); + + return 0; } static u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs) @@ -472,7 +817,7 @@ static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev, u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); - int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(params, xsk)); + int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); int wqe_size = BIT(log_stride_sz) * num_strides; /* +1 is for the case that the pkt_per_rsrv dont consume the reservation @@ -496,7 +841,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) log_cq_size = mlx5e_shampo_get_log_cq_size(mdev, params, xsk); else - log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + + log_cq_size = mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk) + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); break; default: /* MLX5_WQ_TYPE_CYCLIC */ @@ -533,17 +878,22 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); int ndsegs = 1; + int err; switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: { u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size, - log_wqe_num_of_strides)) { + log_wqe_num_of_strides, + page_shift, umr_mode)) { mlx5_core_err(mdev, - "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u\n", - log_wqe_stride_size, log_wqe_num_of_strides); + "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u, umr_mode %d\n", + log_wqe_stride_size, log_wqe_num_of_strides, + umr_mode); return -EINVAL; } @@ -551,7 +901,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); MLX5_SET(wq, wq, log_wqe_stride_size, log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); - MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk)); + MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { MLX5_SET(wq, wq, shampo_enable, true); MLX5_SET(wq, wq, log_reservation_size, @@ -572,7 +922,9 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, } default: /* MLX5_WQ_TYPE_CYCLIC */ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); + err = mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); + if (err) + return err; ndsegs = param->frags_info.num_frags; } @@ -638,8 +990,8 @@ void mlx5e_build_sq_param(struct mlx5_core_dev *mdev, void *wq = MLX5_ADDR_OF(sqc, sqc, wq); bool allow_swp; - allow_swp = mlx5_geneve_tx_allowed(mdev) || - !!MLX5_IPSEC_DEV(mdev); + allow_swp = + mlx5_geneve_tx_allowed(mdev) || !!mlx5_ipsec_device_caps(mdev); mlx5e_build_sq_param_common(mdev, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); MLX5_SET(sqc, sqc, allow_swp, allow_swp); @@ -661,13 +1013,6 @@ static void mlx5e_build_ico_cq_param(struct mlx5_core_dev *mdev, param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } -static u8 mlx5e_get_rq_log_wq_sz(void *rqc) -{ - void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - - return MLX5_GET(wq, wq, log_wq_sz); -} - /* This function calculates the maximum number of headers entries that are needed * per WQE, the formula is based on the size of the reservations and the * restriction we have about max packets for reservation that is equal to max @@ -728,25 +1073,98 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev, return wqebbs; } +static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + u8 umr_wqebbs; + + umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode); + + return umr_wqebbs * (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); +} + static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rqp) { - u32 wqebbs; + u32 wqebbs, total_pages, useful_space; /* MLX5_WQ_TYPE_CYCLIC */ if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - wqebbs = MLX5E_UMR_WQEBBS * BIT(mlx5e_get_rq_log_wq_sz(rqp->rqc)); + /* UMR WQEs for the regular RQ. */ + wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, NULL); + + /* If XDP program is attached, XSK may be turned on at any time without + * restarting the channel. ICOSQ must be big enough to fit UMR WQEs of + * both regular RQ and XSK RQ. + * + * XSK uses different values of page_shift, and the total number of UMR + * WQEBBs depends on it. This dependency is complex and not monotonic, + * especially taking into consideration that some of the parameters come + * from capabilities. Hence, we have to try all valid values of XSK + * frame size (and page_shift) to find the maximum. + */ + if (params->xdp_prog) { + u32 max_xsk_wqebbs = 0; + u8 frame_shift; + + for (frame_shift = XDP_UMEM_MIN_CHUNK_SHIFT; + frame_shift <= PAGE_SHIFT; frame_shift++) { + /* The headroom doesn't affect the calculation. */ + struct mlx5e_xsk_param xsk = { + .chunk_size = 1 << frame_shift, + .unaligned = false, + }; + + /* XSK aligned mode. */ + max_xsk_wqebbs = max(max_xsk_wqebbs, + mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); + + /* XSK unaligned mode, frame size is a power of two. */ + xsk.unaligned = true; + max_xsk_wqebbs = max(max_xsk_wqebbs, + mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); + + /* XSK unaligned mode, frame size is not equal to stride size. */ + xsk.chunk_size -= 1; + max_xsk_wqebbs = max(max_xsk_wqebbs, + mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); + + /* XSK unaligned mode, frame size is a triple power of two. */ + xsk.chunk_size = (1 << frame_shift) / 4 * 3; + max_xsk_wqebbs = max(max_xsk_wqebbs, + mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); + } + + wqebbs += max_xsk_wqebbs; + } + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) wqebbs += mlx5e_shampo_icosq_sz(mdev, params, rqp); + + /* UMR WQEs don't cross the page boundary, they are padded with NOPs. + * This padding is always smaller than the max WQE size. That gives us + * at least (PAGE_SIZE - (max WQE size - MLX5_SEND_WQE_BB)) useful bytes + * per page. The number of pages is estimated as the total size of WQEs + * divided by the useful space in page, rounding up. If some WQEs don't + * fully fit into the useful space, they can occupy part of the padding, + * which proves this estimation to be correct (reserve enough space). + */ + useful_space = PAGE_SIZE - mlx5e_get_max_sq_wqebbs(mdev) + MLX5_SEND_WQE_BB; + total_pages = DIV_ROUND_UP(wqebbs * MLX5_SEND_WQE_BB, useful_space); + wqebbs = total_pages * (PAGE_SIZE / MLX5_SEND_WQE_BB); + return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, order_base_2(wqebbs)); } static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev) { - if (mlx5e_accel_is_ktls_rx(mdev)) + if (mlx5e_is_ktls_rx(mdev)) return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; @@ -774,10 +1192,10 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev, void *wq = MLX5_ADDR_OF(sqc, sqc, wq); mlx5e_build_sq_param_common(mdev, param); - param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */ - param->is_tls = mlx5e_accel_is_ktls_rx(mdev); + param->stop_room = mlx5e_stop_room_for_wqe(mdev, 1); /* for XSK NOP */ + param->is_tls = mlx5e_is_ktls_rx(mdev); if (param->is_tls) - param->stop_room += mlx5e_stop_room_for_wqe(1); /* for TLS RX resync NOP */ + param->stop_room += mlx5e_stop_room_for_wqe(mdev, 1); /* for TLS RX resync NOP */ MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq)); MLX5_SET(wq, wq, log_wq_sz, log_wq_size); mlx5e_build_ico_cq_param(mdev, log_wq_size, ¶m->cqp); @@ -785,6 +1203,7 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev, void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct mlx5e_sq_param *param) { void *sqc = param->sqc; @@ -793,6 +1212,7 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, mlx5e_build_sq_param_common(mdev, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); + param->is_xdp_mb = !mlx5e_rx_is_linear_skb(mdev, params, xsk); mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); } @@ -812,7 +1232,7 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev); mlx5e_build_sq_param(mdev, params, &cparam->txq_sq); - mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq); + mlx5e_build_xdpsq_param(mdev, params, NULL, &cparam->xdp_sq); mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq); mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz, &cparam->async_icosq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 433e6967692d..034debd140bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -9,6 +9,7 @@ struct mlx5e_xsk_param { u16 headroom; u16 chunk_size; + bool unaligned; }; struct mlx5e_cq_param { @@ -31,6 +32,7 @@ struct mlx5e_sq_param { struct mlx5_wq_param wq; bool is_mpw; bool is_tls; + bool is_xdp_mb; u16 stop_room; }; @@ -51,37 +53,26 @@ struct mlx5e_create_sq_param { u8 min_inline_mode; }; -static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params, - u16 qid, - enum mlx5e_rq_group group, - u16 *ix) -{ - int nch = params->num_channels; - int ch = qid - nch * group; - - if (ch < 0 || ch >= nch) - return false; - - *ix = ch; - return true; -} - -static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params, - u16 qid, - u16 *ix, - enum mlx5e_rq_group *group) -{ - u16 nch = params->num_channels; - - *ix = qid % nch; - *group = qid / nch; -} - -static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile, - struct mlx5e_params *params, u64 qid) -{ - return qid < params->num_channels * profile->rq_groups; -} +/* Striding RQ dynamic parameters */ + +u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk); +enum mlx5e_mpwrq_umr_mode +mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode); +u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); /* Parameter calculations */ @@ -91,25 +82,23 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); bool slow_pci_heuristic(struct mlx5_core_dev *mdev); -bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); -bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, - u8 log_stride_sz, u8 log_num_strides); u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); -u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk); -u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk); -bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, +bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); -u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params); @@ -129,6 +118,7 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz); u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); @@ -154,6 +144,7 @@ void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev, struct mlx5e_cq_param *param); void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct mlx5e_sq_param *param); int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 673f1c82d381..c9d5d8d93994 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -309,8 +309,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, port_buff_cell_sz, - xoff, &port_buffer, &update_buffer); + err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, xoff, + port_buff_cell_sz, &port_buffer, &update_buffer); if (err) return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 18d542b1c5cb..8469e9c38670 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -79,19 +79,49 @@ void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type, memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp)); } +#define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask) + +static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id) +{ + return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id)); +} + +static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id) +{ + struct skb_shared_hwtstamps hwts = {}; + struct sk_buff *skb; + + ptpsq->cq_stats->resync_event++; + + while (skb_cc != skb_id) { + skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo); + hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp; + skb_tstamp_tx(skb, &hwts); + ptpsq->cq_stats->resync_cqe++; + skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc); + } +} + static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq, struct mlx5_cqe64 *cqe, int budget) { - struct sk_buff *skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo); + u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter)); + u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc); struct mlx5e_txqsq *sq = &ptpsq->txqsq; + struct sk_buff *skb; ktime_t hwtstamp; if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo); ptpsq->cq_stats->err_cqe++; goto out; } + if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id)) + mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id); + + skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo); hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe)); mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_PORT_HWTSTAMP, hwtstamp, ptpsq->cq_stats); @@ -195,7 +225,6 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, int node; sq->pdev = c->pdev; - sq->tstamp = c->tstamp; sq->clock = &mdev->clock; sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; @@ -242,6 +271,7 @@ static void mlx5e_ptp_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) static int mlx5e_ptp_alloc_traffic_db(struct mlx5e_ptpsq *ptpsq, int numa) { int wq_sz = mlx5_wq_cyc_get_size(&ptpsq->txqsq.wq); + struct mlx5_core_dev *mdev = ptpsq->txqsq.mdev; ptpsq->skb_fifo.fifo = kvzalloc_node(array_size(wq_sz, sizeof(*ptpsq->skb_fifo.fifo)), GFP_KERNEL, numa); @@ -251,7 +281,9 @@ static int mlx5e_ptp_alloc_traffic_db(struct mlx5e_ptpsq *ptpsq, int numa) ptpsq->skb_fifo.pc = &ptpsq->skb_fifo_pc; ptpsq->skb_fifo.cc = &ptpsq->skb_fifo_cc; ptpsq->skb_fifo.mask = wq_sz - 1; - + if (MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter)) + ptpsq->ts_cqe_ctr_mask = + (1 << MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter)) - 1; return 0; } @@ -449,7 +481,7 @@ static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev, wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); - param->stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + param->stop_room = mlx5e_stop_room_for_max_wqe(mdev); mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); } @@ -590,37 +622,39 @@ static int mlx5e_ptp_set_state(struct mlx5e_ptp *c, struct mlx5e_params *params) return bitmap_empty(c->state, MLX5E_PTP_STATE_NUM_STATES) ? -EINVAL : 0; } -static void mlx5e_ptp_rx_unset_fs(struct mlx5e_priv *priv) +static void mlx5e_ptp_rx_unset_fs(struct mlx5e_flow_steering *fs) { - struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs; + struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs); if (!ptp_fs->valid) return; mlx5e_fs_tt_redirect_del_rule(ptp_fs->l2_rule); - mlx5e_fs_tt_redirect_any_destroy(priv); + mlx5e_fs_tt_redirect_any_destroy(fs); mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule); mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule); - mlx5e_fs_tt_redirect_udp_destroy(priv); + mlx5e_fs_tt_redirect_udp_destroy(fs); ptp_fs->valid = false; } static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) { u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res); - struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs; + struct mlx5e_flow_steering *fs = priv->fs; struct mlx5_flow_handle *rule; + struct mlx5e_ptp_fs *ptp_fs; int err; + ptp_fs = mlx5e_fs_get_ptp(fs); if (ptp_fs->valid) return 0; - err = mlx5e_fs_tt_redirect_udp_create(priv); + err = mlx5e_fs_tt_redirect_udp_create(fs); if (err) goto out_free; - rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV4_UDP, + rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV4_UDP, tirn, PTP_EV_PORT); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -628,7 +662,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) } ptp_fs->udp_v4_rule = rule; - rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV6_UDP, + rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV6_UDP, tirn, PTP_EV_PORT); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -636,11 +670,11 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) } ptp_fs->udp_v6_rule = rule; - err = mlx5e_fs_tt_redirect_any_create(priv); + err = mlx5e_fs_tt_redirect_any_create(fs); if (err) goto out_destroy_udp_v6_rule; - rule = mlx5e_fs_tt_redirect_any_add_rule(priv, tirn, ETH_P_1588); + rule = mlx5e_fs_tt_redirect_any_add_rule(fs, tirn, ETH_P_1588); if (IS_ERR(rule)) { err = PTR_ERR(rule); goto out_destroy_fs_any; @@ -651,13 +685,13 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) return 0; out_destroy_fs_any: - mlx5e_fs_tt_redirect_any_destroy(priv); + mlx5e_fs_tt_redirect_any_destroy(fs); out_destroy_udp_v6_rule: mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule); out_destroy_udp_v4_rule: mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule); out_destroy_fs_udp: - mlx5e_fs_tt_redirect_udp_destroy(priv); + mlx5e_fs_tt_redirect_udp_destroy(fs); out_free: return err; } @@ -691,7 +725,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, if (err) goto err_free; - netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll, 64); + netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll); mlx5e_ptp_build_params(c, cparams, params); @@ -737,6 +771,7 @@ void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c) if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { mlx5e_ptp_rx_set_fs(c->priv); mlx5e_activate_rq(&c->rq); + mlx5e_trigger_napi_sched(&c->napi); } } @@ -764,29 +799,31 @@ int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn) return 0; } -int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv) +int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs, + const struct mlx5e_profile *profile) { struct mlx5e_ptp_fs *ptp_fs; - if (!priv->profile->rx_ptp_support) + if (!mlx5e_profile_feature_cap(profile, PTP_RX)) return 0; ptp_fs = kzalloc(sizeof(*ptp_fs), GFP_KERNEL); if (!ptp_fs) return -ENOMEM; + mlx5e_fs_set_ptp(fs, ptp_fs); - priv->fs.ptp_fs = ptp_fs; return 0; } -void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv) +void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs, + const struct mlx5e_profile *profile) { - struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs; + struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs); - if (!priv->profile->rx_ptp_support) + if (!mlx5e_profile_feature_cap(profile, PTP_RX)) return; - mlx5e_ptp_rx_unset_fs(priv); + mlx5e_ptp_rx_unset_fs(fs); kfree(ptp_fs); } @@ -794,7 +831,7 @@ int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set) { struct mlx5e_ptp *c = priv->channels.ptp; - if (!priv->profile->rx_ptp_support) + if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) return 0; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) @@ -812,6 +849,6 @@ int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set) netdev_WARN_ONCE(priv->netdev, "Don't try to remove PTP RX-FS rules"); return -EINVAL; } - mlx5e_ptp_rx_unset_fs(priv); + mlx5e_ptp_rx_unset_fs(priv->fs); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h index a71a32e00ebb..cc7efde88ac3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h @@ -6,6 +6,7 @@ #include "en.h" #include "en_stats.h" +#include "en/txrx.h" #include <linux/ptp_classify.h> #define MLX5E_PTP_CHANNEL_IX 0 @@ -17,6 +18,7 @@ struct mlx5e_ptpsq { u16 skb_fifo_pc; struct mlx5e_skb_fifo skb_fifo; struct mlx5e_ptp_cq_stats *cq_stats; + u16 ts_cqe_ctr_mask; }; enum { @@ -67,14 +69,24 @@ static inline bool mlx5e_use_ptpsq(struct sk_buff *skb) fk.ports.dst == htons(PTP_EV_PORT)); } +static inline bool mlx5e_ptpsq_fifo_has_room(struct mlx5e_txqsq *sq) +{ + if (!sq->ptpsq) + return true; + + return mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo); +} + int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, u8 lag_port, struct mlx5e_ptp **cp); void mlx5e_ptp_close(struct mlx5e_ptp *c); void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c); void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c); int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn); -int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv); -void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv); +int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs, + const struct mlx5e_profile *profile); +void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs, + const struct mlx5e_profile *profile); int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set); enum { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index 50977f01a050..2842195ee548 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -1,11 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ +#include <net/sch_generic.h> +#include <net/pkt_cls.h> #include "en.h" #include "params.h" #include "../qos.h" +#include "en/htb.h" -#define BYTES_IN_MBIT 125000 +struct qos_sq_callback_params { + struct mlx5e_priv *priv; + struct mlx5e_channels *chs; +}; int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes) { @@ -27,121 +33,14 @@ int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev) return min(MLX5E_QOS_MAX_LEAF_NODES, mlx5_qos_max_leaf_nodes(mdev)); } -int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv) -{ - int last = find_last_bit(priv->htb.qos_used_qids, mlx5e_qos_max_leaf_nodes(priv->mdev)); - - return last == mlx5e_qos_max_leaf_nodes(priv->mdev) ? 0 : last + 1; -} - -/* Software representation of the QoS tree (internal to this file) */ - -static int mlx5e_find_unused_qos_qid(struct mlx5e_priv *priv) -{ - int size = mlx5e_qos_max_leaf_nodes(priv->mdev); - int res; - - WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__); - res = find_first_zero_bit(priv->htb.qos_used_qids, size); - - return res == size ? -ENOSPC : res; -} - -struct mlx5e_qos_node { - struct hlist_node hnode; - struct rcu_head rcu; - struct mlx5e_qos_node *parent; - u64 rate; - u32 bw_share; - u32 max_average_bw; - u32 hw_id; - u32 classid; /* 16-bit, except root. */ - u16 qid; -}; - -#define MLX5E_QOS_QID_INNER 0xffff -#define MLX5E_HTB_CLASSID_ROOT 0xffffffff - -static struct mlx5e_qos_node * -mlx5e_sw_node_create_leaf(struct mlx5e_priv *priv, u16 classid, u16 qid, - struct mlx5e_qos_node *parent) -{ - struct mlx5e_qos_node *node; - - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (!node) - return ERR_PTR(-ENOMEM); - - node->parent = parent; - - node->qid = qid; - __set_bit(qid, priv->htb.qos_used_qids); - - node->classid = classid; - hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, classid); - - mlx5e_update_tx_netdev_queues(priv); - - return node; -} - -static struct mlx5e_qos_node *mlx5e_sw_node_create_root(struct mlx5e_priv *priv) -{ - struct mlx5e_qos_node *node; - - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (!node) - return ERR_PTR(-ENOMEM); - - node->qid = MLX5E_QOS_QID_INNER; - node->classid = MLX5E_HTB_CLASSID_ROOT; - hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, node->classid); - - return node; -} - -static struct mlx5e_qos_node *mlx5e_sw_node_find(struct mlx5e_priv *priv, u32 classid) -{ - struct mlx5e_qos_node *node = NULL; - - hash_for_each_possible(priv->htb.qos_tc2node, node, hnode, classid) { - if (node->classid == classid) - break; - } - - return node; -} - -static struct mlx5e_qos_node *mlx5e_sw_node_find_rcu(struct mlx5e_priv *priv, u32 classid) -{ - struct mlx5e_qos_node *node = NULL; - - hash_for_each_possible_rcu(priv->htb.qos_tc2node, node, hnode, classid) { - if (node->classid == classid) - break; - } - - return node; -} - -static void mlx5e_sw_node_delete(struct mlx5e_priv *priv, struct mlx5e_qos_node *node) -{ - hash_del_rcu(&node->hnode); - if (node->qid != MLX5E_QOS_QID_INNER) { - __clear_bit(node->qid, priv->htb.qos_used_qids); - mlx5e_update_tx_netdev_queues(priv); - } - kfree_rcu(node, rcu); -} - /* TX datapath API */ -static u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid) +u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid) { /* These channel params are safe to access from the datapath, because: - * 1. This function is called only after checking priv->htb.maj_id != 0, + * 1. This function is called only after checking selq->htb_maj_id != 0, * and the number of queues can't change while HTB offload is active. - * 2. When priv->htb.maj_id becomes 0, synchronize_rcu waits for + * 2. When selq->htb_maj_id becomes 0, synchronize_rcu waits for * mlx5e_select_queue to finish while holding priv->state_lock, * preventing other code from changing the number of queues. */ @@ -150,30 +49,7 @@ static u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid) return (chs->params.num_channels + is_ptp) * mlx5e_get_dcb_num_tc(&chs->params) + qid; } -int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid) -{ - struct mlx5e_qos_node *node; - u16 qid; - int res; - - rcu_read_lock(); - - node = mlx5e_sw_node_find_rcu(priv, classid); - if (!node) { - res = -ENOENT; - goto out; - } - qid = READ_ONCE(node->qid); - if (qid == MLX5E_QOS_QID_INNER) { - res = -EINVAL; - goto out; - } - res = mlx5e_qid_from_qos(&priv->channels, qid); - -out: - rcu_read_unlock(); - return res; -} +/* SQ lifecycle */ static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid) { @@ -190,10 +66,8 @@ static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid) return mlx5e_state_dereference(priv, qos_sqs[qid]); } -/* SQ lifecycle */ - -static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, - struct mlx5e_qos_node *node) +int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, + u16 node_qid, u32 hw_id) { struct mlx5e_create_cq_param ccp = {}; struct mlx5e_txqsq __rcu **qos_sqs; @@ -206,13 +80,13 @@ static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs params = &chs->params; - txq_ix = mlx5e_qid_from_qos(chs, node->qid); + txq_ix = mlx5e_qid_from_qos(chs, node_qid); - WARN_ON(node->qid > priv->htb.max_qos_sqs); - if (node->qid == priv->htb.max_qos_sqs) { + WARN_ON(node_qid > priv->htb_max_qos_sqs); + if (node_qid == priv->htb_max_qos_sqs) { struct mlx5e_sq_stats *stats, **stats_list = NULL; - if (priv->htb.max_qos_sqs == 0) { + if (priv->htb_max_qos_sqs == 0) { stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev), sizeof(*stats_list), GFP_KERNEL); @@ -225,16 +99,16 @@ static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs return -ENOMEM; } if (stats_list) - WRITE_ONCE(priv->htb.qos_sq_stats, stats_list); - WRITE_ONCE(priv->htb.qos_sq_stats[node->qid], stats); - /* Order max_qos_sqs increment after writing the array pointer. + WRITE_ONCE(priv->htb_qos_sq_stats, stats_list); + WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats); + /* Order htb_max_qos_sqs increment after writing the array pointer. * Pairs with smp_load_acquire in en_stats.c. */ - smp_store_release(&priv->htb.max_qos_sqs, priv->htb.max_qos_sqs + 1); + smp_store_release(&priv->htb_max_qos_sqs, priv->htb_max_qos_sqs + 1); } - ix = node->qid % params->num_channels; - qid = node->qid / params->num_channels; + ix = node_qid % params->num_channels; + qid = node_qid / params->num_channels; c = chs->c[ix]; qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); @@ -253,8 +127,8 @@ static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs if (err) goto err_free_sq; err = mlx5e_open_txqsq(c, priv->tisn[c->lag_port][0], txq_ix, params, - ¶m_sq, sq, 0, node->hw_id, - priv->htb.qos_sq_stats[node->qid]); + ¶m_sq, sq, 0, hw_id, + priv->htb_qos_sq_stats[node_qid]); if (err) goto err_close_cq; @@ -269,13 +143,29 @@ err_free_sq: return err; } -static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node *node) +static int mlx5e_open_qos_sq_cb_wrapper(void *data, u16 node_qid, u32 hw_id) { + struct qos_sq_callback_params *cb_params = data; + + return mlx5e_open_qos_sq(cb_params->priv, cb_params->chs, node_qid, hw_id); +} + +int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id) +{ + struct mlx5e_priv *priv = data; struct mlx5e_txqsq *sq; + u16 qid; - sq = mlx5e_get_qos_sq(priv, node->qid); + sq = mlx5e_get_qos_sq(priv, node_qid); - WRITE_ONCE(priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, node->qid)], sq); + qid = mlx5e_qid_from_qos(&priv->channels, node_qid); + + /* If it's a new queue, it will be marked as started at this point. + * Stop it before updating txq2sq. + */ + mlx5e_tx_disable_queue(netdev_get_tx_queue(priv->netdev, qid)); + + priv->txq2sq[qid] = sq; /* Make the change to txq2sq visible before the queue is started. * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, @@ -283,11 +173,13 @@ static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node */ smp_wmb(); - qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node->qid); + qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node_qid); mlx5e_activate_txqsq(sq); + + return 0; } -static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid) +void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid) { struct mlx5e_txqsq *sq; @@ -298,11 +190,16 @@ static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid) qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid); mlx5e_deactivate_txqsq(sq); - /* The queue is disabled, no synchronization with datapath is needed. */ priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL; + + /* Make the change to txq2sq visible before the queue is started again. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); } -static void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid) +void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid) { struct mlx5e_txqsq __rcu **qos_sqs; struct mlx5e_params *params; @@ -352,7 +249,7 @@ void mlx5e_qos_close_queues(struct mlx5e_channel *c) kvfree(qos_sqs); } -static void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs) +void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs) { int i; @@ -360,7 +257,7 @@ static void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs) mlx5e_qos_close_queues(chs->c[i]); } -static int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) { u16 qos_sqs_size; int i; @@ -396,24 +293,20 @@ err_free: int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) { - struct mlx5e_qos_node *node = NULL; - int bkt, err; - - if (!priv->htb.maj_id) - return 0; + struct qos_sq_callback_params callback_params; + int err; err = mlx5e_qos_alloc_queues(priv, chs); if (err) return err; - hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) { - if (node->qid == MLX5E_QOS_QID_INNER) - continue; - err = mlx5e_open_qos_sq(priv, chs, node); - if (err) { - mlx5e_qos_close_all_queues(chs); - return err; - } + callback_params.priv = priv; + callback_params.chs = chs; + + err = mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_open_qos_sq_cb_wrapper, &callback_params); + if (err) { + mlx5e_qos_close_all_queues(chs); + return err; } return 0; @@ -421,14 +314,7 @@ int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) void mlx5e_qos_activate_queues(struct mlx5e_priv *priv) { - struct mlx5e_qos_node *node = NULL; - int bkt; - - hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) { - if (node->qid == MLX5E_QOS_QID_INNER) - continue; - mlx5e_activate_qos_sq(priv, node); - } + mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_activate_qos_sq, priv); } void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c) @@ -457,7 +343,7 @@ void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c) } } -static void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs) +void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs) { int i; @@ -465,278 +351,14 @@ static void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs) mlx5e_qos_deactivate_queues(chs->c[i]); } -/* HTB API */ - -int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, - struct netlink_ext_ack *extack) -{ - struct mlx5e_qos_node *root; - bool opened; - int err; - - qos_dbg(priv->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls); - - if (!mlx5_qos_is_supported(priv->mdev)) { - NL_SET_ERR_MSG_MOD(extack, - "Missing QoS capabilities. Try disabling SRIOV or use a supported device."); - return -EOPNOTSUPP; - } - - opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (opened) { - err = mlx5e_qos_alloc_queues(priv, &priv->channels); - if (err) - return err; - } - - root = mlx5e_sw_node_create_root(priv); - if (IS_ERR(root)) { - err = PTR_ERR(root); - goto err_free_queues; - } - - err = mlx5_qos_create_root_node(priv->mdev, &root->hw_id); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware."); - goto err_sw_node_delete; - } - - WRITE_ONCE(priv->htb.defcls, htb_defcls); - /* Order maj_id after defcls - pairs with - * mlx5e_select_queue/mlx5e_select_htb_queues. - */ - smp_store_release(&priv->htb.maj_id, htb_maj_id); - - return 0; - -err_sw_node_delete: - mlx5e_sw_node_delete(priv, root); - -err_free_queues: - if (opened) - mlx5e_qos_close_all_queues(&priv->channels); - return err; -} - -int mlx5e_htb_root_del(struct mlx5e_priv *priv) -{ - struct mlx5e_qos_node *root; - int err; - - qos_dbg(priv->mdev, "TC_HTB_DESTROY\n"); - - WRITE_ONCE(priv->htb.maj_id, 0); - synchronize_rcu(); /* Sync with mlx5e_select_htb_queue and TX data path. */ - - root = mlx5e_sw_node_find(priv, MLX5E_HTB_CLASSID_ROOT); - if (!root) { - qos_err(priv->mdev, "Failed to find the root node in the QoS tree\n"); - return -ENOENT; - } - err = mlx5_qos_destroy_node(priv->mdev, root->hw_id); - if (err) - qos_err(priv->mdev, "Failed to destroy root node %u, err = %d\n", - root->hw_id, err); - mlx5e_sw_node_delete(priv, root); - - mlx5e_qos_deactivate_all_queues(&priv->channels); - mlx5e_qos_close_all_queues(&priv->channels); - - return err; -} - -static int mlx5e_htb_convert_rate(struct mlx5e_priv *priv, u64 rate, - struct mlx5e_qos_node *parent, u32 *bw_share) -{ - u64 share = 0; - - while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw) - parent = parent->parent; - - if (parent->max_average_bw) - share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT), - parent->max_average_bw); - else - share = 101; - - *bw_share = share == 0 ? 1 : share > 100 ? 0 : share; - - qos_dbg(priv->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n", - rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share); - - return 0; -} - -static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw) -{ - *max_average_bw = div_u64(ceil, BYTES_IN_MBIT); - - qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n", - ceil, *max_average_bw); -} - -int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid, - u32 parent_classid, u64 rate, u64 ceil, - struct netlink_ext_ack *extack) -{ - struct mlx5e_qos_node *node, *parent; - int qid; - int err; - - qos_dbg(priv->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n", - classid, parent_classid, rate, ceil); - - qid = mlx5e_find_unused_qos_qid(priv); - if (qid < 0) { - NL_SET_ERR_MSG_MOD(extack, "Maximum amount of leaf classes is reached."); - return qid; - } - - parent = mlx5e_sw_node_find(priv, parent_classid); - if (!parent) - return -EINVAL; - - node = mlx5e_sw_node_create_leaf(priv, classid, qid, parent); - if (IS_ERR(node)) - return PTR_ERR(node); - - node->rate = rate; - mlx5e_htb_convert_rate(priv, rate, node->parent, &node->bw_share); - mlx5e_htb_convert_ceil(priv, ceil, &node->max_average_bw); - - err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->hw_id, - node->bw_share, node->max_average_bw, - &node->hw_id); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); - qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n", - classid, err); - mlx5e_sw_node_delete(priv, node); - return err; - } - - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { - err = mlx5e_open_qos_sq(priv, &priv->channels, node); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); - qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", - classid, err); - } else { - mlx5e_activate_qos_sq(priv, node); - } - } - - return mlx5e_qid_from_qos(&priv->channels, node->qid); -} - -int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid, - u64 rate, u64 ceil, struct netlink_ext_ack *extack) -{ - struct mlx5e_qos_node *node, *child; - int err, tmp_err; - u32 new_hw_id; - u16 qid; - - qos_dbg(priv->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n", - classid, child_classid, rate, ceil); - - node = mlx5e_sw_node_find(priv, classid); - if (!node) - return -ENOENT; - - err = mlx5_qos_create_inner_node(priv->mdev, node->parent->hw_id, - node->bw_share, node->max_average_bw, - &new_hw_id); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node."); - qos_err(priv->mdev, "Failed to create an inner node (class %04x), err = %d\n", - classid, err); - return err; - } - - /* Intentionally reuse the qid for the upcoming first child. */ - child = mlx5e_sw_node_create_leaf(priv, child_classid, node->qid, node); - if (IS_ERR(child)) { - err = PTR_ERR(child); - goto err_destroy_hw_node; - } - - child->rate = rate; - mlx5e_htb_convert_rate(priv, rate, node, &child->bw_share); - mlx5e_htb_convert_ceil(priv, ceil, &child->max_average_bw); - - err = mlx5_qos_create_leaf_node(priv->mdev, new_hw_id, child->bw_share, - child->max_average_bw, &child->hw_id); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); - qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n", - classid, err); - goto err_delete_sw_node; - } - - /* No fail point. */ - - qid = node->qid; - /* Pairs with mlx5e_get_txq_by_classid. */ - WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); - - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { - mlx5e_deactivate_qos_sq(priv, qid); - mlx5e_close_qos_sq(priv, qid); - } - - err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); - if (err) /* Not fatal. */ - qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", - node->hw_id, classid, err); - - node->hw_id = new_hw_id; - - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { - err = mlx5e_open_qos_sq(priv, &priv->channels, child); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); - qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", - classid, err); - } else { - mlx5e_activate_qos_sq(priv, child); - } - } - - return 0; - -err_delete_sw_node: - child->qid = MLX5E_QOS_QID_INNER; - mlx5e_sw_node_delete(priv, child); - -err_destroy_hw_node: - tmp_err = mlx5_qos_destroy_node(priv->mdev, new_hw_id); - if (tmp_err) /* Not fatal. */ - qos_warn(priv->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n", - new_hw_id, classid, tmp_err); - return err; -} - -static struct mlx5e_qos_node *mlx5e_sw_node_find_by_qid(struct mlx5e_priv *priv, u16 qid) -{ - struct mlx5e_qos_node *node = NULL; - int bkt; - - hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) - if (node->qid == qid) - break; - - return node; -} - -static void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq) +void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq) { qos_dbg(priv->mdev, "Reactivate QoS SQ qid %u\n", qid); netdev_tx_reset_queue(txq); netif_tx_start_queue(txq); } -static void mlx5e_reset_qdisc(struct net_device *dev, u16 qid) +void mlx5e_reset_qdisc(struct net_device *dev, u16 qid) { struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid); struct Qdisc *qdisc = dev_queue->qdisc_sleeping; @@ -749,251 +371,65 @@ static void mlx5e_reset_qdisc(struct net_device *dev, u16 qid) spin_unlock_bh(qdisc_lock(qdisc)); } -int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 *classid, - struct netlink_ext_ack *extack) -{ - struct mlx5e_qos_node *node; - struct netdev_queue *txq; - u16 qid, moved_qid; - bool opened; - int err; - - qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid); - - node = mlx5e_sw_node_find(priv, *classid); - if (!node) - return -ENOENT; - - /* Store qid for reuse. */ - qid = node->qid; - - opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (opened) { - txq = netdev_get_tx_queue(priv->netdev, - mlx5e_qid_from_qos(&priv->channels, qid)); - mlx5e_deactivate_qos_sq(priv, qid); - mlx5e_close_qos_sq(priv, qid); - } - - err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); - if (err) /* Not fatal. */ - qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", - node->hw_id, *classid, err); - - mlx5e_sw_node_delete(priv, node); - - moved_qid = mlx5e_qos_cur_leaf_nodes(priv); - - if (moved_qid == 0) { - /* The last QoS SQ was just destroyed. */ - if (opened) - mlx5e_reactivate_qos_sq(priv, qid, txq); - return 0; - } - moved_qid--; - - if (moved_qid < qid) { - /* The highest QoS SQ was just destroyed. */ - WARN(moved_qid != qid - 1, "Gaps in queue numeration: destroyed queue %u, the highest queue is %u", - qid, moved_qid); - if (opened) - mlx5e_reactivate_qos_sq(priv, qid, txq); - return 0; - } - - WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid); - qos_dbg(priv->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid); - - node = mlx5e_sw_node_find_by_qid(priv, moved_qid); - WARN(!node, "Could not find a node with qid %u to move to queue %u", - moved_qid, qid); - - /* Stop traffic to the old queue. */ - WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); - __clear_bit(moved_qid, priv->htb.qos_used_qids); - - if (opened) { - txq = netdev_get_tx_queue(priv->netdev, - mlx5e_qid_from_qos(&priv->channels, moved_qid)); - mlx5e_deactivate_qos_sq(priv, moved_qid); - mlx5e_close_qos_sq(priv, moved_qid); - } - - /* Prevent packets from the old class from getting into the new one. */ - mlx5e_reset_qdisc(priv->netdev, moved_qid); - - __set_bit(qid, priv->htb.qos_used_qids); - WRITE_ONCE(node->qid, qid); - - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { - err = mlx5e_open_qos_sq(priv, &priv->channels, node); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); - qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n", - node->classid, moved_qid, qid, err); - } else { - mlx5e_activate_qos_sq(priv, node); - } - } - - mlx5e_update_tx_netdev_queues(priv); - if (opened) - mlx5e_reactivate_qos_sq(priv, moved_qid, txq); - - *classid = node->classid; - return 0; -} - -int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force, - struct netlink_ext_ack *extack) +int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb_qopt) { - struct mlx5e_qos_node *node, *parent; - u32 old_hw_id, new_hw_id; - int err, saved_err = 0; - u16 qid; - - qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n", - force ? "_FORCE" : "", classid); - - node = mlx5e_sw_node_find(priv, classid); - if (!node) - return -ENOENT; - - err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->parent->hw_id, - node->parent->bw_share, - node->parent->max_average_bw, - &new_hw_id); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); - qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n", - classid, err); - if (!force) - return err; - saved_err = err; - } - - /* Store qid for reuse and prevent clearing the bit. */ - qid = node->qid; - /* Pairs with mlx5e_get_txq_by_classid. */ - WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); - - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { - mlx5e_deactivate_qos_sq(priv, qid); - mlx5e_close_qos_sq(priv, qid); - } - - /* Prevent packets from the old class from getting into the new one. */ - mlx5e_reset_qdisc(priv->netdev, qid); - - err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); - if (err) /* Not fatal. */ - qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", - node->hw_id, classid, err); - - parent = node->parent; - mlx5e_sw_node_delete(priv, node); + struct mlx5e_htb *htb = priv->htb; + int res; - node = parent; - WRITE_ONCE(node->qid, qid); + if (!htb && htb_qopt->command != TC_HTB_CREATE) + return -EINVAL; - /* Early return on error in force mode. Parent will still be an inner - * node to be deleted by a following delete operation. - */ - if (saved_err) - return saved_err; - - old_hw_id = node->hw_id; - node->hw_id = new_hw_id; - - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { - err = mlx5e_open_qos_sq(priv, &priv->channels, node); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); - qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", - classid, err); - } else { - mlx5e_activate_qos_sq(priv, node); + switch (htb_qopt->command) { + case TC_HTB_CREATE: + if (!mlx5_qos_is_supported(priv->mdev)) { + NL_SET_ERR_MSG_MOD(htb_qopt->extack, + "Missing QoS capabilities. Try disabling SRIOV or use a supported device."); + return -EOPNOTSUPP; } - } - - err = mlx5_qos_destroy_node(priv->mdev, old_hw_id); - if (err) /* Not fatal. */ - qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", - node->hw_id, classid, err); - - return 0; -} - -static int mlx5e_qos_update_children(struct mlx5e_priv *priv, struct mlx5e_qos_node *node, - struct netlink_ext_ack *extack) -{ - struct mlx5e_qos_node *child; - int err = 0; - int bkt; - - hash_for_each(priv->htb.qos_tc2node, bkt, child, hnode) { - u32 old_bw_share = child->bw_share; - int err_one; - - if (child->parent != node) - continue; - - mlx5e_htb_convert_rate(priv, child->rate, node, &child->bw_share); - if (child->bw_share == old_bw_share) - continue; - - err_one = mlx5_qos_update_node(priv->mdev, child->hw_id, child->bw_share, - child->max_average_bw, child->hw_id); - if (!err && err_one) { - err = err_one; - - NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node."); - qos_err(priv->mdev, "Failed to modify a child node (class %04x), err = %d\n", - node->classid, err); + priv->htb = mlx5e_htb_alloc(); + htb = priv->htb; + if (!htb) + return -ENOMEM; + res = mlx5e_htb_init(htb, htb_qopt, priv->netdev, priv->mdev, &priv->selq, priv); + if (res) { + mlx5e_htb_free(htb); + priv->htb = NULL; } + return res; + case TC_HTB_DESTROY: + mlx5e_htb_cleanup(htb); + mlx5e_htb_free(htb); + priv->htb = NULL; + return 0; + case TC_HTB_LEAF_ALLOC_QUEUE: + res = mlx5e_htb_leaf_alloc_queue(htb, htb_qopt->classid, htb_qopt->parent_classid, + htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack); + if (res < 0) + return res; + htb_qopt->qid = res; + return 0; + case TC_HTB_LEAF_TO_INNER: + return mlx5e_htb_leaf_to_inner(htb, htb_qopt->parent_classid, htb_qopt->classid, + htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack); + case TC_HTB_LEAF_DEL: + return mlx5e_htb_leaf_del(htb, &htb_qopt->classid, htb_qopt->extack); + case TC_HTB_LEAF_DEL_LAST: + case TC_HTB_LEAF_DEL_LAST_FORCE: + return mlx5e_htb_leaf_del_last(htb, htb_qopt->classid, + htb_qopt->command == TC_HTB_LEAF_DEL_LAST_FORCE, + htb_qopt->extack); + case TC_HTB_NODE_MODIFY: + return mlx5e_htb_node_modify(htb, htb_qopt->classid, htb_qopt->rate, htb_qopt->ceil, + htb_qopt->extack); + case TC_HTB_LEAF_QUERY_QUEUE: + res = mlx5e_htb_get_txq_by_classid(htb, htb_qopt->classid); + if (res < 0) + return res; + htb_qopt->qid = res; + return 0; + default: + return -EOPNOTSUPP; } - - return err; -} - -int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil, - struct netlink_ext_ack *extack) -{ - u32 bw_share, max_average_bw; - struct mlx5e_qos_node *node; - bool ceil_changed = false; - int err; - - qos_dbg(priv->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n", - classid, rate, ceil); - - node = mlx5e_sw_node_find(priv, classid); - if (!node) - return -ENOENT; - - node->rate = rate; - mlx5e_htb_convert_rate(priv, rate, node->parent, &bw_share); - mlx5e_htb_convert_ceil(priv, ceil, &max_average_bw); - - err = mlx5_qos_update_node(priv->mdev, node->parent->hw_id, bw_share, - max_average_bw, node->hw_id); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node."); - qos_err(priv->mdev, "Failed to modify a node (class %04x), err = %d\n", - classid, err); - return err; - } - - if (max_average_bw != node->max_average_bw) - ceil_changed = true; - - node->bw_share = bw_share; - node->max_average_bw = max_average_bw; - - if (ceil_changed) - err = mlx5e_qos_update_children(priv, node, extack); - - return err; } struct mlx5e_mqprio_rl { @@ -1079,3 +515,4 @@ int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_i *hw_id = rl->leaves_id[tc]; return 0; } + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h index b7558907ba20..4947afa23b73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h @@ -6,41 +6,39 @@ #include <linux/mlx5/driver.h> -#define MLX5E_QOS_MAX_LEAF_NODES 256 +#define BYTES_IN_MBIT 125000 struct mlx5e_priv; +struct mlx5e_htb; struct mlx5e_channels; struct mlx5e_channel; +struct tc_htb_qopt_offload; int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes); int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev); -int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv); - -/* TX datapath API */ -int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid); -struct mlx5e_txqsq *mlx5e_get_sq(struct mlx5e_priv *priv, int qid); /* SQ lifecycle */ +int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, + u16 node_qid, u32 hw_id); +int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id); +void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid); +void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid); +void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq); +void mlx5e_reset_qdisc(struct net_device *dev, u16 qid); + int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs); void mlx5e_qos_activate_queues(struct mlx5e_priv *priv); void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c); +void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs); void mlx5e_qos_close_queues(struct mlx5e_channel *c); +void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs); +int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs); + +/* TX datapath API */ +u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid); /* HTB API */ -int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, - struct netlink_ext_ack *extack); -int mlx5e_htb_root_del(struct mlx5e_priv *priv); -int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid, - u32 parent_classid, u64 rate, u64 ceil, - struct netlink_ext_ack *extack); -int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid, - u64 rate, u64 ceil, struct netlink_ext_ack *extack); -int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 *classid, - struct netlink_ext_ack *extack); -int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force, - struct netlink_ext_ack *extack); -int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil, - struct netlink_ext_ack *extack); +int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb); /* MQPRIO TX rate limit */ struct mlx5e_mqprio_rl; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c index 9c076aa20306..b6f5c1bcdbcd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -183,18 +183,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev) { - struct mlx5e_rep_priv *rpriv; - struct mlx5e_priv *priv; - - /* A given netdev is not a representor or not a slave of LAG configuration */ - if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev)) - return false; - - priv = netdev_priv(netdev); - rpriv = priv->ppriv; - - /* Egress acl forward to vport is supported only non-uplink representor */ - return rpriv->rep->vport != MLX5_VPORT_UPLINK; + return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev); } static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr) @@ -210,9 +199,6 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt u16 fwd_vport_num; int err; - if (!mlx5e_rep_is_lag_netdev(netdev)) - return; - info = ptr; lag_info = info->lower_state_info; /* This is not an event of a representor becoming active slave */ @@ -266,9 +252,6 @@ static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr) struct net_device *lag_dev; struct mlx5e_priv *priv; - if (!mlx5e_rep_is_lag_netdev(netdev)) - return; - priv = netdev_priv(netdev); rpriv = priv->ppriv; lag_dev = info->upper_dev; @@ -293,6 +276,19 @@ static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_bond *bond; + struct mlx5e_priv *priv; + + if (!mlx5e_rep_is_lag_netdev(netdev)) + return NOTIFY_DONE; + + bond = container_of(nb, struct mlx5e_rep_bond, nb); + priv = netdev_priv(netdev); + rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH); + /* Verify VF representor is on the same device of the bond handling the netevent. */ + if (rpriv->uplink_priv.bond != bond) + return NOTIFY_DONE; switch (event) { case NETDEV_CHANGELOWERSTATE: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index c6d2f8c78db7..8099a21e674c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -164,6 +164,36 @@ static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr return err; } +static int +mlx5_esw_bridge_changeupper_validate_netdev(void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info = ptr; + struct net_device *upper = info->upper_dev; + struct net_device *lower; + struct list_head *iter; + + if (!netif_is_bridge_master(upper) || !netif_is_lag_master(dev)) + return 0; + + netdev_for_each_lower_dev(dev, lower, iter) { + struct mlx5_core_dev *mdev; + struct mlx5e_priv *priv; + + if (!mlx5e_eswitch_rep(lower)) + continue; + + priv = netdev_priv(lower); + mdev = priv->mdev; + if (!mlx5_lag_is_active(mdev)) + return -EAGAIN; + if (!mlx5_lag_is_shared_fdb(mdev)) + return -EOPNOTSUPP; + } + + return 0; +} + static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -171,6 +201,7 @@ static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb, switch (event) { case NETDEV_PRECHANGEUPPER: + err = mlx5_esw_bridge_changeupper_validate_netdev(ptr); break; case NETDEV_CHANGEUPPER: @@ -269,6 +300,12 @@ mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev, err = mlx5_esw_bridge_vlan_filtering_set(vport_num, esw_owner_vhca_id, attr->u.vlan_filtering, br_offloads); break; + case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL: + err = mlx5_esw_bridge_vlan_proto_set(vport_num, + esw_owner_vhca_id, + attr->u.vlan_protocol, + br_offloads); + break; default: err = -EOPNOTSUPP; } @@ -491,7 +528,7 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) } br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event; - err = register_netdevice_notifier(&br_offloads->netdev_nb); + err = register_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb); if (err) { esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n", err); @@ -509,7 +546,9 @@ err_register_swdev_blk: err_register_swdev: destroy_workqueue(br_offloads->wq); err_alloc_wq: + rtnl_lock(); mlx5_esw_bridge_cleanup(esw); + rtnl_unlock(); } void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) @@ -524,7 +563,7 @@ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) return; cancel_delayed_work_sync(&br_offloads->update_work); - unregister_netdevice_notifier(&br_offloads->netdev_nb); + unregister_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb); unregister_switchdev_blocking_notifier(&br_offloads->nb_blk); unregister_switchdev_notifier(&br_offloads->nb); destroy_workqueue(br_offloads->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index fcb0892c08a9..fac7e3ff2674 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -21,6 +21,7 @@ #include "en/tc/sample.h" #include "en_accel/ipsec_rxtx.h" #include "en/tc/int_port.h" +#include "en/tc/act/act.h" struct mlx5e_rep_indr_block_priv { struct net_device *netdev; @@ -263,14 +264,14 @@ int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv) INIT_LIST_HEAD(&uplink_priv->unready_flows); /* init shared tc flow table */ - err = mlx5e_tc_esw_init(&uplink_priv->tc_ht); + err = mlx5e_tc_esw_init(uplink_priv); return err; } void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv) { /* delete shared tc flow table */ - mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht); + mlx5e_tc_esw_cleanup(&rpriv->uplink_priv); mutex_destroy(&rpriv->uplink_priv.unready_flows_lock); } @@ -511,12 +512,129 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch, return 0; } +static int +mlx5e_rep_indr_replace_act(struct mlx5e_rep_priv *rpriv, + struct flow_offload_action *fl_act) + +{ + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + enum mlx5_flow_namespace_type ns_type; + struct flow_action_entry *action; + struct mlx5e_tc_act *act; + bool add = false; + int i; + + /* There is no use case currently for more than one action (e.g. pedit). + * when there will be, need to handle cleaning multiple actions on err. + */ + if (!flow_offload_has_one_action(&fl_act->action)) + return -EOPNOTSUPP; + + if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) + ns_type = MLX5_FLOW_NAMESPACE_FDB; + else + ns_type = MLX5_FLOW_NAMESPACE_KERNEL; + + flow_action_for_each(i, action, &fl_act->action) { + act = mlx5e_tc_act_get(action->id, ns_type); + if (!act) + continue; + + if (!act->offload_action) + continue; + + if (!act->offload_action(priv, fl_act, action)) + add = true; + } + + return add ? 0 : -EOPNOTSUPP; +} + +static int +mlx5e_rep_indr_destroy_act(struct mlx5e_rep_priv *rpriv, + struct flow_offload_action *fl_act) +{ + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + enum mlx5_flow_namespace_type ns_type; + struct mlx5e_tc_act *act; + + if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) + ns_type = MLX5_FLOW_NAMESPACE_FDB; + else + ns_type = MLX5_FLOW_NAMESPACE_KERNEL; + + act = mlx5e_tc_act_get(fl_act->id, ns_type); + if (!act || !act->destroy_action) + return -EOPNOTSUPP; + + return act->destroy_action(priv, fl_act); +} + +static int +mlx5e_rep_indr_stats_act(struct mlx5e_rep_priv *rpriv, + struct flow_offload_action *fl_act) + +{ + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + enum mlx5_flow_namespace_type ns_type; + struct mlx5e_tc_act *act; + + if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) + ns_type = MLX5_FLOW_NAMESPACE_FDB; + else + ns_type = MLX5_FLOW_NAMESPACE_KERNEL; + + act = mlx5e_tc_act_get(fl_act->id, ns_type); + if (!act || !act->stats_action) + return -EOPNOTSUPP; + + return act->stats_action(priv, fl_act); +} + +static int +mlx5e_rep_indr_setup_act(struct mlx5e_rep_priv *rpriv, + struct flow_offload_action *fl_act) +{ + switch (fl_act->command) { + case FLOW_ACT_REPLACE: + return mlx5e_rep_indr_replace_act(rpriv, fl_act); + case FLOW_ACT_DESTROY: + return mlx5e_rep_indr_destroy_act(rpriv, fl_act); + case FLOW_ACT_STATS: + return mlx5e_rep_indr_stats_act(rpriv, fl_act); + default: + return -EOPNOTSUPP; + } +} + +static int +mlx5e_rep_indr_no_dev_setup(struct mlx5e_rep_priv *rpriv, + enum tc_setup_type type, + void *data) +{ + if (!data) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_ACT: + return mlx5e_rep_indr_setup_act(rpriv, data); + default: + return -EOPNOTSUPP; + } +} + static int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv, enum tc_setup_type type, void *type_data, void *data, void (*cleanup)(struct flow_block_cb *block_cb)) { + if (!netdev) + return mlx5e_rep_indr_no_dev_setup(cb_priv, type, data); + switch (type) { case TC_SETUP_BLOCK: return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h index d6c7c81690eb..7c9dd3a75f8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h @@ -66,7 +66,7 @@ mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, static inline void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, - struct sk_buff *skb) {} + struct sk_buff *skb) { napi_gro_receive(rq->cq.napi, skb); } #endif /* CONFIG_MLX5_CLS_ACT */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 74086eb556ae..5f6f95ad6888 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -62,6 +62,7 @@ static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq) static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) { + struct mlx5e_rq *xskrq = NULL; struct mlx5_core_dev *mdev; struct mlx5e_icosq *icosq; struct net_device *dev; @@ -70,7 +71,13 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) int err; icosq = ctx; + + mutex_lock(&icosq->channel->icosq_recovery_lock); + + /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */ rq = &icosq->channel->rq; + if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state)) + xskrq = &icosq->channel->xskrq; mdev = icosq->channel->mdev; dev = icosq->channel->netdev; err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state); @@ -84,6 +91,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) goto out; mlx5e_deactivate_rq(rq); + if (xskrq) + mlx5e_deactivate_rq(xskrq); + err = mlx5e_wait_for_icosq_flush(icosq); if (err) goto out; @@ -97,35 +107,31 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) goto out; mlx5e_reset_icosq_cc_pc(icosq); + mlx5e_free_rx_in_progress_descs(rq); + if (xskrq) + mlx5e_free_rx_in_progress_descs(xskrq); + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); mlx5e_activate_icosq(icosq); - mlx5e_activate_rq(rq); + mlx5e_activate_rq(rq); rq->stats->recover++; - return 0; -out: - clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); - return err; -} - -static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) -{ - struct net_device *dev = rq->netdev; - int err; - err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST); - if (err) { - netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn); - return err; - } - err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); - if (err) { - netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn); - return err; + if (xskrq) { + mlx5e_activate_rq(xskrq); + xskrq->stats->recover++; } + mlx5e_trigger_napi_icosq(icosq->channel); + + mutex_unlock(&icosq->channel->icosq_recovery_lock); + return 0; +out: + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + mutex_unlock(&icosq->channel->icosq_recovery_lock); + return err; } static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) @@ -134,19 +140,18 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) int err; mlx5e_deactivate_rq(rq); - mlx5e_free_rx_descs(rq); - - err = mlx5e_rq_to_ready(rq, MLX5_RQC_STATE_ERR); + err = mlx5e_flush_rq(rq, MLX5_RQC_STATE_ERR); + clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); if (err) - goto out; + return err; - clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); mlx5e_activate_rq(rq); rq->stats->recover++; + if (rq->channel) + mlx5e_trigger_napi_icosq(rq->channel); + else + mlx5e_trigger_napi_sched(rq->cq.napi); return 0; -out: - clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); - return err; } static int mlx5e_rx_reporter_timeout_recover(void *ctx) @@ -706,6 +711,16 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); } +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c) +{ + mutex_lock(&c->icosq_recovery_lock); +} + +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c) +{ + mutex_unlock(&c->icosq_recovery_lock); +} + static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { .name = "rx", .recover = mlx5e_rx_reporter_recover, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 4f4bc8726ec4..60bc5b577ab9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -466,6 +466,14 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } +static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_tx_timeout_ctx *to_ctx = ctx; + + return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq); +} + static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { @@ -561,11 +569,11 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) to_ctx.sq = sq; err_ctx.ctx = &to_ctx; err_ctx.recover = mlx5e_tx_reporter_timeout_recover; - err_ctx.dump = mlx5e_tx_reporter_dump_sq; + err_ctx.dump = mlx5e_tx_reporter_timeout_dump; snprintf(err_str, sizeof(err_str), "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, - jiffies_to_usecs(jiffies - sq->txq->trans_start)); + jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start))); mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); return to_ctx.status; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c index c1cdd8c2e37a..7f93426b88b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -442,7 +442,7 @@ int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, goto inner_tir; err = mlx5e_tir_modify(tir, builder); if (err) { - mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of indirect TIR %#x for traffic type %d: err = %d\n", mlx5e_tir_get_tirn(tir), tt, err); if (!final_err) final_err = err; @@ -457,7 +457,7 @@ inner_tir: continue; err = mlx5e_tir_modify(tir, builder); if (err) { - mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of inner indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of inner indirect TIR %#x for traffic type %d: err = %d\n", mlx5e_tir_get_tirn(tir), tt, err); if (!final_err) final_err = err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index 0015a81eb9a1..e1095bc36543 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -24,8 +24,6 @@ struct mlx5e_rx_res { struct { struct mlx5e_rqt direct_rqt; struct mlx5e_tir direct_tir; - struct mlx5e_rqt xsk_rqt; - struct mlx5e_tir xsk_tir; } *channels; struct { @@ -37,7 +35,6 @@ struct mlx5e_rx_res { /* API for rx_res_rss_* */ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, - const struct mlx5e_packet_merge_param *init_pkt_merge_param, unsigned int init_nch) { bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; @@ -52,7 +49,7 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, return -ENOMEM; err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn, - init_pkt_merge_param); + &res->pkt_merge_param); if (err) goto err_rss_free; @@ -277,8 +274,7 @@ struct mlx5e_rx_res *mlx5e_rx_res_alloc(void) return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL); } -static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, - const struct mlx5e_packet_merge_param *init_pkt_merge_param) +static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res) { bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; struct mlx5e_tir_builder *builder; @@ -309,7 +305,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), inner_ft_support); - mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param); + mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param); mlx5e_tir_builder_build_direct(builder); err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true); @@ -322,48 +318,8 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, mlx5e_tir_builder_clear(builder); } - if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) - goto out; - - for (ix = 0; ix < res->max_nch; ix++) { - err = mlx5e_rqt_init_direct(&res->channels[ix].xsk_rqt, - res->mdev, false, res->drop_rqn); - if (err) { - mlx5_core_warn(res->mdev, "Failed to create an XSK RQT: err = %d, ix = %u\n", - err, ix); - goto err_destroy_xsk_rqts; - } - } - - for (ix = 0; ix < res->max_nch; ix++) { - mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, - mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), - inner_ft_support); - mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param); - mlx5e_tir_builder_build_direct(builder); - - err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true); - if (err) { - mlx5_core_warn(res->mdev, "Failed to create an XSK TIR: err = %d, ix = %u\n", - err, ix); - goto err_destroy_xsk_tirs; - } - - mlx5e_tir_builder_clear(builder); - } - goto out; -err_destroy_xsk_tirs: - while (--ix >= 0) - mlx5e_tir_destroy(&res->channels[ix].xsk_tir); - - ix = res->max_nch; -err_destroy_xsk_rqts: - while (--ix >= 0) - mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt); - - ix = res->max_nch; err_destroy_direct_tirs: while (--ix >= 0) mlx5e_tir_destroy(&res->channels[ix].direct_tir); @@ -422,12 +378,6 @@ static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res) for (ix = 0; ix < res->max_nch; ix++) { mlx5e_tir_destroy(&res->channels[ix].direct_tir); mlx5e_rqt_destroy(&res->channels[ix].direct_rqt); - - if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) - continue; - - mlx5e_tir_destroy(&res->channels[ix].xsk_tir); - mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt); } kvfree(res->channels); @@ -454,11 +404,11 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, res->pkt_merge_param = *init_pkt_merge_param; init_rwsem(&res->pkt_merge_param_sem); - err = mlx5e_rx_res_rss_init_def(res, init_pkt_merge_param, init_nch); + err = mlx5e_rx_res_rss_init_def(res, init_nch); if (err) goto err_out; - err = mlx5e_rx_res_channels_init(res, init_pkt_merge_param); + err = mlx5e_rx_res_channels_init(res); if (err) goto err_rss_destroy; @@ -493,13 +443,6 @@ u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix) return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir); } -u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix) -{ - WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_XSK)); - - return mlx5e_tir_get_tirn(&res->channels[ix].xsk_tir); -} - u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) { struct mlx5e_rss *rss = res->rss[0]; @@ -525,56 +468,53 @@ static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int i return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt); } -void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs) +static void mlx5e_rx_res_channel_activate_direct(struct mlx5e_rx_res *res, + struct mlx5e_channels *chs, + unsigned int ix) { - unsigned int nch, ix; + u32 rqn = res->rss_rqns[ix]; int err; - nch = mlx5e_channels_get_num(chs); - - for (ix = 0; ix < chs->num; ix++) - mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]); - res->rss_nch = chs->num; + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + rqn, ix, err); +} - mlx5e_rx_res_rss_enable(res); +static void mlx5e_rx_res_channel_deactivate_direct(struct mlx5e_rx_res *res, + unsigned int ix) +{ + int err; - for (ix = 0; ix < nch; ix++) { - u32 rqn; + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + res->drop_rqn, ix, err); +} - mlx5e_channels_get_regular_rqn(chs, ix, &rqn); - err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn); - if (err) - mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n", - mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), - rqn, ix, err); +void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs) +{ + unsigned int nch, ix; + int err; - if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) - continue; + nch = mlx5e_channels_get_num(chs); - if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn)) - rqn = res->drop_rqn; - err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn); - if (err) - mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to RQ %#x (channel %u): err = %d\n", - mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), - rqn, ix, err); + for (ix = 0; ix < chs->num; ix++) { + if (mlx5e_channels_is_xsk(chs, ix)) + mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]); + else + mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]); } - for (ix = nch; ix < res->max_nch; ix++) { - err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn); - if (err) - mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n", - mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), - res->drop_rqn, ix, err); + res->rss_nch = chs->num; - if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) - continue; + mlx5e_rx_res_rss_enable(res); - err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn); - if (err) - mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n", - mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), - res->drop_rqn, ix, err); - } + for (ix = 0; ix < nch; ix++) + mlx5e_rx_res_channel_activate_direct(res, chs, ix); + for (ix = nch; ix < res->max_nch; ix++) + mlx5e_rx_res_channel_deactivate_direct(res, ix); if (res->features & MLX5E_RX_RES_FEATURE_PTP) { u32 rqn; @@ -597,22 +537,8 @@ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res) mlx5e_rx_res_rss_disable(res); - for (ix = 0; ix < res->max_nch; ix++) { - err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn); - if (err) - mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n", - mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), - res->drop_rqn, ix, err); - - if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) - continue; - - err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn); - if (err) - mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n", - mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), - res->drop_rqn, ix, err); - } + for (ix = 0; ix < res->max_nch; ix++) + mlx5e_rx_res_channel_deactivate_direct(res, ix); if (res->features & MLX5E_RX_RES_FEATURE_PTP) { err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn); @@ -623,33 +549,17 @@ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res) } } -int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, - unsigned int ix) +void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, + unsigned int ix, bool xsk) { - u32 rqn; - int err; - - if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn)) - return -EINVAL; - - err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn); - if (err) - mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to XSK RQ %#x (channel %u): err = %d\n", - mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), - rqn, ix, err); - return err; -} + if (xsk) + mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]); + else + mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]); -int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix) -{ - int err; + mlx5e_rx_res_rss_enable(res); - err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn); - if (err) - mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n", - mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), - res->drop_rqn, ix, err); - return err; + mlx5e_rx_res_channel_activate_direct(res, chs, ix); } int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index b39b20a720e0..5d5f64fab60f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -17,8 +17,7 @@ struct mlx5e_rss_params_hash; enum mlx5e_rx_res_features { MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0), - MLX5E_RX_RES_FEATURE_XSK = BIT(1), - MLX5E_RX_RES_FEATURE_PTP = BIT(2), + MLX5E_RX_RES_FEATURE_PTP = BIT(1), }; /* Setup */ @@ -32,7 +31,6 @@ void mlx5e_rx_res_free(struct mlx5e_rx_res *res); /* TIRN getters for flow steering */ u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix); -u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix); u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res); @@ -40,9 +38,8 @@ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res); /* Activate/deactivate API */ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs); void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res); -int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, - unsigned int ix); -int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix); +void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, + unsigned int ix, bool xsk); /* Configuration API */ void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c new file mode 100644 index 000000000000..f675b1926340 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c @@ -0,0 +1,266 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "selq.h" +#include <linux/slab.h> +#include <linux/netdevice.h> +#include <linux/rcupdate.h> +#include "en.h" +#include "en/ptp.h" +#include "en/htb.h" + +struct mlx5e_selq_params { + unsigned int num_regular_queues; + unsigned int num_channels; + unsigned int num_tcs; + union { + u8 is_special_queues; + struct { + bool is_htb : 1; + bool is_ptp : 1; + }; + }; + u16 htb_maj_id; + u16 htb_defcls; +}; + +int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock) +{ + struct mlx5e_selq_params *init_params; + + selq->state_lock = state_lock; + + selq->standby = kvzalloc(sizeof(*selq->standby), GFP_KERNEL); + if (!selq->standby) + return -ENOMEM; + + init_params = kvzalloc(sizeof(*selq->active), GFP_KERNEL); + if (!init_params) { + kvfree(selq->standby); + selq->standby = NULL; + return -ENOMEM; + } + /* Assign dummy values, so that mlx5e_select_queue won't crash. */ + *init_params = (struct mlx5e_selq_params) { + .num_regular_queues = 1, + .num_channels = 1, + .num_tcs = 1, + .is_htb = false, + .is_ptp = false, + .htb_maj_id = 0, + .htb_defcls = 0, + }; + rcu_assign_pointer(selq->active, init_params); + + return 0; +} + +void mlx5e_selq_cleanup(struct mlx5e_selq *selq) +{ + WARN_ON_ONCE(selq->is_prepared); + + kvfree(selq->standby); + selq->standby = NULL; + selq->is_prepared = true; + + mlx5e_selq_apply(selq); + + kvfree(selq->standby); + selq->standby = NULL; +} + +void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params) +{ + struct mlx5e_selq_params *selq_active; + + lockdep_assert_held(selq->state_lock); + WARN_ON_ONCE(selq->is_prepared); + + selq->is_prepared = true; + + selq_active = rcu_dereference_protected(selq->active, + lockdep_is_held(selq->state_lock)); + *selq->standby = *selq_active; + selq->standby->num_channels = params->num_channels; + selq->standby->num_tcs = mlx5e_get_dcb_num_tc(params); + selq->standby->num_regular_queues = + selq->standby->num_channels * selq->standby->num_tcs; + selq->standby->is_ptp = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS); +} + +bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq) +{ + struct mlx5e_selq_params *selq_active = + rcu_dereference_protected(selq->active, lockdep_is_held(selq->state_lock)); + + return selq_active->htb_maj_id; +} + +void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls) +{ + struct mlx5e_selq_params *selq_active; + + lockdep_assert_held(selq->state_lock); + WARN_ON_ONCE(selq->is_prepared); + + selq->is_prepared = true; + + selq_active = rcu_dereference_protected(selq->active, + lockdep_is_held(selq->state_lock)); + *selq->standby = *selq_active; + selq->standby->is_htb = htb_maj_id; + selq->standby->htb_maj_id = htb_maj_id; + selq->standby->htb_defcls = htb_defcls; +} + +void mlx5e_selq_apply(struct mlx5e_selq *selq) +{ + struct mlx5e_selq_params *old_params; + + WARN_ON_ONCE(!selq->is_prepared); + + selq->is_prepared = false; + + old_params = rcu_replace_pointer(selq->active, selq->standby, + lockdep_is_held(selq->state_lock)); + synchronize_net(); /* Wait until ndo_select_queue starts emitting correct values. */ + selq->standby = old_params; +} + +void mlx5e_selq_cancel(struct mlx5e_selq *selq) +{ + lockdep_assert_held(selq->state_lock); + WARN_ON_ONCE(!selq->is_prepared); + + selq->is_prepared = false; +} + +#ifdef CONFIG_MLX5_CORE_EN_DCB +static int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb) +{ + int dscp_cp = 0; + + if (skb->protocol == htons(ETH_P_IP)) + dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; + else if (skb->protocol == htons(ETH_P_IPV6)) + dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; + + return priv->dcbx_dp.dscp2prio[dscp_cp]; +} +#endif + +static int mlx5e_get_up(struct mlx5e_priv *priv, struct sk_buff *skb) +{ +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (READ_ONCE(priv->dcbx_dp.trust_state) == MLX5_QPTS_TRUST_DSCP) + return mlx5e_get_dscp_up(priv, skb); +#endif + if (skb_vlan_tag_present(skb)) + return skb_vlan_tag_get_prio(skb); + return 0; +} + +static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb, + struct mlx5e_selq_params *selq) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int up; + + up = selq->num_tcs > 1 ? mlx5e_get_up(priv, skb) : 0; + + return selq->num_regular_queues + up; +} + +static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_selq_params *selq) +{ + u16 classid; + + /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */ + if ((TC_H_MAJ(skb->priority) >> 16) == selq->htb_maj_id) + classid = TC_H_MIN(skb->priority); + else + classid = selq->htb_defcls; + + if (!classid) + return 0; + + return mlx5e_htb_get_txq_by_classid(priv->htb, classid); +} + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_selq_params *selq; + int txq_ix, up; + + selq = rcu_dereference_bh(priv->selq.active); + + /* This is a workaround needed only for the mlx5e_netdev_change_profile + * flow that zeroes out the whole priv without unregistering the netdev + * and without preventing ndo_select_queue from being called. + */ + if (unlikely(!selq)) + return 0; + + if (likely(!selq->is_special_queues)) { + /* No special queues, netdev_pick_tx returns one of the regular ones. */ + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + if (selq->num_tcs <= 1) + return txq_ix; + + up = mlx5e_get_up(priv, skb); + + /* Normalize any picked txq_ix to [0, num_channels), + * So we can return a txq_ix that matches the channel and + * packet UP. + */ + return mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels) + + up * selq->num_channels; + } + + if (unlikely(selq->htb_maj_id)) { + /* num_tcs == 1, shortcut for PTP */ + + txq_ix = mlx5e_select_htb_queue(priv, skb, selq); + if (txq_ix > 0) + return txq_ix; + + if (unlikely(selq->is_ptp && mlx5e_use_ptpsq(skb))) + return selq->num_channels; + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs. + * If they are selected, switch to regular queues. + * Driver to select these queues only at mlx5e_select_ptpsq() + * and mlx5e_select_htb_queue(). + */ + return mlx5e_txq_to_ch_ix_htb(txq_ix, selq->num_channels); + } + + /* PTP is enabled */ + + if (mlx5e_use_ptpsq(skb)) + return mlx5e_select_ptpsq(dev, skb, selq); + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + /* Normalize any picked txq_ix to [0, num_channels). Queues in range + * [0, num_regular_queues) will be mapped to the corresponding channel + * index, so that we can apply the packet's UP (if num_tcs > 1). + * If netdev_pick_tx() picks ptp_channel, switch to a regular queue, + * because driver should select the PTP only at mlx5e_select_ptpsq(). + */ + txq_ix = mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels); + + if (selq->num_tcs <= 1) + return txq_ix; + + up = mlx5e_get_up(priv, skb); + + return txq_ix + up * selq->num_channels; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h new file mode 100644 index 000000000000..fd590f80e4d1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_SELQ_H__ +#define __MLX5_EN_SELQ_H__ + +#include <linux/kernel.h> + +struct mlx5e_selq_params; + +struct mlx5e_selq { + struct mlx5e_selq_params __rcu *active; + struct mlx5e_selq_params *standby; + struct mutex *state_lock; /* points to priv->state_lock */ + bool is_prepared; +}; + +struct mlx5e_params; +struct net_device; +struct sk_buff; + +int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock); +void mlx5e_selq_cleanup(struct mlx5e_selq *selq); +void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params); +void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls); +bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq); +void mlx5e_selq_apply(struct mlx5e_selq *selq); +void mlx5e_selq_cancel(struct mlx5e_selq *selq); + +static inline u16 mlx5e_txq_to_ch_ix(u16 txq, u16 num_channels) +{ + while (unlikely(txq >= num_channels)) + txq -= num_channels; + return txq; +} + +static inline u16 mlx5e_txq_to_ch_ix_htb(u16 txq, u16 num_channels) +{ + if (unlikely(txq >= num_channels)) { + if (unlikely(txq >= num_channels << 3)) + txq %= num_channels; + else + do + txq -= num_channels; + while (txq >= num_channels); + } + return txq; +} + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev); + +#endif /* __MLX5_EN_SELQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c new file mode 100644 index 000000000000..21aab96357b5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + return true; +} + +static int +tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->flags |= MLX5_ATTR_FLAG_ACCEPT; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_accept = { + .can_offload = tc_act_can_offload_accept, + .parse_action = tc_act_parse_accept, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c new file mode 100644 index 000000000000..3337241cfd84 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc/post_act.h" +#include "en/tc_priv.h" +#include "mlx5_core.h" + +static struct mlx5e_tc_act *tc_acts_fdb[NUM_FLOW_ACTIONS] = { + [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept, + [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop, + [FLOW_ACTION_TRAP] = &mlx5e_tc_act_trap, + [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto, + [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred, + [FLOW_ACTION_MIRRED] = &mlx5e_tc_act_mirred, + [FLOW_ACTION_REDIRECT_INGRESS] = &mlx5e_tc_act_redirect_ingress, + [FLOW_ACTION_VLAN_PUSH] = &mlx5e_tc_act_vlan, + [FLOW_ACTION_VLAN_POP] = &mlx5e_tc_act_vlan, + [FLOW_ACTION_VLAN_MANGLE] = &mlx5e_tc_act_vlan_mangle, + [FLOW_ACTION_TUNNEL_ENCAP] = &mlx5e_tc_act_tun_encap, + [FLOW_ACTION_TUNNEL_DECAP] = &mlx5e_tc_act_tun_decap, + [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum, + [FLOW_ACTION_PTYPE] = &mlx5e_tc_act_ptype, + [FLOW_ACTION_SAMPLE] = &mlx5e_tc_act_sample, + [FLOW_ACTION_POLICE] = &mlx5e_tc_act_police, + [FLOW_ACTION_CT] = &mlx5e_tc_act_ct, + [FLOW_ACTION_MPLS_PUSH] = &mlx5e_tc_act_mpls_push, + [FLOW_ACTION_MPLS_POP] = &mlx5e_tc_act_mpls_pop, + [FLOW_ACTION_VLAN_PUSH_ETH] = &mlx5e_tc_act_vlan, + [FLOW_ACTION_VLAN_POP_ETH] = &mlx5e_tc_act_vlan, +}; + +static struct mlx5e_tc_act *tc_acts_nic[NUM_FLOW_ACTIONS] = { + [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept, + [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop, + [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto, + [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred_nic, + [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum, + [FLOW_ACTION_MARK] = &mlx5e_tc_act_mark, + [FLOW_ACTION_CT] = &mlx5e_tc_act_ct, +}; + +/** + * mlx5e_tc_act_get() - Get an action parser for an action id. + * @act_id: Flow action id. + * @ns_type: flow namespace type. + */ +struct mlx5e_tc_act * +mlx5e_tc_act_get(enum flow_action_id act_id, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5e_tc_act **tc_acts; + + tc_acts = ns_type == MLX5_FLOW_NAMESPACE_FDB ? tc_acts_fdb : tc_acts_nic; + + return tc_acts[act_id]; +} + +/** + * mlx5e_tc_act_init_parse_state() - Init a new parse_state. + * @parse_state: Parsing state. + * @flow: mlx5e tc flow being handled. + * @flow_action: flow action to parse. + * @extack: to set an error msg. + * + * The same parse_state should be passed to action parsers + * for tracking the current parsing state. + */ +void +mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_tc_flow *flow, + struct flow_action *flow_action, + struct netlink_ext_ack *extack) +{ + memset(parse_state, 0, sizeof(*parse_state)); + parse_state->flow = flow; + parse_state->extack = extack; + parse_state->flow_action = flow_action; +} + +void +mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action, + struct mlx5e_tc_flow_action *flow_action_reorder) +{ + struct flow_action_entry *act; + int i, j = 0; + + flow_action_for_each(i, act, flow_action) { + /* Add CT action to be first. */ + if (act->id == FLOW_ACTION_CT) + flow_action_reorder->entries[j++] = act; + } + + flow_action_for_each(i, act, flow_action) { + if (act->id == FLOW_ACTION_CT) + continue; + flow_action_reorder->entries[j++] = act; + } +} + +int +mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, + struct flow_action *flow_action, + struct mlx5_flow_attr *attr, + enum mlx5_flow_namespace_type ns_type) +{ + struct flow_action_entry *act; + struct mlx5e_tc_act *tc_act; + struct mlx5e_priv *priv; + int err = 0, i; + + priv = parse_state->flow->priv; + + flow_action_for_each(i, act, flow_action) { + tc_act = mlx5e_tc_act_get(act->id, ns_type); + if (!tc_act || !tc_act->post_parse) + continue; + + err = tc_act->post_parse(parse_state, priv, attr); + if (err) + goto out; + } + +out: + return err; +} + +int +mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct mlx5_flow_attr *next_attr) +{ + struct mlx5_core_dev *mdev = flow->priv->mdev; + struct mlx5e_tc_mod_hdr_acts *mod_acts; + int err; + + mod_acts = &attr->parse_attr->mod_hdr_acts; + + /* Set handle on current post act rule to next post act rule. */ + err = mlx5e_tc_post_act_set_handle(mdev, next_attr->post_act_handle, mod_acts); + if (err) { + mlx5_core_warn(mdev, "Failed setting post action handle"); + return err; + } + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h new file mode 100644 index 000000000000..e1570ff056ae --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_ACT_H__ +#define __MLX5_EN_TC_ACT_H__ + +#include <net/tc_act/tc_pedit.h> +#include <net/flow_offload.h> +#include <linux/netlink.h> +#include "eswitch.h" +#include "pedit.h" + +struct mlx5_flow_attr; + +struct mlx5e_tc_act_parse_state { + struct flow_action *flow_action; + struct mlx5e_tc_flow *flow; + struct netlink_ext_ack *extack; + u32 actions; + bool ct; + bool ct_clear; + bool encap; + bool decap; + bool mpls_push; + bool eth_push; + bool eth_pop; + bool ptype_host; + const struct ip_tunnel_info *tun_info; + struct mlx5e_mpls_info mpls_info; + int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; + int if_count; + struct mlx5_tc_ct_priv *ct_priv; +}; + +struct mlx5e_tc_act { + bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr); + + int (*parse_action)(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr); + + int (*post_parse)(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr); + + bool (*is_multi_table_act)(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr); + + int (*offload_action)(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act, + struct flow_action_entry *act); + + int (*destroy_action)(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act); + + int (*stats_action)(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act); +}; + +struct mlx5e_tc_flow_action { + unsigned int num_entries; + struct flow_action_entry **entries; +}; + +extern struct mlx5e_tc_act mlx5e_tc_act_drop; +extern struct mlx5e_tc_act mlx5e_tc_act_trap; +extern struct mlx5e_tc_act mlx5e_tc_act_accept; +extern struct mlx5e_tc_act mlx5e_tc_act_mark; +extern struct mlx5e_tc_act mlx5e_tc_act_goto; +extern struct mlx5e_tc_act mlx5e_tc_act_tun_encap; +extern struct mlx5e_tc_act mlx5e_tc_act_tun_decap; +extern struct mlx5e_tc_act mlx5e_tc_act_csum; +extern struct mlx5e_tc_act mlx5e_tc_act_pedit; +extern struct mlx5e_tc_act mlx5e_tc_act_vlan; +extern struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle; +extern struct mlx5e_tc_act mlx5e_tc_act_mpls_push; +extern struct mlx5e_tc_act mlx5e_tc_act_mpls_pop; +extern struct mlx5e_tc_act mlx5e_tc_act_mirred; +extern struct mlx5e_tc_act mlx5e_tc_act_mirred_nic; +extern struct mlx5e_tc_act mlx5e_tc_act_ct; +extern struct mlx5e_tc_act mlx5e_tc_act_sample; +extern struct mlx5e_tc_act mlx5e_tc_act_ptype; +extern struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress; +extern struct mlx5e_tc_act mlx5e_tc_act_police; + +struct mlx5e_tc_act * +mlx5e_tc_act_get(enum flow_action_id act_id, + enum mlx5_flow_namespace_type ns_type); + +void +mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_tc_flow *flow, + struct flow_action *flow_action, + struct netlink_ext_ack *extack); + +void +mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action, + struct mlx5e_tc_flow_action *flow_action_reorder); + +int +mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, + struct flow_action *flow_action, + struct mlx5_flow_attr *attr, + enum mlx5_flow_namespace_type ns_type); + +int +mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct mlx5_flow_attr *next_attr); + +#endif /* __MLX5_EN_TC_ACT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c new file mode 100644 index 000000000000..c0f08ae6a57f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/tc_act/tc_csum.h> +#include "act.h" +#include "en/tc_priv.h" + +static bool +csum_offload_supported(struct mlx5e_priv *priv, + u32 action, + u32 update_flags, + struct netlink_ext_ack *extack) +{ + u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP | + TCA_CSUM_UPDATE_FLAG_UDP; + + /* The HW recalcs checksums only if re-writing headers */ + if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) { + NL_SET_ERR_MSG_MOD(extack, + "TC csum action is only offloaded with pedit"); + netdev_warn(priv->netdev, + "TC csum action is only offloaded with pedit\n"); + return false; + } + + if (update_flags & ~prot_flags) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload TC csum action for some header/s"); + netdev_warn(priv->netdev, + "can't offload TC csum action for some header/s - flags %#x\n", + update_flags); + return false; + } + + return true; +} + +static bool +tc_act_can_offload_csum(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow *flow = parse_state->flow; + + return csum_offload_supported(flow->priv, attr->action, + act->csum_flags, parse_state->extack); +} + +static int +tc_act_parse_csum(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_csum = { + .can_offload = tc_act_can_offload_csum, + .parse_action = tc_act_parse_csum, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c new file mode 100644 index 000000000000..a829c94289c1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" +#include "en/tc_ct.h" + +static bool +tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; + struct netlink_ext_ack *extack = parse_state->extack; + + if (parse_state->ct && !clear_action) { + NL_SET_ERR_MSG_MOD(extack, "Multiple CT actions are not supported"); + return false; + } + + return true; +} + +static int +tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; + int err; + + /* It's redundant to do ct clear more than once. */ + if (clear_action && parse_state->ct_clear) + return 0; + + err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr, + &attr->parse_attr->mod_hdr_acts, + act, parse_state->extack); + if (err) + return err; + + + if (mlx5e_is_eswitch_flow(parse_state->flow)) + attr->esw_attr->split_count = attr->esw_attr->out_count; + + if (clear_action) { + parse_state->ct_clear = true; + } else { + attr->flags |= MLX5_ATTR_FLAG_CT; + flow_flag_set(parse_state->flow, CT); + parse_state->ct = true; + } + + return 0; +} + +static int +tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_mod_hdr_acts *mod_acts = &attr->parse_attr->mod_hdr_acts; + int err; + + /* If ct action exist, we can ignore previous ct_clear actions */ + if (parse_state->ct) + return 0; + + if (parse_state->ct_clear) { + err = mlx5_tc_ct_set_ct_clear_regs(parse_state->ct_priv, mod_acts); + if (err) { + NL_SET_ERR_MSG_MOD(parse_state->extack, + "Failed to set registers for ct clear"); + return err; + } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + /* Prevent handling of additional, redundant clear actions */ + parse_state->ct_clear = false; + } + + return 0; +} + +static bool +tc_act_is_multi_table_act_ct(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + if (act->ct.action & TCA_CT_ACT_CLEAR) + return false; + + return true; +} + +struct mlx5e_tc_act mlx5e_tc_act_ct = { + .can_offload = tc_act_can_offload_ct, + .parse_action = tc_act_parse_ct, + .is_multi_table_act = tc_act_is_multi_table_act_ct, + .post_parse = tc_act_post_parse_ct, +}; + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c new file mode 100644 index 000000000000..dd025a95c439 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + return true; +} + +static int +tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_drop = { + .can_offload = tc_act_can_offload_drop, + .parse_action = tc_act_parse_drop, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c new file mode 100644 index 000000000000..25174f68613e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" +#include "eswitch.h" + +static int +validate_goto_chain(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + bool is_esw = mlx5e_is_eswitch_flow(flow); + bool ft_flow = mlx5e_is_ft_flow(flow); + u32 dest_chain = act->chain_index; + struct mlx5_fs_chains *chains; + struct mlx5_eswitch *esw; + u32 reformat_and_fwd; + u32 max_chain; + + esw = priv->mdev->priv.eswitch; + chains = is_esw ? esw_chains(esw) : mlx5e_nic_chains(tc); + max_chain = mlx5_chains_get_chain_range(chains); + reformat_and_fwd = is_esw ? + MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) : + MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table); + + if (ft_flow) { + NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); + return -EOPNOTSUPP; + } + + if (!mlx5_chains_backwards_supported(chains) && + dest_chain <= attr->chain) { + NL_SET_ERR_MSG_MOD(extack, "Goto lower numbered chain isn't supported"); + return -EOPNOTSUPP; + } + + if (dest_chain > max_chain) { + NL_SET_ERR_MSG_MOD(extack, + "Requested destination chain is out of supported range"); + return -EOPNOTSUPP; + } + + if (attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_DECAP) && + !reformat_and_fwd) { + NL_SET_ERR_MSG_MOD(extack, + "Goto chain is not allowed if action has reformat or decap"); + return -EOPNOTSUPP; + } + + return 0; +} + +static bool +tc_act_can_offload_goto(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + + if (validate_goto_chain(flow->priv, flow, attr, act, extack)) + return false; + + return true; +} + +static int +tc_act_parse_goto(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->dest_chain = act->chain_index; + + return 0; +} + +static int +tc_act_post_parse_goto(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + + if (!attr->dest_chain) + return 0; + + if (parse_state->decap) { + /* It can be supported if we'll create a mapping for + * the tunnel device only (without tunnel), and set + * this tunnel id with this decap flow. + * + * On restore (miss), we'll just set this saved tunnel + * device. + */ + + NL_SET_ERR_MSG_MOD(extack, "Decap with goto isn't supported"); + netdev_warn(priv->netdev, "Decap with goto isn't supported"); + return -EOPNOTSUPP; + } + + if (!mlx5e_is_eswitch_flow(flow) && parse_attr->mirred_ifindex[0]) { + NL_SET_ERR_MSG_MOD(extack, "Mirroring goto chain rules isn't supported"); + return -EOPNOTSUPP; + } + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_goto = { + .can_offload = tc_act_can_offload_goto, + .parse_action = tc_act_parse_goto, + .post_parse = tc_act_post_parse_goto, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c new file mode 100644 index 000000000000..e8d227595b3e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en_tc.h" + +static bool +tc_act_can_offload_mark(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + if (act->mark & ~MLX5E_TC_FLOW_ID_MASK) { + NL_SET_ERR_MSG_MOD(parse_state->extack, "Bad flow mark, only 16 bit supported"); + return false; + } + + return true; +} + +static int +tc_act_parse_mark(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->nic_attr->flow_tag = act->mark; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mark = { + .can_offload = tc_act_can_offload_mark, + .parse_action = tc_act_parse_mark, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c new file mode 100644 index 000000000000..4ac7de3f6afa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -0,0 +1,337 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/if_macvlan.h> +#include <linux/if_vlan.h> +#include <net/bareudp.h> +#include <net/bonding.h> +#include "act.h" +#include "vlan.h" +#include "en/tc_tun_encap.h" +#include "en/tc_priv.h" +#include "en_rep.h" +#include "lag/lag.h" + +static bool +same_vf_reps(struct mlx5e_priv *priv, struct net_device *out_dev) +{ + return mlx5e_eswitch_vf_rep(priv->netdev) && + priv->netdev == out_dev; +} + +static int +verify_uplink_forwarding(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct net_device *out_dev, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rep_priv; + + /* Forwarding non encapsulated traffic between + * uplink ports is allowed only if + * termination_table_raw_traffic cap is set. + * + * Input vport was stored attr->in_rep. + * In LAG case, *priv* is the private data of + * uplink which may be not the input vport. + */ + rep_priv = mlx5e_rep_to_rep_priv(attr->esw_attr->in_rep); + + if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) && + mlx5e_eswitch_uplink_rep(out_dev))) + return 0; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, + termination_table_raw_traffic)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are both uplink, can't offload forwarding"); + return -EOPNOTSUPP; + } else if (out_dev != rep_priv->netdev) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not the same uplink, can't offload forwarding"); + return -EOPNOTSUPP; + } + return 0; +} + +static bool +is_duplicated_output_device(struct net_device *dev, + struct net_device *out_dev, + int *ifindexes, int if_count, + struct netlink_ext_ack *extack) +{ + int i; + + for (i = 0; i < if_count; i++) { + if (ifindexes[i] == out_dev->ifindex) { + NL_SET_ERR_MSG_MOD(extack, "can't duplicate output to same device"); + netdev_err(dev, "can't duplicate output to same device: %s\n", + out_dev->name); + return true; + } + } + + return false; +} + +static struct net_device * +get_fdb_out_dev(struct net_device *uplink_dev, struct net_device *out_dev) +{ + struct net_device *fdb_out_dev = out_dev; + struct net_device *uplink_upper; + + rcu_read_lock(); + uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev); + if (uplink_upper && netif_is_lag_master(uplink_upper) && + uplink_upper == out_dev) { + fdb_out_dev = uplink_dev; + } else if (netif_is_lag_master(out_dev)) { + fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev)); + if (fdb_out_dev && + (!mlx5e_eswitch_rep(fdb_out_dev) || + !netdev_port_same_parent_id(fdb_out_dev, uplink_dev))) + fdb_out_dev = NULL; + } + rcu_read_unlock(); + return fdb_out_dev; +} + +static bool +tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct net_device *out_dev = act->dev; + struct mlx5e_priv *priv = flow->priv; + struct mlx5_esw_flow_attr *esw_attr; + + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + + if (!out_dev) { + /* out_dev is NULL when filters with + * non-existing mirred device are replayed to + * the driver. + */ + return false; + } + + if (parse_state->mpls_push && !netif_is_bareudp(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "mpls is supported only through a bareudp device"); + return false; + } + + if (parse_state->eth_pop && !parse_state->mpls_push) { + NL_SET_ERR_MSG_MOD(extack, "vlan pop eth is supported only with mpls push"); + return false; + } + + if (flow_flag_test(parse_state->flow, L3_TO_L2_DECAP) && !parse_state->eth_push) { + NL_SET_ERR_MSG_MOD(extack, "mpls pop is only supported with vlan eth push"); + return false; + } + + if (mlx5e_is_ft_flow(flow) && out_dev == priv->netdev) { + /* Ignore forward to self rules generated + * by adding both mlx5 devs to the flow table + * block on a normal nft offload setup. + */ + return false; + } + + if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { + NL_SET_ERR_MSG_MOD(extack, + "can't support more output ports, can't offload forwarding"); + netdev_warn(priv->netdev, + "can't support more than %d output ports, can't offload forwarding\n", + esw_attr->out_count); + return false; + } + + if (parse_state->encap || + netdev_port_same_parent_id(priv->netdev, out_dev) || + netif_is_ovs_master(out_dev)) + return true; + + if (parse_attr->filter_dev != priv->netdev) { + /* All mlx5 devices are called to configure + * high level device filters. Therefore, the + * *attempt* to install a filter on invalid + * eswitch should not trigger an explicit error + */ + return false; + } + + NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding"); + + return false; +} + +static int +parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *out_dev = act->dev; + + parse_attr->mirred_ifindex[esw_attr->out_count] = out_dev->ifindex; + parse_attr->tun_info[esw_attr->out_count] = + mlx5e_dup_tun_info(parse_state->tun_info); + + if (!parse_attr->tun_info[esw_attr->out_count]) + return -ENOMEM; + + parse_state->encap = false; + + if (parse_state->mpls_push) { + memcpy(&parse_attr->mpls_info[esw_attr->out_count], + &parse_state->mpls_info, sizeof(parse_state->mpls_info)); + parse_state->mpls_push = false; + } + esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; + esw_attr->out_count++; + /* attr->dests[].rep is resolved when we handle encap */ + + return 0; +} + +static int +parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct net_device *out_dev = act->dev; + struct net_device *uplink_dev; + struct mlx5e_priv *out_priv; + struct mlx5_eswitch *esw; + bool is_uplink_rep; + int *ifindexes; + int if_count; + int err; + + esw = priv->mdev->priv.eswitch; + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + ifindexes = parse_state->ifindexes; + if_count = parse_state->if_count; + + if (is_duplicated_output_device(priv->netdev, out_dev, ifindexes, if_count, extack)) + return -EOPNOTSUPP; + + parse_state->ifindexes[if_count] = out_dev->ifindex; + parse_state->if_count++; + is_uplink_rep = mlx5e_eswitch_uplink_rep(out_dev); + err = mlx5_lag_do_mirred(priv->mdev, out_dev); + if (err) + return err; + + out_dev = get_fdb_out_dev(uplink_dev, out_dev); + if (!out_dev) + return -ENODEV; + + if (is_vlan_dev(out_dev)) { + err = mlx5e_tc_act_vlan_add_push_action(priv, attr, &out_dev, extack); + if (err) + return err; + } + + if (is_vlan_dev(parse_attr->filter_dev)) { + err = mlx5e_tc_act_vlan_add_pop_action(priv, attr, extack); + if (err) + return err; + } + + if (netif_is_macvlan(out_dev)) + out_dev = macvlan_dev_real_dev(out_dev); + + err = verify_uplink_forwarding(priv, attr, out_dev, extack); + if (err) + return err; + + if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not on same switch HW, can't offload forwarding"); + return -EOPNOTSUPP; + } + + if (same_vf_reps(priv, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "can't forward from a VF to itself"); + return -EOPNOTSUPP; + } + + out_priv = netdev_priv(out_dev); + rpriv = out_priv->ppriv; + esw_attr->dests[esw_attr->out_count].rep = rpriv->rep; + esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev; + + /* If output device is bond master then rules are not explicit + * so we don't attempt to count them. + */ + if (is_uplink_rep && MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) && + MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up)) + attr->lag.count = true; + + esw_attr->out_count++; + + return 0; +} + +static int +parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *out_dev = act->dev; + int err; + + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex, + MLX5E_TC_INT_PORT_EGRESS, + &attr->action, esw_attr->out_count); + if (err) + return err; + + esw_attr->out_count++; + return 0; +} + +static int +tc_act_parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct net_device *out_dev = act->dev; + int err = -EOPNOTSUPP; + + if (parse_state->encap) + err = parse_mirred_encap(parse_state, act, attr); + else if (netdev_port_same_parent_id(priv->netdev, out_dev)) + err = parse_mirred(parse_state, act, priv, attr); + else if (netif_is_ovs_master(out_dev)) + err = parse_mirred_ovs_master(parse_state, act, priv, attr); + + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mirred = { + .can_offload = tc_act_can_offload_mirred, + .parse_action = tc_act_parse_mirred, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c new file mode 100644 index 000000000000..90b4c1b34776 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + struct net_device *out_dev = act->dev; + struct mlx5e_priv *priv = flow->priv; + + if (act->id != FLOW_ACTION_REDIRECT) + return false; + + if (priv->netdev->netdev_ops != out_dev->netdev_ops || + !mlx5e_same_hw_devs(priv, netdev_priv(out_dev))) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not on same switch HW, can't offload forwarding"); + netdev_warn(priv->netdev, + "devices %s %s not on same switch HW, can't offload forwarding\n", + netdev_name(priv->netdev), + out_dev->name); + return false; + } + + return true; +} + +static int +tc_act_parse_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->parse_attr->mirred_ifindex[0] = act->dev->ifindex; + flow_flag_set(parse_state->flow, HAIRPIN); + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mirred_nic = { + .can_offload = tc_act_can_offload_mirred_nic, + .parse_action = tc_act_parse_mirred_nic, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c new file mode 100644 index 000000000000..f106190bf37c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <net/bareudp.h> +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_priv *priv = parse_state->flow->priv; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l2_to_l3_tunnel) || + act->mpls_push.proto != htons(ETH_P_MPLS_UC)) { + NL_SET_ERR_MSG_MOD(extack, "mpls push is supported only for mpls_uc protocol"); + return false; + } + + return true; +} + +static void +copy_mpls_info(struct mlx5e_mpls_info *mpls_info, + const struct flow_action_entry *act) +{ + mpls_info->label = act->mpls_push.label; + mpls_info->tc = act->mpls_push.tc; + mpls_info->bos = act->mpls_push.bos; + mpls_info->ttl = act->mpls_push.ttl; +} + +static int +tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + parse_state->mpls_push = true; + copy_mpls_info(&parse_state->mpls_info, act); + + return 0; +} + +static bool +tc_act_can_offload_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct net_device *filter_dev; + + filter_dev = attr->parse_attr->filter_dev; + + /* we only support mpls pop if it is the first action + * or it is second action after tunnel key unset + * and the filter net device is bareudp. Subsequent + * actions can be pedit and the last can be mirred + * egress redirect. + */ + if ((act_index == 1 && !parse_state->decap) || act_index > 1) { + NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only as first action or with decap"); + return false; + } + + if (!netif_is_bareudp(filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only on bareudp devices"); + return false; + } + + return true; +} + +static int +tc_act_parse_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->esw_attr->eth.h_proto = act->mpls_pop.proto; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_flag_set(parse_state->flow, L3_TO_L2_DECAP); + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mpls_push = { + .can_offload = tc_act_can_offload_mpls_push, + .parse_action = tc_act_parse_mpls_push, +}; + +struct mlx5e_tc_act mlx5e_tc_act_mpls_pop = { + .can_offload = tc_act_can_offload_mpls_pop, + .parse_action = tc_act_parse_mpls_pop, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c new file mode 100644 index 000000000000..47597c524e59 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/if_vlan.h> +#include "act.h" +#include "pedit.h" +#include "en/tc_priv.h" +#include "en/mod_hdr.h" + +static int pedit_header_offsets[] = { + [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), + [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), + [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), + [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), + [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), +}; + +#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) + +static int +set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) +{ + u32 *curr_pmask, *curr_pval; + + curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); + curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset); + + if (*curr_pmask & mask) { /* disallow acting twice on the same location */ + NL_SET_ERR_MSG_MOD(extack, + "curr_pmask and new mask same. Acting twice on same location"); + goto out_err; + } + + *curr_pmask |= mask; + *curr_pval |= (val & mask); + + return 0; + +out_err: + return -EOPNOTSUPP; +} + +int +mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, int namespace, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) +{ + u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; + u8 htype = act->mangle.htype; + int err = -EOPNOTSUPP; + u32 mask, val, offset; + + if (htype == FLOW_ACT_MANGLE_UNSPEC) { + NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded"); + goto out_err; + } + + if (!mlx5e_mod_hdr_max_actions(priv->mdev, namespace)) { + NL_SET_ERR_MSG_MOD(extack, "The pedit offload action is not supported"); + goto out_err; + } + + mask = act->mangle.mask; + val = act->mangle.val; + offset = act->mangle.offset; + + err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd], extack); + if (err) + goto out_err; + + hdrs[cmd].pedits++; + + return 0; +out_err: + return err; +} + +static bool +tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + return true; +} + +static int +tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5e_tc_flow *flow = parse_state->flow; + enum mlx5_flow_namespace_type ns_type; + int err; + + ns_type = mlx5e_get_flow_namespace(flow); + + err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, attr->parse_attr->hdrs, + parse_state->extack); + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) + esw_attr->split_count = esw_attr->out_count; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_pedit = { + .can_offload = tc_act_can_offload_pedit, + .parse_action = tc_act_parse_pedit, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h new file mode 100644 index 000000000000..434c8bd710a2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_ACT_PEDIT_H__ +#define __MLX5_EN_TC_ACT_PEDIT_H__ + +#include "en_tc.h" + +struct pedit_headers { + struct ethhdr eth; + struct vlan_hdr vlan; + struct iphdr ip4; + struct ipv6hdr ip6; + struct tcphdr tcp; + struct udphdr udp; +}; + +struct pedit_headers_action { + struct pedit_headers vals; + struct pedit_headers masks; + u32 pedits; +}; + +int +mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, int namespace, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack); + +#endif /* __MLX5_EN_TC_ACT_PEDIT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c new file mode 100644 index 000000000000..c8e5ca65bb6e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_police(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(parse_state->extack, + "Offload not supported when conform action is not pipe or ok"); + return false; + } + if (mlx5e_policer_validate(parse_state->flow_action, act, + parse_state->extack)) + return false; + + return !!mlx5e_get_flow_meters(parse_state->flow->priv->mdev); +} + +static int +fill_meter_params_from_act(const struct flow_action_entry *act, + struct mlx5e_flow_meter_params *params) +{ + params->index = act->hw_index; + if (act->police.rate_bytes_ps) { + params->mode = MLX5_RATE_LIMIT_BPS; + /* change rate to bits per second */ + params->rate = act->police.rate_bytes_ps << 3; + params->burst = act->police.burst; + } else if (act->police.rate_pkt_ps) { + params->mode = MLX5_RATE_LIMIT_PPS; + params->rate = act->police.rate_pkt_ps; + params->burst = act->police.burst_pkt; + } else { + return -EOPNOTSUPP; + } + + return 0; +} + +static int +tc_act_parse_police(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + int err; + + err = fill_meter_params_from_act(act, &attr->meter_attr.params); + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO; + attr->exe_aso_type = MLX5_EXE_ASO_FLOW_METER; + + return 0; +} + +static bool +tc_act_is_multi_table_act_police(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + return true; +} + +static int +tc_act_police_offload(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act, + struct flow_action_entry *act) +{ + struct mlx5e_flow_meter_params params = {}; + struct mlx5e_flow_meter_handle *meter; + int err = 0; + + err = mlx5e_policer_validate(&fl_act->action, act, fl_act->extack); + if (err) + return err; + + err = fill_meter_params_from_act(act, ¶ms); + if (err) + return err; + + meter = mlx5e_tc_meter_get(priv->mdev, ¶ms); + if (IS_ERR(meter) && PTR_ERR(meter) == -ENOENT) { + meter = mlx5e_tc_meter_replace(priv->mdev, ¶ms); + } else if (!IS_ERR(meter)) { + err = mlx5e_tc_meter_update(meter, ¶ms); + mlx5e_tc_meter_put(meter); + } + + if (IS_ERR(meter)) { + NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter"); + mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index); + err = PTR_ERR(meter); + } + + return err; +} + +static int +tc_act_police_destroy(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act) +{ + struct mlx5e_flow_meter_params params = {}; + struct mlx5e_flow_meter_handle *meter; + + params.index = fl_act->index; + meter = mlx5e_tc_meter_get(priv->mdev, ¶ms); + if (IS_ERR(meter)) { + NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter"); + mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index); + return PTR_ERR(meter); + } + /* first put for the get and second for cleanup */ + mlx5e_tc_meter_put(meter); + mlx5e_tc_meter_put(meter); + return 0; +} + +static int +tc_act_police_stats(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act) +{ + struct mlx5e_flow_meter_params params = {}; + struct mlx5e_flow_meter_handle *meter; + u64 bytes, packets, drops, lastuse; + + params.index = fl_act->index; + meter = mlx5e_tc_meter_get(priv->mdev, ¶ms); + if (IS_ERR(meter)) { + NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter"); + mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index); + return PTR_ERR(meter); + } + + mlx5e_tc_meter_get_stats(meter, &bytes, &packets, &drops, &lastuse); + flow_stats_update(&fl_act->stats, bytes, packets, drops, lastuse, + FLOW_ACTION_HW_STATS_DELAYED); + mlx5e_tc_meter_put(meter); + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_police = { + .can_offload = tc_act_can_offload_police, + .parse_action = tc_act_parse_police, + .is_multi_table_act = tc_act_is_multi_table_act_police, + .offload_action = tc_act_police_offload, + .destroy_action = tc_act_police_destroy, + .stats_action = tc_act_police_stats, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c new file mode 100644 index 000000000000..6454b031ff7a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + return true; +} + +static int +tc_act_parse_ptype(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + + if (act->ptype != PACKET_HOST) { + NL_SET_ERR_MSG_MOD(extack, "skbedit ptype is only supported with type host"); + return -EOPNOTSUPP; + } + + parse_state->ptype_host = true; + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_ptype = { + .can_offload = tc_act_can_offload_ptype, + .parse_action = tc_act_parse_ptype, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c new file mode 100644 index 000000000000..ad09a8a5f36e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct net_device *out_dev = act->dev; + struct mlx5_esw_flow_attr *esw_attr; + + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + + if (!out_dev) + return false; + + if (!netif_is_ovs_master(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to ingress is supported only for OVS internal ports"); + return false; + } + + if (netif_is_ovs_master(parse_attr->filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to ingress is not supported from internal port"); + return false; + } + + if (!parse_state->ptype_host) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to int port ingress requires ptype=host action"); + return false; + } + + if (esw_attr->out_count) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to int port ingress is supported only as single destination"); + return false; + } + + return true; +} + +static int +tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *out_dev = act->dev; + int err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex, + MLX5E_TC_INT_PORT_INGRESS, + &attr->action, esw_attr->out_count); + if (err) + return err; + + esw_attr->out_count++; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress = { + .can_offload = tc_act_can_offload_redirect_ingress, + .parse_action = tc_act_parse_redirect_ingress, +}; + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c new file mode 100644 index 000000000000..2c0196431302 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <net/psample.h> +#include "act.h" +#include "en/tc_priv.h" +#include "en/tc/act/sample.h" + +static bool +tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + bool ct_nat; + + ct_nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; + + if (flow_flag_test(parse_state->flow, CT) && ct_nat) { + NL_SET_ERR_MSG_MOD(extack, "Sample action with CT NAT is not supported"); + return false; + } + + return true; +} + +static int +tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_sample_attr *sample_attr = &attr->sample_attr; + + sample_attr->rate = act->sample.rate; + sample_attr->group_num = act->sample.psample_group->group_num; + + if (act->sample.truncate) + sample_attr->trunc_size = act->sample.trunc_size; + + attr->flags |= MLX5_ATTR_FLAG_SAMPLE; + flow_flag_set(parse_state->flow, SAMPLE); + + return 0; +} + +bool +mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev, + struct mlx5_flow_attr *attr) +{ + if (MLX5_CAP_GEN(mdev, reg_c_preserve) || + attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) + return true; + + return false; +} + +static bool +tc_act_is_multi_table_act_sample(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + return mlx5e_tc_act_sample_is_multi_table(priv->mdev, attr); +} + +struct mlx5e_tc_act mlx5e_tc_act_sample = { + .can_offload = tc_act_can_offload_sample, + .parse_action = tc_act_parse_sample, + .is_multi_table_act = tc_act_is_multi_table_act_sample, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h new file mode 100644 index 000000000000..3efb3a15c5d2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_ACT_SAMPLE_H__ +#define __MLX5_EN_TC_ACT_SAMPLE_H__ + +#include <net/flow_offload.h> +#include "en/tc_priv.h" + +bool +mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev, + struct mlx5_flow_attr *attr); + +#endif /* __MLX5_EN_TC_ACT_SAMPLE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c new file mode 100644 index 000000000000..53b270f652b9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + + if (parse_state->flow_action->num_entries != 1) { + NL_SET_ERR_MSG_MOD(extack, "action trap is supported as a sole action only"); + return false; + } + + return true; +} + +static int +tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_trap = { + .can_offload = tc_act_can_offload_trap, + .parse_action = tc_act_parse_trap, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c new file mode 100644 index 000000000000..b4fa2de9711d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_tun_encap.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_tun_encap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + if (!act->tunnel) { + NL_SET_ERR_MSG_MOD(parse_state->extack, + "Zero tunnel attributes is not supported"); + return false; + } + + return true; +} + +static int +tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + parse_state->tun_info = act->tunnel; + parse_state->encap = true; + + return 0; +} + +static bool +tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + return true; +} + +static int +tc_act_parse_tun_decap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + parse_state->decap = true; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_tun_encap = { + .can_offload = tc_act_can_offload_tun_encap, + .parse_action = tc_act_parse_tun_encap, +}; + +struct mlx5e_tc_act mlx5e_tc_act_tun_decap = { + .can_offload = tc_act_can_offload_tun_decap, + .parse_action = tc_act_parse_tun_decap, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c new file mode 100644 index 000000000000..b86ac604d0c2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/if_vlan.h> +#include "act.h" +#include "vlan.h" +#include "en/tc_priv.h" + +static int +add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action, struct netlink_ext_ack *extack) +{ + const struct flow_action_entry prio_tag_act = { + .vlan.vid = 0, + .vlan.prio = + MLX5_GET(fte_match_set_lyr_2_4, + mlx5e_get_match_headers_value(*action, + &parse_attr->spec), + first_prio) & + MLX5_GET(fte_match_set_lyr_2_4, + mlx5e_get_match_headers_criteria(*action, + &parse_attr->spec), + first_prio), + }; + + return mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, + &prio_tag_act, parse_attr, action, + extack); +} + +static int +parse_tc_vlan_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_esw_flow_attr *attr, + u32 *action, + struct netlink_ext_ack *extack, + struct mlx5e_tc_act_parse_state *parse_state) +{ + u8 vlan_idx = attr->total_vlan; + + if (vlan_idx >= MLX5_FS_VLAN_DEPTH) { + NL_SET_ERR_MSG_MOD(extack, "Total vlans used is greater than supported"); + return -EOPNOTSUPP; + } + + switch (act->id) { + case FLOW_ACTION_VLAN_POP: + if (vlan_idx) { + if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, + MLX5_FS_VLAN_DEPTH)) { + NL_SET_ERR_MSG_MOD(extack, "vlan pop action is not supported"); + return -EOPNOTSUPP; + } + + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2; + } else { + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + } + break; + case FLOW_ACTION_VLAN_PUSH: + attr->vlan_vid[vlan_idx] = act->vlan.vid; + attr->vlan_prio[vlan_idx] = act->vlan.prio; + attr->vlan_proto[vlan_idx] = act->vlan.proto; + if (!attr->vlan_proto[vlan_idx]) + attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q); + + if (vlan_idx) { + if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, + MLX5_FS_VLAN_DEPTH)) { + NL_SET_ERR_MSG_MOD(extack, + "vlan push action is not supported for vlan depth > 1"); + return -EOPNOTSUPP; + } + + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + } else { + if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) && + (act->vlan.proto != htons(ETH_P_8021Q) || + act->vlan.prio)) { + NL_SET_ERR_MSG_MOD(extack, "vlan push action is not supported"); + return -EOPNOTSUPP; + } + + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + } + break; + case FLOW_ACTION_VLAN_POP_ETH: + parse_state->eth_pop = true; + break; + case FLOW_ACTION_VLAN_PUSH_ETH: + if (!flow_flag_test(parse_state->flow, L3_TO_L2_DECAP)) + return -EOPNOTSUPP; + parse_state->eth_push = true; + memcpy(attr->eth.h_dest, act->vlan_push_eth.dst, ETH_ALEN); + memcpy(attr->eth.h_source, act->vlan_push_eth.src, ETH_ALEN); + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN"); + return -EINVAL; + } + + attr->total_vlan = vlan_idx + 1; + + return 0; +} + +int +mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct net_device **out_dev, + struct netlink_ext_ack *extack) +{ + struct net_device *vlan_dev = *out_dev; + struct flow_action_entry vlan_act = { + .id = FLOW_ACTION_VLAN_PUSH, + .vlan.vid = vlan_dev_vlan_id(vlan_dev), + .vlan.proto = vlan_dev_vlan_proto(vlan_dev), + .vlan.prio = 0, + }; + int err; + + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack, NULL); + if (err) + return err; + + rcu_read_lock(); + *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev)); + rcu_read_unlock(); + if (!*out_dev) + return -ENODEV; + + if (is_vlan_dev(*out_dev)) + err = mlx5e_tc_act_vlan_add_push_action(priv, attr, out_dev, extack); + + return err; +} + +int +mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack) +{ + struct flow_action_entry vlan_act = { + .id = FLOW_ACTION_VLAN_POP, + }; + int nest_level, err = 0; + + nest_level = attr->parse_attr->filter_dev->lower_level - + priv->netdev->lower_level; + while (nest_level--) { + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, + extack, NULL); + if (err) + return err; + } + + return err; +} + +static bool +tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + return true; +} + +static int +tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int err; + + if (act->id == FLOW_ACTION_VLAN_PUSH && + (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) { + /* Replace vlan pop+push with vlan modify */ + attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + err = mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, act, + attr->parse_attr, &attr->action, + parse_state->extack); + } else { + err = parse_tc_vlan_action(priv, act, esw_attr, &attr->action, + parse_state->extack, parse_state); + } + + if (err) + return err; + + esw_attr->split_count = esw_attr->out_count; + + return 0; +} + +static int +tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int err; + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { + /* For prio tag mode, replace vlan pop with rewrite vlan prio + * tag rewrite. + */ + attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, + &attr->action, extack); + if (err) + return err; + } + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_vlan = { + .can_offload = tc_act_can_offload_vlan, + .parse_action = tc_act_parse_vlan, + .post_parse = tc_act_post_parse_vlan, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h new file mode 100644 index 000000000000..2fa58c6f44eb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_ACT_VLAN_H__ +#define __MLX5_EN_TC_ACT_VLAN_H__ + +#include <net/flow_offload.h> +#include "en/tc_priv.h" + +struct pedit_headers_action; + +int +mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct net_device **out_dev, + struct netlink_ext_ack *extack); + +int +mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack); + +int +mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, + const struct flow_action_entry *act, + struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action, struct netlink_ext_ack *extack); + +#endif /* __MLX5_EN_TC_ACT_VLAN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c new file mode 100644 index 000000000000..9a8a1a6bd99e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/if_vlan.h> +#include "act.h" +#include "vlan.h" +#include "en/tc_priv.h" + +struct pedit_headers_action; + +int +mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, + const struct flow_action_entry *act, + struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action, struct netlink_ext_ack *extack) +{ + u16 mask16 = VLAN_VID_MASK; + u16 val16 = act->vlan.vid & VLAN_VID_MASK; + const struct flow_action_entry pedit_act = { + .id = FLOW_ACTION_MANGLE, + .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH, + .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI), + .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16), + .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16), + }; + u8 match_prio_mask, match_prio_val; + void *headers_c, *headers_v; + int err; + + headers_c = mlx5e_get_match_headers_criteria(*action, &parse_attr->spec); + headers_v = mlx5e_get_match_headers_value(*action, &parse_attr->spec); + + if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) && + MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) { + NL_SET_ERR_MSG_MOD(extack, "VLAN rewrite action must have VLAN protocol match"); + return -EOPNOTSUPP; + } + + match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); + match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); + if (act->vlan.prio != (match_prio_val & match_prio_mask)) { + NL_SET_ERR_MSG_MOD(extack, "Changing VLAN prio is not supported"); + return -EOPNOTSUPP; + } + + err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr->hdrs, + extack); + *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + return err; +} + +static bool +tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + return true; +} + +static int +tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + enum mlx5_flow_namespace_type ns_type; + int err; + + ns_type = mlx5e_get_flow_namespace(parse_state->flow); + err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, attr->parse_attr, + &attr->action, parse_state->extack); + if (err) + return err; + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) + attr->esw_attr->split_count = attr->esw_attr->out_count; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle = { + .can_offload = tc_act_can_offload_vlan_mangle, + .parse_action = tc_act_parse_vlan_mangle, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h new file mode 100644 index 000000000000..bb6b1a979ba1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __MLX5_EN_TC_CT_FS_H__ +#define __MLX5_EN_TC_CT_FS_H__ + +struct mlx5_ct_fs { + const struct net_device *netdev; + struct mlx5_core_dev *dev; + + /* private data */ + void *priv_data[]; +}; + +struct mlx5_ct_fs_rule { +}; + +struct mlx5_ct_fs_ops { + int (*init)(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct, + struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct); + void (*destroy)(struct mlx5_ct_fs *fs); + + struct mlx5_ct_fs_rule * (*ct_rule_add)(struct mlx5_ct_fs *fs, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, + struct flow_rule *flow_rule); + void (*ct_rule_del)(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule); + + size_t priv_size; +}; + +static inline void *mlx5_ct_fs_priv(struct mlx5_ct_fs *fs) +{ + return &fs->priv_data; +} + +struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void); + +#if IS_ENABLED(CONFIG_MLX5_SW_STEERING) +struct mlx5_ct_fs_ops *mlx5_ct_fs_smfs_ops_get(void); +#else +static inline struct mlx5_ct_fs_ops * +mlx5_ct_fs_smfs_ops_get(void) +{ + return NULL; +} +#endif /* IS_ENABLED(CONFIG_MLX5_SW_STEERING) */ + +#endif /* __MLX5_EN_TC_CT_FS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c new file mode 100644 index 000000000000..ae4f55be48ce --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#include "en_tc.h" +#include "en/tc_ct.h" +#include "en/tc/ct_fs.h" + +#define ct_dbg(fmt, args...)\ + netdev_dbg(fs->netdev, "ct_fs_dmfs debug: " fmt "\n", ##args) + +struct mlx5_ct_fs_dmfs_rule { + struct mlx5_ct_fs_rule fs_rule; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; +}; + +static int +mlx5_ct_fs_dmfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct, + struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct) +{ + return 0; +} + +static void +mlx5_ct_fs_dmfs_destroy(struct mlx5_ct_fs *fs) +{ +} + +static struct mlx5_ct_fs_rule * +mlx5_ct_fs_dmfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, struct flow_rule *flow_rule) +{ + struct mlx5e_priv *priv = netdev_priv(fs->netdev); + struct mlx5_ct_fs_dmfs_rule *dmfs_rule; + int err; + + dmfs_rule = kzalloc(sizeof(*dmfs_rule), GFP_KERNEL); + if (!dmfs_rule) + return ERR_PTR(-ENOMEM); + + dmfs_rule->rule = mlx5_tc_rule_insert(priv, spec, attr); + if (IS_ERR(dmfs_rule->rule)) { + err = PTR_ERR(dmfs_rule->rule); + ct_dbg("Failed to add ct entry fs rule"); + goto err_insert; + } + + dmfs_rule->attr = attr; + + return &dmfs_rule->fs_rule; + +err_insert: + kfree(dmfs_rule); + return ERR_PTR(err); +} + +static void +mlx5_ct_fs_dmfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule) +{ + struct mlx5_ct_fs_dmfs_rule *dmfs_rule = container_of(fs_rule, + struct mlx5_ct_fs_dmfs_rule, + fs_rule); + + mlx5_tc_rule_delete(netdev_priv(fs->netdev), dmfs_rule->rule, dmfs_rule->attr); + kfree(dmfs_rule); +} + +static struct mlx5_ct_fs_ops dmfs_ops = { + .ct_rule_add = mlx5_ct_fs_dmfs_ct_rule_add, + .ct_rule_del = mlx5_ct_fs_dmfs_ct_rule_del, + + .init = mlx5_ct_fs_dmfs_init, + .destroy = mlx5_ct_fs_dmfs_destroy, +}; + +struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void) +{ + return &dmfs_ops; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c new file mode 100644 index 000000000000..2b80fe73549d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c @@ -0,0 +1,380 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#include <linux/refcount.h> + +#include "en_tc.h" +#include "en/tc_priv.h" +#include "en/tc_ct.h" +#include "en/tc/ct_fs.h" + +#include "lib/smfs.h" + +#define INIT_ERR_PREFIX "ct_fs_smfs init failed" +#define ct_dbg(fmt, args...)\ + netdev_dbg(fs->netdev, "ct_fs_smfs debug: " fmt "\n", ##args) +#define MLX5_CT_TCP_FLAGS_MASK cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16) + +struct mlx5_ct_fs_smfs_matcher { + struct mlx5dr_matcher *dr_matcher; + struct list_head list; + int prio; + refcount_t ref; +}; + +struct mlx5_ct_fs_smfs_matchers { + struct mlx5_ct_fs_smfs_matcher smfs_matchers[6]; + struct list_head used; +}; + +struct mlx5_ct_fs_smfs { + struct mlx5dr_table *ct_tbl, *ct_nat_tbl; + struct mlx5_ct_fs_smfs_matchers matchers; + struct mlx5_ct_fs_smfs_matchers matchers_nat; + struct mlx5dr_action *fwd_action; + struct mlx5_flow_table *ct_nat; + struct mutex lock; /* Guards matchers */ +}; + +struct mlx5_ct_fs_smfs_rule { + struct mlx5_ct_fs_rule fs_rule; + struct mlx5dr_rule *rule; + struct mlx5dr_action *count_action; + struct mlx5_ct_fs_smfs_matcher *smfs_matcher; +}; + +static inline void +mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp, + bool gre) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); + + if (likely(MLX5_CAP_FLOWTABLE_NIC_RX(fs->dev, ft_field_support.outer_ip_version))) + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version); + else + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + if (likely(ipv4)) { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + } else { + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xFF, + MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6)); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xFF, + MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4, + src_ipv4_src_ipv6.ipv6_layout.ipv6)); + } + + if (likely(tcp)) { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_sport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_dport); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, + ntohs(MLX5_CT_TCP_FLAGS_MASK)); + } else if (!gre) { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_sport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_dport); + } + + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 0, MLX5_CT_ZONE_MASK); +} + +static struct mlx5dr_matcher * +mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, bool ipv4, + bool tcp, bool gre, u32 priority) +{ + struct mlx5dr_matcher *dr_matcher; + struct mlx5_flow_spec *spec; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp, gre); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS; + + dr_matcher = mlx5_smfs_matcher_create(tbl, priority, spec); + kvfree(spec); + if (!dr_matcher) + return ERR_PTR(-EINVAL); + + return dr_matcher; +} + +static struct mlx5_ct_fs_smfs_matcher * +mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp, bool gre) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + struct mlx5_ct_fs_smfs_matcher *m, *smfs_matcher; + struct mlx5_ct_fs_smfs_matchers *matchers; + struct mlx5dr_matcher *dr_matcher; + struct mlx5dr_table *tbl; + struct list_head *prev; + int prio; + + matchers = nat ? &fs_smfs->matchers_nat : &fs_smfs->matchers; + smfs_matcher = &matchers->smfs_matchers[ipv4 * 3 + tcp * 2 + gre]; + + if (refcount_inc_not_zero(&smfs_matcher->ref)) + return smfs_matcher; + + mutex_lock(&fs_smfs->lock); + + /* Retry with lock, as another thread might have already created the relevant matcher + * till we acquired the lock + */ + if (refcount_inc_not_zero(&smfs_matcher->ref)) + goto out_unlock; + + // Find next available priority in sorted used list + prio = 0; + prev = &matchers->used; + list_for_each_entry(m, &matchers->used, list) { + prev = &m->list; + + if (m->prio == prio) + prio = m->prio + 1; + else + break; + } + + tbl = nat ? fs_smfs->ct_nat_tbl : fs_smfs->ct_tbl; + dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, gre, prio); + if (IS_ERR(dr_matcher)) { + netdev_warn(fs->netdev, + "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n", + nat, ipv4, tcp, gre, PTR_ERR(dr_matcher)); + + smfs_matcher = ERR_CAST(dr_matcher); + goto out_unlock; + } + + smfs_matcher->dr_matcher = dr_matcher; + smfs_matcher->prio = prio; + list_add(&smfs_matcher->list, prev); + refcount_set(&smfs_matcher->ref, 1); + +out_unlock: + mutex_unlock(&fs_smfs->lock); + return smfs_matcher; +} + +static void +mlx5_ct_fs_smfs_matcher_put(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_smfs_matcher *smfs_matcher) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + + if (!refcount_dec_and_mutex_lock(&smfs_matcher->ref, &fs_smfs->lock)) + return; + + mlx5_smfs_matcher_destroy(smfs_matcher->dr_matcher); + list_del(&smfs_matcher->list); + mutex_unlock(&fs_smfs->lock); +} + +static int +mlx5_ct_fs_smfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct, + struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct) +{ + struct mlx5dr_table *ct_tbl, *ct_nat_tbl, *post_ct_tbl; + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + + post_ct_tbl = mlx5_smfs_table_get_from_fs_ft(post_ct); + ct_nat_tbl = mlx5_smfs_table_get_from_fs_ft(ct_nat); + ct_tbl = mlx5_smfs_table_get_from_fs_ft(ct); + fs_smfs->ct_nat = ct_nat; + + if (!ct_tbl || !ct_nat_tbl || !post_ct_tbl) { + netdev_warn(fs->netdev, "ct_fs_smfs: failed to init, missing backing dr tables"); + return -EOPNOTSUPP; + } + + ct_dbg("using smfs steering"); + + fs_smfs->fwd_action = mlx5_smfs_action_create_dest_table(post_ct_tbl); + if (!fs_smfs->fwd_action) { + return -EINVAL; + } + + fs_smfs->ct_tbl = ct_tbl; + fs_smfs->ct_nat_tbl = ct_nat_tbl; + mutex_init(&fs_smfs->lock); + INIT_LIST_HEAD(&fs_smfs->matchers.used); + INIT_LIST_HEAD(&fs_smfs->matchers_nat.used); + + return 0; +} + +static void +mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + + mlx5_smfs_action_destroy(fs_smfs->fwd_action); +} + +static inline bool +mlx5_tc_ct_valid_used_dissector_keys(const u32 used_keys) +{ +#define DISS_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name) + const u32 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | DISS_BIT(META); + const u32 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP); + const u32 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP); + const u32 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS); + const u32 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS); + const u32 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS); + const u32 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS); + + return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp || + used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre); +} + +static bool +mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *flow_rule) +{ + struct flow_match_ipv4_addrs ipv4_addrs; + struct flow_match_ipv6_addrs ipv6_addrs; + struct flow_match_control control; + struct flow_match_basic basic; + struct flow_match_ports ports; + struct flow_match_tcp tcp; + + if (!mlx5_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) { + ct_dbg("rule uses unexpected dissectors (0x%08x)", + flow_rule->match.dissector->used_keys); + return false; + } + + flow_rule_match_basic(flow_rule, &basic); + flow_rule_match_control(flow_rule, &control); + flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs); + flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs); + if (basic.key->ip_proto != IPPROTO_GRE) + flow_rule_match_ports(flow_rule, &ports); + if (basic.key->ip_proto == IPPROTO_TCP) + flow_rule_match_tcp(flow_rule, &tcp); + + if (basic.mask->n_proto != htons(0xFFFF) || + (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) || + basic.mask->ip_proto != 0xFF || + (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP && + basic.key->ip_proto != IPPROTO_GRE)) { + ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)", + ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto), + basic.key->ip_proto, basic.mask->ip_proto); + return false; + } + + if (basic.key->ip_proto != IPPROTO_GRE && + (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) { + ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)", + ports.mask->src, ports.mask->dst); + return false; + } + + if (basic.key->ip_proto == IPPROTO_TCP && tcp.mask->flags != MLX5_CT_TCP_FLAGS_MASK) { + ct_dbg("rule uses unexpected tcp match (flags 0x%02x)", tcp.mask->flags); + return false; + } + + return true; +} + +static struct mlx5_ct_fs_rule * +mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, struct flow_rule *flow_rule) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + struct mlx5_ct_fs_smfs_matcher *smfs_matcher; + struct mlx5_ct_fs_smfs_rule *smfs_rule; + struct mlx5dr_action *actions[5]; + struct mlx5dr_rule *rule; + int num_actions = 0, err; + bool nat, tcp, ipv4, gre; + + if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule)) + return ERR_PTR(-EOPNOTSUPP); + + smfs_rule = kzalloc(sizeof(*smfs_rule), GFP_KERNEL); + if (!smfs_rule) + return ERR_PTR(-ENOMEM); + + smfs_rule->count_action = mlx5_smfs_action_create_flow_counter(mlx5_fc_id(attr->counter)); + if (!smfs_rule->count_action) { + err = -EINVAL; + goto err_count; + } + + actions[num_actions++] = smfs_rule->count_action; + actions[num_actions++] = attr->modify_hdr->action.dr_action; + actions[num_actions++] = fs_smfs->fwd_action; + + nat = (attr->ft == fs_smfs->ct_nat); + ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4; + tcp = MLX5_GET(fte_match_param, spec->match_value, + outer_headers.ip_protocol) == IPPROTO_TCP; + gre = MLX5_GET(fte_match_param, spec->match_value, + outer_headers.ip_protocol) == IPPROTO_GRE; + + smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp, gre); + if (IS_ERR(smfs_matcher)) { + err = PTR_ERR(smfs_matcher); + goto err_matcher; + } + + rule = mlx5_smfs_rule_create(smfs_matcher->dr_matcher, spec, num_actions, actions, + spec->flow_context.flow_source); + if (!rule) { + err = -EINVAL; + goto err_create; + } + + smfs_rule->rule = rule; + smfs_rule->smfs_matcher = smfs_matcher; + + return &smfs_rule->fs_rule; + +err_create: + mlx5_ct_fs_smfs_matcher_put(fs, smfs_matcher); +err_matcher: + mlx5_smfs_action_destroy(smfs_rule->count_action); +err_count: + kfree(smfs_rule); + return ERR_PTR(err); +} + +static void +mlx5_ct_fs_smfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule) +{ + struct mlx5_ct_fs_smfs_rule *smfs_rule = container_of(fs_rule, + struct mlx5_ct_fs_smfs_rule, + fs_rule); + + mlx5_smfs_rule_destroy(smfs_rule->rule); + mlx5_ct_fs_smfs_matcher_put(fs, smfs_rule->smfs_matcher); + mlx5_smfs_action_destroy(smfs_rule->count_action); + kfree(smfs_rule); +} + +static struct mlx5_ct_fs_ops fs_smfs_ops = { + .ct_rule_add = mlx5_ct_fs_smfs_ct_rule_add, + .ct_rule_del = mlx5_ct_fs_smfs_ct_rule_del, + + .init = mlx5_ct_fs_smfs_init, + .destroy = mlx5_ct_fs_smfs_destroy, + + .priv_size = sizeof(struct mlx5_ct_fs_smfs), +}; + +struct mlx5_ct_fs_ops * +mlx5_ct_fs_smfs_ops_get(void) +{ + return &fs_smfs_ops; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c new file mode 100644 index 000000000000..be74e1403328 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c @@ -0,0 +1,585 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/math64.h> +#include "lib/aso.h" +#include "en/tc/post_act.h" +#include "meter.h" +#include "en/tc_priv.h" + +#define MLX5_START_COLOR_SHIFT 28 +#define MLX5_METER_MODE_SHIFT 24 +#define MLX5_CBS_EXP_SHIFT 24 +#define MLX5_CBS_MAN_SHIFT 16 +#define MLX5_CIR_EXP_SHIFT 8 + +/* cir = 8*(10^9)*cir_mantissa/(2^cir_exponent)) bits/s */ +#define MLX5_CONST_CIR 8000000000ULL +#define MLX5_CALC_CIR(m, e) ((MLX5_CONST_CIR * (m)) >> (e)) +#define MLX5_MAX_CIR ((MLX5_CONST_CIR * 0x100) - 1) + +/* cbs = cbs_mantissa*2^cbs_exponent */ +#define MLX5_CALC_CBS(m, e) ((m) << (e)) +#define MLX5_MAX_CBS ((0x100ULL << 0x1F) - 1) +#define MLX5_MAX_HW_CBS 0x7FFFFFFF + +struct mlx5e_flow_meter_aso_obj { + struct list_head entry; + int base_id; + int total_meters; + + unsigned long meters_map[0]; /* must be at the end of this struct */ +}; + +struct mlx5e_flow_meters { + enum mlx5_flow_namespace_type ns_type; + struct mlx5_aso *aso; + struct mutex aso_lock; /* Protects aso operations */ + int log_granularity; + u32 pdn; + + DECLARE_HASHTABLE(hashtbl, 8); + + struct mutex sync_lock; /* protect flow meter operations */ + struct list_head partial_list; + struct list_head full_list; + + struct mlx5_core_dev *mdev; + struct mlx5e_post_act *post_act; +}; + +static void +mlx5e_flow_meter_cir_calc(u64 cir, u8 *man, u8 *exp) +{ + s64 _cir, _delta, delta = S64_MAX; + u8 e, _man = 0, _exp = 0; + u64 m; + + for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */ + m = cir << e; + if ((s64)m < 0) /* overflow */ + break; + m = div64_u64(m, MLX5_CONST_CIR); + if (m > 0xFF) /* man width 8 bit */ + continue; + _cir = MLX5_CALC_CIR(m, e); + _delta = cir - _cir; + if (_delta < delta) { + _man = m; + _exp = e; + if (!_delta) + goto found; + delta = _delta; + } + } + +found: + *man = _man; + *exp = _exp; +} + +static void +mlx5e_flow_meter_cbs_calc(u64 cbs, u8 *man, u8 *exp) +{ + s64 _cbs, _delta, delta = S64_MAX; + u8 e, _man = 0, _exp = 0; + u64 m; + + for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */ + m = cbs >> e; + if (m > 0xFF) /* man width 8 bit */ + continue; + _cbs = MLX5_CALC_CBS(m, e); + _delta = cbs - _cbs; + if (_delta < delta) { + _man = m; + _exp = e; + if (!_delta) + goto found; + delta = _delta; + } + } + +found: + *man = _man; + *exp = _exp; +} + +int +mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev, + struct mlx5e_flow_meter_handle *meter, + struct mlx5e_flow_meter_params *meter_params) +{ + struct mlx5_wqe_aso_ctrl_seg *aso_ctrl; + struct mlx5_wqe_aso_data_seg *aso_data; + struct mlx5e_flow_meters *flow_meters; + u8 cir_man, cir_exp, cbs_man, cbs_exp; + struct mlx5_aso_wqe *aso_wqe; + unsigned long expires; + struct mlx5_aso *aso; + u64 rate, burst; + u8 ds_cnt; + int err; + + rate = meter_params->rate; + burst = meter_params->burst; + + /* HW treats each packet as 128 bytes in PPS mode */ + if (meter_params->mode == MLX5_RATE_LIMIT_PPS) { + rate <<= 10; + burst <<= 7; + } + + if (!rate || rate > MLX5_MAX_CIR || !burst || burst > MLX5_MAX_CBS) + return -EINVAL; + + /* HW has limitation of total 31 bits for cbs */ + if (burst > MLX5_MAX_HW_CBS) { + mlx5_core_warn(mdev, + "burst(%lld) is too large, use HW allowed value(%d)\n", + burst, MLX5_MAX_HW_CBS); + burst = MLX5_MAX_HW_CBS; + } + + mlx5_core_dbg(mdev, "meter mode=%d\n", meter_params->mode); + mlx5e_flow_meter_cir_calc(rate, &cir_man, &cir_exp); + mlx5_core_dbg(mdev, "rate=%lld, cir=%lld, exp=%d, man=%d\n", + rate, MLX5_CALC_CIR(cir_man, cir_exp), cir_exp, cir_man); + mlx5e_flow_meter_cbs_calc(burst, &cbs_man, &cbs_exp); + mlx5_core_dbg(mdev, "burst=%lld, cbs=%lld, exp=%d, man=%d\n", + burst, MLX5_CALC_CBS((u64)cbs_man, cbs_exp), cbs_exp, cbs_man); + + if (!cir_man || !cbs_man) + return -EINVAL; + + flow_meters = meter->flow_meters; + aso = flow_meters->aso; + + mutex_lock(&flow_meters->aso_lock); + aso_wqe = mlx5_aso_get_wqe(aso); + ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe_data), MLX5_SEND_WQE_DS); + mlx5_aso_build_wqe(aso, ds_cnt, aso_wqe, meter->obj_id, + MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER); + + aso_ctrl = &aso_wqe->aso_ctrl; + memset(aso_ctrl, 0, sizeof(*aso_ctrl)); + aso_ctrl->data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BYTEWISE_64BYTE << 6; + aso_ctrl->condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE | + MLX5_ASO_ALWAYS_TRUE << 4; + aso_ctrl->data_offset_condition_operand = MLX5_ASO_LOGICAL_OR << 6; + aso_ctrl->data_mask = cpu_to_be64(0x80FFFFFFULL << (meter->idx ? 0 : 32)); + + aso_data = (struct mlx5_wqe_aso_data_seg *)(aso_wqe + 1); + memset(aso_data, 0, sizeof(*aso_data)); + aso_data->bytewise_data[meter->idx * 8] = cpu_to_be32((0x1 << 31) | /* valid */ + (MLX5_FLOW_METER_COLOR_GREEN << MLX5_START_COLOR_SHIFT)); + if (meter_params->mode == MLX5_RATE_LIMIT_PPS) + aso_data->bytewise_data[meter->idx * 8] |= + cpu_to_be32(MLX5_FLOW_METER_MODE_NUM_PACKETS << MLX5_METER_MODE_SHIFT); + else + aso_data->bytewise_data[meter->idx * 8] |= + cpu_to_be32(MLX5_FLOW_METER_MODE_BYTES_IP_LENGTH << MLX5_METER_MODE_SHIFT); + + aso_data->bytewise_data[meter->idx * 8 + 2] = cpu_to_be32((cbs_exp << MLX5_CBS_EXP_SHIFT) | + (cbs_man << MLX5_CBS_MAN_SHIFT) | + (cir_exp << MLX5_CIR_EXP_SHIFT) | + cir_man); + + mlx5_aso_post_wqe(aso, true, &aso_wqe->ctrl); + + /* With newer FW, the wait for the first ASO WQE is more than 2us, put the wait 10ms. */ + expires = jiffies + msecs_to_jiffies(10); + do { + err = mlx5_aso_poll_cq(aso, true); + if (err) + usleep_range(2, 10); + } while (err && time_is_after_jiffies(expires)); + mutex_unlock(&flow_meters->aso_lock); + + return err; +} + +static int +mlx5e_flow_meter_create_aso_obj(struct mlx5e_flow_meters *flow_meters, int *obj_id) +{ + u32 in[MLX5_ST_SZ_DW(create_flow_meter_aso_obj_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + struct mlx5_core_dev *mdev = flow_meters->mdev; + void *obj; + int err; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO); + MLX5_SET(general_obj_in_cmd_hdr, in, log_obj_range, flow_meters->log_granularity); + + obj = MLX5_ADDR_OF(create_flow_meter_aso_obj_in, in, flow_meter_aso_obj); + MLX5_SET(flow_meter_aso_obj, obj, meter_aso_access_pd, flow_meters->pdn); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) { + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) created\n", *obj_id); + } + + return err; +} + +static void +mlx5e_flow_meter_destroy_aso_obj(struct mlx5_core_dev *mdev, u32 obj_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) destroyed\n", obj_id); +} + +static struct mlx5e_flow_meter_handle * +__mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters) +{ + struct mlx5_core_dev *mdev = flow_meters->mdev; + struct mlx5e_flow_meter_aso_obj *meters_obj; + struct mlx5e_flow_meter_handle *meter; + struct mlx5_fc *counter; + int err, pos, total; + u32 id; + + meter = kzalloc(sizeof(*meter), GFP_KERNEL); + if (!meter) + return ERR_PTR(-ENOMEM); + + counter = mlx5_fc_create(mdev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_red_counter; + } + meter->red_counter = counter; + + counter = mlx5_fc_create(mdev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_green_counter; + } + meter->green_counter = counter; + + meters_obj = list_first_entry_or_null(&flow_meters->partial_list, + struct mlx5e_flow_meter_aso_obj, + entry); + /* 2 meters in one object */ + total = 1 << (flow_meters->log_granularity + 1); + if (!meters_obj) { + err = mlx5e_flow_meter_create_aso_obj(flow_meters, &id); + if (err) { + mlx5_core_err(mdev, "Failed to create flow meter ASO object\n"); + goto err_create; + } + + meters_obj = kzalloc(sizeof(*meters_obj) + BITS_TO_BYTES(total), + GFP_KERNEL); + if (!meters_obj) { + err = -ENOMEM; + goto err_mem; + } + + meters_obj->base_id = id; + meters_obj->total_meters = total; + list_add(&meters_obj->entry, &flow_meters->partial_list); + pos = 0; + } else { + pos = find_first_zero_bit(meters_obj->meters_map, total); + if (bitmap_weight(meters_obj->meters_map, total) == total - 1) { + list_del(&meters_obj->entry); + list_add(&meters_obj->entry, &flow_meters->full_list); + } + } + + bitmap_set(meters_obj->meters_map, pos, 1); + meter->flow_meters = flow_meters; + meter->meters_obj = meters_obj; + meter->obj_id = meters_obj->base_id + pos / 2; + meter->idx = pos % 2; + + mlx5_core_dbg(mdev, "flow meter allocated, obj_id=0x%x, index=%d\n", + meter->obj_id, meter->idx); + + return meter; + +err_mem: + mlx5e_flow_meter_destroy_aso_obj(mdev, id); +err_create: + mlx5_fc_destroy(mdev, meter->green_counter); +err_green_counter: + mlx5_fc_destroy(mdev, meter->red_counter); +err_red_counter: + kfree(meter); + return ERR_PTR(err); +} + +static void +__mlx5e_flow_meter_free(struct mlx5e_flow_meter_handle *meter) +{ + struct mlx5e_flow_meters *flow_meters = meter->flow_meters; + struct mlx5_core_dev *mdev = flow_meters->mdev; + struct mlx5e_flow_meter_aso_obj *meters_obj; + int n, pos; + + mlx5_fc_destroy(mdev, meter->green_counter); + mlx5_fc_destroy(mdev, meter->red_counter); + + meters_obj = meter->meters_obj; + pos = (meter->obj_id - meters_obj->base_id) * 2 + meter->idx; + bitmap_clear(meters_obj->meters_map, pos, 1); + n = bitmap_weight(meters_obj->meters_map, meters_obj->total_meters); + if (n == 0) { + list_del(&meters_obj->entry); + mlx5e_flow_meter_destroy_aso_obj(mdev, meters_obj->base_id); + kfree(meters_obj); + } else if (n == meters_obj->total_meters - 1) { + list_del(&meters_obj->entry); + list_add(&meters_obj->entry, &flow_meters->partial_list); + } + + mlx5_core_dbg(mdev, "flow meter freed, obj_id=0x%x, index=%d\n", + meter->obj_id, meter->idx); + kfree(meter); +} + +static struct mlx5e_flow_meter_handle * +__mlx5e_tc_meter_get(struct mlx5e_flow_meters *flow_meters, u32 index) +{ + struct mlx5e_flow_meter_handle *meter; + + hash_for_each_possible(flow_meters->hashtbl, meter, hlist, index) + if (meter->params.index == index) + goto add_ref; + + return ERR_PTR(-ENOENT); + +add_ref: + meter->refcnt++; + + return meter; +} + +struct mlx5e_flow_meter_handle * +mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params) +{ + struct mlx5e_flow_meters *flow_meters; + struct mlx5e_flow_meter_handle *meter; + + flow_meters = mlx5e_get_flow_meters(mdev); + if (!flow_meters) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&flow_meters->sync_lock); + meter = __mlx5e_tc_meter_get(flow_meters, params->index); + mutex_unlock(&flow_meters->sync_lock); + + return meter; +} + +static void +__mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter) +{ + if (--meter->refcnt == 0) { + hash_del(&meter->hlist); + __mlx5e_flow_meter_free(meter); + } +} + +void +mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter) +{ + struct mlx5e_flow_meters *flow_meters = meter->flow_meters; + + mutex_lock(&flow_meters->sync_lock); + __mlx5e_tc_meter_put(meter); + mutex_unlock(&flow_meters->sync_lock); +} + +static struct mlx5e_flow_meter_handle * +mlx5e_tc_meter_alloc(struct mlx5e_flow_meters *flow_meters, + struct mlx5e_flow_meter_params *params) +{ + struct mlx5e_flow_meter_handle *meter; + + meter = __mlx5e_flow_meter_alloc(flow_meters); + if (IS_ERR(meter)) + return meter; + + hash_add(flow_meters->hashtbl, &meter->hlist, params->index); + meter->params.index = params->index; + meter->refcnt++; + + return meter; +} + +static int +__mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter, + struct mlx5e_flow_meter_params *params) +{ + struct mlx5_core_dev *mdev = meter->flow_meters->mdev; + int err = 0; + + if (meter->params.mode != params->mode || meter->params.rate != params->rate || + meter->params.burst != params->burst) { + err = mlx5e_tc_meter_modify(mdev, meter, params); + if (err) + goto out; + + meter->params.mode = params->mode; + meter->params.rate = params->rate; + meter->params.burst = params->burst; + } + +out: + return err; +} + +int +mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter, + struct mlx5e_flow_meter_params *params) +{ + struct mlx5_core_dev *mdev = meter->flow_meters->mdev; + struct mlx5e_flow_meters *flow_meters; + int err; + + flow_meters = mlx5e_get_flow_meters(mdev); + if (!flow_meters) + return -EOPNOTSUPP; + + mutex_lock(&flow_meters->sync_lock); + err = __mlx5e_tc_meter_update(meter, params); + mutex_unlock(&flow_meters->sync_lock); + return err; +} + +struct mlx5e_flow_meter_handle * +mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params) +{ + struct mlx5e_flow_meters *flow_meters; + struct mlx5e_flow_meter_handle *meter; + int err; + + flow_meters = mlx5e_get_flow_meters(mdev); + if (!flow_meters) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&flow_meters->sync_lock); + meter = __mlx5e_tc_meter_get(flow_meters, params->index); + if (IS_ERR(meter)) { + meter = mlx5e_tc_meter_alloc(flow_meters, params); + if (IS_ERR(meter)) { + err = PTR_ERR(meter); + goto err_get; + } + } + + err = __mlx5e_tc_meter_update(meter, params); + if (err) + goto err_update; + + mutex_unlock(&flow_meters->sync_lock); + return meter; + +err_update: + __mlx5e_tc_meter_put(meter); +err_get: + mutex_unlock(&flow_meters->sync_lock); + return ERR_PTR(err); +} + +enum mlx5_flow_namespace_type +mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters) +{ + return flow_meters->ns_type; +} + +struct mlx5e_flow_meters * +mlx5e_flow_meters_init(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_flow_meters *flow_meters; + int err; + + if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO)) + return ERR_PTR(-EOPNOTSUPP); + + if (IS_ERR_OR_NULL(post_act)) { + netdev_dbg(priv->netdev, + "flow meter offload is not supported, post action is missing\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + flow_meters = kzalloc(sizeof(*flow_meters), GFP_KERNEL); + if (!flow_meters) + return ERR_PTR(-ENOMEM); + + err = mlx5_core_alloc_pd(mdev, &flow_meters->pdn); + if (err) { + mlx5_core_err(mdev, "Failed to alloc pd for flow meter aso, err=%d\n", err); + goto err_out; + } + + flow_meters->aso = mlx5_aso_create(mdev, flow_meters->pdn); + if (IS_ERR(flow_meters->aso)) { + mlx5_core_warn(mdev, "Failed to create aso wqe for flow meter\n"); + err = PTR_ERR(flow_meters->aso); + goto err_sq; + } + + mutex_init(&flow_meters->sync_lock); + INIT_LIST_HEAD(&flow_meters->partial_list); + INIT_LIST_HEAD(&flow_meters->full_list); + + flow_meters->ns_type = ns_type; + flow_meters->mdev = mdev; + flow_meters->post_act = post_act; + mutex_init(&flow_meters->aso_lock); + flow_meters->log_granularity = min_t(int, 6, + MLX5_CAP_QOS(mdev, log_meter_aso_max_alloc)); + + return flow_meters; + +err_sq: + mlx5_core_dealloc_pd(mdev, flow_meters->pdn); +err_out: + kfree(flow_meters); + return ERR_PTR(err); +} + +void +mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters) +{ + if (IS_ERR_OR_NULL(flow_meters)) + return; + + mlx5_aso_destroy(flow_meters->aso); + mlx5_core_dealloc_pd(flow_meters->mdev, flow_meters->pdn); + kfree(flow_meters); +} + +void +mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter, + u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse) +{ + u64 bytes1, packets1, lastuse1; + u64 bytes2, packets2, lastuse2; + + mlx5_fc_query_cached(meter->green_counter, &bytes1, &packets1, &lastuse1); + mlx5_fc_query_cached(meter->red_counter, &bytes2, &packets2, &lastuse2); + + *bytes = bytes1 + bytes2; + *packets = packets1 + packets2; + *drops = packets2; + *lastuse = max_t(u64, lastuse1, lastuse2); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h new file mode 100644 index 000000000000..6de6e8a16327 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_FLOW_METER_H__ +#define __MLX5_EN_FLOW_METER_H__ + +struct mlx5e_post_meter_priv; +struct mlx5e_flow_meter_aso_obj; +struct mlx5e_flow_meters; +struct mlx5_flow_attr; + +enum mlx5e_flow_meter_mode { + MLX5_RATE_LIMIT_BPS, + MLX5_RATE_LIMIT_PPS, +}; + +struct mlx5e_flow_meter_params { + enum mlx5e_flow_meter_mode mode; + /* police action index */ + u32 index; + u64 rate; + u64 burst; +}; + +struct mlx5e_flow_meter_handle { + struct mlx5e_flow_meters *flow_meters; + struct mlx5e_flow_meter_aso_obj *meters_obj; + u32 obj_id; + u8 idx; + + int refcnt; + struct hlist_node hlist; + struct mlx5e_flow_meter_params params; + + struct mlx5_fc *green_counter; + struct mlx5_fc *red_counter; +}; + +struct mlx5e_meter_attr { + struct mlx5e_flow_meter_params params; + struct mlx5e_flow_meter_handle *meter; + struct mlx5e_post_meter_priv *post_meter; +}; + +int +mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev, + struct mlx5e_flow_meter_handle *meter, + struct mlx5e_flow_meter_params *meter_params); + +struct mlx5e_flow_meter_handle * +mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params); +void +mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter); +int +mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter, + struct mlx5e_flow_meter_params *params); +struct mlx5e_flow_meter_handle * +mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params); + +enum mlx5_flow_namespace_type +mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters); + +struct mlx5e_flow_meters * +mlx5e_flow_meters_init(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_action); +void +mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters); + +void +mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter, + u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse); + +#endif /* __MLX5_EN_FLOW_METER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 31b4e39be2d3..4e48946c4c2a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +#include "en/tc_priv.h" #include "en_tc.h" #include "post_act.h" #include "mlx5_core.h" @@ -21,9 +22,9 @@ struct mlx5e_post_act_handle { u32 id; }; -#define MLX5_POST_ACTION_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen) -#define MLX5_POST_ACTION_MAX GENMASK(MLX5_POST_ACTION_BITS - 1, 0) -#define MLX5_POST_ACTION_MASK MLX5_POST_ACTION_MAX +#define MLX5_POST_ACTION_BITS MLX5_REG_MAPPING_MBITS(FTEID_TO_REG) +#define MLX5_POST_ACTION_MASK MLX5_REG_MAPPING_MASK(FTEID_TO_REG) +#define MLX5_POST_ACTION_MAX MLX5_POST_ACTION_MASK struct mlx5e_post_act * mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, @@ -35,7 +36,7 @@ mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, int err; if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ignore_flow_level, table_type)) { - if (priv->mdev->coredev_type != MLX5_COREDEV_VF) + if (priv->mdev->coredev_type == MLX5_COREDEV_PF) mlx5_core_warn(priv->mdev, "firmware level support is missing\n"); err = -EOPNOTSUPP; goto err_check; @@ -75,21 +76,47 @@ mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act) kfree(post_act); } +int +mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle) +{ + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Post action rule matches on fte_id and executes original rule's tc rule action */ + mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG, handle->id, MLX5_POST_ACTION_MASK); + + handle->rule = mlx5e_tc_rule_offload(post_act->priv, spec, handle->attr); + if (IS_ERR(handle->rule)) { + err = PTR_ERR(handle->rule); + netdev_warn(post_act->priv->netdev, "Failed to add post action rule"); + goto err_rule; + } + + kvfree(spec); + return 0; + +err_rule: + kvfree(spec); + return err; +} + struct mlx5e_post_act_handle * mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr) { u32 attr_sz = ns_to_attr_sz(post_act->ns_type); - struct mlx5e_post_act_handle *handle = NULL; - struct mlx5_flow_attr *post_attr = NULL; - struct mlx5_flow_spec *spec = NULL; + struct mlx5e_post_act_handle *handle; + struct mlx5_flow_attr *post_attr; int err; handle = kzalloc(sizeof(*handle), GFP_KERNEL); - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); post_attr = mlx5_alloc_flow_attr(post_act->ns_type); - if (!handle || !spec || !post_attr) { + if (!handle || !post_attr) { kfree(post_attr); - kvfree(spec); kfree(handle); return ERR_PTR(-ENOMEM); } @@ -100,7 +127,8 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at post_attr->ft = post_act->ft; post_attr->inner_match_level = MLX5_MATCH_NONE; post_attr->outer_match_level = MLX5_MATCH_NONE; - post_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); + post_attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_DECAP; + post_attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; handle->ns_type = post_act->ns_type; /* Splits were handled before post action */ @@ -112,36 +140,29 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at if (err) goto err_xarray; - /* Post action rule matches on fte_id and executes original rule's - * tc rule action - */ - mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG, - handle->id, MLX5_POST_ACTION_MASK); - - handle->rule = mlx5_tc_rule_insert(post_act->priv, spec, post_attr); - if (IS_ERR(handle->rule)) { - err = PTR_ERR(handle->rule); - netdev_warn(post_act->priv->netdev, "Failed to add post action rule"); - goto err_rule; - } handle->attr = post_attr; - kvfree(spec); return handle; -err_rule: - xa_erase(&post_act->ids, handle->id); err_xarray: kfree(post_attr); - kvfree(spec); kfree(handle); return ERR_PTR(err); } void +mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle) +{ + mlx5e_tc_rule_unoffload(post_act->priv, handle->rule, handle->attr); + handle->rule = NULL; +} + +void mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle) { - mlx5_tc_rule_delete(post_act->priv, handle->rule, handle->attr); + if (!IS_ERR_OR_NULL(handle->rule)) + mlx5e_tc_post_act_unoffload(post_act, handle); xa_erase(&post_act->ids, handle->id); kfree(handle->attr); kfree(handle); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h index b530ec1981a5..f476774c0b75 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h @@ -24,6 +24,14 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at void mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle); +int +mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle); + +void +mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle); + struct mlx5_flow_table * mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c new file mode 100644 index 000000000000..8b77e822810e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "en/tc_priv.h" +#include "post_meter.h" +#include "en/tc/post_act.h" + +#define MLX5_PACKET_COLOR_BITS MLX5_REG_MAPPING_MBITS(PACKET_COLOR_TO_REG) +#define MLX5_PACKET_COLOR_MASK MLX5_REG_MAPPING_MASK(PACKET_COLOR_TO_REG) + +struct mlx5e_post_meter_priv { + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_flow_handle *fwd_green_rule; + struct mlx5_flow_handle *drop_red_rule; +}; + +struct mlx5_flow_table * +mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter) +{ + return post_meter->ft; +} + +static int +mlx5e_post_meter_table_create(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_meter_priv *post_meter) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *root_ns; + + root_ns = mlx5_get_flow_namespace(priv->mdev, ns_type); + if (!root_ns) { + mlx5_core_warn(priv->mdev, "Failed to get namespace for flow meter\n"); + return -EOPNOTSUPP; + } + + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = FDB_SLOW_PATH; + ft_attr.max_fte = 2; + ft_attr.level = 1; + + post_meter->ft = mlx5_create_flow_table(root_ns, &ft_attr); + if (IS_ERR(post_meter->ft)) { + mlx5_core_warn(priv->mdev, "Failed to create post_meter table\n"); + return PTR_ERR(post_meter->ft); + } + + return 0; +} + +static int +mlx5e_post_meter_fg_create(struct mlx5e_priv *priv, + struct mlx5e_post_meter_priv *post_meter) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *misc2, *match_criteria; + u32 *flow_group_in; + int err = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + misc2 = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_5, MLX5_PACKET_COLOR_MASK); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + + post_meter->fg = mlx5_create_flow_group(post_meter->ft, flow_group_in); + if (IS_ERR(post_meter->fg)) { + mlx5_core_warn(priv->mdev, "Failed to create post_meter flow group\n"); + err = PTR_ERR(post_meter->fg); + } + + kvfree(flow_group_in); + return err; +} + +static int +mlx5e_post_meter_rules_create(struct mlx5e_priv *priv, + struct mlx5e_post_meter_priv *post_meter, + struct mlx5e_post_act *post_act, + struct mlx5_fc *green_counter, + struct mlx5_fc *red_counter) +{ + struct mlx5_flow_destination dest[2] = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG, + MLX5_FLOW_METER_COLOR_RED, MLX5_PACKET_COLOR_MASK); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[0].counter_id = mlx5_fc_id(red_counter); + + rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + mlx5_core_warn(priv->mdev, "Failed to create post_meter flow drop rule\n"); + err = PTR_ERR(rule); + goto err_red; + } + post_meter->drop_red_rule = rule; + + mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG, + MLX5_FLOW_METER_COLOR_GREEN, MLX5_PACKET_COLOR_MASK); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[0].ft = mlx5e_tc_post_act_get_ft(post_act); + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter_id = mlx5_fc_id(green_counter); + + rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 2); + if (IS_ERR(rule)) { + mlx5_core_warn(priv->mdev, "Failed to create post_meter flow fwd rule\n"); + err = PTR_ERR(rule); + goto err_green; + } + post_meter->fwd_green_rule = rule; + + kvfree(spec); + return 0; + +err_green: + mlx5_del_flow_rules(post_meter->drop_red_rule); +err_red: + kvfree(spec); + return err; +} + +static void +mlx5e_post_meter_rules_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + mlx5_del_flow_rules(post_meter->drop_red_rule); + mlx5_del_flow_rules(post_meter->fwd_green_rule); +} + +static void +mlx5e_post_meter_fg_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + mlx5_destroy_flow_group(post_meter->fg); +} + +static void +mlx5e_post_meter_table_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + mlx5_destroy_flow_table(post_meter->ft); +} + +struct mlx5e_post_meter_priv * +mlx5e_post_meter_init(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act, + struct mlx5_fc *green_counter, + struct mlx5_fc *red_counter) +{ + struct mlx5e_post_meter_priv *post_meter; + int err; + + post_meter = kzalloc(sizeof(*post_meter), GFP_KERNEL); + if (!post_meter) + return ERR_PTR(-ENOMEM); + + err = mlx5e_post_meter_table_create(priv, ns_type, post_meter); + if (err) + goto err_ft; + + err = mlx5e_post_meter_fg_create(priv, post_meter); + if (err) + goto err_fg; + + err = mlx5e_post_meter_rules_create(priv, post_meter, post_act, green_counter, + red_counter); + if (err) + goto err_rules; + + return post_meter; + +err_rules: + mlx5e_post_meter_fg_destroy(post_meter); +err_fg: + mlx5e_post_meter_table_destroy(post_meter); +err_ft: + kfree(post_meter); + return ERR_PTR(err); +} + +void +mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter) +{ + mlx5e_post_meter_rules_destroy(post_meter); + mlx5e_post_meter_fg_destroy(post_meter); + mlx5e_post_meter_table_destroy(post_meter); + kfree(post_meter); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h new file mode 100644 index 000000000000..34d0e4b9fc7a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_POST_METER_H__ +#define __MLX5_EN_POST_METER_H__ + +#define packet_color_to_reg { \ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5, \ + .moffset = 0, \ + .mlen = 8, \ + .soffset = MLX5_BYTE_OFF(fte_match_param, \ + misc_parameters_2.metadata_reg_c_5), \ +} + +struct mlx5e_post_meter_priv; + +struct mlx5_flow_table * +mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter); + +struct mlx5e_post_meter_priv * +mlx5e_post_meter_init(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act, + struct mlx5_fc *green_counter, + struct mlx5_fc *red_counter); +void +mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter); + +#endif /* __MLX5_EN_POST_METER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c index df6888c4793c..1cbd2eb9d04f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c @@ -5,6 +5,8 @@ #include <net/psample.h> #include "en/mapping.h" #include "en/tc/post_act.h" +#include "en/tc/act/sample.h" +#include "en/mod_hdr.h" #include "sample.h" #include "eswitch.h" #include "en_tc.h" @@ -45,14 +47,12 @@ struct mlx5e_sample_flow { struct mlx5_flow_handle *pre_rule; struct mlx5_flow_attr *post_attr; struct mlx5_flow_handle *post_rule; - struct mlx5e_post_act_handle *post_act_handle; }; struct mlx5e_sample_restore { struct hlist_node hlist; struct mlx5_modify_hdr *modify_hdr; struct mlx5_flow_handle *rule; - struct mlx5e_post_act_handle *post_act_handle; u32 obj_id; int count; }; @@ -93,6 +93,7 @@ sampler_termtbl_create(struct mlx5e_tc_psample *tc_psample) act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; dest.vport.num = esw->manager_vport; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; tc_psample->termtbl_rule = mlx5_add_flow_rules(tc_psample->termtbl, NULL, &act, &dest, 1); if (IS_ERR(tc_psample->termtbl_rule)) { err = PTR_ERR(tc_psample->termtbl_rule); @@ -230,69 +231,46 @@ sampler_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sampler *sampler) */ static struct mlx5_modify_hdr * sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id, - struct mlx5e_post_act_handle *handle) + struct mlx5e_tc_mod_hdr_acts *mod_acts) { - struct mlx5e_tc_mod_hdr_acts mod_acts = {}; struct mlx5_modify_hdr *modify_hdr; int err; - err = mlx5e_tc_match_to_reg_set(mdev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, + err = mlx5e_tc_match_to_reg_set(mdev, mod_acts, MLX5_FLOW_NAMESPACE_FDB, CHAIN_TO_REG, obj_id); if (err) goto err_set_regc0; - if (handle) { - err = mlx5e_tc_post_act_set_handle(mdev, handle, &mod_acts); - if (err) - goto err_post_act; - } - modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB, - mod_acts.num_actions, - mod_acts.actions); + mod_acts->num_actions, + mod_acts->actions); if (IS_ERR(modify_hdr)) { err = PTR_ERR(modify_hdr); goto err_modify_hdr; } - dealloc_mod_hdr_actions(&mod_acts); + mlx5e_mod_hdr_dealloc(mod_acts); return modify_hdr; err_modify_hdr: -err_post_act: - dealloc_mod_hdr_actions(&mod_acts); + mlx5e_mod_hdr_dealloc(mod_acts); err_set_regc0: return ERR_PTR(err); } -static u32 -restore_hash(u32 obj_id, struct mlx5e_post_act_handle *post_act_handle) -{ - return jhash_2words(obj_id, hash32_ptr(post_act_handle), 0); -} - -static bool -restore_equal(struct mlx5e_sample_restore *restore, u32 obj_id, - struct mlx5e_post_act_handle *post_act_handle) -{ - return restore->obj_id == obj_id && restore->post_act_handle == post_act_handle; -} - static struct mlx5e_sample_restore * sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id, - struct mlx5e_post_act_handle *post_act_handle) + struct mlx5e_tc_mod_hdr_acts *mod_acts) { struct mlx5_eswitch *esw = tc_psample->esw; struct mlx5_core_dev *mdev = esw->dev; struct mlx5e_sample_restore *restore; struct mlx5_modify_hdr *modify_hdr; - u32 hash_key; int err; mutex_lock(&tc_psample->restore_lock); - hash_key = restore_hash(obj_id, post_act_handle); - hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, hash_key) - if (restore_equal(restore, obj_id, post_act_handle)) + hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, obj_id) + if (restore->obj_id == obj_id) goto add_ref; restore = kzalloc(sizeof(*restore), GFP_KERNEL); @@ -301,9 +279,8 @@ sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id, goto err_alloc; } restore->obj_id = obj_id; - restore->post_act_handle = post_act_handle; - modify_hdr = sample_modify_hdr_get(mdev, obj_id, post_act_handle); + modify_hdr = sample_modify_hdr_get(mdev, obj_id, mod_acts); if (IS_ERR(modify_hdr)) { err = PTR_ERR(modify_hdr); goto err_modify_hdr; @@ -316,7 +293,7 @@ sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id, goto err_restore; } - hash_add(tc_psample->restore_hashtbl, &restore->hlist, hash_key); + hash_add(tc_psample->restore_hashtbl, &restore->hlist, obj_id); add_ref: restore->count++; mutex_unlock(&tc_psample->restore_lock); @@ -402,7 +379,7 @@ add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, post_attr->chain = 0; post_attr->prio = 0; post_attr->ft = default_tbl; - post_attr->flags = MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + post_attr->flags = MLX5_ATTR_FLAG_NO_IN_PORT; /* When offloading sample and encap action, if there is no valid * neigh data struct, a slow path rule is offloaded first. Source @@ -491,16 +468,16 @@ del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, struct mlx5_flow_handle * mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr, - u32 tunnel_id) + struct mlx5_flow_attr *attr) { - struct mlx5e_post_act_handle *post_act_handle = NULL; struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; struct mlx5_esw_flow_attr *pre_esw_attr; struct mlx5_mapped_obj restore_obj = {}; + struct mlx5e_tc_mod_hdr_acts *mod_acts; struct mlx5e_sample_flow *sample_flow; struct mlx5e_sample_attr *sample_attr; struct mlx5_flow_attr *pre_attr; + u32 tunnel_id = attr->tunnel_id; struct mlx5_eswitch *esw; u32 default_tbl_id; u32 obj_id; @@ -512,7 +489,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL); if (!sample_flow) return ERR_PTR(-ENOMEM); - sample_attr = attr->sample_attr; + sample_attr = &attr->sample_attr; sample_attr->sample_flow = sample_flow; /* For NICs with reg_c_preserve support or decap action, use @@ -521,18 +498,11 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, * original flow table. */ esw = tc_psample->esw; - if (MLX5_CAP_GEN(esw->dev, reg_c_preserve) || - attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + if (mlx5e_tc_act_sample_is_multi_table(esw->dev, attr)) { struct mlx5_flow_table *ft; ft = mlx5e_tc_post_act_get_ft(tc_psample->post_act); default_tbl_id = ft->id; - post_act_handle = mlx5e_tc_post_act_add(tc_psample->post_act, attr); - if (IS_ERR(post_act_handle)) { - err = PTR_ERR(post_act_handle); - goto err_post_act; - } - sample_flow->post_act_handle = post_act_handle; } else { err = add_post_rule(esw, sample_flow, spec, attr, &default_tbl_id); if (err) @@ -545,6 +515,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, err = PTR_ERR(sample_flow->sampler); goto err_sampler; } + sample_attr->sampler_id = sample_flow->sampler->sampler_id; /* Create an id mapping reg_c0 value to sample object. */ restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE; @@ -558,7 +529,8 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, sample_attr->restore_obj_id = obj_id; /* Create sample restore context. */ - sample_flow->restore = sample_restore_get(tc_psample, obj_id, post_act_handle); + mod_acts = &attr->parse_attr->mod_hdr_acts; + sample_flow->restore = sample_restore_get(tc_psample, obj_id, mod_acts); if (IS_ERR(sample_flow->restore)) { err = PTR_ERR(sample_flow->restore); goto err_sample_restore; @@ -579,13 +551,13 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, if (tunnel_id) pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; pre_attr->modify_hdr = sample_flow->restore->modify_hdr; - pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE; + pre_attr->flags = MLX5_ATTR_FLAG_SAMPLE; pre_attr->inner_match_level = attr->inner_match_level; pre_attr->outer_match_level = attr->outer_match_level; pre_attr->chain = attr->chain; pre_attr->prio = attr->prio; - pre_attr->sample_attr = attr->sample_attr; - sample_attr->sampler_id = sample_flow->sampler->sampler_id; + pre_attr->ft = attr->ft; + pre_attr->sample_attr = *sample_attr; pre_esw_attr = pre_attr->esw_attr; pre_esw_attr->in_mdev = esw_attr->in_mdev; pre_esw_attr->in_rep = esw_attr->in_rep; @@ -610,9 +582,6 @@ err_sampler: if (sample_flow->post_rule) del_post_rule(esw, sample_flow, attr); err_post_rule: - if (post_act_handle) - mlx5e_tc_post_act_del(tc_psample->post_act, post_act_handle); -err_post_act: kfree(sample_flow); return ERR_PTR(err); } @@ -632,15 +601,13 @@ mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample, * will hit fw syndromes. */ esw = tc_psample->esw; - sample_flow = attr->sample_attr->sample_flow; + sample_flow = attr->sample_attr.sample_flow; mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr); sample_restore_put(tc_psample, sample_flow->restore); - mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr->restore_obj_id); + mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr.restore_obj_id); sampler_put(tc_psample, sample_flow->sampler); - if (sample_flow->post_act_handle) - mlx5e_tc_post_act_del(tc_psample->post_act, sample_flow->post_act_handle); - else + if (sample_flow->post_rule) del_post_rule(esw, sample_flow, attr); kfree(sample_flow->pre_attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h index 9ef8a49d7801..a569367eae4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h @@ -26,8 +26,7 @@ void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj struct mlx5_flow_handle * mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr, - u32 tunnel_id); + struct mlx5_flow_attr *attr); void mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv, @@ -45,8 +44,7 @@ mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample); static inline struct mlx5_flow_handle * mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr, - u32 tunnel_id) + struct mlx5_flow_attr *attr) { return ERR_PTR(-EOPNOTSUPP); } static inline void diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 2445e2ae3324..864ce0c393e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -14,18 +14,21 @@ #include <linux/workqueue.h> #include <linux/refcount.h> #include <linux/xarray.h> +#include <linux/if_macvlan.h> +#include <linux/debugfs.h> #include "lib/fs_chains.h" #include "en/tc_ct.h" +#include "en/tc/ct_fs.h" +#include "en/tc_priv.h" #include "en/mod_hdr.h" #include "en/mapping.h" #include "en/tc/post_act.h" #include "en.h" #include "en_tc.h" #include "en_rep.h" +#include "fs_core.h" -#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen) -#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0) #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) #define MLX5_CT_STATE_TRK_BIT BIT(2) #define MLX5_CT_STATE_NAT_BIT BIT(3) @@ -33,12 +36,27 @@ #define MLX5_CT_STATE_RELATED_BIT BIT(5) #define MLX5_CT_STATE_INVALID_BIT BIT(6) -#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen) -#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0) +#define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG) +#define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG) + +/* Statically allocate modify actions for + * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10. + * This will be increased dynamically if needed (for the ipv6 snat + dnat). + */ +#define MLX5_CT_MIN_MOD_ACTS 10 #define ct_dbg(fmt, args...)\ netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args) +struct mlx5_tc_ct_debugfs { + struct { + atomic_t offloaded; + atomic_t rx_dropped; + } stats; + + struct dentry *root; +}; + struct mlx5_tc_ct_priv { struct mlx5_core_dev *dev; const struct net_device *netdev; @@ -55,19 +73,23 @@ struct mlx5_tc_ct_priv { struct mapping_ctx *labels_mapping; enum mlx5_flow_namespace_type ns_type; struct mlx5_fs_chains *chains; + struct mlx5_ct_fs *fs; + struct mlx5_ct_fs_ops *fs_ops; spinlock_t ht_lock; /* protects ft entries */ + struct workqueue_struct *wq; + + struct mlx5_tc_ct_debugfs debugfs; }; struct mlx5_ct_flow { struct mlx5_flow_attr *pre_ct_attr; struct mlx5_flow_handle *pre_ct_rule; - struct mlx5e_post_act_handle *post_act_handle; struct mlx5_ct_ft *ft; u32 chain_mapping; }; struct mlx5_ct_zone_rule { - struct mlx5_flow_handle *rule; + struct mlx5_ct_fs_rule *rule; struct mlx5e_mod_hdr_handle *mh; struct mlx5_flow_attr *attr; bool nat; @@ -251,7 +273,8 @@ mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) return -EOPNOTSUPP; } } else { - return -EOPNOTSUPP; + if (tuple->ip_proto != IPPROTO_GRE) + return -EOPNOTSUPP; } return 0; @@ -320,7 +343,33 @@ mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple, } static int -mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, +mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv, + struct net_device *ndev) +{ + struct mlx5e_priv *other_priv = netdev_priv(ndev); + struct mlx5_core_dev *mdev = ct_priv->dev; + bool vf_rep, uplink_rep; + + vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev); + uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev); + + if (vf_rep) + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; + if (uplink_rep) + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + if (is_vlan_dev(ndev)) + return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev)); + if (netif_is_macvlan(ndev)) + return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev)); + if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev)) + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT; +} + +static int +mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_flow_spec *spec, struct flow_rule *rule) { void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -335,8 +384,7 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, flow_rule_match_basic(rule, &match); - mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c, - headers_v); + mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v); MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, match.mask->ip_proto); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, @@ -432,6 +480,23 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, ntohs(match.key->flags)); } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) { + struct flow_match_meta match; + + flow_rule_match_meta(rule, &match); + + if (match.key->ingress_ifindex & match.mask->ingress_ifindex) { + struct net_device *dev; + + dev = dev_get_by_index(&init_net, match.key->ingress_ifindex); + if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source)) + spec->flow_context.flow_source = + mlx5_tc_ct_get_flow_source_match(ct_priv, dev); + + dev_put(dev); + } + } + return 0; } @@ -456,7 +521,7 @@ mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone); - mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr); + ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule); mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh); mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); kfree(attr); @@ -468,6 +533,8 @@ mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv, { mlx5_tc_ct_entry_del_rule(ct_priv, entry, true); mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); + + atomic_dec(&ct_priv->debugfs.stats.offloaded); } static struct flow_action_entry * @@ -530,6 +597,12 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, return 0; } +int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0); +} + static int mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, char *modact) @@ -609,22 +682,15 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, struct flow_action *flow_action = &flow_rule->action; struct mlx5_core_dev *mdev = ct_priv->dev; struct flow_action_entry *act; - size_t action_size; char *modact; int err, i; - action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto); - flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_MANGLE: { - err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type, - mod_acts); - if (err) - return err; - - modact = mod_acts->actions + - mod_acts->num_actions * action_size; + modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts); + if (IS_ERR(modact)) + return PTR_ERR(modact); err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact); if (err) @@ -650,9 +716,10 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_flow_attr *attr, struct flow_rule *flow_rule, struct mlx5e_mod_hdr_handle **mh, - u8 zone_restore_id, bool nat) + u8 zone_restore_id, bool nat_table, bool has_nat) { - struct mlx5e_tc_mod_hdr_acts mod_acts = {}; + DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS); + DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr); struct flow_action_entry *meta; u16 ct_state = 0; int err; @@ -665,11 +732,12 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, &attr->ct_attr.ct_labels_id); if (err) return -EOPNOTSUPP; - if (nat) { - err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, - &mod_acts); - if (err) - goto err_mapping; + if (nat_table) { + if (has_nat) { + err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts); + if (err) + goto err_mapping; + } ct_state |= MLX5_CT_STATE_NAT_BIT; } @@ -684,7 +752,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, if (err) goto err_mapping; - if (nat) { + if (nat_table && has_nat) { attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type, mod_acts.num_actions, mod_acts.actions); @@ -706,11 +774,11 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, attr->modify_hdr = mlx5e_mod_hdr_get(*mh); } - dealloc_mod_hdr_actions(&mod_acts); + mlx5e_mod_hdr_dealloc(&mod_acts); return 0; err_mapping: - dealloc_mod_hdr_actions(&mod_acts); + mlx5e_mod_hdr_dealloc(&mod_acts); mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); return err; } @@ -752,7 +820,9 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, &zone_rule->mh, - zone_restore_id, nat); + zone_restore_id, + nat, + mlx5_tc_ct_entry_has_nat(entry)); if (err) { ct_dbg("Failed to create ct entry mod hdr"); goto err_mod_hdr; @@ -764,16 +834,20 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, attr->dest_chain = 0; attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct; - attr->outer_match_level = MLX5_MATCH_L4; + if (entry->tuple.ip_proto == IPPROTO_TCP || + entry->tuple.ip_proto == IPPROTO_UDP) + attr->outer_match_level = MLX5_MATCH_L4; + else + attr->outer_match_level = MLX5_MATCH_L3; attr->counter = entry->counter->counter; - attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) attr->esw_attr->in_mdev = priv->mdev; - mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule); + mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule); mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); - zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr); + zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule); if (IS_ERR(zone_rule->rule)) { err = PTR_ERR(zone_rule->rule); ct_dbg("Failed to add ct entry rule, nat: %d", nat); @@ -868,14 +942,11 @@ static void mlx5_tc_ct_entry_del_work(struct work_struct *work) static void __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) { - struct mlx5e_priv *priv; - if (!refcount_dec_and_test(&entry->refcnt)) return; - priv = netdev_priv(entry->ct_priv->netdev); INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work); - queue_work(priv->wq, &entry->work); + queue_work(entry->ct_priv->wq, &entry->work); } static struct mlx5_ct_counter * @@ -907,12 +978,9 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple rev_tuple = entry->tuple; struct mlx5_ct_counter *shared_counter; struct mlx5_ct_entry *rev_entry; - __be16 tmp_port; /* get the reversed tuple */ - tmp_port = rev_tuple.port.src; - rev_tuple.port.src = rev_tuple.port.dst; - rev_tuple.port.dst = tmp_port; + swap(rev_tuple.port.src, rev_tuple.port.dst); if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { __be32 tmp_addr = rev_tuple.ip.src_v4; @@ -987,6 +1055,7 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, if (err) goto err_nat; + atomic_inc(&ct_priv->debugfs.stats.offloaded); return 0; err_nat: @@ -1114,7 +1183,6 @@ mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, } rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params); - mlx5_tc_ct_entry_remove_from_tuples(entry); spin_unlock_bh(&ct_priv->ht_lock); mlx5_tc_ct_entry_put(entry); @@ -1184,16 +1252,20 @@ mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple, struct flow_keys flow_keys; skb_reset_network_header(skb); - skb_flow_dissect_flow_keys(skb, &flow_keys, 0); + skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP); tuple->zone = zone; if (flow_keys.basic.ip_proto != IPPROTO_TCP && - flow_keys.basic.ip_proto != IPPROTO_UDP) + flow_keys.basic.ip_proto != IPPROTO_UDP && + flow_keys.basic.ip_proto != IPPROTO_GRE) return false; - tuple->port.src = flow_keys.ports.src; - tuple->port.dst = flow_keys.ports.dst; + if (flow_keys.basic.ip_proto == IPPROTO_TCP || + flow_keys.basic.ip_proto == IPPROTO_UDP) { + tuple->port.src = flow_keys.ports.src; + tuple->port.dst = flow_keys.ports.dst; + } tuple->n_proto = flow_keys.basic.n_proto; tuple->ip_proto = flow_keys.basic.ip_proto; @@ -1360,9 +1432,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { - bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; - int err; - if (!priv) { NL_SET_ERR_MSG_MOD(extack, "offload of ct action isn't available"); @@ -1373,17 +1442,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, attr->ct_attr.ct_action = act->ct.action; attr->ct_attr.nf_ft = act->ct.flow_table; - if (!clear_action) - goto out; - - err = mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Failed to set registers for ct clear"); - return err; - } - attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - -out: return 0; } @@ -1460,7 +1518,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, } pre_ct->miss_rule = rule; - dealloc_mod_hdr_actions(&pre_mod_acts); + mlx5e_mod_hdr_dealloc(&pre_mod_acts); kvfree(spec); return 0; @@ -1469,7 +1527,7 @@ err_miss_rule: err_flow_rule: mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); err_mapping: - dealloc_mod_hdr_actions(&pre_mod_acts); + mlx5e_mod_hdr_dealloc(&pre_mod_acts); kvfree(spec); return err; } @@ -1702,6 +1760,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) if (!refcount_dec_and_test(&ft->refcount)) return; + flush_workqueue(ct_priv->wq); nf_flow_table_offload_del_cb(ft->nf_ft, mlx5_tc_ct_block_flow_offload, ft); rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); @@ -1716,7 +1775,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) /* We translate the tc filter with CT action to the following HW model: * * +---------------------+ - * + ft prio (tc chain) + + * + ft prio (tc chain) + * + original match + * +---------------------+ * | set chain miss mapping @@ -1726,7 +1785,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * v * +---------------------+ * + pre_ct/pre_ct_nat + if matches +-------------------------+ - * + zone+nat match +---------------->+ post_act (see below) + + * + zone+nat match +---------------->+ post_act (see below) + * +---------------------+ set zone +-------------------------+ * | set zone * v @@ -1741,21 +1800,19 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * | do nat (if needed) * v * +--------------+ - * + post_act + original filter actions + * + post_act + original filter actions * + fte_id match +------------------------> * +--------------+ */ static struct mlx5_flow_handle * __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *orig_spec, struct mlx5_flow_attr *attr) { bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); - struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; + struct mlx5e_tc_mod_hdr_acts *pre_mod_acts; u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); - struct mlx5e_post_act_handle *handle; struct mlx5_flow_attr *pre_ct_attr; struct mlx5_modify_hdr *mod_hdr; struct mlx5_ct_flow *ct_flow; @@ -1764,7 +1821,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); if (!ct_flow) { - kfree(ct_flow); return ERR_PTR(-ENOMEM); } @@ -1778,14 +1834,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } ct_flow->ft = ft; - handle = mlx5e_tc_post_act_add(ct_priv->post_act, attr); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - ct_dbg("Failed to allocate post action handle"); - goto err_post_act_handle; - } - ct_flow->post_act_handle = handle; - /* Base flow attributes of both rules on original rule attribute */ ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); if (!ct_flow->pre_ct_attr) { @@ -1795,6 +1843,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, pre_ct_attr = ct_flow->pre_ct_attr; memcpy(pre_ct_attr, attr, attr_sz); + pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts; /* Modify the original rule's action to fwd and modify, leave decap */ pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; @@ -1813,30 +1862,22 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } ct_flow->chain_mapping = chain_mapping; - err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type, + err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type, CHAIN_TO_REG, chain_mapping); if (err) { ct_dbg("Failed to set chain register mapping"); goto err_mapping; } - err = mlx5e_tc_post_act_set_handle(priv->mdev, handle, &pre_mod_acts); - if (err) { - ct_dbg("Failed to set post action handle"); - goto err_mapping; - } - /* If original flow is decap, we do it before going into ct table * so add a rewrite for the tunnel match_id. */ if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) && attr->chain == 0) { - u32 tun_id = mlx5e_tc_get_flow_tun_id(flow); - - err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, + err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type, TUNNEL_TO_REG, - tun_id); + attr->tunnel_id); if (err) { ct_dbg("Failed to set tunnel register mapping"); goto err_mapping; @@ -1844,8 +1885,8 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, - pre_mod_acts.num_actions, - pre_mod_acts.actions); + pre_mod_acts->num_actions, + pre_mod_acts->actions); if (IS_ERR(mod_hdr)) { err = PTR_ERR(mod_hdr); ct_dbg("Failed to create pre ct mod hdr"); @@ -1865,20 +1906,18 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } attr->ct_attr.ct_flow = ct_flow; - dealloc_mod_hdr_actions(&pre_mod_acts); + mlx5e_mod_hdr_dealloc(pre_mod_acts); return ct_flow->pre_ct_rule; err_insert_orig: mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); err_mapping: - dealloc_mod_hdr_actions(&pre_mod_acts); + mlx5e_mod_hdr_dealloc(pre_mod_acts); mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); err_get_chain: kfree(ct_flow->pre_ct_attr); err_alloc_pre: - mlx5e_tc_post_act_del(ct_priv->post_act, handle); -err_post_act_handle: mlx5_tc_ct_del_ft_cb(ct_priv, ft); err_ft: kfree(ct_flow); @@ -1886,87 +1925,19 @@ err_ft: return ERR_PTR(err); } -static struct mlx5_flow_handle * -__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5_flow_spec *orig_spec, - struct mlx5_flow_attr *attr, - struct mlx5e_tc_mod_hdr_acts *mod_acts) -{ - struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); - u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); - struct mlx5_flow_attr *pre_ct_attr; - struct mlx5_modify_hdr *mod_hdr; - struct mlx5_flow_handle *rule; - struct mlx5_ct_flow *ct_flow; - int err; - - ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); - if (!ct_flow) - return ERR_PTR(-ENOMEM); - - /* Base esw attributes on original rule attribute */ - pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); - if (!pre_ct_attr) { - err = -ENOMEM; - goto err_attr; - } - - memcpy(pre_ct_attr, attr, attr_sz); - - mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, - mod_acts->num_actions, - mod_acts->actions); - if (IS_ERR(mod_hdr)) { - err = PTR_ERR(mod_hdr); - ct_dbg("Failed to add create ct clear mod hdr"); - goto err_mod_hdr; - } - - pre_ct_attr->modify_hdr = mod_hdr; - - rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - ct_dbg("Failed to add ct clear rule"); - goto err_insert; - } - - attr->ct_attr.ct_flow = ct_flow; - ct_flow->pre_ct_attr = pre_ct_attr; - ct_flow->pre_ct_rule = rule; - return rule; - -err_insert: - mlx5_modify_header_dealloc(priv->mdev, mod_hdr); -err_mod_hdr: - netdev_warn(priv->netdev, - "Failed to offload ct clear flow, err %d\n", err); - kfree(pre_ct_attr); -err_attr: - kfree(ct_flow); - - return ERR_PTR(err); -} - struct mlx5_flow_handle * mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) { - bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; struct mlx5_flow_handle *rule; if (!priv) return ERR_PTR(-EOPNOTSUPP); mutex_lock(&priv->control_lock); - - if (clear_action) - rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts); - else - rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr); + rule = __mlx5_tc_ct_flow_offload(priv, spec, attr); mutex_unlock(&priv->control_lock); return rule; @@ -1974,21 +1945,17 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, static void __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5e_tc_flow *flow, - struct mlx5_ct_flow *ct_flow) + struct mlx5_ct_flow *ct_flow, + struct mlx5_flow_attr *attr) { struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr; struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); - mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, - pre_ct_attr); + mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr); mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); - if (ct_flow->post_act_handle) { - mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); - mlx5e_tc_post_act_del(ct_priv->post_act, ct_flow->post_act_handle); - mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); - } + mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); + mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); kfree(ct_flow->pre_ct_attr); kfree(ct_flow); @@ -1996,7 +1963,6 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) { struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; @@ -2008,11 +1974,43 @@ mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, return; mutex_lock(&priv->control_lock); - __mlx5_tc_ct_delete_flow(priv, flow, ct_flow); + __mlx5_tc_ct_delete_flow(priv, ct_flow, attr); mutex_unlock(&priv->control_lock); } static int +mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv) +{ + struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act); + struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get(); + int err; + + if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB && + ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) { + ct_dbg("Using SMFS ct flow steering provider"); + fs_ops = mlx5_ct_fs_smfs_ops_get(); + } + + ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL); + if (!ct_priv->fs) + return -ENOMEM; + + ct_priv->fs->netdev = ct_priv->netdev; + ct_priv->fs->dev = ct_priv->dev; + ct_priv->fs_ops = fs_ops; + + err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct); + if (err) + goto err_init; + + return 0; + +err_init: + kfree(ct_priv->fs); + return err; +} + +static int mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, const char **err_msg) { @@ -2064,7 +2062,7 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, /* Ignore_flow_level support isn't supported by default for VFs and so post_act * won't be supported. Skip showing error msg. */ - if (priv->mdev->coredev_type != MLX5_COREDEV_VF) + if (priv->mdev->coredev_type == MLX5_COREDEV_PF) err_msg = "post action is missing"; err = -EOPNOTSUPP; goto out_err; @@ -2079,6 +2077,29 @@ out_err: return err; } +static void +mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv) +{ + bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB; + struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs; + char dirname[16] = {}; + + if (sscanf(dirname, "ct_%s", is_fdb ? "fdb" : "nic") < 0) + return; + + ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev)); + debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root, + &ct_dbgfs->stats.offloaded); + debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root, + &ct_dbgfs->stats.rx_dropped); +} + +static void +mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv) +{ + debugfs_remove_recursive(ct_priv->debugfs.root); +} + #define INIT_ERR_PREFIX "tc ct offload init failed" struct mlx5_tc_ct_priv * @@ -2150,8 +2171,23 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params)) goto err_ct_tuples_nat_ht; + ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0); + if (!ct_priv->wq) { + err = -ENOMEM; + goto err_wq; + } + + err = mlx5_tc_ct_fs_init(ct_priv); + if (err) + goto err_init_fs; + + mlx5_ct_tc_create_dbgfs(ct_priv); return ct_priv; +err_init_fs: + destroy_workqueue(ct_priv->wq); +err_wq: + rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); err_ct_tuples_nat_ht: rhashtable_destroy(&ct_priv->ct_tuples_ht); err_ct_tuples_ht: @@ -2180,8 +2216,13 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) if (!ct_priv) return; + destroy_workqueue(ct_priv->wq); + mlx5_ct_tc_remove_dbgfs(ct_priv); chains = ct_priv->chains; + ct_priv->fs_ops->destroy(ct_priv->fs); + kfree(ct_priv->fs); + mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); mlx5_chains_destroy_global_table(chains, ct_priv->ct); mapping_destroy(ct_priv->zone_mapping); @@ -2206,22 +2247,22 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, return true; if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone)) - return false; + goto out_inc_drop; if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone)) - return false; + goto out_inc_drop; spin_lock(&ct_priv->ht_lock); entry = mlx5_tc_ct_entry_get(ct_priv, &tuple); if (!entry) { spin_unlock(&ct_priv->ht_lock); - return false; + goto out_inc_drop; } if (IS_ERR(entry)) { spin_unlock(&ct_priv->ht_lock); - return false; + goto out_inc_drop; } spin_unlock(&ct_priv->ht_lock); @@ -2229,4 +2270,8 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, __mlx5_tc_ct_entry_put(entry); return true; + +out_inc_drop: + atomic_inc(&ct_priv->debugfs.stats.rx_dropped); + return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 99662af1e41a..5bbd6b92840f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -62,10 +62,11 @@ struct mlx5_ct_attr { misc_parameters_2.metadata_reg_c_4),\ } +/* 8 LSB of metadata C5 are reserved for packet color */ #define fteid_to_reg_ct {\ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5,\ - .moffset = 0,\ - .mlen = 32,\ + .moffset = 8,\ + .mlen = 24,\ .soffset = MLX5_BYTE_OFF(fte_match_param,\ misc_parameters_2.metadata_reg_c_5),\ } @@ -84,8 +85,8 @@ struct mlx5_ct_attr { .mlen = ESW_ZONE_ID_BITS,\ } -#define REG_MAPPING_MLEN(reg) (mlx5e_tc_attr_to_reg_mappings[reg].mlen) -#define REG_MAPPING_MOFFSET(reg) (mlx5e_tc_attr_to_reg_mappings[reg].moffset) +#define MLX5_CT_ZONE_BITS MLX5_REG_MAPPING_MBITS(ZONE_TO_REG) +#define MLX5_CT_ZONE_MASK MLX5_REG_MAPPING_MASK(ZONE_TO_REG) #if IS_ENABLED(CONFIG_MLX5_TC_CT) @@ -116,19 +117,21 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_handle * mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr); bool mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, struct sk_buff *skb, u8 zone_restore_id); +int +mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts); + #else /* CONFIG_MLX5_TC_CT */ static inline struct mlx5_tc_ct_priv * @@ -171,6 +174,13 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) } static inline int +mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + return -EOPNOTSUPP; +} + +static inline int mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_acts, @@ -183,7 +193,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, static inline struct mlx5_flow_handle * mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) @@ -193,7 +202,6 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, static inline void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) { } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index b689701ac7d8..2e42d7c5451e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -5,11 +5,13 @@ #define __MLX5_EN_TC_PRIV_H__ #include "en_tc.h" +#include "en/tc/act/act.h" #define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1) #define MLX5E_TC_MAX_SPLITS 1 + enum { MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT, MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT, @@ -32,13 +34,17 @@ enum { struct mlx5e_tc_flow_parse_attr { const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_mpls_info mpls_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; + struct pedit_headers_action hdrs[__PEDIT_CMD_MAX]; struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; - struct ethhdr eth; + struct mlx5e_tc_act_parse_state parse_state; }; +struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc); + /* Helper struct for accessing a struct containing list_head array. * Containing struct * |- Helper array @@ -90,6 +96,7 @@ struct mlx5e_tc_flow { struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5e_tc_flow *peer_flow; struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */ + struct mlx5e_mod_hdr_handle *slow_mh; /* attached mod header instance for slow path */ struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */ struct list_head hairpin; /* flows sharing the same hairpin */ struct list_head peer; /* flows with peer flow */ @@ -103,10 +110,21 @@ struct mlx5e_tc_flow { struct rcu_head rcu_head; struct completion init_done; struct completion del_hw_done; - int tunnel_id; /* the mapped tunnel id of this flow */ struct mlx5_flow_attr *attr; + struct list_head attrs; + u32 chain_mapping; }; +struct mlx5_flow_handle * +mlx5e_tc_rule_offload(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); + +void +mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer); struct mlx5_flow_handle * @@ -115,7 +133,17 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr); +struct mlx5_flow_attr * +mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow); + +void mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow); +int mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow); + +bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow); +bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow); bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow); +int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow); +bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv); static inline void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag) { @@ -165,6 +193,7 @@ struct mlx5_flow_handle * mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec); + void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr); @@ -176,4 +205,14 @@ struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow); struct mlx5e_tc_int_port_priv * mlx5e_get_int_port_priv(struct mlx5e_priv *priv); + +struct mlx5e_flow_meters *mlx5e_get_flow_meters(struct mlx5_core_dev *dev); + +void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec); +void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec); + +int mlx5e_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack); + #endif /* __MLX5_EN_TC_PRIV_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index a5e450973225..e6f64d890fb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* Copyright (c) 2018 Mellanox Technologies. */ +#include <net/inet_ecn.h> #include <net/vxlan.h> #include <net/gre.h> #include <net/geneve.h> @@ -103,7 +104,7 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, } static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv, - struct net_device *mirred_dev, + struct net_device *dev, struct mlx5e_tc_tun_route_attr *attr) { struct net_device *route_dev; @@ -122,13 +123,13 @@ static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv, uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); attr->fl.fl4.flowi4_oif = uplink_dev->ifindex; } else { - struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(mirred_dev); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev); if (tunnel && tunnel->get_remote_ifindex) - attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(mirred_dev); + attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(dev); } - rt = ip_route_output_key(dev_net(mirred_dev), &attr->fl.fl4); + rt = ip_route_output_key(dev_net(dev), &attr->fl.fl4); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -235,7 +236,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, int err; /* add the IP fields */ - attr.fl.fl4.flowi4_tos = tun_key->tos; + attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK; attr.fl.fl4.daddr = tun_key->u.ipv4.dst; attr.fl.fl4.saddr = tun_key->u.ipv4.src; attr.ttl = tun_key->ttl; @@ -350,7 +351,7 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, int err; /* add the IP fields */ - attr.fl.fl4.flowi4_tos = tun_key->tos; + attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK; attr.fl.fl4.daddr = tun_key->u.ipv4.dst; attr.fl.fl4.saddr = tun_key->u.ipv4.src; attr.ttl = tun_key->ttl; @@ -440,10 +441,10 @@ release_neigh: #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv, - struct net_device *mirred_dev, + struct net_device *dev, struct mlx5e_tc_tun_route_attr *attr) { - struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(mirred_dev); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev); struct net_device *route_dev; struct net_device *out_dev; struct dst_entry *dst; @@ -451,8 +452,8 @@ static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv, int ret; if (tunnel && tunnel->get_remote_ifindex) - attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(mirred_dev); - dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, &attr->fl.fl6, + attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(dev); + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6, NULL); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -505,7 +506,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, int err; attr.ttl = tun_key->ttl; - attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); + attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label); attr.fl.fl6.daddr = tun_key->u.ipv6.dst; attr.fl.fl6.saddr = tun_key->u.ipv6.src; @@ -619,7 +620,7 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, attr.ttl = tun_key->ttl; - attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); + attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label); attr.fl.fl6.daddr = tun_key->u.ipv6.dst; attr.fl.fl6.saddr = tun_key->u.ipv6.src; @@ -708,9 +709,11 @@ release_neigh: int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *flow_attr) + struct mlx5_flow_attr *flow_attr, + struct net_device *filter_dev) { struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_int_port *int_port; TC_TUN_ROUTE_ATTR_INIT(attr); u16 vport_num; @@ -720,14 +723,14 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, /* Addresses are swapped for decap */ attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4; attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4; - err = mlx5e_route_lookup_ipv4_get(priv, priv->netdev, &attr); + err = mlx5e_route_lookup_ipv4_get(priv, filter_dev, &attr); } #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) else if (flow_attr->tun_ip_version == 6) { /* Addresses are swapped for decap */ attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6; attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6; - err = mlx5e_route_lookup_ipv6_get(priv, priv->netdev, &attr); + err = mlx5e_route_lookup_ipv6_get(priv, filter_dev, &attr); } #endif else @@ -745,7 +748,7 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni); esw_attr->rx_tun_attr->decap_vport = vport_num; - } else if (netif_is_ovs_master(attr.route_dev)) { + } else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) { int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), attr.route_dev->ifindex, MLX5E_TC_INT_PORT_INGRESS); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index aa092eaeaec3..b38f693bbb52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -94,7 +94,8 @@ mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, #endif int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr); + struct mlx5_flow_attr *attr, + struct net_device *filter_dev); bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 042b1abe1437..5aff97914367 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -173,19 +173,29 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, list_for_each_entry(flow, flow_list, tmp_list) { if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW)) continue; - attr = flow->attr; - esw_attr = attr->esw_attr; - spec = &attr->parse_attr->spec; + spec = &flow->attr->parse_attr->spec; + + attr = mlx5e_tc_get_encap_attr(flow); + esw_attr = attr->esw_attr; esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat; esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; /* Do not offload flows with unresolved neighbors */ if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) continue; + + err = mlx5e_tc_offload_flow_post_acts(flow); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n", + err); + continue; + } + /* update from slow path rule to encap rule */ - rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr); if (IS_ERR(rule)) { + mlx5e_tc_unoffload_flow_post_acts(flow); err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", err); @@ -214,12 +224,13 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, list_for_each_entry(flow, flow_list, tmp_list) { if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW)) continue; - attr = flow->attr; - esw_attr = attr->esw_attr; - spec = &attr->parse_attr->spec; + spec = &flow->attr->parse_attr->spec; /* update from encap rule to slow path rule */ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); + + attr = mlx5e_tc_get_encap_attr(flow); + esw_attr = attr->esw_attr; /* mark the flow's encap dest as non-valid */ esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; @@ -230,7 +241,8 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, continue; } - mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); + mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); + mlx5e_tc_unoffload_flow_post_acts(flow); flow->rule[0] = rule; /* was unset when fast path rule removed */ flow_flag_set(flow, OFFLOADED); @@ -488,12 +500,17 @@ static void mlx5e_detach_encap_route(struct mlx5e_priv *priv, int out_index); void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, int out_index) + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + int out_index) { struct mlx5e_encap_entry *e = flow->encaps[out_index].e; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - if (flow->attr->esw_attr->dests[out_index].flags & + if (!mlx5e_is_eswitch_flow(flow)) + return; + + if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) mlx5e_detach_encap_route(priv, flow, out_index); @@ -733,6 +750,7 @@ static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv) static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct mlx5e_encap_entry *e, bool new_encap_entry, unsigned long tbl_time_before, @@ -740,6 +758,7 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, @@ -748,8 +767,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_flow_attr *attr = flow->attr; const struct ip_tunnel_info *tun_info; + const struct mlx5e_mpls_info *mpls_info; unsigned long tbl_time_before = 0; struct mlx5e_encap_entry *e; struct mlx5e_encap_key key; @@ -760,6 +779,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, parse_attr = attr->parse_attr; tun_info = parse_attr->tun_info[out_index]; + mpls_info = &parse_attr->mpls_info[out_index]; family = ip_tunnel_info_af(tun_info); key.ip_tun_key = &tun_info->key; key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); @@ -810,6 +830,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, goto out_err_init; } e->tun_info = tun_info; + memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info)); err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); if (err) goto out_err_init; @@ -834,8 +855,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, e->compl_result = 1; attach_flow: - err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before, - out_index); + err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created, + tbl_time_before, out_index); if (err) goto out_err; @@ -885,20 +906,18 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; struct mlx5_pkt_reformat_params reformat_params; - struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_decap_entry *d; struct mlx5e_decap_key key; uintptr_t hash_key; int err = 0; - parse_attr = flow->attr->parse_attr; - if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { + if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { NL_SET_ERR_MSG_MOD(extack, "encap header larger than max supported"); return -EOPNOTSUPP; } - key.key = parse_attr->eth; + key.key = attr->eth; hash_key = hash_decap_info(&key); mutex_lock(&esw->offloads.decap_tbl_lock); d = mlx5e_decap_get(priv, &key, hash_key); @@ -928,8 +947,8 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv, memset(&reformat_params, 0, sizeof(reformat_params)); reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; - reformat_params.size = sizeof(parse_attr->eth); - reformat_params.data = &parse_attr->eth; + reformat_params.size = sizeof(attr->eth); + reformat_params.data = &attr->eth; d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); @@ -1159,7 +1178,7 @@ int mlx5e_attach_decap_route(struct mlx5e_priv *priv, tbl_time_before = mlx5e_route_tbl_get_last_update(priv); tbl_time_after = tbl_time_before; - err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr); + err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev); if (err || !esw_attr->rx_tun_attr->decap_vport) goto out; @@ -1198,6 +1217,7 @@ out: static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct mlx5e_encap_entry *e, bool new_encap_entry, unsigned long tbl_time_before, @@ -1206,7 +1226,6 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; unsigned long tbl_time_after = tbl_time_before; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_flow_attr *attr = flow->attr; const struct ip_tunnel_info *tun_info; struct mlx5_esw_flow_attr *esw_attr; struct mlx5e_route_entry *r; @@ -1357,17 +1376,19 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, list_for_each_entry(flow, encap_flows, tmp_list) { struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_flow_attr *attr = flow->attr; struct mlx5_esw_flow_attr *esw_attr; struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; struct mlx5_flow_spec *spec; if (flow_flag_test(flow, FAILED)) continue; + spec = &flow->attr->parse_attr->spec; + + attr = mlx5e_tc_get_encap_attr(flow); esw_attr = attr->esw_attr; parse_attr = attr->parse_attr; - spec = &parse_attr->spec; err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts, e->out_dev, e->route_dev_ifindex, @@ -1377,7 +1398,7 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, continue; } - err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow); + err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr); if (err) { mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d", err); @@ -1389,9 +1410,18 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) goto offload_to_slow_path; + + err = mlx5e_tc_offload_flow_post_acts(flow); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n", + err); + goto offload_to_slow_path; + } + /* update from slow path rule to encap rule */ - rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr); if (IS_ERR(rule)) { + mlx5e_tc_unoffload_flow_post_acts(flow); err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", err); @@ -1480,7 +1510,7 @@ static void mlx5e_reoffload_decap(struct mlx5e_priv *priv, parse_attr = attr->parse_attr; spec = &parse_attr->spec; - err = mlx5e_tc_tun_route_lookup(priv, spec, attr); + err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev); if (err) { mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n", err); @@ -1579,6 +1609,8 @@ mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv, struct net_device *fib_dev; fen_info = container_of(info, struct fib_entry_notifier_info, info); + if (fen_info->fi->nh) + return NULL; fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops || fen_info->dst_len != 32) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h index 3391504d9a08..d542b8476491 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h @@ -7,15 +7,19 @@ #include "tc_priv.h" void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, int out_index); + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + int out_index); int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, struct net_device **encap_dev, bool *encap_valid); + int mlx5e_attach_decap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c index 60952b33b568..c5b1617d556f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c @@ -30,16 +30,15 @@ static int generate_ip_tun_hdr(char buf[], struct mlx5e_encap_entry *r) { const struct ip_tunnel_key *tun_key = &r->tun_info->key; + const struct mlx5e_mpls_info *mpls_info = &r->mpls_info; struct udphdr *udp = (struct udphdr *)(buf); struct mpls_shim_hdr *mpls; - u32 tun_id; - tun_id = be32_to_cpu(tunnel_id_to_key32(tun_key->tun_id)); mpls = (struct mpls_shim_hdr *)(udp + 1); *ip_proto = IPPROTO_UDP; udp->dest = tun_key->tp_dst; - *mpls = mpls_entry_encode(tun_id, tun_key->ttl, tun_key->tos, true); + *mpls = mpls_entry_encode(mpls_info->label, mpls_info->ttl, mpls_info->tc, mpls_info->bos); return 0; } @@ -60,37 +59,31 @@ static int parse_tunnel(struct mlx5e_priv *priv, void *headers_v) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); - struct flow_match_enc_keyid enc_keyid; struct flow_match_mpls match; void *misc2_c; void *misc2_v; - misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters_2); - misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters_2); - - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) - return 0; - - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) - return 0; - - flow_rule_match_enc_keyid(rule, &enc_keyid); - - if (!enc_keyid.mask->keyid) - return 0; - if (!MLX5_CAP_ETH(priv->mdev, tunnel_stateless_mpls_over_udp) && !(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_CW_MPLS_UDP)) return -EOPNOTSUPP; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return -EOPNOTSUPP; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) + return 0; + flow_rule_match_mpls(rule, &match); /* Only support matching the first LSE */ if (match.mask->used_lses != 1) return -EOPNOTSUPP; + misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2_c, outer_first_mpls_over_udp.mpls_label, match.mask->ls[0].mpls_label); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c index da169b816665..d4239e3b3c88 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c @@ -88,9 +88,6 @@ void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder, (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8); MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout); break; - case MLX5E_PACKET_MERGE_SHAMPO: - MLX5_SET(tirc, tirc, packet_merge_mask, MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO); - break; default: break; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index a55b066746cb..201ac7dd338f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -14,19 +14,26 @@ static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget) bool busy = false; int work_done = 0; + rcu_read_lock(); + ch_stats->poll++; work_done = mlx5e_poll_rx_cq(&rq->cq, budget); busy |= work_done == budget; busy |= rq->post_wqes(rq); - if (busy) - return budget; + if (busy) { + work_done = budget; + goto out; + } if (unlikely(!napi_complete_done(napi, work_done))) - return work_done; + goto out; mlx5e_cq_arm(&rq->cq); + +out: + rcu_read_unlock(); return work_done; } @@ -140,7 +147,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); t->stats = &priv->trap_stats.ch; - netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll, 64); + netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll); err = mlx5e_open_trap_rq(priv, t); if (unlikely(err)) @@ -172,6 +179,7 @@ static void mlx5e_activate_trap(struct mlx5e_trap *trap) { napi_enable(&trap->napi); mlx5e_activate_rq(&trap->rq); + mlx5e_trigger_napi_sched(&trap->napi); } void mlx5e_deactivate_trap(struct mlx5e_priv *priv) @@ -222,12 +230,12 @@ static int mlx5e_handle_action_trap(struct mlx5e_priv *priv, int trap_id) switch (trap_id) { case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER: - err = mlx5e_add_vlan_trap(priv, trap_id, mlx5e_trap_get_tirn(priv->en_trap)); + err = mlx5e_add_vlan_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap)); if (err) goto err_out; break; case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER: - err = mlx5e_add_mac_trap(priv, trap_id, mlx5e_trap_get_tirn(priv->en_trap)); + err = mlx5e_add_mac_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap)); if (err) goto err_out; break; @@ -248,10 +256,10 @@ static int mlx5e_handle_action_drop(struct mlx5e_priv *priv, int trap_id) { switch (trap_id) { case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER: - mlx5e_remove_vlan_trap(priv); + mlx5e_remove_vlan_trap(priv->fs); break; case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER: - mlx5e_remove_mac_trap(priv); + mlx5e_remove_mac_trap(priv->fs); break; default: netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 4cdf8e5b24c2..853f312cd757 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -9,20 +9,28 @@ #define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) -/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS - * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. - * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a - * full-session WQE be cache-aligned. +#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) + +/* IPSEC inline data includes: + * 1. ESP trailer: up to 255 bytes of padding, 1 byte for pad length, 1 byte for + * next header. + * 2. ESP authentication data: 16 bytes for ICV. */ -#if L1_CACHE_BYTES < 128 -#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) -#else -#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) -#endif +#define MLX5E_MAX_TX_IPSEC_DS DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + \ + 255 + 1 + 1 + 16, MLX5_SEND_WQE_DS) -#define MLX5E_TX_MPW_MAX_NUM_DS (MLX5E_TX_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) +/* 366 should be big enough to cover all L2, L3 and L4 headers with possible + * encapsulations. + */ +#define MLX5E_MAX_TX_INLINE_DS DIV_ROUND_UP(366 - INL_HDR_START_SZ + VLAN_HLEN, \ + MLX5_SEND_WQE_DS) -#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) +/* Sync the calculation with mlx5e_sq_calc_wqe_attr. */ +#define MLX5E_MAX_TX_WQEBBS DIV_ROUND_UP(MLX5E_TX_WQE_EMPTY_DS_COUNT + \ + MLX5E_MAX_TX_INLINE_DS + \ + MLX5E_MAX_TX_IPSEC_DS + \ + MAX_SKB_FRAGS + 1, \ + MLX5_SEND_WQEBB_NUM_DS) #define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND) @@ -57,10 +65,8 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget); int mlx5e_poll_ico_cq(struct mlx5e_cq *cq); /* RX */ -void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info); -void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info, - bool recycle); +void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page); +void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle); INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)); INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); @@ -68,13 +74,17 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq); void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq); /* TX */ -u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev); netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); static inline bool +mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo) +{ + return (*fifo->pc - *fifo->cc) < fifo->mask; +} + +static inline bool mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) { return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc); @@ -167,6 +177,11 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size) return pi; } +static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1); +} + struct mlx5e_shampo_umr { u16 len; }; @@ -303,9 +318,9 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more); void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq); -static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session) +static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) { - return session->ds_count == MLX5E_TX_MPW_MAX_NUM_DS; + return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS; } static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) @@ -426,9 +441,11 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, } } -static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size) +#define MLX5E_STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1) + +static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size) { - BUILD_BUG_ON(PAGE_SIZE / MLX5_SEND_WQE_BB < MLX5_SEND_WQE_MAX_WQEBBS); + WARN_ON_ONCE(PAGE_SIZE / MLX5_SEND_WQE_BB < mlx5e_get_max_sq_wqebbs(mdev)); /* A WQE must not cross the page boundary, hence two conditions: * 1. Its size must not exceed the page size. @@ -438,19 +455,36 @@ static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size) * stop room of X-1 + X. * WQE size is also limited by the hardware limit. */ + WARN_ONCE(wqe_size > mlx5e_get_max_sq_wqebbs(mdev), + "wqe_size %u is greater than max SQ WQEBBs %u", + wqe_size, mlx5e_get_max_sq_wqebbs(mdev)); - if (__builtin_constant_p(wqe_size)) - BUILD_BUG_ON(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS); - else - WARN_ON_ONCE(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS); + return MLX5E_STOP_ROOM(wqe_size); +} - return wqe_size * 2 - 1; +static inline u16 mlx5e_stop_room_for_max_wqe(struct mlx5_core_dev *mdev) +{ + return MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev)); +} + +static inline u16 mlx5e_stop_room_for_mpwqe(struct mlx5_core_dev *mdev) +{ + u8 mpwqe_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); + + return mlx5e_stop_room_for_wqe(mdev, mpwqe_wqebbs); } static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size) { - u16 room = sq->reserved_room + mlx5e_stop_room_for_wqe(wqe_size); + u16 room = sq->reserved_room + MLX5E_STOP_ROOM(wqe_size); return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room); } + +static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i) +{ + size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe); + + return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz)); +} #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 2f0df5cc1a2d..20507ef2f956 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -57,12 +57,14 @@ int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) static inline bool mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, - struct mlx5e_dma_info *di, struct xdp_buff *xdp) + struct page *page, struct xdp_buff *xdp) { + struct skb_shared_info *sinfo = NULL; struct mlx5e_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; struct xdp_frame *xdpf; dma_addr_t dma_addr; + int i; xdpf = xdp_convert_buff_to_frame(xdp); if (unlikely(!xdpf)) @@ -96,46 +98,77 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, xdptxd.dma_addr = dma_addr; xdpi.frame.xdpf = xdpf; xdpi.frame.dma_addr = dma_addr; - } else { - /* Driver assumes that xdp_convert_buff_to_frame returns - * an xdp_frame that points to the same memory region as - * the original xdp_buff. It allows to map the memory only - * once and to use the DMA_BIDIRECTIONAL mode. - */ - xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE; + if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, + mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0))) + return false; + + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + return true; + } + + /* Driver assumes that xdp_convert_buff_to_frame returns an xdp_frame + * that points to the same memory region as the original xdp_buff. It + * allows to map the memory only once and to use the DMA_BIDIRECTIONAL + * mode. + */ + + xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE; + xdpi.page.rq = rq; + + dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf); + dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL); + + if (unlikely(xdp_frame_has_frags(xdpf))) { + sinfo = xdp_get_shared_info_from_frame(xdpf); - dma_addr = di->addr + (xdpf->data - (void *)xdpf); - dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, - DMA_TO_DEVICE); + for (i = 0; i < sinfo->nr_frags; i++) { + skb_frag_t *frag = &sinfo->frags[i]; + dma_addr_t addr; + u32 len; - xdptxd.dma_addr = dma_addr; - xdpi.page.rq = rq; - xdpi.page.di = *di; + addr = page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); + len = skb_frag_size(frag); + dma_sync_single_for_device(sq->pdev, addr, len, + DMA_BIDIRECTIONAL); + } } - return INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, - mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, 0); + xdptxd.dma_addr = dma_addr; + + if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, + mlx5e_xmit_xdp_frame, sq, &xdptxd, sinfo, 0))) + return false; + + xdpi.page.page = page; + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + + if (unlikely(xdp_frame_has_frags(xdpf))) { + for (i = 0; i < sinfo->nr_frags; i++) { + skb_frag_t *frag = &sinfo->frags[i]; + + xdpi.page.page = skb_frag_page(frag); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + } + } + + return true; } /* returns true if packet was consumed by xdp */ -bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - u32 *len, struct xdp_buff *xdp) +bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page, + struct bpf_prog *prog, struct xdp_buff *xdp) { - struct bpf_prog *prog = rcu_dereference(rq->xdp_prog); u32 act; int err; - if (!prog) - return false; - act = bpf_prog_run_xdp(prog, xdp); switch (act) { case XDP_PASS: - *len = xdp->data_end - xdp->data; return false; case XDP_TX: - if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, xdp))) + if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, page, xdp))) goto xdp_abort; __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ return true; @@ -147,11 +180,11 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL) - mlx5e_page_dma_unmap(rq, di); + mlx5e_page_dma_unmap(rq, page); rq->stats->xdp_redirect++; return true; default: - bpf_warn_invalid_xdp_action(act); + bpf_warn_invalid_xdp_action(rq->netdev, prog, act); fallthrough; case XDP_ABORTED: xdp_abort: @@ -199,7 +232,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) struct mlx5e_tx_wqe *wqe; u16 pi; - pi = mlx5e_xdpsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS); + pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs); wqe = MLX5E_TX_FETCH_WQE(sq, pi); net_prefetchw(wqe->data); @@ -245,10 +278,8 @@ enum { INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) { if (unlikely(!sq->mpwqe.wqe)) { - const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - stop_room))) { + sq->stop_room))) { /* SQ is full, ring doorbell */ mlx5e_xmit_xdp_doorbell(sq); sq->stats->full++; @@ -262,12 +293,26 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq } INDIRECT_CALLABLE_SCOPE bool +mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, + struct skb_shared_info *sinfo, int check_result); + +INDIRECT_CALLABLE_SCOPE bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, - struct mlx5e_xdp_info *xdpi, int check_result) + struct skb_shared_info *sinfo, int check_result) { struct mlx5e_tx_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; + if (unlikely(sinfo)) { + /* MPWQE is enabled, but a multi-buffer packet is queued for + * transmission. MPWQE can't send fragmented packets, so close + * the current session and fall back to a regular WQE. + */ + if (unlikely(sq->mpwqe.wqe)) + mlx5e_xdp_mpwqe_complete(sq); + return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0); + } + if (unlikely(xdptxd->len > sq->hw_mtu)) { stats->err++; return false; @@ -288,17 +333,16 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); - if (unlikely(mlx5e_xdp_mpqwe_is_full(session))) + if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs))) mlx5e_xdp_mpwqe_complete(sq); - mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); stats->xmit++; return true; } -INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq) +static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room) { - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) { + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) { /* SQ is full, ring doorbell */ mlx5e_xmit_xdp_doorbell(sq); sq->stats->full++; @@ -308,43 +352,76 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq) return MLX5E_XDP_CHECK_OK; } +INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq) +{ + return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1); +} + INDIRECT_CALLABLE_SCOPE bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, - struct mlx5e_xdp_info *xdpi, int check_result) + struct skb_shared_info *sinfo, int check_result) { struct mlx5_wq_cyc *wq = &sq->wq; - u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - struct mlx5_wqe_data_seg *dseg = wqe->data; + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5_wqe_eth_seg *eseg; + struct mlx5e_tx_wqe *wqe; dma_addr_t dma_addr = xdptxd->dma_addr; u32 dma_len = xdptxd->len; + u16 ds_cnt, inline_hdr_sz; + u8 num_wqebbs = 1; + int num_frags = 0; + u16 pi; struct mlx5e_xdpsq_stats *stats = sq->stats; - net_prefetchw(wqe); - if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) { stats->err++; return false; } - if (!check_result) - check_result = mlx5e_xmit_xdp_frame_check(sq); + ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1; + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) + ds_cnt++; + + /* check_result must be 0 if sinfo is passed. */ + if (!check_result) { + int stop_room = 1; + + if (unlikely(sinfo)) { + ds_cnt += sinfo->nr_frags; + num_frags = sinfo->nr_frags; + num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + /* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big + * enough to hold all fragments. + */ + stop_room = MLX5E_STOP_ROOM(num_wqebbs); + } + + check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room); + } if (unlikely(check_result < 0)) return false; - cseg->fm_ce_se = 0; + pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs); + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + net_prefetchw(wqe); + + cseg = &wqe->ctrl; + eseg = &wqe->eth; + dseg = wqe->data; + + inline_hdr_sz = 0; /* copy the inline part if required */ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE); - eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); + memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start)); + memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start), + MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start)); dma_len -= MLX5E_XDP_MIN_INLINE; dma_addr += MLX5E_XDP_MIN_INLINE; + inline_hdr_sz = MLX5E_XDP_MIN_INLINE; dseg++; } @@ -354,11 +431,45 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); - sq->pc++; + if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) { + u8 num_pkts = 1 + num_frags; + int i; + + memset(&cseg->trailer, 0, sizeof(cseg->trailer)); + memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer)); + + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + dseg->lkey = sq->mkey_be; + + for (i = 0; i < num_frags; i++) { + skb_frag_t *frag = &sinfo->frags[i]; + dma_addr_t addr; + + addr = page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); + + dseg++; + dseg->addr = cpu_to_be64(addr); + dseg->byte_count = cpu_to_be32(skb_frag_size(frag)); + dseg->lkey = sq->mkey_be; + } + + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + + sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) { + .num_wqebbs = num_wqebbs, + .num_pkts = num_pkts, + }; + + sq->pc += num_wqebbs; + } else { + cseg->fm_ce_se = 0; + + sq->pc++; + } sq->doorbell_cseg = cseg; - mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); stats->xmit++; return true; } @@ -384,7 +495,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, break; case MLX5E_XDP_XMIT_MODE_PAGE: /* XDP_TX from the regular RQ */ - mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle); + mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle); break; case MLX5E_XDP_XMIT_MODE_XSK: /* AF_XDP send */ @@ -537,12 +648,13 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, xdpi.frame.dma_addr = xdptxd.dma_addr; ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, - mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, 0); + mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0); if (unlikely(!ret)) { dma_unmap_single(sq->pdev, xdptxd.dma_addr, xdptxd.len, DMA_TO_DEVICE); break; } + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); nxmit++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 8d991c3b7a50..bc2d9034af5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -38,7 +38,6 @@ #include "en/txrx.h" #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) -#define MLX5E_XDP_TX_DS_COUNT (MLX5E_TX_WQE_EMPTY_DS_COUNT + 1 /* SG DS */) #define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT 16 #define MLX5E_XDP_INLINE_WQE_SZ_THRSD \ @@ -47,8 +46,8 @@ struct mlx5e_xsk_param; int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); -bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - u32 *len, struct xdp_buff *xdp); +bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page, + struct bpf_prog *prog, struct xdp_buff *xdp); void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq); bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); @@ -59,11 +58,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, - struct mlx5e_xdp_info *xdpi, + struct skb_shared_info *sinfo, int check_result)); INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, - struct mlx5e_xdp_info *xdpi, + struct skb_shared_info *sinfo, int check_result)); INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)); INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)); @@ -123,12 +122,13 @@ static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur) return cur; } -static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session) +static inline bool mlx5e_xdp_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) { if (session->inline_on) return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > - MLX5E_TX_MPW_MAX_NUM_DS; - return mlx5e_tx_mpwqe_is_full(session); + max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS; + + return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs); } struct mlx5e_xdp_wqe_info { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index 7b562d2c8a19..ebada0c5af3c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -11,13 +11,13 @@ static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv, { struct device *dev = mlx5_core_dma_dev(priv->mdev); - return xsk_pool_dma_map(pool, dev, 0); + return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC); } static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv, struct xsk_buff_pool *pool) { - return xsk_pool_dma_unmap(pool, 0); + return xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC); } static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk) @@ -72,6 +72,7 @@ void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *x { xsk->headroom = xsk_pool_get_headroom(pool); xsk->chunk_size = xsk_pool_get_chunk_size(pool); + xsk->unaligned = pool->unaligned; } static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, @@ -98,6 +99,15 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, mlx5e_build_xsk_param(pool, &xsk); + if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + mlx5e_mpwrq_umr_mode(priv->mdev, &xsk) == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) { + const char *recommendation = is_power_of_2(xsk.chunk_size) ? + "Upgrade firmware" : "Disable striding RQ"; + + mlx5_core_warn(priv->mdev, "Expected slowdown with XSK frame size %u. %s for better performance.\n", + xsk.chunk_size, recommendation); + } + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { /* XSK objects will be created on open. */ goto validate_closed; @@ -117,20 +127,18 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, goto err_remove_pool; mlx5e_activate_xsk(c); + mlx5e_trigger_napi_icosq(c); /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide * any Fill Ring entries at the setup stage. */ - err = mlx5e_rx_res_xsk_activate(priv->rx_res, &priv->channels, ix); - if (unlikely(err)) - goto err_deactivate; + mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, true); - return 0; + mlx5e_deactivate_rq(&c->rq); + mlx5e_flush_rq(&c->rq, MLX5_RQC_STATE_RDY); -err_deactivate: - mlx5e_deactivate_xsk(c); - mlx5e_close_xsk(c); + return 0; err_remove_pool: mlx5e_xsk_remove_pool(&priv->xsk, ix); @@ -169,7 +177,13 @@ static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) goto remove_pool; c = priv->channels.c[ix]; - mlx5e_rx_res_xsk_deactivate(priv->rx_res, ix); + + mlx5e_activate_rq(&c->rq); + mlx5e_trigger_napi_icosq(c); + mlx5e_wait_for_min_rx_wqes(&c->rq, MLX5E_RQ_WQES_TIMEOUT); + + mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, false); + mlx5e_deactivate_xsk(c); mlx5e_close_xsk(c); @@ -207,11 +221,10 @@ int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_params *params = &priv->channels.params; - u16 ix; - if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) + if (unlikely(qid >= params->num_channels)) return -EINVAL; - return pool ? mlx5e_xsk_enable_pool(priv, pool, ix) : - mlx5e_xsk_disable_pool(priv, ix); + return pool ? mlx5e_xsk_enable_pool(priv, pool, qid) : + mlx5e_xsk_disable_pool(priv, qid); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index 8e7b877d8a12..c91b54d9ff27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -4,21 +4,225 @@ #include "rx.h" #include "en/xdp.h" #include <net/xdp_sock_drv.h> +#include <linux/filter.h> /* RX data path */ -static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data, - u32 cqe_bcnt) +int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); + struct mlx5e_icosq *icosq = rq->icosq; + struct mlx5_wq_cyc *wq = &icosq->wq; + struct mlx5e_umr_wqe *umr_wqe; + int batch, i; + u32 offset; /* 17-bit value with MTT. */ + u16 pi; + + if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe))) + goto err; + + BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk)); + batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units, + rq->mpwqe.pages_per_wqe); + + /* If batch < pages_per_wqe, either: + * 1. Some (or all) descriptors were invalid. + * 2. dma_need_sync is true, and it fell back to allocating one frame. + * In either case, try to continue allocating frames one by one, until + * the first error, which will mean there are no more valid descriptors. + */ + for (; batch < rq->mpwqe.pages_per_wqe; batch++) { + wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool); + if (unlikely(!wi->alloc_units[batch].xsk)) + goto err_reuse_batch; + } + + pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs); + umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); + memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe)); + + if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) { + for (i = 0; i < batch; i++) { + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + + umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { + .ptag = cpu_to_be64(addr | MLX5_EN_WR), + }; + } + } else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) { + for (i = 0; i < batch; i++) { + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + + umr_wqe->inline_ksms[i] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr), + }; + } + } else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) { + u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2); + + for (i = 0; i < batch; i++) { + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + + umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr), + }; + umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr + mapping_size), + }; + umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr + mapping_size * 2), + }; + umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(rq->wqe_overflow.addr), + }; + } + } else { + __be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) - + rq->xsk_pool->chunk_size); + __be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size); + + for (i = 0; i < batch; i++) { + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + + umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr), + .bcount = frame_size, + }; + umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) { + .key = rq->mkey_be, + .va = cpu_to_be64(rq->wqe_overflow.addr), + .bcount = pad_size, + }; + } + } + + bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); + wi->consumed_strides = 0; + + umr_wqe->ctrl.opmod_idx_opcode = + cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); + + /* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */ + offset = ix * rq->mpwqe.mtts_per_wqe; + if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) + offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD; + else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED)) + offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD; + else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) + offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD; + umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); + + icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX, + .num_wqebbs = rq->mpwqe.umr_wqebbs, + .umr.rq = rq, + }; + + icosq->pc += rq->mpwqe.umr_wqebbs; + + icosq->doorbell_cseg = &umr_wqe->ctrl; + + return 0; + +err_reuse_batch: + while (--batch >= 0) + xsk_buff_free(wi->alloc_units[batch].xsk); + +err: + rq->stats->buff_alloc_err++; + return -ENOMEM; +} + +int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct xdp_buff **buffs; + u32 contig, alloc; + int i; + + /* mlx5e_init_frags_partition creates a 1:1 mapping between + * rq->wqe.frags and rq->wqe.alloc_units, which allows us to + * allocate XDP buffers straight into alloc_units. + */ + BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) != + sizeof(rq->wqe.alloc_units[0].xsk)); + buffs = (struct xdp_buff **)rq->wqe.alloc_units; + contig = mlx5_wq_cyc_get_size(wq) - ix; + if (wqe_bulk <= contig) { + alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk); + } else { + alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig); + if (likely(alloc == contig)) + alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig); + } + + for (i = 0; i < alloc; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_wqe_frag_info *frag; + struct mlx5e_rx_wqe_cyc *wqe; + dma_addr_t addr; + + wqe = mlx5_wq_cyc_get_wqe(wq, j); + /* Assumes log_num_frags == 0. */ + frag = &rq->wqe.frags[j]; + + addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk); + wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); + } + + return alloc; +} + +int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + int i; + + for (i = 0; i < wqe_bulk; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_wqe_frag_info *frag; + struct mlx5e_rx_wqe_cyc *wqe; + dma_addr_t addr; + + wqe = mlx5_wq_cyc_get_wqe(wq, j); + /* Assumes log_num_frags == 0. */ + frag = &rq->wqe.frags[j]; + + frag->au->xsk = xsk_buff_alloc(rq->xsk_pool); + if (unlikely(!frag->au->xsk)) + return i; + + addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk); + wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); + } + + return wqe_bulk; +} + +static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp) +{ + u32 totallen = xdp->data_end - xdp->data_meta; + u32 metalen = xdp->data - xdp->data_meta; struct sk_buff *skb; - skb = napi_alloc_skb(rq->cq.napi, cqe_bcnt); + skb = napi_alloc_skb(rq->cq.napi, totallen); if (unlikely(!skb)) { rq->stats->buff_alloc_err++; return NULL; } - skb_put_data(skb, data, cqe_bcnt); + skb_put_data(skb, xdp->data_meta, totallen); + + if (metalen) { + skb_metadata_set(skb, metalen); + __skb_pull(skb, metalen); + } return skb; } @@ -29,8 +233,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, u32 head_offset, u32 page_idx) { - struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk; - u32 cqe_bcnt32 = cqe_bcnt; + struct xdp_buff *xdp = wi->alloc_units[page_idx].xsk; + struct bpf_prog *prog; /* Check packet size. Note LRO doesn't use linear SKB */ if (unlikely(cqe_bcnt > rq->hw_mtu)) { @@ -45,8 +249,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, */ WARN_ON_ONCE(head_offset); - xdp->data_end = xdp->data + cqe_bcnt32; - xdp_set_data_meta_invalid(xdp); + xsk_buff_set_size(xdp, cqe_bcnt); xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool); net_prefetch(xdp->data); @@ -65,7 +268,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, * allocated first from the Reuse Ring, so it has enough space. */ - if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp))) { + prog = rcu_dereference(rq->xdp_prog); + if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) { if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ return NULL; /* page/packet was consumed by XDP */ @@ -74,15 +278,15 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the * frame. On SKB allocation failure, NULL is returned. */ - return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt32); + return mlx5e_xsk_construct_skb(rq, xdp); } struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt) { - struct xdp_buff *xdp = wi->di->xsk; + struct xdp_buff *xdp = wi->au->xsk; + struct bpf_prog *prog; /* wi->offset is not used in this function, because xdp->data and the * DMA address point directly to the necessary place. Furthermore, the @@ -91,22 +295,17 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, */ WARN_ON_ONCE(wi->offset); - xdp->data_end = xdp->data + cqe_bcnt; - xdp_set_data_meta_invalid(xdp); + xsk_buff_set_size(xdp, cqe_bcnt); xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool); net_prefetch(xdp->data); - if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { - rq->stats->wqe_err++; - return NULL; - } - - if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp))) + prog = rcu_dereference(rq->xdp_prog); + if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) return NULL; /* page/packet was consumed by XDP */ /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse - * will be handled by mlx5e_put_rx_frag. + * will be handled by mlx5e_free_rx_wqe. * On SKB allocation failure, NULL is returned. */ - return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt); + return mlx5e_xsk_construct_skb(rq, xdp); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h index 7f88ccf67fdd..087c943bd8e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -5,48 +5,19 @@ #define __MLX5_EN_XSK_RX_H__ #include "en.h" -#include <net/xdp_sock_drv.h> /* RX data path */ +int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); +int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk); +int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk); struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, u16 cqe_bcnt, u32 head_offset, u32 page_idx); struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); -static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) -{ - dma_info->xsk = xsk_buff_alloc(rq->xsk_pool); - if (!dma_info->xsk) - return -ENOMEM; - - /* Store the DMA address without headroom. In striding RQ case, we just - * provide pages for UMR, and headroom is counted at the setup stage - * when creating a WQE. In non-striding RQ case, headroom is accounted - * in mlx5e_alloc_rx_wqe. - */ - dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk); - - return 0; -} - -static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err) -{ - if (!xsk_uses_need_wakeup(rq->xsk_pool)) - return alloc_err; - - if (unlikely(alloc_err)) - xsk_set_rx_need_wakeup(rq->xsk_pool); - else - xsk_clear_rx_need_wakeup(rq->xsk_pool); - - return false; -} - #endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 538bc2419bd8..ff03c43833bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -4,24 +4,20 @@ #include "setup.h" #include "en/params.h" #include "en/txrx.h" +#include "en/health.h" +#include <net/xdp_sock_drv.h> -/* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may - * change unexpectedly, and mlx5e has a minimum valid stride size for striding - * RQ, keep this check in the driver. +/* The limitation of 2048 can be altered, but shouldn't go beyond the minimal + * stride size of striding RQ. */ -#define MLX5E_MIN_XSK_CHUNK_SIZE 2048 +#define MLX5E_MIN_XSK_CHUNK_SIZE max(2048, XDP_UMEM_MIN_CHUNK_SIZE) bool mlx5e_validate_xsk_param(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct mlx5_core_dev *mdev) { /* AF_XDP doesn't support frames larger than PAGE_SIZE. */ - if (xsk->chunk_size > PAGE_SIZE || - xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE) - return false; - - /* Current MTU and XSK headroom don't allow packets to fit the frames. */ - if (mlx5e_rx_get_min_frag_sz(params, xsk) > xsk->chunk_size) + if (xsk->chunk_size > PAGE_SIZE || xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE) return false; /* frag_sz is different for regular and XSK RQs, so ensure that linear @@ -29,9 +25,9 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params, */ switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); + return !mlx5e_mpwrq_validate_xsk(mdev, params, xsk); default: /* MLX5_WQ_TYPE_CYCLIC */ - return mlx5e_rx_is_linear_skb(params, xsk); + return mlx5e_rx_is_linear_skb(mdev, params, xsk); } } @@ -42,7 +38,7 @@ static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev, struct mlx5e_channel_param *cparam) { mlx5e_build_rq_param(mdev, params, xsk, q_counter, &cparam->rq); - mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq); + mlx5e_build_xdpsq_param(mdev, params, xsk, &cparam->xdp_sq); } static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, @@ -63,13 +59,14 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, rq->clock = &mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; + rq->channel = c; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); rq->xdpsq = &c->rq_xdpsq; rq->xsk_pool = pool; - rq->stats = &c->priv->channel_stats[c->ix].xskrq; + rq->stats = &c->priv->channel_stats[c->ix]->xskrq; rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); - rq_xdp_ix = c->ix + params->num_channels * MLX5E_RQ_GROUP_XSK; + rq_xdp_ix = c->ix; err = mlx5e_rq_set_handlers(rq, params, xsk); if (err) return err; @@ -157,7 +154,7 @@ err_free_cparam: void mlx5e_close_xsk(struct mlx5e_channel *c) { clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); - synchronize_net(); /* Sync with the XSK wakeup and with NAPI. */ + synchronize_net(); /* Sync with NAPI. */ mlx5e_close_rq(&c->xskrq); mlx5e_close_cq(&c->xskrq.cq); @@ -170,16 +167,25 @@ void mlx5e_close_xsk(struct mlx5e_channel *c) void mlx5e_activate_xsk(struct mlx5e_channel *c) { + /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid + * activating XSKRQ in the middle of recovery. + */ + mlx5e_reporter_icosq_suspend_recovery(c); set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); - /* TX queue is created active. */ + mlx5e_reporter_icosq_resume_recovery(c); - spin_lock_bh(&c->async_icosq_lock); - mlx5e_trigger_irq(&c->async_icosq); - spin_unlock_bh(&c->async_icosq_lock); + /* TX queue is created active. */ } void mlx5e_deactivate_xsk(struct mlx5e_channel *c) { - mlx5e_deactivate_rq(&c->xskrq); + /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the + * middle of recovery. Suspend the recovery to avoid it. + */ + mlx5e_reporter_icosq_suspend_recovery(c); + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ + /* TX queue is disabled on close. */ } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c index 8e96260fce1d..367a9505ca4f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -12,18 +12,14 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_params *params = &priv->channels.params; struct mlx5e_channel *c; - u16 ix; if (unlikely(!mlx5e_xdp_is_active(priv))) return -ENETDOWN; - if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) + if (unlikely(qid >= params->num_channels)) return -EINVAL; - c = priv->channels.c[ix]; - - if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))) - return -ENXIO; + c = priv->channels.c[qid]; if (!napi_if_scheduled_mark_missed(&c->napi)) { /* To avoid WQE overrun, don't post a NOP if async_icosq is not @@ -36,9 +32,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state)) return 0; - spin_lock_bh(&c->async_icosq_lock); - mlx5e_trigger_irq(&c->async_icosq); - spin_unlock_bh(&c->async_icosq_lock); + mlx5e_trigger_napi_icosq(c); } return 0; @@ -103,12 +97,15 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len); ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, - mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, check_result); + mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, + check_result); if (unlikely(!ret)) { if (sq->mpwqe.wqe) mlx5e_xdp_mpwqe_complete(sq); mlx5e_xsk_tx_post_err(sq, &xdpi); + } else { + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); } flush = true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h index a05085035f23..9c505158b975 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -5,7 +5,6 @@ #define __MLX5_EN_XSK_TX_H__ #include "en.h" -#include <net/xdp_sock_drv.h> /* TX data path */ @@ -13,15 +12,4 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags); bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); -static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq) -{ - if (!xsk_uses_need_wakeup(sq->xsk_pool)) - return; - - if (sq->pc != sq->cc) - xsk_clear_tx_need_wakeup(sq->xsk_pool); - else - xsk_set_tx_need_wakeup(sq->xsk_pool); -} - #endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index d964665eaa63..07187028f0d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -37,8 +37,9 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> #include "en_accel/ipsec_rxtx.h" -#include "en_accel/tls.h" -#include "en_accel/tls_rxtx.h" +#include "en_accel/ktls.h" +#include "en_accel/ktls_txrx.h" +#include <en_accel/macsec.h> #include "en.h" #include "en/txrx.h" @@ -124,8 +125,9 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev, #ifdef CONFIG_MLX5_EN_TLS /* May send SKBs and WQEs. */ - if (mlx5e_tls_skb_offloaded(skb)) - if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, &state->tls))) + if (mlx5e_ktls_skb_offloaded(skb)) + if (unlikely(!mlx5e_ktls_handle_tx_skb(dev, sq, skb, + &state->tls))) return false; #endif @@ -136,16 +138,16 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev, } #endif - return true; -} +#ifdef CONFIG_MLX5_EN_MACSEC + if (unlikely(mlx5e_macsec_skb_is_offload(skb))) { + struct mlx5e_priv *priv = netdev_priv(dev); -static inline bool mlx5e_accel_tx_is_ipsec_flow(struct mlx5e_accel_tx_state *state) -{ -#ifdef CONFIG_MLX5_EN_IPSEC - return mlx5e_ipsec_is_tx_flow(&state->ipsec); -#else - return false; + if (unlikely(!mlx5e_macsec_handle_tx_skb(priv->macsec, skb))) + return false; + } #endif + + return true; } static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq, @@ -171,6 +173,11 @@ static inline void mlx5e_accel_tx_eseg(struct mlx5e_priv *priv, mlx5e_ipsec_tx_build_eseg(priv, skb, eseg); #endif +#ifdef CONFIG_MLX5_EN_MACSEC + if (unlikely(mlx5e_macsec_skb_is_offload(skb))) + mlx5e_macsec_tx_build_eseg(priv->macsec, skb, eseg); +#endif + #if IS_ENABLED(CONFIG_GENEVE) if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL) mlx5e_tx_tunnel_accel(skb, eseg, ihs); @@ -183,7 +190,7 @@ static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq, struct mlx5_wqe_inline_seg *inlseg) { #ifdef CONFIG_MLX5_EN_TLS - mlx5e_tls_handle_tx_wqe(&wqe->ctrl, &state->tls); + mlx5e_ktls_handle_tx_wqe(&wqe->ctrl, &state->tls); #endif #ifdef CONFIG_MLX5_EN_IPSEC @@ -202,4 +209,14 @@ static inline void mlx5e_accel_cleanup_rx(struct mlx5e_priv *priv) { mlx5e_ktls_cleanup_rx(priv); } + +static inline int mlx5e_accel_init_tx(struct mlx5e_priv *priv) +{ + return mlx5e_ktls_init_tx(priv); +} + +static inline void mlx5e_accel_cleanup_tx(struct mlx5e_priv *priv) +{ + mlx5e_ktls_cleanup_tx(priv); +} #endif /* __MLX5E_EN_ACCEL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index 4c4ee524176c..285d32d2fd08 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ -#include <linux/netdevice.h> +#include <mlx5_core.h> #include "en_accel/fs_tcp.h" #include "fs_core.h" @@ -71,13 +71,13 @@ void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } -struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_priv *priv, +struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs, struct sock *sk, u32 tirn, uint32_t flow_tag) { + struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs); struct mlx5_flow_destination dest = {}; struct mlx5e_flow_table *ft = NULL; - struct mlx5e_accel_fs_tcp *fs_tcp; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *flow; struct mlx5_flow_spec *spec; @@ -86,23 +86,21 @@ struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_priv *priv, if (!spec) return ERR_PTR(-ENOMEM); - fs_tcp = priv->fs.accel_tcp; - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; switch (sk->sk_family) { case AF_INET: accel_fs_tcp_set_ipv4_flow(spec, sk); ft = &fs_tcp->tables[ACCEL_FS_IPV4_TCP]; - mlx5e_dbg(HW, priv, "%s flow is %pI4:%d -> %pI4:%d\n", __func__, - &inet_sk(sk)->inet_rcv_saddr, - inet_sk(sk)->inet_sport, - &inet_sk(sk)->inet_daddr, - inet_sk(sk)->inet_dport); + fs_dbg(fs, "%s flow is %pI4:%d -> %pI4:%d\n", __func__, + &inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_sport, + &inet_sk(sk)->inet_daddr, + inet_sk(sk)->inet_dport); break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - if (!sk->sk_ipv6only && + if (!ipv6_only_sock(sk) && ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) { accel_fs_tcp_set_ipv4_flow(spec, sk); ft = &fs_tcp->tables[ACCEL_FS_IPV4_TCP]; @@ -140,34 +138,32 @@ struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_priv *priv, flow = mlx5_add_flow_rules(ft->t, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) - netdev_err(priv->netdev, "mlx5_add_flow_rules() failed, flow is %ld\n", - PTR_ERR(flow)); + fs_err(fs, "mlx5_add_flow_rules() failed, flow is %ld\n", PTR_ERR(flow)); out: kvfree(spec); return flow; } -static int accel_fs_tcp_add_default_rule(struct mlx5e_priv *priv, +static int accel_fs_tcp_add_default_rule(struct mlx5e_flow_steering *fs, enum accel_fs_tcp_type type) { + struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs); + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); struct mlx5e_flow_table *accel_fs_t; struct mlx5_flow_destination dest; - struct mlx5e_accel_fs_tcp *fs_tcp; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; int err = 0; - fs_tcp = priv->fs.accel_tcp; accel_fs_t = &fs_tcp->tables[type]; - dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_accel2tt(type)); + dest = mlx5_ttc_get_default_dest(ttc, fs_accel2tt(type)); rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); - netdev_err(priv->netdev, - "%s: add default rule failed, accel_fs type=%d, err %d\n", - __func__, type, err); + fs_err(fs, "%s: add default rule failed, accel_fs type=%d, err %d\n", + __func__, type, err); return err; } @@ -265,9 +261,11 @@ out: return err; } -static int accel_fs_tcp_create_table(struct mlx5e_priv *priv, enum accel_fs_tcp_type type) +static int accel_fs_tcp_create_table(struct mlx5e_flow_steering *fs, enum accel_fs_tcp_type type) { - struct mlx5e_flow_table *ft = &priv->fs.accel_tcp->tables[type]; + struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs); + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false); + struct mlx5e_flow_table *ft = &accel_tcp->tables[type]; struct mlx5_flow_table_attr ft_attr = {}; int err; @@ -277,21 +275,21 @@ static int accel_fs_tcp_create_table(struct mlx5e_priv *priv, enum accel_fs_tcp_ ft_attr.level = MLX5E_ACCEL_FS_TCP_FT_LEVEL; ft_attr.prio = MLX5E_NIC_PRIO; - ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + ft->t = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; return err; } - netdev_dbg(priv->netdev, "Created fs accel table id %u level %u\n", - ft->t->id, ft->t->level); + fs_dbg(fs, "Created fs accel table id %u level %u\n", + ft->t->id, ft->t->level); err = accel_fs_tcp_create_groups(ft, type); if (err) goto err; - err = accel_fs_tcp_add_default_rule(priv, type); + err = accel_fs_tcp_add_default_rule(fs, type); if (err) goto err; @@ -301,17 +299,18 @@ err: return err; } -static int accel_fs_tcp_disable(struct mlx5e_priv *priv) +static int accel_fs_tcp_disable(struct mlx5e_flow_steering *fs) { + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); int err, i; for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) { /* Modify ttc rules destination to point back to the indir TIRs */ - err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_accel2tt(i)); + err = mlx5_ttc_fwd_default_dest(ttc, fs_accel2tt(i)); if (err) { - netdev_err(priv->netdev, - "%s: modify ttc[%d] default destination failed, err(%d)\n", - __func__, fs_accel2tt(i), err); + fs_err(fs, + "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, fs_accel2tt(i), err); return err; } } @@ -319,32 +318,32 @@ static int accel_fs_tcp_disable(struct mlx5e_priv *priv) return 0; } -static int accel_fs_tcp_enable(struct mlx5e_priv *priv) +static int accel_fs_tcp_enable(struct mlx5e_flow_steering *fs) { + struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs); + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); struct mlx5_flow_destination dest = {}; int err, i; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) { - dest.ft = priv->fs.accel_tcp->tables[i].t; + dest.ft = accel_tcp->tables[i].t; /* Modify ttc rules destination to point on the accel_fs FTs */ - err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_accel2tt(i), &dest); + err = mlx5_ttc_fwd_dest(ttc, fs_accel2tt(i), &dest); if (err) { - netdev_err(priv->netdev, - "%s: modify ttc[%d] destination to accel failed, err(%d)\n", - __func__, fs_accel2tt(i), err); + fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n", + __func__, fs_accel2tt(i), err); return err; } } return 0; } -static void accel_fs_tcp_destroy_table(struct mlx5e_priv *priv, int i) +static void accel_fs_tcp_destroy_table(struct mlx5e_flow_steering *fs, int i) { - struct mlx5e_accel_fs_tcp *fs_tcp; + struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs); - fs_tcp = priv->fs.accel_tcp; if (IS_ERR_OR_NULL(fs_tcp->tables[i].t)) return; @@ -353,40 +352,43 @@ static void accel_fs_tcp_destroy_table(struct mlx5e_priv *priv, int i) fs_tcp->tables[i].t = NULL; } -void mlx5e_accel_fs_tcp_destroy(struct mlx5e_priv *priv) +void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) { + struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs); int i; - if (!priv->fs.accel_tcp) + if (!accel_tcp) return; - accel_fs_tcp_disable(priv); + accel_fs_tcp_disable(fs); for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) - accel_fs_tcp_destroy_table(priv, i); + accel_fs_tcp_destroy_table(fs, i); - kfree(priv->fs.accel_tcp); - priv->fs.accel_tcp = NULL; + kfree(accel_tcp); + mlx5e_fs_set_accel_tcp(fs, NULL); } -int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv) +int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) { + struct mlx5e_accel_fs_tcp *accel_tcp; int i, err; - if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version)) + if (!MLX5_CAP_FLOWTABLE_NIC_RX(mlx5e_fs_get_mdev(fs), ft_field_support.outer_ip_version)) return -EOPNOTSUPP; - priv->fs.accel_tcp = kzalloc(sizeof(*priv->fs.accel_tcp), GFP_KERNEL); - if (!priv->fs.accel_tcp) + accel_tcp = kvzalloc(sizeof(*accel_tcp), GFP_KERNEL); + if (!accel_tcp) return -ENOMEM; + mlx5e_fs_set_accel_tcp(fs, accel_tcp); for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) { - err = accel_fs_tcp_create_table(priv, i); + err = accel_fs_tcp_create_table(fs, i); if (err) goto err_destroy_tables; } - err = accel_fs_tcp_enable(priv); + err = accel_fs_tcp_enable(fs); if (err) goto err_destroy_tables; @@ -394,9 +396,8 @@ int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv) err_destroy_tables: while (--i >= 0) - accel_fs_tcp_destroy_table(priv, i); - - kfree(priv->fs.accel_tcp); - priv->fs.accel_tcp = NULL; + accel_fs_tcp_destroy_table(fs, i); + kfree(accel_tcp); + mlx5e_fs_set_accel_tcp(fs, NULL); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h index 589235824543..a032bff482a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h @@ -4,19 +4,19 @@ #ifndef __MLX5E_ACCEL_FS_TCP_H__ #define __MLX5E_ACCEL_FS_TCP_H__ -#include "en.h" +#include "en/fs.h" #ifdef CONFIG_MLX5_EN_TLS -int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv); -void mlx5e_accel_fs_tcp_destroy(struct mlx5e_priv *priv); -struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_priv *priv, +int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs); +void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs); +struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs, struct sock *sk, u32 tirn, uint32_t flow_tag); void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule); #else -static inline int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv) { return 0; } -static inline void mlx5e_accel_fs_tcp_destroy(struct mlx5e_priv *priv) {} -static inline struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_priv *priv, +static inline int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) { return 0; } +static inline void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) {} +static inline struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs, struct sock *sk, u32 tirn, uint32_t flow_tag) { return ERR_PTR(-EOPNOTSUPP); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 7cab08a2f715..a715601865d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -35,26 +35,14 @@ #include <crypto/aead.h> #include <linux/inetdevice.h> #include <linux/netdevice.h> -#include <linux/module.h> #include "en.h" -#include "en_accel/ipsec.h" -#include "en_accel/ipsec_rxtx.h" -#include "en_accel/ipsec_fs.h" +#include "ipsec.h" +#include "ipsec_rxtx.h" static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x) { - struct mlx5e_ipsec_sa_entry *sa; - - if (!x) - return NULL; - - sa = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; - if (!sa) - return NULL; - - WARN_ON(sa->x != x); - return sa; + return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; } struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec, @@ -75,9 +63,9 @@ struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec, return ret; } -static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry, - unsigned int handle) +static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry) { + unsigned int handle = sa_entry->ipsec_obj_id; struct mlx5e_ipsec *ipsec = sa_entry->ipsec; struct mlx5e_ipsec_sa_entry *_sa_entry; unsigned long flags; @@ -113,7 +101,6 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) struct xfrm_replay_state_esn *replay_esn; u32 seq_bottom = 0; u8 overlap; - u32 *esn; if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) { sa_entry->esn_state.trigger = 0; @@ -128,11 +115,9 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x, htonl(seq_bottom)); - esn = &sa_entry->esn_state.esn; sa_entry->esn_state.trigger = 1; if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { - ++(*esn); sa_entry->esn_state.overlap = 0; return true; } else if (unlikely(!overlap && @@ -149,7 +134,7 @@ mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_accel_esp_xfrm_attrs *attrs) { struct xfrm_state *x = sa_entry->x; - struct aes_gcm_keymat *aes_gcm = &attrs->keymat.aes_gcm; + struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm; struct aead_geniv_ctx *geniv_ctx; struct crypto_aead *aead; unsigned int crypto_data_len, key_len; @@ -183,23 +168,17 @@ mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; } - /* rx handle */ - attrs->sa_handle = sa_entry->handle; - - /* algo type */ - attrs->keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; - /* action */ - attrs->action = (!(x->xso.flags & XFRM_OFFLOAD_INBOUND)) ? - MLX5_ACCEL_ESP_ACTION_ENCRYPT : - MLX5_ACCEL_ESP_ACTION_DECRYPT; + attrs->action = (x->xso.dir == XFRM_DEV_OFFLOAD_OUT) ? + MLX5_ACCEL_ESP_ACTION_ENCRYPT : + MLX5_ACCEL_ESP_ACTION_DECRYPT; /* flags */ attrs->flags |= (x->props.mode == XFRM_MODE_TRANSPORT) ? MLX5_ACCEL_ESP_FLAGS_TRANSPORT : MLX5_ACCEL_ESP_FLAGS_TUNNEL; /* spi */ - attrs->spi = x->id.spi; + attrs->spi = be32_to_cpu(x->id.spi); /* source , destination ips */ memcpy(&attrs->saddr, x->props.saddr.a6, sizeof(attrs->saddr)); @@ -227,8 +206,7 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) return -EINVAL; } if (x->props.flags & XFRM_STATE_ESN && - !(mlx5_accel_ipsec_device_caps(priv->mdev) & - MLX5_ACCEL_IPSEC_CAP_ESN)) { + !(mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_ESN)) { netdev_info(netdev, "Cannot offload ESN xfrm states\n"); return -EINVAL; } @@ -275,46 +253,29 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n"); return -EINVAL; } - if (x->props.family == AF_INET6 && - !(mlx5_accel_ipsec_device_caps(priv->mdev) & - MLX5_ACCEL_IPSEC_CAP_IPV6)) { - netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n"); - return -EINVAL; - } return 0; } -static int mlx5e_xfrm_fs_add_rule(struct mlx5e_priv *priv, - struct mlx5e_ipsec_sa_entry *sa_entry) -{ - if (!mlx5_is_ipsec_device(priv->mdev)) - return 0; - - return mlx5e_accel_ipsec_fs_add_rule(priv, &sa_entry->xfrm->attrs, - sa_entry->ipsec_obj_id, - &sa_entry->ipsec_rule); -} - -static void mlx5e_xfrm_fs_del_rule(struct mlx5e_priv *priv, - struct mlx5e_ipsec_sa_entry *sa_entry) +static void _update_xfrm_state(struct work_struct *work) { - if (!mlx5_is_ipsec_device(priv->mdev)) - return; + struct mlx5e_ipsec_modify_state_work *modify_work = + container_of(work, struct mlx5e_ipsec_modify_state_work, work); + struct mlx5e_ipsec_sa_entry *sa_entry = container_of( + modify_work, struct mlx5e_ipsec_sa_entry, modify_work); - mlx5e_accel_ipsec_fs_del_rule(priv, &sa_entry->xfrm->attrs, - &sa_entry->ipsec_rule); + mlx5_accel_esp_modify_xfrm(sa_entry, &modify_work->attrs); } static int mlx5e_xfrm_add_state(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = NULL; struct net_device *netdev = x->xso.real_dev; - struct mlx5_accel_esp_xfrm_attrs attrs; struct mlx5e_priv *priv; - unsigned int sa_handle; int err; priv = netdev_priv(netdev); + if (!priv->ipsec) + return -EOPNOTSUPP; err = mlx5e_xfrm_validate_state(x); if (err) @@ -332,33 +293,18 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x) /* check esn */ mlx5e_ipsec_update_esn_state(sa_entry); - /* create xfrm */ - mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs); - sa_entry->xfrm = - mlx5_accel_esp_create_xfrm(priv->mdev, &attrs, - MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA); - if (IS_ERR(sa_entry->xfrm)) { - err = PTR_ERR(sa_entry->xfrm); - goto err_sa_entry; - } - + mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs); /* create hw context */ - sa_entry->hw_context = - mlx5_accel_esp_create_hw_context(priv->mdev, - sa_entry->xfrm, - &sa_handle); - if (IS_ERR(sa_entry->hw_context)) { - err = PTR_ERR(sa_entry->hw_context); + err = mlx5_ipsec_create_sa_ctx(sa_entry); + if (err) goto err_xfrm; - } - sa_entry->ipsec_obj_id = sa_handle; - err = mlx5e_xfrm_fs_add_rule(priv, sa_entry); + err = mlx5e_accel_ipsec_fs_add_rule(priv, sa_entry); if (err) goto err_hw_ctx; - if (x->xso.flags & XFRM_OFFLOAD_INBOUND) { - err = mlx5e_ipsec_sadb_rx_add(sa_entry, sa_handle); + if (x->xso.dir == XFRM_DEV_OFFLOAD_IN) { + err = mlx5e_ipsec_sadb_rx_add(sa_entry); if (err) goto err_add_rule; } else { @@ -366,18 +312,16 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x) mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv; } + INIT_WORK(&sa_entry->modify_work.work, _update_xfrm_state); x->xso.offload_handle = (unsigned long)sa_entry; goto out; err_add_rule: - mlx5e_xfrm_fs_del_rule(priv, sa_entry); + mlx5e_accel_ipsec_fs_del_rule(priv, sa_entry); err_hw_ctx: - mlx5_accel_esp_free_hw_context(priv->mdev, sa_entry->hw_context); + mlx5_ipsec_free_sa_ctx(sa_entry); err_xfrm: - mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm); -err_sa_entry: kfree(sa_entry); - out: return err; } @@ -386,10 +330,7 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); - if (!sa_entry) - return; - - if (x->xso.flags & XFRM_OFFLOAD_INBOUND) + if (x->xso.dir == XFRM_DEV_OFFLOAD_IN) mlx5e_ipsec_sadb_rx_del(sa_entry); } @@ -398,24 +339,18 @@ static void mlx5e_xfrm_free_state(struct xfrm_state *x) struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); struct mlx5e_priv *priv = netdev_priv(x->xso.dev); - if (!sa_entry) - return; - - if (sa_entry->hw_context) { - flush_workqueue(sa_entry->ipsec->wq); - mlx5e_xfrm_fs_del_rule(priv, sa_entry); - mlx5_accel_esp_free_hw_context(sa_entry->xfrm->mdev, sa_entry->hw_context); - mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm); - } - + cancel_work_sync(&sa_entry->modify_work.work); + mlx5e_accel_ipsec_fs_del_rule(priv, sa_entry); + mlx5_ipsec_free_sa_ctx(sa_entry); kfree(sa_entry); } int mlx5e_ipsec_init(struct mlx5e_priv *priv) { - struct mlx5e_ipsec *ipsec = NULL; + struct mlx5e_ipsec *ipsec; + int ret; - if (!MLX5_IPSEC_DEV(priv->mdev)) { + if (!mlx5_ipsec_device_caps(priv->mdev)) { netdev_dbg(priv->netdev, "Not an IPSec offload device\n"); return 0; } @@ -426,21 +361,27 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv) hash_init(ipsec->sadb_rx); spin_lock_init(&ipsec->sadb_rx_lock); - ida_init(&ipsec->halloc); - ipsec->en_priv = priv; - ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) & - MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER); + ipsec->mdev = priv->mdev; ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0, priv->netdev->name); if (!ipsec->wq) { - kfree(ipsec); - return -ENOMEM; + ret = -ENOMEM; + goto err_wq; } + ret = mlx5e_accel_ipsec_fs_init(ipsec); + if (ret) + goto err_fs_init; + priv->ipsec = ipsec; - mlx5e_accel_ipsec_fs_init(priv); netdev_dbg(priv->netdev, "IPSec attached to netdevice\n"); return 0; + +err_fs_init: + destroy_workqueue(ipsec->wq); +err_wq: + kfree(ipsec); + return (ret != -EOPNOTSUPP) ? ret : 0; } void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) @@ -450,10 +391,8 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) if (!ipsec) return; - mlx5e_accel_ipsec_fs_cleanup(priv); + mlx5e_accel_ipsec_fs_cleanup(ipsec); destroy_workqueue(ipsec->wq); - - ida_destroy(&ipsec->halloc); kfree(ipsec); priv->ipsec = NULL; } @@ -473,50 +412,19 @@ static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) return true; } -struct mlx5e_ipsec_modify_state_work { - struct work_struct work; - struct mlx5_accel_esp_xfrm_attrs attrs; - struct mlx5e_ipsec_sa_entry *sa_entry; -}; - -static void _update_xfrm_state(struct work_struct *work) -{ - int ret; - struct mlx5e_ipsec_modify_state_work *modify_work = - container_of(work, struct mlx5e_ipsec_modify_state_work, work); - struct mlx5e_ipsec_sa_entry *sa_entry = modify_work->sa_entry; - - ret = mlx5_accel_esp_modify_xfrm(sa_entry->xfrm, - &modify_work->attrs); - if (ret) - netdev_warn(sa_entry->ipsec->en_priv->netdev, - "Not an IPSec offload device\n"); - - kfree(modify_work); -} - static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); - struct mlx5e_ipsec_modify_state_work *modify_work; + struct mlx5e_ipsec_modify_state_work *modify_work = + &sa_entry->modify_work; bool need_update; - if (!sa_entry) - return; - need_update = mlx5e_ipsec_update_esn_state(sa_entry); if (!need_update) return; - modify_work = kzalloc(sizeof(*modify_work), GFP_ATOMIC); - if (!modify_work) - return; - mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs); - modify_work->sa_entry = sa_entry; - - INIT_WORK(&modify_work->work, _update_xfrm_state); - WARN_ON(!queue_work(sa_entry->ipsec->wq, &modify_work->work)); + queue_work(sa_entry->ipsec->wq, &modify_work->work); } static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = { @@ -532,11 +440,8 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) struct mlx5_core_dev *mdev = priv->mdev; struct net_device *netdev = priv->netdev; - if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) || - !MLX5_CAP_ETH(mdev, swp)) { - mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n"); + if (!mlx5_ipsec_device_caps(mdev)) return; - } mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n"); netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; @@ -551,15 +456,12 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) netdev->features |= NETIF_F_HW_ESP_TX_CSUM; netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM; - if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) || - !MLX5_CAP_ETH(mdev, swp_lso)) { + if (!MLX5_CAP_ETH(mdev, swp_lso)) { mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n"); return; } - if (mlx5_is_ipsec_device(mdev)) - netdev->gso_partial_features |= NETIF_F_GSO_ESP; - + netdev->gso_partial_features |= NETIF_F_GSO_ESP; mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n"); netdev->features |= NETIF_F_GSO_ESP; netdev->hw_features |= NETIF_F_GSO_ESP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 6164c7f59efb..16bcceec16c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -40,11 +40,56 @@ #include <net/xfrm.h> #include <linux/idr.h> -#include "accel/ipsec.h" - #define MLX5E_IPSEC_SADB_RX_BITS 10 #define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L +enum mlx5_accel_esp_flags { + MLX5_ACCEL_ESP_FLAGS_TUNNEL = 0, /* Default */ + MLX5_ACCEL_ESP_FLAGS_TRANSPORT = 1UL << 0, + MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED = 1UL << 1, + MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP = 1UL << 2, +}; + +enum mlx5_accel_esp_action { + MLX5_ACCEL_ESP_ACTION_DECRYPT, + MLX5_ACCEL_ESP_ACTION_ENCRYPT, +}; + +struct aes_gcm_keymat { + u64 seq_iv; + + u32 salt; + u32 icv_len; + + u32 key_len; + u32 aes_key[256 / 32]; +}; + +struct mlx5_accel_esp_xfrm_attrs { + enum mlx5_accel_esp_action action; + u32 esn; + u32 spi; + u32 flags; + struct aes_gcm_keymat aes_gcm; + + union { + __be32 a4; + __be32 a6[4]; + } saddr; + + union { + __be32 a4; + __be32 a6[4]; + } daddr; + + u8 is_ipv6; +}; + +enum mlx5_ipsec_cap { + MLX5_IPSEC_CAP_CRYPTO = 1 << 0, + MLX5_IPSEC_CAP_ESN = 1 << 1, +}; + struct mlx5e_priv; struct mlx5e_ipsec_sw_stats { @@ -55,37 +100,16 @@ struct mlx5e_ipsec_sw_stats { atomic64_t ipsec_tx_drop_no_state; atomic64_t ipsec_tx_drop_not_ip; atomic64_t ipsec_tx_drop_trailer; - atomic64_t ipsec_tx_drop_metadata; -}; - -struct mlx5e_ipsec_stats { - u64 ipsec_dec_in_packets; - u64 ipsec_dec_out_packets; - u64 ipsec_dec_bypass_packets; - u64 ipsec_enc_in_packets; - u64 ipsec_enc_out_packets; - u64 ipsec_enc_bypass_packets; - u64 ipsec_dec_drop_packets; - u64 ipsec_dec_auth_fail_packets; - u64 ipsec_enc_drop_packets; - u64 ipsec_add_sa_success; - u64 ipsec_add_sa_fail; - u64 ipsec_del_sa_success; - u64 ipsec_del_sa_fail; - u64 ipsec_cmd_drop; }; struct mlx5e_accel_fs_esp; struct mlx5e_ipsec_tx; struct mlx5e_ipsec { - struct mlx5e_priv *en_priv; + struct mlx5_core_dev *mdev; DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS); - bool no_trailer; - spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */ - struct ida halloc; + spinlock_t sadb_rx_lock; /* Protects sadb_rx */ struct mlx5e_ipsec_sw_stats sw_stats; - struct mlx5e_ipsec_stats stats; struct workqueue_struct *wq; struct mlx5e_accel_fs_esp *rx_fs; struct mlx5e_ipsec_tx *tx_fs; @@ -102,21 +126,26 @@ struct mlx5e_ipsec_rule { struct mlx5_modify_hdr *set_modify_hdr; }; +struct mlx5e_ipsec_modify_state_work { + struct work_struct work; + struct mlx5_accel_esp_xfrm_attrs attrs; +}; + struct mlx5e_ipsec_sa_entry { struct hlist_node hlist; /* Item in SADB_RX hashtable */ struct mlx5e_ipsec_esn_state esn_state; unsigned int handle; /* Handle in SADB_RX */ struct xfrm_state *x; struct mlx5e_ipsec *ipsec; - struct mlx5_accel_esp_xfrm *xfrm; - void *hw_context; + struct mlx5_accel_esp_xfrm_attrs attrs; void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x, struct xfrm_offload *xo); u32 ipsec_obj_id; + u32 enc_key_id; struct mlx5e_ipsec_rule ipsec_rule; + struct mlx5e_ipsec_modify_state_work modify_work; }; -void mlx5e_ipsec_build_inverse_table(void); int mlx5e_ipsec_init(struct mlx5e_priv *priv); void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv); void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv); @@ -124,12 +153,27 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv); struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev, unsigned int handle); -#else +void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec); +int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec); +int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv, + struct mlx5e_ipsec_sa_entry *sa_entry); +void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv, + struct mlx5e_ipsec_sa_entry *sa_entry); + +int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry); +void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry); -static inline void mlx5e_ipsec_build_inverse_table(void) +u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev); + +void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry, + const struct mlx5_accel_esp_xfrm_attrs *attrs); + +static inline struct mlx5_core_dev * +mlx5e_ipsec_sa2dev(struct mlx5e_ipsec_sa_entry *sa_entry) { + return sa_entry->ipsec->mdev; } - +#else static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv) { return 0; @@ -143,6 +187,10 @@ static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) { } +static inline u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + return 0; +} #endif #endif /* __MLX5E_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 17da23dff0ed..b859e4a4c744 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -2,8 +2,9 @@ /* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ #include <linux/netdevice.h> -#include "accel/ipsec_offload.h" -#include "ipsec_fs.h" +#include "en.h" +#include "en/fs.h" +#include "ipsec.h" #include "fs_core.h" #define NUM_IPSEC_FTE BIT(15) @@ -35,6 +36,7 @@ struct mlx5e_accel_fs_esp { }; struct mlx5e_ipsec_tx { + struct mlx5_flow_namespace *ns; struct mlx5_flow_table *ft; struct mutex mutex; /* Protect IPsec TX steering */ u32 refcnt; @@ -58,7 +60,7 @@ static int rx_err_add_rule(struct mlx5e_priv *priv, struct mlx5_modify_hdr *modify_hdr; struct mlx5_flow_handle *fte; struct mlx5_flow_spec *spec; - int err = 0; + int err; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) @@ -94,101 +96,27 @@ static int rx_err_add_rule(struct mlx5e_priv *priv, goto out; } + kvfree(spec); rx_err->rule = fte; rx_err->copy_modify_hdr = modify_hdr; + return 0; out: - if (err) - mlx5_modify_header_dealloc(mdev, modify_hdr); + mlx5_modify_header_dealloc(mdev, modify_hdr); out_spec: kvfree(spec); return err; } -static void rx_err_del_rule(struct mlx5e_priv *priv, - struct mlx5e_ipsec_rx_err *rx_err) -{ - if (rx_err->rule) { - mlx5_del_flow_rules(rx_err->rule); - rx_err->rule = NULL; - } - - if (rx_err->copy_modify_hdr) { - mlx5_modify_header_dealloc(priv->mdev, rx_err->copy_modify_hdr); - rx_err->copy_modify_hdr = NULL; - } -} - -static void rx_err_destroy_ft(struct mlx5e_priv *priv, struct mlx5e_ipsec_rx_err *rx_err) -{ - rx_err_del_rule(priv, rx_err); - - if (rx_err->ft) { - mlx5_destroy_flow_table(rx_err->ft); - rx_err->ft = NULL; - } -} - -static int rx_err_create_ft(struct mlx5e_priv *priv, - struct mlx5e_accel_fs_esp_prot *fs_prot, - struct mlx5e_ipsec_rx_err *rx_err) -{ - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_flow_table *ft; - int err; - - ft_attr.max_fte = 1; - ft_attr.autogroup.max_num_groups = 1; - ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL; - ft_attr.prio = MLX5E_NIC_PRIO; - ft = mlx5_create_auto_grouped_flow_table(priv->fs.ns, &ft_attr); - if (IS_ERR(ft)) { - err = PTR_ERR(ft); - netdev_err(priv->netdev, "fail to create ipsec rx inline ft err=%d\n", err); - return err; - } - - rx_err->ft = ft; - err = rx_err_add_rule(priv, fs_prot, rx_err); - if (err) - goto out_err; - - return 0; - -out_err: - mlx5_destroy_flow_table(ft); - rx_err->ft = NULL; - return err; -} - -static void rx_fs_destroy(struct mlx5e_accel_fs_esp_prot *fs_prot) -{ - if (fs_prot->miss_rule) { - mlx5_del_flow_rules(fs_prot->miss_rule); - fs_prot->miss_rule = NULL; - } - - if (fs_prot->miss_group) { - mlx5_destroy_flow_group(fs_prot->miss_group); - fs_prot->miss_group = NULL; - } - - if (fs_prot->ft) { - mlx5_destroy_flow_table(fs_prot->ft); - fs_prot->ft = NULL; - } -} - static int rx_fs_create(struct mlx5e_priv *priv, struct mlx5e_accel_fs_esp_prot *fs_prot) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *ft = fs_prot->ft; struct mlx5_flow_group *miss_group; struct mlx5_flow_handle *miss_rule; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_spec *spec; - struct mlx5_flow_table *ft; u32 *flow_group_in; int err = 0; @@ -199,20 +127,6 @@ static int rx_fs_create(struct mlx5e_priv *priv, goto out; } - /* Create FT */ - ft_attr.max_fte = NUM_IPSEC_FTE; - ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_LEVEL; - ft_attr.prio = MLX5E_NIC_PRIO; - ft_attr.autogroup.num_reserved_entries = 1; - ft_attr.autogroup.max_num_groups = 1; - ft = mlx5_create_auto_grouped_flow_table(priv->fs.ns, &ft_attr); - if (IS_ERR(ft)) { - err = PTR_ERR(ft); - netdev_err(priv->netdev, "fail to create ipsec rx ft err=%d\n", err); - goto out; - } - fs_prot->ft = ft; - /* Create miss_group */ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1); @@ -227,19 +141,19 @@ static int rx_fs_create(struct mlx5e_priv *priv, /* Create miss rule */ miss_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &fs_prot->default_dest, 1); if (IS_ERR(miss_rule)) { + mlx5_destroy_flow_group(fs_prot->miss_group); err = PTR_ERR(miss_rule); netdev_err(priv->netdev, "fail to create ipsec rx miss_rule err=%d\n", err); goto out; } fs_prot->miss_rule = miss_rule; - out: kvfree(flow_group_in); kvfree(spec); return err; } -static int rx_destroy(struct mlx5e_priv *priv, enum accel_fs_esp_type type) +static void rx_destroy(struct mlx5e_priv *priv, enum accel_fs_esp_type type) { struct mlx5e_accel_fs_esp_prot *fs_prot; struct mlx5e_accel_fs_esp *accel_esp; @@ -249,38 +163,75 @@ static int rx_destroy(struct mlx5e_priv *priv, enum accel_fs_esp_type type) /* The netdev unreg already happened, so all offloaded rule are already removed */ fs_prot = &accel_esp->fs_prot[type]; - rx_fs_destroy(fs_prot); - - rx_err_destroy_ft(priv, &fs_prot->rx_err); + mlx5_del_flow_rules(fs_prot->miss_rule); + mlx5_destroy_flow_group(fs_prot->miss_group); + mlx5_destroy_flow_table(fs_prot->ft); - return 0; + mlx5_del_flow_rules(fs_prot->rx_err.rule); + mlx5_modify_header_dealloc(priv->mdev, fs_prot->rx_err.copy_modify_hdr); + mlx5_destroy_flow_table(fs_prot->rx_err.ft); } static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type) { + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(priv->fs, false); + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false); + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_accel_fs_esp_prot *fs_prot; struct mlx5e_accel_fs_esp *accel_esp; + struct mlx5_flow_table *ft; int err; accel_esp = priv->ipsec->rx_fs; fs_prot = &accel_esp->fs_prot[type]; - fs_prot->default_dest = - mlx5_ttc_get_default_dest(priv->fs.ttc, fs_esp2tt(type)); + mlx5_ttc_get_default_dest(ttc, fs_esp2tt(type)); + + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) + return PTR_ERR(ft); - err = rx_err_create_ft(priv, fs_prot, &fs_prot->rx_err); + fs_prot->rx_err.ft = ft; + err = rx_err_add_rule(priv, fs_prot, &fs_prot->rx_err); if (err) - return err; + goto err_add; + + /* Create FT */ + ft_attr.max_fte = NUM_IPSEC_FTE; + ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + ft_attr.autogroup.num_reserved_entries = 1; + ft_attr.autogroup.max_num_groups = 1; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_fs_ft; + } + fs_prot->ft = ft; err = rx_fs_create(priv, fs_prot); if (err) - rx_destroy(priv, type); + goto err_fs; + + return 0; +err_fs: + mlx5_destroy_flow_table(fs_prot->ft); +err_fs_ft: + mlx5_del_flow_rules(fs_prot->rx_err.rule); + mlx5_modify_header_dealloc(priv->mdev, fs_prot->rx_err.copy_modify_hdr); +err_add: + mlx5_destroy_flow_table(fs_prot->rx_err.ft); return err; } static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type) { + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false); struct mlx5e_accel_fs_esp_prot *fs_prot; struct mlx5_flow_destination dest = {}; struct mlx5e_accel_fs_esp *accel_esp; @@ -289,21 +240,21 @@ static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type) accel_esp = priv->ipsec->rx_fs; fs_prot = &accel_esp->fs_prot[type]; mutex_lock(&fs_prot->prot_mutex); - if (fs_prot->refcnt++) - goto out; + if (fs_prot->refcnt) + goto skip; /* create FT */ err = rx_create(priv, type); - if (err) { - fs_prot->refcnt--; + if (err) goto out; - } /* connect */ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = fs_prot->ft; - mlx5_ttc_fwd_dest(priv->fs.ttc, fs_esp2tt(type), &dest); + mlx5_ttc_fwd_dest(ttc, fs_esp2tt(type), &dest); +skip: + fs_prot->refcnt++; out: mutex_unlock(&fs_prot->prot_mutex); return err; @@ -311,17 +262,19 @@ out: static void rx_ft_put(struct mlx5e_priv *priv, enum accel_fs_esp_type type) { + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false); struct mlx5e_accel_fs_esp_prot *fs_prot; struct mlx5e_accel_fs_esp *accel_esp; accel_esp = priv->ipsec->rx_fs; fs_prot = &accel_esp->fs_prot[type]; mutex_lock(&fs_prot->prot_mutex); - if (--fs_prot->refcnt) + fs_prot->refcnt--; + if (fs_prot->refcnt) goto out; /* disconnect */ - mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_esp2tt(type)); + mlx5_ttc_fwd_default_dest(ttc, fs_esp2tt(type)); /* remove FT */ rx_destroy(priv, type); @@ -338,15 +291,9 @@ static int tx_create(struct mlx5e_priv *priv) struct mlx5_flow_table *ft; int err; - priv->fs.egress_ns = - mlx5_get_flow_namespace(priv->mdev, - MLX5_FLOW_NAMESPACE_EGRESS_KERNEL); - if (!priv->fs.egress_ns) - return -EOPNOTSUPP; - ft_attr.max_fte = NUM_IPSEC_FTE; ft_attr.autogroup.max_num_groups = 1; - ft = mlx5_create_auto_grouped_flow_table(priv->fs.egress_ns, &ft_attr); + ft = mlx5_create_auto_grouped_flow_table(ipsec->tx_fs->ns, &ft_attr); if (IS_ERR(ft)) { err = PTR_ERR(ft); netdev_err(priv->netdev, "fail to create ipsec tx ft err=%d\n", err); @@ -356,32 +303,20 @@ static int tx_create(struct mlx5e_priv *priv) return 0; } -static void tx_destroy(struct mlx5e_priv *priv) -{ - struct mlx5e_ipsec *ipsec = priv->ipsec; - - if (IS_ERR_OR_NULL(ipsec->tx_fs->ft)) - return; - - mlx5_destroy_flow_table(ipsec->tx_fs->ft); - ipsec->tx_fs->ft = NULL; -} - static int tx_ft_get(struct mlx5e_priv *priv) { struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs; int err = 0; mutex_lock(&tx_fs->mutex); - if (tx_fs->refcnt++) - goto out; + if (tx_fs->refcnt) + goto skip; err = tx_create(priv); - if (err) { - tx_fs->refcnt--; + if (err) goto out; - } - +skip: + tx_fs->refcnt++; out: mutex_unlock(&tx_fs->mutex); return err; @@ -392,11 +327,11 @@ static void tx_ft_put(struct mlx5e_priv *priv) struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs; mutex_lock(&tx_fs->mutex); - if (--tx_fs->refcnt) + tx_fs->refcnt--; + if (tx_fs->refcnt) goto out; - tx_destroy(priv); - + mlx5_destroy_flow_table(tx_fs->ft); out: mutex_unlock(&tx_fs->mutex); } @@ -424,8 +359,8 @@ static void setup_fte_common(struct mlx5_accel_esp_xfrm_attrs *attrs, /* SPI number */ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters.outer_esp_spi); - MLX5_SET(fte_match_param, spec->match_value, misc_parameters.outer_esp_spi, - be32_to_cpu(attrs->spi)); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters.outer_esp_spi, attrs->spi); if (ip_version == 4) { memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, @@ -453,16 +388,18 @@ static void setup_fte_common(struct mlx5_accel_esp_xfrm_attrs *attrs, 0xff, 16); } - flow_act->ipsec_obj_id = ipsec_obj_id; + flow_act->crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC; + flow_act->crypto.obj_id = ipsec_obj_id; flow_act->flags |= FLOW_ACT_NO_APPEND; } static int rx_add_rule(struct mlx5e_priv *priv, - struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 ipsec_obj_id, - struct mlx5e_ipsec_rule *ipsec_rule) + struct mlx5e_ipsec_sa_entry *sa_entry) { u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule; + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; + u32 ipsec_obj_id = sa_entry->ipsec_obj_id; struct mlx5_modify_hdr *modify_hdr = NULL; struct mlx5e_accel_fs_esp_prot *fs_prot; struct mlx5_flow_destination dest = {}; @@ -508,7 +445,7 @@ static int rx_add_rule(struct mlx5e_priv *priv, } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_IPSEC_DECRYPT | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; flow_act.modify_hdr = modify_hdr; @@ -536,9 +473,7 @@ out: } static int tx_add_rule(struct mlx5e_priv *priv, - struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 ipsec_obj_id, - struct mlx5e_ipsec_rule *ipsec_rule) + struct mlx5e_ipsec_sa_entry *sa_entry) { struct mlx5_flow_act flow_act = {}; struct mlx5_flow_handle *rule; @@ -555,7 +490,8 @@ static int tx_add_rule(struct mlx5e_priv *priv, goto out; } - setup_fte_common(attrs, ipsec_obj_id, spec, &flow_act); + setup_fte_common(&sa_entry->attrs, sa_entry->ipsec_obj_id, spec, + &flow_act); /* Add IPsec indicator in metadata_reg_a */ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; @@ -565,16 +501,16 @@ static int tx_add_rule(struct mlx5e_priv *priv, MLX5_ETH_WQE_FT_META_IPSEC); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | - MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT; + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT; rule = mlx5_add_flow_rules(priv->ipsec->tx_fs->ft, spec, &flow_act, NULL, 0); if (IS_ERR(rule)) { err = PTR_ERR(rule); netdev_err(priv->netdev, "fail to add ipsec rule attrs->action=0x%x, err=%d\n", - attrs->action, err); + sa_entry->attrs.action, err); goto out; } - ipsec_rule->rule = rule; + sa_entry->ipsec_rule.rule = rule; out: kvfree(spec); @@ -583,133 +519,88 @@ out: return err; } -static void rx_del_rule(struct mlx5e_priv *priv, - struct mlx5_accel_esp_xfrm_attrs *attrs, - struct mlx5e_ipsec_rule *ipsec_rule) -{ - mlx5_del_flow_rules(ipsec_rule->rule); - ipsec_rule->rule = NULL; - - mlx5_modify_header_dealloc(priv->mdev, ipsec_rule->set_modify_hdr); - ipsec_rule->set_modify_hdr = NULL; - - rx_ft_put(priv, attrs->is_ipv6 ? ACCEL_FS_ESP6 : ACCEL_FS_ESP4); -} - -static void tx_del_rule(struct mlx5e_priv *priv, - struct mlx5e_ipsec_rule *ipsec_rule) -{ - mlx5_del_flow_rules(ipsec_rule->rule); - ipsec_rule->rule = NULL; - - tx_ft_put(priv); -} - int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv, - struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 ipsec_obj_id, - struct mlx5e_ipsec_rule *ipsec_rule) + struct mlx5e_ipsec_sa_entry *sa_entry) { - if (!priv->ipsec->rx_fs) - return -EOPNOTSUPP; + if (sa_entry->attrs.action == MLX5_ACCEL_ESP_ACTION_ENCRYPT) + return tx_add_rule(priv, sa_entry); - if (attrs->action == MLX5_ACCEL_ESP_ACTION_DECRYPT) - return rx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule); - else - return tx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule); + return rx_add_rule(priv, sa_entry); } void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv, - struct mlx5_accel_esp_xfrm_attrs *attrs, - struct mlx5e_ipsec_rule *ipsec_rule) + struct mlx5e_ipsec_sa_entry *sa_entry) { - if (!priv->ipsec->rx_fs) - return; + struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule; + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); - if (attrs->action == MLX5_ACCEL_ESP_ACTION_DECRYPT) - rx_del_rule(priv, attrs, ipsec_rule); - else - tx_del_rule(priv, ipsec_rule); -} + mlx5_del_flow_rules(ipsec_rule->rule); -static void fs_cleanup_tx(struct mlx5e_priv *priv) -{ - mutex_destroy(&priv->ipsec->tx_fs->mutex); - WARN_ON(priv->ipsec->tx_fs->refcnt); - kfree(priv->ipsec->tx_fs); - priv->ipsec->tx_fs = NULL; + if (sa_entry->attrs.action == MLX5_ACCEL_ESP_ACTION_ENCRYPT) { + tx_ft_put(priv); + return; + } + + mlx5_modify_header_dealloc(mdev, ipsec_rule->set_modify_hdr); + rx_ft_put(priv, + sa_entry->attrs.is_ipv6 ? ACCEL_FS_ESP6 : ACCEL_FS_ESP4); } -static void fs_cleanup_rx(struct mlx5e_priv *priv) +void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec) { struct mlx5e_accel_fs_esp_prot *fs_prot; struct mlx5e_accel_fs_esp *accel_esp; enum accel_fs_esp_type i; - accel_esp = priv->ipsec->rx_fs; + if (!ipsec->rx_fs) + return; + + mutex_destroy(&ipsec->tx_fs->mutex); + WARN_ON(ipsec->tx_fs->refcnt); + kfree(ipsec->tx_fs); + + accel_esp = ipsec->rx_fs; for (i = 0; i < ACCEL_FS_ESP_NUM_TYPES; i++) { fs_prot = &accel_esp->fs_prot[i]; mutex_destroy(&fs_prot->prot_mutex); WARN_ON(fs_prot->refcnt); } - kfree(priv->ipsec->rx_fs); - priv->ipsec->rx_fs = NULL; -} - -static int fs_init_tx(struct mlx5e_priv *priv) -{ - priv->ipsec->tx_fs = - kzalloc(sizeof(struct mlx5e_ipsec_tx), GFP_KERNEL); - if (!priv->ipsec->tx_fs) - return -ENOMEM; - - mutex_init(&priv->ipsec->tx_fs->mutex); - return 0; + kfree(ipsec->rx_fs); } -static int fs_init_rx(struct mlx5e_priv *priv) +int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec) { struct mlx5e_accel_fs_esp_prot *fs_prot; struct mlx5e_accel_fs_esp *accel_esp; + struct mlx5_flow_namespace *ns; enum accel_fs_esp_type i; + int err = -ENOMEM; + + ns = mlx5_get_flow_namespace(ipsec->mdev, + MLX5_FLOW_NAMESPACE_EGRESS_IPSEC); + if (!ns) + return -EOPNOTSUPP; - priv->ipsec->rx_fs = - kzalloc(sizeof(struct mlx5e_accel_fs_esp), GFP_KERNEL); - if (!priv->ipsec->rx_fs) + ipsec->tx_fs = kzalloc(sizeof(*ipsec->tx_fs), GFP_KERNEL); + if (!ipsec->tx_fs) return -ENOMEM; - accel_esp = priv->ipsec->rx_fs; + ipsec->rx_fs = kzalloc(sizeof(*ipsec->rx_fs), GFP_KERNEL); + if (!ipsec->rx_fs) + goto err_rx; + + mutex_init(&ipsec->tx_fs->mutex); + ipsec->tx_fs->ns = ns; + + accel_esp = ipsec->rx_fs; for (i = 0; i < ACCEL_FS_ESP_NUM_TYPES; i++) { fs_prot = &accel_esp->fs_prot[i]; mutex_init(&fs_prot->prot_mutex); } return 0; -} - -void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_priv *priv) -{ - if (!priv->ipsec->rx_fs) - return; - - fs_cleanup_tx(priv); - fs_cleanup_rx(priv); -} - -int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv) -{ - int err; - - if (!mlx5_is_ipsec_device(priv->mdev) || !priv->ipsec) - return -EOPNOTSUPP; - - err = fs_init_tx(priv); - if (err) - return err; - - err = fs_init_rx(priv); - if (err) - fs_cleanup_tx(priv); +err_rx: + kfree(ipsec->tx_fs); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h deleted file mode 100644 index 3389b3bb3ef8..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ - -#ifndef __MLX5_IPSEC_STEERING_H__ -#define __MLX5_IPSEC_STEERING_H__ - -#include "en.h" -#include "ipsec.h" -#include "accel/ipsec_offload.h" -#include "en/fs.h" - -#ifdef CONFIG_MLX5_EN_IPSEC -void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_priv *priv); -int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv); -int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv, - struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 ipsec_obj_id, - struct mlx5e_ipsec_rule *ipsec_rule); -void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv, - struct mlx5_accel_esp_xfrm_attrs *attrs, - struct mlx5e_ipsec_rule *ipsec_rule); -#else -static inline void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_priv *priv) {} -static inline int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv) { return 0; } -#endif -#endif /* __MLX5_IPSEC_STEERING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c new file mode 100644 index 000000000000..792724ce7336 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "ipsec.h" +#include "lib/mlx5.h" + +u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + u32 caps = 0; + + if (!MLX5_CAP_GEN(mdev, ipsec_offload)) + return 0; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return 0; + + if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC)) + return 0; + + if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ipsec_encrypt) || + !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ipsec_decrypt)) + return 0; + + if (!MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_encrypt) || + !MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_decrypt)) + return 0; + + if (MLX5_CAP_IPSEC(mdev, ipsec_crypto_offload) && + MLX5_CAP_ETH(mdev, insert_trailer) && MLX5_CAP_ETH(mdev, swp)) + caps |= MLX5_IPSEC_CAP_CRYPTO; + + if (!caps) + return 0; + + if (MLX5_CAP_IPSEC(mdev, ipsec_esn)) + caps |= MLX5_IPSEC_CAP_ESN; + + /* We can accommodate up to 2^24 different IPsec objects + * because we use up to 24 bit in flow table metadata + * to hold the IPsec Object unique handle. + */ + WARN_ON_ONCE(MLX5_CAP_IPSEC(mdev, log_max_ipsec_offload) > 24); + return caps; +} +EXPORT_SYMBOL_GPL(mlx5_ipsec_device_caps); + +static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u32 in[MLX5_ST_SZ_DW(create_ipsec_obj_in)] = {}; + void *obj, *salt_p, *salt_iv_p; + int err; + + obj = MLX5_ADDR_OF(create_ipsec_obj_in, in, ipsec_object); + + /* salt and seq_iv */ + salt_p = MLX5_ADDR_OF(ipsec_obj, obj, salt); + memcpy(salt_p, &aes_gcm->salt, sizeof(aes_gcm->salt)); + + MLX5_SET(ipsec_obj, obj, icv_length, MLX5_IPSEC_OBJECT_ICV_LEN_16B); + salt_iv_p = MLX5_ADDR_OF(ipsec_obj, obj, implicit_iv); + memcpy(salt_iv_p, &aes_gcm->seq_iv, sizeof(aes_gcm->seq_iv)); + /* esn */ + if (attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) { + MLX5_SET(ipsec_obj, obj, esn_en, 1); + MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn); + if (attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) + MLX5_SET(ipsec_obj, obj, esn_overlap, 1); + } + + MLX5_SET(ipsec_obj, obj, dekn, sa_entry->enc_key_id); + + /* general object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_IPSEC); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) + sa_entry->ipsec_obj_id = + MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return err; +} + +static void mlx5_destroy_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_IPSEC); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sa_entry->ipsec_obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct aes_gcm_keymat *aes_gcm = &sa_entry->attrs.aes_gcm; + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + int err; + + /* key */ + err = mlx5_create_encryption_key(mdev, aes_gcm->aes_key, + aes_gcm->key_len / BITS_PER_BYTE, + MLX5_ACCEL_OBJ_IPSEC_KEY, + &sa_entry->enc_key_id); + if (err) { + mlx5_core_dbg(mdev, "Failed to create encryption key (err = %d)\n", err); + return err; + } + + err = mlx5_create_ipsec_obj(sa_entry); + if (err) { + mlx5_core_dbg(mdev, "Failed to create IPsec object (err = %d)\n", err); + goto err_enc_key; + } + + return 0; + +err_enc_key: + mlx5_destroy_encryption_key(mdev, sa_entry->enc_key_id); + return err; +} + +void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + + mlx5_destroy_ipsec_obj(sa_entry); + mlx5_destroy_encryption_key(mdev, sa_entry->enc_key_id); +} + +static int mlx5_modify_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry, + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + u32 in[MLX5_ST_SZ_DW(modify_ipsec_obj_in)] = {}; + u32 out[MLX5_ST_SZ_DW(query_ipsec_obj_out)]; + u64 modify_field_select = 0; + u64 general_obj_types; + void *obj; + int err; + + if (!(attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED)) + return 0; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC)) + return -EINVAL; + + /* general object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_IPSEC); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sa_entry->ipsec_obj_id); + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) { + mlx5_core_err(mdev, "Query IPsec object failed (Object id %d), err = %d\n", + sa_entry->ipsec_obj_id, err); + return err; + } + + obj = MLX5_ADDR_OF(query_ipsec_obj_out, out, ipsec_object); + modify_field_select = MLX5_GET64(ipsec_obj, obj, modify_field_select); + + /* esn */ + if (!(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP) || + !(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB)) + return -EOPNOTSUPP; + + obj = MLX5_ADDR_OF(modify_ipsec_obj_in, in, ipsec_object); + MLX5_SET64(ipsec_obj, obj, modify_field_select, + MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP | + MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB); + MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn); + if (attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) + MLX5_SET(ipsec_obj, obj, esn_overlap, 1); + + /* general object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry, + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + int err; + + err = mlx5_modify_ipsec_obj(sa_entry, attrs); + if (err) + return; + + memcpy(&sa_entry->attrs, attrs, sizeof(sa_entry->attrs)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 2db9573a3fe6..6859f1c1a831 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -34,78 +34,15 @@ #include <crypto/aead.h> #include <net/xfrm.h> #include <net/esp.h> -#include "accel/ipsec_offload.h" -#include "en_accel/ipsec_rxtx.h" -#include "en_accel/ipsec.h" -#include "accel/accel.h" +#include "ipsec.h" +#include "ipsec_rxtx.h" #include "en.h" enum { - MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11, - MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12, - MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO = 0x17, -}; - -struct mlx5e_ipsec_rx_metadata { - unsigned char nexthdr; - __be32 sa_handle; -} __packed; - -enum { MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8, MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9, }; -struct mlx5e_ipsec_tx_metadata { - __be16 mss_inv; /* 1/MSS in 16bit fixed point, only for LSO */ - __be16 seq; /* LSBs of the first TCP seq, only for LSO */ - u8 esp_next_proto; /* Next protocol of ESP */ -} __packed; - -struct mlx5e_ipsec_metadata { - unsigned char syndrome; - union { - unsigned char raw[5]; - /* from FPGA to host, on successful decrypt */ - struct mlx5e_ipsec_rx_metadata rx; - /* from host to FPGA */ - struct mlx5e_ipsec_tx_metadata tx; - } __packed content; - /* packet type ID field */ - __be16 ethertype; -} __packed; - -#define MAX_LSO_MSS 2048 - -/* Pre-calculated (Q0.16) fixed-point inverse 1/x function */ -static __be16 mlx5e_ipsec_inverse_table[MAX_LSO_MSS]; - -static inline __be16 mlx5e_ipsec_mss_inv(struct sk_buff *skb) -{ - return mlx5e_ipsec_inverse_table[skb_shinfo(skb)->gso_size]; -} - -static struct mlx5e_ipsec_metadata *mlx5e_ipsec_add_metadata(struct sk_buff *skb) -{ - struct mlx5e_ipsec_metadata *mdata; - struct ethhdr *eth; - - if (unlikely(skb_cow_head(skb, sizeof(*mdata)))) - return ERR_PTR(-ENOMEM); - - eth = (struct ethhdr *)skb_push(skb, sizeof(*mdata)); - skb->mac_header -= sizeof(*mdata); - mdata = (struct mlx5e_ipsec_metadata *)(eth + 1); - - memmove(skb->data, skb->data + sizeof(*mdata), - 2 * ETH_ALEN); - - eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE); - - memset(mdata->content.raw, 0, sizeof(mdata->content.raw)); - return mdata; -} - static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x) { unsigned int alen = crypto_aead_authsize(x->data); @@ -157,11 +94,20 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb, /* Tunnel mode */ if (mode == XFRM_MODE_TUNNEL) { eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; - eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; if (xo->proto == IPPROTO_IPV6) eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; - if (inner_ip_hdr(skb)->protocol == IPPROTO_UDP) + + switch (xo->inner_ipproto) { + case IPPROTO_UDP: eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + fallthrough; + case IPPROTO_TCP: + /* IP | ESP | IP | [TCP | UDP] */ + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + default: + break; + } return; } @@ -235,40 +181,6 @@ void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x, skb_store_bits(skb, iv_offset, &seqno, 8); } -static void mlx5e_ipsec_set_metadata(struct sk_buff *skb, - struct mlx5e_ipsec_metadata *mdata, - struct xfrm_offload *xo) -{ - struct ip_esp_hdr *esph; - struct tcphdr *tcph; - - if (skb_is_gso(skb)) { - /* Add LSO metadata indication */ - esph = ip_esp_hdr(skb); - tcph = inner_tcp_hdr(skb); - netdev_dbg(skb->dev, " Offloading GSO packet outer L3 %u; L4 %u; Inner L3 %u; L4 %u\n", - skb->network_header, - skb->transport_header, - skb->inner_network_header, - skb->inner_transport_header); - netdev_dbg(skb->dev, " Offloading GSO packet of len %u; mss %u; TCP sp %u dp %u seq 0x%x ESP seq 0x%x\n", - skb->len, skb_shinfo(skb)->gso_size, - ntohs(tcph->source), ntohs(tcph->dest), - ntohl(tcph->seq), ntohl(esph->seq_no)); - mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP; - mdata->content.tx.mss_inv = mlx5e_ipsec_mss_inv(skb); - mdata->content.tx.seq = htons(ntohl(tcph->seq) & 0xFFFF); - } else { - mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD; - } - mdata->content.tx.esp_next_proto = xo->proto; - - netdev_dbg(skb->dev, " TX metadata syndrome %u proto %u mss_inv %04x seq %04x\n", - mdata->syndrome, mdata->content.tx.esp_next_proto, - ntohs(mdata->content.tx.mss_inv), - ntohs(mdata->content.tx.seq)); -} - void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe, struct mlx5e_accel_tx_ipsec_state *ipsec_st, struct mlx5_wqe_inline_seg *inlseg) @@ -289,16 +201,14 @@ static int mlx5e_ipsec_set_state(struct mlx5e_priv *priv, ipsec_st->x = x; ipsec_st->xo = xo; - if (mlx5_is_ipsec_device(priv->mdev)) { - aead = x->data; - alen = crypto_aead_authsize(aead); - blksize = ALIGN(crypto_aead_blocksize(aead), 4); - clen = ALIGN(skb->len + 2, blksize); - plen = max_t(u32, clen - skb->len, 4); - tailen = plen + alen; - ipsec_st->plen = plen; - ipsec_st->tailen = tailen; - } + aead = x->data; + alen = crypto_aead_authsize(aead); + blksize = ALIGN(crypto_aead_blocksize(aead), 4); + clen = ALIGN(skb->len + 2, blksize); + plen = max_t(u32, clen - skb->len, 4); + tailen = plen + alen; + ipsec_st->plen = plen; + ipsec_st->tailen = tailen; return 0; } @@ -331,19 +241,17 @@ void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, ((struct iphdr *)skb_network_header(skb))->protocol : ((struct ipv6hdr *)skb_network_header(skb))->nexthdr; - if (mlx5_is_ipsec_device(priv->mdev)) { - eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC); - eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER); - encap = x->encap; - if (!encap) { - eseg->trailer |= (l3_proto == IPPROTO_ESP) ? - cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) : - cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC); - } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) { - eseg->trailer |= (l3_proto == IPPROTO_ESP) ? - cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) : - cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC); - } + eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC); + eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER); + encap = x->encap; + if (!encap) { + eseg->trailer |= (l3_proto == IPPROTO_ESP) ? + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) : + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC); + } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) { + eseg->trailer |= (l3_proto == IPPROTO_ESP) ? + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) : + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC); } } @@ -354,7 +262,6 @@ bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); struct xfrm_offload *xo = xfrm_offload(skb); struct mlx5e_ipsec_sa_entry *sa_entry; - struct mlx5e_ipsec_metadata *mdata; struct xfrm_state *x; struct sec_path *sp; @@ -383,19 +290,8 @@ bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, goto drop; } - if (MLX5_CAP_GEN(priv->mdev, fpga)) { - mdata = mlx5e_ipsec_add_metadata(skb); - if (IS_ERR(mdata)) { - atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata); - goto drop; - } - } - sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; sa_entry->set_iv_op(skb, x, xo); - if (MLX5_CAP_GEN(priv->mdev, fpga)) - mlx5e_ipsec_set_metadata(skb, mdata, xo); - mlx5e_ipsec_set_state(priv, skb, x, xo, ipsec_st); return true; @@ -405,79 +301,6 @@ drop: return false; } -static inline struct xfrm_state * -mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb, - struct mlx5e_ipsec_metadata *mdata) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - struct xfrm_offload *xo; - struct xfrm_state *xs; - struct sec_path *sp; - u32 sa_handle; - - sp = secpath_set(skb); - if (unlikely(!sp)) { - atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc); - return NULL; - } - - sa_handle = be32_to_cpu(mdata->content.rx.sa_handle); - xs = mlx5e_ipsec_sadb_rx_lookup(priv->ipsec, sa_handle); - if (unlikely(!xs)) { - atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sadb_miss); - return NULL; - } - - sp = skb_sec_path(skb); - sp->xvec[sp->len++] = xs; - sp->olen++; - - xo = xfrm_offload(skb); - xo->flags = CRYPTO_DONE; - switch (mdata->syndrome) { - case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED: - xo->status = CRYPTO_SUCCESS; - if (likely(priv->ipsec->no_trailer)) { - xo->flags |= XFRM_ESP_NO_TRAILER; - xo->proto = mdata->content.rx.nexthdr; - } - break; - case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED: - xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED; - break; - case MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO: - xo->status = CRYPTO_INVALID_PROTOCOL; - break; - default: - atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome); - return NULL; - } - return xs; -} - -struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, - struct sk_buff *skb, u32 *cqe_bcnt) -{ - struct mlx5e_ipsec_metadata *mdata; - struct xfrm_state *xs; - - if (!is_metadata_hdr_valid(skb)) - return skb; - - /* Use the metadata */ - mdata = (struct mlx5e_ipsec_metadata *)(skb->data + ETH_HLEN); - xs = mlx5e_ipsec_build_sp(netdev, skb, mdata); - if (unlikely(!xs)) { - kfree_skb(skb); - return NULL; - } - - remove_metadata_hdr(skb); - *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN; - - return skb; -} - enum { MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED, MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED, @@ -509,7 +332,6 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, return; } - sp = skb_sec_path(skb); sp->xvec[sp->len++] = xs; sp->olen++; @@ -519,8 +341,6 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, switch (MLX5_IPSEC_METADATA_SYNDROM(ipsec_meta_data)) { case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED: xo->status = CRYPTO_SUCCESS; - if (WARN_ON_ONCE(priv->ipsec->no_trailer)) - xo->flags |= XFRM_ESP_NO_TRAILER; break; case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED: xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED; @@ -532,21 +352,3 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome); } } - -void mlx5e_ipsec_build_inverse_table(void) -{ - u16 mss_inv; - u32 mss; - - /* Calculate 1/x inverse table for use in GSO data path. - * Using this table, we provide the IPSec accelerator with the value of - * 1/gso_size so that it can infer the position of each segment inside - * the GSO, and increment the ESP sequence number, and generate the IV. - * The HW needs this value in Q0.16 fixed-point number format - */ - mlx5e_ipsec_inverse_table[1] = htons(0xFFFF); - for (mss = 2; mss < MAX_LSO_MSS; mss++) { - mss_inv = div_u64(1ULL << 32, mss) >> 16; - mlx5e_ipsec_inverse_table[mss] = htons(mss_inv); - } -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index b98db50c3418..1878a70b9031 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -39,9 +39,9 @@ #include "en.h" #include "en/txrx.h" -/* Bit31: IPsec marker, Bit30-24: IPsec syndrome, Bit23-0: IPsec obj id */ +/* Bit31: IPsec marker, Bit30: reserved, Bit29-24: IPsec syndrome, Bit23-0: IPsec obj id */ #define MLX5_IPSEC_METADATA_MARKER(metadata) (((metadata) >> 31) & 0x1) -#define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(6, 0)) +#define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(5, 0)) #define MLX5_IPSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(23, 0)) struct mlx5e_accel_tx_ipsec_state { @@ -53,9 +53,6 @@ struct mlx5e_accel_tx_ipsec_state { #ifdef CONFIG_MLX5_EN_IPSEC -struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, - struct sk_buff *skb, u32 *cqe_bcnt); - void mlx5e_ipsec_inverse_table_init(void); void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, struct xfrm_offload *xo); @@ -80,11 +77,6 @@ static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe) return MLX5_IPSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata)); } -static inline bool mlx5e_ipsec_is_tx_flow(struct mlx5e_accel_tx_ipsec_state *ipsec_st) -{ - return ipsec_st->x; -} - static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg) { return eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC); @@ -131,14 +123,17 @@ static inline bool mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) { - struct xfrm_offload *xo = xfrm_offload(skb); + u8 inner_ipproto; if (!mlx5e_ipsec_eseg_meta(eseg)) return false; eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; - if (xo->inner_ipproto) { - eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM | MLX5_ETH_WQE_L3_INNER_CSUM; + inner_ipproto = xfrm_offload(skb)->inner_ipproto; + if (inner_ipproto) { + eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; + if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP) + eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; sq->stats->csum_partial_inner++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c index 5cb936541b9e..9de84821dafb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c @@ -35,27 +35,7 @@ #include <net/sock.h> #include "en.h" -#include "accel/ipsec.h" -#include "fpga/sdk.h" -#include "en_accel/ipsec.h" -#include "fpga/ipsec.h" - -static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = { - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_in_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_out_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_bypass_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_in_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_out_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_bypass_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_drop_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_auth_fail_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_drop_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_success) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_fail) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_success) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_fail) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_cmd_drop) }, -}; +#include "ipsec.h" static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) }, @@ -65,13 +45,11 @@ static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) }, { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) }, { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_metadata) }, }; #define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \ atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset)) -#define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc) #define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_sw) @@ -103,45 +81,4 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_sw) return idx; } -static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_hw) -{ - return (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev)) ? NUM_IPSEC_HW_COUNTERS : 0; -} - -static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_hw) -{ - int ret = 0; - - if (priv->ipsec) - ret = mlx5_accel_ipsec_counters_read(priv->mdev, (u64 *)&priv->ipsec->stats, - NUM_IPSEC_HW_COUNTERS); - if (ret) - memset(&priv->ipsec->stats, 0, sizeof(priv->ipsec->stats)); -} - -static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec_hw) -{ - unsigned int i; - - if (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev)) - for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - mlx5e_ipsec_hw_stats_desc[i].format); - - return idx; -} - -static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_hw) -{ - int i; - - if (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev)) - for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats, - mlx5e_ipsec_hw_stats_desc, - i); - return idx; -} - MLX5E_DEFINE_STATS_GRP(ipsec_sw, 0); -MLX5E_DEFINE_STATS_GRP(ipsec_hw, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index d93aadbf10da..da2184c94203 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -2,11 +2,49 @@ // Copyright (c) 2019 Mellanox Technologies. #include "en.h" -#include "en_accel/tls.h" +#include "lib/mlx5.h" #include "en_accel/ktls.h" #include "en_accel/ktls_utils.h" #include "en_accel/fs_tcp.h" +int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id) +{ + u32 sz_bytes; + void *key; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = + (struct tls12_crypto_info_aes_gcm_256 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + default: + return -EINVAL; + } + + return mlx5_create_encryption_key(mdev, key, sz_bytes, + MLX5_ACCEL_OBJ_TLS_KEY, + p_key_id); +} + +void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) +{ + mlx5_destroy_encryption_key(mdev, key_id); +} + static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk, enum tls_offload_ctx_dir direction, struct tls_crypto_info *crypto_info, @@ -16,7 +54,7 @@ static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk, struct mlx5_core_dev *mdev = priv->mdev; int err; - if (WARN_ON(!mlx5e_ktls_type_check(mdev, crypto_info))) + if (!mlx5e_ktls_type_check(mdev, crypto_info)) return -EOPNOTSUPP; if (direction == TLS_OFFLOAD_CTX_DIR_TX) @@ -54,20 +92,38 @@ static const struct tlsdev_ops mlx5e_ktls_ops = { .tls_dev_resync = mlx5e_ktls_resync, }; +bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev) +{ + u8 max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev); + + if (is_kdump_kernel() || !MLX5_CAP_GEN(mdev, tls_rx)) + return false; + + /* Check the possibility to post the required ICOSQ WQEs. */ + if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS)) + return false; + if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS)) + return false; + if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_KTLS_GET_PROGRESS_WQEBBS)) + return false; + + return true; +} + void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; - if (!mlx5e_accel_is_ktls_tx(mdev) && !mlx5e_accel_is_ktls_rx(mdev)) + if (!mlx5e_is_ktls_tx(mdev) && !mlx5e_is_ktls_rx(mdev)) return; - if (mlx5e_accel_is_ktls_tx(mdev)) { + if (mlx5e_is_ktls_tx(mdev)) { netdev->hw_features |= NETIF_F_HW_TLS_TX; netdev->features |= NETIF_F_HW_TLS_TX; } - if (mlx5e_accel_is_ktls_rx(mdev)) + if (mlx5e_is_ktls_rx(mdev)) netdev->hw_features |= NETIF_F_HW_TLS_RX; netdev->tlsdev_ops = &mlx5e_ktls_ops; @@ -80,9 +136,9 @@ int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable) mutex_lock(&priv->state_lock); if (enable) - err = mlx5e_accel_fs_tcp_create(priv); + err = mlx5e_accel_fs_tcp_create(priv->fs); else - mlx5e_accel_fs_tcp_destroy(priv); + mlx5e_accel_fs_tcp_destroy(priv->fs); mutex_unlock(&priv->state_lock); return err; @@ -92,7 +148,7 @@ int mlx5e_ktls_init_rx(struct mlx5e_priv *priv) { int err; - if (!mlx5e_accel_is_ktls_rx(priv->mdev)) + if (!mlx5e_is_ktls_rx(priv->mdev)) return 0; priv->tls->rx_wq = create_singlethread_workqueue("mlx5e_tls_rx"); @@ -100,7 +156,7 @@ int mlx5e_ktls_init_rx(struct mlx5e_priv *priv) return -ENOMEM; if (priv->netdev->features & NETIF_F_HW_TLS_RX) { - err = mlx5e_accel_fs_tcp_create(priv); + err = mlx5e_accel_fs_tcp_create(priv->fs); if (err) { destroy_workqueue(priv->tls->rx_wq); return err; @@ -112,11 +168,32 @@ int mlx5e_ktls_init_rx(struct mlx5e_priv *priv) void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv) { - if (!mlx5e_accel_is_ktls_rx(priv->mdev)) + if (!mlx5e_is_ktls_rx(priv->mdev)) return; if (priv->netdev->features & NETIF_F_HW_TLS_RX) - mlx5e_accel_fs_tcp_destroy(priv); + mlx5e_accel_fs_tcp_destroy(priv->fs); destroy_workqueue(priv->tls->rx_wq); } + +int mlx5e_ktls_init(struct mlx5e_priv *priv) +{ + struct mlx5e_tls *tls; + + if (!mlx5e_is_ktls_device(priv->mdev)) + return 0; + + tls = kzalloc(sizeof(*tls), GFP_KERNEL); + if (!tls) + return -ENOMEM; + + priv->tls = tls; + return 0; +} + +void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) +{ + kfree(priv->tls); + priv->tls = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index 5833deb2354c..1c35045e41fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -4,11 +4,51 @@ #ifndef __MLX5E_KTLS_H__ #define __MLX5E_KTLS_H__ +#include <linux/tls.h> +#include <net/tls.h> #include "en.h" #ifdef CONFIG_MLX5_EN_TLS +int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id); +void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id); + +static inline bool mlx5e_is_ktls_device(struct mlx5_core_dev *mdev) +{ + if (is_kdump_kernel()) + return false; + + if (!MLX5_CAP_GEN(mdev, tls_tx) && !MLX5_CAP_GEN(mdev, tls_rx)) + return false; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return false; + + return (MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128) || + MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_256)); +} + +static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info) +{ + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + if (crypto_info->version == TLS_1_2_VERSION) + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); + break; + case TLS_CIPHER_AES_GCM_256: + if (crypto_info->version == TLS_1_2_VERSION) + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_256); + break; + } + + return false; +} void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv); +int mlx5e_ktls_init_tx(struct mlx5e_priv *priv); +void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv); int mlx5e_ktls_init_rx(struct mlx5e_priv *priv); void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv); int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable); @@ -16,27 +56,46 @@ struct mlx5e_ktls_resync_resp * mlx5e_ktls_rx_resync_create_resp_list(void); void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list); -static inline bool mlx5e_accel_is_ktls_tx(struct mlx5_core_dev *mdev) +static inline bool mlx5e_is_ktls_tx(struct mlx5_core_dev *mdev) { - return !is_kdump_kernel() && - mlx5_accel_is_ktls_tx(mdev); + return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_tx); } -static inline bool mlx5e_accel_is_ktls_rx(struct mlx5_core_dev *mdev) +bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev); + +struct mlx5e_tls_sw_stats { + atomic64_t tx_tls_ctx; + atomic64_t tx_tls_del; + atomic64_t tx_tls_pool_alloc; + atomic64_t tx_tls_pool_free; + atomic64_t rx_tls_ctx; + atomic64_t rx_tls_del; +}; + +struct mlx5e_tls { + struct mlx5e_tls_sw_stats sw_stats; + struct workqueue_struct *rx_wq; + struct mlx5e_tls_tx_pool *tx_pool; +}; + +int mlx5e_ktls_init(struct mlx5e_priv *priv); +void mlx5e_ktls_cleanup(struct mlx5e_priv *priv); + +int mlx5e_ktls_get_count(struct mlx5e_priv *priv); +int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data); +int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data); + +#else +static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) { - return !is_kdump_kernel() && - mlx5_accel_is_ktls_rx(mdev); } -static inline bool mlx5e_accel_is_ktls_device(struct mlx5_core_dev *mdev) +static inline int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) { - return !is_kdump_kernel() && - mlx5_accel_is_ktls_device(mdev); + return 0; } -#else - -static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) +static inline void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) { } @@ -64,10 +123,23 @@ mlx5e_ktls_rx_resync_create_resp_list(void) static inline void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) {} -static inline bool mlx5e_accel_is_ktls_tx(struct mlx5_core_dev *mdev) { return false; } -static inline bool mlx5e_accel_is_ktls_rx(struct mlx5_core_dev *mdev) { return false; } -static inline bool mlx5e_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; } +static inline bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev) +{ + return false; +} + +static inline int mlx5e_ktls_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) { } +static inline int mlx5e_ktls_get_count(struct mlx5e_priv *priv) { return 0; } +static inline int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data) +{ + return 0; +} +static inline int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data) +{ + return 0; +} #endif #endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 15711814d2d2..3e54834747ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -3,7 +3,7 @@ #include <net/inet6_hashtables.h> #include "en_accel/en_accel.h" -#include "en_accel/tls.h" +#include "en_accel/ktls.h" #include "en_accel/ktls_txrx.h" #include "en_accel/ktls_utils.h" #include "en_accel/fs_tcp.h" @@ -43,7 +43,7 @@ struct mlx5e_ktls_rx_resync_ctx { }; struct mlx5e_ktls_offload_context_rx { - struct tls12_crypto_info_aes_gcm_128 crypto_info; + union mlx5e_crypto_info crypto_info; struct accel_rule rule; struct sock *sk; struct mlx5e_rq_stats *rq_stats; @@ -111,7 +111,7 @@ static void accel_rule_handle_work(struct work_struct *work) if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) goto out; - rule = mlx5e_accel_fs_add_sk(accel_rule->priv, priv_rx->sk, + rule = mlx5e_accel_fs_add_sk(accel_rule->priv->fs, priv_rx->sk, mlx5e_tir_get_tirn(&priv_rx->tir), MLX5_FS_DEFAULT_FLOW_TAG); if (!IS_ERR_OR_NULL(rule)) @@ -231,8 +231,7 @@ mlx5e_set_ktls_rx_priv_ctx(struct tls_context *tls_ctx, struct mlx5e_ktls_offload_context_rx **ctx = __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX); - BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_rx *) > - TLS_OFFLOAD_CONTEXT_SIZE_RX); + BUILD_BUG_ON(sizeof(priv_rx) > TLS_DRIVER_STATE_SIZE_RX); *ctx = priv_rx; } @@ -363,7 +362,6 @@ static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync, static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx, struct mlx5e_channel *c) { - struct tls12_crypto_info_aes_gcm_128 *info = &priv_rx->crypto_info; struct mlx5e_ktls_resync_resp *ktls_resync; struct mlx5e_icosq *sq; bool trigger_poll; @@ -374,7 +372,31 @@ static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_r spin_lock_bh(&ktls_resync->lock); spin_lock_bh(&priv_rx->lock); - memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, sizeof(info->rec_seq)); + switch (priv_rx->crypto_info.crypto_info.cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + &priv_rx->crypto_info.crypto_info_128; + + memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, + sizeof(info->rec_seq)); + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = + &priv_rx->crypto_info.crypto_info_256; + + memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, + sizeof(info->rec_seq)); + break; + } + default: + WARN_ONCE(1, "Unsupported cipher type %u\n", + priv_rx->crypto_info.crypto_info.cipher_type); + spin_unlock_bh(&priv_rx->lock); + spin_unlock_bh(&ktls_resync->lock); + return; + } + if (list_empty(&priv_rx->list)) { list_add_tail(&priv_rx->list, &ktls_resync->list); trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); @@ -462,6 +484,7 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) { struct ethhdr *eth = (struct ethhdr *)(skb->data); struct net_device *netdev = rq->netdev; + struct net *net = dev_net(netdev); struct sock *sk = NULL; unsigned int datalen; struct iphdr *iph; @@ -476,7 +499,7 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) depth += sizeof(struct iphdr); th = (void *)iph + sizeof(struct iphdr); - sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo, + sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, iph->saddr, th->source, iph->daddr, th->dest, netdev->ifindex); #if IS_ENABLED(CONFIG_IPV6) @@ -486,7 +509,7 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) depth += sizeof(struct ipv6hdr); th = (void *)ipv6h + sizeof(struct ipv6hdr); - sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo, + sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &ipv6h->saddr, th->source, &ipv6h->daddr, ntohs(th->dest), netdev->ifindex, 0); @@ -604,14 +627,26 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, INIT_LIST_HEAD(&priv_rx->list); spin_lock_init(&priv_rx->lock); - priv_rx->crypto_info = - *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + priv_rx->crypto_info.crypto_info_128 = + *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + break; + case TLS_CIPHER_AES_GCM_256: + priv_rx->crypto_info.crypto_info_256 = + *(struct tls12_crypto_info_aes_gcm_256 *)crypto_info; + break; + default: + WARN_ONCE(1, "Unsupported cipher type %u\n", + crypto_info->cipher_type); + return -EOPNOTSUPP; + } rxq = mlx5e_ktls_sk_get_rxq(sk); priv_rx->rxq = rxq; priv_rx->sk = sk; - priv_rx->rq_stats = &priv->channel_stats[rxq].rq; + priv_rx->rq_stats = &priv->channel_stats[rxq]->rq; priv_rx->sw_stats = &priv->tls->sw_stats; mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c index 56e7b2aee85f..7c1c0eb16787 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c @@ -36,18 +36,13 @@ #include "en.h" #include "fpga/sdk.h" -#include "en_accel/tls.h" - -static const struct counter_desc mlx5e_tls_sw_stats_desc[] = { - { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_metadata) }, - { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_resync_alloc) }, - { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_no_sync_data) }, - { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_bypass_required) }, -}; +#include "en_accel/ktls.h" static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_del) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_alloc) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_free) }, { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_del) }, }; @@ -55,51 +50,43 @@ static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = { #define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \ atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset)) -static const struct counter_desc *get_tls_atomic_stats(struct mlx5e_priv *priv) -{ - if (!priv->tls) - return NULL; - if (mlx5e_accel_is_ktls_device(priv->mdev)) - return mlx5e_ktls_sw_stats_desc; - return mlx5e_tls_sw_stats_desc; -} - -int mlx5e_tls_get_count(struct mlx5e_priv *priv) +int mlx5e_ktls_get_count(struct mlx5e_priv *priv) { if (!priv->tls) return 0; - if (mlx5e_accel_is_ktls_device(priv->mdev)) - return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc); - return ARRAY_SIZE(mlx5e_tls_sw_stats_desc); + + return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc); } -int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) +int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { - const struct counter_desc *stats_desc; unsigned int i, n, idx = 0; - stats_desc = get_tls_atomic_stats(priv); - n = mlx5e_tls_get_count(priv); + if (!priv->tls) + return 0; + + n = mlx5e_ktls_get_count(priv); for (i = 0; i < n; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - stats_desc[i].format); + mlx5e_ktls_sw_stats_desc[i].format); return n; } -int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) +int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data) { - const struct counter_desc *stats_desc; unsigned int i, n, idx = 0; - stats_desc = get_tls_atomic_stats(priv); - n = mlx5e_tls_get_count(priv); + if (!priv->tls) + return 0; + + n = mlx5e_ktls_get_count(priv); for (i = 0; i < n; i++) - data[idx++] = - MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats, - stats_desc, i); + data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats, + mlx5e_ktls_sw_stats_desc, + i); return n; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 9ad3459fb63a..2e0335246967 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB // Copyright (c) 2019 Mellanox Technologies. -#include "en_accel/tls.h" +#include "en_accel/ktls.h" #include "en_accel/ktls_txrx.h" #include "en_accel/ktls_utils.h" @@ -27,38 +27,78 @@ u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *pa { u16 num_dumps, stop_room = 0; - if (!mlx5e_accel_is_ktls_tx(mdev)) + if (!mlx5e_is_ktls_tx(mdev)) return 0; num_dumps = mlx5e_ktls_dumps_num_wqes(params, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE); - stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS); - stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS); - stop_room += num_dumps * mlx5e_stop_room_for_wqe(MLX5E_KTLS_DUMP_WQEBBS); + stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS); + stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS); + stop_room += num_dumps * mlx5e_stop_room_for_wqe(mdev, MLX5E_KTLS_DUMP_WQEBBS); + stop_room += 1; /* fence nop */ return stop_room; } +static void mlx5e_ktls_set_tisc(struct mlx5_core_dev *mdev, void *tisc) +{ + MLX5_SET(tisc, tisc, tls_en, 1); + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); +} + static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn) { u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; - void *tisc; - tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx)); - MLX5_SET(tisc, tisc, tls_en, 1); + return mlx5_core_create_tis(mdev, in, tisn); +} + +static int mlx5e_ktls_create_tis_cb(struct mlx5_core_dev *mdev, + struct mlx5_async_ctx *async_ctx, + u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; - return mlx5e_create_tis(mdev, in, tisn); + mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx)); + MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); + + return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in), + out, outlen, callback, context); +} + +static int mlx5e_ktls_destroy_tis_cb(struct mlx5_core_dev *mdev, u32 tisn, + struct mlx5_async_ctx *async_ctx, + u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {}; + + MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, in, tisn, tisn); + + return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in), + out, outlen, callback, context); } struct mlx5e_ktls_offload_context_tx { - struct tls_offload_context_tx *tx_ctx; - struct tls12_crypto_info_aes_gcm_128 crypto_info; - struct mlx5e_tls_sw_stats *sw_stats; + /* fast path */ u32 expected_seq; u32 tisn; - u32 key_id; bool ctx_post_pending; + /* control / resync */ + struct list_head list_node; /* member of the pool */ + union mlx5e_crypto_info crypto_info; + struct tls_offload_context_tx *tx_ctx; + struct mlx5_core_dev *mdev; + struct mlx5e_tls_sw_stats *sw_stats; + u32 key_id; + u8 create_err : 1; }; static void @@ -68,8 +108,7 @@ mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx, struct mlx5e_ktls_offload_context_tx **ctx = __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX); - BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_tx *) > - TLS_OFFLOAD_CONTEXT_SIZE_TX); + BUILD_BUG_ON(sizeof(priv_tx) > TLS_DRIVER_STATE_SIZE_TX); *ctx = priv_tx; } @@ -83,65 +122,410 @@ mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx) return *ctx; } +/* struct for callback API management */ +struct mlx5e_async_ctx { + struct mlx5_async_work context; + struct mlx5_async_ctx async_ctx; + struct work_struct work; + struct mlx5e_ktls_offload_context_tx *priv_tx; + struct completion complete; + int err; + union { + u32 out_create[MLX5_ST_SZ_DW(create_tis_out)]; + u32 out_destroy[MLX5_ST_SZ_DW(destroy_tis_out)]; + }; +}; + +static struct mlx5e_async_ctx *mlx5e_bulk_async_init(struct mlx5_core_dev *mdev, int n) +{ + struct mlx5e_async_ctx *bulk_async; + int i; + + bulk_async = kvcalloc(n, sizeof(struct mlx5e_async_ctx), GFP_KERNEL); + if (!bulk_async) + return NULL; + + for (i = 0; i < n; i++) { + struct mlx5e_async_ctx *async = &bulk_async[i]; + + mlx5_cmd_init_async_ctx(mdev, &async->async_ctx); + init_completion(&async->complete); + } + + return bulk_async; +} + +static void mlx5e_bulk_async_cleanup(struct mlx5e_async_ctx *bulk_async, int n) +{ + int i; + + for (i = 0; i < n; i++) { + struct mlx5e_async_ctx *async = &bulk_async[i]; + + mlx5_cmd_cleanup_async_ctx(&async->async_ctx); + } + kvfree(bulk_async); +} + +static void create_tis_callback(int status, struct mlx5_async_work *context) +{ + struct mlx5e_async_ctx *async = + container_of(context, struct mlx5e_async_ctx, context); + struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx; + + if (status) { + async->err = status; + priv_tx->create_err = 1; + goto out; + } + + priv_tx->tisn = MLX5_GET(create_tis_out, async->out_create, tisn); +out: + complete(&async->complete); +} + +static void destroy_tis_callback(int status, struct mlx5_async_work *context) +{ + struct mlx5e_async_ctx *async = + container_of(context, struct mlx5e_async_ctx, context); + struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx; + + complete(&async->complete); + kfree(priv_tx); +} + +static struct mlx5e_ktls_offload_context_tx * +mlx5e_tls_priv_tx_init(struct mlx5_core_dev *mdev, struct mlx5e_tls_sw_stats *sw_stats, + struct mlx5e_async_ctx *async) +{ + struct mlx5e_ktls_offload_context_tx *priv_tx; + int err; + + priv_tx = kzalloc(sizeof(*priv_tx), GFP_KERNEL); + if (!priv_tx) + return ERR_PTR(-ENOMEM); + + priv_tx->mdev = mdev; + priv_tx->sw_stats = sw_stats; + + if (!async) { + err = mlx5e_ktls_create_tis(mdev, &priv_tx->tisn); + if (err) + goto err_out; + } else { + async->priv_tx = priv_tx; + err = mlx5e_ktls_create_tis_cb(mdev, &async->async_ctx, + async->out_create, sizeof(async->out_create), + create_tis_callback, &async->context); + if (err) + goto err_out; + } + + return priv_tx; + +err_out: + kfree(priv_tx); + return ERR_PTR(err); +} + +static void mlx5e_tls_priv_tx_cleanup(struct mlx5e_ktls_offload_context_tx *priv_tx, + struct mlx5e_async_ctx *async) +{ + if (priv_tx->create_err) { + complete(&async->complete); + kfree(priv_tx); + return; + } + async->priv_tx = priv_tx; + mlx5e_ktls_destroy_tis_cb(priv_tx->mdev, priv_tx->tisn, + &async->async_ctx, + async->out_destroy, sizeof(async->out_destroy), + destroy_tis_callback, &async->context); +} + +static void mlx5e_tls_priv_tx_list_cleanup(struct mlx5_core_dev *mdev, + struct list_head *list, int size) +{ + struct mlx5e_ktls_offload_context_tx *obj, *n; + struct mlx5e_async_ctx *bulk_async; + int i; + + bulk_async = mlx5e_bulk_async_init(mdev, size); + if (!bulk_async) + return; + + i = 0; + list_for_each_entry_safe(obj, n, list, list_node) { + mlx5e_tls_priv_tx_cleanup(obj, &bulk_async[i]); + i++; + } + + for (i = 0; i < size; i++) { + struct mlx5e_async_ctx *async = &bulk_async[i]; + + wait_for_completion(&async->complete); + } + mlx5e_bulk_async_cleanup(bulk_async, size); +} + +/* Recycling pool API */ + +#define MLX5E_TLS_TX_POOL_BULK (16) +#define MLX5E_TLS_TX_POOL_HIGH (4 * 1024) +#define MLX5E_TLS_TX_POOL_LOW (MLX5E_TLS_TX_POOL_HIGH / 4) + +struct mlx5e_tls_tx_pool { + struct mlx5_core_dev *mdev; + struct mlx5e_tls_sw_stats *sw_stats; + struct mutex lock; /* Protects access to the pool */ + struct list_head list; + size_t size; + + struct workqueue_struct *wq; + struct work_struct create_work; + struct work_struct destroy_work; +}; + +static void create_work(struct work_struct *work) +{ + struct mlx5e_tls_tx_pool *pool = + container_of(work, struct mlx5e_tls_tx_pool, create_work); + struct mlx5e_ktls_offload_context_tx *obj; + struct mlx5e_async_ctx *bulk_async; + LIST_HEAD(local_list); + int i, j, err = 0; + + bulk_async = mlx5e_bulk_async_init(pool->mdev, MLX5E_TLS_TX_POOL_BULK); + if (!bulk_async) + return; + + for (i = 0; i < MLX5E_TLS_TX_POOL_BULK; i++) { + obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, &bulk_async[i]); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + list_add(&obj->list_node, &local_list); + } + + for (j = 0; j < i; j++) { + struct mlx5e_async_ctx *async = &bulk_async[j]; + + wait_for_completion(&async->complete); + if (!err && async->err) + err = async->err; + } + atomic64_add(i, &pool->sw_stats->tx_tls_pool_alloc); + mlx5e_bulk_async_cleanup(bulk_async, MLX5E_TLS_TX_POOL_BULK); + if (err) + goto err_out; + + mutex_lock(&pool->lock); + if (pool->size + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH) { + mutex_unlock(&pool->lock); + goto err_out; + } + list_splice(&local_list, &pool->list); + pool->size += MLX5E_TLS_TX_POOL_BULK; + if (pool->size <= MLX5E_TLS_TX_POOL_LOW) + queue_work(pool->wq, work); + mutex_unlock(&pool->lock); + return; + +err_out: + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, i); + atomic64_add(i, &pool->sw_stats->tx_tls_pool_free); +} + +static void destroy_work(struct work_struct *work) +{ + struct mlx5e_tls_tx_pool *pool = + container_of(work, struct mlx5e_tls_tx_pool, destroy_work); + struct mlx5e_ktls_offload_context_tx *obj; + LIST_HEAD(local_list); + int i = 0; + + mutex_lock(&pool->lock); + if (pool->size < MLX5E_TLS_TX_POOL_HIGH) { + mutex_unlock(&pool->lock); + return; + } + + list_for_each_entry(obj, &pool->list, list_node) + if (++i == MLX5E_TLS_TX_POOL_BULK) + break; + + list_cut_position(&local_list, &pool->list, &obj->list_node); + pool->size -= MLX5E_TLS_TX_POOL_BULK; + if (pool->size >= MLX5E_TLS_TX_POOL_HIGH) + queue_work(pool->wq, work); + mutex_unlock(&pool->lock); + + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK); + atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free); +} + +static struct mlx5e_tls_tx_pool *mlx5e_tls_tx_pool_init(struct mlx5_core_dev *mdev, + struct mlx5e_tls_sw_stats *sw_stats) +{ + struct mlx5e_tls_tx_pool *pool; + + BUILD_BUG_ON(MLX5E_TLS_TX_POOL_LOW + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH); + + pool = kvzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return NULL; + + pool->wq = create_singlethread_workqueue("mlx5e_tls_tx_pool"); + if (!pool->wq) + goto err_free; + + INIT_LIST_HEAD(&pool->list); + mutex_init(&pool->lock); + + INIT_WORK(&pool->create_work, create_work); + INIT_WORK(&pool->destroy_work, destroy_work); + + pool->mdev = mdev; + pool->sw_stats = sw_stats; + + return pool; + +err_free: + kvfree(pool); + return NULL; +} + +static void mlx5e_tls_tx_pool_list_cleanup(struct mlx5e_tls_tx_pool *pool) +{ + while (pool->size > MLX5E_TLS_TX_POOL_BULK) { + struct mlx5e_ktls_offload_context_tx *obj; + LIST_HEAD(local_list); + int i = 0; + + list_for_each_entry(obj, &pool->list, list_node) + if (++i == MLX5E_TLS_TX_POOL_BULK) + break; + + list_cut_position(&local_list, &pool->list, &obj->list_node); + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK); + atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free); + pool->size -= MLX5E_TLS_TX_POOL_BULK; + } + if (pool->size) { + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &pool->list, pool->size); + atomic64_add(pool->size, &pool->sw_stats->tx_tls_pool_free); + } +} + +static void mlx5e_tls_tx_pool_cleanup(struct mlx5e_tls_tx_pool *pool) +{ + mlx5e_tls_tx_pool_list_cleanup(pool); + destroy_workqueue(pool->wq); + kvfree(pool); +} + +static void pool_push(struct mlx5e_tls_tx_pool *pool, struct mlx5e_ktls_offload_context_tx *obj) +{ + mutex_lock(&pool->lock); + list_add(&obj->list_node, &pool->list); + if (++pool->size == MLX5E_TLS_TX_POOL_HIGH) + queue_work(pool->wq, &pool->destroy_work); + mutex_unlock(&pool->lock); +} + +static struct mlx5e_ktls_offload_context_tx *pool_pop(struct mlx5e_tls_tx_pool *pool) +{ + struct mlx5e_ktls_offload_context_tx *obj; + + mutex_lock(&pool->lock); + if (unlikely(pool->size == 0)) { + /* pool is empty: + * - trigger the populating work, and + * - serve the current context via the regular blocking api. + */ + queue_work(pool->wq, &pool->create_work); + mutex_unlock(&pool->lock); + obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, NULL); + if (!IS_ERR(obj)) + atomic64_inc(&pool->sw_stats->tx_tls_pool_alloc); + return obj; + } + + obj = list_first_entry(&pool->list, struct mlx5e_ktls_offload_context_tx, + list_node); + list_del(&obj->list_node); + if (--pool->size == MLX5E_TLS_TX_POOL_LOW) + queue_work(pool->wq, &pool->create_work); + mutex_unlock(&pool->lock); + return obj; +} + +/* End of pool API */ + int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn) { struct mlx5e_ktls_offload_context_tx *priv_tx; + struct mlx5e_tls_tx_pool *pool; struct tls_context *tls_ctx; - struct mlx5_core_dev *mdev; struct mlx5e_priv *priv; int err; tls_ctx = tls_get_ctx(sk); priv = netdev_priv(netdev); - mdev = priv->mdev; + pool = priv->tls->tx_pool; - priv_tx = kzalloc(sizeof(*priv_tx), GFP_KERNEL); - if (!priv_tx) - return -ENOMEM; + priv_tx = pool_pop(pool); + if (IS_ERR(priv_tx)) + return PTR_ERR(priv_tx); - err = mlx5_ktls_create_key(mdev, crypto_info, &priv_tx->key_id); + err = mlx5_ktls_create_key(pool->mdev, crypto_info, &priv_tx->key_id); if (err) goto err_create_key; - priv_tx->sw_stats = &priv->tls->sw_stats; priv_tx->expected_seq = start_offload_tcp_sn; - priv_tx->crypto_info = - *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + priv_tx->crypto_info.crypto_info_128 = + *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + break; + case TLS_CIPHER_AES_GCM_256: + priv_tx->crypto_info.crypto_info_256 = + *(struct tls12_crypto_info_aes_gcm_256 *)crypto_info; + break; + default: + WARN_ONCE(1, "Unsupported cipher type %u\n", + crypto_info->cipher_type); + return -EOPNOTSUPP; + } priv_tx->tx_ctx = tls_offload_ctx_tx(tls_ctx); mlx5e_set_ktls_tx_priv_ctx(tls_ctx, priv_tx); - err = mlx5e_ktls_create_tis(mdev, &priv_tx->tisn); - if (err) - goto err_create_tis; - priv_tx->ctx_post_pending = true; atomic64_inc(&priv_tx->sw_stats->tx_tls_ctx); return 0; -err_create_tis: - mlx5_ktls_destroy_key(mdev, priv_tx->key_id); err_create_key: - kfree(priv_tx); + pool_push(pool, priv_tx); return err; } void mlx5e_ktls_del_tx(struct net_device *netdev, struct tls_context *tls_ctx) { struct mlx5e_ktls_offload_context_tx *priv_tx; - struct mlx5_core_dev *mdev; + struct mlx5e_tls_tx_pool *pool; struct mlx5e_priv *priv; priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); priv = netdev_priv(netdev); - mdev = priv->mdev; + pool = priv->tls->tx_pool; atomic64_inc(&priv_tx->sw_stats->tx_tls_del); - mlx5e_destroy_tis(mdev, priv_tx->tisn); - mlx5_ktls_destroy_key(mdev, priv_tx->key_id); - kfree(priv_tx); + mlx5_ktls_destroy_key(priv_tx->mdev, priv_tx->key_id); + pool_push(pool, priv_tx); } static void tx_fill_wi(struct mlx5e_txqsq *sq, @@ -202,6 +586,16 @@ post_progress_params(struct mlx5e_txqsq *sq, sq->pc += num_wqebbs; } +static void tx_post_fence_nop(struct mlx5e_txqsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + + tx_fill_wi(sq, pi, 1, 0, NULL); + + mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc); +} + static void mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, struct mlx5e_ktls_offload_context_tx *priv_tx, @@ -213,6 +607,7 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, post_static_params(sq, priv_tx, fence_first_post); post_progress_params(sq, priv_tx, progress_fence); + tx_post_fence_nop(sq); } struct tx_sync_info { @@ -288,14 +683,31 @@ tx_post_resync_params(struct mlx5e_txqsq *sq, struct mlx5e_ktls_offload_context_tx *priv_tx, u64 rcd_sn) { - struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info; __be64 rn_be = cpu_to_be64(rcd_sn); bool skip_static_post; u16 rec_seq_sz; char *rec_seq; - rec_seq = info->rec_seq; - rec_seq_sz = sizeof(info->rec_seq); + switch (priv_tx->crypto_info.crypto_info.cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info.crypto_info_128; + + rec_seq = info->rec_seq; + rec_seq_sz = sizeof(info->rec_seq); + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = &priv_tx->crypto_info.crypto_info_256; + + rec_seq = info->rec_seq; + rec_seq_sz = sizeof(info->rec_seq); + break; + } + default: + WARN_ONCE(1, "Unsupported cipher type %u\n", + priv_tx->crypto_info.crypto_info.cipher_type); + return; + } skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz); if (!skip_static_post) @@ -305,7 +717,7 @@ tx_post_resync_params(struct mlx5e_txqsq *sq, } static int -tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool first) +tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn) { struct mlx5_wqe_ctrl_seg *cseg; struct mlx5_wqe_data_seg *dseg; @@ -327,7 +739,6 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); cseg->tis_tir_num = cpu_to_be32(tisn << 8); - cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; fsz = skb_frag_size(frag); dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, @@ -362,67 +773,39 @@ void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, stats->tls_dump_bytes += wi->num_bytes; } -static void tx_post_fence_nop(struct mlx5e_txqsq *sq) -{ - struct mlx5_wq_cyc *wq = &sq->wq; - u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - - tx_fill_wi(sq, pi, 1, 0, NULL); - - mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc); -} - static enum mlx5e_ktls_sync_retval mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, struct mlx5e_txqsq *sq, int datalen, u32 seq) { - struct mlx5e_sq_stats *stats = sq->stats; enum mlx5e_ktls_sync_retval ret; struct tx_sync_info info = {}; - int i = 0; + int i; ret = tx_sync_info_get(priv_tx, seq, datalen, &info); - if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) { - if (ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA) { - stats->tls_skip_no_sync_data++; - return MLX5E_KTLS_SYNC_SKIP_NO_DATA; - } - /* We might get here if a retransmission reaches the driver - * after the relevant record is acked. + if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) + /* We might get here with ret == FAIL if a retransmission + * reaches the driver after the relevant record is acked. * It should be safe to drop the packet in this case */ - stats->tls_drop_no_sync_data++; - goto err_out; - } - - stats->tls_ooo++; + return ret; tx_post_resync_params(sq, priv_tx, info.rcd_sn); - /* If no dump WQE was sent, we need to have a fence NOP WQE before the - * actual data xmit. - */ - if (!info.nr_frags) { - tx_post_fence_nop(sq); - return MLX5E_KTLS_SYNC_DONE; - } - - for (; i < info.nr_frags; i++) { + for (i = 0; i < info.nr_frags; i++) { unsigned int orig_fsz, frag_offset = 0, n = 0; skb_frag_t *f = &info.frags[i]; orig_fsz = skb_frag_size(f); do { - bool fence = !(i || frag_offset); unsigned int fsz; n++; fsz = min_t(unsigned int, sq->hw_mtu, orig_fsz - frag_offset); skb_frag_size_set(f, fsz); - if (tx_post_resync_dump(sq, f, priv_tx->tisn, fence)) { + if (tx_post_resync_dump(sq, f, priv_tx->tisn)) { page_ref_add(skb_frag_page(f), n - 1); goto err_out; } @@ -448,34 +831,55 @@ err_out: return MLX5E_KTLS_SYNC_FAIL; } -bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq, - struct sk_buff *skb, int datalen, +bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, + struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state) { struct mlx5e_ktls_offload_context_tx *priv_tx; struct mlx5e_sq_stats *stats = sq->stats; + struct net_device *tls_netdev; + struct tls_context *tls_ctx; + int datalen; u32 seq; + datalen = skb->len - skb_tcp_all_headers(skb); + if (!datalen) + return true; + + mlx5e_tx_mpwqe_ensure_complete(sq); + + tls_ctx = tls_get_ctx(skb->sk); + tls_netdev = rcu_dereference_bh(tls_ctx->netdev); + /* Don't WARN on NULL: if tls_device_down is running in parallel, + * netdev might become NULL, even if tls_is_sk_tx_device_offloaded was + * true. Rather continue processing this packet. + */ + if (WARN_ON_ONCE(tls_netdev && tls_netdev != netdev)) + goto err_out; + priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); - if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) { + if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false); - } seq = ntohl(tcp_hdr(skb)->seq); if (unlikely(priv_tx->expected_seq != seq)) { enum mlx5e_ktls_sync_retval ret = mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq); + stats->tls_ooo++; + switch (ret) { case MLX5E_KTLS_SYNC_DONE: break; case MLX5E_KTLS_SYNC_SKIP_NO_DATA: + stats->tls_skip_no_sync_data++; if (likely(!skb->decrypted)) goto out; WARN_ON_ONCE(1); - fallthrough; + goto err_out; case MLX5E_KTLS_SYNC_FAIL: + stats->tls_drop_no_sync_data++; goto err_out; } } @@ -494,3 +898,24 @@ err_out: dev_kfree_skb_any(skb); return false; } + +int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) +{ + if (!mlx5e_is_ktls_tx(priv->mdev)) + return 0; + + priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats); + if (!priv->tls->tx_pool) + return -ENOMEM; + + return 0; +} + +void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) +{ + if (!mlx5e_is_ktls_tx(priv->mdev)) + return; + + mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool); + priv->tls->tx_pool = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c index ac29aeb8af49..570a912dd6fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c @@ -21,7 +21,7 @@ enum { static void fill_static_params(struct mlx5_wqe_tls_static_params_seg *params, - struct tls12_crypto_info_aes_gcm_128 *info, + union mlx5e_crypto_info *crypto_info, u32 key_id, u32 resync_tcp_sn) { char *initial_rn, *gcm_iv; @@ -32,7 +32,26 @@ fill_static_params(struct mlx5_wqe_tls_static_params_seg *params, ctx = params->ctx; - EXTRACT_INFO_FIELDS; + switch (crypto_info->crypto_info.cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + &crypto_info->crypto_info_128; + + EXTRACT_INFO_FIELDS; + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = + &crypto_info->crypto_info_256; + + EXTRACT_INFO_FIELDS; + break; + } + default: + WARN_ONCE(1, "Unsupported cipher type %u\n", + crypto_info->crypto_info.cipher_type); + return; + } gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv); initial_rn = MLX5_ADDR_OF(tls_static_params, ctx, initial_record_number); @@ -54,7 +73,7 @@ fill_static_params(struct mlx5_wqe_tls_static_params_seg *params, void mlx5e_ktls_build_static_params(struct mlx5e_set_tls_static_params_wqe *wqe, u16 pc, u32 sqn, - struct tls12_crypto_info_aes_gcm_128 *info, + union mlx5e_crypto_info *crypto_info, u32 tis_tir_num, u32 key_id, u32 resync_tcp_sn, bool fence, enum tls_offload_ctx_dir direction) { @@ -75,7 +94,7 @@ mlx5e_ktls_build_static_params(struct mlx5e_set_tls_static_params_wqe *wqe, ucseg->flags = MLX5_UMR_INLINE; ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16); - fill_static_params(&wqe->params, info, key_id, resync_tcp_sn); + fill_static_params(&wqe->params, crypto_info, key_id, resync_tcp_sn); } static void diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h index 08c9d5134479..2dd78dd4ad65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -16,8 +16,8 @@ struct mlx5e_accel_tx_tls_state { u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); -bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq, - struct sk_buff *skb, int datalen, +bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, + struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state); void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, struct mlx5_cqe64 *cqe, u32 *cqe_bcnt); @@ -48,6 +48,18 @@ mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget) { return budget && test_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &c->async_icosq.state); } + +static inline bool mlx5e_ktls_skb_offloaded(struct sk_buff *skb) +{ + return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk); +} + +static inline void +mlx5e_ktls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg, + struct mlx5e_accel_tx_tls_state *state) +{ + cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8); +} #else static inline bool mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, @@ -69,6 +81,18 @@ mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget) return false; } +static inline u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + return 0; +} + +static inline void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, + struct sk_buff *skb, + struct mlx5_cqe64 *cqe, + u32 *cqe_bcnt) +{ +} #endif /* CONFIG_MLX5_EN_TLS */ #endif /* __MLX5E_TLS_TXRX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h index e5c180f2403b..3d79cd379890 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h @@ -6,7 +6,6 @@ #include <net/tls.h> #include "en.h" -#include "accel/tls.h" enum { MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0, @@ -28,6 +27,12 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx); void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk, u32 seq, u8 *rcd_sn); +union mlx5e_crypto_info { + struct tls_crypto_info crypto_info; + struct tls12_crypto_info_aes_gcm_128 crypto_info_128; + struct tls12_crypto_info_aes_gcm_256 crypto_info_256; +}; + struct mlx5e_set_tls_static_params_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_umr_ctrl_seg uctrl; @@ -73,7 +78,7 @@ struct mlx5e_get_tls_progress_params_wqe { void mlx5e_ktls_build_static_params(struct mlx5e_set_tls_static_params_wqe *wqe, u16 pc, u32 sqn, - struct tls12_crypto_info_aes_gcm_128 *info, + union mlx5e_crypto_info *crypto_info, u32 tis_tir_num, u32 key_id, u32 resync_tcp_sn, bool fence, enum tls_offload_ctx_dir direction); void diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c new file mode 100644 index 000000000000..2ef36cb9555a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -0,0 +1,1861 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include <linux/mlx5/device.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/xarray.h> + +#include "en.h" +#include "lib/aso.h" +#include "lib/mlx5.h" +#include "en_accel/macsec.h" +#include "en_accel/macsec_fs.h" + +#define MLX5_MACSEC_EPN_SCOPE_MID 0x80000000L +#define MLX5E_MACSEC_ASO_CTX_SZ MLX5_ST_SZ_BYTES(macsec_aso) + +enum mlx5_macsec_aso_event_arm { + MLX5E_ASO_EPN_ARM = BIT(0), +}; + +enum { + MLX5_MACSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET, +}; + +struct mlx5e_macsec_handle { + struct mlx5e_macsec *macsec; + u32 obj_id; + u8 idx; +}; + +enum { + MLX5_MACSEC_EPN, +}; + +struct mlx5e_macsec_aso_out { + u8 event_arm; + u32 mode_param; +}; + +struct mlx5e_macsec_aso_in { + u8 mode; + u32 obj_id; +}; + +struct mlx5e_macsec_epn_state { + u32 epn_msb; + u8 epn_enabled; + u8 overlap; +}; + +struct mlx5e_macsec_async_work { + struct mlx5e_macsec *macsec; + struct mlx5_core_dev *mdev; + struct work_struct work; + u32 obj_id; +}; + +struct mlx5e_macsec_sa { + bool active; + u8 assoc_num; + u32 macsec_obj_id; + u32 enc_key_id; + u32 next_pn; + sci_t sci; + salt_t salt; + + struct rhash_head hash; + u32 fs_id; + union mlx5e_macsec_rule *macsec_rule; + struct rcu_head rcu_head; + struct mlx5e_macsec_epn_state epn_state; +}; + +struct mlx5e_macsec_rx_sc; +struct mlx5e_macsec_rx_sc_xarray_element { + u32 fs_id; + struct mlx5e_macsec_rx_sc *rx_sc; +}; + +struct mlx5e_macsec_rx_sc { + bool active; + sci_t sci; + struct mlx5e_macsec_sa *rx_sa[MACSEC_NUM_AN]; + struct list_head rx_sc_list_element; + struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element; + struct metadata_dst *md_dst; + struct rcu_head rcu_head; +}; + +struct mlx5e_macsec_umr { + dma_addr_t dma_addr; + u8 ctx[MLX5_ST_SZ_BYTES(macsec_aso)]; + u32 mkey; +}; + +struct mlx5e_macsec_aso { + /* ASO */ + struct mlx5_aso *maso; + /* Protects macsec ASO */ + struct mutex aso_lock; + /* UMR */ + struct mlx5e_macsec_umr *umr; + + u32 pdn; +}; + +static const struct rhashtable_params rhash_sci = { + .key_len = sizeof_field(struct mlx5e_macsec_sa, sci), + .key_offset = offsetof(struct mlx5e_macsec_sa, sci), + .head_offset = offsetof(struct mlx5e_macsec_sa, hash), + .automatic_shrinking = true, + .min_size = 1, +}; + +struct mlx5e_macsec_device { + const struct net_device *netdev; + struct mlx5e_macsec_sa *tx_sa[MACSEC_NUM_AN]; + struct list_head macsec_rx_sc_list_head; + unsigned char *dev_addr; + struct list_head macsec_device_list_element; +}; + +struct mlx5e_macsec { + struct list_head macsec_device_list_head; + int num_of_devices; + struct mlx5e_macsec_fs *macsec_fs; + struct mutex lock; /* Protects mlx5e_macsec internal contexts */ + + /* Tx sci -> fs id mapping handling */ + struct rhashtable sci_hash; /* sci -> mlx5e_macsec_sa */ + + /* Rx fs_id -> rx_sc mapping */ + struct xarray sc_xarray; + + struct mlx5_core_dev *mdev; + + /* Stats manage */ + struct mlx5e_macsec_stats stats; + + /* ASO */ + struct mlx5e_macsec_aso aso; + + struct notifier_block nb; + struct workqueue_struct *wq; +}; + +struct mlx5_macsec_obj_attrs { + u32 aso_pdn; + u32 next_pn; + __be64 sci; + u32 enc_key_id; + bool encrypt; + struct mlx5e_macsec_epn_state epn_state; + salt_t salt; + __be32 ssci; + bool replay_protect; + u32 replay_window; +}; + +struct mlx5_aso_ctrl_param { + u8 data_mask_mode; + u8 condition_0_operand; + u8 condition_1_operand; + u8 condition_0_offset; + u8 condition_1_offset; + u8 data_offset; + u8 condition_operand; + u32 condition_0_data; + u32 condition_0_mask; + u32 condition_1_data; + u32 condition_1_mask; + u64 bitwise_data; + u64 data_mask; +}; + +static int mlx5e_macsec_aso_reg_mr(struct mlx5_core_dev *mdev, struct mlx5e_macsec_aso *aso) +{ + struct mlx5e_macsec_umr *umr; + struct device *dma_device; + dma_addr_t dma_addr; + int err; + + umr = kzalloc(sizeof(*umr), GFP_KERNEL); + if (!umr) { + err = -ENOMEM; + return err; + } + + dma_device = &mdev->pdev->dev; + dma_addr = dma_map_single(dma_device, umr->ctx, sizeof(umr->ctx), DMA_BIDIRECTIONAL); + err = dma_mapping_error(dma_device, dma_addr); + if (err) { + mlx5_core_err(mdev, "Can't map dma device, err=%d\n", err); + goto out_dma; + } + + err = mlx5e_create_mkey(mdev, aso->pdn, &umr->mkey); + if (err) { + mlx5_core_err(mdev, "Can't create mkey, err=%d\n", err); + goto out_mkey; + } + + umr->dma_addr = dma_addr; + + aso->umr = umr; + + return 0; + +out_mkey: + dma_unmap_single(dma_device, dma_addr, sizeof(umr->ctx), DMA_BIDIRECTIONAL); +out_dma: + kfree(umr); + return err; +} + +static void mlx5e_macsec_aso_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5e_macsec_aso *aso) +{ + struct mlx5e_macsec_umr *umr = aso->umr; + + mlx5_core_destroy_mkey(mdev, umr->mkey); + dma_unmap_single(&mdev->pdev->dev, umr->dma_addr, sizeof(umr->ctx), DMA_BIDIRECTIONAL); + kfree(umr); +} + +static int macsec_set_replay_protection(struct mlx5_macsec_obj_attrs *attrs, void *aso_ctx) +{ + u8 window_sz; + + if (!attrs->replay_protect) + return 0; + + switch (attrs->replay_window) { + case 256: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_256BIT; + break; + case 128: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_128BIT; + break; + case 64: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_64BIT; + break; + case 32: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_32BIT; + break; + default: + return -EINVAL; + } + MLX5_SET(macsec_aso, aso_ctx, window_size, window_sz); + MLX5_SET(macsec_aso, aso_ctx, mode, MLX5_MACSEC_ASO_REPLAY_PROTECTION); + + return 0; +} + +static int mlx5e_macsec_create_object(struct mlx5_core_dev *mdev, + struct mlx5_macsec_obj_attrs *attrs, + bool is_tx, + u32 *macsec_obj_id) +{ + u32 in[MLX5_ST_SZ_DW(create_macsec_obj_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + void *aso_ctx; + void *obj; + int err; + + obj = MLX5_ADDR_OF(create_macsec_obj_in, in, macsec_object); + aso_ctx = MLX5_ADDR_OF(macsec_offload_obj, obj, macsec_aso); + + MLX5_SET(macsec_offload_obj, obj, confidentiality_en, attrs->encrypt); + MLX5_SET(macsec_offload_obj, obj, dekn, attrs->enc_key_id); + MLX5_SET(macsec_offload_obj, obj, aso_return_reg, MLX5_MACSEC_ASO_REG_C_4_5); + MLX5_SET(macsec_offload_obj, obj, macsec_aso_access_pd, attrs->aso_pdn); + MLX5_SET(macsec_aso, aso_ctx, mode_parameter, attrs->next_pn); + + /* Epn */ + if (attrs->epn_state.epn_enabled) { + void *salt_p; + int i; + + MLX5_SET(macsec_aso, aso_ctx, epn_event_arm, 1); + MLX5_SET(macsec_offload_obj, obj, epn_en, 1); + MLX5_SET(macsec_offload_obj, obj, epn_msb, attrs->epn_state.epn_msb); + MLX5_SET(macsec_offload_obj, obj, epn_overlap, attrs->epn_state.overlap); + MLX5_SET64(macsec_offload_obj, obj, sci, (__force u64)attrs->ssci); + salt_p = MLX5_ADDR_OF(macsec_offload_obj, obj, salt); + for (i = 0; i < 3 ; i++) + memcpy((u32 *)salt_p + i, &attrs->salt.bytes[4 * (2 - i)], 4); + } else { + MLX5_SET64(macsec_offload_obj, obj, sci, (__force u64)(attrs->sci)); + } + + MLX5_SET(macsec_aso, aso_ctx, valid, 0x1); + if (is_tx) { + MLX5_SET(macsec_aso, aso_ctx, mode, MLX5_MACSEC_ASO_INC_SN); + } else { + err = macsec_set_replay_protection(attrs, aso_ctx); + if (err) + return err; + } + + /* general object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) { + mlx5_core_err(mdev, + "MACsec offload: Failed to create MACsec object (err = %d)\n", + err); + return err; + } + + *macsec_obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return err; +} + +static void mlx5e_macsec_destroy_object(struct mlx5_core_dev *mdev, u32 macsec_obj_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, macsec_obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, + struct mlx5e_macsec_sa *sa, + bool is_tx) +{ + int action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : + MLX5_ACCEL_MACSEC_ACTION_DECRYPT; + + if ((is_tx) && sa->fs_id) { + /* Make sure ongoing datapath readers sees a valid SA */ + rhashtable_remove_fast(&macsec->sci_hash, &sa->hash, rhash_sci); + sa->fs_id = 0; + } + + if (!sa->macsec_rule) + return; + + mlx5e_macsec_fs_del_rule(macsec->macsec_fs, sa->macsec_rule, action); + mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id); + sa->macsec_rule = NULL; +} + +static int mlx5e_macsec_init_sa(struct macsec_context *ctx, + struct mlx5e_macsec_sa *sa, + bool encrypt, + bool is_tx) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec *macsec = priv->macsec; + struct mlx5_macsec_rule_attrs rule_attrs; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_macsec_obj_attrs obj_attrs; + union mlx5e_macsec_rule *macsec_rule; + struct macsec_key *key; + int err; + + obj_attrs.next_pn = sa->next_pn; + obj_attrs.sci = cpu_to_be64((__force u64)sa->sci); + obj_attrs.enc_key_id = sa->enc_key_id; + obj_attrs.encrypt = encrypt; + obj_attrs.aso_pdn = macsec->aso.pdn; + obj_attrs.epn_state = sa->epn_state; + + if (is_tx) { + obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci); + key = &ctx->sa.tx_sa->key; + } else { + obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); + key = &ctx->sa.rx_sa->key; + } + + memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); + obj_attrs.replay_window = ctx->secy->replay_window; + obj_attrs.replay_protect = ctx->secy->replay_protect; + + err = mlx5e_macsec_create_object(mdev, &obj_attrs, is_tx, &sa->macsec_obj_id); + if (err) + return err; + + rule_attrs.macsec_obj_id = sa->macsec_obj_id; + rule_attrs.sci = sa->sci; + rule_attrs.assoc_num = sa->assoc_num; + rule_attrs.action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : + MLX5_ACCEL_MACSEC_ACTION_DECRYPT; + + macsec_rule = mlx5e_macsec_fs_add_rule(macsec->macsec_fs, ctx, &rule_attrs, &sa->fs_id); + if (!macsec_rule) { + err = -ENOMEM; + goto destroy_macsec_object; + } + + sa->macsec_rule = macsec_rule; + + if (is_tx) { + err = rhashtable_insert_fast(&macsec->sci_hash, &sa->hash, rhash_sci); + if (err) + goto destroy_macsec_object_and_rule; + } + + return 0; + +destroy_macsec_object_and_rule: + mlx5e_macsec_cleanup_sa(macsec, sa, is_tx); +destroy_macsec_object: + mlx5e_macsec_destroy_object(mdev, sa->macsec_obj_id); + + return err; +} + +static struct mlx5e_macsec_rx_sc * +mlx5e_macsec_get_rx_sc_from_sc_list(const struct list_head *list, sci_t sci) +{ + struct mlx5e_macsec_rx_sc *iter; + + list_for_each_entry_rcu(iter, list, rx_sc_list_element) { + if (iter->sci == sci) + return iter; + } + + return NULL; +} + +static int mlx5e_macsec_update_rx_sa(struct mlx5e_macsec *macsec, + struct mlx5e_macsec_sa *rx_sa, + bool active) +{ + struct mlx5_core_dev *mdev = macsec->mdev; + struct mlx5_macsec_obj_attrs attrs = {}; + int err = 0; + + if (rx_sa->active != active) + return 0; + + rx_sa->active = active; + if (!active) { + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + return 0; + } + + attrs.sci = cpu_to_be64((__force u64)rx_sa->sci); + attrs.enc_key_id = rx_sa->enc_key_id; + err = mlx5e_macsec_create_object(mdev, &attrs, false, &rx_sa->macsec_obj_id); + if (err) + return err; + + return 0; +} + +static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx) +{ + const struct net_device *netdev = ctx->netdev; + const struct macsec_secy *secy = ctx->secy; + + if (secy->validate_frames != MACSEC_VALIDATE_STRICT) { + netdev_err(netdev, + "MACsec offload is supported only when validate_frame is in strict mode\n"); + return false; + } + + if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN) { + netdev_err(netdev, "MACsec offload is supported only when icv_len is %d\n", + MACSEC_DEFAULT_ICV_LEN); + return false; + } + + if (!secy->protect_frames) { + netdev_err(netdev, + "MACsec offload is supported only when protect_frames is set\n"); + return false; + } + + return true; +} + +static struct mlx5e_macsec_device * +mlx5e_macsec_get_macsec_device_context(const struct mlx5e_macsec *macsec, + const struct macsec_context *ctx) +{ + struct mlx5e_macsec_device *iter; + const struct list_head *list; + + list = &macsec->macsec_device_list_head; + list_for_each_entry_rcu(iter, list, macsec_device_list_element) { + if (iter->netdev == ctx->secy->netdev) + return iter; + } + + return NULL; +} + +static void update_macsec_epn(struct mlx5e_macsec_sa *sa, const struct macsec_key *key, + const pn_t *next_pn_halves) +{ + struct mlx5e_macsec_epn_state *epn_state = &sa->epn_state; + + sa->salt = key->salt; + epn_state->epn_enabled = 1; + epn_state->epn_msb = next_pn_halves->upper; + epn_state->overlap = next_pn_halves->lower < MLX5_MACSEC_EPN_SCOPE_MID ? 0 : 1; +} + +static int mlx5e_macsec_add_txsa(struct macsec_context *ctx) +{ + const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc; + const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + const struct macsec_secy *secy = ctx->secy; + struct mlx5e_macsec_device *macsec_device; + struct mlx5_core_dev *mdev = priv->mdev; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_sa *tx_sa; + struct mlx5e_macsec *macsec; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EEXIST; + goto out; + } + + if (macsec_device->tx_sa[assoc_num]) { + netdev_err(ctx->netdev, "MACsec offload tx_sa: %d already exist\n", assoc_num); + err = -EEXIST; + goto out; + } + + tx_sa = kzalloc(sizeof(*tx_sa), GFP_KERNEL); + if (!tx_sa) { + err = -ENOMEM; + goto out; + } + + tx_sa->active = ctx_tx_sa->active; + tx_sa->next_pn = ctx_tx_sa->next_pn_halves.lower; + tx_sa->sci = secy->sci; + tx_sa->assoc_num = assoc_num; + + if (secy->xpn) + update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves); + + err = mlx5_create_encryption_key(mdev, ctx->sa.key, secy->key_len, + MLX5_ACCEL_OBJ_MACSEC_KEY, + &tx_sa->enc_key_id); + if (err) + goto destroy_sa; + + macsec_device->tx_sa[assoc_num] = tx_sa; + if (!secy->operational || + assoc_num != tx_sc->encoding_sa || + !tx_sa->active) + goto out; + + err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true); + if (err) + goto destroy_encryption_key; + + mutex_unlock(&macsec->lock); + + return 0; + +destroy_encryption_key: + macsec_device->tx_sa[assoc_num] = NULL; + mlx5_destroy_encryption_key(mdev, tx_sa->enc_key_id); +destroy_sa: + kfree(tx_sa); +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) +{ + const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc; + const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_sa *tx_sa; + struct mlx5e_macsec *macsec; + struct net_device *netdev; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + netdev = ctx->netdev; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + tx_sa = macsec_device->tx_sa[assoc_num]; + if (!tx_sa) { + netdev_err(netdev, "MACsec offload: TX sa 0x%x doesn't exist\n", assoc_num); + err = -EEXIST; + goto out; + } + + if (tx_sa->next_pn != ctx_tx_sa->next_pn_halves.lower) { + netdev_err(netdev, "MACsec offload: update TX sa %d PN isn't supported\n", + assoc_num); + err = -EINVAL; + goto out; + } + + if (tx_sa->active == ctx_tx_sa->active) + goto out; + + if (tx_sa->assoc_num != tx_sc->encoding_sa) + goto out; + + if (ctx_tx_sa->active) { + err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true); + if (err) + goto out; + } else { + if (!tx_sa->macsec_rule) { + err = -EINVAL; + goto out; + } + + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); + } + + tx_sa->active = ctx_tx_sa->active; +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_del_txsa(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_sa *tx_sa; + struct mlx5e_macsec *macsec; + int err = 0; + + mutex_lock(&priv->macsec->lock); + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + tx_sa = macsec_device->tx_sa[assoc_num]; + if (!tx_sa) { + netdev_err(ctx->netdev, "MACsec offload: TX sa 0x%x doesn't exist\n", assoc_num); + err = -EEXIST; + goto out; + } + + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); + mlx5_destroy_encryption_key(macsec->mdev, tx_sa->enc_key_id); + kfree_rcu(tx_sa); + macsec_device->tx_sa[assoc_num] = NULL; + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static u32 mlx5e_macsec_get_sa_from_hashtable(struct rhashtable *sci_hash, sci_t *sci) +{ + struct mlx5e_macsec_sa *macsec_sa; + u32 fs_id = 0; + + rcu_read_lock(); + macsec_sa = rhashtable_lookup(sci_hash, sci, rhash_sci); + if (macsec_sa) + fs_id = macsec_sa->fs_id; + rcu_read_unlock(); + + return fs_id; +} + +static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx) +{ + struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc; + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec_rx_sc *rx_sc; + struct list_head *rx_sc_list; + struct mlx5e_macsec *macsec; + int err = 0; + + mutex_lock(&priv->macsec->lock); + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + rx_sc_list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(rx_sc_list, ctx_rx_sc->sci); + if (rx_sc) { + netdev_err(ctx->netdev, "MACsec offload: rx_sc (sci %lld) already exists\n", + ctx_rx_sc->sci); + err = -EEXIST; + goto out; + } + + rx_sc = kzalloc(sizeof(*rx_sc), GFP_KERNEL); + if (!rx_sc) { + err = -ENOMEM; + goto out; + } + + sc_xarray_element = kzalloc(sizeof(*sc_xarray_element), GFP_KERNEL); + if (!sc_xarray_element) { + err = -ENOMEM; + goto destroy_rx_sc; + } + + sc_xarray_element->rx_sc = rx_sc; + err = xa_alloc(&macsec->sc_xarray, &sc_xarray_element->fs_id, sc_xarray_element, + XA_LIMIT(1, USHRT_MAX), GFP_KERNEL); + if (err) + goto destroy_sc_xarray_elemenet; + + rx_sc->md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL); + if (!rx_sc->md_dst) { + err = -ENOMEM; + goto erase_xa_alloc; + } + + rx_sc->sci = ctx_rx_sc->sci; + rx_sc->active = ctx_rx_sc->active; + list_add_rcu(&rx_sc->rx_sc_list_element, rx_sc_list); + + rx_sc->sc_xarray_element = sc_xarray_element; + rx_sc->md_dst->u.macsec_info.sci = rx_sc->sci; + mutex_unlock(&macsec->lock); + + return 0; + +erase_xa_alloc: + xa_erase(&macsec->sc_xarray, sc_xarray_element->fs_id); +destroy_sc_xarray_elemenet: + kfree(sc_xarray_element); +destroy_rx_sc: + kfree(rx_sc); + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc; + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec_rx_sc *rx_sc; + struct mlx5e_macsec_sa *rx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int i; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, ctx_rx_sc->sci); + if (!rx_sc) { + err = -EINVAL; + goto out; + } + + rx_sc->active = ctx_rx_sc->active; + if (rx_sc->active == ctx_rx_sc->active) + goto out; + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa) + continue; + + err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, rx_sa->active && ctx_rx_sc->active); + if (err) + goto out; + } + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec_rx_sc *rx_sc; + struct mlx5e_macsec_sa *rx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int err = 0; + int i; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, ctx->rx_sc->sci); + if (!rx_sc) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld doesn't exist\n", + ctx->sa.rx_sa->sc->sci); + err = -EINVAL; + goto out; + } + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa) + continue; + + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); + + kfree(rx_sa); + rx_sc->rx_sa[i] = NULL; + } + +/* + * At this point the relevant MACsec offload Rx rule already removed at + * mlx5e_macsec_cleanup_sa need to wait for datapath to finish current + * Rx related data propagating using xa_erase which uses rcu to sync, + * once fs_id is erased then this rx_sc is hidden from datapath. + */ + list_del_rcu(&rx_sc->rx_sc_list_element); + xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id); + metadata_dst_free(rx_sc->md_dst); + kfree(rx_sc->sc_xarray_element); + + kfree_rcu(rx_sc); + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx) +{ + const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + struct mlx5_core_dev *mdev = priv->mdev; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_rx_sc *rx_sc; + sci_t sci = ctx_rx_sa->sc->sci; + struct mlx5e_macsec_sa *rx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci); + if (!rx_sc) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld doesn't exist\n", + ctx->sa.rx_sa->sc->sci); + err = -EINVAL; + goto out; + } + + if (rx_sc->rx_sa[assoc_num]) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld rx_sa %d already exist\n", + sci, assoc_num); + err = -EEXIST; + goto out; + } + + rx_sa = kzalloc(sizeof(*rx_sa), GFP_KERNEL); + if (!rx_sa) { + err = -ENOMEM; + goto out; + } + + rx_sa->active = ctx_rx_sa->active; + rx_sa->next_pn = ctx_rx_sa->next_pn; + rx_sa->sci = sci; + rx_sa->assoc_num = assoc_num; + rx_sa->fs_id = rx_sc->sc_xarray_element->fs_id; + + if (ctx->secy->xpn) + update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves); + + err = mlx5_create_encryption_key(mdev, ctx->sa.key, ctx->secy->key_len, + MLX5_ACCEL_OBJ_MACSEC_KEY, + &rx_sa->enc_key_id); + if (err) + goto destroy_sa; + + rx_sc->rx_sa[assoc_num] = rx_sa; + if (!rx_sa->active) + goto out; + + //TODO - add support for both authentication and encryption flows + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); + if (err) + goto destroy_encryption_key; + + goto out; + +destroy_encryption_key: + rx_sc->rx_sa[assoc_num] = NULL; + mlx5_destroy_encryption_key(mdev, rx_sa->enc_key_id); +destroy_sa: + kfree(rx_sa); +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx) +{ + const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_rx_sc *rx_sc; + sci_t sci = ctx_rx_sa->sc->sci; + struct mlx5e_macsec_sa *rx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci); + if (!rx_sc) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld doesn't exist\n", + ctx->sa.rx_sa->sc->sci); + err = -EINVAL; + goto out; + } + + rx_sa = rx_sc->rx_sa[assoc_num]; + if (!rx_sa) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld rx_sa %d doesn't exist\n", + sci, assoc_num); + err = -EINVAL; + goto out; + } + + if (rx_sa->next_pn != ctx_rx_sa->next_pn_halves.lower) { + netdev_err(ctx->netdev, + "MACsec offload update RX sa %d PN isn't supported\n", + assoc_num); + err = -EINVAL; + goto out; + } + + err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, ctx_rx_sa->active); +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_del_rxsa(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + sci_t sci = ctx->sa.rx_sa->sc->sci; + struct mlx5e_macsec_rx_sc *rx_sc; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_sa *rx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci); + if (!rx_sc) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld doesn't exist\n", + ctx->sa.rx_sa->sc->sci); + err = -EINVAL; + goto out; + } + + rx_sa = rx_sc->rx_sa[assoc_num]; + if (!rx_sa) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld rx_sa %d doesn't exist\n", + sci, assoc_num); + err = -EINVAL; + goto out; + } + + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); + kfree(rx_sa); + rx_sc->rx_sa[assoc_num] = NULL; + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_add_secy(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + const struct net_device *dev = ctx->secy->netdev; + const struct net_device *netdev = ctx->netdev; + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec *macsec; + int err = 0; + + if (!mlx5e_macsec_secy_features_validate(ctx)) + return -EINVAL; + + mutex_lock(&priv->macsec->lock); + macsec = priv->macsec; + if (mlx5e_macsec_get_macsec_device_context(macsec, ctx)) { + netdev_err(netdev, "MACsec offload: MACsec net_device already exist\n"); + goto out; + } + + if (macsec->num_of_devices >= MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES) { + netdev_err(netdev, "Currently, only %d MACsec offload devices can be set\n", + MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES); + err = -EBUSY; + goto out; + } + + macsec_device = kzalloc(sizeof(*macsec_device), GFP_KERNEL); + if (!macsec_device) { + err = -ENOMEM; + goto out; + } + + macsec_device->dev_addr = kmemdup(dev->dev_addr, dev->addr_len, GFP_KERNEL); + if (!macsec_device->dev_addr) { + kfree(macsec_device); + err = -ENOMEM; + goto out; + } + + macsec_device->netdev = dev; + + INIT_LIST_HEAD_RCU(&macsec_device->macsec_rx_sc_list_head); + list_add_rcu(&macsec_device->macsec_device_list_element, &macsec->macsec_device_list_head); + + ++macsec->num_of_devices; +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int macsec_upd_secy_hw_address(struct macsec_context *ctx, + struct mlx5e_macsec_device *macsec_device) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + const struct net_device *dev = ctx->secy->netdev; + struct mlx5e_macsec *macsec = priv->macsec; + struct mlx5e_macsec_rx_sc *rx_sc, *tmp; + struct mlx5e_macsec_sa *rx_sa; + struct list_head *list; + int i, err = 0; + + + list = &macsec_device->macsec_rx_sc_list_head; + list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) { + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa || !rx_sa->macsec_rule) + continue; + + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + } + } + + list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) { + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa) + continue; + + if (rx_sa->active) { + err = mlx5e_macsec_init_sa(ctx, rx_sa, false, false); + if (err) + goto out; + } + } + } + + memcpy(macsec_device->dev_addr, dev->dev_addr, dev->addr_len); +out: + return err; +} + +/* this function is called from 2 macsec ops functions: + * macsec_set_mac_address – MAC address was changed, therefore we need to destroy + * and create new Tx contexts(macsec object + steering). + * macsec_changelink – in this case the tx SC or SecY may be changed, therefore need to + * destroy Tx and Rx contexts(macsec object + steering) + */ +static int mlx5e_macsec_upd_secy(struct macsec_context *ctx) +{ + const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + const struct net_device *dev = ctx->secy->netdev; + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec_sa *tx_sa; + struct mlx5e_macsec *macsec; + int i, err = 0; + + if (!mlx5e_macsec_secy_features_validate(ctx)) + return -EINVAL; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + /* if the dev_addr hasn't change, it mean the callback is from macsec_changelink */ + if (!memcmp(macsec_device->dev_addr, dev->dev_addr, dev->addr_len)) { + err = macsec_upd_secy_hw_address(ctx, macsec_device); + if (err) + goto out; + } + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + tx_sa = macsec_device->tx_sa[i]; + if (!tx_sa) + continue; + + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); + } + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + tx_sa = macsec_device->tx_sa[i]; + if (!tx_sa) + continue; + + if (tx_sa->assoc_num == tx_sc->encoding_sa && tx_sa->active) { + err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true); + if (err) + goto out; + } + } + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_del_secy(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec_rx_sc *rx_sc, *tmp; + struct mlx5e_macsec_sa *rx_sa; + struct mlx5e_macsec_sa *tx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int err = 0; + int i; + + mutex_lock(&priv->macsec->lock); + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + + goto out; + } + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + tx_sa = macsec_device->tx_sa[i]; + if (!tx_sa) + continue; + + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); + mlx5_destroy_encryption_key(macsec->mdev, tx_sa->enc_key_id); + kfree(tx_sa); + macsec_device->tx_sa[i] = NULL; + } + + list = &macsec_device->macsec_rx_sc_list_head; + list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) { + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa) + continue; + + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); + kfree(rx_sa); + rx_sc->rx_sa[i] = NULL; + } + + list_del_rcu(&rx_sc->rx_sc_list_element); + + kfree_rcu(rx_sc); + } + + kfree(macsec_device->dev_addr); + macsec_device->dev_addr = NULL; + + list_del_rcu(&macsec_device->macsec_device_list_element); + --macsec->num_of_devices; + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static void macsec_build_accel_attrs(struct mlx5e_macsec_sa *sa, + struct mlx5_macsec_obj_attrs *attrs) +{ + attrs->epn_state.epn_msb = sa->epn_state.epn_msb; + attrs->epn_state.overlap = sa->epn_state.overlap; +} + +static void macsec_aso_build_wqe_ctrl_seg(struct mlx5e_macsec_aso *macsec_aso, + struct mlx5_wqe_aso_ctrl_seg *aso_ctrl, + struct mlx5_aso_ctrl_param *param) +{ + memset(aso_ctrl, 0, sizeof(*aso_ctrl)); + if (macsec_aso->umr->dma_addr) { + aso_ctrl->va_l = cpu_to_be32(macsec_aso->umr->dma_addr | ASO_CTRL_READ_EN); + aso_ctrl->va_h = cpu_to_be32((u64)macsec_aso->umr->dma_addr >> 32); + aso_ctrl->l_key = cpu_to_be32(macsec_aso->umr->mkey); + } + + if (!param) + return; + + aso_ctrl->data_mask_mode = param->data_mask_mode << 6; + aso_ctrl->condition_1_0_operand = param->condition_1_operand | + param->condition_0_operand << 4; + aso_ctrl->condition_1_0_offset = param->condition_1_offset | + param->condition_0_offset << 4; + aso_ctrl->data_offset_condition_operand = param->data_offset | + param->condition_operand << 6; + aso_ctrl->condition_0_data = cpu_to_be32(param->condition_0_data); + aso_ctrl->condition_0_mask = cpu_to_be32(param->condition_0_mask); + aso_ctrl->condition_1_data = cpu_to_be32(param->condition_1_data); + aso_ctrl->condition_1_mask = cpu_to_be32(param->condition_1_mask); + aso_ctrl->bitwise_data = cpu_to_be64(param->bitwise_data); + aso_ctrl->data_mask = cpu_to_be64(param->data_mask); +} + +static int mlx5e_macsec_modify_obj(struct mlx5_core_dev *mdev, struct mlx5_macsec_obj_attrs *attrs, + u32 macsec_id) +{ + u32 in[MLX5_ST_SZ_DW(modify_macsec_obj_in)] = {}; + u32 out[MLX5_ST_SZ_DW(query_macsec_obj_out)]; + u64 modify_field_select = 0; + void *obj; + int err; + + /* General object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, macsec_id); + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) { + mlx5_core_err(mdev, "Query MACsec object failed (Object id %d), err = %d\n", + macsec_id, err); + return err; + } + + obj = MLX5_ADDR_OF(query_macsec_obj_out, out, macsec_object); + modify_field_select = MLX5_GET64(macsec_offload_obj, obj, modify_field_select); + + /* EPN */ + if (!(modify_field_select & MLX5_MODIFY_MACSEC_BITMASK_EPN_OVERLAP) || + !(modify_field_select & MLX5_MODIFY_MACSEC_BITMASK_EPN_MSB)) { + mlx5_core_dbg(mdev, "MACsec object field is not modifiable (Object id %d)\n", + macsec_id); + return -EOPNOTSUPP; + } + + obj = MLX5_ADDR_OF(modify_macsec_obj_in, in, macsec_object); + MLX5_SET64(macsec_offload_obj, obj, modify_field_select, + MLX5_MODIFY_MACSEC_BITMASK_EPN_OVERLAP | MLX5_MODIFY_MACSEC_BITMASK_EPN_MSB); + MLX5_SET(macsec_offload_obj, obj, epn_msb, attrs->epn_state.epn_msb); + MLX5_SET(macsec_offload_obj, obj, epn_overlap, attrs->epn_state.overlap); + + /* General object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +static void macsec_aso_build_ctrl(struct mlx5e_macsec_aso *aso, + struct mlx5_wqe_aso_ctrl_seg *aso_ctrl, + struct mlx5e_macsec_aso_in *in) +{ + struct mlx5_aso_ctrl_param param = {}; + + param.data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT; + param.condition_0_operand = MLX5_ASO_ALWAYS_TRUE; + param.condition_1_operand = MLX5_ASO_ALWAYS_TRUE; + if (in->mode == MLX5_MACSEC_EPN) { + param.data_offset = MLX5_MACSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET; + param.bitwise_data = BIT_ULL(54); + param.data_mask = param.bitwise_data; + } + macsec_aso_build_wqe_ctrl_seg(aso, aso_ctrl, ¶m); +} + +static int macsec_aso_set_arm_event(struct mlx5_core_dev *mdev, struct mlx5e_macsec *macsec, + struct mlx5e_macsec_aso_in *in) +{ + struct mlx5e_macsec_aso *aso; + struct mlx5_aso_wqe *aso_wqe; + struct mlx5_aso *maso; + int err; + + aso = &macsec->aso; + maso = aso->maso; + + mutex_lock(&aso->aso_lock); + aso_wqe = mlx5_aso_get_wqe(maso); + mlx5_aso_build_wqe(maso, MLX5_MACSEC_ASO_DS_CNT, aso_wqe, in->obj_id, + MLX5_ACCESS_ASO_OPC_MOD_MACSEC); + macsec_aso_build_ctrl(aso, &aso_wqe->aso_ctrl, in); + mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl); + err = mlx5_aso_poll_cq(maso, false); + mutex_unlock(&aso->aso_lock); + + return err; +} + +static int macsec_aso_query(struct mlx5_core_dev *mdev, struct mlx5e_macsec *macsec, + struct mlx5e_macsec_aso_in *in, struct mlx5e_macsec_aso_out *out) +{ + struct mlx5e_macsec_aso *aso; + struct mlx5_aso_wqe *aso_wqe; + struct mlx5_aso *maso; + int err; + + aso = &macsec->aso; + maso = aso->maso; + + mutex_lock(&aso->aso_lock); + + aso_wqe = mlx5_aso_get_wqe(maso); + mlx5_aso_build_wqe(maso, MLX5_MACSEC_ASO_DS_CNT, aso_wqe, in->obj_id, + MLX5_ACCESS_ASO_OPC_MOD_MACSEC); + macsec_aso_build_wqe_ctrl_seg(aso, &aso_wqe->aso_ctrl, NULL); + + mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl); + err = mlx5_aso_poll_cq(maso, false); + if (err) + goto err_out; + + if (MLX5_GET(macsec_aso, aso->umr->ctx, epn_event_arm)) + out->event_arm |= MLX5E_ASO_EPN_ARM; + + out->mode_param = MLX5_GET(macsec_aso, aso->umr->ctx, mode_parameter); + +err_out: + mutex_unlock(&aso->aso_lock); + return err; +} + +static struct mlx5e_macsec_sa *get_macsec_tx_sa_from_obj_id(const struct mlx5e_macsec *macsec, + const u32 obj_id) +{ + const struct list_head *device_list; + struct mlx5e_macsec_sa *macsec_sa; + struct mlx5e_macsec_device *iter; + int i; + + device_list = &macsec->macsec_device_list_head; + + list_for_each_entry(iter, device_list, macsec_device_list_element) { + for (i = 0; i < MACSEC_NUM_AN; ++i) { + macsec_sa = iter->tx_sa[i]; + if (!macsec_sa || !macsec_sa->active) + continue; + if (macsec_sa->macsec_obj_id == obj_id) + return macsec_sa; + } + } + + return NULL; +} + +static struct mlx5e_macsec_sa *get_macsec_rx_sa_from_obj_id(const struct mlx5e_macsec *macsec, + const u32 obj_id) +{ + const struct list_head *device_list, *sc_list; + struct mlx5e_macsec_rx_sc *mlx5e_rx_sc; + struct mlx5e_macsec_sa *macsec_sa; + struct mlx5e_macsec_device *iter; + int i; + + device_list = &macsec->macsec_device_list_head; + + list_for_each_entry(iter, device_list, macsec_device_list_element) { + sc_list = &iter->macsec_rx_sc_list_head; + list_for_each_entry(mlx5e_rx_sc, sc_list, rx_sc_list_element) { + for (i = 0; i < MACSEC_NUM_AN; ++i) { + macsec_sa = mlx5e_rx_sc->rx_sa[i]; + if (!macsec_sa || !macsec_sa->active) + continue; + if (macsec_sa->macsec_obj_id == obj_id) + return macsec_sa; + } + } + } + + return NULL; +} + +static void macsec_epn_update(struct mlx5e_macsec *macsec, struct mlx5_core_dev *mdev, + struct mlx5e_macsec_sa *sa, u32 obj_id, u32 mode_param) +{ + struct mlx5_macsec_obj_attrs attrs = {}; + struct mlx5e_macsec_aso_in in = {}; + + /* When the bottom of the replay protection window (mode_param) crosses 2^31 (half sequence + * number wraparound) hence mode_param > MLX5_MACSEC_EPN_SCOPE_MID the SW should update the + * esn_overlap to OLD (1). + * When the bottom of the replay protection window (mode_param) crosses 2^32 (full sequence + * number wraparound) hence mode_param < MLX5_MACSEC_EPN_SCOPE_MID since it did a + * wraparound, the SW should update the esn_overlap to NEW (0), and increment the esn_msb. + */ + + if (mode_param < MLX5_MACSEC_EPN_SCOPE_MID) { + sa->epn_state.epn_msb++; + sa->epn_state.overlap = 0; + } else { + sa->epn_state.overlap = 1; + } + + macsec_build_accel_attrs(sa, &attrs); + mlx5e_macsec_modify_obj(mdev, &attrs, obj_id); + + /* Re-set EPN arm event */ + in.obj_id = obj_id; + in.mode = MLX5_MACSEC_EPN; + macsec_aso_set_arm_event(mdev, macsec, &in); +} + +static void macsec_async_event(struct work_struct *work) +{ + struct mlx5e_macsec_async_work *async_work; + struct mlx5e_macsec_aso_out out = {}; + struct mlx5e_macsec_aso_in in = {}; + struct mlx5e_macsec_sa *macsec_sa; + struct mlx5e_macsec *macsec; + struct mlx5_core_dev *mdev; + u32 obj_id; + + async_work = container_of(work, struct mlx5e_macsec_async_work, work); + macsec = async_work->macsec; + mdev = async_work->mdev; + obj_id = async_work->obj_id; + macsec_sa = get_macsec_tx_sa_from_obj_id(macsec, obj_id); + if (!macsec_sa) { + macsec_sa = get_macsec_rx_sa_from_obj_id(macsec, obj_id); + if (!macsec_sa) { + mlx5_core_dbg(mdev, "MACsec SA is not found (SA object id %d)\n", obj_id); + goto out_async_work; + } + } + + /* Query MACsec ASO context */ + in.obj_id = obj_id; + macsec_aso_query(mdev, macsec, &in, &out); + + /* EPN case */ + if (macsec_sa->epn_state.epn_enabled && !(out.event_arm & MLX5E_ASO_EPN_ARM)) + macsec_epn_update(macsec, mdev, macsec_sa, obj_id, out.mode_param); + +out_async_work: + kfree(async_work); +} + +static int macsec_obj_change_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5e_macsec *macsec = container_of(nb, struct mlx5e_macsec, nb); + struct mlx5e_macsec_async_work *async_work; + struct mlx5_eqe_obj_change *obj_change; + struct mlx5_eqe *eqe = data; + u16 obj_type; + u32 obj_id; + + if (event != MLX5_EVENT_TYPE_OBJECT_CHANGE) + return NOTIFY_DONE; + + obj_change = &eqe->data.obj_change; + obj_type = be16_to_cpu(obj_change->obj_type); + obj_id = be32_to_cpu(obj_change->obj_id); + + if (obj_type != MLX5_GENERAL_OBJECT_TYPES_MACSEC) + return NOTIFY_DONE; + + async_work = kzalloc(sizeof(*async_work), GFP_ATOMIC); + if (!async_work) + return NOTIFY_DONE; + + async_work->macsec = macsec; + async_work->mdev = macsec->mdev; + async_work->obj_id = obj_id; + + INIT_WORK(&async_work->work, macsec_async_event); + + WARN_ON(!queue_work(macsec->wq, &async_work->work)); + + return NOTIFY_OK; +} + +static int mlx5e_macsec_aso_init(struct mlx5e_macsec_aso *aso, struct mlx5_core_dev *mdev) +{ + struct mlx5_aso *maso; + int err; + + err = mlx5_core_alloc_pd(mdev, &aso->pdn); + if (err) { + mlx5_core_err(mdev, + "MACsec offload: Failed to alloc pd for MACsec ASO, err=%d\n", + err); + return err; + } + + maso = mlx5_aso_create(mdev, aso->pdn); + if (IS_ERR(maso)) { + err = PTR_ERR(maso); + goto err_aso; + } + + err = mlx5e_macsec_aso_reg_mr(mdev, aso); + if (err) + goto err_aso_reg; + + mutex_init(&aso->aso_lock); + + aso->maso = maso; + + return 0; + +err_aso_reg: + mlx5_aso_destroy(maso); +err_aso: + mlx5_core_dealloc_pd(mdev, aso->pdn); + return err; +} + +static void mlx5e_macsec_aso_cleanup(struct mlx5e_macsec_aso *aso, struct mlx5_core_dev *mdev) +{ + if (!aso) + return; + + mlx5e_macsec_aso_dereg_mr(mdev, aso); + + mlx5_aso_destroy(aso->maso); + + mlx5_core_dealloc_pd(mdev, aso->pdn); +} + +bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev) +{ + if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD)) + return false; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return false; + + if (!MLX5_CAP_MACSEC(mdev, log_max_macsec_offload)) + return false; + + if (!MLX5_CAP_FLOWTABLE_NIC_RX(mdev, macsec_decrypt) || + !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, reformat_remove_macsec)) + return false; + + if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, macsec_encrypt) || + !MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_macsec)) + return false; + + if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_encrypt) && + !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_encrypt)) + return false; + + if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_decrypt) && + !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_decrypt)) + return false; + + return true; +} + +void mlx5e_macsec_get_stats_fill(struct mlx5e_macsec *macsec, void *macsec_stats) +{ + mlx5e_macsec_fs_get_stats_fill(macsec->macsec_fs, macsec_stats); +} + +struct mlx5e_macsec_stats *mlx5e_macsec_get_stats(struct mlx5e_macsec *macsec) +{ + if (!macsec) + return NULL; + + return &macsec->stats; +} + +static const struct macsec_ops macsec_offload_ops = { + .mdo_add_txsa = mlx5e_macsec_add_txsa, + .mdo_upd_txsa = mlx5e_macsec_upd_txsa, + .mdo_del_txsa = mlx5e_macsec_del_txsa, + .mdo_add_rxsc = mlx5e_macsec_add_rxsc, + .mdo_upd_rxsc = mlx5e_macsec_upd_rxsc, + .mdo_del_rxsc = mlx5e_macsec_del_rxsc, + .mdo_add_rxsa = mlx5e_macsec_add_rxsa, + .mdo_upd_rxsa = mlx5e_macsec_upd_rxsa, + .mdo_del_rxsa = mlx5e_macsec_del_rxsa, + .mdo_add_secy = mlx5e_macsec_add_secy, + .mdo_upd_secy = mlx5e_macsec_upd_secy, + .mdo_del_secy = mlx5e_macsec_del_secy, +}; + +bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + u32 fs_id; + + fs_id = mlx5e_macsec_get_sa_from_hashtable(&macsec->sci_hash, &md_dst->u.macsec_info.sci); + if (!fs_id) + goto err_out; + + return true; + +err_out: + dev_kfree_skb_any(skb); + return false; +} + +void mlx5e_macsec_tx_build_eseg(struct mlx5e_macsec *macsec, + struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + u32 fs_id; + + fs_id = mlx5e_macsec_get_sa_from_hashtable(&macsec->sci_hash, &md_dst->u.macsec_info.sci); + if (!fs_id) + return; + + eseg->flow_table_metadata = cpu_to_be32(MLX5_ETH_WQE_FT_META_MACSEC | fs_id << 2); +} + +void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5_cqe64 *cqe) +{ + struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element; + u32 macsec_meta_data = be32_to_cpu(cqe->ft_metadata); + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_macsec_rx_sc *rx_sc; + struct mlx5e_macsec *macsec; + u32 fs_id; + + macsec = priv->macsec; + if (!macsec) + return; + + fs_id = MLX5_MACSEC_METADATA_HANDLE(macsec_meta_data); + + rcu_read_lock(); + sc_xarray_element = xa_load(&macsec->sc_xarray, fs_id); + rx_sc = sc_xarray_element->rx_sc; + if (rx_sc) { + dst_hold(&rx_sc->md_dst->dst); + skb_dst_set(skb, &rx_sc->md_dst->dst); + } + + rcu_read_unlock(); +} + +void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + + if (!mlx5e_is_macsec_device(priv->mdev)) + return; + + /* Enable MACsec */ + mlx5_core_dbg(priv->mdev, "mlx5e: MACsec acceleration enabled\n"); + netdev->macsec_ops = &macsec_offload_ops; + netdev->features |= NETIF_F_HW_MACSEC; + netif_keep_dst(netdev); +} + +int mlx5e_macsec_init(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_macsec *macsec = NULL; + struct mlx5e_macsec_fs *macsec_fs; + int err; + + if (!mlx5e_is_macsec_device(priv->mdev)) { + mlx5_core_dbg(mdev, "Not a MACsec offload device\n"); + return 0; + } + + macsec = kzalloc(sizeof(*macsec), GFP_KERNEL); + if (!macsec) + return -ENOMEM; + + INIT_LIST_HEAD(&macsec->macsec_device_list_head); + mutex_init(&macsec->lock); + + err = rhashtable_init(&macsec->sci_hash, &rhash_sci); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to init SCI hash table, err=%d\n", + err); + goto err_hash; + } + + err = mlx5e_macsec_aso_init(&macsec->aso, priv->mdev); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to init aso, err=%d\n", err); + goto err_aso; + } + + macsec->wq = alloc_ordered_workqueue("mlx5e_macsec_%s", 0, priv->netdev->name); + if (!macsec->wq) { + err = -ENOMEM; + goto err_wq; + } + + xa_init_flags(&macsec->sc_xarray, XA_FLAGS_ALLOC1); + + priv->macsec = macsec; + + macsec->mdev = mdev; + + macsec_fs = mlx5e_macsec_fs_init(mdev, priv->netdev); + if (!macsec_fs) { + err = -ENOMEM; + goto err_out; + } + + macsec->macsec_fs = macsec_fs; + + macsec->nb.notifier_call = macsec_obj_change_event; + mlx5_notifier_register(mdev, &macsec->nb); + + mlx5_core_dbg(mdev, "MACsec attached to netdevice\n"); + + return 0; + +err_out: + destroy_workqueue(macsec->wq); +err_wq: + mlx5e_macsec_aso_cleanup(&macsec->aso, priv->mdev); +err_aso: + rhashtable_destroy(&macsec->sci_hash); +err_hash: + kfree(macsec); + priv->macsec = NULL; + return err; +} + +void mlx5e_macsec_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_macsec *macsec = priv->macsec; + struct mlx5_core_dev *mdev = priv->mdev; + + if (!macsec) + return; + + mlx5_notifier_unregister(mdev, &macsec->nb); + mlx5e_macsec_fs_cleanup(macsec->macsec_fs); + destroy_workqueue(macsec->wq); + mlx5e_macsec_aso_cleanup(&macsec->aso, mdev); + rhashtable_destroy(&macsec->sci_hash); + mutex_destroy(&macsec->lock); + kfree(macsec); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h new file mode 100644 index 000000000000..d580b4a91253 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_ACCEL_MACSEC_H__ +#define __MLX5_EN_ACCEL_MACSEC_H__ + +#ifdef CONFIG_MLX5_EN_MACSEC + +#include <linux/mlx5/driver.h> +#include <net/macsec.h> +#include <net/dst_metadata.h> + +/* Bit31 - 30: MACsec marker, Bit3-0: MACsec id */ +#define MLX5_MACSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x1) +#define MLX5_MACSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(3, 0)) + +struct mlx5e_priv; +struct mlx5e_macsec; + +struct mlx5e_macsec_stats { + u64 macsec_rx_pkts; + u64 macsec_rx_bytes; + u64 macsec_rx_pkts_drop; + u64 macsec_rx_bytes_drop; + u64 macsec_tx_pkts; + u64 macsec_tx_bytes; + u64 macsec_tx_pkts_drop; + u64 macsec_tx_bytes_drop; +}; + +void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv); +int mlx5e_macsec_init(struct mlx5e_priv *priv); +void mlx5e_macsec_cleanup(struct mlx5e_priv *priv); +bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb); +void mlx5e_macsec_tx_build_eseg(struct mlx5e_macsec *macsec, + struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg); + +static inline bool mlx5e_macsec_skb_is_offload(struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + + return md_dst && (md_dst->type == METADATA_MACSEC); +} + +static inline bool mlx5e_macsec_is_rx_flow(struct mlx5_cqe64 *cqe) +{ + return MLX5_MACSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata)); +} + +void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, + struct mlx5_cqe64 *cqe); +bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev); +void mlx5e_macsec_get_stats_fill(struct mlx5e_macsec *macsec, void *macsec_stats); +struct mlx5e_macsec_stats *mlx5e_macsec_get_stats(struct mlx5e_macsec *macsec); + +#else + +static inline void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv) {} +static inline int mlx5e_macsec_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_macsec_cleanup(struct mlx5e_priv *priv) {} +static inline bool mlx5e_macsec_skb_is_offload(struct sk_buff *skb) { return false; } +static inline bool mlx5e_macsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return false; } +static inline void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5_cqe64 *cqe) +{} +static inline bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev) { return false; } +#endif /* CONFIG_MLX5_EN_MACSEC */ + +#endif /* __MLX5_ACCEL_EN_MACSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c new file mode 100644 index 000000000000..1ac0cf04e811 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c @@ -0,0 +1,1384 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include <net/macsec.h> +#include <linux/netdevice.h> +#include <linux/mlx5/qp.h> +#include "fs_core.h" +#include "en/fs.h" +#include "en_accel/macsec_fs.h" +#include "mlx5_core.h" + +/* MACsec TX flow steering */ +#define CRYPTO_NUM_MAXSEC_FTE BIT(15) +#define CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE 1 + +#define TX_CRYPTO_TABLE_LEVEL 0 +#define TX_CRYPTO_TABLE_NUM_GROUPS 3 +#define TX_CRYPTO_TABLE_MKE_GROUP_SIZE 1 +#define TX_CRYPTO_TABLE_SA_GROUP_SIZE \ + (CRYPTO_NUM_MAXSEC_FTE - (TX_CRYPTO_TABLE_MKE_GROUP_SIZE + \ + CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE)) +#define TX_CHECK_TABLE_LEVEL 1 +#define TX_CHECK_TABLE_NUM_FTE 2 +#define RX_CRYPTO_TABLE_LEVEL 0 +#define RX_CHECK_TABLE_LEVEL 1 +#define RX_CHECK_TABLE_NUM_FTE 3 +#define RX_CRYPTO_TABLE_NUM_GROUPS 3 +#define RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE \ + ((CRYPTO_NUM_MAXSEC_FTE - CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE) / 2) +#define RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE \ + (CRYPTO_NUM_MAXSEC_FTE - RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE) +#define RX_NUM_OF_RULES_PER_SA 2 + +#define MLX5_MACSEC_TAG_LEN 8 /* SecTAG length with ethertype and without the optional SCI */ +#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK 0x23 +#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET 0x8 +#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET 0x5 +#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT (0x1 << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET) +#define MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI 0x8 +#define MLX5_SECTAG_HEADER_SIZE_WITH_SCI (MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI + MACSEC_SCI_LEN) + +/* MACsec RX flow steering */ +#define MLX5_ETH_WQE_FT_META_MACSEC_MASK 0x3E + +struct mlx5_sectag_header { + __be16 ethertype; + u8 tci_an; + u8 sl; + u32 pn; + u8 sci[MACSEC_SCI_LEN]; /* optional */ +} __packed; + +struct mlx5e_macsec_tx_rule { + struct mlx5_flow_handle *rule; + struct mlx5_pkt_reformat *pkt_reformat; + u32 fs_id; +}; + +struct mlx5e_macsec_tables { + struct mlx5e_flow_table ft_crypto; + struct mlx5_flow_handle *crypto_miss_rule; + + struct mlx5_flow_table *ft_check; + struct mlx5_flow_group *ft_check_group; + struct mlx5_fc *check_miss_rule_counter; + struct mlx5_flow_handle *check_miss_rule; + struct mlx5_fc *check_rule_counter; + + u32 refcnt; +}; + +struct mlx5e_macsec_tx { + struct mlx5_flow_handle *crypto_mke_rule; + struct mlx5_flow_handle *check_rule; + + struct ida tx_halloc; + + struct mlx5e_macsec_tables tables; +}; + +struct mlx5e_macsec_rx_rule { + struct mlx5_flow_handle *rule[RX_NUM_OF_RULES_PER_SA]; + struct mlx5_modify_hdr *meta_modhdr; +}; + +struct mlx5e_macsec_rx { + struct mlx5_flow_handle *check_rule[2]; + struct mlx5_pkt_reformat *check_rule_pkt_reformat[2]; + + struct mlx5e_macsec_tables tables; +}; + +union mlx5e_macsec_rule { + struct mlx5e_macsec_tx_rule tx_rule; + struct mlx5e_macsec_rx_rule rx_rule; +}; + +struct mlx5e_macsec_fs { + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5e_macsec_tx *tx_fs; + struct mlx5e_macsec_rx *rx_fs; +}; + +static void macsec_fs_tx_destroy(struct mlx5e_macsec_fs *macsec_fs) +{ + struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5e_macsec_tables *tx_tables; + + tx_tables = &tx_fs->tables; + + /* Tx check table */ + if (tx_fs->check_rule) { + mlx5_del_flow_rules(tx_fs->check_rule); + tx_fs->check_rule = NULL; + } + + if (tx_tables->check_miss_rule) { + mlx5_del_flow_rules(tx_tables->check_miss_rule); + tx_tables->check_miss_rule = NULL; + } + + if (tx_tables->ft_check_group) { + mlx5_destroy_flow_group(tx_tables->ft_check_group); + tx_tables->ft_check_group = NULL; + } + + if (tx_tables->ft_check) { + mlx5_destroy_flow_table(tx_tables->ft_check); + tx_tables->ft_check = NULL; + } + + /* Tx crypto table */ + if (tx_fs->crypto_mke_rule) { + mlx5_del_flow_rules(tx_fs->crypto_mke_rule); + tx_fs->crypto_mke_rule = NULL; + } + + if (tx_tables->crypto_miss_rule) { + mlx5_del_flow_rules(tx_tables->crypto_miss_rule); + tx_tables->crypto_miss_rule = NULL; + } + + mlx5e_destroy_flow_table(&tx_tables->ft_crypto); +} + +static int macsec_fs_tx_create_crypto_table_groups(struct mlx5e_flow_table *ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int mclen = MLX5_ST_SZ_BYTES(fte_match_param); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(TX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + /* Flow Group for MKE match */ + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += TX_CRYPTO_TABLE_MKE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow Group for SA rules */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2); + MLX5_SET(fte_match_param, mc, misc_parameters_2.metadata_reg_a, + MLX5_ETH_WQE_FT_META_MACSEC_MASK); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += TX_CRYPTO_TABLE_SA_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow Group for l2 traps */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static struct mlx5_flow_table + *macsec_fs_auto_group_table_create(struct mlx5_flow_namespace *ns, int flags, + int level, int max_fte) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *fdb = NULL; + + /* reserve entry for the match all miss group and rule */ + ft_attr.autogroup.num_reserved_entries = 1; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.prio = 0; + ft_attr.flags = flags; + ft_attr.level = level; + ft_attr.max_fte = max_fte; + + fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + + return fdb; +} + +static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct net_device *netdev = macsec_fs->netdev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5e_macsec_tables *tx_tables; + struct mlx5_flow_act flow_act = {}; + struct mlx5e_flow_table *ft_crypto; + struct mlx5_flow_table *flow_table; + struct mlx5_flow_group *flow_group; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + u32 *flow_group_in; + int err = 0; + + ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC); + if (!ns) + return -ENOMEM; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + goto out_spec; + + tx_tables = &tx_fs->tables; + ft_crypto = &tx_tables->ft_crypto; + + /* Tx crypto table */ + ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + ft_attr.level = TX_CRYPTO_TABLE_LEVEL; + ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE; + + flow_table = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + netdev_err(netdev, "Failed to create MACsec Tx crypto table err(%d)\n", err); + goto out_flow_group; + } + ft_crypto->t = flow_table; + + /* Tx crypto table groups */ + err = macsec_fs_tx_create_crypto_table_groups(ft_crypto); + if (err) { + netdev_err(netdev, + "Failed to create default flow group for MACsec Tx crypto table err(%d)\n", + err); + goto err; + } + + /* Tx crypto table MKE rule - MKE packets shouldn't be offloaded */ + memset(&flow_act, 0, sizeof(flow_act)); + memset(spec, 0, sizeof(*spec)); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_PAE); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + + rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, "Failed to add MACsec TX MKE rule, err=%d\n", err); + goto err; + } + tx_fs->crypto_mke_rule = rule; + + /* Tx crypto table Default miss rule */ + memset(&flow_act, 0, sizeof(flow_act)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, "Failed to add MACsec Tx table default miss rule %d\n", err); + goto err; + } + tx_tables->crypto_miss_rule = rule; + + /* Tx check table */ + flow_table = macsec_fs_auto_group_table_create(ns, 0, TX_CHECK_TABLE_LEVEL, + TX_CHECK_TABLE_NUM_FTE); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + netdev_err(netdev, "fail to create MACsec TX check table, err(%d)\n", err); + goto err; + } + tx_tables->ft_check = flow_table; + + /* Tx check table Default miss group/rule */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1); + flow_group = mlx5_create_flow_group(tx_tables->ft_check, flow_group_in); + if (IS_ERR(flow_group)) { + err = PTR_ERR(flow_group); + netdev_err(netdev, + "Failed to create default flow group for MACsec Tx crypto table err(%d)\n", + err); + goto err; + } + tx_tables->ft_check_group = flow_group; + + /* Tx check table default drop rule */ + memset(&dest, 0, sizeof(struct mlx5_flow_destination)); + memset(&flow_act, 0, sizeof(flow_act)); + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(tx_tables->check_miss_rule_counter); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + rule = mlx5_add_flow_rules(tx_tables->ft_check, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, "Failed to added MACsec tx check drop rule, err(%d)\n", err); + goto err; + } + tx_tables->check_miss_rule = rule; + + /* Tx check table rule */ + memset(spec, 0, sizeof(struct mlx5_flow_spec)); + memset(&dest, 0, sizeof(struct mlx5_flow_destination)); + memset(&flow_act, 0, sizeof(flow_act)); + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + + flow_act.flags = FLOW_ACT_NO_APPEND; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(tx_tables->check_rule_counter); + rule = mlx5_add_flow_rules(tx_tables->ft_check, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, "Failed to add MACsec check rule, err=%d\n", err); + goto err; + } + tx_fs->check_rule = rule; + + goto out_flow_group; + +err: + macsec_fs_tx_destroy(macsec_fs); +out_flow_group: + kvfree(flow_group_in); +out_spec: + kvfree(spec); + return err; +} + +static int macsec_fs_tx_ft_get(struct mlx5e_macsec_fs *macsec_fs) +{ + struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5e_macsec_tables *tx_tables; + int err = 0; + + tx_tables = &tx_fs->tables; + if (tx_tables->refcnt) + goto out; + + err = macsec_fs_tx_create(macsec_fs); + if (err) + return err; + +out: + tx_tables->refcnt++; + return err; +} + +static void macsec_fs_tx_ft_put(struct mlx5e_macsec_fs *macsec_fs) +{ + struct mlx5e_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables; + + if (--tx_tables->refcnt) + return; + + macsec_fs_tx_destroy(macsec_fs); +} + +static int macsec_fs_tx_setup_fte(struct mlx5e_macsec_fs *macsec_fs, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + u32 macsec_obj_id, + u32 *fs_id) +{ + struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs; + int err = 0; + u32 id; + + err = ida_alloc_range(&tx_fs->tx_halloc, 1, + MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES, + GFP_KERNEL); + if (err < 0) + return err; + + id = err; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + + /* Metadata match */ + MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a, + MLX5_ETH_WQE_FT_META_MACSEC_MASK); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a, + MLX5_ETH_WQE_FT_META_MACSEC | id << 2); + + *fs_id = id; + flow_act->crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC; + flow_act->crypto.obj_id = macsec_obj_id; + + mlx5_core_dbg(macsec_fs->mdev, "Tx fte: macsec obj_id %u, fs_id %u\n", macsec_obj_id, id); + return 0; +} + +static void macsec_fs_tx_create_sectag_header(const struct macsec_context *ctx, + char *reformatbf, + size_t *reformat_size) +{ + const struct macsec_secy *secy = ctx->secy; + bool sci_present = macsec_send_sci(secy); + struct mlx5_sectag_header sectag = {}; + const struct macsec_tx_sc *tx_sc; + + tx_sc = &secy->tx_sc; + sectag.ethertype = htons(ETH_P_MACSEC); + + if (sci_present) { + sectag.tci_an |= MACSEC_TCI_SC; + memcpy(§ag.sci, &secy->sci, + sizeof(sectag.sci)); + } else { + if (tx_sc->end_station) + sectag.tci_an |= MACSEC_TCI_ES; + if (tx_sc->scb) + sectag.tci_an |= MACSEC_TCI_SCB; + } + + /* With GCM, C/E clear for !encrypt, both set for encrypt */ + if (tx_sc->encrypt) + sectag.tci_an |= MACSEC_TCI_CONFID; + else if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN) + sectag.tci_an |= MACSEC_TCI_C; + + sectag.tci_an |= tx_sc->encoding_sa; + + *reformat_size = MLX5_MACSEC_TAG_LEN + (sci_present ? MACSEC_SCI_LEN : 0); + + memcpy(reformatbf, §ag, *reformat_size); +} + +static void macsec_fs_tx_del_rule(struct mlx5e_macsec_fs *macsec_fs, + struct mlx5e_macsec_tx_rule *tx_rule) +{ + if (tx_rule->rule) { + mlx5_del_flow_rules(tx_rule->rule); + tx_rule->rule = NULL; + } + + if (tx_rule->pkt_reformat) { + mlx5_packet_reformat_dealloc(macsec_fs->mdev, tx_rule->pkt_reformat); + tx_rule->pkt_reformat = NULL; + } + + if (tx_rule->fs_id) { + ida_free(&macsec_fs->tx_fs->tx_halloc, tx_rule->fs_id); + tx_rule->fs_id = 0; + } + + kfree(tx_rule); + + macsec_fs_tx_ft_put(macsec_fs); +} + +static union mlx5e_macsec_rule * +macsec_fs_tx_add_rule(struct mlx5e_macsec_fs *macsec_fs, + const struct macsec_context *macsec_ctx, + struct mlx5_macsec_rule_attrs *attrs, + u32 *sa_fs_id) +{ + char reformatbf[MLX5_MACSEC_TAG_LEN + MACSEC_SCI_LEN]; + struct mlx5_pkt_reformat_params reformat_params = {}; + struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct net_device *netdev = macsec_fs->netdev; + union mlx5e_macsec_rule *macsec_rule = NULL; + struct mlx5_flow_destination dest = {}; + struct mlx5e_macsec_tables *tx_tables; + struct mlx5e_macsec_tx_rule *tx_rule; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + size_t reformat_size; + int err = 0; + u32 fs_id; + + tx_tables = &tx_fs->tables; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return NULL; + + err = macsec_fs_tx_ft_get(macsec_fs); + if (err) + goto out_spec; + + macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL); + if (!macsec_rule) { + macsec_fs_tx_ft_put(macsec_fs); + goto out_spec; + } + + tx_rule = &macsec_rule->tx_rule; + + /* Tx crypto table crypto rule */ + macsec_fs_tx_create_sectag_header(macsec_ctx, reformatbf, &reformat_size); + + reformat_params.type = MLX5_REFORMAT_TYPE_ADD_MACSEC; + reformat_params.size = reformat_size; + reformat_params.data = reformatbf; + flow_act.pkt_reformat = mlx5_packet_reformat_alloc(macsec_fs->mdev, + &reformat_params, + MLX5_FLOW_NAMESPACE_EGRESS_MACSEC); + if (IS_ERR(flow_act.pkt_reformat)) { + err = PTR_ERR(flow_act.pkt_reformat); + netdev_err(netdev, "Failed to allocate MACsec Tx reformat context err=%d\n", err); + goto err; + } + tx_rule->pkt_reformat = flow_act.pkt_reformat; + + err = macsec_fs_tx_setup_fte(macsec_fs, spec, &flow_act, attrs->macsec_obj_id, &fs_id); + if (err) { + netdev_err(netdev, + "Failed to add packet reformat for MACsec TX crypto rule, err=%d\n", + err); + goto err; + } + + tx_rule->fs_id = fs_id; + *sa_fs_id = fs_id; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = tx_tables->ft_check; + rule = mlx5_add_flow_rules(tx_tables->ft_crypto.t, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, "Failed to add MACsec TX crypto rule, err=%d\n", err); + goto err; + } + tx_rule->rule = rule; + + goto out_spec; + +err: + macsec_fs_tx_del_rule(macsec_fs, tx_rule); + macsec_rule = NULL; +out_spec: + kvfree(spec); + + return macsec_rule; +} + +static void macsec_fs_tx_cleanup(struct mlx5e_macsec_fs *macsec_fs) +{ + struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5e_macsec_tables *tx_tables; + + if (!tx_fs) + return; + + tx_tables = &tx_fs->tables; + if (tx_tables->refcnt) { + netdev_err(macsec_fs->netdev, + "Can't destroy MACsec offload tx_fs, refcnt(%u) isn't 0\n", + tx_tables->refcnt); + return; + } + + ida_destroy(&tx_fs->tx_halloc); + + if (tx_tables->check_miss_rule_counter) { + mlx5_fc_destroy(mdev, tx_tables->check_miss_rule_counter); + tx_tables->check_miss_rule_counter = NULL; + } + + if (tx_tables->check_rule_counter) { + mlx5_fc_destroy(mdev, tx_tables->check_rule_counter); + tx_tables->check_rule_counter = NULL; + } + + kfree(tx_fs); + macsec_fs->tx_fs = NULL; +} + +static int macsec_fs_tx_init(struct mlx5e_macsec_fs *macsec_fs) +{ + struct net_device *netdev = macsec_fs->netdev; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5e_macsec_tables *tx_tables; + struct mlx5e_macsec_tx *tx_fs; + struct mlx5_fc *flow_counter; + int err; + + tx_fs = kzalloc(sizeof(*tx_fs), GFP_KERNEL); + if (!tx_fs) + return -ENOMEM; + + tx_tables = &tx_fs->tables; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + netdev_err(netdev, + "Failed to create MACsec Tx encrypt flow counter, err(%d)\n", + err); + goto err_encrypt_counter; + } + tx_tables->check_rule_counter = flow_counter; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + netdev_err(netdev, + "Failed to create MACsec Tx drop flow counter, err(%d)\n", + err); + goto err_drop_counter; + } + tx_tables->check_miss_rule_counter = flow_counter; + + ida_init(&tx_fs->tx_halloc); + + macsec_fs->tx_fs = tx_fs; + + return 0; + +err_drop_counter: + mlx5_fc_destroy(mdev, tx_tables->check_rule_counter); + tx_tables->check_rule_counter = NULL; + +err_encrypt_counter: + kfree(tx_fs); + macsec_fs->tx_fs = NULL; + + return err; +} + +static void macsec_fs_rx_destroy(struct mlx5e_macsec_fs *macsec_fs) +{ + struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5e_macsec_tables *rx_tables; + int i; + + /* Rx check table */ + for (i = 1; i >= 0; --i) { + if (rx_fs->check_rule[i]) { + mlx5_del_flow_rules(rx_fs->check_rule[i]); + rx_fs->check_rule[i] = NULL; + } + + if (rx_fs->check_rule_pkt_reformat[i]) { + mlx5_packet_reformat_dealloc(macsec_fs->mdev, + rx_fs->check_rule_pkt_reformat[i]); + rx_fs->check_rule_pkt_reformat[i] = NULL; + } + } + + rx_tables = &rx_fs->tables; + + if (rx_tables->check_miss_rule) { + mlx5_del_flow_rules(rx_tables->check_miss_rule); + rx_tables->check_miss_rule = NULL; + } + + if (rx_tables->ft_check_group) { + mlx5_destroy_flow_group(rx_tables->ft_check_group); + rx_tables->ft_check_group = NULL; + } + + if (rx_tables->ft_check) { + mlx5_destroy_flow_table(rx_tables->ft_check); + rx_tables->ft_check = NULL; + } + + /* Rx crypto table */ + if (rx_tables->crypto_miss_rule) { + mlx5_del_flow_rules(rx_tables->crypto_miss_rule); + rx_tables->crypto_miss_rule = NULL; + } + + mlx5e_destroy_flow_table(&rx_tables->ft_crypto); +} + +static int macsec_fs_rx_create_crypto_table_groups(struct mlx5e_flow_table *ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int mclen = MLX5_ST_SZ_BYTES(fte_match_param); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(RX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + /* Flow group for SA rule with SCI */ + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS_5); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + + MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_2); + MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_3); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow group for SA rule without SCI */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS_5); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_15_0); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + + MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow Group for l2 traps */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static int macsec_fs_rx_create_check_decap_rule(struct mlx5e_macsec_fs *macsec_fs, + struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec, + int reformat_param_size) +{ + int rule_index = (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI) ? 0 : 1; + u8 mlx5_reformat_buf[MLX5_SECTAG_HEADER_SIZE_WITH_SCI]; + struct mlx5_pkt_reformat_params reformat_params = {}; + struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct net_device *netdev = macsec_fs->netdev; + struct mlx5e_macsec_tables *rx_tables; + struct mlx5_flow_handle *rule; + int err = 0; + + rx_tables = &rx_fs->tables; + + /* Rx check table decap 16B rule */ + memset(dest, 0, sizeof(*dest)); + memset(flow_act, 0, sizeof(*flow_act)); + memset(spec, 0, sizeof(*spec)); + + reformat_params.type = MLX5_REFORMAT_TYPE_DEL_MACSEC; + reformat_params.size = reformat_param_size; + reformat_params.data = mlx5_reformat_buf; + flow_act->pkt_reformat = mlx5_packet_reformat_alloc(macsec_fs->mdev, + &reformat_params, + MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); + if (IS_ERR(flow_act->pkt_reformat)) { + err = PTR_ERR(flow_act->pkt_reformat); + netdev_err(netdev, "Failed to allocate MACsec Rx reformat context err=%d\n", err); + return err; + } + rx_fs->check_rule_pkt_reformat[rule_index] = flow_act->pkt_reformat; + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + /* MACsec syndrome match */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.macsec_syndrome); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.macsec_syndrome, 0); + /* ASO return reg syndrome match */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; + /* Sectag TCI SC present bit*/ + MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + if (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI) + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT << + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + flow_act->flags = FLOW_ACT_NO_APPEND; + flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO | + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest->type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest->counter_id = mlx5_fc_id(rx_tables->check_rule_counter); + rule = mlx5_add_flow_rules(rx_tables->ft_check, spec, flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, "Failed to add MACsec Rx check rule, err=%d\n", err); + return err; + } + + rx_fs->check_rule[rule_index] = rule; + + return 0; +} + +static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct net_device *netdev = macsec_fs->netdev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5e_macsec_tables *rx_tables; + struct mlx5e_flow_table *ft_crypto; + struct mlx5_flow_table *flow_table; + struct mlx5_flow_group *flow_group; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + u32 *flow_group_in; + int err = 0; + + ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); + if (!ns) + return -ENOMEM; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + goto free_spec; + + rx_tables = &rx_fs->tables; + ft_crypto = &rx_tables->ft_crypto; + + /* Rx crypto table */ + ft_attr.level = RX_CRYPTO_TABLE_LEVEL; + ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE; + + flow_table = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + netdev_err(netdev, "Failed to create MACsec Rx crypto table err(%d)\n", err); + goto out_flow_group; + } + ft_crypto->t = flow_table; + + /* Rx crypto table groups */ + err = macsec_fs_rx_create_crypto_table_groups(ft_crypto); + if (err) { + netdev_err(netdev, + "Failed to create default flow group for MACsec Tx crypto table err(%d)\n", + err); + goto err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, + "Failed to add MACsec Rx crypto table default miss rule %d\n", + err); + goto err; + } + rx_tables->crypto_miss_rule = rule; + + /* Rx check table */ + flow_table = macsec_fs_auto_group_table_create(ns, + MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT, + RX_CHECK_TABLE_LEVEL, + RX_CHECK_TABLE_NUM_FTE); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + netdev_err(netdev, "fail to create MACsec RX check table, err(%d)\n", err); + goto err; + } + rx_tables->ft_check = flow_table; + + /* Rx check table Default miss group/rule */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1); + flow_group = mlx5_create_flow_group(rx_tables->ft_check, flow_group_in); + if (IS_ERR(flow_group)) { + err = PTR_ERR(flow_group); + netdev_err(netdev, + "Failed to create default flow group for MACsec Rx check table err(%d)\n", + err); + goto err; + } + rx_tables->ft_check_group = flow_group; + + /* Rx check table default drop rule */ + memset(&flow_act, 0, sizeof(flow_act)); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(rx_tables->check_miss_rule_counter); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + rule = mlx5_add_flow_rules(rx_tables->ft_check, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, "Failed to added MACsec Rx check drop rule, err(%d)\n", err); + goto err; + } + rx_tables->check_miss_rule = rule; + + /* Rx check table decap rules */ + err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec, + MLX5_SECTAG_HEADER_SIZE_WITH_SCI); + if (err) + goto err; + + err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec, + MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI); + if (err) + goto err; + + goto out_flow_group; + +err: + macsec_fs_rx_destroy(macsec_fs); +out_flow_group: + kvfree(flow_group_in); +free_spec: + kvfree(spec); + return err; +} + +static int macsec_fs_rx_ft_get(struct mlx5e_macsec_fs *macsec_fs) +{ + struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables; + int err = 0; + + if (rx_tables->refcnt) + goto out; + + err = macsec_fs_rx_create(macsec_fs); + if (err) + return err; + +out: + rx_tables->refcnt++; + return err; +} + +static void macsec_fs_rx_ft_put(struct mlx5e_macsec_fs *macsec_fs) +{ + struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables; + + if (--rx_tables->refcnt) + return; + + macsec_fs_rx_destroy(macsec_fs); +} + +static void macsec_fs_rx_del_rule(struct mlx5e_macsec_fs *macsec_fs, + struct mlx5e_macsec_rx_rule *rx_rule) +{ + int i; + + for (i = 0; i < RX_NUM_OF_RULES_PER_SA; ++i) { + if (rx_rule->rule[i]) { + mlx5_del_flow_rules(rx_rule->rule[i]); + rx_rule->rule[i] = NULL; + } + } + + if (rx_rule->meta_modhdr) { + mlx5_modify_header_dealloc(macsec_fs->mdev, rx_rule->meta_modhdr); + rx_rule->meta_modhdr = NULL; + } + + kfree(rx_rule); + + macsec_fs_rx_ft_put(macsec_fs); +} + +static void macsec_fs_rx_setup_fte(struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_macsec_rule_attrs *attrs, + bool sci_present) +{ + u8 tci_an = (sci_present << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET) | attrs->assoc_num; + struct mlx5_flow_act_crypto_params *crypto_params = &flow_act->crypto; + __be32 *sci_p = (__be32 *)(&attrs->sci); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + /* MACsec ethertype */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_MACSEC); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; + + /* Sectag AN + TCI SC present bit*/ + MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0, + tci_an << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + if (sci_present) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_5.macsec_tag_2); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_2, + be32_to_cpu(sci_p[0])); + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_5.macsec_tag_3); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_3, + be32_to_cpu(sci_p[1])); + } else { + /* When SCI isn't present in the Sectag, need to match the source */ + /* MAC address only if the SCI contains the default MACsec PORT */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.smac_47_16), + sci_p, ETH_ALEN); + } + + crypto_params->type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC; + crypto_params->obj_id = attrs->macsec_obj_id; +} + +static union mlx5e_macsec_rule * +macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs, + const struct macsec_context *macsec_ctx, + struct mlx5_macsec_rule_attrs *attrs, + u32 fs_id) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct net_device *netdev = macsec_fs->netdev; + union mlx5e_macsec_rule *macsec_rule = NULL; + struct mlx5_modify_hdr *modify_hdr = NULL; + struct mlx5_flow_destination dest = {}; + struct mlx5e_macsec_tables *rx_tables; + struct mlx5e_macsec_rx_rule *rx_rule; + struct mlx5_flow_act flow_act = {}; + struct mlx5e_flow_table *ft_crypto; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return NULL; + + err = macsec_fs_rx_ft_get(macsec_fs); + if (err) + goto out_spec; + + macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL); + if (!macsec_rule) { + macsec_fs_rx_ft_put(macsec_fs); + goto out_spec; + } + + rx_rule = &macsec_rule->rx_rule; + rx_tables = &rx_fs->tables; + ft_crypto = &rx_tables->ft_crypto; + + /* Set bit[31 - 30] macsec marker - 0x01 */ + /* Set bit[3-0] fs id */ + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); + MLX5_SET(set_action_in, action, data, fs_id | BIT(30)); + MLX5_SET(set_action_in, action, offset, 0); + MLX5_SET(set_action_in, action, length, 32); + + modify_hdr = mlx5_modify_header_alloc(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC, + 1, action); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + netdev_err(netdev, "fail to alloc MACsec set modify_header_id err=%d\n", err); + modify_hdr = NULL; + goto err; + } + rx_rule->meta_modhdr = modify_hdr; + + /* Rx crypto table with SCI rule */ + macsec_fs_rx_setup_fte(spec, &flow_act, attrs, true); + + flow_act.modify_hdr = modify_hdr; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = rx_tables->ft_check; + rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, + "Failed to add SA with SCI rule to Rx crypto rule, err=%d\n", + err); + goto err; + } + rx_rule->rule[0] = rule; + + /* Rx crypto table without SCI rule */ + if ((cpu_to_be64((__force u64)attrs->sci) & 0xFFFF) == ntohs(MACSEC_PORT_ES)) { + memset(spec, 0, sizeof(struct mlx5_flow_spec)); + memset(&dest, 0, sizeof(struct mlx5_flow_destination)); + memset(&flow_act, 0, sizeof(flow_act)); + + macsec_fs_rx_setup_fte(spec, &flow_act, attrs, false); + + flow_act.modify_hdr = modify_hdr; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = rx_tables->ft_check; + rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(netdev, + "Failed to add SA without SCI rule to Rx crypto rule, err=%d\n", + err); + goto err; + } + rx_rule->rule[1] = rule; + } + + return macsec_rule; + +err: + macsec_fs_rx_del_rule(macsec_fs, rx_rule); + macsec_rule = NULL; +out_spec: + kvfree(spec); + return macsec_rule; +} + +static int macsec_fs_rx_init(struct mlx5e_macsec_fs *macsec_fs) +{ + struct net_device *netdev = macsec_fs->netdev; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5e_macsec_tables *rx_tables; + struct mlx5e_macsec_rx *rx_fs; + struct mlx5_fc *flow_counter; + int err; + + rx_fs = kzalloc(sizeof(*rx_fs), GFP_KERNEL); + if (!rx_fs) + return -ENOMEM; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + netdev_err(netdev, + "Failed to create MACsec Rx encrypt flow counter, err(%d)\n", + err); + goto err_encrypt_counter; + } + + rx_tables = &rx_fs->tables; + rx_tables->check_rule_counter = flow_counter; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + netdev_err(netdev, + "Failed to create MACsec Rx drop flow counter, err(%d)\n", + err); + goto err_drop_counter; + } + rx_tables->check_miss_rule_counter = flow_counter; + + macsec_fs->rx_fs = rx_fs; + + return 0; + +err_drop_counter: + mlx5_fc_destroy(mdev, rx_tables->check_rule_counter); + rx_tables->check_rule_counter = NULL; + +err_encrypt_counter: + kfree(rx_fs); + macsec_fs->rx_fs = NULL; + + return err; +} + +static void macsec_fs_rx_cleanup(struct mlx5e_macsec_fs *macsec_fs) +{ + struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5e_macsec_tables *rx_tables; + + if (!rx_fs) + return; + + rx_tables = &rx_fs->tables; + + if (rx_tables->refcnt) { + netdev_err(macsec_fs->netdev, + "Can't destroy MACsec offload rx_fs, refcnt(%u) isn't 0\n", + rx_tables->refcnt); + return; + } + + if (rx_tables->check_miss_rule_counter) { + mlx5_fc_destroy(mdev, rx_tables->check_miss_rule_counter); + rx_tables->check_miss_rule_counter = NULL; + } + + if (rx_tables->check_rule_counter) { + mlx5_fc_destroy(mdev, rx_tables->check_rule_counter); + rx_tables->check_rule_counter = NULL; + } + + kfree(rx_fs); + macsec_fs->rx_fs = NULL; +} + +void mlx5e_macsec_fs_get_stats_fill(struct mlx5e_macsec_fs *macsec_fs, void *macsec_stats) +{ + struct mlx5e_macsec_stats *stats = (struct mlx5e_macsec_stats *)macsec_stats; + struct mlx5e_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables; + struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + + if (tx_tables->check_rule_counter) + mlx5_fc_query(mdev, tx_tables->check_rule_counter, + &stats->macsec_tx_pkts, &stats->macsec_tx_bytes); + + if (tx_tables->check_miss_rule_counter) + mlx5_fc_query(mdev, tx_tables->check_miss_rule_counter, + &stats->macsec_tx_pkts_drop, &stats->macsec_tx_bytes_drop); + + if (rx_tables->check_rule_counter) + mlx5_fc_query(mdev, rx_tables->check_rule_counter, + &stats->macsec_rx_pkts, &stats->macsec_rx_bytes); + + if (rx_tables->check_miss_rule_counter) + mlx5_fc_query(mdev, rx_tables->check_miss_rule_counter, + &stats->macsec_rx_pkts_drop, &stats->macsec_rx_bytes_drop); +} + +union mlx5e_macsec_rule * +mlx5e_macsec_fs_add_rule(struct mlx5e_macsec_fs *macsec_fs, + const struct macsec_context *macsec_ctx, + struct mlx5_macsec_rule_attrs *attrs, + u32 *sa_fs_id) +{ + return (attrs->action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ? + macsec_fs_tx_add_rule(macsec_fs, macsec_ctx, attrs, sa_fs_id) : + macsec_fs_rx_add_rule(macsec_fs, macsec_ctx, attrs, *sa_fs_id); +} + +void mlx5e_macsec_fs_del_rule(struct mlx5e_macsec_fs *macsec_fs, + union mlx5e_macsec_rule *macsec_rule, + int action) +{ + (action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ? + macsec_fs_tx_del_rule(macsec_fs, &macsec_rule->tx_rule) : + macsec_fs_rx_del_rule(macsec_fs, &macsec_rule->rx_rule); +} + +void mlx5e_macsec_fs_cleanup(struct mlx5e_macsec_fs *macsec_fs) +{ + macsec_fs_rx_cleanup(macsec_fs); + macsec_fs_tx_cleanup(macsec_fs); + kfree(macsec_fs); +} + +struct mlx5e_macsec_fs * +mlx5e_macsec_fs_init(struct mlx5_core_dev *mdev, + struct net_device *netdev) +{ + struct mlx5e_macsec_fs *macsec_fs; + int err; + + macsec_fs = kzalloc(sizeof(*macsec_fs), GFP_KERNEL); + if (!macsec_fs) + return NULL; + + macsec_fs->mdev = mdev; + macsec_fs->netdev = netdev; + + err = macsec_fs_tx_init(macsec_fs); + if (err) { + netdev_err(netdev, "MACsec offload: Failed to init tx_fs, err=%d\n", err); + goto err; + } + + err = macsec_fs_rx_init(macsec_fs); + if (err) { + netdev_err(netdev, "MACsec offload: Failed to init tx_fs, err=%d\n", err); + goto tx_cleanup; + } + + return macsec_fs; + +tx_cleanup: + macsec_fs_tx_cleanup(macsec_fs); +err: + kfree(macsec_fs); + return NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h new file mode 100644 index 000000000000..b429648d4ee7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_MACSEC_STEERING_H__ +#define __MLX5_MACSEC_STEERING_H__ + +#ifdef CONFIG_MLX5_EN_MACSEC + +#include "en_accel/macsec.h" + +#define MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES 16 + +struct mlx5e_macsec_fs; +union mlx5e_macsec_rule; + +struct mlx5_macsec_rule_attrs { + sci_t sci; + u32 macsec_obj_id; + u8 assoc_num; + int action; +}; + +enum mlx5_macsec_action { + MLX5_ACCEL_MACSEC_ACTION_ENCRYPT, + MLX5_ACCEL_MACSEC_ACTION_DECRYPT, +}; + +void mlx5e_macsec_fs_cleanup(struct mlx5e_macsec_fs *macsec_fs); + +struct mlx5e_macsec_fs * +mlx5e_macsec_fs_init(struct mlx5_core_dev *mdev, struct net_device *netdev); + +union mlx5e_macsec_rule * +mlx5e_macsec_fs_add_rule(struct mlx5e_macsec_fs *macsec_fs, + const struct macsec_context *ctx, + struct mlx5_macsec_rule_attrs *attrs, + u32 *sa_fs_id); + +void mlx5e_macsec_fs_del_rule(struct mlx5e_macsec_fs *macsec_fs, + union mlx5e_macsec_rule *macsec_rule, + int action); + +void mlx5e_macsec_fs_get_stats_fill(struct mlx5e_macsec_fs *macsec_fs, void *macsec_stats); + +#endif + +#endif /* __MLX5_MACSEC_STEERING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c new file mode 100644 index 000000000000..e50a2e3f3d18 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include <linux/ethtool.h> +#include <net/sock.h> + +#include "en.h" +#include "en_accel/macsec.h" + +static const struct counter_desc mlx5e_macsec_hw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_pkts_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_bytes_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_pkts_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_bytes_drop) }, +}; + +#define NUM_MACSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_macsec_hw_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(macsec_hw) +{ + if (!priv->macsec) + return 0; + + if (mlx5e_is_macsec_device(priv->mdev)) + return NUM_MACSEC_HW_COUNTERS; + + return 0; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(macsec_hw) {} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(macsec_hw) +{ + unsigned int i; + + if (!priv->macsec) + return idx; + + if (!mlx5e_is_macsec_device(priv->mdev)) + return idx; + + for (i = 0; i < NUM_MACSEC_HW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_macsec_hw_stats_desc[i].format); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(macsec_hw) +{ + int i; + + if (!priv->macsec) + return idx; + + if (!mlx5e_is_macsec_device(priv->mdev)) + return idx; + + mlx5e_macsec_get_stats_fill(priv->macsec, mlx5e_macsec_get_stats(priv->macsec)); + for (i = 0; i < NUM_MACSEC_HW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(mlx5e_macsec_get_stats(priv->macsec), + mlx5e_macsec_hw_stats_desc, + i); + + return idx; +} + +MLX5E_DEFINE_STATS_GRP(macsec_hw, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c deleted file mode 100644 index b8fc863aa68d..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/netdevice.h> -#include <net/ipv6.h> -#include "en_accel/tls.h" -#include "accel/tls.h" - -static void mlx5e_tls_set_ipv4_flow(void *flow, struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - - MLX5_SET(tls_flow, flow, ipv6, 0); - memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &inet->inet_daddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4)); - memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv4_layout.ipv4), - &inet->inet_rcv_saddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4)); -} - -#if IS_ENABLED(CONFIG_IPV6) -static void mlx5e_tls_set_ipv6_flow(void *flow, struct sock *sk) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - - MLX5_SET(tls_flow, flow, ipv6, 1); - memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &sk->sk_v6_daddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); - memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &np->saddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); -} -#endif - -static void mlx5e_tls_set_flow_tcp_ports(void *flow, struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - - memcpy(MLX5_ADDR_OF(tls_flow, flow, src_port), &inet->inet_sport, - MLX5_FLD_SZ_BYTES(tls_flow, src_port)); - memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_port), &inet->inet_dport, - MLX5_FLD_SZ_BYTES(tls_flow, dst_port)); -} - -static int mlx5e_tls_set_flow(void *flow, struct sock *sk, u32 caps) -{ - switch (sk->sk_family) { - case AF_INET: - mlx5e_tls_set_ipv4_flow(flow, sk); - break; -#if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - if (!sk->sk_ipv6only && - ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) { - mlx5e_tls_set_ipv4_flow(flow, sk); - break; - } - if (!(caps & MLX5_ACCEL_TLS_IPV6)) - goto error_out; - - mlx5e_tls_set_ipv6_flow(flow, sk); - break; -#endif - default: - goto error_out; - } - - mlx5e_tls_set_flow_tcp_ports(flow, sk); - return 0; -error_out: - return -EINVAL; -} - -static int mlx5e_tls_add(struct net_device *netdev, struct sock *sk, - enum tls_offload_ctx_dir direction, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct mlx5_core_dev *mdev = priv->mdev; - u32 caps = mlx5_accel_tls_device_caps(mdev); - int ret = -ENOMEM; - void *flow; - u32 swid; - - flow = kzalloc(MLX5_ST_SZ_BYTES(tls_flow), GFP_KERNEL); - if (!flow) - return ret; - - ret = mlx5e_tls_set_flow(flow, sk, caps); - if (ret) - goto free_flow; - - ret = mlx5_accel_tls_add_flow(mdev, flow, crypto_info, - start_offload_tcp_sn, &swid, - direction == TLS_OFFLOAD_CTX_DIR_TX); - if (ret < 0) - goto free_flow; - - if (direction == TLS_OFFLOAD_CTX_DIR_TX) { - struct mlx5e_tls_offload_context_tx *tx_ctx = - mlx5e_get_tls_tx_context(tls_ctx); - - tx_ctx->swid = htonl(swid); - tx_ctx->expected_seq = start_offload_tcp_sn; - } else { - struct mlx5e_tls_offload_context_rx *rx_ctx = - mlx5e_get_tls_rx_context(tls_ctx); - - rx_ctx->handle = htonl(swid); - } - - return 0; -free_flow: - kfree(flow); - return ret; -} - -static void mlx5e_tls_del(struct net_device *netdev, - struct tls_context *tls_ctx, - enum tls_offload_ctx_dir direction) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - unsigned int handle; - - handle = ntohl((direction == TLS_OFFLOAD_CTX_DIR_TX) ? - mlx5e_get_tls_tx_context(tls_ctx)->swid : - mlx5e_get_tls_rx_context(tls_ctx)->handle); - - mlx5_accel_tls_del_flow(priv->mdev, handle, - direction == TLS_OFFLOAD_CTX_DIR_TX); -} - -static int mlx5e_tls_resync(struct net_device *netdev, struct sock *sk, - u32 seq, u8 *rcd_sn_data, - enum tls_offload_ctx_dir direction) -{ - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_tls_offload_context_rx *rx_ctx; - __be64 rcd_sn = *(__be64 *)rcd_sn_data; - - if (WARN_ON_ONCE(direction != TLS_OFFLOAD_CTX_DIR_RX)) - return -EINVAL; - rx_ctx = mlx5e_get_tls_rx_context(tls_ctx); - - netdev_info(netdev, "resyncing seq %d rcd %lld\n", seq, - be64_to_cpu(rcd_sn)); - mlx5_accel_tls_resync_rx(priv->mdev, rx_ctx->handle, seq, rcd_sn); - atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_reply); - - return 0; -} - -static const struct tlsdev_ops mlx5e_tls_ops = { - .tls_dev_add = mlx5e_tls_add, - .tls_dev_del = mlx5e_tls_del, - .tls_dev_resync = mlx5e_tls_resync, -}; - -void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) -{ - struct net_device *netdev = priv->netdev; - u32 caps; - - if (mlx5e_accel_is_ktls_device(priv->mdev)) { - mlx5e_ktls_build_netdev(priv); - return; - } - - /* FPGA */ - if (!mlx5e_accel_is_tls_device(priv->mdev)) - return; - - caps = mlx5_accel_tls_device_caps(priv->mdev); - if (caps & MLX5_ACCEL_TLS_TX) { - netdev->features |= NETIF_F_HW_TLS_TX; - netdev->hw_features |= NETIF_F_HW_TLS_TX; - } - - if (caps & MLX5_ACCEL_TLS_RX) { - netdev->features |= NETIF_F_HW_TLS_RX; - netdev->hw_features |= NETIF_F_HW_TLS_RX; - } - - if (!(caps & MLX5_ACCEL_TLS_LRO)) { - netdev->features &= ~NETIF_F_LRO; - netdev->hw_features &= ~NETIF_F_LRO; - } - - netdev->tlsdev_ops = &mlx5e_tls_ops; -} - -int mlx5e_tls_init(struct mlx5e_priv *priv) -{ - struct mlx5e_tls *tls; - - if (!mlx5e_accel_is_tls_device(priv->mdev)) - return 0; - - tls = kzalloc(sizeof(*tls), GFP_KERNEL); - if (!tls) - return -ENOMEM; - - priv->tls = tls; - return 0; -} - -void mlx5e_tls_cleanup(struct mlx5e_priv *priv) -{ - struct mlx5e_tls *tls = priv->tls; - - if (!tls) - return; - - kfree(tls); - priv->tls = NULL; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h deleted file mode 100644 index 62ecf14bf86a..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ -#ifndef __MLX5E_TLS_H__ -#define __MLX5E_TLS_H__ - -#include "accel/tls.h" -#include "en_accel/ktls.h" - -#ifdef CONFIG_MLX5_EN_TLS -#include <net/tls.h> -#include "en.h" - -struct mlx5e_tls_sw_stats { - atomic64_t tx_tls_ctx; - atomic64_t tx_tls_del; - atomic64_t tx_tls_drop_metadata; - atomic64_t tx_tls_drop_resync_alloc; - atomic64_t tx_tls_drop_no_sync_data; - atomic64_t tx_tls_drop_bypass_required; - atomic64_t rx_tls_ctx; - atomic64_t rx_tls_del; - atomic64_t rx_tls_drop_resync_request; - atomic64_t rx_tls_resync_request; - atomic64_t rx_tls_resync_reply; - atomic64_t rx_tls_auth_fail; -}; - -struct mlx5e_tls { - struct mlx5e_tls_sw_stats sw_stats; - struct workqueue_struct *rx_wq; -}; - -struct mlx5e_tls_offload_context_tx { - struct tls_offload_context_tx base; - u32 expected_seq; - __be32 swid; -}; - -static inline struct mlx5e_tls_offload_context_tx * -mlx5e_get_tls_tx_context(struct tls_context *tls_ctx) -{ - BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_tx) > - TLS_OFFLOAD_CONTEXT_SIZE_TX); - return container_of(tls_offload_ctx_tx(tls_ctx), - struct mlx5e_tls_offload_context_tx, - base); -} - -struct mlx5e_tls_offload_context_rx { - struct tls_offload_context_rx base; - __be32 handle; -}; - -static inline struct mlx5e_tls_offload_context_rx * -mlx5e_get_tls_rx_context(struct tls_context *tls_ctx) -{ - BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_rx) > - TLS_OFFLOAD_CONTEXT_SIZE_RX); - return container_of(tls_offload_ctx_rx(tls_ctx), - struct mlx5e_tls_offload_context_rx, - base); -} - -static inline bool mlx5e_is_tls_on(struct mlx5e_priv *priv) -{ - return priv->tls; -} - -void mlx5e_tls_build_netdev(struct mlx5e_priv *priv); -int mlx5e_tls_init(struct mlx5e_priv *priv); -void mlx5e_tls_cleanup(struct mlx5e_priv *priv); - -int mlx5e_tls_get_count(struct mlx5e_priv *priv); -int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data); -int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data); - -static inline bool mlx5e_accel_is_tls_device(struct mlx5_core_dev *mdev) -{ - return !is_kdump_kernel() && - mlx5_accel_is_tls_device(mdev); -} - -#else - -static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) -{ - if (!is_kdump_kernel() && - mlx5_accel_is_ktls_device(priv->mdev)) - mlx5e_ktls_build_netdev(priv); -} - -static inline bool mlx5e_is_tls_on(struct mlx5e_priv *priv) { return false; } -static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; } -static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { } -static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; } -static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; } -static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; } -static inline bool mlx5e_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; } - -#endif - -#endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c deleted file mode 100644 index 7a700f913582..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ /dev/null @@ -1,390 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include "en_accel/tls.h" -#include "en_accel/tls_rxtx.h" -#include "accel/accel.h" - -#include <net/inet6_hashtables.h> -#include <linux/ipv6.h> - -#define SYNDROM_DECRYPTED 0x30 -#define SYNDROM_RESYNC_REQUEST 0x31 -#define SYNDROM_AUTH_FAILED 0x32 - -#define SYNDROME_OFFLOAD_REQUIRED 32 -#define SYNDROME_SYNC 33 - -struct sync_info { - u64 rcd_sn; - s32 sync_len; - int nr_frags; - skb_frag_t frags[MAX_SKB_FRAGS]; -}; - -struct recv_metadata_content { - u8 syndrome; - u8 reserved; - __be32 sync_seq; -} __packed; - -struct send_metadata_content { - /* One byte of syndrome followed by 3 bytes of swid */ - __be32 syndrome_swid; - __be16 first_seq; -} __packed; - -struct mlx5e_tls_metadata { - union { - /* from fpga to host */ - struct recv_metadata_content recv; - /* from host to fpga */ - struct send_metadata_content send; - unsigned char raw[6]; - } __packed content; - /* packet type ID field */ - __be16 ethertype; -} __packed; - -static int mlx5e_tls_add_metadata(struct sk_buff *skb, __be32 swid) -{ - struct mlx5e_tls_metadata *pet; - struct ethhdr *eth; - - if (skb_cow_head(skb, sizeof(struct mlx5e_tls_metadata))) - return -ENOMEM; - - eth = (struct ethhdr *)skb_push(skb, sizeof(struct mlx5e_tls_metadata)); - skb->mac_header -= sizeof(struct mlx5e_tls_metadata); - pet = (struct mlx5e_tls_metadata *)(eth + 1); - - memmove(skb->data, skb->data + sizeof(struct mlx5e_tls_metadata), - 2 * ETH_ALEN); - - eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE); - pet->content.send.syndrome_swid = - htonl(SYNDROME_OFFLOAD_REQUIRED << 24) | swid; - - return 0; -} - -static int mlx5e_tls_get_sync_data(struct mlx5e_tls_offload_context_tx *context, - u32 tcp_seq, struct sync_info *info) -{ - int remaining, i = 0, ret = -EINVAL; - struct tls_record_info *record; - unsigned long flags; - s32 sync_size; - - spin_lock_irqsave(&context->base.lock, flags); - record = tls_get_record(&context->base, tcp_seq, &info->rcd_sn); - - if (unlikely(!record)) - goto out; - - sync_size = tcp_seq - tls_record_start_seq(record); - info->sync_len = sync_size; - if (unlikely(sync_size < 0)) { - if (tls_record_is_start_marker(record)) - goto done; - - goto out; - } - - remaining = sync_size; - while (remaining > 0) { - info->frags[i] = record->frags[i]; - __skb_frag_ref(&info->frags[i]); - remaining -= skb_frag_size(&info->frags[i]); - - if (remaining < 0) - skb_frag_size_add(&info->frags[i], remaining); - - i++; - } - info->nr_frags = i; -done: - ret = 0; -out: - spin_unlock_irqrestore(&context->base.lock, flags); - return ret; -} - -static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb, - struct sk_buff *nskb, u32 tcp_seq, - int headln, __be64 rcd_sn) -{ - struct mlx5e_tls_metadata *pet; - u8 syndrome = SYNDROME_SYNC; - struct iphdr *iph; - struct tcphdr *th; - int data_len, mss; - - nskb->dev = skb->dev; - skb_reset_mac_header(nskb); - skb_set_network_header(nskb, skb_network_offset(skb)); - skb_set_transport_header(nskb, skb_transport_offset(skb)); - memcpy(nskb->data, skb->data, headln); - memcpy(nskb->data + headln, &rcd_sn, sizeof(rcd_sn)); - - iph = ip_hdr(nskb); - iph->tot_len = htons(nskb->len - skb_network_offset(nskb)); - th = tcp_hdr(nskb); - data_len = nskb->len - headln; - tcp_seq -= data_len; - th->seq = htonl(tcp_seq); - - mss = nskb->dev->mtu - (headln - skb_network_offset(nskb)); - skb_shinfo(nskb)->gso_size = 0; - if (data_len > mss) { - skb_shinfo(nskb)->gso_size = mss; - skb_shinfo(nskb)->gso_segs = DIV_ROUND_UP(data_len, mss); - } - skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; - - pet = (struct mlx5e_tls_metadata *)(nskb->data + sizeof(struct ethhdr)); - memcpy(pet, &syndrome, sizeof(syndrome)); - pet->content.send.first_seq = htons(tcp_seq); - - /* MLX5 devices don't care about the checksum partial start, offset - * and pseudo header - */ - nskb->ip_summed = CHECKSUM_PARTIAL; - - nskb->queue_mapping = skb->queue_mapping; -} - -static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context, - struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5e_tls *tls) -{ - u32 tcp_seq = ntohl(tcp_hdr(skb)->seq); - struct sync_info info; - struct sk_buff *nskb; - int linear_len = 0; - int headln; - int i; - - sq->stats->tls_ooo++; - - if (mlx5e_tls_get_sync_data(context, tcp_seq, &info)) { - /* We might get here if a retransmission reaches the driver - * after the relevant record is acked. - * It should be safe to drop the packet in this case - */ - atomic64_inc(&tls->sw_stats.tx_tls_drop_no_sync_data); - goto err_out; - } - - if (unlikely(info.sync_len < 0)) { - u32 payload; - - headln = skb_transport_offset(skb) + tcp_hdrlen(skb); - payload = skb->len - headln; - if (likely(payload <= -info.sync_len)) - /* SKB payload doesn't require offload - */ - return true; - - atomic64_inc(&tls->sw_stats.tx_tls_drop_bypass_required); - goto err_out; - } - - if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) { - atomic64_inc(&tls->sw_stats.tx_tls_drop_metadata); - goto err_out; - } - - headln = skb_transport_offset(skb) + tcp_hdrlen(skb); - linear_len += headln + sizeof(info.rcd_sn); - nskb = alloc_skb(linear_len, GFP_ATOMIC); - if (unlikely(!nskb)) { - atomic64_inc(&tls->sw_stats.tx_tls_drop_resync_alloc); - goto err_out; - } - - context->expected_seq = tcp_seq + skb->len - headln; - skb_put(nskb, linear_len); - for (i = 0; i < info.nr_frags; i++) - skb_shinfo(nskb)->frags[i] = info.frags[i]; - - skb_shinfo(nskb)->nr_frags = info.nr_frags; - nskb->data_len = info.sync_len; - nskb->len += info.sync_len; - sq->stats->tls_resync_bytes += nskb->len; - mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln, - cpu_to_be64(info.rcd_sn)); - mlx5e_sq_xmit_simple(sq, nskb, true); - - return true; - -err_out: - dev_kfree_skb_any(skb); - return false; -} - -bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, - struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_tls_offload_context_tx *context; - struct tls_context *tls_ctx; - u32 expected_seq; - int datalen; - u32 skb_seq; - - datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); - if (!datalen) - return true; - - mlx5e_tx_mpwqe_ensure_complete(sq); - - tls_ctx = tls_get_ctx(skb->sk); - if (WARN_ON_ONCE(tls_ctx->netdev != netdev)) - goto err_out; - - if (mlx5e_accel_is_ktls_tx(sq->mdev)) - return mlx5e_ktls_handle_tx_skb(tls_ctx, sq, skb, datalen, state); - - /* FPGA */ - skb_seq = ntohl(tcp_hdr(skb)->seq); - context = mlx5e_get_tls_tx_context(tls_ctx); - expected_seq = context->expected_seq; - - if (unlikely(expected_seq != skb_seq)) - return mlx5e_tls_handle_ooo(context, sq, skb, priv->tls); - - if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) { - atomic64_inc(&priv->tls->sw_stats.tx_tls_drop_metadata); - dev_kfree_skb_any(skb); - return false; - } - - context->expected_seq = skb_seq + datalen; - return true; - -err_out: - dev_kfree_skb_any(skb); - return false; -} - -static int tls_update_resync_sn(struct net_device *netdev, - struct sk_buff *skb, - struct mlx5e_tls_metadata *mdata) -{ - struct sock *sk = NULL; - struct iphdr *iph; - struct tcphdr *th; - __be32 seq; - - if (mdata->ethertype != htons(ETH_P_IP)) - return -EINVAL; - - iph = (struct iphdr *)(mdata + 1); - - th = ((void *)iph) + iph->ihl * 4; - - if (iph->version == 4) { - sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo, - iph->saddr, th->source, iph->daddr, - th->dest, netdev->ifindex); -#if IS_ENABLED(CONFIG_IPV6) - } else { - struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph; - - sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo, - &ipv6h->saddr, th->source, - &ipv6h->daddr, ntohs(th->dest), - netdev->ifindex, 0); -#endif - } - if (!sk || sk->sk_state == TCP_TIME_WAIT) { - struct mlx5e_priv *priv = netdev_priv(netdev); - - atomic64_inc(&priv->tls->sw_stats.rx_tls_drop_resync_request); - goto out; - } - - skb->sk = sk; - skb->destructor = sock_edemux; - - memcpy(&seq, &mdata->content.recv.sync_seq, sizeof(seq)); - tls_offload_rx_resync_request(sk, seq); -out: - return 0; -} - -/* FPGA tls rx handler */ -void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb, - u32 *cqe_bcnt) -{ - struct mlx5e_tls_metadata *mdata; - struct mlx5e_priv *priv; - - /* Use the metadata */ - mdata = (struct mlx5e_tls_metadata *)(skb->data + ETH_HLEN); - switch (mdata->content.recv.syndrome) { - case SYNDROM_DECRYPTED: - skb->decrypted = 1; - break; - case SYNDROM_RESYNC_REQUEST: - tls_update_resync_sn(rq->netdev, skb, mdata); - priv = netdev_priv(rq->netdev); - atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_request); - break; - case SYNDROM_AUTH_FAILED: - /* Authentication failure will be observed and verified by kTLS */ - priv = netdev_priv(rq->netdev); - atomic64_inc(&priv->tls->sw_stats.rx_tls_auth_fail); - break; - default: - /* Bypass the metadata header to others */ - return; - } - - remove_metadata_hdr(skb); - *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN; -} - -u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) -{ - if (!mlx5e_accel_is_tls_device(mdev)) - return 0; - - if (mlx5e_accel_is_ktls_device(mdev)) - return mlx5e_ktls_get_stop_room(mdev, params); - - /* FPGA */ - /* Resync SKB. */ - return mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h deleted file mode 100644 index 0ca0a023fb8d..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef __MLX5E_TLS_RXTX_H__ -#define __MLX5E_TLS_RXTX_H__ - -#include "accel/accel.h" -#include "en_accel/ktls_txrx.h" - -#ifdef CONFIG_MLX5_EN_TLS - -#include <linux/skbuff.h> -#include "en.h" -#include "en/txrx.h" - -u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); - -bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, - struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state); - -static inline bool mlx5e_tls_skb_offloaded(struct sk_buff *skb) -{ - return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk); -} - -static inline void -mlx5e_tls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg, - struct mlx5e_accel_tx_tls_state *state) -{ - cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8); -} - -void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb, - u32 *cqe_bcnt); - -static inline void -mlx5e_tls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, - struct mlx5_cqe64 *cqe, u32 *cqe_bcnt) -{ - if (unlikely(get_cqe_tls_offload(cqe))) /* cqe bit indicates a TLS device */ - return mlx5e_ktls_handle_rx_skb(rq, skb, cqe, cqe_bcnt); - - if (unlikely(test_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state) && is_metadata_hdr_valid(skb))) - return mlx5e_tls_handle_rx_skb_metadata(rq, skb, cqe_bcnt); -} - -#else - -static inline bool -mlx5e_accel_is_tls(struct mlx5_cqe64 *cqe, struct sk_buff *skb) { return false; } -static inline void -mlx5e_tls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, - struct mlx5_cqe64 *cqe, u32 *cqe_bcnt) {} -static inline u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) -{ - return 0; -} - -#endif /* CONFIG_MLX5_EN_TLS */ - -#endif /* __MLX5E_TLS_RXTX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index fe5d82fa6e92..0ae1865086ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -114,47 +114,49 @@ static enum mlx5_traffic_types arfs_get_tt(enum arfs_type type) } } -static int arfs_disable(struct mlx5e_priv *priv) +static int arfs_disable(struct mlx5e_flow_steering *fs) { + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); int err, i; for (i = 0; i < ARFS_NUM_TYPES; i++) { /* Modify ttc rules destination back to their default */ - err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, arfs_get_tt(i)); + err = mlx5_ttc_fwd_default_dest(ttc, arfs_get_tt(i)); if (err) { - netdev_err(priv->netdev, - "%s: modify ttc[%d] default destination failed, err(%d)\n", - __func__, arfs_get_tt(i), err); + fs_err(fs, + "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, arfs_get_tt(i), err); return err; } } return 0; } -static void arfs_del_rules(struct mlx5e_priv *priv); +static void arfs_del_rules(struct mlx5e_flow_steering *fs); -int mlx5e_arfs_disable(struct mlx5e_priv *priv) +int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs) { - arfs_del_rules(priv); + arfs_del_rules(fs); - return arfs_disable(priv); + return arfs_disable(fs); } -int mlx5e_arfs_enable(struct mlx5e_priv *priv) +int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs) { + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); struct mlx5_flow_destination dest = {}; int err, i; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; for (i = 0; i < ARFS_NUM_TYPES; i++) { - dest.ft = priv->fs.arfs->arfs_tables[i].ft.t; + dest.ft = arfs->arfs_tables[i].ft.t; /* Modify ttc rules destination to point on the aRFS FTs */ - err = mlx5_ttc_fwd_dest(priv->fs.ttc, arfs_get_tt(i), &dest); + err = mlx5_ttc_fwd_dest(ttc, arfs_get_tt(i), &dest); if (err) { - netdev_err(priv->netdev, - "%s: modify ttc[%d] dest to arfs, failed err(%d)\n", - __func__, arfs_get_tt(i), err); - arfs_disable(priv); + fs_err(fs, "%s: modify ttc[%d] dest to arfs, failed err(%d)\n", + __func__, arfs_get_tt(i), err); + arfs_disable(fs); return err; } } @@ -167,31 +169,37 @@ static void arfs_destroy_table(struct arfs_table *arfs_t) mlx5e_destroy_flow_table(&arfs_t->ft); } -static void _mlx5e_cleanup_tables(struct mlx5e_priv *priv) +static void _mlx5e_cleanup_tables(struct mlx5e_flow_steering *fs) { + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); int i; - arfs_del_rules(priv); - destroy_workqueue(priv->fs.arfs->wq); + arfs_del_rules(fs); + destroy_workqueue(arfs->wq); for (i = 0; i < ARFS_NUM_TYPES; i++) { - if (!IS_ERR_OR_NULL(priv->fs.arfs->arfs_tables[i].ft.t)) - arfs_destroy_table(&priv->fs.arfs->arfs_tables[i]); + if (!IS_ERR_OR_NULL(arfs->arfs_tables[i].ft.t)) + arfs_destroy_table(&arfs->arfs_tables[i]); } } -void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) +void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple) { - if (!(priv->netdev->hw_features & NETIF_F_NTUPLE)) + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); + + if (!ntuple) return; - _mlx5e_cleanup_tables(priv); - kvfree(priv->fs.arfs); + _mlx5e_cleanup_tables(fs); + mlx5e_fs_set_arfs(fs, NULL); + kvfree(arfs); } -static int arfs_add_default_rule(struct mlx5e_priv *priv, +static int arfs_add_default_rule(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, enum arfs_type type) { - struct arfs_table *arfs_t = &priv->fs.arfs->arfs_tables[type]; + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); + struct arfs_table *arfs_t = &arfs->arfs_tables[type]; struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); enum mlx5_traffic_types tt; @@ -200,23 +208,21 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; tt = arfs_get_tt(type); if (tt == -EINVAL) { - netdev_err(priv->netdev, "%s: bad arfs_type: %d\n", - __func__, type); + fs_err(fs, "%s: bad arfs_type: %d\n", __func__, type); return -EINVAL; } /* FIXME: Must use mlx5_ttc_get_default_dest(), * but can't since TTC default is not setup yet ! */ - dest.tir_num = mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt); + dest.tir_num = mlx5e_rx_res_get_tirn_rss(rx_res, tt); arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL, &flow_act, &dest, 1); if (IS_ERR(arfs_t->default_rule)) { err = PTR_ERR(arfs_t->default_rule); arfs_t->default_rule = NULL; - netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n", - __func__, type); + fs_err(fs, "%s: add rule failed, arfs type=%d\n", __func__, type); } return err; @@ -318,10 +324,12 @@ out: return err; } -static int arfs_create_table(struct mlx5e_priv *priv, +static int arfs_create_table(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, enum arfs_type type) { - struct mlx5e_arfs_tables *arfs = priv->fs.arfs; + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false); + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft; struct mlx5_flow_table_attr ft_attr = {}; int err; @@ -332,7 +340,7 @@ static int arfs_create_table(struct mlx5e_priv *priv, ft_attr.level = MLX5E_ARFS_FT_LEVEL; ft_attr.prio = MLX5E_NIC_PRIO; - ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + ft->t = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -343,7 +351,7 @@ static int arfs_create_table(struct mlx5e_priv *priv, if (err) goto err; - err = arfs_add_default_rule(priv, type); + err = arfs_add_default_rule(fs, rx_res, type); if (err) goto err; @@ -353,35 +361,40 @@ err: return err; } -int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) +int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, bool ntuple) { + struct mlx5e_arfs_tables *arfs; int err = -ENOMEM; int i; - if (!(priv->netdev->hw_features & NETIF_F_NTUPLE)) + if (!ntuple) return 0; - priv->fs.arfs = kvzalloc(sizeof(*priv->fs.arfs), GFP_KERNEL); - if (!priv->fs.arfs) + arfs = kvzalloc(sizeof(*arfs), GFP_KERNEL); + if (!arfs) return -ENOMEM; - spin_lock_init(&priv->fs.arfs->arfs_lock); - INIT_LIST_HEAD(&priv->fs.arfs->rules); - priv->fs.arfs->wq = create_singlethread_workqueue("mlx5e_arfs"); - if (!priv->fs.arfs->wq) + spin_lock_init(&arfs->arfs_lock); + INIT_LIST_HEAD(&arfs->rules); + arfs->wq = create_singlethread_workqueue("mlx5e_arfs"); + if (!arfs->wq) goto err; + mlx5e_fs_set_arfs(fs, arfs); + for (i = 0; i < ARFS_NUM_TYPES; i++) { - err = arfs_create_table(priv, i); + err = arfs_create_table(fs, rx_res, i); if (err) goto err_des; } return 0; err_des: - _mlx5e_cleanup_tables(priv); + _mlx5e_cleanup_tables(fs); err: - kvfree(priv->fs.arfs); + mlx5e_fs_set_arfs(fs, NULL); + kvfree(arfs); return err; } @@ -389,6 +402,7 @@ err: static void arfs_may_expire_flow(struct mlx5e_priv *priv) { + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs); struct arfs_rule *arfs_rule; struct hlist_node *htmp; HLIST_HEAD(del_list); @@ -396,8 +410,8 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv) int i; int j; - spin_lock_bh(&priv->fs.arfs->arfs_lock); - mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs->arfs_tables, i, j) { + spin_lock_bh(&arfs->arfs_lock); + mlx5e_for_each_arfs_rule(arfs_rule, htmp, arfs->arfs_tables, i, j) { if (!work_pending(&arfs_rule->arfs_work) && rps_may_expire_flow(priv->netdev, arfs_rule->rxq, arfs_rule->flow_id, @@ -408,7 +422,7 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv) break; } } - spin_unlock_bh(&priv->fs.arfs->arfs_lock); + spin_unlock_bh(&arfs->arfs_lock); hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) { if (arfs_rule->rule) mlx5_del_flow_rules(arfs_rule->rule); @@ -417,20 +431,21 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv) } } -static void arfs_del_rules(struct mlx5e_priv *priv) +static void arfs_del_rules(struct mlx5e_flow_steering *fs) { + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); struct hlist_node *htmp; struct arfs_rule *rule; HLIST_HEAD(del_list); int i; int j; - spin_lock_bh(&priv->fs.arfs->arfs_lock); - mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs->arfs_tables, i, j) { + spin_lock_bh(&arfs->arfs_lock); + mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) { hlist_del_init(&rule->hlist); hlist_add_head(&rule->hlist, &del_list); } - spin_unlock_bh(&priv->fs.arfs->arfs_lock); + spin_unlock_bh(&arfs->arfs_lock); hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) { cancel_work_sync(&rule->arfs_work); @@ -474,7 +489,7 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, struct arfs_rule *arfs_rule) { - struct mlx5e_arfs_tables *arfs = priv->fs.arfs; + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs); struct arfs_tuple *tuple = &arfs_rule->tuple; struct mlx5_flow_handle *rule = NULL; struct mlx5_flow_destination dest = {}; @@ -556,7 +571,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); - priv->channel_stats[arfs_rule->rxq].rq.arfs_err++; + priv->channel_stats[arfs_rule->rxq]->rq.arfs_err++; mlx5e_dbg(HW, priv, "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n", __func__, arfs_rule->filter_id, arfs_rule->rxq, @@ -588,13 +603,15 @@ static void arfs_handle_work(struct work_struct *work) struct arfs_rule, arfs_work); struct mlx5e_priv *priv = arfs_rule->priv; + struct mlx5e_arfs_tables *arfs; struct mlx5_flow_handle *rule; + arfs = mlx5e_fs_get_arfs(priv->fs); mutex_lock(&priv->state_lock); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - spin_lock_bh(&priv->fs.arfs->arfs_lock); + spin_lock_bh(&arfs->arfs_lock); hlist_del(&arfs_rule->hlist); - spin_unlock_bh(&priv->fs.arfs->arfs_lock); + spin_unlock_bh(&arfs->arfs_lock); mutex_unlock(&priv->state_lock); kfree(arfs_rule); @@ -620,6 +637,7 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, const struct flow_keys *fk, u16 rxq, u32 flow_id) { + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs); struct arfs_rule *rule; struct arfs_tuple *tuple; @@ -647,7 +665,7 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, tuple->dst_port = fk->ports.dst; rule->flow_id = flow_id; - rule->filter_id = priv->fs.arfs->last_filter_id++ % RPS_NO_FILTER; + rule->filter_id = arfs->last_filter_id++ % RPS_NO_FILTER; hlist_add_head(&rule->hlist, arfs_hash_bucket(arfs_t, tuple->src_port, @@ -691,11 +709,12 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_arfs_tables *arfs = priv->fs.arfs; - struct arfs_table *arfs_t; + struct mlx5e_arfs_tables *arfs; struct arfs_rule *arfs_rule; + struct arfs_table *arfs_t; struct flow_keys fk; + arfs = mlx5e_fs_get_arfs(priv->fs); if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) return -EPROTONOSUPPORT; @@ -725,7 +744,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, return -ENOMEM; } } - queue_work(priv->fs.arfs->wq, &arfs_rule->arfs_work); + queue_work(arfs->wq, &arfs_rule->arfs_work); spin_unlock_bh(&arfs->arfs_lock); return arfs_rule->filter_id; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index c0f409c195bf..68f19324db93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -46,8 +46,7 @@ void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc) MLX5_SET(mkc, mkc, relaxed_ordering_write, ro_pci_enable && ro_write); } -static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, - u32 *mkey) +int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey) { int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); void *mkc; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index a4c8d8d00d5a..2449731b7d79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1026,15 +1026,6 @@ void mlx5e_dcbnl_build_netdev(struct net_device *netdev) netdev->dcbnl_ops = &mlx5e_dcbnl_ops; } -void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; - - if (MLX5_CAP_GEN(mdev, qos)) - netdev->dcbnl_ops = &mlx5e_dcbnl_ops; -} - static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv, enum mlx5_dcbx_oper_mode *mode) { @@ -1142,7 +1133,7 @@ static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context) err = mlx5_set_trust_state(priv->mdev, *trust_state); if (err) return err; - priv->dcbx_dp.trust_state = *trust_state; + WRITE_ONCE(priv->dcbx_dp.trust_state, *trust_state); return 0; } @@ -1187,16 +1178,28 @@ static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio) static int mlx5e_trust_initialize(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + u8 trust_state; int err; - priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP; - - if (!MLX5_DSCP_SUPPORTED(mdev)) + if (!MLX5_DSCP_SUPPORTED(mdev)) { + WRITE_ONCE(priv->dcbx_dp.trust_state, MLX5_QPTS_TRUST_PCP); return 0; + } - err = mlx5_query_trust_state(priv->mdev, &priv->dcbx_dp.trust_state); + err = mlx5_query_trust_state(priv->mdev, &trust_state); if (err) return err; + WRITE_ONCE(priv->dcbx_dp.trust_state, trust_state); + + if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_PCP && priv->dcbx.dscp_app_cnt) { + /* + * Align the driver state with the register state. + * Temporary state change is required to enable the app list reset. + */ + priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_DSCP; + mlx5e_dcbnl_delete_app(priv); + priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP; + } mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params, priv->dcbx_dp.trust_state); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c2ea5fad48dd..24aa25da482b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -30,24 +30,27 @@ * SOFTWARE. */ +#include <linux/ethtool_netlink.h> + #include "en.h" #include "en/port.h" #include "en/params.h" #include "en/xsk/pool.h" #include "en/ptp.h" #include "lib/clock.h" +#include "en/fs_ethtool.h" void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo) { struct mlx5_core_dev *mdev = priv->mdev; - strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); + strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); - strlcpy(drvinfo->bus_info, dev_name(mdev->device), + strscpy(drvinfo->bus_info, dev_name(mdev->device), sizeof(drvinfo->bus_info)); } @@ -305,20 +308,36 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, } void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, - struct ethtool_ringparam *param) + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param) { - param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + /* Limitation for regular RQ. XSK RQ may clamp the queue length in + * mlx5e_mpwqe_get_log_rq_size. + */ + u8 max_log_mpwrq_pkts = mlx5e_mpwrq_max_log_rq_pkts(priv->mdev, + PAGE_SHIFT, + MLX5E_MPWRQ_UMR_MODE_ALIGNED); + + param->rx_max_pending = 1 << min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE, + max_log_mpwrq_pkts); param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames; param->tx_pending = 1 << priv->channels.params.log_sq_size; + + kernel_param->tcp_data_split = + (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) ? + ETHTOOL_TCP_DATA_SPLIT_ENABLED : + ETHTOOL_TCP_DATA_SPLIT_DISABLED; } static void mlx5e_get_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = netdev_priv(dev); - mlx5e_ethtool_get_ringparam(priv, param); + mlx5e_ethtool_get_ringparam(priv, param, kernel_param); } int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, @@ -380,7 +399,9 @@ unlock: } static int mlx5e_set_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -447,7 +468,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, * because the numeration of the QoS SQs will change, while per-queue * qdiscs are attached. */ - if (priv->htb.maj_id) { + if (mlx5e_selq_is_htb_enabled(&priv->selq)) { err = -EINVAL; netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the number of channels\n", __func__); @@ -482,14 +503,14 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, arfs_enabled = opened && (priv->netdev->features & NETIF_F_NTUPLE); if (arfs_enabled) - mlx5e_arfs_disable(priv); + mlx5e_arfs_disable(priv->fs); /* Switch to new channels, set new parameters and close old ones */ err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_num_channels_changed_ctx, NULL, true); if (arfs_enabled) { - int err2 = mlx5e_arfs_enable(priv); + int err2 = mlx5e_arfs_enable(priv->fs); if (err2) netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n", @@ -511,7 +532,8 @@ static int mlx5e_set_channels(struct net_device *dev, } int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, - struct ethtool_coalesce *coal) + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal) { struct dim_cq_moder *rx_moder, *tx_moder; @@ -528,6 +550,11 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, coal->tx_max_coalesced_frames = tx_moder->pkts; coal->use_adaptive_tx_coalesce = priv->channels.params.tx_dim_enabled; + kernel_coal->use_cqe_mode_rx = + MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_BASED_MODER); + kernel_coal->use_cqe_mode_tx = + MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_CQE_BASED_MODER); + return 0; } @@ -538,7 +565,7 @@ static int mlx5e_get_coalesce(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); - return mlx5e_ethtool_get_coalesce(priv, coal); + return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal); } #define MLX5E_MAX_COAL_TIME MLX5_MAX_CQ_PERIOD @@ -578,14 +605,26 @@ mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coal } } +/* convert a boolean value of cq_mode to mlx5 period mode + * true : MLX5_CQ_PERIOD_MODE_START_FROM_CQE + * false : MLX5_CQ_PERIOD_MODE_START_FROM_EQE + */ +static int cqe_mode_to_period_mode(bool val) +{ + return val ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; +} + int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, - struct ethtool_coalesce *coal) + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { struct dim_cq_moder *rx_moder, *tx_moder; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_params new_params; bool reset_rx, reset_tx; bool reset = true; + u8 cq_period_mode; int err = 0; if (!MLX5_CAP_GEN(mdev, cq_moderation)) @@ -605,6 +644,12 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, return -ERANGE; } + if ((kernel_coal->use_cqe_mode_rx || kernel_coal->use_cqe_mode_tx) && + !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) { + NL_SET_ERR_MSG_MOD(extack, "cqe_mode_rx/tx is not supported on this device"); + return -EOPNOTSUPP; + } + mutex_lock(&priv->state_lock); new_params = priv->channels.params; @@ -621,6 +666,18 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled; reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled; + cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_rx); + if (cq_period_mode != rx_moder->cq_period_mode) { + mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode); + reset_rx = true; + } + + cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_tx); + if (cq_period_mode != tx_moder->cq_period_mode) { + mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode); + reset_tx = true; + } + if (reset_rx) { u8 mode = MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_BASED_MODER); @@ -656,9 +713,9 @@ static int mlx5e_set_coalesce(struct net_device *netdev, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { - struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_priv *priv = netdev_priv(netdev); - return mlx5e_ethtool_set_coalesce(priv, coal); + return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack); } static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev, @@ -1752,7 +1809,7 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, if (size_read < 0) { netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n", __func__, size_read); - return 0; + return size_read; } i += size_read; @@ -1843,24 +1900,19 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, bool is_rx_cq) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_params new_params; - bool mode_changed; u8 cq_period_mode, current_cq_period_mode; + struct mlx5e_params new_params; + + if (enable && !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) + return -EOPNOTSUPP; + + cq_period_mode = cqe_mode_to_period_mode(enable); - cq_period_mode = enable ? - MLX5_CQ_PERIOD_MODE_START_FROM_CQE : - MLX5_CQ_PERIOD_MODE_START_FROM_EQE; current_cq_period_mode = is_rx_cq ? priv->channels.params.rx_cq_moderation.cq_period_mode : priv->channels.params.tx_cq_moderation.cq_period_mode; - mode_changed = cq_period_mode != current_cq_period_mode; - - if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && - !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)) - return -EOPNOTSUPP; - if (!mode_changed) + if (cq_period_mode == current_cq_period_mode) return 0; new_params = priv->channels.params; @@ -1894,7 +1946,7 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val if (curr_val == new_val) return 0; - if (new_val && !priv->profile->rx_ptp_support && rx_filter) { + if (new_val && !mlx5e_profile_feature_cap(priv->profile, PTP_RX) && rx_filter) { netdev_err(priv->netdev, "Profile doesn't support enabling of CQE compression while hardware time-stamping is enabled.\n"); return -EINVAL; @@ -1953,10 +2005,14 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) struct mlx5e_params new_params; if (enable) { - if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) - return -EOPNOTSUPP; - if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params)) - return -EINVAL; + /* Checking the regular RQ here; mlx5e_validate_xsk_param called + * from mlx5e_open_xsk will check for each XSK queue, and + * mlx5e_safe_switch_params will be reverted if any check fails. + */ + int err = mlx5e_mpwrq_validate_regular(mdev, &priv->channels.params); + + if (err) + return err; } else if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) { netdev_warn(netdev, "Can't set legacy RQ with HW-GRO/LRO, disable them first\n"); return -EINVAL; @@ -2032,7 +2088,7 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) * the numeration of the QoS SQs will change, while per-queue qdiscs are * attached. */ - if (priv->htb.maj_id) { + if (mlx5e_selq_is_htb_enabled(&priv->selq)) { netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the PTP state\n", __func__); return -EINVAL; @@ -2358,7 +2414,8 @@ static void mlx5e_get_rmon_stats(struct net_device *netdev, const struct ethtool_ops mlx5e_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | - ETHTOOL_COALESCE_USE_ADAPTIVE, + ETHTOOL_COALESCE_USE_ADAPTIVE | + ETHTOOL_COALESCE_USE_CQE, .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, .get_link_ext_state = mlx5e_get_link_ext_state, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index aeff1d972a46..1892ccb889b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -36,14 +36,42 @@ #include <linux/tcp.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/mpfs.h> -#include "en.h" -#include "en_rep.h" +#include "en_tc.h" #include "lib/mpfs.h" #include "en/ptp.h" +#include "en/fs_ethtool.h" + +struct mlx5e_flow_steering { + struct work_struct set_rx_mode_work; + bool state_destroy; + bool vlan_strip_disable; + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_namespace *egress_ns; +#ifdef CONFIG_MLX5_EN_RXNFC + struct mlx5e_ethtool_steering *ethtool; +#endif + struct mlx5e_tc_table *tc; + struct mlx5e_promisc_table promisc; + struct mlx5e_vlan_table *vlan; + struct mlx5e_l2_table l2; + struct mlx5_ttc_table *ttc; + struct mlx5_ttc_table *inner_ttc; +#ifdef CONFIG_MLX5_EN_ARFS + struct mlx5e_arfs_tables *arfs; +#endif +#ifdef CONFIG_MLX5_EN_TLS + struct mlx5e_accel_fs_tcp *accel_tcp; +#endif + struct mlx5e_fs_udp *udp; + struct mlx5e_fs_any *any; + struct mlx5e_ptp_fs *ptp_fs; +}; -static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, +static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs, struct mlx5e_l2_rule *ai, int type); -static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, +static void mlx5e_del_l2_flow_rule(struct mlx5e_flow_steering *fs, struct mlx5e_l2_rule *ai); enum { @@ -132,9 +160,8 @@ struct mlx5_flow_table *mlx5e_vlan_get_flowtable(struct mlx5e_vlan_table *vlan) return vlan->ft.t; } -static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) +static int mlx5e_vport_context_update_vlans(struct mlx5e_flow_steering *fs) { - struct net_device *ndev = priv->netdev; int max_list_size; int list_size; u16 *vlans; @@ -143,35 +170,34 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) int i; list_size = 0; - for_each_set_bit(vlan, priv->fs.vlan->active_cvlans, VLAN_N_VID) + for_each_set_bit(vlan, fs->vlan->active_cvlans, VLAN_N_VID) list_size++; - max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); + max_list_size = 1 << MLX5_CAP_GEN(fs->mdev, log_max_vlan_list); if (list_size > max_list_size) { - netdev_warn(ndev, - "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n", - list_size, max_list_size); + fs_warn(fs, "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n", + list_size, max_list_size); list_size = max_list_size; } - vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL); + vlans = kvcalloc(list_size, sizeof(*vlans), GFP_KERNEL); if (!vlans) return -ENOMEM; i = 0; - for_each_set_bit(vlan, priv->fs.vlan->active_cvlans, VLAN_N_VID) { + for_each_set_bit(vlan, fs->vlan->active_cvlans, VLAN_N_VID) { if (i >= list_size) break; vlans[i++] = vlan; } - err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size); + err = mlx5_modify_nic_vport_vlans(fs->mdev, vlans, list_size); if (err) - netdev_err(ndev, "Failed to modify vport vlans list err(%d)\n", - err); + fs_err(fs, "Failed to modify vport vlans list err(%d)\n", + err); - kfree(vlans); + kvfree(vlans); return err; } @@ -183,18 +209,18 @@ enum mlx5e_vlan_rule_type { MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, }; -static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, +static int __mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs, enum mlx5e_vlan_rule_type rule_type, u16 vid, struct mlx5_flow_spec *spec) { - struct mlx5_flow_table *ft = priv->fs.vlan->ft.t; + struct mlx5_flow_table *ft = fs->vlan->ft.t; struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle **rule_p; MLX5_DECLARE_FLOW_ACT(flow_act); int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = priv->fs.l2.ft.t; + dest.ft = fs->l2.ft.t; spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; @@ -204,24 +230,24 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, * disabled in match value means both S & C tags * don't exist (untagged of both) */ - rule_p = &priv->fs.vlan->untagged_rule; + rule_p = &fs->vlan->untagged_rule; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); break; case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: - rule_p = &priv->fs.vlan->any_cvlan_rule; + rule_p = &fs->vlan->any_cvlan_rule; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); break; case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: - rule_p = &priv->fs.vlan->any_svlan_rule; + rule_p = &fs->vlan->any_svlan_rule; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.svlan_tag); MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); break; case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: - rule_p = &priv->fs.vlan->active_svlans_rule[vid]; + rule_p = &fs->vlan->active_svlans_rule[vid]; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.svlan_tag); MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); @@ -231,7 +257,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, vid); break; default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */ - rule_p = &priv->fs.vlan->active_cvlans_rule[vid]; + rule_p = &fs->vlan->active_cvlans_rule[vid]; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); @@ -250,13 +276,13 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, if (IS_ERR(*rule_p)) { err = PTR_ERR(*rule_p); *rule_p = NULL; - netdev_err(priv->netdev, "%s: add rule failed\n", __func__); + fs_err(fs, "%s: add rule failed\n", __func__); } return err; } -static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, +static int mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs, enum mlx5e_vlan_rule_type rule_type, u16 vid) { struct mlx5_flow_spec *spec; @@ -267,68 +293,68 @@ static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, return -ENOMEM; if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID) - mlx5e_vport_context_update_vlans(priv); + mlx5e_vport_context_update_vlans(fs); - err = __mlx5e_add_vlan_rule(priv, rule_type, vid, spec); + err = __mlx5e_add_vlan_rule(fs, rule_type, vid, spec); kvfree(spec); return err; } -static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, - enum mlx5e_vlan_rule_type rule_type, u16 vid) +static void mlx5e_fs_del_vlan_rule(struct mlx5e_flow_steering *fs, + enum mlx5e_vlan_rule_type rule_type, u16 vid) { switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - if (priv->fs.vlan->untagged_rule) { - mlx5_del_flow_rules(priv->fs.vlan->untagged_rule); - priv->fs.vlan->untagged_rule = NULL; + if (fs->vlan->untagged_rule) { + mlx5_del_flow_rules(fs->vlan->untagged_rule); + fs->vlan->untagged_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: - if (priv->fs.vlan->any_cvlan_rule) { - mlx5_del_flow_rules(priv->fs.vlan->any_cvlan_rule); - priv->fs.vlan->any_cvlan_rule = NULL; + if (fs->vlan->any_cvlan_rule) { + mlx5_del_flow_rules(fs->vlan->any_cvlan_rule); + fs->vlan->any_cvlan_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: - if (priv->fs.vlan->any_svlan_rule) { - mlx5_del_flow_rules(priv->fs.vlan->any_svlan_rule); - priv->fs.vlan->any_svlan_rule = NULL; + if (fs->vlan->any_svlan_rule) { + mlx5_del_flow_rules(fs->vlan->any_svlan_rule); + fs->vlan->any_svlan_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: - if (priv->fs.vlan->active_svlans_rule[vid]) { - mlx5_del_flow_rules(priv->fs.vlan->active_svlans_rule[vid]); - priv->fs.vlan->active_svlans_rule[vid] = NULL; + if (fs->vlan->active_svlans_rule[vid]) { + mlx5_del_flow_rules(fs->vlan->active_svlans_rule[vid]); + fs->vlan->active_svlans_rule[vid] = NULL; } break; case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID: - if (priv->fs.vlan->active_cvlans_rule[vid]) { - mlx5_del_flow_rules(priv->fs.vlan->active_cvlans_rule[vid]); - priv->fs.vlan->active_cvlans_rule[vid] = NULL; + if (fs->vlan->active_cvlans_rule[vid]) { + mlx5_del_flow_rules(fs->vlan->active_cvlans_rule[vid]); + fs->vlan->active_cvlans_rule[vid] = NULL; } - mlx5e_vport_context_update_vlans(priv); + mlx5e_vport_context_update_vlans(fs); break; } } -static void mlx5e_del_any_vid_rules(struct mlx5e_priv *priv) +static void mlx5e_fs_del_any_vid_rules(struct mlx5e_flow_steering *fs) { - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); } -static int mlx5e_add_any_vid_rules(struct mlx5e_priv *priv) +static int mlx5e_fs_add_any_vid_rules(struct mlx5e_flow_steering *fs) { int err; - err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); if (err) return err; - return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); + return mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); } static struct mlx5_flow_handle * @@ -352,103 +378,103 @@ mlx5e_add_trap_rule(struct mlx5_flow_table *ft, int trap_id, int tir_num) return rule; } -int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num) +int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num) { - struct mlx5_flow_table *ft = priv->fs.vlan->ft.t; + struct mlx5_flow_table *ft = fs->vlan->ft.t; struct mlx5_flow_handle *rule; int err; rule = mlx5e_add_trap_rule(ft, trap_id, tir_num); if (IS_ERR(rule)) { err = PTR_ERR(rule); - priv->fs.vlan->trap_rule = NULL; - netdev_err(priv->netdev, "%s: add VLAN trap rule failed, err %d\n", - __func__, err); + fs->vlan->trap_rule = NULL; + fs_err(fs, "%s: add VLAN trap rule failed, err %d\n", + __func__, err); return err; } - priv->fs.vlan->trap_rule = rule; + fs->vlan->trap_rule = rule; return 0; } -void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv) +void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs) { - if (priv->fs.vlan->trap_rule) { - mlx5_del_flow_rules(priv->fs.vlan->trap_rule); - priv->fs.vlan->trap_rule = NULL; + if (fs->vlan->trap_rule) { + mlx5_del_flow_rules(fs->vlan->trap_rule); + fs->vlan->trap_rule = NULL; } } -int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num) +int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num) { - struct mlx5_flow_table *ft = priv->fs.l2.ft.t; + struct mlx5_flow_table *ft = fs->l2.ft.t; struct mlx5_flow_handle *rule; int err; rule = mlx5e_add_trap_rule(ft, trap_id, tir_num); if (IS_ERR(rule)) { err = PTR_ERR(rule); - priv->fs.l2.trap_rule = NULL; - netdev_err(priv->netdev, "%s: add MAC trap rule failed, err %d\n", - __func__, err); + fs->l2.trap_rule = NULL; + fs_err(fs, "%s: add MAC trap rule failed, err %d\n", + __func__, err); return err; } - priv->fs.l2.trap_rule = rule; + fs->l2.trap_rule = rule; return 0; } -void mlx5e_remove_mac_trap(struct mlx5e_priv *priv) +void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs) { - if (priv->fs.l2.trap_rule) { - mlx5_del_flow_rules(priv->fs.l2.trap_rule); - priv->fs.l2.trap_rule = NULL; + if (fs->l2.trap_rule) { + mlx5_del_flow_rules(fs->l2.trap_rule); + fs->l2.trap_rule = NULL; } } -void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv) +void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc) { - if (!priv->fs.vlan->cvlan_filter_disabled) + if (!fs->vlan->cvlan_filter_disabled) return; - priv->fs.vlan->cvlan_filter_disabled = false; - if (priv->netdev->flags & IFF_PROMISC) + fs->vlan->cvlan_filter_disabled = false; + if (promisc) return; - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); } -void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv) +void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc) { - if (priv->fs.vlan->cvlan_filter_disabled) + if (fs->vlan->cvlan_filter_disabled) return; - priv->fs.vlan->cvlan_filter_disabled = true; - if (priv->netdev->flags & IFF_PROMISC) + fs->vlan->cvlan_filter_disabled = true; + if (promisc) return; - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); } -static int mlx5e_vlan_rx_add_cvid(struct mlx5e_priv *priv, u16 vid) +static int mlx5e_vlan_rx_add_cvid(struct mlx5e_flow_steering *fs, u16 vid) { int err; - set_bit(vid, priv->fs.vlan->active_cvlans); + set_bit(vid, fs->vlan->active_cvlans); - err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); if (err) - clear_bit(vid, priv->fs.vlan->active_cvlans); + clear_bit(vid, fs->vlan->active_cvlans); return err; } -static int mlx5e_vlan_rx_add_svid(struct mlx5e_priv *priv, u16 vid) +static int mlx5e_vlan_rx_add_svid(struct mlx5e_flow_steering *fs, + struct net_device *netdev, u16 vid) { - struct net_device *netdev = priv->netdev; int err; - set_bit(vid, priv->fs.vlan->active_svlans); + set_bit(vid, fs->vlan->active_svlans); - err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); if (err) { - clear_bit(vid, priv->fs.vlan->active_svlans); + clear_bit(vid, fs->vlan->active_svlans); return err; } @@ -457,86 +483,91 @@ static int mlx5e_vlan_rx_add_svid(struct mlx5e_priv *priv, u16 vid) return err; } -int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs, + struct net_device *netdev, + __be16 proto, u16 vid) { - struct mlx5e_priv *priv = netdev_priv(dev); - if (mlx5e_is_uplink_rep(priv)) - return 0; /* no vlan table for uplink rep */ + if (!fs->vlan) { + fs_err(fs, "Vlan doesn't exist\n"); + return -EINVAL; + } if (be16_to_cpu(proto) == ETH_P_8021Q) - return mlx5e_vlan_rx_add_cvid(priv, vid); + return mlx5e_vlan_rx_add_cvid(fs, vid); else if (be16_to_cpu(proto) == ETH_P_8021AD) - return mlx5e_vlan_rx_add_svid(priv, vid); + return mlx5e_vlan_rx_add_svid(fs, netdev, vid); return -EOPNOTSUPP; } -int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs, + struct net_device *netdev, + __be16 proto, u16 vid) { - struct mlx5e_priv *priv = netdev_priv(dev); - - if (mlx5e_is_uplink_rep(priv)) - return 0; /* no vlan table for uplink rep */ + if (!fs->vlan) { + fs_err(fs, "Vlan doesn't exist\n"); + return -EINVAL; + } if (be16_to_cpu(proto) == ETH_P_8021Q) { - clear_bit(vid, priv->fs.vlan->active_cvlans); - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + clear_bit(vid, fs->vlan->active_cvlans); + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); } else if (be16_to_cpu(proto) == ETH_P_8021AD) { - clear_bit(vid, priv->fs.vlan->active_svlans); - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); - netdev_update_features(dev); + clear_bit(vid, fs->vlan->active_svlans); + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + netdev_update_features(netdev); } return 0; } -static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) +static void mlx5e_fs_add_vlan_rules(struct mlx5e_flow_steering *fs) { int i; - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - for_each_set_bit(i, priv->fs.vlan->active_cvlans, VLAN_N_VID) { - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); + for_each_set_bit(i, fs->vlan->active_cvlans, VLAN_N_VID) { + mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); } - for_each_set_bit(i, priv->fs.vlan->active_svlans, VLAN_N_VID) - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); + for_each_set_bit(i, fs->vlan->active_svlans, VLAN_N_VID) + mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); - if (priv->fs.vlan->cvlan_filter_disabled) - mlx5e_add_any_vid_rules(priv); + if (fs->vlan->cvlan_filter_disabled) + mlx5e_fs_add_any_vid_rules(fs); } -static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) +static void mlx5e_del_vlan_rules(struct mlx5e_flow_steering *fs) { int i; - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - for_each_set_bit(i, priv->fs.vlan->active_cvlans, VLAN_N_VID) { - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); + for_each_set_bit(i, fs->vlan->active_cvlans, VLAN_N_VID) { + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); } - for_each_set_bit(i, priv->fs.vlan->active_svlans, VLAN_N_VID) - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); + for_each_set_bit(i, fs->vlan->active_svlans, VLAN_N_VID) + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); - WARN_ON_ONCE(!(test_bit(MLX5E_STATE_DESTROYING, &priv->state))); + WARN_ON_ONCE(fs->state_destroy); - mlx5e_remove_vlan_trap(priv); + mlx5e_remove_vlan_trap(fs); /* must be called after DESTROY bit is set and * set_rx_mode is called and flushed */ - if (priv->fs.vlan->cvlan_filter_disabled) - mlx5e_del_any_vid_rules(priv); + if (fs->vlan->cvlan_filter_disabled) + mlx5e_fs_del_any_vid_rules(fs); } #define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \ hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) -static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, +static void mlx5e_execute_l2_action(struct mlx5e_flow_steering *fs, struct mlx5e_l2_hash_node *hn) { u8 action = hn->action; @@ -547,9 +578,9 @@ static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, switch (action) { case MLX5E_ACTION_ADD: - mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH); + mlx5e_add_l2_flow_rule(fs, &hn->ai, MLX5E_FULLMATCH); if (!is_multicast_ether_addr(mac_addr)) { - l2_err = mlx5_mpfs_add_mac(priv->mdev, mac_addr); + l2_err = mlx5_mpfs_add_mac(fs->mdev, mac_addr); hn->mpfs = !l2_err; } hn->action = MLX5E_ACTION_NONE; @@ -557,52 +588,51 @@ static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, case MLX5E_ACTION_DEL: if (!is_multicast_ether_addr(mac_addr) && hn->mpfs) - l2_err = mlx5_mpfs_del_mac(priv->mdev, mac_addr); - mlx5e_del_l2_flow_rule(priv, &hn->ai); + l2_err = mlx5_mpfs_del_mac(fs->mdev, mac_addr); + mlx5e_del_l2_flow_rule(fs, &hn->ai); mlx5e_del_l2_from_hash(hn); break; } if (l2_err) - netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n", - action == MLX5E_ACTION_ADD ? "add" : "del", mac_addr, l2_err); + fs_warn(fs, "MPFS, failed to %s mac %pM, err(%d)\n", + action == MLX5E_ACTION_ADD ? "add" : "del", + mac_addr, l2_err); } -static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) +static void mlx5e_sync_netdev_addr(struct mlx5e_flow_steering *fs, + struct net_device *netdev) { - struct net_device *netdev = priv->netdev; struct netdev_hw_addr *ha; netif_addr_lock_bh(netdev); - mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc, - priv->netdev->dev_addr); - + mlx5e_add_l2_to_hash(fs->l2.netdev_uc, netdev->dev_addr); netdev_for_each_uc_addr(ha, netdev) - mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc, ha->addr); + mlx5e_add_l2_to_hash(fs->l2.netdev_uc, ha->addr); netdev_for_each_mc_addr(ha, netdev) - mlx5e_add_l2_to_hash(priv->fs.l2.netdev_mc, ha->addr); + mlx5e_add_l2_to_hash(fs->l2.netdev_mc, ha->addr); netif_addr_unlock_bh(netdev); } -static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type, +static void mlx5e_fill_addr_array(struct mlx5e_flow_steering *fs, int list_type, + struct net_device *ndev, u8 addr_array[][ETH_ALEN], int size) { bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); - struct net_device *ndev = priv->netdev; struct mlx5e_l2_hash_node *hn; struct hlist_head *addr_list; struct hlist_node *tmp; int i = 0; int hi; - addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc; + addr_list = is_uc ? fs->l2.netdev_uc : fs->l2.netdev_mc; if (is_uc) /* Make sure our own address is pushed first */ ether_addr_copy(addr_array[i++], ndev->dev_addr); - else if (priv->fs.l2.broadcast_enabled) + else if (fs->l2.broadcast_enabled) ether_addr_copy(addr_array[i++], ndev->broadcast); mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) { @@ -614,7 +644,8 @@ static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type, } } -static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, +static void mlx5e_vport_context_update_addr_list(struct mlx5e_flow_steering *fs, + struct net_device *netdev, int list_type) { bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); @@ -627,19 +658,18 @@ static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, int err; int hi; - size = is_uc ? 0 : (priv->fs.l2.broadcast_enabled ? 1 : 0); + size = is_uc ? 0 : (fs->l2.broadcast_enabled ? 1 : 0); max_size = is_uc ? - 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) : - 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list); + 1 << MLX5_CAP_GEN(fs->mdev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(fs->mdev, log_max_current_mc_list); - addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc; + addr_list = is_uc ? fs->l2.netdev_uc : fs->l2.netdev_mc; mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) size++; if (size > max_size) { - netdev_warn(priv->netdev, - "netdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n", - is_uc ? "UC" : "MC", size, max_size); + fs_warn(fs, "mdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n", + is_uc ? "UC" : "MC", size, max_size); size = max_size; } @@ -649,65 +679,66 @@ static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, err = -ENOMEM; goto out; } - mlx5e_fill_addr_array(priv, list_type, addr_array, size); + mlx5e_fill_addr_array(fs, list_type, netdev, addr_array, size); } - err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size); + err = mlx5_modify_nic_vport_mac_list(fs->mdev, list_type, addr_array, size); out: if (err) - netdev_err(priv->netdev, - "Failed to modify vport %s list err(%d)\n", - is_uc ? "UC" : "MC", err); + fs_err(fs, "Failed to modify vport %s list err(%d)\n", + is_uc ? "UC" : "MC", err); kfree(addr_array); } -static void mlx5e_vport_context_update(struct mlx5e_priv *priv) +static void mlx5e_vport_context_update(struct mlx5e_flow_steering *fs, + struct net_device *netdev) { - struct mlx5e_l2_table *ea = &priv->fs.l2; + struct mlx5e_l2_table *ea = &fs->l2; - mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC); - mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC); - mlx5_modify_nic_vport_promisc(priv->mdev, 0, + mlx5e_vport_context_update_addr_list(fs, netdev, MLX5_NVPRT_LIST_TYPE_UC); + mlx5e_vport_context_update_addr_list(fs, netdev, MLX5_NVPRT_LIST_TYPE_MC); + mlx5_modify_nic_vport_promisc(fs->mdev, 0, ea->allmulti_enabled, ea->promisc_enabled); } -static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv) +static void mlx5e_apply_netdev_addr(struct mlx5e_flow_steering *fs) { struct mlx5e_l2_hash_node *hn; struct hlist_node *tmp; int i; - mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i) - mlx5e_execute_l2_action(priv, hn); + mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_uc, i) + mlx5e_execute_l2_action(fs, hn); - mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i) - mlx5e_execute_l2_action(priv, hn); + mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_mc, i) + mlx5e_execute_l2_action(fs, hn); } -static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv) +static void mlx5e_handle_netdev_addr(struct mlx5e_flow_steering *fs, + struct net_device *netdev) { struct mlx5e_l2_hash_node *hn; struct hlist_node *tmp; int i; - mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i) + mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_uc, i) hn->action = MLX5E_ACTION_DEL; - mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i) + mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_mc, i) hn->action = MLX5E_ACTION_DEL; - if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state)) - mlx5e_sync_netdev_addr(priv); + if (fs->state_destroy) + mlx5e_sync_netdev_addr(fs, netdev); - mlx5e_apply_netdev_addr(priv); + mlx5e_apply_netdev_addr(fs); } #define MLX5E_PROMISC_GROUP0_SIZE BIT(0) #define MLX5E_PROMISC_TABLE_SIZE MLX5E_PROMISC_GROUP0_SIZE -static int mlx5e_add_promisc_rule(struct mlx5e_priv *priv) +static int mlx5e_add_promisc_rule(struct mlx5e_flow_steering *fs) { - struct mlx5_flow_table *ft = priv->fs.promisc.ft.t; + struct mlx5_flow_table *ft = fs->promisc.ft.t; struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle **rule_p; MLX5_DECLARE_FLOW_ACT(flow_act); @@ -718,22 +749,22 @@ static int mlx5e_add_promisc_rule(struct mlx5e_priv *priv) if (!spec) return -ENOMEM; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc); + dest.ft = mlx5_get_ttc_flow_table(fs->ttc); - rule_p = &priv->fs.promisc.rule; + rule_p = &fs->promisc.rule; *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(*rule_p)) { err = PTR_ERR(*rule_p); *rule_p = NULL; - netdev_err(priv->netdev, "%s: add promiscuous rule failed\n", __func__); + fs_err(fs, "%s: add promiscuous rule failed\n", __func__); } kvfree(spec); return err; } -static int mlx5e_create_promisc_table(struct mlx5e_priv *priv) +static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs) { - struct mlx5e_flow_table *ft = &priv->fs.promisc.ft; + struct mlx5e_flow_table *ft = &fs->promisc.ft; struct mlx5_flow_table_attr ft_attr = {}; int err; @@ -742,14 +773,14 @@ static int mlx5e_create_promisc_table(struct mlx5e_priv *priv) ft_attr.level = MLX5E_PROMISC_FT_LEVEL; ft_attr.prio = MLX5E_NIC_PRIO; - ft->t = mlx5_create_auto_grouped_flow_table(priv->fs.ns, &ft_attr); + ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); - netdev_err(priv->netdev, "fail to create promisc table err=%d\n", err); + fs_err(fs, "fail to create promisc table err=%d\n", err); return err; } - err = mlx5e_add_promisc_rule(priv); + err = mlx5e_add_promisc_rule(fs); if (err) goto err_destroy_promisc_table; @@ -762,34 +793,31 @@ err_destroy_promisc_table: return err; } -static void mlx5e_del_promisc_rule(struct mlx5e_priv *priv) +static void mlx5e_del_promisc_rule(struct mlx5e_flow_steering *fs) { - if (WARN(!priv->fs.promisc.rule, "Trying to remove non-existing promiscuous rule")) + if (WARN(!fs->promisc.rule, "Trying to remove non-existing promiscuous rule")) return; - mlx5_del_flow_rules(priv->fs.promisc.rule); - priv->fs.promisc.rule = NULL; + mlx5_del_flow_rules(fs->promisc.rule); + fs->promisc.rule = NULL; } -static void mlx5e_destroy_promisc_table(struct mlx5e_priv *priv) +static void mlx5e_destroy_promisc_table(struct mlx5e_flow_steering *fs) { - if (WARN(!priv->fs.promisc.ft.t, "Trying to remove non-existing promiscuous table")) + if (WARN(!fs->promisc.ft.t, "Trying to remove non-existing promiscuous table")) return; - mlx5e_del_promisc_rule(priv); - mlx5_destroy_flow_table(priv->fs.promisc.ft.t); - priv->fs.promisc.ft.t = NULL; + mlx5e_del_promisc_rule(fs); + mlx5_destroy_flow_table(fs->promisc.ft.t); + fs->promisc.ft.t = NULL; } -void mlx5e_set_rx_mode_work(struct work_struct *work) +void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, + struct net_device *netdev) { - struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, - set_rx_mode_work); - - struct mlx5e_l2_table *ea = &priv->fs.l2; - struct net_device *ndev = priv->netdev; + struct mlx5e_l2_table *ea = &fs->l2; - bool rx_mode_enable = !test_bit(MLX5E_STATE_DESTROYING, &priv->state); - bool promisc_enabled = rx_mode_enable && (ndev->flags & IFF_PROMISC); - bool allmulti_enabled = rx_mode_enable && (ndev->flags & IFF_ALLMULTI); + bool rx_mode_enable = fs->state_destroy; + bool promisc_enabled = rx_mode_enable && (netdev->flags & IFF_PROMISC); + bool allmulti_enabled = rx_mode_enable && (netdev->flags & IFF_ALLMULTI); bool broadcast_enabled = rx_mode_enable; bool enable_promisc = !ea->promisc_enabled && promisc_enabled; @@ -801,32 +829,32 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) int err; if (enable_promisc) { - err = mlx5e_create_promisc_table(priv); + err = mlx5e_create_promisc_table(fs); if (err) enable_promisc = false; - if (!priv->channels.params.vlan_strip_disable && !err) - netdev_warn_once(ndev, - "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n"); + if (!fs->vlan_strip_disable && !err) + fs_warn_once(fs, + "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n"); } if (enable_allmulti) - mlx5e_add_l2_flow_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); + mlx5e_add_l2_flow_rule(fs, &ea->allmulti, MLX5E_ALLMULTI); if (enable_broadcast) - mlx5e_add_l2_flow_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); + mlx5e_add_l2_flow_rule(fs, &ea->broadcast, MLX5E_FULLMATCH); - mlx5e_handle_netdev_addr(priv); + mlx5e_handle_netdev_addr(fs, netdev); if (disable_broadcast) - mlx5e_del_l2_flow_rule(priv, &ea->broadcast); + mlx5e_del_l2_flow_rule(fs, &ea->broadcast); if (disable_allmulti) - mlx5e_del_l2_flow_rule(priv, &ea->allmulti); + mlx5e_del_l2_flow_rule(fs, &ea->allmulti); if (disable_promisc) - mlx5e_destroy_promisc_table(priv); + mlx5e_destroy_promisc_table(fs); ea->promisc_enabled = promisc_enabled; ea->allmulti_enabled = allmulti_enabled; ea->broadcast_enabled = broadcast_enabled; - mlx5e_vport_context_update(priv); + mlx5e_vport_context_update(fs, netdev); } static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft) @@ -841,9 +869,9 @@ static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft) ft->num_groups = 0; } -void mlx5e_init_l2_addr(struct mlx5e_priv *priv) +void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev) { - ether_addr_copy(priv->fs.l2.broadcast.addr, priv->netdev->broadcast); + ether_addr_copy(fs->l2.broadcast.addr, netdev->broadcast); } void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) @@ -854,14 +882,15 @@ void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) ft->t = NULL; } -static void mlx5e_set_inner_ttc_params(struct mlx5e_priv *priv, +static void mlx5e_set_inner_ttc_params(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, struct ttc_params *ttc_params) { struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; int tt; memset(ttc_params, 0, sizeof(*ttc_params)); - ttc_params->ns = mlx5_get_flow_namespace(priv->mdev, + ttc_params->ns = mlx5_get_flow_namespace(fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL); ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL; ft_attr->prio = MLX5E_NIC_PRIO; @@ -870,13 +899,14 @@ static void mlx5e_set_inner_ttc_params(struct mlx5e_priv *priv, ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; ttc_params->dests[tt].tir_num = tt == MLX5_TT_ANY ? - mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) : - mlx5e_rx_res_get_tirn_rss_inner(priv->rx_res, + mlx5e_rx_res_get_tirn_direct(rx_res, 0) : + mlx5e_rx_res_get_tirn_rss_inner(rx_res, tt); } } -void mlx5e_set_ttc_params(struct mlx5e_priv *priv, +void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, struct ttc_params *ttc_params, bool tunnel) { @@ -884,7 +914,7 @@ void mlx5e_set_ttc_params(struct mlx5e_priv *priv, int tt; memset(ttc_params, 0, sizeof(*ttc_params)); - ttc_params->ns = mlx5_get_flow_namespace(priv->mdev, + ttc_params->ns = mlx5_get_flow_namespace(fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL); ft_attr->level = MLX5E_TTC_FT_LEVEL; ft_attr->prio = MLX5E_NIC_PRIO; @@ -893,23 +923,23 @@ void mlx5e_set_ttc_params(struct mlx5e_priv *priv, ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; ttc_params->dests[tt].tir_num = tt == MLX5_TT_ANY ? - mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) : - mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt); + mlx5e_rx_res_get_tirn_direct(rx_res, 0) : + mlx5e_rx_res_get_tirn_rss(rx_res, tt); } ttc_params->inner_ttc = tunnel; - if (!tunnel || !mlx5_tunnel_inner_ft_supported(priv->mdev)) + if (!tunnel || !mlx5_tunnel_inner_ft_supported(fs->mdev)) return; for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { ttc_params->tunnel_dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; ttc_params->tunnel_dests[tt].ft = - mlx5_get_ttc_flow_table(priv->fs.inner_ttc); + mlx5_get_ttc_flow_table(fs->inner_ttc); } } -static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, +static void mlx5e_del_l2_flow_rule(struct mlx5e_flow_steering *fs, struct mlx5e_l2_rule *ai) { if (!IS_ERR_OR_NULL(ai->rule)) { @@ -918,10 +948,10 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, } } -static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, +static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs, struct mlx5e_l2_rule *ai, int type) { - struct mlx5_flow_table *ft = priv->fs.l2.ft.t; + struct mlx5_flow_table *ft = fs->l2.ft.t; struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_spec *spec; @@ -939,7 +969,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, outer_headers.dmac_47_16); dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc); + dest.ft = mlx5_get_ttc_flow_table(fs->ttc); switch (type) { case MLX5E_FULLMATCH: @@ -957,8 +987,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(ai->rule)) { - netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n", - __func__, mv_dmac); + fs_err(fs, "%s: add l2 rule(mac:%pM) failed\n", __func__, mv_dmac); err = PTR_ERR(ai->rule); ai->rule = NULL; } @@ -1042,14 +1071,14 @@ err_destroy_groups: return err; } -static void mlx5e_destroy_l2_table(struct mlx5e_priv *priv) +static void mlx5e_destroy_l2_table(struct mlx5e_flow_steering *fs) { - mlx5e_destroy_flow_table(&priv->fs.l2.ft); + mlx5e_destroy_flow_table(&fs->l2.ft); } -static int mlx5e_create_l2_table(struct mlx5e_priv *priv) +static int mlx5e_create_l2_table(struct mlx5e_flow_steering *fs) { - struct mlx5e_l2_table *l2_table = &priv->fs.l2; + struct mlx5e_l2_table *l2_table = &fs->l2; struct mlx5e_flow_table *ft = &l2_table->ft; struct mlx5_flow_table_attr ft_attr = {}; int err; @@ -1060,7 +1089,7 @@ static int mlx5e_create_l2_table(struct mlx5e_priv *priv) ft_attr.level = MLX5E_L2_FT_LEVEL; ft_attr.prio = MLX5E_NIC_PRIO; - ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + ft->t = mlx5_create_flow_table(fs->ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -1180,20 +1209,20 @@ static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft) return err; } -static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) +static int mlx5e_fs_create_vlan_table(struct mlx5e_flow_steering *fs) { struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_flow_table *ft; int err; - ft = &priv->fs.vlan->ft; + ft = &fs->vlan->ft; ft->num_groups = 0; ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE; ft_attr.level = MLX5E_VLAN_FT_LEVEL; ft_attr.prio = MLX5E_NIC_PRIO; - ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + ft->t = mlx5_create_flow_table(fs->ns, &ft_attr); if (IS_ERR(ft->t)) return PTR_ERR(ft->t); @@ -1207,7 +1236,7 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) if (err) goto err_free_g; - mlx5e_add_vlan_rules(priv); + mlx5e_fs_add_vlan_rules(fs); return 0; @@ -1219,139 +1248,332 @@ err_destroy_vlan_table: return err; } -static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) +static void mlx5e_destroy_vlan_table(struct mlx5e_flow_steering *fs) { - mlx5e_del_vlan_rules(priv); - mlx5e_destroy_flow_table(&priv->fs.vlan->ft); + mlx5e_del_vlan_rules(fs); + mlx5e_destroy_flow_table(&fs->vlan->ft); } -static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) +static void mlx5e_destroy_inner_ttc_table(struct mlx5e_flow_steering *fs) { - if (!mlx5_tunnel_inner_ft_supported(priv->mdev)) + if (!mlx5_tunnel_inner_ft_supported(fs->mdev)) return; - mlx5_destroy_ttc_table(priv->fs.inner_ttc); + mlx5_destroy_ttc_table(fs->inner_ttc); } -void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) +void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs) { - mlx5_destroy_ttc_table(priv->fs.ttc); + mlx5_destroy_ttc_table(fs->ttc); } -static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) +static int mlx5e_create_inner_ttc_table(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res) { struct ttc_params ttc_params = {}; - if (!mlx5_tunnel_inner_ft_supported(priv->mdev)) + if (!mlx5_tunnel_inner_ft_supported(fs->mdev)) return 0; - mlx5e_set_inner_ttc_params(priv, &ttc_params); - priv->fs.inner_ttc = mlx5_create_inner_ttc_table(priv->mdev, - &ttc_params); - if (IS_ERR(priv->fs.inner_ttc)) - return PTR_ERR(priv->fs.inner_ttc); + mlx5e_set_inner_ttc_params(fs, rx_res, &ttc_params); + fs->inner_ttc = mlx5_create_inner_ttc_table(fs->mdev, + &ttc_params); + if (IS_ERR(fs->inner_ttc)) + return PTR_ERR(fs->inner_ttc); return 0; } -int mlx5e_create_ttc_table(struct mlx5e_priv *priv) +int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res) { struct ttc_params ttc_params = {}; - mlx5e_set_ttc_params(priv, &ttc_params, true); - priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); - if (IS_ERR(priv->fs.ttc)) - return PTR_ERR(priv->fs.ttc); + mlx5e_set_ttc_params(fs, rx_res, &ttc_params, true); + fs->ttc = mlx5_create_ttc_table(fs->mdev, &ttc_params); + if (IS_ERR(fs->ttc)) + return PTR_ERR(fs->ttc); return 0; } -int mlx5e_create_flow_steering(struct mlx5e_priv *priv) +int mlx5e_create_flow_steering(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + const struct mlx5e_profile *profile, + struct net_device *netdev) { + struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(fs->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); int err; - priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, - MLX5_FLOW_NAMESPACE_KERNEL); - - if (!priv->fs.ns) + if (!ns) return -EOPNOTSUPP; - err = mlx5e_arfs_create_tables(priv); + mlx5e_fs_set_ns(fs, ns, false); + err = mlx5e_arfs_create_tables(fs, rx_res, + !!(netdev->hw_features & NETIF_F_NTUPLE)); if (err) { - netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n", - err); - priv->netdev->hw_features &= ~NETIF_F_NTUPLE; + fs_err(fs, "Failed to create arfs tables, err=%d\n", err); + netdev->hw_features &= ~NETIF_F_NTUPLE; } - err = mlx5e_create_inner_ttc_table(priv); + err = mlx5e_create_inner_ttc_table(fs, rx_res); if (err) { - netdev_err(priv->netdev, - "Failed to create inner ttc table, err=%d\n", - err); + fs_err(fs, "Failed to create inner ttc table, err=%d\n", err); goto err_destroy_arfs_tables; } - err = mlx5e_create_ttc_table(priv); + err = mlx5e_create_ttc_table(fs, rx_res); if (err) { - netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", - err); + fs_err(fs, "Failed to create ttc table, err=%d\n", err); goto err_destroy_inner_ttc_table; } - err = mlx5e_create_l2_table(priv); + err = mlx5e_create_l2_table(fs); if (err) { - netdev_err(priv->netdev, "Failed to create l2 table, err=%d\n", - err); + fs_err(fs, "Failed to create l2 table, err=%d\n", err); goto err_destroy_ttc_table; } - err = mlx5e_create_vlan_table(priv); + err = mlx5e_fs_create_vlan_table(fs); if (err) { - netdev_err(priv->netdev, "Failed to create vlan table, err=%d\n", - err); + fs_err(fs, "Failed to create vlan table, err=%d\n", err); goto err_destroy_l2_table; } - err = mlx5e_ptp_alloc_rx_fs(priv); + err = mlx5e_ptp_alloc_rx_fs(fs, profile); if (err) goto err_destory_vlan_table; - mlx5e_ethtool_init_steering(priv); + mlx5e_ethtool_init_steering(fs); return 0; err_destory_vlan_table: - mlx5e_destroy_vlan_table(priv); + mlx5e_destroy_vlan_table(fs); err_destroy_l2_table: - mlx5e_destroy_l2_table(priv); + mlx5e_destroy_l2_table(fs); err_destroy_ttc_table: - mlx5e_destroy_ttc_table(priv); + mlx5e_destroy_ttc_table(fs); err_destroy_inner_ttc_table: - mlx5e_destroy_inner_ttc_table(priv); + mlx5e_destroy_inner_ttc_table(fs); err_destroy_arfs_tables: - mlx5e_arfs_destroy_tables(priv); + mlx5e_arfs_destroy_tables(fs, !!(netdev->hw_features & NETIF_F_NTUPLE)); return err; } -void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) +void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple, + const struct mlx5e_profile *profile) { - mlx5e_ptp_free_rx_fs(priv); - mlx5e_destroy_vlan_table(priv); - mlx5e_destroy_l2_table(priv); - mlx5e_destroy_ttc_table(priv); - mlx5e_destroy_inner_ttc_table(priv); - mlx5e_arfs_destroy_tables(priv); - mlx5e_ethtool_cleanup_steering(priv); + mlx5e_ptp_free_rx_fs(fs, profile); + mlx5e_destroy_vlan_table(fs); + mlx5e_destroy_l2_table(fs); + mlx5e_destroy_ttc_table(fs); + mlx5e_destroy_inner_ttc_table(fs); + mlx5e_arfs_destroy_tables(fs, ntuple); + mlx5e_ethtool_cleanup_steering(fs); } -int mlx5e_fs_init(struct mlx5e_priv *priv) +static int mlx5e_fs_vlan_alloc(struct mlx5e_flow_steering *fs) { - priv->fs.vlan = kvzalloc(sizeof(*priv->fs.vlan), GFP_KERNEL); - if (!priv->fs.vlan) + fs->vlan = kvzalloc(sizeof(*fs->vlan), GFP_KERNEL); + if (!fs->vlan) return -ENOMEM; return 0; } -void mlx5e_fs_cleanup(struct mlx5e_priv *priv) +static void mlx5e_fs_vlan_free(struct mlx5e_flow_steering *fs) +{ + kvfree(fs->vlan); +} + +struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs) +{ + return fs->vlan; +} + +static int mlx5e_fs_tc_alloc(struct mlx5e_flow_steering *fs) +{ + fs->tc = mlx5e_tc_table_alloc(); + if (IS_ERR(fs->tc)) + return -ENOMEM; + return 0; +} + +static void mlx5e_fs_tc_free(struct mlx5e_flow_steering *fs) +{ + mlx5e_tc_table_free(fs->tc); +} + +struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs) +{ + return fs->tc; +} + +#ifdef CONFIG_MLX5_EN_RXNFC +static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs) +{ + return mlx5e_ethtool_alloc(&fs->ethtool); +} + +static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs) +{ + mlx5e_ethtool_free(fs->ethtool); +} + +struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs) +{ + return fs->ethtool; +} +#else +static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs) +{ return 0; } +static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs) { } +#endif + +struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile, + struct mlx5_core_dev *mdev, + bool state_destroy) +{ + struct mlx5e_flow_steering *fs; + int err; + + fs = kvzalloc(sizeof(*fs), GFP_KERNEL); + if (!fs) + goto err; + + fs->mdev = mdev; + fs->state_destroy = state_destroy; + if (mlx5e_profile_feature_cap(profile, FS_VLAN)) { + err = mlx5e_fs_vlan_alloc(fs); + if (err) + goto err_free_fs; + } + + if (mlx5e_profile_feature_cap(profile, FS_TC)) { + err = mlx5e_fs_tc_alloc(fs); + if (err) + goto err_free_vlan; + } + + err = mlx5e_fs_ethtool_alloc(fs); + if (err) + goto err_free_tc; + + return fs; +err_free_tc: + mlx5e_fs_tc_free(fs); +err_free_vlan: + mlx5e_fs_vlan_free(fs); +err_free_fs: + kvfree(fs); +err: + return NULL; +} + +void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs) +{ + mlx5e_fs_ethtool_free(fs); + mlx5e_fs_tc_free(fs); + mlx5e_fs_vlan_free(fs); + kvfree(fs); +} + +struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs) +{ + return &fs->l2; +} + +struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress) +{ + return egress ? fs->egress_ns : fs->ns; +} + +void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress) +{ + if (!egress) + fs->ns = ns; + else + fs->egress_ns = ns; +} + +struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner) +{ + return inner ? fs->inner_ttc : fs->ttc; +} + +void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner) +{ + if (!inner) + fs->ttc = ttc; + else + fs->inner_ttc = ttc; +} + +#ifdef CONFIG_MLX5_EN_ARFS +struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs) +{ + return fs->arfs; +} + +void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs) +{ + fs->arfs = arfs; +} +#endif + +struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs) +{ + return fs->ptp_fs; +} + +void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs) +{ + fs->ptp_fs = ptp_fs; +} + +struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs) +{ + return fs->any; +} + +void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any) +{ + fs->any = any; +} + +#ifdef CONFIG_MLX5_EN_TLS +struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs) +{ + return fs->accel_tcp; +} + +void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp) +{ + fs->accel_tcp = accel_tcp; +} +#endif + +void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy) +{ + fs->state_destroy = state_destroy; +} + +void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs, + bool vlan_strip_disable) +{ + fs->vlan_strip_disable = vlan_strip_disable; +} + +struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs) +{ + return fs->udp; +} + +void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp) +{ + fs->udp = udp; +} + +struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs) { - kvfree(priv->fs.vlan); - priv->fs.vlan = NULL; + return fs->mdev; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index ad0d234632a3..aac32e505c14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -34,6 +34,22 @@ #include "en.h" #include "en/params.h" #include "en/xsk/pool.h" +#include "en/fs_ethtool.h" + +struct mlx5e_ethtool_table { + struct mlx5_flow_table *ft; + int num_rules; +}; + +#define ETHTOOL_NUM_L3_L4_FTS 7 +#define ETHTOOL_NUM_L2_FTS 4 + +struct mlx5e_ethtool_steering { + struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS]; + struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS]; + struct list_head rules; + int tot_num_rules; +}; static int flow_type_to_traffic_type(u32 flow_type); @@ -66,6 +82,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, struct ethtool_rx_flow_spec *fs, int num_tuples) { + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs); struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_ethtool_table *eth_ft; struct mlx5_flow_namespace *ns; @@ -81,18 +98,18 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, case UDP_V6_FLOW: max_tuples = ETHTOOL_NUM_L3_L4_FTS; prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); - eth_ft = &priv->fs.ethtool.l3_l4_ft[prio]; + eth_ft = ðtool->l3_l4_ft[prio]; break; case IP_USER_FLOW: case IPV6_USER_FLOW: max_tuples = ETHTOOL_NUM_L3_L4_FTS; prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); - eth_ft = &priv->fs.ethtool.l3_l4_ft[prio]; + eth_ft = ðtool->l3_l4_ft[prio]; break; case ETHER_FLOW: max_tuples = ETHTOOL_NUM_L2_FTS; prio = max_tuples - num_tuples; - eth_ft = &priv->fs.ethtool.l2_ft[prio]; + eth_ft = ðtool->l2_ft[prio]; prio += MLX5E_ETHTOOL_L2_PRIO; break; default: @@ -382,15 +399,16 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v, static void add_rule_to_list(struct mlx5e_priv *priv, struct mlx5e_ethtool_rule *rule) { + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs); + struct list_head *head = ðtool->rules; struct mlx5e_ethtool_rule *iter; - struct list_head *head = &priv->fs.ethtool.rules; - list_for_each_entry(iter, &priv->fs.ethtool.rules, list) { + list_for_each_entry(iter, ðtool->rules, list) { if (iter->flow_spec.location > rule->flow_spec.location) break; head = &iter->list; } - priv->fs.ethtool.tot_num_rules++; + ethtool->tot_num_rules++; list_add(&rule->list, head); } @@ -433,15 +451,7 @@ static int flow_get_tirn(struct mlx5e_priv *priv, eth_rule->rss = rss; mlx5e_rss_refcnt_inc(eth_rule->rss); } else { - struct mlx5e_params *params = &priv->channels.params; - enum mlx5e_rq_group group; - u16 ix; - - mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group); - - *tirn = group == MLX5E_RQ_GROUP_XSK ? - mlx5e_rx_res_get_tirn_xsk(priv->rx_res, ix) : - mlx5e_rx_res_get_tirn_direct(priv->rx_res, ix); + *tirn = mlx5e_rx_res_get_tirn_direct(priv->rx_res, fs->ring_cookie); } return 0; @@ -499,15 +509,16 @@ free: return err ? ERR_PTR(err) : rule; } -static void del_ethtool_rule(struct mlx5e_priv *priv, +static void del_ethtool_rule(struct mlx5e_flow_steering *fs, struct mlx5e_ethtool_rule *eth_rule) { + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs); if (eth_rule->rule) mlx5_del_flow_rules(eth_rule->rule); if (eth_rule->rss) mlx5e_rss_refcnt_dec(eth_rule->rss); list_del(ð_rule->list); - priv->fs.ethtool.tot_num_rules--; + ethtool->tot_num_rules--; put_flow_table(eth_rule->eth_ft); kfree(eth_rule); } @@ -515,9 +526,10 @@ static void del_ethtool_rule(struct mlx5e_priv *priv, static struct mlx5e_ethtool_rule *find_ethtool_rule(struct mlx5e_priv *priv, int location) { + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs); struct mlx5e_ethtool_rule *iter; - list_for_each_entry(iter, &priv->fs.ethtool.rules, list) { + list_for_each_entry(iter, ðtool->rules, list) { if (iter->flow_spec.location == location) return iter; } @@ -531,7 +543,7 @@ static struct mlx5e_ethtool_rule *get_ethtool_rule(struct mlx5e_priv *priv, eth_rule = find_ethtool_rule(priv, location); if (eth_rule) - del_ethtool_rule(priv, eth_rule); + del_ethtool_rule(priv->fs, eth_rule); eth_rule = kzalloc(sizeof(*eth_rule), GFP_KERNEL); if (!eth_rule) @@ -662,8 +674,7 @@ static int validate_flow(struct mlx5e_priv *priv, return -ENOSPC; if (fs->ring_cookie != RX_CLS_FLOW_DISC) - if (!mlx5e_qid_validate(priv->profile, &priv->channels.params, - fs->ring_cookie)) + if (fs->ring_cookie >= priv->channels.params.num_channels) return -EINVAL; switch (flow_type_mask(fs->flow_type)) { @@ -742,10 +753,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, eth_rule->flow_spec = *fs; eth_rule->eth_ft = eth_ft; - if (!eth_ft->ft) { - err = -EINVAL; - goto del_ethtool_rule; - } + rule = add_ethtool_flow_rule(priv, eth_rule, eth_ft->ft, fs, rss_context); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -757,7 +765,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, return 0; del_ethtool_rule: - del_ethtool_rule(priv, eth_rule); + del_ethtool_rule(priv->fs, eth_rule); return err; } @@ -777,7 +785,7 @@ mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, int location) goto out; } - del_ethtool_rule(priv, eth_rule); + del_ethtool_rule(priv->fs, eth_rule); out: return err; } @@ -786,12 +794,13 @@ static int mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, int location) { + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs); struct mlx5e_ethtool_rule *eth_rule; if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES) return -EINVAL; - list_for_each_entry(eth_rule, &priv->fs.ethtool.rules, list) { + list_for_each_entry(eth_rule, ðtool->rules, list) { int index; if (eth_rule->flow_spec.location != location) @@ -829,18 +838,34 @@ mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, return err; } -void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) +int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool) +{ + *ethtool = kvzalloc(sizeof(**ethtool), GFP_KERNEL); + if (!*ethtool) + return -ENOMEM; + return 0; +} + +void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool) { + kvfree(ethtool); +} + +void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs); struct mlx5e_ethtool_rule *iter; struct mlx5e_ethtool_rule *temp; - list_for_each_entry_safe(iter, temp, &priv->fs.ethtool.rules, list) - del_ethtool_rule(priv, iter); + list_for_each_entry_safe(iter, temp, ðtool->rules, list) + del_ethtool_rule(fs, iter); } -void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) +void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs) { - INIT_LIST_HEAD(&priv->fs.ethtool.rules); + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs); + + INIT_LIST_HEAD(ðtool->rules); } static int flow_type_to_traffic_type(u32 flow_type) @@ -962,11 +987,12 @@ int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd) int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, u32 *rule_locs) { + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs); int err = 0; switch (info->cmd) { case ETHTOOL_GRXCLSRLCNT: - info->rule_cnt = priv->fs.ethtool.tot_num_rules; + info->rule_cnt = ethtool->tot_num_rules; break; case ETHTOOL_GRXCLSRULE: err = mlx5e_ethtool_get_flow(priv, info, info->fs.location); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 65571593ec5c..e3a4f01bcceb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -31,12 +31,12 @@ */ #include <net/tc_act/tc_gact.h> -#include <net/pkt_cls.h> #include <linux/mlx5/fs.h> #include <net/vxlan.h> #include <net/geneve.h> #include <linux/bpf.h> #include <linux/if_bridge.h> +#include <linux/filter.h> #include <net/page_pool.h> #include <net/xdp_sock_drv.h> #include "eswitch.h" @@ -45,10 +45,9 @@ #include "en_tc.h" #include "en_rep.h" #include "en_accel/ipsec.h" +#include "en_accel/macsec.h" #include "en_accel/en_accel.h" -#include "en_accel/tls.h" -#include "accel/ipsec.h" -#include "accel/tls.h" +#include "en_accel/ktls.h" #include "lib/vxlan.h" #include "lib/clock.h" #include "en/port.h" @@ -65,25 +64,29 @@ #include "en/devlink.h" #include "lib/mlx5.h" #include "en/ptp.h" +#include "en/htb.h" #include "qos.h" #include "en/trap.h" -#include "fpga/ipsec.h" -bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) +bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) { - bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && - MLX5_CAP_GEN(mdev, umr_ptr_rlky) && - MLX5_CAP_ETH(mdev, reg_umr_sq); - u16 max_wqe_sz_cap = MLX5_CAP_GEN(mdev, max_wqe_sz_sq); - bool inline_umr = MLX5E_UMR_WQE_INLINE_SZ <= max_wqe_sz_cap; + u16 umr_wqebbs, max_wqebbs; + bool striding_rq_umr; + striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); if (!striding_rq_umr) return false; - if (!inline_umr) { - mlx5_core_warn(mdev, "Cannot support Striding RQ: UMR WQE size (%d) exceeds maximum supported (%d).\n", - (int)MLX5E_UMR_WQE_INLINE_SZ, max_wqe_sz_cap); + + umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode); + max_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); + /* Sanity check; should never happen, because mlx5e_mpwrq_umr_wqebbs is + * calculated from mlx5e_get_max_sq_aligned_wqebbs. + */ + if (WARN_ON(umr_wqebbs > max_wqebbs)) return false; - } + return true; } @@ -200,21 +203,35 @@ static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv) mlx5_blocking_notifier_unregister(priv->mdev, &priv->blocking_events_nb); } +static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); + + WARN_ON_ONCE(entries * umr_entry_size % MLX5_OCTWORD); + + return entries * umr_entry_size / MLX5_OCTWORD; +} + static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *wqe) { struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; - u8 ds_cnt = DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_DS); + u16 octowords; + u8 ds_cnt; + + ds_cnt = DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(rq->mdev, rq->mpwqe.page_shift, + rq->mpwqe.umr_mode), + MLX5_SEND_WQE_DS); cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | ds_cnt); - cseg->umr_mkey = rq->mkey_be; + cseg->umr_mkey = rq->mpwqe.umr_mkey_be; ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; - ucseg->xlt_octowords = - cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); + octowords = mlx5e_mpwrq_umr_octowords(rq->mpwqe.pages_per_wqe, rq->mpwqe.umr_mode); + ucseg->xlt_octowords = cpu_to_be16(octowords); ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } @@ -260,10 +277,12 @@ static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) { int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); + size_t alloc_size; + + alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, alloc_units, + rq->mpwqe.pages_per_wqe)); - rq->mpwqe.info = kvzalloc_node(array_size(wq_sz, - sizeof(*rq->mpwqe.info)), - GFP_KERNEL, node); + rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node); if (!rq->mpwqe.info) return -ENOMEM; @@ -272,18 +291,52 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) return 0; } -static int mlx5e_create_umr_mtt_mkey(struct mlx5_core_dev *mdev, - u64 npages, u8 page_shift, u32 *umr_mkey, - dma_addr_t filler_addr) + +static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode) +{ + switch (umr_mode) { + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + return MLX5_MKC_ACCESS_MODE_MTT; + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + return MLX5_MKC_ACCESS_MODE_KSM; + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + return MLX5_MKC_ACCESS_MODE_KLMS; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + return MLX5_MKC_ACCESS_MODE_KSM; + } + WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); + return 0; +} + +static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, + u32 npages, u8 page_shift, u32 *umr_mkey, + dma_addr_t filler_addr, + enum mlx5e_mpwrq_umr_mode umr_mode, + u32 xsk_chunk_size) { struct mlx5_mtt *mtt; + struct mlx5_ksm *ksm; + struct mlx5_klm *klm; + u32 octwords; int inlen; void *mkc; u32 *in; int err; int i; - inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + sizeof(*mtt) * npages; + if ((umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED || + umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) && + !MLX5_CAP_GEN(mdev, fixed_buffer_size)) { + mlx5_core_warn(mdev, "Unaligned AF_XDP requires fixed_buffer_size capability\n"); + return -EINVAL; + } + + octwords = mlx5e_mpwrq_umr_octowords(npages, umr_mode); + + inlen = MLX5_FLEXIBLE_INLEN(mdev, MLX5_ST_SZ_BYTES(create_mkey_in), + MLX5_OCTWORD, octwords); + if (inlen < 0) + return inlen; in = kvzalloc(inlen, GFP_KERNEL); if (!in) @@ -295,16 +348,17 @@ static int mlx5e_create_umr_mtt_mkey(struct mlx5_core_dev *mdev, MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, lw, 1); MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, access_mode_1_0, mlx5e_mpwrq_access_mode(umr_mode)); mlx5e_mkey_set_relaxed_ordering(mdev, mkc); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); MLX5_SET64(mkc, mkc, len, npages << page_shift); - MLX5_SET(mkc, mkc, translations_octword_size, - MLX5_MTT_OCTW(npages)); - MLX5_SET(mkc, mkc, log_page_size, page_shift); - MLX5_SET(create_mkey_in, in, translations_octword_actual_size, - MLX5_MTT_OCTW(npages)); + MLX5_SET(mkc, mkc, translations_octword_size, octwords); + if (umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) + MLX5_SET(mkc, mkc, log_page_size, page_shift - 2); + else if (umr_mode != MLX5E_MPWRQ_UMR_MODE_OVERSIZED) + MLX5_SET(mkc, mkc, log_page_size, page_shift); + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octwords); /* Initialize the mkey with all MTTs pointing to a default * page (filler_addr). When the channels are activated, UMR @@ -312,9 +366,47 @@ static int mlx5e_create_umr_mtt_mkey(struct mlx5_core_dev *mdev, * the RQ's pool, while the gaps (wqe_overflow) remain mapped * to the default page. */ - mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); - for (i = 0 ; i < npages ; i++) - mtt[i].ptag = cpu_to_be64(filler_addr); + switch (umr_mode) { + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + klm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < npages; i++) { + klm[i << 1] = (struct mlx5_klm) { + .va = cpu_to_be64(filler_addr), + .bcount = cpu_to_be32(xsk_chunk_size), + .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), + }; + klm[(i << 1) + 1] = (struct mlx5_klm) { + .va = cpu_to_be64(filler_addr), + .bcount = cpu_to_be32((1 << page_shift) - xsk_chunk_size), + .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), + }; + } + break; + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < npages; i++) + ksm[i] = (struct mlx5_ksm) { + .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), + .va = cpu_to_be64(filler_addr), + }; + break; + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < npages; i++) + mtt[i] = (struct mlx5_mtt) { + .ptag = cpu_to_be64(filler_addr), + }; + break; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < npages * 4; i++) { + ksm[i] = (struct mlx5_ksm) { + .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), + .va = cpu_to_be64(filler_addr), + }; + } + break; + } err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); @@ -357,10 +449,27 @@ static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev, static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq) { - u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq)); + u32 xsk_chunk_size = rq->xsk_pool ? rq->xsk_pool->chunk_size : 0; + u32 wq_size = mlx5_wq_ll_get_size(&rq->mpwqe.wq); + u32 num_entries, max_num_entries; + u32 umr_mkey; + int err; + + max_num_entries = mlx5e_mpwrq_max_num_entries(mdev, rq->mpwqe.umr_mode); - return mlx5e_create_umr_mtt_mkey(mdev, num_mtts, PAGE_SHIFT, - &rq->umr_mkey, rq->wqe_overflow.addr); + /* Shouldn't overflow, the result is at most MLX5E_MAX_RQ_NUM_MTTS. */ + if (WARN_ON_ONCE(check_mul_overflow(wq_size, (u32)rq->mpwqe.mtts_per_wqe, + &num_entries) || + num_entries > max_num_entries)) + mlx5_core_err(mdev, "%s: multiplication overflow: %u * %u > %u\n", + __func__, wq_size, rq->mpwqe.mtts_per_wqe, + max_num_entries); + + err = mlx5e_create_umr_mkey(mdev, num_entries, rq->mpwqe.page_shift, + &umr_mkey, rq->wqe_overflow.addr, + rq->mpwqe.umr_mode, xsk_chunk_size); + rq->mpwqe.umr_mkey_be = cpu_to_be32(umr_mkey); + return err; } static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev, @@ -377,18 +486,20 @@ static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev, &rq->mpwqe.shampo->mkey); } -static u64 mlx5e_get_mpwqe_offset(u16 wqe_ix) -{ - return MLX5E_REQUIRED_MTTS(wqe_ix) << PAGE_SHIFT; -} - static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) { struct mlx5e_wqe_frag_info next_frag = {}; struct mlx5e_wqe_frag_info *prev = NULL; int i; - next_frag.di = &rq->wqe.di[0]; + if (rq->xsk_pool) { + /* Assumptions used by XSK batched allocator. */ + WARN_ON(rq->wqe.info.num_frags != 1); + WARN_ON(rq->wqe.info.log_num_frags != 0); + WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE); + } + + next_frag.au = &rq->wqe.alloc_units[0]; for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; @@ -398,7 +509,7 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) { if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) { - next_frag.di++; + next_frag.au++; next_frag.offset = 0; if (prev) prev->last_in_page = true; @@ -415,12 +526,13 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) prev->last_in_page = true; } -int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node) +static int mlx5e_init_au_list(struct mlx5e_rq *rq, int wq_sz, int node) { int len = wq_sz << rq->wqe.info.log_num_frags; - rq->wqe.di = kvzalloc_node(array_size(len, sizeof(*rq->wqe.di)), GFP_KERNEL, node); - if (!rq->wqe.di) + rq->wqe.alloc_units = kvzalloc_node(array_size(len, sizeof(*rq->wqe.alloc_units)), + GFP_KERNEL, node); + if (!rq->wqe.alloc_units) return -ENOMEM; mlx5e_init_frags_partition(rq); @@ -428,9 +540,9 @@ int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node) return 0; } -void mlx5e_free_di_list(struct mlx5e_rq *rq) +static void mlx5e_free_au_list(struct mlx5e_rq *rq) { - kvfree(rq->wqe.di); + kvfree(rq->wqe.alloc_units); } static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work) @@ -476,16 +588,17 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param rq->clock = &mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; + rq->channel = c; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); rq->xdpsq = &c->rq_xdpsq; - rq->stats = &c->priv->channel_stats[c->ix].rq; + rq->stats = &c->priv->channel_stats[c->ix]->rq; rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); err = mlx5e_rq_set_handlers(rq, params, NULL); if (err) return err; - return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0); + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id); } static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, @@ -572,6 +685,8 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); pool_size = 1 << params->log_rq_mtu_frames; + rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey); + switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq, @@ -587,19 +702,31 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); - pool_size = MLX5_MPWRQ_PAGES_PER_WQE << - mlx5e_mpwqe_get_log_rq_size(params, xsk); + rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + rq->mpwqe.pages_per_wqe = + mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift, + rq->mpwqe.umr_mode); + rq->mpwqe.umr_wqebbs = + mlx5e_mpwrq_umr_wqebbs(mdev, rq->mpwqe.page_shift, + rq->mpwqe.umr_mode); + rq->mpwqe.mtts_per_wqe = + mlx5e_mpwrq_mtts_per_wqe(mdev, rq->mpwqe.page_shift, + rq->mpwqe.umr_mode); + + pool_size = rq->mpwqe.pages_per_wqe << + mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk); rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); + rq->mpwqe.min_wqe_bulk = mlx5e_mpwqe_get_min_wqe_bulk(wq_sz); rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz); err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) goto err_rq_drop_page; - rq->mkey_be = cpu_to_be32(rq->umr_mkey); err = mlx5e_rq_alloc_mpwqe_info(rq, node); if (err) @@ -607,7 +734,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, err = mlx5_rq_shampo_alloc(mdev, params, rqp, rq, &pool_size, node); if (err) - goto err_free_by_rq_type; + goto err_free_mpwqe_info; break; default: /* MLX5_WQ_TYPE_CYCLIC */ @@ -632,11 +759,9 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, goto err_rq_wq_destroy; } - err = mlx5e_init_di_list(rq, wq_sz, node); + err = mlx5e_init_au_list(rq, wq_sz, node); if (err) goto err_rq_frags; - - rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey); } if (xsk) { @@ -661,14 +786,14 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, if (IS_ERR(rq->page_pool)) { err = PTR_ERR(rq->page_pool); rq->page_pool = NULL; - goto err_free_shampo; + goto err_free_by_rq_type; } if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_POOL, rq->page_pool); } if (err) - goto err_free_shampo; + goto err_destroy_page_pool; for (i = 0; i < wq_sz; i++) { if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { @@ -676,13 +801,14 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i); u32 byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; - u64 dma_offset = mlx5e_get_mpwqe_offset(i); + u64 dma_offset = mul_u32_u32(i, rq->mpwqe.mtts_per_wqe) << + rq->mpwqe.page_shift; u16 headroom = test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) ? 0 : rq->buff.headroom; wqe->data[0].addr = cpu_to_be64(dma_offset + headroom); wqe->data[0].byte_count = cpu_to_be32(byte_count); - wqe->data[0].lkey = rq->mkey_be; + wqe->data[0].lkey = rq->mpwqe.umr_mkey_be; } else { struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i); @@ -720,19 +846,21 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, return 0; -err_free_shampo: - mlx5e_rq_free_shampo(rq); +err_destroy_page_pool: + page_pool_destroy(rq->page_pool); err_free_by_rq_type: switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + mlx5e_rq_free_shampo(rq); +err_free_mpwqe_info: kvfree(rq->mpwqe.info); err_rq_mkey: - mlx5_core_destroy_mkey(mdev, rq->umr_mkey); + mlx5_core_destroy_mkey(mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be)); err_rq_drop_page: mlx5e_free_mpwqe_rq_drop_page(rq); break; default: /* MLX5_WQ_TYPE_CYCLIC */ - mlx5e_free_di_list(rq); + mlx5e_free_au_list(rq); err_rq_frags: kvfree(rq->wqe.frags); } @@ -760,24 +888,22 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: kvfree(rq->mpwqe.info); - mlx5_core_destroy_mkey(rq->mdev, rq->umr_mkey); + mlx5_core_destroy_mkey(rq->mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be)); mlx5e_free_mpwqe_rq_drop_page(rq); mlx5e_rq_free_shampo(rq); break; default: /* MLX5_WQ_TYPE_CYCLIC */ kvfree(rq->wqe.frags); - mlx5e_free_di_list(rq); + mlx5e_free_au_list(rq); } for (i = rq->page_cache.head; i != rq->page_cache.tail; i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) { - struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i]; - /* With AF_XDP, page_cache is not used, so this loop is not * entered, and it's safe to call mlx5e_page_release_dynamic * directly. */ - mlx5e_page_release_dynamic(rq, dma_info, false); + mlx5e_page_release_dynamic(rq, rq->page_cache.page_cache[i], false); } xdp_rxq_info_unreg(&rq->xdp_rxq); @@ -832,7 +958,7 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) return err; } -int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) +static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) { struct mlx5_core_dev *mdev = rq->mdev; @@ -861,6 +987,32 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) return err; } +static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) +{ + struct net_device *dev = rq->netdev; + int err; + + err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST); + if (err) { + netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn); + return err; + } + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) { + netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn); + return err; + } + + return 0; +} + +int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) +{ + mlx5e_free_rx_descs(rq); + + return mlx5e_rq_to_ready(rq, curr_state); +} + static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable) { struct mlx5_core_dev *mdev = rq->mdev; @@ -1033,9 +1185,6 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, if (err) goto err_destroy_rq; - if (mlx5e_is_tls_on(rq->priv) && !mlx5e_accel_is_ktls_device(mdev)) - __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */ - if (MLX5_CAP_ETH(mdev, cqe_checksum_full)) __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state); @@ -1069,13 +1218,6 @@ err_free_rq: void mlx5e_activate_rq(struct mlx5e_rq *rq) { set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - if (rq->icosq) { - mlx5e_trigger_irq(rq->icosq); - } else { - local_bh_disable(); - napi_schedule(rq->cq.napi); - local_bh_enable(); - } } void mlx5e_deactivate_rq(struct mlx5e_rq *rq) @@ -1087,8 +1229,6 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq) void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); - if (rq->icosq) - cancel_work_sync(&rq->icosq->recover_work); cancel_work_sync(&rq->recover_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); @@ -1161,10 +1301,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->xsk_pool = xsk_pool; sq->stats = sq->xsk_pool ? - &c->priv->channel_stats[c->ix].xsksq : + &c->priv->channel_stats[c->ix]->xsksq : is_redirect ? - &c->priv->channel_stats[c->ix].xdpsq : - &c->priv->channel_stats[c->ix].rq_xdpsq; + &c->priv->channel_stats[c->ix]->xdpsq : + &c->priv->channel_stats[c->ix]->rq_xdpsq; + sq->stop_room = param->is_mpw ? mlx5e_stop_room_for_mpwqe(mdev) : + mlx5e_stop_room_for_max_wqe(mdev); + sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1216,9 +1359,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_icosq_cqe_err(sq); } +static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, + recover_work); + + /* Not implemented yet. */ + + netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n"); +} + static int mlx5e_alloc_icosq(struct mlx5e_channel *c, struct mlx5e_sq_param *param, - struct mlx5e_icosq *sq) + struct mlx5e_icosq *sq, + work_func_t recover_work_func) { void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); struct mlx5_core_dev *mdev = c->mdev; @@ -1239,7 +1393,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work); + INIT_WORK(&sq->recover_work, recover_work_func); return 0; @@ -1303,7 +1457,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int err; sq->pdev = c->pdev; - sq->tstamp = c->tstamp; sq->clock = &mdev->clock; sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; @@ -1314,10 +1467,11 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); - if (MLX5_IPSEC_DEV(c->priv->mdev)) + if (mlx5_ipsec_device_caps(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (param->is_mpw) set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state); @@ -1575,13 +1729,14 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_tx_err_cqe(sq); } -int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct mlx5e_icosq *sq) +static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq, + work_func_t recover_work_func) { struct mlx5e_create_sq_param csp = {}; int err; - err = mlx5e_alloc_icosq(c, param, sq); + err = mlx5e_alloc_icosq(c, param, sq, recover_work_func); if (err) return err; @@ -1620,7 +1775,7 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) synchronize_net(); /* Sync with NAPI. */ } -void mlx5e_close_icosq(struct mlx5e_icosq *sq) +static void mlx5e_close_icosq(struct mlx5e_icosq *sq) { struct mlx5e_channel *c = sq->channel; @@ -1648,14 +1803,22 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + + /* Don't enable multi buffer on XDP_REDIRECT SQ, as it's not yet + * supported by upstream, and there is no defined trigger to allow + * transmitting redirected multi-buffer frames. + */ + if (param->is_xdp_mb && !is_redirect) + set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); if (err) goto err_free_xdpsq; mlx5e_set_xmit_fp(sq, param->is_mpw); - if (!param->is_mpw) { - unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT; + if (!param->is_mpw && !test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) { + unsigned int ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1; unsigned int inline_hdr_sz = 0; int i; @@ -1898,8 +2061,7 @@ static int mlx5e_txq_get_qos_node_hw_id(struct mlx5e_params *params, int txq_ix, { int tc; - if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL || - !params->mqprio.channel.rl) { + if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL) { *hw_id = 0; return 0; } @@ -1908,7 +2070,14 @@ static int mlx5e_txq_get_qos_node_hw_id(struct mlx5e_params *params, int txq_ix, if (tc < 0) return tc; - return mlx5e_mqprio_rl_get_node_hw_id(params->mqprio.channel.rl, tc, hw_id); + if (tc >= params->mqprio.num_tc) { + WARN(1, "Unexpected TCs configuration. tc %d is out of range of %u", + tc, params->mqprio.num_tc); + return -EINVAL; + } + + *hw_id = params->mqprio.channel.hw_id[tc]; + return 0; } static int mlx5e_open_sqs(struct mlx5e_channel *c, @@ -1928,7 +2097,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c, err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, params, &cparam->txq_sq, &c->sq[tc], tc, qos_queue_group_id, - &c->priv->channel_stats[c->ix].sq[tc]); + &c->priv->channel_stats[c->ix]->sq[tc]); if (err) goto err_close_sqs; } @@ -2084,11 +2253,15 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, spin_lock_init(&c->async_icosq_lock); - err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq); + err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq, + mlx5e_async_icosq_err_cqe_work); if (err) goto err_close_xdpsq_cq; - err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); + mutex_init(&c->icosq_recovery_lock); + + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq, + mlx5e_icosq_err_cqe_work); if (err) goto err_close_async_icosq; @@ -2156,9 +2329,12 @@ static void mlx5e_close_queues(struct mlx5e_channel *c) mlx5e_close_xdpsq(&c->xdpsq); if (c->xdp) mlx5e_close_xdpsq(&c->rq_xdpsq); + /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */ + cancel_work_sync(&c->icosq.recover_work); mlx5e_close_rq(&c->rq); mlx5e_close_sqs(c); mlx5e_close_icosq(&c->icosq); + mutex_destroy(&c->icosq_recovery_lock); mlx5e_close_icosq(&c->async_icosq); if (c->xdp) mlx5e_close_cq(&c->rq_xdpsq.cq); @@ -2176,6 +2352,44 @@ static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix) return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev); } +static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu) +{ + if (ix > priv->stats_nch) { + netdev_warn(priv->netdev, "Unexpected channel stats index %d > %d\n", ix, + priv->stats_nch); + return -EINVAL; + } + + if (priv->channel_stats[ix]) + return 0; + + /* Asymmetric dynamic memory allocation. + * Freed in mlx5e_priv_arrays_free, not on channel closure. + */ + mlx5e_dbg(DRV, priv, "Creating channel stats %d\n", ix); + priv->channel_stats[ix] = kvzalloc_node(sizeof(**priv->channel_stats), + GFP_KERNEL, cpu_to_node(cpu)); + if (!priv->channel_stats[ix]) + return -ENOMEM; + priv->stats_nch++; + + return 0; +} + +void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c) +{ + spin_lock_bh(&c->async_icosq_lock); + mlx5e_trigger_irq(&c->async_icosq); + spin_unlock_bh(&c->async_icosq_lock); +} + +void mlx5e_trigger_napi_sched(struct napi_struct *napi) +{ + local_bh_disable(); + napi_schedule(napi); + local_bh_enable(); +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, struct mlx5e_channel_param *cparam, @@ -2193,6 +2407,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (err) return err; + err = mlx5e_channel_stats_alloc(priv, ix, cpu); + if (err) + return err; + c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); if (!c) return -ENOMEM; @@ -2207,11 +2425,11 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); c->num_tc = mlx5e_get_dcb_num_tc(params); c->xdp = !!params->xdp_prog; - c->stats = &priv->channel_stats[ix].ch; + c->stats = &priv->channel_stats[ix]->ch; c->aff_mask = irq_get_effective_affinity_mask(irq); c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix); - netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll); err = mlx5e_open_queues(c, params, cparam); if (unlikely(err)) @@ -2249,10 +2467,13 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_icosq(&c->icosq); mlx5e_activate_icosq(&c->async_icosq); - mlx5e_activate_rq(&c->rq); if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) mlx5e_activate_xsk(c); + else + mlx5e_activate_rq(&c->rq); + + mlx5e_trigger_napi_icosq(c); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) @@ -2261,8 +2482,9 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) mlx5e_deactivate_xsk(c); + else + mlx5e_deactivate_rq(&c->rq); - mlx5e_deactivate_rq(&c->rq); mlx5e_deactivate_icosq(&c->async_icosq); mlx5e_deactivate_icosq(&c->icosq); for (tc = 0; tc < c->num_tc; tc++) @@ -2318,9 +2540,11 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, goto err_close_channels; } - err = mlx5e_qos_open_queues(priv, chs); - if (err) - goto err_close_ptp; + if (priv->htb) { + err = mlx5e_qos_open_queues(priv, chs); + if (err) + goto err_close_ptp; + } mlx5e_health_channels_update(priv); kvfree(cparam); @@ -2352,8 +2576,6 @@ static void mlx5e_activate_channels(struct mlx5e_channels *chs) mlx5e_ptp_activate_channel(chs->ptp); } -#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */ - static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs) { int err = 0; @@ -2361,8 +2583,12 @@ static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs) for (i = 0; i < chs->num; i++) { int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT; + struct mlx5e_channel *c = chs->c[i]; + + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + continue; - err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout); + err |= mlx5e_wait_for_min_rx_wqes(&c->rq, timeout); /* Don't wait on the XSK RQ, because the newer xdpsock sample * doesn't provide any Fill Ring entries at the setup stage. @@ -2502,9 +2728,11 @@ static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc, int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) { - int qos_queues, nch, ntc, num_txqs, err; + int nch, ntc, num_txqs, err; + int qos_queues = 0; - qos_queues = mlx5e_qos_cur_leaf_nodes(priv); + if (priv->htb) + qos_queues = mlx5e_htb_cur_leaf_nodes(priv->htb); nch = priv->channels.params.num_channels; ntc = mlx5e_get_dcb_num_tc(&priv->channels.params); @@ -2525,7 +2753,7 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) struct netdev_tc_txq old_tc_to_txq[TC_MAX_QUEUE], *tc_to_txq; struct net_device *netdev = priv->netdev; int old_num_txqs, old_ntc; - int num_rxqs, nch, ntc; + int nch, ntc; int err; int i; @@ -2536,7 +2764,6 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) nch = priv->channels.params.num_channels; ntc = priv->channels.params.mqprio.num_tc; - num_rxqs = nch * priv->profile->rq_groups; tc_to_txq = priv->channels.params.mqprio.tc_to_txq; err = mlx5e_netdev_set_tcs(netdev, nch, ntc, tc_to_txq); @@ -2545,18 +2772,11 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) err = mlx5e_update_tx_netdev_queues(priv); if (err) goto err_tcs; - err = netif_set_real_num_rx_queues(netdev, num_rxqs); + err = netif_set_real_num_rx_queues(netdev, nch); if (err) { netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err); goto err_txqs; } - if (priv->mqprio_rl != priv->channels.params.mqprio.channel.rl) { - if (priv->mqprio_rl) { - mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); - mlx5e_mqprio_rl_free(priv->mqprio_rl); - } - priv->mqprio_rl = priv->channels.params.mqprio.channel.rl; - } return 0; @@ -2598,7 +2818,7 @@ static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, } } -int mlx5e_num_channels_changed(struct mlx5e_priv *priv) +static int mlx5e_num_channels_changed(struct mlx5e_priv *priv) { u16 count = priv->channels.params.num_channels; int err; @@ -2631,43 +2851,46 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) struct mlx5e_txqsq *sq = &c->sq[tc]; priv->txq2sq[sq->txq_ix] = sq; - priv->channel_tc2realtxq[i][tc] = i + tc * ch; } } if (!priv->channels.ptp) - return; + goto out; if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state)) - return; + goto out; for (tc = 0; tc < num_tc; tc++) { struct mlx5e_ptp *c = priv->channels.ptp; struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq; priv->txq2sq[sq->txq_ix] = sq; - priv->port_ptp_tc2realtxq[tc] = priv->num_tc_x_num_ch + tc; } -} -static void mlx5e_update_num_tc_x_num_ch(struct mlx5e_priv *priv) -{ - /* Sync with mlx5e_select_queue. */ - WRITE_ONCE(priv->num_tc_x_num_ch, - mlx5e_get_dcb_num_tc(&priv->channels.params) * priv->channels.num); +out: + /* Make the change to txq2sq visible before the queue is started. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); } void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) { - mlx5e_update_num_tc_x_num_ch(priv); mlx5e_build_txq_maps(priv); mlx5e_activate_channels(&priv->channels); - mlx5e_qos_activate_queues(priv); + if (priv->htb) + mlx5e_qos_activate_queues(priv); mlx5e_xdp_tx_enable(priv); + + /* dev_watchdog() wants all TX queues to be started when the carrier is + * OK, including the ones in range real_num_tx_queues..num_tx_queues-1. + * Make it happy to avoid TX timeout false alarms. + */ netif_tx_start_all_queues(priv->netdev); if (mlx5e_is_vport_rep(priv)) - mlx5e_add_sqs_fwd_rules(priv); + mlx5e_rep_activate_channels(priv); mlx5e_wait_channels_min_rx_wqes(&priv->channels); @@ -2681,13 +2904,15 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) mlx5e_rx_res_channels_deactivate(priv->rx_res); if (mlx5e_is_vport_rep(priv)) - mlx5e_remove_sqs_fwd_rules(priv); + mlx5e_rep_deactivate_channels(priv); - /* FIXME: This is a W/A only for tx timeout watch dog false alarm when - * polling for inactive tx queues. + /* The results of ndo_select_queue are unreliable, while netdev config + * is being changed (real_num_tx_queues, num_tc). Stop all queues to + * prevent ndo_start_xmit from being called, so that it can assume that + * the selected queue is always valid. */ - netif_tx_stop_all_queues(priv->netdev); netif_tx_disable(priv->netdev); + mlx5e_xdp_tx_disable(priv); mlx5e_deactivate_channels(&priv->channels); } @@ -2747,6 +2972,7 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, mlx5e_close_channels(&old_chs); priv->profile->update_rx(priv); + mlx5e_selq_apply(&priv->selq); out: mlx5e_activate_priv_channels(priv); @@ -2770,13 +2996,24 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv, return mlx5e_switch_priv_params(priv, params, preactivate, context); new_chs.params = *params; + + mlx5e_selq_prepare_params(&priv->selq, &new_chs.params); + err = mlx5e_open_channels(priv, &new_chs); if (err) - return err; + goto err_cancel_selq; + err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context); if (err) - mlx5e_close_channels(&new_chs); + goto err_close; + return 0; + +err_close: + mlx5e_close_channels(&new_chs); + +err_cancel_selq: + mlx5e_selq_cancel(&priv->selq); return err; } @@ -2816,6 +3053,8 @@ int mlx5e_open_locked(struct net_device *netdev) struct mlx5e_priv *priv = netdev_priv(netdev); int err; + mlx5e_selq_prepare_params(&priv->selq, &priv->channels.params); + set_bit(MLX5E_STATE_OPENED, &priv->state); err = mlx5e_open_channels(priv, &priv->channels); @@ -2823,6 +3062,7 @@ int mlx5e_open_locked(struct net_device *netdev) goto err_clear_state_opened_flag; priv->profile->update_rx(priv); + mlx5e_selq_apply(&priv->selq); mlx5e_activate_priv_channels(priv); mlx5e_apply_traps(priv, true); if (priv->profile->update_carrier) @@ -2833,6 +3073,7 @@ int mlx5e_open_locked(struct net_device *netdev) err_clear_state_opened_flag: clear_bit(MLX5E_STATE_OPENED, &priv->state); + mlx5e_selq_cancel(&priv->selq); return err; } @@ -3050,6 +3291,12 @@ err_close_tises: static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) { + if (priv->mqprio_rl) { + mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); + mlx5e_mqprio_rl_free(priv->mqprio_rl); + priv->mqprio_rl = NULL; + } + mlx5e_accel_cleanup_tx(priv); mlx5e_destroy_tises(priv); } @@ -3118,19 +3365,38 @@ static void mlx5e_params_mqprio_dcb_set(struct mlx5e_params *params, u8 num_tc) { params->mqprio.mode = TC_MQPRIO_MODE_DCB; params->mqprio.num_tc = num_tc; - params->mqprio.channel.rl = NULL; mlx5e_mqprio_build_default_tc_to_txq(params->mqprio.tc_to_txq, num_tc, params->num_channels); } +static void mlx5e_mqprio_rl_update_params(struct mlx5e_params *params, + struct mlx5e_mqprio_rl *rl) +{ + int tc; + + for (tc = 0; tc < TC_MAX_QUEUE; tc++) { + u32 hw_id = 0; + + if (rl) + mlx5e_mqprio_rl_get_node_hw_id(rl, tc, &hw_id); + params->mqprio.channel.hw_id[tc] = hw_id; + } +} + static void mlx5e_params_mqprio_channel_set(struct mlx5e_params *params, - struct tc_mqprio_qopt *qopt, + struct tc_mqprio_qopt_offload *mqprio, struct mlx5e_mqprio_rl *rl) { + int tc; + params->mqprio.mode = TC_MQPRIO_MODE_CHANNEL; - params->mqprio.num_tc = qopt->num_tc; - params->mqprio.channel.rl = rl; - mlx5e_mqprio_build_tc_to_txq(params->mqprio.tc_to_txq, qopt); + params->mqprio.num_tc = mqprio->qopt.num_tc; + + for (tc = 0; tc < TC_MAX_QUEUE; tc++) + params->mqprio.channel.max_rate[tc] = mqprio->max_rate[tc]; + + mlx5e_mqprio_rl_update_params(params, rl); + mlx5e_mqprio_build_tc_to_txq(params->mqprio.tc_to_txq, &mqprio->qopt); } static void mlx5e_params_mqprio_reset(struct mlx5e_params *params) @@ -3156,6 +3422,12 @@ static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv, err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_num_channels_changed_ctx, NULL, true); + if (!err && priv->mqprio_rl) { + mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); + mlx5e_mqprio_rl_free(priv->mqprio_rl); + priv->mqprio_rl = NULL; + } + priv->max_opened_tc = max_t(u8, priv->max_opened_tc, mlx5e_get_dcb_num_tc(&priv->channels.params)); return err; @@ -3214,16 +3486,38 @@ static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv, return 0; } -static bool mlx5e_mqprio_rate_limit(struct tc_mqprio_qopt_offload *mqprio) +static bool mlx5e_mqprio_rate_limit(u8 num_tc, u64 max_rate[]) { int tc; - for (tc = 0; tc < mqprio->qopt.num_tc; tc++) - if (mqprio->max_rate[tc]) + for (tc = 0; tc < num_tc; tc++) + if (max_rate[tc]) return true; return false; } +static struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_create(struct mlx5_core_dev *mdev, + u8 num_tc, u64 max_rate[]) +{ + struct mlx5e_mqprio_rl *rl; + int err; + + if (!mlx5e_mqprio_rate_limit(num_tc, max_rate)) + return NULL; + + rl = mlx5e_mqprio_rl_alloc(); + if (!rl) + return ERR_PTR(-ENOMEM); + + err = mlx5e_mqprio_rl_init(rl, mdev, num_tc, max_rate); + if (err) { + mlx5e_mqprio_rl_free(rl); + return ERR_PTR(err); + } + + return rl; +} + static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv, struct tc_mqprio_qopt_offload *mqprio) { @@ -3237,32 +3531,32 @@ static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv, if (err) return err; - rl = NULL; - if (mlx5e_mqprio_rate_limit(mqprio)) { - rl = mlx5e_mqprio_rl_alloc(); - if (!rl) - return -ENOMEM; - err = mlx5e_mqprio_rl_init(rl, priv->mdev, mqprio->qopt.num_tc, - mqprio->max_rate); - if (err) { - mlx5e_mqprio_rl_free(rl); - return err; - } - } + rl = mlx5e_mqprio_rl_create(priv->mdev, mqprio->qopt.num_tc, mqprio->max_rate); + if (IS_ERR(rl)) + return PTR_ERR(rl); new_params = priv->channels.params; - mlx5e_params_mqprio_channel_set(&new_params, &mqprio->qopt, rl); + mlx5e_params_mqprio_channel_set(&new_params, mqprio, rl); nch_changed = mlx5e_get_dcb_num_tc(&priv->channels.params) > 1; preactivate = nch_changed ? mlx5e_num_channels_changed_ctx : mlx5e_update_netdev_queues_ctx; err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, true); - if (err && rl) { - mlx5e_mqprio_rl_cleanup(rl); - mlx5e_mqprio_rl_free(rl); + if (err) { + if (rl) { + mlx5e_mqprio_rl_cleanup(rl); + mlx5e_mqprio_rl_free(rl); + } + return err; } - return err; + if (priv->mqprio_rl) { + mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); + mlx5e_mqprio_rl_free(priv->mqprio_rl); + } + priv->mqprio_rl = rl; + + return 0; } static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, @@ -3271,7 +3565,7 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, /* MQPRIO is another toplevel qdisc that can't be attached * simultaneously with the offloaded HTB. */ - if (WARN_ON(priv->htb.maj_id)) + if (WARN_ON(mlx5e_selq_is_htb_enabled(&priv->selq))) return -EINVAL; switch (mqprio->mode) { @@ -3284,47 +3578,6 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, } } -static int mlx5e_setup_tc_htb(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb) -{ - int res; - - switch (htb->command) { - case TC_HTB_CREATE: - return mlx5e_htb_root_add(priv, htb->parent_classid, htb->classid, - htb->extack); - case TC_HTB_DESTROY: - return mlx5e_htb_root_del(priv); - case TC_HTB_LEAF_ALLOC_QUEUE: - res = mlx5e_htb_leaf_alloc_queue(priv, htb->classid, htb->parent_classid, - htb->rate, htb->ceil, htb->extack); - if (res < 0) - return res; - htb->qid = res; - return 0; - case TC_HTB_LEAF_TO_INNER: - return mlx5e_htb_leaf_to_inner(priv, htb->parent_classid, htb->classid, - htb->rate, htb->ceil, htb->extack); - case TC_HTB_LEAF_DEL: - return mlx5e_htb_leaf_del(priv, &htb->classid, htb->extack); - case TC_HTB_LEAF_DEL_LAST: - case TC_HTB_LEAF_DEL_LAST_FORCE: - return mlx5e_htb_leaf_del_last(priv, htb->classid, - htb->command == TC_HTB_LEAF_DEL_LAST_FORCE, - htb->extack); - case TC_HTB_NODE_MODIFY: - return mlx5e_htb_node_modify(priv, htb->classid, htb->rate, htb->ceil, - htb->extack); - case TC_HTB_LEAF_QUERY_QUEUE: - res = mlx5e_get_txq_by_classid(priv, htb->classid); - if (res < 0) - return res; - htb->qid = res; - return 0; - default: - return -EOPNOTSUPP; - } -} - static LIST_HEAD(mlx5e_block_cb_list); static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, @@ -3358,7 +3611,7 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, return err; case TC_SETUP_QDISC_HTB: mutex_lock(&priv->state_lock); - err = mlx5e_setup_tc_htb(priv, type_data); + err = mlx5e_htb_setup_tc(priv, type_data); mutex_unlock(&priv->state_lock); return err; default: @@ -3371,7 +3624,7 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) int i; for (i = 0; i < priv->stats_nch; i++) { - struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i]; + struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i]; struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; int j; @@ -3446,7 +3699,8 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->rx_length_errors = PPORT_802_3_GET(pstats, a_in_range_length_errors) + PPORT_802_3_GET(pstats, a_out_of_range_length_field) + - PPORT_802_3_GET(pstats, a_frame_too_long_errors); + PPORT_802_3_GET(pstats, a_frame_too_long_errors) + + VNIC_ENV_GET(&priv->stats.vnic, eth_wqe_too_small); stats->rx_crc_errors = PPORT_802_3_GET(pstats, a_frame_check_sequence_errors); stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors); @@ -3509,20 +3763,7 @@ static int set_feature_lro(struct net_device *netdev, bool enable) mutex_lock(&priv->state_lock); - if (enable && priv->xsk.refcnt) { - netdev_warn(netdev, "LRO is incompatible with AF_XDP (%u XSKs are active)\n", - priv->xsk.refcnt); - err = -EINVAL; - goto out; - } - cur_params = &priv->channels.params; - if (enable && !MLX5E_GET_PFLAG(cur_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { - netdev_warn(netdev, "can't set LRO with legacy RQ\n"); - err = -EINVAL; - goto out; - } - new_params = *cur_params; if (enable) @@ -3559,11 +3800,6 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable) new_params = priv->channels.params; if (enable) { - if (MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { - netdev_warn(netdev, "Can't set HW-GRO when CQE compress is active\n"); - err = -EINVAL; - goto out; - } new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO; new_params.packet_merge.shampo.match_criteria_type = MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED; @@ -3575,8 +3811,7 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable) goto out; } - err = mlx5e_safe_switch_params(priv, &new_params, - mlx5e_modify_tirs_packet_merge_ctx, NULL, reset); + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); out: mutex_unlock(&priv->state_lock); return err; @@ -3587,9 +3822,11 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) struct mlx5e_priv *priv = netdev_priv(netdev); if (enable) - mlx5e_enable_cvlan_filter(priv); + mlx5e_enable_cvlan_filter(priv->fs, + !!(priv->netdev->flags & IFF_PROMISC)); else - mlx5e_disable_cvlan_filter(priv); + mlx5e_disable_cvlan_filter(priv->fs, + !!(priv->netdev->flags & IFF_PROMISC)); return 0; } @@ -3597,21 +3834,26 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) static int set_feature_hw_tc(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); + int err = 0; #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) - if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) { + int tc_flag = mlx5e_is_uplink_rep(priv) ? MLX5_TC_FLAG(ESW_OFFLOAD) : + MLX5_TC_FLAG(NIC_OFFLOAD); + if (!enable && mlx5e_tc_num_filters(priv, tc_flag)) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; } #endif - if (!enable && priv->htb.maj_id) { + mutex_lock(&priv->state_lock); + if (!enable && mlx5e_selq_is_htb_enabled(&priv->selq)) { netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n"); - return -EINVAL; + err = -EINVAL; } + mutex_unlock(&priv->state_lock); - return 0; + return err; } static int set_feature_rx_all(struct net_device *netdev, bool enable) @@ -3693,20 +3935,45 @@ static int set_feature_rx_vlan(struct net_device *netdev, bool enable) mutex_lock(&priv->state_lock); + mlx5e_fs_set_vlan_strip_disable(priv->fs, !enable); priv->channels.params.vlan_strip_disable = !enable; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) goto unlock; err = mlx5e_modify_channels_vsd(&priv->channels, !enable); - if (err) + if (err) { + mlx5e_fs_set_vlan_strip_disable(priv->fs, enable); priv->channels.params.vlan_strip_disable = enable; - + } unlock: mutex_unlock(&priv->state_lock); return err; } +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_flow_steering *fs = priv->fs; + + if (mlx5e_is_uplink_rep(priv)) + return 0; /* no vlan table for uplink rep */ + + return mlx5e_fs_vlan_rx_add_vid(fs, dev, proto, vid); +} + +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_flow_steering *fs = priv->fs; + + if (mlx5e_is_uplink_rep(priv)) + return 0; /* no vlan table for uplink rep */ + + return mlx5e_fs_vlan_rx_kill_vid(fs, dev, proto, vid); +} + #ifdef CONFIG_MLX5_EN_ARFS static int set_feature_arfs(struct net_device *netdev, bool enable) { @@ -3714,9 +3981,9 @@ static int set_feature_arfs(struct net_device *netdev, bool enable) int err; if (enable) - err = mlx5e_arfs_enable(priv); + err = mlx5e_arfs_enable(priv->fs); else - err = mlx5e_arfs_disable(priv); + err = mlx5e_arfs_disable(priv->fs); return err; } @@ -3724,12 +3991,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable) static int mlx5e_handle_feature(struct net_device *netdev, netdev_features_t *features, - netdev_features_t wanted_features, netdev_features_t feature, mlx5e_feature_handler feature_handler) { - netdev_features_t changes = wanted_features ^ netdev->features; - bool enable = !!(wanted_features & feature); + netdev_features_t changes = *features ^ netdev->features; + bool enable = !!(*features & feature); int err; if (!(changes & feature)) @@ -3737,22 +4003,22 @@ static int mlx5e_handle_feature(struct net_device *netdev, err = feature_handler(netdev, enable); if (err) { + MLX5E_SET_FEATURE(features, feature, !enable); netdev_err(netdev, "%s feature %pNF failed, err %d\n", enable ? "Enable" : "Disable", &feature, err); return err; } - MLX5E_SET_FEATURE(features, feature, enable); return 0; } int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) { - netdev_features_t oper_features = netdev->features; + netdev_features_t oper_features = features; int err = 0; #define MLX5E_HANDLE_FEATURE(feature, handler) \ - mlx5e_handle_feature(netdev, &oper_features, features, feature, handler) + mlx5e_handle_feature(netdev, &oper_features, feature, handler) err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro); @@ -3790,6 +4056,10 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev if (netdev->features & NETIF_F_NTUPLE) netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n"); + features &= ~NETIF_F_GRO_HW; + if (netdev->features & NETIF_F_GRO_HW) + netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n"); + return features; } @@ -3797,12 +4067,14 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_features_t features) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_vlan_table *vlan; struct mlx5e_params *params; + vlan = mlx5e_fs_get_vlan(priv->fs); mutex_lock(&priv->state_lock); params = &priv->channels.params; - if (!priv->fs.vlan || - !bitmap_empty(mlx5e_vlan_get_active_svlans(priv->fs.vlan), VLAN_N_VID)) { + if (!vlan || + !bitmap_empty(mlx5e_vlan_get_active_svlans(vlan), VLAN_N_VID)) { /* HW strips the outer C-tag header, this is a problem * for S-tag traffic. */ @@ -3822,10 +4094,39 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, } } + if (params->xdp_prog) { + if (features & NETIF_F_LRO) { + netdev_warn(netdev, "LRO is incompatible with XDP\n"); + features &= ~NETIF_F_LRO; + } + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "HW GRO is incompatible with XDP\n"); + features &= ~NETIF_F_GRO_HW; + } + } + + if (priv->xsk.refcnt) { + if (features & NETIF_F_LRO) { + netdev_warn(netdev, "LRO is incompatible with AF_XDP (%u XSKs are active)\n", + priv->xsk.refcnt); + features &= ~NETIF_F_LRO; + } + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n", + priv->xsk.refcnt); + features &= ~NETIF_F_GRO_HW; + } + } + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { features &= ~NETIF_F_RXHASH; if (netdev->features & NETIF_F_RXHASH) netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); + + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n"); + features &= ~NETIF_F_GRO_HW; + } } if (mlx5e_is_uplink_rep(priv)) @@ -3862,7 +4163,7 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE. */ max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr); - max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk); + max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0)); max_mtu = min(max_mtu_frame, max_mtu_page); netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u. Try MTU <= %d\n", @@ -3874,6 +4175,33 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, return true; } +static bool mlx5e_params_validate_xdp(struct net_device *netdev, + struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + bool is_linear; + + /* No XSK params: AF_XDP can't be enabled yet at the point of setting + * the XDP program. + */ + is_linear = mlx5e_rx_is_linear_skb(mdev, params, NULL); + + if (!is_linear && params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { + netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n", + params->sw_mtu, + mlx5e_xdp_max_mtu(params, NULL)); + return false; + } + if (!is_linear && !params->xdp_prog->aux->xdp_has_frags) { + netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n", + params->sw_mtu, + mlx5e_xdp_max_mtu(params, NULL)); + return false; + } + + return true; +} + int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, mlx5e_fp_preactivate preactivate) { @@ -3893,10 +4221,8 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, if (err) goto out; - if (params->xdp_prog && - !mlx5e_rx_is_linear_skb(&new_params, NULL)) { - netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n", - new_mtu, mlx5e_xdp_max_mtu(params, NULL)); + if (new_params.xdp_prog && !mlx5e_params_validate_xdp(netdev, priv->mdev, + &new_params)) { err = -EINVAL; goto out; } @@ -3911,19 +4237,21 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO) reset = false; - if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO) { bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL); bool is_linear_new = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, &new_params, NULL); - u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL); - u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_params, NULL); + u8 sz_old = mlx5e_mpwqe_get_log_rq_size(priv->mdev, params, NULL); + u8 sz_new = mlx5e_mpwqe_get_log_rq_size(priv->mdev, &new_params, NULL); /* Always reset in linear mode - hw_mtu is used in data path. * Check that the mode was non-linear and didn't change. * If XSK is active, XSK RQs are linear. + * Reset if the RQ size changed, even if it's non-linear. */ if (!is_linear_old && !is_linear_new && !priv->xsk.refcnt && - ppw_old == ppw_new) + sz_old == sz_new) reset = false; } @@ -4038,7 +4366,7 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) goto err_unlock; } - if (!priv->profile->rx_ptp_support) + if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) err = mlx5e_hwstamp_config_no_ptp_rx(priv, config.rx_filter != HWTSTAMP_FILTER_NONE); else @@ -4370,24 +4698,11 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) return -EINVAL; } - if (mlx5_fpga_is_ipsec_device(priv->mdev)) { - netdev_warn(netdev, - "XDP is not available on Innova cards with IPsec support\n"); - return -EINVAL; - } - new_params = priv->channels.params; new_params.xdp_prog = prog; - /* No XSK params: AF_XDP can't be enabled yet at the point of setting - * the XDP program. - */ - if (!mlx5e_rx_is_linear_skb(&new_params, NULL)) { - netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n", - new_params.sw_mtu, - mlx5e_xdp_max_mtu(&new_params, NULL)); + if (!mlx5e_params_validate_xdp(netdev, priv->mdev, &new_params)) return -EINVAL; - } return 0; } @@ -4424,8 +4739,20 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) new_params = priv->channels.params; new_params.xdp_prog = prog; - if (reset) - mlx5e_set_rq_type(priv->mdev, &new_params); + + /* XDP affects striding RQ parameters. Block XDP if striding RQ won't be + * supported with the new parameters: if PAGE_SIZE is bigger than + * MLX5_MPWQE_LOG_STRIDE_SZ_MAX, striding RQ can't be used, even though + * the MTU is small enough for the linear mode, because XDP uses strides + * of PAGE_SIZE on regular RQs. + */ + if (reset && MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { + /* Checking for regular RQs here; XSK RQs were checked on XSK bind. */ + err = mlx5e_mpwrq_validate_regular(priv->mdev, &new_params); + if (err) + goto unlock; + } + old_prog = priv->channels.params.xdp_prog; err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); @@ -4454,6 +4781,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) unlock: mutex_unlock(&priv->state_lock); + + /* Need to fix some features. */ + if (!err) + netdev_update_features(netdev); + return err; } @@ -4592,11 +4924,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 priv->max_nch); mlx5e_params_mqprio_reset(params); - /* Set an initial non-zero value, so that mlx5e_select_queue won't - * divide by zero if called before first activating channels. - */ - priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc; - /* SQ */ params->log_sq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : @@ -4618,14 +4945,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 /* RQ */ mlx5e_build_rq_params(mdev, params); - /* HW LRO */ - if (MLX5_CAP_ETH(mdev, lro_cap) && - params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { - /* No XSK params: checking the availability of striding RQ in general. */ - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) - params->packet_merge.type = slow_pci_heuristic(mdev) ? - MLX5E_PACKET_MERGE_NONE : MLX5E_PACKET_MERGE_LRO; - } params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ @@ -4735,6 +5054,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->vlan_features |= NETIF_F_TSO6; netdev->vlan_features |= NETIF_F_RXCSUM; netdev->vlan_features |= NETIF_F_RXHASH; + netdev->vlan_features |= NETIF_F_GSO_PARTIAL; netdev->mpls_features |= NETIF_F_SG; netdev->mpls_features |= NETIF_F_HW_CSUM; @@ -4752,7 +5072,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (!!MLX5_CAP_ETH(mdev, lro_cap) && !MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) && !MLX5_CAP_ETH(mdev, tunnel_lro_gre) && - mlx5e_check_fragmented_striding_rq_cap(mdev)) + mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, + MLX5E_MPWRQ_UMR_MODE_ALIGNED)) netdev->vlan_features |= NETIF_F_LRO; netdev->hw_features = netdev->vlan_features; @@ -4761,10 +5082,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; - if (!!MLX5_CAP_GEN(mdev, shampo) && - mlx5e_check_fragmented_striding_rq_cap(mdev)) - netdev->hw_features |= NETIF_F_GRO_HW; - if (mlx5e_tunnel_any_tx_proto_supported(mdev)) { netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; @@ -4773,15 +5090,22 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) } if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; } if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) { - netdev->hw_features |= NETIF_F_GSO_GRE; - netdev->hw_enc_features |= NETIF_F_GSO_GRE; - netdev->gso_partial_features |= NETIF_F_GSO_GRE; + netdev->hw_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->gso_partial_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; } if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) { @@ -4793,7 +5117,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) NETIF_F_GSO_IPXIP6; } - netdev->hw_features |= NETIF_F_GSO_PARTIAL; netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4; netdev->hw_features |= NETIF_F_GSO_UDP_L4; netdev->features |= NETIF_F_GSO_UDP_L4; @@ -4836,9 +5159,11 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->priv_flags |= IFF_UNICAST_FLT; + netif_set_tso_max_size(netdev, GSO_MAX_SIZE); mlx5e_set_netdev_dev_addr(netdev); + mlx5e_macsec_build_netdev(priv); mlx5e_ipsec_build_netdev(priv); - mlx5e_tls_build_netdev(priv); + mlx5e_ktls_build_netdev(priv); } void mlx5e_create_q_counters(struct mlx5e_priv *priv) @@ -4883,6 +5208,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_flow_steering *fs; int err; mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu); @@ -4890,17 +5216,20 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, mlx5e_timestamp_init(priv); - err = mlx5e_fs_init(priv); - if (err) { + fs = mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + if (!fs) { + err = -ENOMEM; mlx5_core_err(mdev, "FS initialization failed, %d\n", err); return err; } + priv->fs = fs; err = mlx5e_ipsec_init(priv); if (err) mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err); - err = mlx5e_tls_init(priv); + err = mlx5e_ktls_init(priv); if (err) mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); @@ -4911,9 +5240,9 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_health_destroy_reporters(priv); - mlx5e_tls_cleanup(priv); + mlx5e_ktls_cleanup(priv); mlx5e_ipsec_cleanup(priv); - mlx5e_fs_cleanup(priv); + mlx5e_fs_cleanup(priv->fs); } static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) @@ -4934,7 +5263,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) goto err_destroy_q_counters; } - features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP; + features = MLX5E_RX_RES_FEATURE_PTP; if (priv->channels.params.tunneled_offload_en) features |= MLX5E_RX_RES_FEATURE_INNER_FT; err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features, @@ -4944,7 +5273,8 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_flow_steering(priv); + err = mlx5e_create_flow_steering(priv->fs, priv->rx_res, priv->profile, + priv->netdev); if (err) { mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); goto err_destroy_rx_res; @@ -4967,7 +5297,8 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) err_tc_nic_cleanup: mlx5e_tc_nic_cleanup(priv); err_destroy_flow_steering: - mlx5e_destroy_flow_steering(priv); + mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE), + priv->profile); err_destroy_rx_res: mlx5e_rx_res_destroy(priv->rx_res); err_close_drop_rq: @@ -4983,7 +5314,8 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { mlx5e_accel_cleanup_rx(priv); mlx5e_tc_nic_cleanup(priv); - mlx5e_destroy_flow_steering(priv); + mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE), + priv->profile); mlx5e_rx_res_destroy(priv->rx_res); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@ -4991,6 +5323,23 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) priv->rx_res = NULL; } +static void mlx5e_set_mqprio_rl(struct mlx5e_priv *priv) +{ + struct mlx5e_params *params; + struct mlx5e_mqprio_rl *rl; + + params = &priv->channels.params; + if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL) + return; + + rl = mlx5e_mqprio_rl_create(priv->mdev, params->mqprio.num_tc, + params->mqprio.channel.max_rate); + if (IS_ERR(rl)) + rl = NULL; + priv->mqprio_rl = rl; + mlx5e_mqprio_rl_update_params(params, rl); +} + static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) { int err; @@ -5001,16 +5350,30 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) return err; } + err = mlx5e_accel_init_tx(priv); + if (err) + goto err_destroy_tises; + + mlx5e_set_mqprio_rl(priv); mlx5e_dcbnl_initialize(priv); return 0; + +err_destroy_tises: + mlx5e_destroy_tises(priv); + return err; } static void mlx5e_nic_enable(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; + int err; + + mlx5e_fs_init_l2_addr(priv->fs, netdev); - mlx5e_init_l2_addr(priv); + err = mlx5e_macsec_init(priv); + if (err) + mlx5_core_err(mdev, "MACsec initialization failed, %d\n", err); /* Marking the link as currently not needed by the Driver */ if (!netif_running(netdev)) @@ -5069,6 +5432,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5e_disable_async_events(priv); mlx5_lag_remove_netdev(mdev, priv->netdev); mlx5_vxlan_reset_to_default(mdev->vxlan); + mlx5e_macsec_cleanup(priv); } int mlx5e_update_nic_rx(struct mlx5e_priv *priv) @@ -5090,12 +5454,27 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .update_carrier = mlx5e_update_carrier, .rx_handlers = &mlx5e_rx_handlers_nic, .max_tc = MLX5E_MAX_NUM_TC, - .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), .stats_grps = mlx5e_nic_stats_grps, .stats_grps_num = mlx5e_nic_stats_grps_num, - .rx_ptp_support = true, + .features = BIT(MLX5E_PROFILE_FEATURE_PTP_RX) | + BIT(MLX5E_PROFILE_FEATURE_PTP_TX) | + BIT(MLX5E_PROFILE_FEATURE_QOS_HTB) | + BIT(MLX5E_PROFILE_FEATURE_FS_VLAN) | + BIT(MLX5E_PROFILE_FEATURE_FS_TC), }; +static int mlx5e_profile_max_num_channels(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) +{ + int nch; + + nch = mlx5e_get_max_num_channels(mdev); + + if (profile->max_nch_limit) + nch = min_t(int, nch, profile->max_nch_limit(mdev)); + return nch; +} + static unsigned int mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev, const struct mlx5e_profile *profile) @@ -5104,11 +5483,10 @@ mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev, unsigned int max_nch, tmp; /* core resources */ - max_nch = mlx5e_get_max_num_channels(mdev); + max_nch = mlx5e_profile_max_num_channels(mdev, profile); /* netdev rx queues */ - tmp = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1); - max_nch = min_t(unsigned int, max_nch, tmp); + max_nch = min_t(unsigned int, max_nch, netdev->num_rx_queues); /* netdev tx queues */ tmp = netdev->num_tx_queues; @@ -5122,25 +5500,52 @@ mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev, return max_nch; } +int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev) +{ + /* Indirect TIRS: 2 sets of TTCs (inner + outer steering) + * and 1 set of direct TIRS + */ + return 2 * MLX5E_NUM_INDIR_TIRS + + mlx5e_profile_max_num_channels(mdev, &mlx5e_nic_profile); +} + +void mlx5e_set_rx_mode_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + set_rx_mode_work); + + return mlx5e_fs_set_rx_mode_work(priv->fs, priv->netdev); +} + /* mlx5e generic netdev management API (move to en_common.c) */ int mlx5e_priv_init(struct mlx5e_priv *priv, const struct mlx5e_profile *profile, struct net_device *netdev, struct mlx5_core_dev *mdev) { + int nch, num_txqs, node; + int err; + + num_txqs = netdev->num_tx_queues; + nch = mlx5e_calc_max_nch(mdev, netdev, profile); + node = dev_to_node(mlx5_core_dma_dev(mdev)); + /* priv init */ priv->mdev = mdev; priv->netdev = netdev; priv->msglevel = MLX5E_MSG_LEVEL; - priv->max_nch = mlx5e_calc_max_nch(mdev, netdev, profile); - priv->stats_nch = priv->max_nch; + priv->max_nch = nch; priv->max_opened_tc = 1; if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL)) return -ENOMEM; mutex_init(&priv->state_lock); - hash_init(priv->htb.qos_tc2node); + + err = mlx5e_selq_init(&priv->selq, &priv->state_lock); + if (err) + goto err_free_cpumask; + INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); @@ -5148,13 +5553,33 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, priv->wq = create_singlethread_workqueue("mlx5e"); if (!priv->wq) - goto err_free_cpumask; + goto err_free_selq; + + priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node); + if (!priv->txq2sq) + goto err_destroy_workqueue; + + priv->tx_rates = kcalloc_node(num_txqs, sizeof(*priv->tx_rates), GFP_KERNEL, node); + if (!priv->tx_rates) + goto err_free_txq2sq; + + priv->channel_stats = + kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node); + if (!priv->channel_stats) + goto err_free_tx_rates; return 0; +err_free_tx_rates: + kfree(priv->tx_rates); +err_free_txq2sq: + kfree(priv->txq2sq); +err_destroy_workqueue: + destroy_workqueue(priv->wq); +err_free_selq: + mlx5e_selq_cleanup(&priv->selq); err_free_cpumask: free_cpumask_var(priv->scratchpad.cpumask); - return -ENOMEM; } @@ -5166,28 +5591,58 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) if (!priv->mdev) return; + for (i = 0; i < priv->stats_nch; i++) + kvfree(priv->channel_stats[i]); + kfree(priv->channel_stats); + kfree(priv->tx_rates); + kfree(priv->txq2sq); destroy_workqueue(priv->wq); + mutex_lock(&priv->state_lock); + mlx5e_selq_cleanup(&priv->selq); + mutex_unlock(&priv->state_lock); free_cpumask_var(priv->scratchpad.cpumask); - for (i = 0; i < priv->htb.max_qos_sqs; i++) - kfree(priv->htb.qos_sq_stats[i]); - kvfree(priv->htb.qos_sq_stats); - - if (priv->mqprio_rl) { - mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); - mlx5e_mqprio_rl_free(priv->mqprio_rl); - } + for (i = 0; i < priv->htb_max_qos_sqs; i++) + kfree(priv->htb_qos_sq_stats[i]); + kvfree(priv->htb_qos_sq_stats); memset(priv, 0, sizeof(*priv)); } +static unsigned int mlx5e_get_max_num_txqs(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) +{ + unsigned int nch, ptp_txqs, qos_txqs; + + nch = mlx5e_profile_max_num_channels(mdev, profile); + + ptp_txqs = MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn) && + mlx5e_profile_feature_cap(profile, PTP_TX) ? + profile->max_tc : 0; + + qos_txqs = mlx5_qos_is_supported(mdev) && + mlx5e_profile_feature_cap(profile, QOS_HTB) ? + mlx5e_qos_max_leaf_nodes(mdev) : 0; + + return nch * profile->max_tc + ptp_txqs + qos_txqs; +} + +static unsigned int mlx5e_get_max_num_rxqs(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) +{ + return mlx5e_profile_max_num_channels(mdev, profile); +} + struct net_device * -mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, - unsigned int txqs, unsigned int rxqs) +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile) { struct net_device *netdev; + unsigned int txqs, rxqs; int err; + txqs = mlx5e_get_max_num_txqs(mdev, profile); + rxqs = mlx5e_get_max_num_rxqs(mdev, profile); + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), txqs, rxqs); if (!netdev) { mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); @@ -5201,6 +5656,7 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *prof } netif_carrier_off(netdev); + netif_tx_disable(netdev); dev_net_set(netdev, mlx5_core_net(mdev)); return netdev; @@ -5234,6 +5690,16 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) int err; clear_bit(MLX5E_STATE_DESTROYING, &priv->state); + if (priv->fs) + mlx5e_fs_set_state_destroy(priv->fs, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + + /* Validate the max_wqe_size_sq capability. */ + if (WARN_ON_ONCE(mlx5e_get_max_sq_wqebbs(priv->mdev) < MLX5E_MAX_TX_WQEBBS)) { + mlx5_core_warn(priv->mdev, "MLX5E: Max SQ WQEBBs firmware capability: %u, needed %lu\n", + mlx5e_get_max_sq_wqebbs(priv->mdev), MLX5E_MAX_TX_WQEBBS); + return -EIO; + } /* max number of channels may have changed */ max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile); @@ -5293,6 +5759,9 @@ err_cleanup_tx: out: mlx5e_reset_channels(priv->netdev); set_bit(MLX5E_STATE_DESTROYING, &priv->state); + if (priv->fs) + mlx5e_fs_set_state_destroy(priv->fs, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); cancel_work_sync(&priv->update_stats_work); return err; } @@ -5302,6 +5771,9 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv) const struct mlx5e_profile *profile = priv->profile; set_bit(MLX5E_STATE_DESTROYING, &priv->state); + if (priv->fs) + mlx5e_fs_set_state_destroy(priv->fs, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); if (profile->disable) profile->disable(priv); @@ -5389,7 +5861,7 @@ void mlx5e_destroy_netdev(struct mlx5e_priv *priv) static int mlx5e_resume(struct auxiliary_device *adev) { struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); - struct mlx5e_priv *priv = dev_get_drvdata(&adev->dev); + struct mlx5e_priv *priv = auxiliary_get_drvdata(adev); struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = edev->mdev; int err; @@ -5412,7 +5884,7 @@ static int mlx5e_resume(struct auxiliary_device *adev) static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state) { - struct mlx5e_priv *priv = dev_get_drvdata(&adev->dev); + struct mlx5e_priv *priv = auxiliary_get_drvdata(adev); struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; @@ -5432,22 +5904,10 @@ static int mlx5e_probe(struct auxiliary_device *adev, struct mlx5_core_dev *mdev = edev->mdev; struct net_device *netdev; pm_message_t state = {}; - unsigned int txqs, rxqs, ptp_txqs = 0; struct mlx5e_priv *priv; - int qos_sqs = 0; int err; - int nch; - if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) - ptp_txqs = profile->max_tc; - - if (mlx5_qos_is_supported(mdev)) - qos_sqs = mlx5e_qos_max_leaf_nodes(mdev); - - nch = mlx5e_get_max_num_channels(mdev); - txqs = nch * profile->max_tc + ptp_txqs + qos_sqs; - rxqs = nch * profile->rq_groups; - netdev = mlx5e_create_netdev(mdev, profile, txqs, rxqs); + netdev = mlx5e_create_netdev(mdev, profile); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); return -ENOMEM; @@ -5456,7 +5916,7 @@ static int mlx5e_probe(struct auxiliary_device *adev, mlx5e_build_nic_netdev(netdev); priv = netdev_priv(netdev); - dev_set_drvdata(&adev->dev, priv); + auxiliary_set_drvdata(adev, priv); priv->profile = profile; priv->ppriv = NULL; @@ -5504,7 +5964,7 @@ err_destroy_netdev: static void mlx5e_remove(struct auxiliary_device *adev) { - struct mlx5e_priv *priv = dev_get_drvdata(&adev->dev); + struct mlx5e_priv *priv = auxiliary_get_drvdata(adev); pm_message_t state = {}; mlx5e_dcbnl_delete_app(priv); @@ -5535,7 +5995,6 @@ int mlx5e_init(void) { int ret; - mlx5e_ipsec_build_inverse_table(); mlx5e_build_ptys2ethtool_map(); ret = auxiliary_driver_register(&mlx5e_driver); if (ret) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 48895d79796a..794cd8dfe9c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -50,10 +50,13 @@ #include "fs_core.h" #include "lib/mlx5.h" #include "lib/devcom.h" +#include "lib/vxlan.h" #define CREATE_TRACE_POINTS #include "diag/en_rep_tracepoint.h" #include "en_accel/ipsec.h" #include "en/tc/int_port.h" +#include "en/ptp.h" +#include "en/fs_ethtool.h" #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) @@ -67,7 +70,7 @@ static void mlx5e_rep_get_drvinfo(struct net_device *dev, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; - strlcpy(drvinfo->driver, mlx5e_rep_driver_name, + strscpy(drvinfo->driver, mlx5e_rep_driver_name, sizeof(drvinfo->driver)); snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%04d (%.16s)", @@ -219,16 +222,22 @@ static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset) } } -static void mlx5e_rep_get_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) +static void +mlx5e_rep_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = netdev_priv(dev); - mlx5e_ethtool_get_ringparam(priv, param); + mlx5e_ethtool_get_ringparam(priv, param, kernel_param); } -static int mlx5e_rep_set_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) +static int +mlx5e_rep_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -258,7 +267,7 @@ static int mlx5e_rep_get_coalesce(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); - return mlx5e_ethtool_get_coalesce(priv, coal); + return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal); } static int mlx5e_rep_set_coalesce(struct net_device *netdev, @@ -268,7 +277,7 @@ static int mlx5e_rep_set_coalesce(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); - return mlx5e_ethtool_set_coalesce(priv, coal); + return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack); } static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev) @@ -389,18 +398,30 @@ out_err: return err; } -int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) +static int +mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { + int sqs_per_channel = mlx5e_get_dcb_num_tc(&priv->channels.params); struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + bool is_uplink_rep = mlx5e_is_uplink_rep(priv); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; + int n, tc, nch, num_sqs = 0; struct mlx5e_channel *c; - int n, tc, num_sqs = 0; int err = -ENOMEM; + bool ptp_sq; u32 *sqs; - sqs = kcalloc(priv->channels.num * mlx5e_get_dcb_num_tc(&priv->channels.params), - sizeof(*sqs), GFP_KERNEL); + ptp_sq = !!(priv->channels.ptp && + MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)); + nch = priv->channels.num + ptp_sq; + /* +2 for xdpsqs, they don't exist on the ptp channel but will not be + * counted for by num_sqs. + */ + if (is_uplink_rep) + sqs_per_channel += 2; + + sqs = kvcalloc(nch * sqs_per_channel, sizeof(*sqs), GFP_KERNEL); if (!sqs) goto out; @@ -408,10 +429,23 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) c = priv->channels.c[n]; for (tc = 0; tc < c->num_tc; tc++) sqs[num_sqs++] = c->sq[tc].sqn; + + if (is_uplink_rep) { + if (c->xdp) + sqs[num_sqs++] = c->rq_xdpsq.sqn; + + sqs[num_sqs++] = c->xdpsq.sqn; + } + } + if (ptp_sq) { + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + + for (tc = 0; tc < ptp_ch->num_tc; tc++) + sqs[num_sqs++] = ptp_ch->ptpsq[tc].txqsq.sqn; } err = mlx5e_sqs2vport_start(esw, rep, sqs, num_sqs); - kfree(sqs); + kvfree(sqs); out: if (err) @@ -419,7 +453,8 @@ out: return err; } -void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) +static void +mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -428,6 +463,49 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) mlx5e_sqs2vport_stop(esw, rep); } +static int +mlx5e_rep_add_meta_tunnel_rule(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_group *g; + + g = esw->fdb_table.offloads.send_to_vport_meta_grp; + if (!g) + return 0; + + flow_rule = mlx5_eswitch_add_send_to_vport_meta_rule(esw, rep->vport); + if (IS_ERR(flow_rule)) + return PTR_ERR(flow_rule); + + rpriv->send_to_vport_meta_rule = flow_rule; + + return 0; +} + +static void +mlx5e_rep_del_meta_tunnel_rule(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + if (rpriv->send_to_vport_meta_rule) + mlx5_eswitch_del_send_to_vport_meta_rule(rpriv->send_to_vport_meta_rule); +} + +void mlx5e_rep_activate_channels(struct mlx5e_priv *priv) +{ + mlx5e_add_sqs_fwd_rules(priv); + mlx5e_rep_add_meta_tunnel_rule(priv); +} + +void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv) +{ + mlx5e_rep_del_meta_tunnel_rule(priv); + mlx5e_remove_sqs_fwd_rules(priv); +} + static int mlx5e_rep_open(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -585,6 +663,18 @@ bool mlx5e_eswitch_vf_rep(const struct net_device *netdev) return netdev->netdev_ops == &mlx5e_netdev_ops_rep; } +/* One indirect TIR set for outer. Inner not supported in reps. */ +#define REP_NUM_INDIR_TIRS MLX5E_NUM_INDIR_TIRS + +static int mlx5e_rep_max_nch_limit(struct mlx5_core_dev *mdev) +{ + int max_tir_num = 1 << MLX5_CAP_GEN(mdev, log_max_tir); + int num_vports = mlx5_eswitch_get_total_vports(mdev); + + return (max_tir_num - mlx5e_get_pf_num_tirs(mdev) + - (num_vports * REP_NUM_INDIR_TIRS)) / num_vports; +} + static void mlx5e_build_rep_params(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -618,11 +708,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev) params->mqprio.num_tc = 1; params->tunneled_offload_en = false; - - /* Set an initial non-zero value, so that mlx5e_select_queue won't - * divide by zero if called before first activating channels. - */ - priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc; + if (rep->vport != MLX5_VPORT_UPLINK) + params->vlan_strip_disable = true; mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); } @@ -657,6 +744,13 @@ static int mlx5e_init_rep(struct mlx5_core_dev *mdev, { struct mlx5e_priv *priv = netdev_priv(netdev); + priv->fs = mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + if (!priv->fs) { + netdev_err(priv->netdev, "FS allocation failed\n"); + return -ENOMEM; + } + mlx5e_build_rep_params(netdev); mlx5e_timestamp_init(priv); @@ -669,16 +763,26 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv = netdev_priv(netdev); int err; + priv->fs = mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + if (!priv->fs) { + netdev_err(priv->netdev, "FS allocation failed\n"); + return -ENOMEM; + } + err = mlx5e_ipsec_init(priv); if (err) mlx5_core_err(mdev, "Uplink rep IPsec initialization failed, %d\n", err); mlx5e_vxlan_set_netdev_info(priv); - return mlx5e_init_rep(mdev, netdev); + mlx5e_build_rep_params(netdev); + mlx5e_timestamp_init(priv); + return 0; } static void mlx5e_cleanup_rep(struct mlx5e_priv *priv) { + mlx5e_fs_cleanup(priv->fs); mlx5e_ipsec_cleanup(priv); } @@ -689,19 +793,20 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) struct ttc_params ttc_params = {}; int err; - priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, - MLX5_FLOW_NAMESPACE_KERNEL); + mlx5e_fs_set_ns(priv->fs, + mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL), false); /* The inner_ttc in the ttc params is intentionally not set */ - mlx5e_set_ttc_params(priv, &ttc_params, false); + mlx5e_set_ttc_params(priv->fs, priv->rx_res, &ttc_params, false); if (rep->vport != MLX5_VPORT_UPLINK) /* To give uplik rep TTC a lower level for chaining from root ft */ ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1; - priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); - if (IS_ERR(priv->fs.ttc)) { - err = PTR_ERR(priv->fs.ttc); + mlx5e_fs_set_ttc(priv->fs, mlx5_create_ttc_table(priv->mdev, &ttc_params), false); + if (IS_ERR(mlx5e_fs_get_ttc(priv->fs, false))) { + err = PTR_ERR(mlx5e_fs_get_ttc(priv->fs, false)); netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", err); return err; @@ -721,7 +826,7 @@ static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv) /* non uplik reps will skip any bypass tables and go directly to * their own ttc */ - rpriv->root_ft = mlx5_get_ttc_flow_table(priv->fs.ttc); + rpriv->root_ft = mlx5_get_ttc_flow_table(mlx5e_fs_get_ttc(priv->fs, false)); return 0; } @@ -797,10 +902,12 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) int err; priv->rx_res = mlx5e_rx_res_alloc(); - if (!priv->rx_res) - return -ENOMEM; + if (!priv->rx_res) { + err = -ENOMEM; + goto err_free_fs; + } - mlx5e_init_l2_addr(priv); + mlx5e_fs_init_l2_addr(priv->fs, priv->netdev); err = mlx5e_open_drop_rq(priv, &priv->drop_rq); if (err) { @@ -827,29 +934,31 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_root_ft; - mlx5e_ethtool_init_steering(priv); + mlx5e_ethtool_init_steering(priv->fs); return 0; err_destroy_root_ft: mlx5e_destroy_rep_root_ft(priv); err_destroy_ttc_table: - mlx5_destroy_ttc_table(priv->fs.ttc); + mlx5_destroy_ttc_table(mlx5e_fs_get_ttc(priv->fs, false)); err_destroy_rx_res: mlx5e_rx_res_destroy(priv->rx_res); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; +err_free_fs: + mlx5e_fs_cleanup(priv->fs); return err; } static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) { - mlx5e_ethtool_cleanup_steering(priv); + mlx5e_ethtool_cleanup_steering(priv->fs); rep_vport_rx_rule_destroy(priv); mlx5e_destroy_rep_root_ft(priv); - mlx5_destroy_ttc_table(priv->fs.ttc); + mlx5_destroy_ttc_table(mlx5e_fs_get_ttc(priv->fs, false)); mlx5e_rx_res_destroy(priv->rx_res); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_rx_res_free(priv->rx_res); @@ -911,6 +1020,13 @@ err_event_reg: return err; } +static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) +{ + mlx5e_rep_tc_netdevice_event_unregister(rpriv); + mlx5e_rep_bond_cleanup(rpriv); + mlx5e_rep_tc_cleanup(rpriv); +} + static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -925,31 +1041,33 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { err = mlx5e_init_uplink_rep_tx(rpriv); if (err) - goto destroy_tises; + goto err_init_tx; } + err = mlx5e_tc_ht_init(&rpriv->tc_ht); + if (err) + goto err_ht_init; + return 0; -destroy_tises: +err_ht_init: + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) + mlx5e_cleanup_uplink_rep_tx(rpriv); +err_init_tx: mlx5e_destroy_tises(priv); return err; } -static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) -{ - mlx5e_rep_tc_netdevice_event_unregister(rpriv); - mlx5e_rep_bond_cleanup(rpriv); - mlx5e_rep_tc_cleanup(rpriv); -} - static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; - mlx5e_destroy_tises(priv); + mlx5e_tc_ht_cleanup(&rpriv->tc_ht); if (rpriv->rep->vport == MLX5_VPORT_UPLINK) mlx5e_cleanup_uplink_rep_tx(rpriv); + + mlx5e_destroy_tises(priv); } static void mlx5e_rep_enable(struct mlx5e_priv *priv) @@ -1027,6 +1145,7 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) rtnl_lock(); if (netif_running(netdev)) mlx5e_open(netdev); + udp_tunnel_nic_reset_ntf(priv->netdev); netif_device_attach(netdev); rtnl_unlock(); } @@ -1048,6 +1167,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) mlx5_notifier_unregister(mdev, &priv->events_nb); mlx5e_rep_tc_disable(priv); mlx5_lag_remove_netdev(mdev, priv->netdev); + mlx5_vxlan_reset_to_default(mdev->vxlan); } static MLX5E_DEFINE_STATS_GRP(sw_rep, 0); @@ -1082,8 +1202,8 @@ static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = { &MLX5E_STATS_GRP(per_port_buff_congest), #ifdef CONFIG_MLX5_EN_IPSEC &MLX5E_STATS_GRP(ipsec_sw), - &MLX5E_STATS_GRP(ipsec_hw), #endif + &MLX5E_STATS_GRP(ptp), }; static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv) @@ -1104,10 +1224,9 @@ static const struct mlx5e_profile mlx5e_rep_profile = { .update_stats = mlx5e_stats_update_ndo_stats, .rx_handlers = &mlx5e_rx_handlers_rep, .max_tc = 1, - .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5e_rep_stats_grps, .stats_grps_num = mlx5e_rep_stats_grps_num, - .rx_ptp_support = false, + .max_nch_limit = mlx5e_rep_max_nch_limit, }; static const struct mlx5e_profile mlx5e_uplink_rep_profile = { @@ -1124,11 +1243,8 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .update_carrier = mlx5e_update_carrier, .rx_handlers = &mlx5e_rx_handlers_rep, .max_tc = MLX5E_MAX_NUM_TC, - /* XSK is needed so we can replace profile with NIC netdev */ - .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), .stats_grps = mlx5e_ul_rep_stats_grps, .stats_grps_num = mlx5e_ul_rep_stats_grps_num, - .rx_ptp_support = false, }; /* e-Switch vport representors */ @@ -1179,14 +1295,10 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) struct devlink_port *dl_port; struct net_device *netdev; struct mlx5e_priv *priv; - unsigned int txqs, rxqs; - int nch, err; + int err; profile = &mlx5e_rep_profile; - nch = mlx5e_get_max_num_channels(dev); - txqs = nch * profile->max_tc; - rxqs = nch * profile->rq_groups; - netdev = mlx5e_create_netdev(dev, profile, txqs, rxqs); + netdev = mlx5e_create_netdev(dev, profile); if (!netdev) { mlx5_core_warn(dev, "Failed to create representor netdev for vport %d\n", @@ -1244,7 +1356,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) struct mlx5e_rep_priv *rpriv; int err; - rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + rpriv = kvzalloc(sizeof(*rpriv), GFP_KERNEL); if (!rpriv) return -ENOMEM; @@ -1259,7 +1371,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) err = mlx5e_vport_vf_rep_load(dev, rep); if (err) - kfree(rpriv); + kvfree(rpriv); return err; } @@ -1287,7 +1399,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) priv->profile->cleanup(priv); mlx5e_destroy_netdev(priv); free_ppriv: - kfree(ppriv); /* mlx5e_rep_priv */ + kvfree(ppriv); /* mlx5e_rep_priv */ } static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index b01dacb6f527..b4e691760da9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -62,13 +62,9 @@ struct mlx5_tc_int_port_priv; struct mlx5e_rep_bond; struct mlx5e_tc_tun_encap; struct mlx5e_post_act; +struct mlx5e_flow_meters; struct mlx5_rep_uplink_priv { - /* Filters DB - instantiated by the uplink representor and shared by - * the uplink's VFs - */ - struct rhashtable tc_ht; - /* indirect block callbacks are invoked on bind/unbind events * on registered higher level devices (e.g. tunnel devices) * @@ -102,6 +98,8 @@ struct mlx5_rep_uplink_priv { /* OVS internal port support */ struct mlx5e_tc_int_port_priv *int_port_priv; + + struct mlx5e_flow_meters *flow_meters; }; struct mlx5e_rep_priv { @@ -113,6 +111,8 @@ struct mlx5e_rep_priv { struct list_head vport_sqs_list; struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ struct rtnl_link_stats64 prev_vf_vport_stats; + struct mlx5_flow_handle *send_to_vport_meta_rule; + struct rhashtable tc_ht; }; static inline @@ -183,6 +183,13 @@ struct mlx5e_decap_entry { struct rcu_head rcu; }; +struct mlx5e_mpls_info { + u32 label; + u8 tc; + u8 bos; + u8 ttl; +}; + struct mlx5e_encap_entry { /* attached neigh hash entry */ struct mlx5e_neigh_hash_entry *nhe; @@ -196,6 +203,7 @@ struct mlx5e_encap_entry { struct list_head route_list; struct mlx5_pkt_reformat *pkt_reformat; const struct ip_tunnel_info *tun_info; + struct mlx5e_mpls_info mpls_info; unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; @@ -234,8 +242,8 @@ int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev, void *sp); bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); -int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); -void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); +void mlx5e_rep_activate_channels(struct mlx5e_priv *priv); +void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv); void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); @@ -249,8 +257,8 @@ static inline bool mlx5e_eswitch_rep(const struct net_device *netdev) #else /* CONFIG_MLX5_ESWITCH */ static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } -static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; } -static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {} +static inline void mlx5e_rep_activate_channels(struct mlx5e_priv *priv) {} +static inline void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv) {} static inline int mlx5e_rep_init(void) { return 0; }; static inline void mlx5e_rep_cleanup(void) {}; static inline bool mlx5e_rep_has_offload_stats(const struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 793511d5ee4c..a61a43fc8d5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -34,11 +34,14 @@ #include <linux/ipv6.h> #include <linux/tcp.h> #include <linux/bitmap.h> +#include <linux/filter.h> #include <net/ip6_checksum.h> #include <net/page_pool.h> #include <net/inet_ecn.h> +#include <net/gro.h> #include <net/udp.h> #include <net/tcp.h> +#include <net/xdp_sock_drv.h> #include "en.h" #include "en/txrx.h" #include "en_tc.h" @@ -46,10 +49,10 @@ #include "en_rep.h" #include "en/rep/tc.h" #include "ipoib/ipoib.h" -#include "accel/ipsec.h" -#include "fpga/ipsec.h" +#include "en_accel/ipsec.h" +#include "en_accel/macsec.h" #include "en_accel/ipsec_rxtx.h" -#include "en_accel/tls_rxtx.h" +#include "en_accel/ktls_txrx.h" #include "en/xdp.h" #include "en/xsk/rx.h" #include "en/health.h" @@ -220,8 +223,7 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1; } -static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) +static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct page *page) { struct mlx5e_page_cache *cache = &rq->page_cache; u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); @@ -232,120 +234,101 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, return false; } - if (!dev_page_is_reusable(dma_info->page)) { + if (!dev_page_is_reusable(page)) { stats->cache_waive++; return false; } - cache->page_cache[cache->tail] = *dma_info; + cache->page_cache[cache->tail] = page; cache->tail = tail_next; return true; } -static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) +static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au) { struct mlx5e_page_cache *cache = &rq->page_cache; struct mlx5e_rq_stats *stats = rq->stats; + dma_addr_t addr; if (unlikely(cache->head == cache->tail)) { stats->cache_empty++; return false; } - if (page_ref_count(cache->page_cache[cache->head].page) != 1) { + if (page_ref_count(cache->page_cache[cache->head]) != 1) { stats->cache_busy++; return false; } - *dma_info = cache->page_cache[cache->head]; + au->page = cache->page_cache[cache->head]; cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); stats->cache_reuse++; - dma_sync_single_for_device(rq->pdev, dma_info->addr, - PAGE_SIZE, - DMA_FROM_DEVICE); + addr = page_pool_get_dma_addr(au->page); + /* Non-XSK always uses PAGE_SIZE. */ + dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir); return true; } -static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) +static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au) { - if (mlx5e_rx_cache_get(rq, dma_info)) + dma_addr_t addr; + + if (mlx5e_rx_cache_get(rq, au)) return 0; - dma_info->page = page_pool_dev_alloc_pages(rq->page_pool); - if (unlikely(!dma_info->page)) + au->page = page_pool_dev_alloc_pages(rq->page_pool); + if (unlikely(!au->page)) return -ENOMEM; - dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0, - PAGE_SIZE, rq->buff.map_dir); - if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { - page_pool_recycle_direct(rq->page_pool, dma_info->page); - dma_info->page = NULL; + /* Non-XSK always uses PAGE_SIZE. */ + addr = dma_map_page(rq->pdev, au->page, 0, PAGE_SIZE, rq->buff.map_dir); + if (unlikely(dma_mapping_error(rq->pdev, addr))) { + page_pool_recycle_direct(rq->page_pool, au->page); + au->page = NULL; return -ENOMEM; } + page_pool_set_dma_addr(au->page, addr); return 0; } -static inline int mlx5e_page_alloc(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) +void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page) { - if (rq->xsk_pool) - return mlx5e_xsk_page_alloc_pool(rq, dma_info); - else - return mlx5e_page_alloc_pool(rq, dma_info); -} + dma_addr_t dma_addr = page_pool_get_dma_addr(page); -void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) -{ - dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir); + dma_unmap_page_attrs(rq->pdev, dma_addr, PAGE_SIZE, rq->buff.map_dir, + DMA_ATTR_SKIP_CPU_SYNC); + page_pool_set_dma_addr(page, 0); } -void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info, - bool recycle) +void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle) { if (likely(recycle)) { - if (mlx5e_rx_cache_put(rq, dma_info)) + if (mlx5e_rx_cache_put(rq, page)) return; - mlx5e_page_dma_unmap(rq, dma_info); - page_pool_recycle_direct(rq->page_pool, dma_info->page); + mlx5e_page_dma_unmap(rq, page); + page_pool_recycle_direct(rq->page_pool, page); } else { - mlx5e_page_dma_unmap(rq, dma_info); - page_pool_release_page(rq->page_pool, dma_info->page); - put_page(dma_info->page); + mlx5e_page_dma_unmap(rq, page); + page_pool_release_page(rq->page_pool, page); + put_page(page); } } -static inline void mlx5e_page_release(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info, - bool recycle) -{ - if (rq->xsk_pool) - /* The `recycle` parameter is ignored, and the page is always - * put into the Reuse Ring, because there is no way to return - * the page to the userspace when the interface goes down. - */ - xsk_buff_free(dma_info->xsk); - else - mlx5e_page_release_dynamic(rq, dma_info, recycle); -} - static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *frag) { int err = 0; if (!frag->offset) - /* On first frag (offset == 0), replenish page (dma_info actually). - * Other frags that point to the same dma_info (with a different + /* On first frag (offset == 0), replenish page (alloc_unit actually). + * Other frags that point to the same alloc_unit (with a different * offset) should just use the new one without replenishing again * by themselves. */ - err = mlx5e_page_alloc(rq, frag->di); + err = mlx5e_page_alloc_pool(rq, frag->au); return err; } @@ -355,7 +338,7 @@ static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq, bool recycle) { if (frag->last_in_page) - mlx5e_page_release(rq, frag->di, recycle); + mlx5e_page_release_dynamic(rq, frag->au->page, recycle); } static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix) @@ -371,12 +354,16 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, int i; for (i = 0; i < rq->wqe.info.num_frags; i++, frag++) { + dma_addr_t addr; + u16 headroom; + err = mlx5e_get_rx_frag(rq, frag); if (unlikely(err)) goto free_frags; - wqe->data[i].addr = cpu_to_be64(frag->di->addr + - frag->offset + rq->buff.headroom); + headroom = i == 0 ? rq->buff.headroom : 0; + addr = page_pool_get_dma_addr(frag->au->page); + wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom); } return 0; @@ -394,6 +381,15 @@ static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq, { int i; + if (rq->xsk_pool) { + /* The `recycle` parameter is ignored, and the page is always + * put into the Reuse Ring, because there is no way to return + * the page to the userspace when the interface goes down. + */ + xsk_buff_free(wi->au->xsk); + return; + } + for (i = 0; i < rq->wqe.info.num_frags; i++, wi++) mlx5e_put_rx_frag(rq, wi, recycle); } @@ -405,84 +401,78 @@ static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) mlx5e_free_rx_wqe(rq, wi, false); } -static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk) +static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) { struct mlx5_wq_cyc *wq = &rq->wqe.wq; - int err; int i; - if (rq->xsk_pool) { - int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags; - - /* Check in advance that we have enough frames, instead of - * allocating one-by-one, failing and moving frames to the - * Reuse Ring. - */ - if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, pages_desired))) - return -ENOMEM; - } - for (i = 0; i < wqe_bulk; i++) { - struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i); + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_rx_wqe_cyc *wqe; - err = mlx5e_alloc_rx_wqe(rq, wqe, ix + i); - if (unlikely(err)) - goto free_wqes; - } + wqe = mlx5_wq_cyc_get_wqe(wq, j); - return 0; - -free_wqes: - while (--i >= 0) - mlx5e_dealloc_rx_wqe(rq, ix + i); + if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, j))) + break; + } - return err; + return i; } static inline void mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, - struct mlx5e_dma_info *di, u32 frag_offset, u32 len, + union mlx5e_alloc_unit *au, u32 frag_offset, u32 len, unsigned int truesize) { - dma_sync_single_for_cpu(rq->pdev, - di->addr + frag_offset, - len, DMA_FROM_DEVICE); - page_ref_inc(di->page); + dma_addr_t addr = page_pool_get_dma_addr(au->page); + + dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, + rq->buff.map_dir); + page_ref_inc(au->page); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - di->page, frag_offset, len, truesize); + au->page, frag_offset, len, truesize); } static inline void -mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, - struct mlx5e_dma_info *dma_info, +mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb, + struct page *page, dma_addr_t addr, int offset_from, int dma_offset, u32 headlen) { - const void *from = page_address(dma_info->page) + offset_from; + const void *from = page_address(page) + offset_from; /* Aligning len to sizeof(long) optimizes memcpy performance */ unsigned int len = ALIGN(headlen, sizeof(long)); - dma_sync_single_for_cpu(pdev, dma_info->addr + dma_offset, len, - DMA_FROM_DEVICE); + dma_sync_single_for_cpu(rq->pdev, addr + dma_offset, len, + rq->buff.map_dir); skb_copy_to_linear_data(skb, from, len); } static void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle) { + union mlx5e_alloc_unit *alloc_units = wi->alloc_units; bool no_xdp_xmit; - struct mlx5e_dma_info *dma_info = wi->umr.dma_info; int i; /* A common case for AF_XDP. */ - if (bitmap_full(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE)) + if (bitmap_full(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe)) return; - no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, - MLX5_MPWRQ_PAGES_PER_WQE); + no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) - if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) - mlx5e_page_release(rq, &dma_info[i], recycle); + if (rq->xsk_pool) { + /* The `recycle` parameter is ignored, and the page is always + * put into the Reuse Ring, because there is no way to return + * the page to the userspace when the interface goes down. + */ + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) + if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) + xsk_buff_free(alloc_units[i].xsk); + } else { + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) + if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) + mlx5e_page_release_dynamic(rq, alloc_units[i].page, recycle); + } } static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n) @@ -567,11 +557,13 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; if (!(header_offset & (PAGE_SIZE - 1))) { - err = mlx5e_page_alloc(rq, dma_info); + union mlx5e_alloc_unit au; + + err = mlx5e_page_alloc_pool(rq, &au); if (unlikely(err)) goto err_unmap; - addr = dma_info->addr; - page = dma_info->page; + page = dma_info->page = au.page; + addr = dma_info->addr = page_pool_get_dma_addr(au.page); } else { dma_info->addr = addr + header_offset; dma_info->page = page; @@ -604,7 +596,7 @@ err_unmap: dma_info = &shampo->info[--index]; if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) { dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE); - mlx5e_page_release(rq, dma_info, true); + mlx5e_page_release_dynamic(rq, dma_info->page, true); } } rq->stats->buff_alloc_err++; @@ -652,57 +644,55 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0]; + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); + union mlx5e_alloc_unit *au = &wi->alloc_units[0]; struct mlx5e_icosq *sq = rq->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *umr_wqe; + u32 offset; /* 17-bit value with MTT. */ u16 pi; int err; int i; - /* Check in advance that we have enough frames, instead of allocating - * one-by-one, failing and moving frames to the Reuse Ring. - */ - if (rq->xsk_pool && - unlikely(!xsk_buff_can_alloc(rq->xsk_pool, MLX5_MPWRQ_PAGES_PER_WQE))) { - err = -ENOMEM; - goto err; - } - if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { err = mlx5e_alloc_rx_hd_mpwqe(rq); if (unlikely(err)) goto err; } - pi = mlx5e_icosq_get_next_pi(sq, MLX5E_UMR_WQEBBS); + pi = mlx5e_icosq_get_next_pi(sq, rq->mpwqe.umr_wqebbs); umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); - memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts)); + memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe)); - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { - err = mlx5e_page_alloc(rq, dma_info); + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) { + dma_addr_t addr; + + err = mlx5e_page_alloc_pool(rq, au); if (unlikely(err)) goto err_unmap; - umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR); + addr = page_pool_get_dma_addr(au->page); + umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { + .ptag = cpu_to_be64(addr | MLX5_EN_WR), + }; } - bitmap_zero(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); + bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); wi->consumed_strides = 0; umr_wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); - umr_wqe->uctrl.xlt_offset = - cpu_to_be16(MLX5_ALIGNED_MTTS_OCTW(MLX5E_REQUIRED_MTTS(ix))); + + offset = (ix * rq->mpwqe.mtts_per_wqe) * sizeof(struct mlx5_mtt) / MLX5_OCTWORD; + umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX, - .num_wqebbs = MLX5E_UMR_WQEBBS, + .num_wqebbs = rq->mpwqe.umr_wqebbs, .umr.rq = rq, }; - sq->pc += MLX5E_UMR_WQEBBS; + sq->pc += rq->mpwqe.umr_wqebbs; sq->doorbell_cseg = &umr_wqe->ctrl; @@ -710,8 +700,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) err_unmap: while (--i >= 0) { - dma_info--; - mlx5e_page_release(rq, dma_info, true); + au--; + mlx5e_page_release_dynamic(rq, au->page, true); } err: @@ -745,7 +735,7 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE); if (hd_info->page != deleted_page) { deleted_page = hd_info->page; - mlx5e_page_release(rq, hd_info, false); + mlx5e_page_release_dynamic(rq, hd_info->page, false); } } @@ -760,7 +750,7 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); /* Don't recycle, this function is called on rq/netdev close */ mlx5e_free_rx_mpwqe(rq, wi, false); } @@ -768,38 +758,51 @@ static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { struct mlx5_wq_cyc *wq = &rq->wqe.wq; - u8 wqe_bulk; - int err; + int wqe_bulk, count; + bool busy = false; + u16 head; if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return false; - wqe_bulk = rq->wqe.info.wqe_bulk; - - if (mlx5_wq_cyc_missing(wq) < wqe_bulk) + if (mlx5_wq_cyc_missing(wq) < rq->wqe.info.wqe_bulk) return false; if (rq->page_pool) page_pool_nid_changed(rq->page_pool, numa_mem_id()); - do { - u16 head = mlx5_wq_cyc_get_head(wq); + wqe_bulk = mlx5_wq_cyc_missing(wq); + head = mlx5_wq_cyc_get_head(wq); - err = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk); - if (unlikely(err)) { - rq->stats->buff_alloc_err++; - break; - } + /* Don't allow any newly allocated WQEs to share the same page with old + * WQEs that aren't completed yet. Stop earlier. + */ + wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask; - mlx5_wq_cyc_push_n(wq, wqe_bulk); - } while (mlx5_wq_cyc_missing(wq) >= wqe_bulk); + if (!rq->xsk_pool) + count = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk); + else if (likely(!rq->xsk_pool->dma_need_sync)) + count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk); + else + /* If dma_need_sync is true, it's more efficient to call + * xsk_buff_alloc in a loop, rather than xsk_buff_alloc_batch, + * because the latter does the same check and returns only one + * frame. + */ + count = mlx5e_xsk_alloc_rx_wqes(rq, head, wqe_bulk); + + mlx5_wq_cyc_push_n(wq, count); + if (unlikely(count != wqe_bulk)) { + rq->stats->buff_alloc_err++; + busy = true; + } /* ensure wqes are visible to device before updating doorbell record */ dma_wmb(); mlx5_wq_cyc_update_db_record(wq); - return !!err; + return busy; } void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq) @@ -958,8 +961,7 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) if (unlikely(rq->mpwqe.umr_in_progress > rq->mpwqe.umr_last_bulk)) rq->stats->congst_umr++; -#define UMR_WQE_BULK (2) - if (likely(missing < UMR_WQE_BULK)) + if (likely(missing < rq->mpwqe.min_wqe_bulk)) return false; if (rq->page_pool) @@ -968,7 +970,8 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) head = rq->mpwqe.actual_wq_head; i = missing; do { - alloc_err = mlx5e_alloc_rx_mpwqe(rq, head); + alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) : + mlx5e_alloc_rx_mpwqe(rq, head); if (unlikely(alloc_err)) break; @@ -1115,7 +1118,7 @@ static void mlx5e_shampo_update_ipv6_udp_hdr(struct mlx5e_rq *rq, struct ipv6hdr static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct tcphdr *skb_tcp_hd) { - u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index); + u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe); struct tcphdr *last_tcp_hd; void *last_hd_addr; @@ -1347,7 +1350,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, } /* True when explicitly set via priv flag, or XDP prog is loaded */ - if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)) + if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) || + get_cqe_tls_offload(cqe)) goto csum_unnecessary; /* CQE csum doesn't cover padding octets in short ethernet @@ -1408,11 +1412,15 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, skb->mac_len = ETH_HLEN; - mlx5e_tls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt); + if (unlikely(get_cqe_tls_offload(cqe))) + mlx5e_ktls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt); if (unlikely(mlx5_ipsec_is_rx_flow(cqe))) mlx5e_ipsec_offload_handle_rx_skb(netdev, skb, cqe); + if (unlikely(mlx5e_macsec_is_rx_flow(cqe))) + mlx5e_macsec_offload_handle_rx_skb(netdev, skb, cqe); + if (lro_num_seg > 1) { mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); @@ -1487,7 +1495,7 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, static inline struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va, u32 frag_size, u16 headroom, - u32 cqe_bcnt) + u32 cqe_bcnt, u32 metasize) { struct sk_buff *skb = build_skb(va, frag_size); @@ -1499,6 +1507,9 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va, skb_reserve(skb, headroom); skb_put(skb, cqe_bcnt); + if (metasize) + skb_metadata_set(skb, metasize); + return skb; } @@ -1506,86 +1517,161 @@ static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom, u32 len, struct xdp_buff *xdp) { xdp_init_buff(xdp, rq->buff.frame0_sz, &rq->xdp_rxq); - xdp_prepare_buff(xdp, va, headroom, len, false); + xdp_prepare_buff(xdp, va, headroom, len, true); } static struct sk_buff * -mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, - struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt) +mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt) { - struct mlx5e_dma_info *di = wi->di; + union mlx5e_alloc_unit *au = wi->au; u16 rx_headroom = rq->buff.headroom; - struct xdp_buff xdp; + struct bpf_prog *prog; struct sk_buff *skb; + u32 metasize = 0; void *va, *data; + dma_addr_t addr; u32 frag_size; - va = page_address(di->page) + wi->offset; + va = page_address(au->page) + wi->offset; data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset, - frag_size, DMA_FROM_DEVICE); - net_prefetchw(va); /* xdp_frame data area */ + addr = page_pool_get_dma_addr(au->page); + dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, + frag_size, rq->buff.map_dir); net_prefetch(data); - mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp); - if (mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp)) - return NULL; /* page/packet was consumed by XDP */ + prog = rcu_dereference(rq->xdp_prog); + if (prog) { + struct xdp_buff xdp; + + net_prefetchw(va); /* xdp_frame data area */ + mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp); + if (mlx5e_xdp_handle(rq, au->page, prog, &xdp)) + return NULL; /* page/packet was consumed by XDP */ - rx_headroom = xdp.data - xdp.data_hard_start; + rx_headroom = xdp.data - xdp.data_hard_start; + metasize = xdp.data - xdp.data_meta; + cqe_bcnt = xdp.data_end - xdp.data; + } frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt); + skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); if (unlikely(!skb)) return NULL; /* queue up for recycling/reuse */ - page_ref_inc(di->page); + page_ref_inc(au->page); return skb; } static struct sk_buff * -mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, - struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt) +mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt) { struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; struct mlx5e_wqe_frag_info *head_wi = wi; - u16 headlen = min_t(u32, MLX5E_RX_MAX_HEAD, cqe_bcnt); - u16 frag_headlen = headlen; - u16 byte_cnt = cqe_bcnt - headlen; + union mlx5e_alloc_unit *au = wi->au; + u16 rx_headroom = rq->buff.headroom; + struct skb_shared_info *sinfo; + u32 frag_consumed_bytes; + struct bpf_prog *prog; + struct xdp_buff xdp; struct sk_buff *skb; + dma_addr_t addr; + u32 truesize; + void *va; - /* XDP is not supported in this configuration, as incoming packets - * might spread among multiple pages. - */ - skb = napi_alloc_skb(rq->cq.napi, - ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long))); - if (unlikely(!skb)) { - rq->stats->buff_alloc_err++; - return NULL; - } + va = page_address(au->page) + wi->offset; + frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); - net_prefetchw(skb->data); + addr = page_pool_get_dma_addr(au->page); + dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, + rq->buff.frame0_sz, rq->buff.map_dir); + net_prefetchw(va); /* xdp_frame data area */ + net_prefetch(va + rx_headroom); + + mlx5e_fill_xdp_buff(rq, va, rx_headroom, frag_consumed_bytes, &xdp); + sinfo = xdp_get_shared_info_from_buff(&xdp); + truesize = 0; + + cqe_bcnt -= frag_consumed_bytes; + frag_info++; + wi++; - while (byte_cnt) { - u16 frag_consumed_bytes = - min_t(u16, frag_info->frag_size - frag_headlen, byte_cnt); + while (cqe_bcnt) { + skb_frag_t *frag; - mlx5e_add_skb_frag(rq, skb, wi->di, wi->offset + frag_headlen, - frag_consumed_bytes, frag_info->frag_stride); - byte_cnt -= frag_consumed_bytes; - frag_headlen = 0; + au = wi->au; + + frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); + + addr = page_pool_get_dma_addr(au->page); + dma_sync_single_for_cpu(rq->pdev, addr + wi->offset, + frag_consumed_bytes, rq->buff.map_dir); + + if (!xdp_buff_has_frags(&xdp)) { + /* Init on the first fragment to avoid cold cache access + * when possible. + */ + sinfo->nr_frags = 0; + sinfo->xdp_frags_size = 0; + xdp_buff_set_frags_flag(&xdp); + } + + frag = &sinfo->frags[sinfo->nr_frags++]; + __skb_frag_set_page(frag, au->page); + skb_frag_off_set(frag, wi->offset); + skb_frag_size_set(frag, frag_consumed_bytes); + + if (page_is_pfmemalloc(au->page)) + xdp_buff_set_frag_pfmemalloc(&xdp); + + sinfo->xdp_frags_size += frag_consumed_bytes; + truesize += frag_info->frag_stride; + + cqe_bcnt -= frag_consumed_bytes; frag_info++; wi++; } - /* copy header */ - mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, head_wi->offset, - headlen); - /* skb linear part was allocated with headlen and aligned to long */ - skb->tail += headlen; - skb->len += headlen; + au = head_wi->au; + + prog = rcu_dereference(rq->xdp_prog); + if (prog && mlx5e_xdp_handle(rq, au->page, prog, &xdp)) { + if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { + int i; + + for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++) + mlx5e_put_rx_frag(rq, &head_wi[i], true); + } + return NULL; /* page/packet was consumed by XDP */ + } + + skb = mlx5e_build_linear_skb(rq, xdp.data_hard_start, rq->buff.frame0_sz, + xdp.data - xdp.data_hard_start, + xdp.data_end - xdp.data, + xdp.data - xdp.data_meta); + if (unlikely(!skb)) + return NULL; + + page_ref_inc(au->page); + + if (unlikely(xdp_buff_has_frags(&xdp))) { + int i; + + /* sinfo->nr_frags is reset by build_skb, calculate again. */ + xdp_update_skb_shared_info(skb, wi - head_wi - 1, + sinfo->xdp_frags_size, truesize, + xdp_buff_is_frag_pfmemalloc(&xdp)); + + for (i = 0; i < sinfo->nr_frags; i++) { + skb_frag_t *frag = &sinfo->frags[i]; + + page_ref_inc(skb_frag_page(frag)); + } + } return skb; } @@ -1602,6 +1688,12 @@ static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) } } +static void mlx5e_handle_rx_err_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + trigger_report(rq, cqe); + rq->stats->wqe_err++; +} + static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { struct mlx5_wq_cyc *wq = &rq->wqe.wq; @@ -1615,15 +1707,15 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - trigger_report(rq, cqe); - rq->stats->wqe_err++; + mlx5e_handle_rx_err_cqe(rq, cqe); goto free_wqe; } - skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + skb = INDIRECT_CALL_3(rq->wqe.skb_from_cqe, mlx5e_skb_from_cqe_linear, mlx5e_skb_from_cqe_nonlinear, - rq, cqe, wi, cqe_bcnt); + mlx5e_xsk_skb_from_cqe_linear, + rq, wi, cqe_bcnt); if (!skb) { /* probably for XDP */ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { @@ -1669,14 +1761,14 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - rq->stats->wqe_err++; + mlx5e_handle_rx_err_cqe(rq, cqe); goto free_wqe; } skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, mlx5e_skb_from_cqe_linear, mlx5e_skb_from_cqe_nonlinear, - rq, cqe, wi, cqe_bcnt); + rq, wi, cqe_bcnt); if (!skb) { /* probably for XDP */ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { @@ -1705,11 +1797,11 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 { u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); - struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id); u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; - u32 head_offset = wqe_offset & (PAGE_SIZE - 1); - u32 page_idx = wqe_offset >> PAGE_SHIFT; + u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1); + u32 page_idx = wqe_offset >> rq->mpwqe.page_shift; struct mlx5e_rx_wqe_ll *wqe; struct mlx5_wq_ll *wq; struct sk_buff *skb; @@ -1718,8 +1810,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 wi->consumed_strides += cstrides; if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - trigger_report(rq, cqe); - rq->stats->wqe_err++; + mlx5e_handle_rx_err_cqe(rq, cqe); goto mpwrq_cqe_out; } @@ -1761,12 +1852,13 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = { #endif static void -mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - u32 data_bcnt, u32 data_offset) +mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, + union mlx5e_alloc_unit *au, u32 data_bcnt, u32 data_offset) { net_prefetchw(skb->data); while (data_bcnt) { + /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt); unsigned int truesize; @@ -1775,12 +1867,12 @@ mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, struct mlx5e_dma_i else truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); - mlx5e_add_skb_frag(rq, skb, di, data_offset, + mlx5e_add_skb_frag(rq, skb, au, data_offset, pg_consumed_bytes, truesize); data_bcnt -= pg_consumed_bytes; data_offset = 0; - di++; + au++; } } @@ -1788,12 +1880,13 @@ static struct sk_buff * mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, u16 cqe_bcnt, u32 head_offset, u32 page_idx) { + union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx]; u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); - struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx]; u32 frag_offset = head_offset + headlen; u32 byte_cnt = cqe_bcnt - headlen; - struct mlx5e_dma_info *head_di = di; + union mlx5e_alloc_unit *head_au = au; struct sk_buff *skb; + dma_addr_t addr; skb = napi_alloc_skb(rq->cq.napi, ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long))); @@ -1804,14 +1897,17 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w net_prefetchw(skb->data); + /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ if (unlikely(frag_offset >= PAGE_SIZE)) { - di++; + au++; frag_offset -= PAGE_SIZE; } - mlx5e_fill_skb_data(skb, rq, di, byte_cnt, frag_offset); + mlx5e_fill_skb_data(skb, rq, au, byte_cnt, frag_offset); /* copy header */ - mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, head_offset, headlen); + addr = page_pool_get_dma_addr(head_au->page); + mlx5e_copy_skb_header(rq, skb, head_au->page, addr, + head_offset, head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; skb->len += headlen; @@ -1823,12 +1919,13 @@ static struct sk_buff * mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, u16 cqe_bcnt, u32 head_offset, u32 page_idx) { - struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx]; + union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx]; u16 rx_headroom = rq->buff.headroom; - u32 cqe_bcnt32 = cqe_bcnt; - struct xdp_buff xdp; + struct bpf_prog *prog; struct sk_buff *skb; + u32 metasize = 0; void *va, *data; + dma_addr_t addr; u32 frag_size; /* Check packet size. Note LRO doesn't use linear SKB */ @@ -1837,35 +1934,43 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, return NULL; } - va = page_address(di->page) + head_offset; + va = page_address(au->page) + head_offset; data = va + rx_headroom; - frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32); + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset, - frag_size, DMA_FROM_DEVICE); - net_prefetchw(va); /* xdp_frame data area */ + addr = page_pool_get_dma_addr(au->page); + dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset, + frag_size, rq->buff.map_dir); net_prefetch(data); - mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp); - if (mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp)) { - if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) - __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ - return NULL; /* page/packet was consumed by XDP */ - } + prog = rcu_dereference(rq->xdp_prog); + if (prog) { + struct xdp_buff xdp; + + net_prefetchw(va); /* xdp_frame data area */ + mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp); + if (mlx5e_xdp_handle(rq, au->page, prog, &xdp)) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) + __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + return NULL; /* page/packet was consumed by XDP */ + } - rx_headroom = xdp.data - xdp.data_hard_start; - frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32); - skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32); + rx_headroom = xdp.data - xdp.data_hard_start; + metasize = xdp.data - xdp.data_meta; + cqe_bcnt = xdp.data_end - xdp.data; + } + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); + skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); if (unlikely(!skb)) return NULL; /* queue up for recycling/reuse */ - page_ref_inc(di->page); + page_ref_inc(au->page); return skb; } -static void +static struct sk_buff * mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5_cqe64 *cqe, u16 header_index) { @@ -1883,13 +1988,13 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) { /* build SKB around header */ - dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, DMA_FROM_DEVICE); + dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, rq->buff.map_dir); prefetchw(hdr); prefetch(data); - skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size); + skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size, 0); if (unlikely(!skb)) - return; + return NULL; /* queue up for recycling/reuse */ page_ref_inc(head->page); @@ -1901,20 +2006,18 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, ALIGN(head_size, sizeof(long))); if (unlikely(!skb)) { rq->stats->buff_alloc_err++; - return; + return NULL; } prefetchw(skb->data); - mlx5e_copy_skb_header(rq->pdev, skb, head, + mlx5e_copy_skb_header(rq, skb, head->page, head->addr, head_offset + rx_headroom, rx_headroom, head_size); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += head_size; skb->len += head_size; } - rq->hw_gro_data->skb = skb; - NAPI_GRO_CB(skb)->count = 1; - skb_shinfo(skb)->gso_size = mpwrq_get_cqe_byte_cnt(cqe) - head_size; + return skb; } static void @@ -1948,7 +2051,7 @@ mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt) { int nr_frags = skb_shinfo(skb)->nr_frags; - return PAGE_SIZE * nr_frags + data_bcnt <= GSO_MAX_SIZE; + return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE; } static void @@ -1959,7 +2062,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE); - mlx5e_page_release(rq, &shampo->info[header_index], true); + mlx5e_page_release_dynamic(rq, shampo->info[header_index].page, true); } bitmap_clear(shampo->bitmap, header_index, 1); } @@ -1967,28 +2070,28 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { u16 data_bcnt = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size; - u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index); + u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe); u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset); u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); u32 data_offset = wqe_offset & (PAGE_SIZE - 1); u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); u32 page_idx = wqe_offset >> PAGE_SHIFT; + u16 head_size = cqe->shampo.header_size; struct sk_buff **skb = &rq->hw_gro_data->skb; bool flush = cqe->shampo.flush; bool match = cqe->shampo.match; struct mlx5e_rq_stats *stats = rq->stats; struct mlx5e_rx_wqe_ll *wqe; - struct mlx5e_dma_info *di; + union mlx5e_alloc_unit *au; struct mlx5e_mpw_info *wi; struct mlx5_wq_ll *wq; - wi = &rq->mpwqe.info[wqe_id]; + wi = mlx5e_get_mpw_info(rq, wqe_id); wi->consumed_strides += cstrides; if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - trigger_report(rq, cqe); - stats->wqe_err++; + mlx5e_handle_rx_err_cqe(rq, cqe); goto mpwrq_cqe_out; } @@ -2006,9 +2109,16 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq } if (!*skb) { - mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index); + if (likely(head_size)) + *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index); + else + *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe_bcnt, data_offset, + page_idx); if (unlikely(!*skb)) goto free_hd_entry; + + NAPI_GRO_CB(*skb)->count = 1; + skb_shinfo(*skb)->gso_size = cqe_bcnt - head_size; } else { NAPI_GRO_CB(*skb)->count++; if (NAPI_GRO_CB(*skb)->count == 2 && @@ -2022,8 +2132,10 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq } } - di = &wi->umr.dma_info[page_idx]; - mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset); + if (likely(head_size)) { + au = &wi->alloc_units[page_idx]; + mlx5e_fill_skb_data(*skb, rq, au, data_bcnt, data_offset); + } mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb); if (flush) @@ -2044,11 +2156,11 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq { u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); - struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id); u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; - u32 head_offset = wqe_offset & (PAGE_SIZE - 1); - u32 page_idx = wqe_offset >> PAGE_SHIFT; + u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1); + u32 page_idx = wqe_offset >> rq->mpwqe.page_shift; struct mlx5e_rx_wqe_ll *wqe; struct mlx5_wq_ll *wq; struct sk_buff *skb; @@ -2057,8 +2169,7 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq wi->consumed_strides += cstrides; if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - trigger_report(rq, cqe); - rq->stats->wqe_err++; + mlx5e_handle_rx_err_cqe(rq, cqe); goto mpwrq_cqe_out; } @@ -2072,9 +2183,10 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); - skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq, + skb = INDIRECT_CALL_3(rq->mpwqe.skb_from_cqe_mpwrq, mlx5e_skb_from_cqe_mpwrq_linear, mlx5e_skb_from_cqe_mpwrq_nonlinear, + mlx5e_xsk_skb_from_cqe_mpwrq_linear, rq, wi, cqe_bcnt, head_offset, page_idx); if (!skb) goto mpwrq_cqe_out; @@ -2188,7 +2300,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, priv = mlx5i_epriv(netdev); tstamp = &priv->tstamp; - stats = &priv->channel_stats[rq->ix].rq; + stats = rq->stats; flags_rqpn = be32_to_cpu(cqe->flags_rqpn); g = (flags_rqpn >> 28) & 3; @@ -2263,7 +2375,7 @@ static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, mlx5e_skb_from_cqe_linear, mlx5e_skb_from_cqe_nonlinear, - rq, cqe, wi, cqe_bcnt); + rq, wi, cqe_bcnt); if (!skb) goto wq_free_wqe; @@ -2285,46 +2397,6 @@ const struct mlx5e_rx_handlers mlx5i_rx_handlers = { }; #endif /* CONFIG_MLX5_CORE_IPOIB */ -#ifdef CONFIG_MLX5_EN_IPSEC - -static void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) -{ - struct mlx5_wq_cyc *wq = &rq->wqe.wq; - struct mlx5e_wqe_frag_info *wi; - struct sk_buff *skb; - u32 cqe_bcnt; - u16 ci; - - ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); - wi = get_frag(rq, ci); - cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - - if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - rq->stats->wqe_err++; - goto wq_free_wqe; - } - - skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, - mlx5e_skb_from_cqe_linear, - mlx5e_skb_from_cqe_nonlinear, - rq, cqe, wi, cqe_bcnt); - if (unlikely(!skb)) /* a DROP, save the page-reuse checks */ - goto wq_free_wqe; - - skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb, &cqe_bcnt); - if (unlikely(!skb)) - goto wq_free_wqe; - - mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); - napi_gro_receive(rq->cq.napi, skb); - -wq_free_wqe: - mlx5e_free_rx_wqe(rq, wi, true); - mlx5_wq_cyc_pop(wq); -} - -#endif /* CONFIG_MLX5_EN_IPSEC */ - int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk) { struct net_device *netdev = rq->netdev; @@ -2341,10 +2413,6 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; - if (mlx5_fpga_is_ipsec_device(mdev)) { - netdev_err(netdev, "MPWQE RQ with Innova IPSec offload not supported\n"); - return -EINVAL; - } if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo; if (!rq->handle_rx_cqe) { @@ -2363,19 +2431,12 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool default: /* MLX5_WQ_TYPE_CYCLIC */ rq->wqe.skb_from_cqe = xsk ? mlx5e_xsk_skb_from_cqe_linear : - mlx5e_rx_is_linear_skb(params, NULL) ? + mlx5e_rx_is_linear_skb(mdev, params, NULL) ? mlx5e_skb_from_cqe_linear : mlx5e_skb_from_cqe_nonlinear; rq->post_wqes = mlx5e_post_rx_wqes; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; - -#ifdef CONFIG_MLX5_EN_IPSEC - if ((mlx5_fpga_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) && - priv->ipsec) - rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe; - else -#endif - rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe; + rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe; if (!rq->handle_rx_cqe) { netdev_err(netdev, "RX handler of RQ is not set\n"); return -EINVAL; @@ -2406,7 +2467,7 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe goto free_wqe; } - skb = mlx5e_skb_from_cqe_nonlinear(rq, cqe, wi, cqe_bcnt); + skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe_bcnt); if (!skb) goto free_wqe; @@ -2424,7 +2485,7 @@ free_wqe: void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params) { - rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(params, NULL) ? + rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(rq->mdev, params, NULL) ? mlx5e_skb_from_cqe_linear : mlx5e_skb_from_cqe_nonlinear; rq->post_wqes = mlx5e_post_rx_wqes; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 8c9163d2c646..08a75654f5f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -334,6 +334,7 @@ void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest, netdev_info(ndev, "\t[%d] %s start..\n", i, st.name); buf[count] = st.st_func(priv); netdev_info(ndev, "\t[%d] %s end: result(%lld)\n", i, st.name, buf[count]); + count++; } mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 2a9bfc3ffa2e..03c1841970f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -32,9 +32,14 @@ #include "lib/mlx5.h" #include "en.h" -#include "en_accel/tls.h" +#include "en_accel/ktls.h" #include "en_accel/en_accel.h" #include "en/ptp.h" +#include "en/port.h" + +#ifdef CONFIG_PAGE_POOL_STATS +#include <net/page_pool.h> +#endif static unsigned int stats_grps_num(struct mlx5e_priv *priv) { @@ -182,6 +187,19 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) }, +#ifdef CONFIG_PAGE_POOL_STATS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_fast) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow_high_order) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_empty) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_refill) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_waive) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cached) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cache_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_released_ref) }, +#endif #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_bytes) }, @@ -348,6 +366,19 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s, s->rx_congst_umr += rq_stats->congst_umr; s->rx_arfs_err += rq_stats->arfs_err; s->rx_recover += rq_stats->recover; +#ifdef CONFIG_PAGE_POOL_STATS + s->rx_pp_alloc_fast += rq_stats->pp_alloc_fast; + s->rx_pp_alloc_slow += rq_stats->pp_alloc_slow; + s->rx_pp_alloc_empty += rq_stats->pp_alloc_empty; + s->rx_pp_alloc_refill += rq_stats->pp_alloc_refill; + s->rx_pp_alloc_waive += rq_stats->pp_alloc_waive; + s->rx_pp_alloc_slow_high_order += rq_stats->pp_alloc_slow_high_order; + s->rx_pp_recycle_cached += rq_stats->pp_recycle_cached; + s->rx_pp_recycle_cache_full += rq_stats->pp_recycle_cache_full; + s->rx_pp_recycle_ring += rq_stats->pp_recycle_ring; + s->rx_pp_recycle_ring_full += rq_stats->pp_recycle_ring_full; + s->rx_pp_recycle_released_ref += rq_stats->pp_recycle_released_ref; +#endif #ifdef CONFIG_MLX5_EN_TLS s->rx_tls_decrypted_packets += rq_stats->tls_decrypted_packets; s->rx_tls_decrypted_bytes += rq_stats->tls_decrypted_bytes; @@ -443,8 +474,8 @@ static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv, int i; /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ - max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs); - stats = READ_ONCE(priv->htb.qos_sq_stats); + max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs); + stats = READ_ONCE(priv->htb_qos_sq_stats); for (i = 0; i < max_qos_sqs; i++) { mlx5e_stats_grp_sw_update_stats_sq(s, READ_ONCE(stats[i])); @@ -454,6 +485,35 @@ static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv, } } +#ifdef CONFIG_PAGE_POOL_STATS +static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c) +{ + struct mlx5e_rq_stats *rq_stats = c->rq.stats; + struct page_pool *pool = c->rq.page_pool; + struct page_pool_stats stats = { 0 }; + + if (!page_pool_get_stats(pool, &stats)) + return; + + rq_stats->pp_alloc_fast = stats.alloc_stats.fast; + rq_stats->pp_alloc_slow = stats.alloc_stats.slow; + rq_stats->pp_alloc_slow_high_order = stats.alloc_stats.slow_high_order; + rq_stats->pp_alloc_empty = stats.alloc_stats.empty; + rq_stats->pp_alloc_waive = stats.alloc_stats.waive; + rq_stats->pp_alloc_refill = stats.alloc_stats.refill; + + rq_stats->pp_recycle_cached = stats.recycle_stats.cached; + rq_stats->pp_recycle_cache_full = stats.recycle_stats.cache_full; + rq_stats->pp_recycle_ring = stats.recycle_stats.ring; + rq_stats->pp_recycle_ring_full = stats.recycle_stats.ring_full; + rq_stats->pp_recycle_released_ref = stats.recycle_stats.released_refcnt; +} +#else +static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c) +{ +} +#endif + static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) { struct mlx5e_sw_stats *s = &priv->stats.sw; @@ -461,9 +521,13 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) memset(s, 0, sizeof(*s)); + for (i = 0; i < priv->channels.num; i++) /* for active channels only */ + mlx5e_stats_update_stats_rq_page_pool(priv->channels.c[i]); + for (i = 0; i < priv->stats_nch; i++) { struct mlx5e_channel_stats *channel_stats = - &priv->channel_stats[i]; + priv->channel_stats[i]; + int j; mlx5e_stats_grp_sw_update_stats_rq_stats(s, &channel_stats->rq); @@ -577,17 +641,26 @@ static const struct counter_desc vnic_env_stats_dev_oob_desc[] = { VNIC_ENV_OFF(vport_env.internal_rq_out_of_buffer) }, }; +static const struct counter_desc vnic_env_stats_drop_desc[] = { + { "rx_oversize_pkts_buffer", + VNIC_ENV_OFF(vport_env.eth_wqe_too_small) }, +}; + #define NUM_VNIC_ENV_STEER_COUNTERS(dev) \ (MLX5_CAP_GEN(dev, nic_receive_steering_discard) ? \ ARRAY_SIZE(vnic_env_stats_steer_desc) : 0) #define NUM_VNIC_ENV_DEV_OOB_COUNTERS(dev) \ (MLX5_CAP_GEN(dev, vnic_env_int_rq_oob) ? \ ARRAY_SIZE(vnic_env_stats_dev_oob_desc) : 0) +#define NUM_VNIC_ENV_DROP_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, eth_wqe_too_small) ? \ + ARRAY_SIZE(vnic_env_stats_drop_desc) : 0) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vnic_env) { return NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev) + - NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); + NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev) + + NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vnic_env) @@ -601,6 +674,11 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vnic_env) for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, vnic_env_stats_dev_oob_desc[i].format); + + for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vnic_env_stats_drop_desc[i].format); + return idx; } @@ -615,6 +693,11 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vnic_env) for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++) data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out, vnic_env_stats_dev_oob_desc, i); + + for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++) + data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out, + vnic_env_stats_drop_desc, i); + return idx; } @@ -624,7 +707,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env) u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; struct mlx5_core_dev *mdev = priv->mdev; - if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) + if (!mlx5e_stats_grp_vnic_env_num_stats(priv)) return; MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); @@ -1158,16 +1241,100 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } -void mlx5e_stats_fec_get(struct mlx5e_priv *priv, - struct ethtool_fec_stats *fec_stats) +static int fec_num_lanes(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(pmlp_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {}; + int err; + + MLX5_SET(pmlp_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_PMLP, 0, 0); + if (err) + return 0; + + return MLX5_GET(pmlp_reg, out, width); +} + +static int fec_active_mode(struct mlx5_core_dev *mdev) +{ + unsigned long fec_active_long; + u32 fec_active; + + if (mlx5e_get_fec_mode(mdev, &fec_active, NULL)) + return MLX5E_FEC_NOFEC; + + fec_active_long = fec_active; + return find_first_bit(&fec_active_long, sizeof(unsigned long) * BITS_PER_BYTE); +} + +#define MLX5E_STATS_SET_FEC_BLOCK(idx) ({ \ + fec_stats->corrected_blocks.lanes[(idx)] = \ + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \ + fc_fec_corrected_blocks_lane##idx); \ + fec_stats->uncorrectable_blocks.lanes[(idx)] = \ + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \ + fc_fec_uncorrectable_blocks_lane##idx); \ +}) + +static void fec_set_fc_stats(struct ethtool_fec_stats *fec_stats, + u32 *ppcnt, u8 lanes) +{ + if (lanes > 3) { /* 4 lanes */ + MLX5E_STATS_SET_FEC_BLOCK(3); + MLX5E_STATS_SET_FEC_BLOCK(2); + } + if (lanes > 1) /* 2 lanes */ + MLX5E_STATS_SET_FEC_BLOCK(1); + if (lanes > 0) /* 1 lane */ + MLX5E_STATS_SET_FEC_BLOCK(0); +} + +static void fec_set_rs_stats(struct ethtool_fec_stats *fec_stats, u32 *ppcnt) +{ + fec_stats->corrected_blocks.total = + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, + rs_fec_corrected_blocks); + fec_stats->uncorrectable_blocks.total = + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, + rs_fec_uncorrectable_blocks); +} + +static void fec_set_block_stats(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) { - u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)]; struct mlx5_core_dev *mdev = priv->mdev; - u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int mode = fec_active_mode(mdev); - if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) + if (mode == MLX5E_FEC_NOFEC) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + if (mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0)) + return; + + switch (mode) { + case MLX5E_FEC_RS_528_514: + case MLX5E_FEC_RS_544_514: + case MLX5E_FEC_LLRS_272_257_1: + fec_set_rs_stats(fec_stats, out); return; + case MLX5E_FEC_FIRECODE: + fec_set_fc_stats(fec_stats, out, fec_num_lanes(mdev)); + } +} + +static void fec_set_corrected_bits_total(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) +{ + u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); MLX5_SET(ppcnt_reg, in, local_port, 1); MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); @@ -1181,6 +1348,16 @@ void mlx5e_stats_fec_get(struct mlx5e_priv *priv, phy_corrected_bits); } +void mlx5e_stats_fec_get(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) +{ + if (!MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group)) + return; + + fec_set_corrected_bits_total(priv, fec_stats); + fec_set_block_stats(priv, fec_stats); +} + #define PPORT_ETH_EXT_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_extended_cntrs_grp_data_layout.c##_high) @@ -1742,17 +1919,17 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pme) { return; } static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(tls) { - return mlx5e_tls_get_count(priv); + return mlx5e_ktls_get_count(priv); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(tls) { - return idx + mlx5e_tls_get_strings(priv, data + idx * ETH_GSTRING_LEN); + return idx + mlx5e_ktls_get_strings(priv, data + idx * ETH_GSTRING_LEN); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(tls) { - return idx + mlx5e_tls_get_stats(priv, data + idx); + return idx + mlx5e_ktls_get_stats(priv, data + idx); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(tls) { return; } @@ -1792,6 +1969,19 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) }, +#ifdef CONFIG_PAGE_POOL_STATS + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_fast) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow_high_order) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_empty) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_refill) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_waive) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cached) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cache_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_released_ref) }, +#endif #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_bytes) }, @@ -1929,6 +2119,8 @@ static const struct counter_desc ptp_cq_stats_desc[] = { { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, err_cqe) }, { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort) }, { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) }, + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_cqe) }, + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_event) }, }; static const struct counter_desc ptp_rq_stats_desc[] = { @@ -2013,13 +2205,13 @@ static const struct counter_desc qos_sq_stats_desc[] = { static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos) { /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ - return NUM_QOS_SQ_STATS * smp_load_acquire(&priv->htb.max_qos_sqs); + return NUM_QOS_SQ_STATS * smp_load_acquire(&priv->htb_max_qos_sqs); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qos) { /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ - u16 max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs); + u16 max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs); int i, qid; for (qid = 0; qid < max_qos_sqs; qid++) @@ -2037,8 +2229,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qos) int i, qid; /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ - max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs); - stats = READ_ONCE(priv->htb.qos_sq_stats); + max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs); + stats = READ_ONCE(priv->htb_qos_sq_stats); for (qid = 0; qid < max_qos_sqs; qid++) { struct mlx5e_sq_stats *s = READ_ONCE(stats[qid]); @@ -2076,7 +2268,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp) for (i = 0; i < NUM_PTP_CH_STATS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, - ptp_ch_stats_desc[i].format); + "%s", ptp_ch_stats_desc[i].format); if (priv->tx_ptp_opened) { for (tc = 0; tc < priv->max_opened_tc; tc++) @@ -2197,21 +2389,21 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels) for (i = 0; i < max_nch; i++) for (j = 0; j < NUM_CH_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].ch, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->ch, ch_stats_desc, j); for (i = 0; i < max_nch; i++) { for (j = 0; j < NUM_RQ_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq, rq_stats_desc, j); for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xskrq, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xskrq, xskrq_stats_desc, j); for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq_xdpsq, rq_xdpsq_stats_desc, j); } @@ -2219,17 +2411,17 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels) for (i = 0; i < max_nch; i++) for (j = 0; j < NUM_SQ_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc], + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->sq[tc], sq_stats_desc, j); for (i = 0; i < max_nch; i++) { for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xsksq, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xsksq, xsksq_stats_desc, j); for (j = 0; j < NUM_XDPSQ_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xdpsq, xdpsq_stats_desc, j); } @@ -2253,7 +2445,7 @@ MLX5E_DEFINE_STATS_GRP(channels, 0); MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0); MLX5E_DEFINE_STATS_GRP(eth_ext, 0); static MLX5E_DEFINE_STATS_GRP(tls, 0); -static MLX5E_DEFINE_STATS_GRP(ptp, 0); +MLX5E_DEFINE_STATS_GRP(ptp, 0); static MLX5E_DEFINE_STATS_GRP(qos, 0); /* The stats groups order is opposite to the update_stats() order calls */ @@ -2272,13 +2464,15 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { &MLX5E_STATS_GRP(pme), #ifdef CONFIG_MLX5_EN_IPSEC &MLX5E_STATS_GRP(ipsec_sw), - &MLX5E_STATS_GRP(ipsec_hw), #endif &MLX5E_STATS_GRP(tls), &MLX5E_STATS_GRP(channels), &MLX5E_STATS_GRP(per_port_buff_congest), &MLX5E_STATS_GRP(ptp), &MLX5E_STATS_GRP(qos), +#ifdef CONFIG_MLX5_EN_MACSEC + &MLX5E_STATS_GRP(macsec_hw), +#endif }; unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 2c1ed5b81be6..9f781085be47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -205,7 +205,19 @@ struct mlx5e_sw_stats { u64 ch_aff_change; u64 ch_force_irq; u64 ch_eq_rearm; - +#ifdef CONFIG_PAGE_POOL_STATS + u64 rx_pp_alloc_fast; + u64 rx_pp_alloc_slow; + u64 rx_pp_alloc_slow_high_order; + u64 rx_pp_alloc_empty; + u64 rx_pp_alloc_refill; + u64 rx_pp_alloc_waive; + u64 rx_pp_recycle_cached; + u64 rx_pp_recycle_cache_full; + u64 rx_pp_recycle_ring; + u64 rx_pp_recycle_ring_full; + u64 rx_pp_recycle_released_ref; +#endif #ifdef CONFIG_MLX5_EN_TLS u64 tx_tls_encrypted_packets; u64 tx_tls_encrypted_bytes; @@ -261,6 +273,10 @@ struct mlx5e_qcounter_stats { u32 rx_if_down_packets; }; +#define VNIC_ENV_GET(vnic_env_stats, c) \ + MLX5_GET(query_vnic_env_out, (vnic_env_stats)->query_vnic_env_out, \ + vport_env.c) + struct mlx5e_vnic_env_stats { __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)]; }; @@ -352,6 +368,19 @@ struct mlx5e_rq_stats { u64 congst_umr; u64 arfs_err; u64 recover; +#ifdef CONFIG_PAGE_POOL_STATS + u64 pp_alloc_fast; + u64 pp_alloc_slow; + u64 pp_alloc_slow_high_order; + u64 pp_alloc_empty; + u64 pp_alloc_refill; + u64 pp_alloc_waive; + u64 pp_recycle_cached; + u64 pp_recycle_cache_full; + u64 pp_recycle_ring; + u64 pp_recycle_ring_full; + u64 pp_recycle_released_ref; +#endif #ifdef CONFIG_MLX5_EN_TLS u64 tls_decrypted_packets; u64 tls_decrypted_bytes; @@ -428,6 +457,8 @@ struct mlx5e_ptp_cq_stats { u64 err_cqe; u64 abort; u64 abort_abs_diff_ns; + u64 resync_cqe; + u64 resync_event; }; struct mlx5e_stats { @@ -457,7 +488,8 @@ extern MLX5E_DECLARE_STATS_GRP(per_prio); extern MLX5E_DECLARE_STATS_GRP(pme); extern MLX5E_DECLARE_STATS_GRP(channels); extern MLX5E_DECLARE_STATS_GRP(per_port_buff_congest); -extern MLX5E_DECLARE_STATS_GRP(ipsec_hw); extern MLX5E_DECLARE_STATS_GRP(ipsec_sw); +extern MLX5E_DECLARE_STATS_GRP(ptp); +extern MLX5E_DECLARE_STATS_GRP(macsec_hw); #endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3d45f4ae80c0..5a6aa61ec82a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -39,10 +39,6 @@ #include <linux/rhashtable.h> #include <linux/refcount.h> #include <linux/completion.h> -#include <linux/if_macvlan.h> -#include <net/tc_act/tc_pedit.h> -#include <net/tc_act/tc_csum.h> -#include <net/psample.h> #include <net/arp.h> #include <net/ipv6_stubs.h> #include <net/bareudp.h> @@ -62,6 +58,8 @@ #include "en/mod_hdr.h" #include "en/tc_tun_encap.h" #include "en/tc/sample.h" +#include "en/tc/act/act.h" +#include "en/tc/post_meter.h" #include "lib/devcom.h" #include "lib/geneve.h" #include "lib/fs_chains.h" @@ -70,12 +68,33 @@ #include "lag/lag.h" #include "lag/mp.h" -#define nic_chains(priv) ((priv)->fs.tc.chains) -#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) - #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) +struct mlx5e_tc_table { + /* Protects the dynamic assignment of the t parameter + * which is the nic tc root table. + */ + struct mutex t_lock; + struct mlx5e_priv *priv; + struct mlx5_flow_table *t; + struct mlx5_flow_table *miss_t; + struct mlx5_fs_chains *chains; + struct mlx5e_post_act *post_act; + + struct rhashtable ht; + + struct mod_hdr_tbl mod_hdr; + struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */ + DECLARE_HASHTABLE(hairpin_tbl, 8); + + struct notifier_block netdevice_nb; + struct netdev_net_notifier netdevice_nn; + + struct mlx5_tc_ct_priv *ct; + struct mapping_ctx *mapping; +}; + struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { [CHAIN_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, @@ -110,8 +129,27 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { .mlen = 16, }, [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct, + [PACKET_COLOR_TO_REG] = packet_color_to_reg, }; +struct mlx5e_tc_table *mlx5e_tc_table_alloc(void) +{ + struct mlx5e_tc_table *tc; + + tc = kvzalloc(sizeof(*tc), GFP_KERNEL); + return tc ? tc : ERR_PTR(-ENOMEM); +} + +void mlx5e_tc_table_free(struct mlx5e_tc_table *tc) +{ + kvfree(tc); +} + +struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc) +{ + return tc->chains; +} + /* To avoid false lock dependency warning set the tc_ht lock * class different than the lock class of the ht being used when deleting * last flow from a group and then deleting a group, we get into del_sw_flow_group() @@ -121,6 +159,7 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { static struct lock_class_key tc_ht_lock_key; static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); +static void free_flow_post_acts(struct mlx5e_tc_flow *flow); void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, @@ -209,12 +248,9 @@ mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, char *modact; int err; - err = alloc_mod_hdr_actions(mdev, ns, mod_hdr_acts); - if (err) - return err; - - modact = mod_hdr_acts->actions + - (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ); + modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts); + if (IS_ERR(modact)) + return PTR_ERR(modact); /* Firmware has 5bit length field and 0 means 32bits */ if (mlen == 32) @@ -248,9 +284,34 @@ mlx5e_get_int_port_priv(struct mlx5e_priv *priv) return NULL; } +struct mlx5e_flow_meters * +mlx5e_get_flow_meters(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_priv *priv; + + if (is_mdev_switchdev_mode(dev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + priv = netdev_priv(uplink_rpriv->netdev); + if (!uplink_priv->flow_meters) + uplink_priv->flow_meters = + mlx5e_flow_meters_init(priv, + MLX5_FLOW_NAMESPACE_FDB, + uplink_priv->post_act); + if (!IS_ERR(uplink_priv->flow_meters)) + return uplink_priv->flow_meters; + } + + return NULL; +} + static struct mlx5_tc_ct_priv * get_ct_priv(struct mlx5e_priv *priv) { + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_rep_uplink_priv *uplink_priv; struct mlx5e_rep_priv *uplink_rpriv; @@ -262,7 +323,7 @@ get_ct_priv(struct mlx5e_priv *priv) return uplink_priv->ct_priv; } - return priv->fs.tc.ct; + return tc->ct; } static struct mlx5e_tc_psample * @@ -282,6 +343,24 @@ get_sample_priv(struct mlx5e_priv *priv) return NULL; } +static struct mlx5e_post_act * +get_post_action(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if (is_mdev_switchdev_mode(priv->mdev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->post_act; + } + + return tc->post_act; +} + struct mlx5_flow_handle * mlx5_tc_rule_insert(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, @@ -304,13 +383,121 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv, if (is_mdev_switchdev_mode(priv->mdev)) { mlx5_eswitch_del_offloaded_rule(esw, rule, attr); - return; } mlx5e_del_offloaded_nic_rule(priv, rule, attr); } +static bool +is_flow_meter_action(struct mlx5_flow_attr *attr) +{ + return ((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) && + (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)); +} + +static int +mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_post_act *post_act = get_post_action(priv); + struct mlx5e_post_meter_priv *post_meter; + enum mlx5_flow_namespace_type ns_type; + struct mlx5e_flow_meter_handle *meter; + + meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params); + if (IS_ERR(meter)) { + mlx5_core_err(priv->mdev, "Failed to get flow meter\n"); + return PTR_ERR(meter); + } + + ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters); + post_meter = mlx5e_post_meter_init(priv, ns_type, post_act, meter->green_counter, + meter->red_counter); + if (IS_ERR(post_meter)) { + mlx5_core_err(priv->mdev, "Failed to init post meter\n"); + goto err_meter_init; + } + + attr->meter_attr.meter = meter; + attr->meter_attr.post_meter = post_meter; + attr->dest_ft = mlx5e_post_meter_get_ft(post_meter); + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + return 0; + +err_meter_init: + mlx5e_tc_meter_put(meter); + return PTR_ERR(post_meter); +} + +static void +mlx5e_tc_del_flow_meter(struct mlx5_flow_attr *attr) +{ + mlx5e_post_meter_cleanup(attr->meter_attr.post_meter); + mlx5e_tc_meter_put(attr->meter_attr.meter); +} + +struct mlx5_flow_handle * +mlx5e_tc_rule_offload(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int err; + + if (attr->flags & MLX5_ATTR_FLAG_CT) { + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = + &attr->parse_attr->mod_hdr_acts; + + return mlx5_tc_ct_flow_offload(get_ct_priv(priv), + spec, attr, + mod_hdr_acts); + } + + if (!is_mdev_switchdev_mode(priv->mdev)) + return mlx5e_add_offloaded_nic_rule(priv, spec, attr); + + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) + return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr); + + if (is_flow_meter_action(attr)) { + err = mlx5e_tc_add_flow_meter(priv, attr); + if (err) + return ERR_PTR(err); + } + + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); +} + +void +mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (attr->flags & MLX5_ATTR_FLAG_CT) { + mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr); + return; + } + + if (!is_mdev_switchdev_mode(priv->mdev)) { + mlx5e_del_offloaded_nic_rule(priv, rule, attr); + return; + } + + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) { + mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr); + return; + } + + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + + if (attr->meter_attr.meter) + mlx5e_tc_del_flow_meter(attr); +} + int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, @@ -333,7 +520,7 @@ void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev, int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; char *modact; - modact = mod_hdr_acts->actions + (act_id * MLX5_MH_ACT_SZ); + modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id); /* Firmware has 5bit length field and 0 means 32bits */ if (mlen == 32) @@ -403,7 +590,7 @@ bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) return flow_flag_test(flow, ESWITCH); } -static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow) +bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow) { return flow_flag_test(flow, FT); } @@ -413,7 +600,7 @@ bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow) return flow_flag_test(flow, OFFLOADED); } -static int get_flow_name_space(struct mlx5e_tc_flow *flow) +int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow) { return mlx5e_is_eswitch_flow(flow) ? MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL; @@ -422,11 +609,12 @@ static int get_flow_name_space(struct mlx5e_tc_flow *flow) static struct mod_hdr_tbl * get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - return get_flow_name_space(flow) == MLX5_FLOW_NAMESPACE_FDB ? + return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr : - &priv->fs.tc.mod_hdr; + &tc->mod_hdr; } static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, @@ -437,7 +625,7 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, struct mlx5e_mod_hdr_handle *mh; mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow), - get_flow_name_space(flow), + mlx5e_get_flow_namespace(flow), &parse_attr->mod_hdr_acts); if (IS_ERR(mh)) return PTR_ERR(mh); @@ -625,6 +813,7 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp) { struct mlx5e_priv *priv = hp->func_priv; struct ttc_params ttc_params; + struct mlx5_ttc_table *ttc; int err; err = mlx5e_hairpin_create_indirect_rqt(hp); @@ -642,9 +831,10 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp) goto err_create_ttc_table; } + ttc = mlx5e_fs_get_ttc(priv->fs, false); netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n", hp->num_channels, - mlx5_get_ttc_flow_table(priv->fs.ttc)->id); + mlx5_get_ttc_flow_table(ttc)->id); return 0; @@ -731,10 +921,11 @@ static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio) static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv, u16 peer_vhca_id, u8 prio) { + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); struct mlx5e_hairpin_entry *hpe; u32 hash_key = hash_hairpin_info(peer_vhca_id, prio); - hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe, + hash_for_each_possible(tc->hairpin_tbl, hpe, hairpin_hlist, hash_key) { if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) { refcount_inc(&hpe->refcnt); @@ -748,11 +939,12 @@ static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv, static void mlx5e_hairpin_put(struct mlx5e_priv *priv, struct mlx5e_hairpin_entry *hpe) { + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); /* no more hairpin flows for us, release the hairpin pair */ - if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock)) + if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock)) return; hash_del(&hpe->hairpin_hlist); - mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + mutex_unlock(&tc->hairpin_tbl_lock); if (!IS_ERR_OR_NULL(hpe->hp)) { netdev_dbg(priv->netdev, "del hairpin: peer %s\n", @@ -808,6 +1000,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, struct netlink_ext_ack *extack) { + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); int peer_ifindex = parse_attr->mirred_ifindex[0]; struct mlx5_hairpin_params params; struct mlx5_core_dev *peer_mdev; @@ -836,10 +1029,10 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, if (err) return err; - mutex_lock(&priv->fs.tc.hairpin_tbl_lock); + mutex_lock(&tc->hairpin_tbl_lock); hpe = mlx5e_hairpin_get(priv, peer_id, match_prio); if (hpe) { - mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + mutex_unlock(&tc->hairpin_tbl_lock); wait_for_completion(&hpe->res_ready); if (IS_ERR(hpe->hp)) { @@ -851,7 +1044,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, hpe = kzalloc(sizeof(*hpe), GFP_KERNEL); if (!hpe) { - mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + mutex_unlock(&tc->hairpin_tbl_lock); return -ENOMEM; } @@ -863,9 +1056,9 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, refcount_set(&hpe->refcnt, 1); init_completion(&hpe->res_ready); - hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, + hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist, hash_hairpin_info(peer_id, match_prio)); - mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + mutex_unlock(&tc->hairpin_tbl_lock); params.log_data_size = 16; params.log_data_size = min_t(u8, params.log_data_size, @@ -941,10 +1134,11 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { struct mlx5_flow_context *flow_context = &spec->flow_context; - struct mlx5_fs_chains *nic_chains = nic_chains(priv); + struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs); + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr; - struct mlx5e_tc_table *tc = &priv->fs.tc; struct mlx5_flow_destination dest[2] = {}; + struct mlx5_fs_chains *nic_chains; struct mlx5_flow_act flow_act = { .action = attr->action, .flags = FLOW_ACT_NO_APPEND, @@ -953,6 +1147,7 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, struct mlx5_flow_table *ft; int dest_ix = 0; + nic_chains = mlx5e_nic_chains(tc); flow_context->flags |= FLOW_CONTEXT_HAS_TAG; flow_context->flow_tag = nic_attr->flow_tag; @@ -977,7 +1172,7 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, if (IS_ERR(dest[dest_ix].ft)) return ERR_CAST(dest[dest_ix].ft); } else { - dest[dest_ix].ft = mlx5e_vlan_get_flowtable(priv->fs.vlan); + dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan); } dest_ix++; } @@ -1005,7 +1200,7 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, mutex_unlock(&tc->t_lock); netdev_err(priv->netdev, "Failed to create tc offload table\n"); - rule = ERR_CAST(priv->fs.tc.t); + rule = ERR_CAST(tc->t); goto err_ft_get; } } @@ -1048,6 +1243,21 @@ err_ft_get: } static int +alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev, + struct mlx5_flow_attr *attr) + +{ + struct mlx5_fc *counter; + + counter = mlx5_fc_create(counter_dev, true); + if (IS_ERR(counter)) + return PTR_ERR(counter); + + attr->counter = counter; + return 0; +} + +static int mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) @@ -1055,7 +1265,6 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_core_dev *dev = priv->mdev; - struct mlx5_fc *counter; int err; parse_attr = attr->parse_attr; @@ -1067,22 +1276,20 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(dev, true); - if (IS_ERR(counter)) - return PTR_ERR(counter); - - attr->counter = counter; + err = alloc_flow_attr_counter(dev, attr); + if (err) + return err; } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); + mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); if (err) return err; } - if (flow_flag_test(flow, CT)) - flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec, + if (attr->flags & MLX5_ATTR_FLAG_CT) + flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec, attr, &parse_attr->mod_hdr_acts); else flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, @@ -1095,8 +1302,10 @@ void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, struct mlx5_flow_handle *rule, struct mlx5_flow_attr *attr) { - struct mlx5_fs_chains *nic_chains = nic_chains(priv); + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_fs_chains *nic_chains; + nic_chains = mlx5e_nic_chains(tc); mlx5_del_flow_rules(rule); if (attr->chain || attr->prio) @@ -1111,37 +1320,39 @@ void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); struct mlx5_flow_attr *attr = flow->attr; - struct mlx5e_tc_table *tc = &priv->fs.tc; flow_flag_clear(flow, OFFLOADED); - if (flow_flag_test(flow, CT)) - mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); + if (attr->flags & MLX5_ATTR_FLAG_CT) + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr); else if (!IS_ERR_OR_NULL(flow->rule[0])) mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr); /* Remove root table if no rules are left to avoid * extra steering hops. */ - mutex_lock(&priv->fs.tc.t_lock); + mutex_lock(&tc->t_lock); if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && !IS_ERR_OR_NULL(tc->t)) { - mlx5_chains_put_table(nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL); - priv->fs.tc.t = NULL; + mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL); + tc->t = NULL; } - mutex_unlock(&priv->fs.tc.t_lock); - - kvfree(attr->parse_attr); + mutex_unlock(&tc->t_lock); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); - mlx5_fc_destroy(priv->mdev, attr->counter); + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + mlx5_fc_destroy(priv->mdev, attr->counter); if (flow_flag_test(flow, HAIRPIN)) mlx5e_hairpin_flow_del(priv, flow); + free_flow_post_acts(flow); + + kvfree(attr->parse_attr); kfree(flow->attr); } @@ -1151,40 +1362,27 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr) { - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; struct mlx5_flow_handle *rule; - if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) + if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); - if (flow_flag_test(flow, CT)) { - mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; - - rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv), - flow, spec, attr, - mod_hdr_acts); - } else if (flow_flag_test(flow, SAMPLE)) { - rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr, - mlx5e_tc_get_flow_tun_id(flow)); - } else { - rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); - } + rule = mlx5e_tc_rule_offload(flow->priv, spec, attr); if (IS_ERR(rule)) return rule; if (attr->esw_attr->split_count) { flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); - if (IS_ERR(flow->rule[1])) { - if (flow_flag_test(flow, CT)) - mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); - else - mlx5_eswitch_del_offloaded_rule(esw, rule, attr); - return flow->rule[1]; - } + if (IS_ERR(flow->rule[1])) + goto err_rule1; } return rule; + +err_rule1: + mlx5e_tc_rule_unoffload(flow->priv, rule, attr); + return flow->rule[1]; } void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, @@ -1193,24 +1391,13 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, { flow_flag_clear(flow, OFFLOADED); - if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) - goto offload_rule_0; - - if (flow_flag_test(flow, CT)) { - mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); - return; - } - - if (flow_flag_test(flow, SAMPLE)) { - mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); - return; - } + if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) + return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); if (attr->esw_attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); -offload_rule_0: - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr); } struct mlx5_flow_handle * @@ -1218,8 +1405,13 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec) { + struct mlx5e_tc_mod_hdr_acts mod_acts = {}; + struct mlx5e_mod_hdr_handle *mh = NULL; struct mlx5_flow_attr *slow_attr; struct mlx5_flow_handle *rule; + bool fwd_and_modify_cap; + u32 chain_mapping = 0; + int err; slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); if (!slow_attr) @@ -1228,15 +1420,58 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; slow_attr->esw_attr->split_count = 0; - slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; + + fwd_and_modify_cap = MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table); + if (!fwd_and_modify_cap) + goto skip_restore; + + err = mlx5_chains_get_chain_mapping(esw_chains(esw), flow->attr->chain, &chain_mapping); + if (err) + goto err_get_chain; + + err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, + CHAIN_TO_REG, chain_mapping); + if (err) + goto err_reg_set; + + mh = mlx5e_mod_hdr_attach(esw->dev, get_mod_hdr_table(flow->priv, flow), + MLX5_FLOW_NAMESPACE_FDB, &mod_acts); + if (IS_ERR(mh)) { + err = PTR_ERR(mh); + goto err_attach; + } + + slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + slow_attr->modify_hdr = mlx5e_mod_hdr_get(mh); +skip_restore: rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); - if (!IS_ERR(rule)) - flow_flag_set(flow, SLOW); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto err_offload; + } + + flow->slow_mh = mh; + flow->chain_mapping = chain_mapping; + flow_flag_set(flow, SLOW); + mlx5e_mod_hdr_dealloc(&mod_acts); kfree(slow_attr); return rule; + +err_offload: + if (fwd_and_modify_cap) + mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), mh); +err_attach: +err_reg_set: + if (fwd_and_modify_cap) + mlx5_chains_put_chain_mapping(esw_chains(esw), chain_mapping); +err_get_chain: + mlx5e_mod_hdr_dealloc(&mod_acts); + kfree(slow_attr); + return ERR_PTR(err); } void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, @@ -1253,8 +1488,18 @@ void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; slow_attr->esw_attr->split_count = 0; - slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; + if (flow->slow_mh) { + slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + slow_attr->modify_hdr = mlx5e_mod_hdr_get(flow->slow_mh); + } mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); + if (flow->slow_mh) { + mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), flow->slow_mh); + mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping); + flow->chain_mapping = 0; + flow->slow_mh = NULL; + } flow_flag_clear(flow, SLOW); kfree(slow_attr); } @@ -1308,8 +1553,6 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow) mutex_unlock(&uplink_priv->unready_flows_lock); } -static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv); - bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev) { struct mlx5_core_dev *out_mdev, *route_mdev; @@ -1320,11 +1563,14 @@ bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_ route_priv = netdev_priv(route_dev); route_mdev = route_priv->mdev; - if (out_mdev->coredev_type != MLX5_COREDEV_PF || - route_mdev->coredev_type != MLX5_COREDEV_VF) + if (out_mdev->coredev_type != MLX5_COREDEV_PF) return false; - return same_hw_devs(out_priv, route_priv); + if (route_mdev->coredev_type != MLX5_COREDEV_VF && + route_mdev->coredev_type != MLX5_COREDEV_SF) + return false; + + return mlx5e_same_hw_devs(out_priv, route_priv); } int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport) @@ -1364,26 +1610,120 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro } int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow) + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) { - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &parse_attr->mod_hdr_acts; + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; struct mlx5_modify_hdr *mod_hdr; mod_hdr = mlx5_modify_header_alloc(priv->mdev, - get_flow_name_space(flow), + mlx5e_get_flow_namespace(flow), mod_hdr_acts->num_actions, mod_hdr_acts->actions); if (IS_ERR(mod_hdr)) return PTR_ERR(mod_hdr); - WARN_ON(flow->attr->modify_hdr); - flow->attr->modify_hdr = mod_hdr; + WARN_ON(attr->modify_hdr); + attr->modify_hdr = mod_hdr; return 0; } static int +set_encap_dests(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack, + bool *encap_valid, + bool *vf_tun) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_esw_flow_attr *esw_attr; + struct net_device *encap_dev = NULL; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *out_priv; + int out_index; + int err = 0; + + if (!mlx5e_is_eswitch_flow(flow)) + return 0; + + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + *vf_tun = false; + *encap_valid = true; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + struct net_device *out_dev; + int mirred_ifindex; + + if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + mirred_ifindex = parse_attr->mirred_ifindex[out_index]; + out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex); + if (!out_dev) { + NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found"); + err = -ENODEV; + goto out; + } + err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index, + extack, &encap_dev, encap_valid); + dev_put(out_dev); + if (err) + goto out; + + if (esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && + !esw_attr->dest_int_port) + *vf_tun = true; + + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + esw_attr->dests[out_index].rep = rpriv->rep; + esw_attr->dests[out_index].mdev = out_priv->mdev; + } + + if (*vf_tun && esw_attr->out_count > 1) { + NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported"); + err = -EOPNOTSUPP; + goto out; + } + +out: + return err; +} + +static void +clean_encap_dests(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + bool *vf_tun) +{ + struct mlx5_esw_flow_attr *esw_attr; + int out_index; + + if (!mlx5e_is_eswitch_flow(flow)) + return; + + esw_attr = attr->esw_attr; + *vf_tun = false; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + if (esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && + !esw_attr->dest_int_port) + *vf_tun = true; + + mlx5e_detach_encap(priv, flow, attr, out_index); + kfree(attr->parse_attr->tun_info[out_index]); + } +} + +static int mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) @@ -1391,15 +1731,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; - bool vf_tun = false, encap_valid = true; - struct net_device *encap_dev = NULL; struct mlx5_esw_flow_attr *esw_attr; - struct mlx5e_rep_priv *rpriv; - struct mlx5e_priv *out_priv; - struct mlx5_fc *counter; + bool vf_tun, encap_valid; u32 max_prio, max_chain; int err = 0; - int out_index; parse_attr = attr->parse_attr; esw_attr = attr->esw_attr; @@ -1430,7 +1765,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_out; - if (!attr->chain && esw_attr->int_port) { + if (!attr->chain && esw_attr->int_port && + attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { /* If decap route device is internal port, change the * source vport value in reg_c0 back to uplink just in * case the rule performs goto chain > 0. If we have a miss @@ -1445,7 +1781,9 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, metadata); if (err) - return err; + goto err_out; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } } @@ -1461,13 +1799,15 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (attr->chain) { NL_SET_ERR_MSG_MOD(extack, "Internal port rule is only supported on chain 0"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; } if (attr->dest_chain) { NL_SET_ERR_MSG_MOD(extack, "Internal port rule offload doesn't support goto action"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; } int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), @@ -1475,56 +1815,25 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, flow_flag_test(flow, EGRESS) ? MLX5E_TC_INT_PORT_EGRESS : MLX5E_TC_INT_PORT_INGRESS); - if (IS_ERR(int_port)) - return PTR_ERR(int_port); - - esw_attr->int_port = int_port; - } - - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { - struct net_device *out_dev; - int mirred_ifindex; - - if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) - continue; - - mirred_ifindex = parse_attr->mirred_ifindex[out_index]; - out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex); - if (!out_dev) { - NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found"); - err = -ENODEV; + if (IS_ERR(int_port)) { + err = PTR_ERR(int_port); goto err_out; } - err = mlx5e_attach_encap(priv, flow, out_dev, out_index, - extack, &encap_dev, &encap_valid); - dev_put(out_dev); - if (err) - goto err_out; - if (esw_attr->dests[out_index].flags & - MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && - !esw_attr->dest_int_port) - vf_tun = true; - out_priv = netdev_priv(encap_dev); - rpriv = out_priv->ppriv; - esw_attr->dests[out_index].rep = rpriv->rep; - esw_attr->dests[out_index].mdev = out_priv->mdev; + esw_attr->int_port = int_port; } - if (vf_tun && esw_attr->out_count > 1) { - NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported"); - err = -EOPNOTSUPP; + err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun); + if (err) goto err_out; - } err = mlx5_eswitch_add_vlan_action(esw, attr); if (err) goto err_out; - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && - !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { if (vf_tun) { - err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow); + err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr); if (err) goto err_out; } else { @@ -1535,20 +1844,16 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(esw_attr->counter_dev, true); - if (IS_ERR(counter)) { - err = PTR_ERR(counter); + err = alloc_flow_attr_counter(esw_attr->counter_dev, attr); + if (err) goto err_out; - } - - attr->counter = counter; } /* we get here if one of the following takes place: * (1) there's no error * (2) there's an encap action and we don't have valid neigh */ - if (!encap_valid) + if (!encap_valid || flow_flag_test(flow, SLOW)) flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec); else flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); @@ -1585,8 +1890,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_esw_flow_attr *esw_attr; - bool vf_tun = false; - int out_index; + bool vf_tun; esw_attr = attr->esw_attr; mlx5e_put_flow_tunnel_id(flow); @@ -1610,29 +1914,17 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (flow->decap_route) mlx5e_detach_decap_route(priv, flow); - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { - if (esw_attr->dests[out_index].flags & - MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && - !esw_attr->dest_int_port) - vf_tun = true; - if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) { - mlx5e_detach_encap(priv, flow, out_index); - kfree(attr->parse_attr->tun_info[out_index]); - } - } + clean_encap_dests(priv, flow, attr, &vf_tun); mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { - dealloc_mod_hdr_actions(&attr->parse_attr->mod_hdr_acts); + mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); if (vf_tun && attr->modify_hdr) mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr); else mlx5e_detach_mod_hdr(priv, flow); } - kfree(attr->sample_attr); - kvfree(attr->parse_attr); - kvfree(attr->esw_attr->rx_tun_attr); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(esw_attr->counter_dev, attr->counter); @@ -1646,12 +1938,22 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); + free_flow_post_acts(flow); + + if (flow->attr->lag.count) + mlx5_lag_del_mpesw_rule(esw->dev); + + kvfree(attr->esw_attr->rx_tun_attr); + kvfree(attr->parse_attr); kfree(flow->attr); } struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) { - return flow->attr->counter; + struct mlx5_flow_attr *attr; + + attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list); + return attr->counter; } /* Iterate over tmp_list of flows attached to flow_list head. */ @@ -1863,7 +2165,7 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } - flow->tunnel_id = value; + flow->attr->tunnel_id = value; return 0; err_set: @@ -1877,8 +2179,8 @@ err_enc_opts: static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) { - u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK; - u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS; + u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK; + u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS; struct mlx5_rep_uplink_priv *uplink_priv; struct mlx5e_rep_priv *uplink_rpriv; struct mlx5_eswitch *esw; @@ -1894,11 +2196,6 @@ static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) enc_opts_id); } -u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow) -{ - return flow->tunnel_id; -} - void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, struct flow_match_basic *match, bool outer, void *headers_c, void *headers_v) @@ -1948,6 +2245,111 @@ u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer) return ip_version; } +/* Tunnel device follows RFC 6040, see include/net/inet_ecn.h. + * And changes inner ip_ecn depending on inner and outer ip_ecn as follows: + * +---------+----------------------------------------+ + * |Arriving | Arriving Outer Header | + * | Inner +---------+---------+---------+----------+ + * | Header | Not-ECT | ECT(0) | ECT(1) | CE | + * +---------+---------+---------+---------+----------+ + * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop> | + * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE* | + * | ECT(1) | ECT(1) | ECT(1) | ECT(1)* | CE* | + * | CE | CE | CE | CE | CE | + * +---------+---------+---------+---------+----------+ + * + * Tc matches on inner after decapsulation on tunnel device, but hw offload matches + * the inner ip_ecn value before hardware decap action. + * + * Cells marked are changed from original inner packet ip_ecn value during decap, and + * so matching those values on inner ip_ecn before decap will fail. + * + * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn, + * except for the outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE, + * and such we can drop the inner ip_ecn=CE match. + */ + +static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv, + struct flow_cls_offload *f, + bool *match_inner_ecn) +{ + u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0; + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ip match; + + *match_inner_ecn = true; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + flow_rule_match_enc_ip(rule, &match); + outer_ecn_key = match.key->tos & INET_ECN_MASK; + outer_ecn_mask = match.mask->tos & INET_ECN_MASK; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + flow_rule_match_ip(rule, &match); + inner_ecn_key = match.key->tos & INET_ECN_MASK; + inner_ecn_mask = match.mask->tos & INET_ECN_MASK; + } + + if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) { + NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported"); + netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported"); + return -EOPNOTSUPP; + } + + if (!outer_ecn_mask) { + if (!inner_ecn_mask) + return 0; + + NL_SET_ERR_MSG_MOD(extack, + "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported"); + netdev_warn(priv->netdev, + "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported"); + return -EOPNOTSUPP; + } + + if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) { + NL_SET_ERR_MSG_MOD(extack, + "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported"); + netdev_warn(priv->netdev, + "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported"); + return -EOPNOTSUPP; + } + + if (!inner_ecn_mask) + return 0; + + /* Both inner and outer have full mask on ecn */ + + if (outer_ecn_key == INET_ECN_ECT_1) { + /* inner ecn might change by DECAP action */ + + NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported"); + netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported"); + return -EOPNOTSUPP; + } + + if (outer_ecn_key != INET_ECN_CE) + return 0; + + if (inner_ecn_key != INET_ECN_CE) { + /* Can't happen in software, as packet ecn will be changed to CE after decap */ + NL_SET_ERR_MSG_MOD(extack, + "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported"); + netdev_warn(priv->netdev, + "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported"); + return -EOPNOTSUPP; + } + + /* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE in anycase, + * drop match on inner ecn + */ + *match_inner_ecn = false; + + return 0; +} + static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, @@ -2053,16 +2455,14 @@ static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec) outer_headers); } -static void *get_match_headers_value(u32 flags, - struct mlx5_flow_spec *spec) +void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec) { return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? get_match_inner_headers_value(spec) : get_match_outer_headers_value(spec); } -static void *get_match_headers_criteria(u32 flags, - struct mlx5_flow_spec *spec) +void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec) { return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? get_match_inner_headers_criteria(spec) : @@ -2143,6 +2543,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; enum fs_flow_table_type fs_type; + bool match_inner_ecn = true; u16 addr_type = 0; u8 ip_proto = 0; u8 *match_level; @@ -2196,6 +2597,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, headers_c = get_match_inner_headers_criteria(spec); headers_v = get_match_inner_headers_value(spec); } + + err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn); + if (err) + return err; } err = mlx5e_flower_parse_meta(filter_dev, f); @@ -2257,6 +2662,17 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, match.key->vlan_priority); *match_level = MLX5_MATCH_L2; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) && + match.mask->vlan_eth_type && + MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, + ft_field_support.outer_second_vid, + fs_type)) { + MLX5_SET(fte_match_set_misc, misc_c, + outer_second_cvlan_tag, 1); + spec->match_criteria_enable |= + MLX5_MATCH_MISC_PARAMETERS; + } } } else if (*match_level != MLX5_MATCH_NONE) { /* cvlan_tag enabled in match criteria and @@ -2419,10 +2835,12 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, struct flow_match_ip match; flow_rule_match_ip(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, - match.mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, - match.key->tos & 0x3); + if (match_inner_ecn) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); + } MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, match.mask->tos >> 2); @@ -2607,55 +3025,6 @@ static int parse_cls_flower(struct mlx5e_priv *priv, return err; } -struct pedit_headers { - struct ethhdr eth; - struct vlan_hdr vlan; - struct iphdr ip4; - struct ipv6hdr ip6; - struct tcphdr tcp; - struct udphdr udp; -}; - -struct pedit_headers_action { - struct pedit_headers vals; - struct pedit_headers masks; - u32 pedits; -}; - -static int pedit_header_offsets[] = { - [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), - [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), - [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), - [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), - [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), -}; - -#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) - -static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, - struct pedit_headers_action *hdrs, - struct netlink_ext_ack *extack) -{ - u32 *curr_pmask, *curr_pval; - - curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); - curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset); - - if (*curr_pmask & mask) { /* disallow acting twice on the same location */ - NL_SET_ERR_MSG_MOD(extack, - "curr_pmask and new mask same. Acting twice on same location"); - goto out_err; - } - - *curr_pmask |= mask; - *curr_pval |= (val & mask); - - return 0; - -out_err: - return -EOPNOTSUPP; -} - struct mlx5_fields { u8 field; u8 field_bsize; @@ -2759,34 +3128,32 @@ static unsigned long mask_to_le(unsigned long mask, int size) return mask; } + static int offload_pedit_fields(struct mlx5e_priv *priv, int namespace, - struct pedit_headers_action *hdrs, struct mlx5e_tc_flow_parse_attr *parse_attr, u32 *action_flags, struct netlink_ext_ack *extack) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; - int i, action_size, first, last, next_z; + struct pedit_headers_action *hdrs = parse_attr->hdrs; void *headers_c, *headers_v, *action, *vals_p; u32 *s_masks_p, *a_masks_p, s_mask, a_mask; struct mlx5e_tc_mod_hdr_acts *mod_acts; - struct mlx5_fields *f; unsigned long mask, field_mask; - int err; + int i, first, last, next_z; + struct mlx5_fields *f; u8 cmd; mod_acts = &parse_attr->mod_hdr_acts; - headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec); - headers_v = get_match_headers_value(*action_flags, &parse_attr->spec); + headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec); + headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec); set_masks = &hdrs[0].masks; add_masks = &hdrs[1].masks; set_vals = &hdrs[0].vals; add_vals = &hdrs[1].vals; - action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto); - for (i = 0; i < ARRAY_SIZE(fields); i++) { bool skip; @@ -2854,18 +3221,16 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts); - if (err) { + action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts); + if (IS_ERR(action)) { NL_SET_ERR_MSG_MOD(extack, "too many pedit actions, can't offload"); mlx5_core_warn(priv->mdev, "mlx5: parsed %d pedit actions, can't do more\n", mod_acts->num_actions); - return err; + return PTR_ERR(action); } - action = mod_acts->actions + - (mod_acts->num_actions * action_size); MLX5_SET(set_action_in, action, action_type, cmd); MLX5_SET(set_action_in, action, field, f->field); @@ -2895,205 +3260,51 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, return 0; } -static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev, - int namespace) -{ - if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ - return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions); - else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */ - return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions); -} - -int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, - int namespace, - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) -{ - int action_size, new_num_actions, max_hw_actions; - size_t new_sz, old_sz; - void *ret; - - if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions) - return 0; - - action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto); - - max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev, - namespace); - new_num_actions = min(max_hw_actions, - mod_hdr_acts->actions ? - mod_hdr_acts->max_actions * 2 : 1); - if (mod_hdr_acts->max_actions == new_num_actions) - return -ENOSPC; - - new_sz = action_size * new_num_actions; - old_sz = mod_hdr_acts->max_actions * action_size; - ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL); - if (!ret) - return -ENOMEM; - - memset(ret + old_sz, 0, new_sz - old_sz); - mod_hdr_acts->actions = ret; - mod_hdr_acts->max_actions = new_num_actions; - - return 0; -} - -void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) -{ - kfree(mod_hdr_acts->actions); - mod_hdr_acts->actions = NULL; - mod_hdr_acts->num_actions = 0; - mod_hdr_acts->max_actions = 0; -} - static const struct pedit_headers zero_masks = {}; -static int -parse_pedit_to_modify_hdr(struct mlx5e_priv *priv, - const struct flow_action_entry *act, int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, - struct netlink_ext_ack *extack) -{ - u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; - int err = -EOPNOTSUPP; - u32 mask, val, offset; - u8 htype; - - htype = act->mangle.htype; - err = -EOPNOTSUPP; /* can't be all optimistic */ - - if (htype == FLOW_ACT_MANGLE_UNSPEC) { - NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded"); - goto out_err; - } - - if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) { - NL_SET_ERR_MSG_MOD(extack, - "The pedit offload action is not supported"); - goto out_err; - } - - mask = act->mangle.mask; - val = act->mangle.val; - offset = act->mangle.offset; - - err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd], extack); - if (err) - goto out_err; - - hdrs[cmd].pedits++; - - return 0; -out_err: - return err; -} - -static int -parse_pedit_to_reformat(const struct flow_action_entry *act, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct netlink_ext_ack *extack) +static int verify_offload_pedit_fields(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct netlink_ext_ack *extack) { - u32 mask, val, offset; - u32 *p; - - if (act->id != FLOW_ACTION_MANGLE) { - NL_SET_ERR_MSG_MOD(extack, "Unsupported action id"); - return -EOPNOTSUPP; - } + struct pedit_headers *cmd_masks; + u8 cmd; - if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) { - NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported"); - return -EOPNOTSUPP; + for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { + cmd_masks = &parse_attr->hdrs[cmd].masks; + if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { + NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field"); + netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd); + print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS, + 16, 1, cmd_masks, sizeof(zero_masks), true); + return -EOPNOTSUPP; + } } - mask = ~act->mangle.mask; - val = act->mangle.val; - offset = act->mangle.offset; - p = (u32 *)&parse_attr->eth; - *(p + (offset >> 2)) |= (val & mask); - return 0; } -static int parse_tc_pedit_action(struct mlx5e_priv *priv, - const struct flow_action_entry *act, int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) -{ - if (flow && flow_flag_test(flow, L3_TO_L2_DECAP)) - return parse_pedit_to_reformat(act, parse_attr, extack); - - return parse_pedit_to_modify_hdr(priv, act, namespace, - parse_attr, hdrs, extack); -} - static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, u32 *action_flags, struct netlink_ext_ack *extack) { - struct pedit_headers *cmd_masks; int err; - u8 cmd; - err = offload_pedit_fields(priv, namespace, hdrs, parse_attr, - action_flags, extack); - if (err < 0) + err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack); + if (err) goto out_dealloc_parsed_actions; - for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { - cmd_masks = &hdrs[cmd].masks; - if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { - NL_SET_ERR_MSG_MOD(extack, - "attempt to offload an unsupported field"); - netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd); - print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS, - 16, 1, cmd_masks, sizeof(zero_masks), true); - err = -EOPNOTSUPP; - goto out_dealloc_parsed_actions; - } - } + err = verify_offload_pedit_fields(priv, parse_attr, extack); + if (err) + goto out_dealloc_parsed_actions; return 0; out_dealloc_parsed_actions: - dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); + mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); return err; } -static bool csum_offload_supported(struct mlx5e_priv *priv, - u32 action, - u32 update_flags, - struct netlink_ext_ack *extack) -{ - u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP | - TCA_CSUM_UPDATE_FLAG_UDP; - - /* The HW recalcs checksums only if re-writing headers */ - if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) { - NL_SET_ERR_MSG_MOD(extack, - "TC csum action is only offloaded with pedit"); - netdev_warn(priv->netdev, - "TC csum action is only offloaded with pedit\n"); - return false; - } - - if (update_flags & ~prot_flags) { - NL_SET_ERR_MSG_MOD(extack, - "can't offload TC csum action for some header/s"); - netdev_warn(priv->netdev, - "can't offload TC csum action for some header/s - flags %#x\n", - update_flags); - return false; - } - - return true; -} - struct ip_ttl_word { __u8 ttl; __u8 protocol; @@ -3216,8 +3427,8 @@ static bool modify_header_match_supported(struct mlx5e_priv *priv, u8 ip_proto; int i; - headers_c = get_match_headers_criteria(actions, spec); - headers_v = get_match_headers_value(actions, spec); + headers_c = mlx5e_get_match_headers_criteria(actions, spec); + headers_v = mlx5e_get_match_headers_value(actions, spec); ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); /* for non-IP we only re-write MACs, so we're okay */ @@ -3291,11 +3502,11 @@ actions_match_supported_fdb(struct mlx5e_priv *priv, static bool actions_match_supported(struct mlx5e_priv *priv, struct flow_action *flow_action, + u32 actions, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { - u32 actions = flow->attr->action; bool ct_flow, ct_clear; ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; @@ -3307,6 +3518,30 @@ actions_match_supported(struct mlx5e_priv *priv, return false; } + if (!(~actions & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action"); + return false; + } + + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); + return false; + } + + if (!(~actions & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action"); + return false; + } + + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); + return false; + } + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions, ct_flow, ct_clear, extack)) @@ -3324,7 +3559,7 @@ static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv return priv->mdev == peer_priv->mdev; } -static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) +bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) { struct mlx5_core_dev *fmdev, *pmdev; u64 fsystem_guid, psystem_guid; @@ -3338,139 +3573,14 @@ static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) return (fsystem_guid == psystem_guid); } -static bool same_vf_reps(struct mlx5e_priv *priv, - struct net_device *out_dev) -{ - return mlx5e_eswitch_vf_rep(priv->netdev) && - priv->netdev == out_dev; -} - -static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace, - const struct flow_action_entry *act, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, - u32 *action, struct netlink_ext_ack *extack) -{ - u16 mask16 = VLAN_VID_MASK; - u16 val16 = act->vlan.vid & VLAN_VID_MASK; - const struct flow_action_entry pedit_act = { - .id = FLOW_ACTION_MANGLE, - .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH, - .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI), - .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16), - .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16), - }; - u8 match_prio_mask, match_prio_val; - void *headers_c, *headers_v; - int err; - - headers_c = get_match_headers_criteria(*action, &parse_attr->spec); - headers_v = get_match_headers_value(*action, &parse_attr->spec); - - if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) && - MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) { - NL_SET_ERR_MSG_MOD(extack, - "VLAN rewrite action must have VLAN protocol match"); - return -EOPNOTSUPP; - } - - match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); - match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); - if (act->vlan.prio != (match_prio_val & match_prio_mask)) { - NL_SET_ERR_MSG_MOD(extack, - "Changing VLAN prio is not supported"); - return -EOPNOTSUPP; - } - - err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack); - *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - - return err; -} - -static int -add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, - u32 *action, struct netlink_ext_ack *extack) -{ - const struct flow_action_entry prio_tag_act = { - .vlan.vid = 0, - .vlan.prio = - MLX5_GET(fte_match_set_lyr_2_4, - get_match_headers_value(*action, - &parse_attr->spec), - first_prio) & - MLX5_GET(fte_match_set_lyr_2_4, - get_match_headers_criteria(*action, - &parse_attr->spec), - first_prio), - }; - - return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, - &prio_tag_act, parse_attr, hdrs, action, - extack); -} - -static int validate_goto_chain(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - const struct flow_action_entry *act, - u32 actions, - struct netlink_ext_ack *extack) -{ - bool is_esw = mlx5e_is_eswitch_flow(flow); - struct mlx5_flow_attr *attr = flow->attr; - bool ft_flow = mlx5e_is_ft_flow(flow); - u32 dest_chain = act->chain_index; - struct mlx5_fs_chains *chains; - struct mlx5_eswitch *esw; - u32 reformat_and_fwd; - u32 max_chain; - - esw = priv->mdev->priv.eswitch; - chains = is_esw ? esw_chains(esw) : nic_chains(priv); - max_chain = mlx5_chains_get_chain_range(chains); - reformat_and_fwd = is_esw ? - MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) : - MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table); - - if (ft_flow) { - NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); - return -EOPNOTSUPP; - } - - if (!mlx5_chains_backwards_supported(chains) && - dest_chain <= attr->chain) { - NL_SET_ERR_MSG_MOD(extack, - "Goto lower numbered chain isn't supported"); - return -EOPNOTSUPP; - } - - if (dest_chain > max_chain) { - NL_SET_ERR_MSG_MOD(extack, - "Requested destination chain is out of supported range"); - return -EOPNOTSUPP; - } - - if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | - MLX5_FLOW_CONTEXT_ACTION_DECAP) && - !reformat_and_fwd) { - NL_SET_ERR_MSG_MOD(extack, - "Goto chain is not allowed if action has reformat or decap"); - return -EOPNOTSUPP; - } - - return 0; -} - static int actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, - struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) { struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct pedit_headers_action *hdrs = parse_attr->hdrs; enum mlx5_flow_namespace_type ns_type; int err; @@ -3478,19 +3588,18 @@ actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) return 0; - ns_type = get_flow_name_space(flow); + ns_type = mlx5e_get_flow_namespace(flow); - err = alloc_tc_pedit_action(priv, ns_type, parse_attr, hdrs, - &attr->action, extack); + err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack); if (err) return err; - /* In case all pedit actions are skipped, remove the MOD_HDR flag. */ if (parse_attr->mod_hdr_acts.num_actions > 0) return 0; + /* In case all pedit actions are skipped, remove the MOD_HDR flag. */ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); + mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); if (ns_type != MLX5_FLOW_NAMESPACE_FDB) return 0; @@ -3502,295 +3611,372 @@ actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, return 0; } -static int -parse_tc_nic_actions(struct mlx5e_priv *priv, - struct flow_action *flow_action, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) +static struct mlx5_flow_attr* +mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr, + enum mlx5_flow_namespace_type ns_type) { struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_flow_attr *attr = flow->attr; - struct pedit_headers_action hdrs[2] = {}; - const struct flow_action_entry *act; - struct mlx5_nic_flow_attr *nic_attr; - u32 action = 0; - int err, i; + u32 attr_sz = ns_to_attr_sz(ns_type); + struct mlx5_flow_attr *attr2; - if (!flow_action_has_entries(flow_action)) { - NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries"); - return -EINVAL; + attr2 = mlx5_alloc_flow_attr(ns_type); + parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); + if (!attr2 || !parse_attr) { + kvfree(parse_attr); + kfree(attr2); + return NULL; } - if (!flow_action_hw_stats_check(flow_action, extack, - FLOW_ACTION_HW_STATS_DELAYED_BIT)) { - NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); - return -EOPNOTSUPP; + memcpy(attr2, attr, attr_sz); + INIT_LIST_HEAD(&attr2->list); + parse_attr->filter_dev = attr->parse_attr->filter_dev; + attr2->action = 0; + attr2->flags = 0; + attr2->parse_attr = parse_attr; + attr2->dest_chain = 0; + attr2->dest_ft = NULL; + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { + attr2->esw_attr->out_count = 0; + attr2->esw_attr->split_count = 0; } - nic_attr = attr->nic_attr; - nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; - parse_attr = attr->parse_attr; + return attr2; +} - flow_action_for_each(i, act, flow_action) { - switch (act->id) { - case FLOW_ACTION_ACCEPT: - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - break; - case FLOW_ACTION_DROP: - action |= MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_COUNT; +static struct mlx5_core_dev * +get_flow_counter_dev(struct mlx5e_tc_flow *flow) +{ + return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev; +} + +struct mlx5_flow_attr * +mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow) +{ + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_attr *attr; + int i; + + list_for_each_entry(attr, &flow->attrs, list) { + esw_attr = attr->esw_attr; + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) + return attr; + } + } + + return NULL; +} + +void +mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow) +{ + struct mlx5e_post_act *post_act = get_post_action(flow->priv); + struct mlx5_flow_attr *attr; + + list_for_each_entry(attr, &flow->attrs, list) { + if (list_is_last(&attr->list, &flow->attrs)) break; - case FLOW_ACTION_MANGLE: - case FLOW_ACTION_ADD: - err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL, - parse_attr, hdrs, NULL, extack); - if (err) - return err; - action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle); + } +} + +static void +free_flow_post_acts(struct mlx5e_tc_flow *flow) +{ + struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow); + struct mlx5e_post_act *post_act = get_post_action(flow->priv); + struct mlx5_flow_attr *attr, *tmp; + bool vf_tun; + + list_for_each_entry_safe(attr, tmp, &flow->attrs, list) { + if (list_is_last(&attr->list, &flow->attrs)) break; - case FLOW_ACTION_VLAN_MANGLE: - err = add_vlan_rewrite_action(priv, - MLX5_FLOW_NAMESPACE_KERNEL, - act, parse_attr, hdrs, - &action, extack); - if (err) - return err; + if (attr->post_act_handle) + mlx5e_tc_post_act_del(post_act, attr->post_act_handle); + + clean_encap_dests(flow->priv, flow, attr, &vf_tun); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + mlx5_fc_destroy(counter_dev, attr->counter); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); + if (attr->modify_hdr) + mlx5_modify_header_dealloc(flow->priv->mdev, attr->modify_hdr); + } + + list_del(&attr->list); + kvfree(attr->parse_attr); + kfree(attr); + } +} + +int +mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow) +{ + struct mlx5e_post_act *post_act = get_post_action(flow->priv); + struct mlx5_flow_attr *attr; + int err = 0; + + list_for_each_entry(attr, &flow->attrs, list) { + if (list_is_last(&attr->list, &flow->attrs)) break; - case FLOW_ACTION_CSUM: - if (csum_offload_supported(priv, action, - act->csum_flags, - extack)) - break; - return -EOPNOTSUPP; - case FLOW_ACTION_REDIRECT: { - struct net_device *peer_dev = act->dev; - - if (priv->netdev->netdev_ops == peer_dev->netdev_ops && - same_hw_devs(priv, netdev_priv(peer_dev))) { - parse_attr->mirred_ifindex[0] = peer_dev->ifindex; - flow_flag_set(flow, HAIRPIN); - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - } else { - NL_SET_ERR_MSG_MOD(extack, - "device is not on same HW, can't offload"); - netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n", - peer_dev->name); - return -EOPNOTSUPP; - } - } + err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle); + if (err) break; - case FLOW_ACTION_MARK: { - u32 mark = act->mark; + } - if (mark & ~MLX5E_TC_FLOW_ID_MASK) { - NL_SET_ERR_MSG_MOD(extack, - "Bad flow mark - only 16 bit is supported"); - return -EOPNOTSUPP; - } + return err; +} - nic_attr->flow_tag = mark; - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } - break; - case FLOW_ACTION_GOTO: - err = validate_goto_chain(priv, flow, act, action, - extack); +/* TC filter rule HW translation: + * + * +---------------------+ + * + ft prio (tc chain) + + * + original match + + * +---------------------+ + * | + * | if multi table action + * | + * v + * +---------------------+ + * + post act ft |<----. + * + match fte id | | split on multi table action + * + do actions |-----' + * +---------------------+ + * | + * | + * v + * Do rest of the actions after last multi table action. + */ +static int +alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) +{ + struct mlx5e_post_act *post_act = get_post_action(flow->priv); + struct mlx5_flow_attr *attr, *next_attr = NULL; + struct mlx5e_post_act_handle *handle; + bool vf_tun, encap_valid = true; + int err; + + /* This is going in reverse order as needed. + * The first entry is the last attribute. + */ + list_for_each_entry(attr, &flow->attrs, list) { + if (!next_attr) { + /* Set counter action on last post act rule. */ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + } else { + err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr); if (err) - return err; + goto out_free; + } - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - attr->dest_chain = act->chain_index; + /* Don't add post_act rule for first attr (last in the list). + * It's being handled by the caller. + */ + if (list_is_last(&attr->list, &flow->attrs)) break; - case FLOW_ACTION_CT: - err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, - &parse_attr->mod_hdr_acts, - act, extack); + + err = set_encap_dests(flow->priv, flow, attr, extack, &encap_valid, &vf_tun); + if (err) + goto out_free; + + if (!encap_valid) + flow_flag_set(flow, SLOW); + + err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack); + if (err) + goto out_free; + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr); if (err) - return err; + goto out_free; + } - flow_flag_set(flow, CT); - break; - default: - NL_SET_ERR_MSG_MOD(extack, - "The offload action is not supported in NIC action"); - return -EOPNOTSUPP; + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr); + if (err) + goto out_free; } - } - attr->action = action; + handle = mlx5e_tc_post_act_add(post_act, attr); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto out_free; + } - if (attr->dest_chain && parse_attr->mirred_ifindex[0]) { - NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported"); - return -EOPNOTSUPP; + attr->post_act_handle = handle; + next_attr = attr; } - err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack); - if (err) - return err; + if (flow_flag_test(flow, SLOW)) + goto out; - if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) - return -EOPNOTSUPP; + err = mlx5e_tc_offload_flow_post_acts(flow); + if (err) + goto out_free; +out: return 0; + +out_free: + free_flow_post_acts(flow); + return err; } -static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, - struct net_device *peer_netdev) +static int +parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, + struct flow_action *flow_action) { - struct mlx5e_priv *peer_priv; - - peer_priv = netdev_priv(peer_netdev); + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow_action flow_action_reorder; + struct mlx5e_tc_flow *flow = parse_state->flow; + struct mlx5_flow_attr *attr = flow->attr; + enum mlx5_flow_namespace_type ns_type; + struct mlx5e_priv *priv = flow->priv; + struct flow_action_entry *act, **_act; + struct mlx5e_tc_act *tc_act; + int err, i; - return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) && - mlx5e_eswitch_vf_rep(priv->netdev) && - mlx5e_eswitch_vf_rep(peer_netdev) && - same_hw_devs(priv, peer_priv)); -} + flow_action_reorder.num_entries = flow_action->num_entries; + flow_action_reorder.entries = kcalloc(flow_action->num_entries, + sizeof(flow_action), GFP_KERNEL); + if (!flow_action_reorder.entries) + return -ENOMEM; -static int parse_tc_vlan_action(struct mlx5e_priv *priv, - const struct flow_action_entry *act, - struct mlx5_esw_flow_attr *attr, - u32 *action, - struct netlink_ext_ack *extack) -{ - u8 vlan_idx = attr->total_vlan; + mlx5e_tc_act_reorder_flow_actions(flow_action, &flow_action_reorder); - if (vlan_idx >= MLX5_FS_VLAN_DEPTH) { - NL_SET_ERR_MSG_MOD(extack, "Total vlans used is greater than supported"); - return -EOPNOTSUPP; - } + ns_type = mlx5e_get_flow_namespace(flow); + list_add(&attr->list, &flow->attrs); - switch (act->id) { - case FLOW_ACTION_VLAN_POP: - if (vlan_idx) { - if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, - MLX5_FS_VLAN_DEPTH)) { - NL_SET_ERR_MSG_MOD(extack, - "vlan pop action is not supported"); - return -EOPNOTSUPP; - } + flow_action_for_each(i, _act, &flow_action_reorder) { + act = *_act; + tc_act = mlx5e_tc_act_get(act->id, ns_type); + if (!tc_act) { + NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action"); + err = -EOPNOTSUPP; + goto out_free; + } - *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2; - } else { - *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + if (!tc_act->can_offload(parse_state, act, i, attr)) { + err = -EOPNOTSUPP; + goto out_free; } - break; - case FLOW_ACTION_VLAN_PUSH: - attr->vlan_vid[vlan_idx] = act->vlan.vid; - attr->vlan_prio[vlan_idx] = act->vlan.prio; - attr->vlan_proto[vlan_idx] = act->vlan.proto; - if (!attr->vlan_proto[vlan_idx]) - attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q); - - if (vlan_idx) { - if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, - MLX5_FS_VLAN_DEPTH)) { - NL_SET_ERR_MSG_MOD(extack, - "vlan push action is not supported for vlan depth > 1"); - return -EOPNOTSUPP; - } - *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; - } else { - if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) && - (act->vlan.proto != htons(ETH_P_8021Q) || - act->vlan.prio)) { - NL_SET_ERR_MSG_MOD(extack, - "vlan push action is not supported"); - return -EOPNOTSUPP; + err = tc_act->parse_action(parse_state, act, priv, attr); + if (err) + goto out_free; + + parse_state->actions |= attr->action; + + /* Split attr for multi table act if not the last act. */ + if (tc_act->is_multi_table_act && + tc_act->is_multi_table_act(priv, act, attr) && + i < flow_action_reorder.num_entries - 1) { + err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type); + if (err) + goto out_free; + + attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type); + if (!attr) { + err = -ENOMEM; + goto out_free; } - *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + list_add(&attr->list, &flow->attrs); } - break; - default: - NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN"); - return -EINVAL; } - attr->total_vlan = vlan_idx + 1; + kfree(flow_action_reorder.entries); + + err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type); + if (err) + goto out_free_post_acts; + + err = alloc_flow_post_acts(flow, extack); + if (err) + goto out_free_post_acts; return 0; + +out_free: + kfree(flow_action_reorder.entries); +out_free_post_acts: + free_flow_post_acts(flow); + + return err; } -static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev, - struct net_device *out_dev) +static int +flow_action_supported(struct flow_action *flow_action, + struct netlink_ext_ack *extack) { - struct net_device *fdb_out_dev = out_dev; - struct net_device *uplink_upper; + if (!flow_action_has_entries(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries"); + return -EINVAL; + } - rcu_read_lock(); - uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev); - if (uplink_upper && netif_is_lag_master(uplink_upper) && - uplink_upper == out_dev) { - fdb_out_dev = uplink_dev; - } else if (netif_is_lag_master(out_dev)) { - fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev)); - if (fdb_out_dev && - (!mlx5e_eswitch_rep(fdb_out_dev) || - !netdev_port_same_parent_id(fdb_out_dev, uplink_dev))) - fdb_out_dev = NULL; + if (!flow_action_hw_stats_check(flow_action, extack, + FLOW_ACTION_HW_STATS_DELAYED_BIT)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); + return -EOPNOTSUPP; } - rcu_read_unlock(); - return fdb_out_dev; + + return 0; } -static int add_vlan_push_action(struct mlx5e_priv *priv, - struct mlx5_flow_attr *attr, - struct net_device **out_dev, - u32 *action, - struct netlink_ext_ack *extack) +static int +parse_tc_nic_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) { - struct net_device *vlan_dev = *out_dev; - struct flow_action_entry vlan_act = { - .id = FLOW_ACTION_VLAN_PUSH, - .vlan.vid = vlan_dev_vlan_id(vlan_dev), - .vlan.proto = vlan_dev_vlan_proto(vlan_dev), - .vlan.prio = 0, - }; + struct mlx5e_tc_act_parse_state *parse_state; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; int err; - err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action, extack); + err = flow_action_supported(flow_action, extack); if (err) return err; - rcu_read_lock(); - *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev)); - rcu_read_unlock(); - if (!*out_dev) - return -ENODEV; + attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + parse_attr = attr->parse_attr; + parse_state = &parse_attr->parse_state; + mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); + parse_state->ct_priv = get_ct_priv(priv); + + err = parse_tc_actions(parse_state, flow_action); + if (err) + return err; - if (is_vlan_dev(*out_dev)) - err = add_vlan_push_action(priv, attr, out_dev, action, extack); + err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack); + if (err) + return err; - return err; + if (!actions_match_supported(priv, flow_action, parse_state->actions, + parse_attr, flow, extack)) + return -EOPNOTSUPP; + + return 0; } -static int add_vlan_pop_action(struct mlx5e_priv *priv, - struct mlx5_flow_attr *attr, - u32 *action, - struct netlink_ext_ack *extack) +static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, + struct net_device *peer_netdev) { - struct flow_action_entry vlan_act = { - .id = FLOW_ACTION_VLAN_POP, - }; - int nest_level, err = 0; + struct mlx5e_priv *peer_priv; - nest_level = attr->parse_attr->filter_dev->lower_level - - priv->netdev->lower_level; - while (nest_level--) { - err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action, extack); - if (err) - return err; - } + peer_priv = netdev_priv(peer_netdev); - return err; + return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) && + mlx5e_eswitch_vf_rep(priv->netdev) && + mlx5e_eswitch_vf_rep(peer_netdev) && + mlx5e_same_hw_devs(priv, peer_priv)); } static bool same_hw_reps(struct mlx5e_priv *priv, @@ -3802,7 +3988,7 @@ static bool same_hw_reps(struct mlx5e_priv *priv, return mlx5e_eswitch_rep(priv->netdev) && mlx5e_eswitch_rep(peer_netdev) && - same_hw_devs(priv, peer_priv); + mlx5e_same_hw_devs(priv, peer_priv); } static bool is_lag_dev(struct mlx5e_priv *priv, @@ -3813,12 +3999,25 @@ static bool is_lag_dev(struct mlx5e_priv *priv, same_hw_reps(priv, peer_netdev)); } +static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev) +{ + if (same_hw_reps(priv, out_dev) && + MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) && + MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up)) + return true; + + return false; +} + bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, struct net_device *out_dev) { if (is_merged_eswitch_vfs(priv, out_dev)) return true; + if (is_multiport_eligible(priv, out_dev)) + return true; + if (is_lag_dev(priv, out_dev)) return true; @@ -3826,66 +4025,6 @@ bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, same_port_devs(priv, netdev_priv(out_dev)); } -static bool is_duplicated_output_device(struct net_device *dev, - struct net_device *out_dev, - int *ifindexes, int if_count, - struct netlink_ext_ack *extack) -{ - int i; - - for (i = 0; i < if_count; i++) { - if (ifindexes[i] == out_dev->ifindex) { - NL_SET_ERR_MSG_MOD(extack, - "can't duplicate output to same device"); - netdev_err(dev, "can't duplicate output to same device: %s\n", - out_dev->name); - return true; - } - } - - return false; -} - -static int verify_uplink_forwarding(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - struct net_device *out_dev, - struct netlink_ext_ack *extack) -{ - struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_rep_priv *rep_priv; - - /* Forwarding non encapsulated traffic between - * uplink ports is allowed only if - * termination_table_raw_traffic cap is set. - * - * Input vport was stored attr->in_rep. - * In LAG case, *priv* is the private data of - * uplink which may be not the input vport. - */ - rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep); - - if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) && - mlx5e_eswitch_uplink_rep(out_dev))) - return 0; - - if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, - termination_table_raw_traffic)) { - NL_SET_ERR_MSG_MOD(extack, - "devices are both uplink, can't offload forwarding"); - pr_err("devices %s %s are both uplink, can't offload forwarding\n", - priv->netdev->name, out_dev->name); - return -EOPNOTSUPP; - } else if (out_dev != rep_priv->netdev) { - NL_SET_ERR_MSG_MOD(extack, - "devices are not the same uplink, can't offload forwarding"); - pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n", - priv->netdev->name, out_dev->name); - return -EOPNOTSUPP; - } - return 0; -} - int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, int ifindex, @@ -3925,442 +4064,57 @@ int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, return 0; } -static int parse_tc_fdb_actions(struct mlx5e_priv *priv, - struct flow_action *flow_action, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) +static int +parse_tc_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) { - struct pedit_headers_action hdrs[2] = {}; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_act_parse_state *parse_state; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5e_sample_attr sample_attr = {}; - const struct ip_tunnel_info *info = NULL; struct mlx5_flow_attr *attr = flow->attr; - int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; - bool ft_flow = mlx5e_is_ft_flow(flow); - const struct flow_action_entry *act; struct mlx5_esw_flow_attr *esw_attr; - bool encap = false, decap = false; - u32 action = attr->action; - int err, i, if_count = 0; - bool ptype_host = false; - bool mpls_push = false; - - if (!flow_action_has_entries(flow_action)) { - NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries"); - return -EINVAL; - } + struct net_device *filter_dev; + int err; - if (!flow_action_hw_stats_check(flow_action, extack, - FLOW_ACTION_HW_STATS_DELAYED_BIT)) { - NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); - return -EOPNOTSUPP; - } + err = flow_action_supported(flow_action, extack); + if (err) + return err; esw_attr = attr->esw_attr; parse_attr = attr->parse_attr; + filter_dev = parse_attr->filter_dev; + parse_state = &parse_attr->parse_state; + mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); + parse_state->ct_priv = get_ct_priv(priv); - flow_action_for_each(i, act, flow_action) { - switch (act->id) { - case FLOW_ACTION_ACCEPT: - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - attr->flags |= MLX5_ESW_ATTR_FLAG_ACCEPT; - break; - case FLOW_ACTION_PTYPE: - if (act->ptype != PACKET_HOST) { - NL_SET_ERR_MSG_MOD(extack, - "skbedit ptype is only supported with type host"); - return -EOPNOTSUPP; - } - - ptype_host = true; - break; - case FLOW_ACTION_DROP: - action |= MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - break; - case FLOW_ACTION_TRAP: - if (!flow_offload_has_one_action(flow_action)) { - NL_SET_ERR_MSG_MOD(extack, - "action trap is supported as a sole action only"); - return -EOPNOTSUPP; - } - action |= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT); - attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; - break; - case FLOW_ACTION_MPLS_PUSH: - if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, - reformat_l2_to_l3_tunnel) || - act->mpls_push.proto != htons(ETH_P_MPLS_UC)) { - NL_SET_ERR_MSG_MOD(extack, - "mpls push is supported only for mpls_uc protocol"); - return -EOPNOTSUPP; - } - mpls_push = true; - break; - case FLOW_ACTION_MPLS_POP: - /* we only support mpls pop if it is the first action - * and the filter net device is bareudp. Subsequent - * actions can be pedit and the last can be mirred - * egress redirect. - */ - if (i) { - NL_SET_ERR_MSG_MOD(extack, - "mpls pop supported only as first action"); - return -EOPNOTSUPP; - } - if (!netif_is_bareudp(parse_attr->filter_dev)) { - NL_SET_ERR_MSG_MOD(extack, - "mpls pop supported only on bareudp devices"); - return -EOPNOTSUPP; - } - - parse_attr->eth.h_proto = act->mpls_pop.proto; - action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - flow_flag_set(flow, L3_TO_L2_DECAP); - break; - case FLOW_ACTION_MANGLE: - case FLOW_ACTION_ADD: - err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB, - parse_attr, hdrs, flow, extack); - if (err) - return err; - - if (!flow_flag_test(flow, L3_TO_L2_DECAP)) { - action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - esw_attr->split_count = esw_attr->out_count; - } - break; - case FLOW_ACTION_CSUM: - if (csum_offload_supported(priv, action, - act->csum_flags, extack)) - break; - - return -EOPNOTSUPP; - case FLOW_ACTION_REDIRECT_INGRESS: { - struct net_device *out_dev; - - out_dev = act->dev; - if (!out_dev) - return -EOPNOTSUPP; - - if (!netif_is_ovs_master(out_dev)) { - NL_SET_ERR_MSG_MOD(extack, - "redirect to ingress is supported only for OVS internal ports"); - return -EOPNOTSUPP; - } - - if (netif_is_ovs_master(parse_attr->filter_dev)) { - NL_SET_ERR_MSG_MOD(extack, - "redirect to ingress is not supported from internal port"); - return -EOPNOTSUPP; - } - - if (!ptype_host) { - NL_SET_ERR_MSG_MOD(extack, - "redirect to int port ingress requires ptype=host action"); - return -EOPNOTSUPP; - } - - if (esw_attr->out_count) { - NL_SET_ERR_MSG_MOD(extack, - "redirect to int port ingress is supported only as single destination"); - return -EOPNOTSUPP; - } - - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - - err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex, - MLX5E_TC_INT_PORT_INGRESS, - &action, esw_attr->out_count); - if (err) - return err; - - esw_attr->out_count++; - - break; - } - case FLOW_ACTION_REDIRECT: - case FLOW_ACTION_MIRRED: { - struct mlx5e_priv *out_priv; - struct net_device *out_dev; - - out_dev = act->dev; - if (!out_dev) { - /* out_dev is NULL when filters with - * non-existing mirred device are replayed to - * the driver. - */ - return -EINVAL; - } - - if (mpls_push && !netif_is_bareudp(out_dev)) { - NL_SET_ERR_MSG_MOD(extack, - "mpls is supported only through a bareudp device"); - return -EOPNOTSUPP; - } - - if (ft_flow && out_dev == priv->netdev) { - /* Ignore forward to self rules generated - * by adding both mlx5 devs to the flow table - * block on a normal nft offload setup. - */ - return -EOPNOTSUPP; - } - - if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { - NL_SET_ERR_MSG_MOD(extack, - "can't support more output ports, can't offload forwarding"); - netdev_warn(priv->netdev, - "can't support more than %d output ports, can't offload forwarding\n", - esw_attr->out_count); - return -EOPNOTSUPP; - } - - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - if (encap) { - parse_attr->mirred_ifindex[esw_attr->out_count] = - out_dev->ifindex; - parse_attr->tun_info[esw_attr->out_count] = - mlx5e_dup_tun_info(info); - if (!parse_attr->tun_info[esw_attr->out_count]) - return -ENOMEM; - encap = false; - esw_attr->dests[esw_attr->out_count].flags |= - MLX5_ESW_DEST_ENCAP; - esw_attr->out_count++; - /* attr->dests[].rep is resolved when we - * handle encap - */ - } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - - if (is_duplicated_output_device(priv->netdev, - out_dev, - ifindexes, - if_count, - extack)) - return -EOPNOTSUPP; - - ifindexes[if_count] = out_dev->ifindex; - if_count++; - - out_dev = get_fdb_out_dev(uplink_dev, out_dev); - if (!out_dev) - return -ENODEV; - - if (is_vlan_dev(out_dev)) { - err = add_vlan_push_action(priv, attr, - &out_dev, - &action, extack); - if (err) - return err; - } - - if (is_vlan_dev(parse_attr->filter_dev)) { - err = add_vlan_pop_action(priv, attr, - &action, extack); - if (err) - return err; - } - - if (netif_is_macvlan(out_dev)) - out_dev = macvlan_dev_real_dev(out_dev); - - err = verify_uplink_forwarding(priv, flow, out_dev, extack); - if (err) - return err; - - if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { - NL_SET_ERR_MSG_MOD(extack, - "devices are not on same switch HW, can't offload forwarding"); - return -EOPNOTSUPP; - } - - if (same_vf_reps(priv, out_dev)) { - NL_SET_ERR_MSG_MOD(extack, - "can't forward from a VF to itself"); - return -EOPNOTSUPP; - } - - out_priv = netdev_priv(out_dev); - rpriv = out_priv->ppriv; - esw_attr->dests[esw_attr->out_count].rep = rpriv->rep; - esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev; - esw_attr->out_count++; - } else if (netif_is_ovs_master(out_dev)) { - err = mlx5e_set_fwd_to_int_port_actions(priv, attr, - out_dev->ifindex, - MLX5E_TC_INT_PORT_EGRESS, - &action, - esw_attr->out_count); - if (err) - return err; - - esw_attr->out_count++; - } else if (parse_attr->filter_dev != priv->netdev) { - /* All mlx5 devices are called to configure - * high level device filters. Therefore, the - * *attempt* to install a filter on invalid - * eswitch should not trigger an explicit error - */ - return -EINVAL; - } else { - NL_SET_ERR_MSG_MOD(extack, - "devices are not on same switch HW, can't offload forwarding"); - netdev_warn(priv->netdev, - "devices %s %s not on same switch HW, can't offload forwarding\n", - priv->netdev->name, - out_dev->name); - return -EOPNOTSUPP; - } - } - break; - case FLOW_ACTION_TUNNEL_ENCAP: - info = act->tunnel; - if (info) { - encap = true; - } else { - NL_SET_ERR_MSG_MOD(extack, - "Zero tunnel attributes is not supported"); - return -EOPNOTSUPP; - } - - break; - case FLOW_ACTION_VLAN_PUSH: - case FLOW_ACTION_VLAN_POP: - if (act->id == FLOW_ACTION_VLAN_PUSH && - (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) { - /* Replace vlan pop+push with vlan modify */ - action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; - err = add_vlan_rewrite_action(priv, - MLX5_FLOW_NAMESPACE_FDB, - act, parse_attr, hdrs, - &action, extack); - } else { - err = parse_tc_vlan_action(priv, act, esw_attr, &action, extack); - } - if (err) - return err; - - esw_attr->split_count = esw_attr->out_count; - break; - case FLOW_ACTION_VLAN_MANGLE: - err = add_vlan_rewrite_action(priv, - MLX5_FLOW_NAMESPACE_FDB, - act, parse_attr, hdrs, - &action, extack); - if (err) - return err; - - esw_attr->split_count = esw_attr->out_count; - break; - case FLOW_ACTION_TUNNEL_DECAP: - decap = true; - break; - case FLOW_ACTION_GOTO: - err = validate_goto_chain(priv, flow, act, action, - extack); - if (err) - return err; - - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - attr->dest_chain = act->chain_index; - break; - case FLOW_ACTION_CT: - if (flow_flag_test(flow, SAMPLE)) { - NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported"); - return -EOPNOTSUPP; - } - err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, - &parse_attr->mod_hdr_acts, - act, extack); - if (err) - return err; - - flow_flag_set(flow, CT); - esw_attr->split_count = esw_attr->out_count; - break; - case FLOW_ACTION_SAMPLE: - if (flow_flag_test(flow, CT)) { - NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported"); - return -EOPNOTSUPP; - } - sample_attr.rate = act->sample.rate; - sample_attr.group_num = act->sample.psample_group->group_num; - if (act->sample.truncate) - sample_attr.trunc_size = act->sample.trunc_size; - flow_flag_set(flow, SAMPLE); - break; - default: - NL_SET_ERR_MSG_MOD(extack, - "The offload action is not supported in FDB action"); - return -EOPNOTSUPP; - } - } + err = parse_tc_actions(parse_state, flow_action); + if (err) + return err; /* Forward to/from internal port can only have 1 dest */ - if ((netif_is_ovs_master(parse_attr->filter_dev) || esw_attr->dest_int_port) && + if ((netif_is_ovs_master(filter_dev) || esw_attr->dest_int_port) && esw_attr->out_count > 1) { NL_SET_ERR_MSG_MOD(extack, "Rules with internal port can have only one destination"); return -EOPNOTSUPP; } - /* always set IP version for indirect table handling */ - attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true); - - if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && - action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { - /* For prio tag mode, replace vlan pop with rewrite vlan prio - * tag rewrite. - */ - action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; - err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs, - &action, extack); - if (err) - return err; + /* Forward from tunnel/internal port to internal port is not supported */ + if ((mlx5e_get_tc_tun(filter_dev) || netif_is_ovs_master(filter_dev)) && + esw_attr->dest_int_port) { + NL_SET_ERR_MSG_MOD(extack, + "Forwarding from tunnel/internal port to internal port is not supported"); + return -EOPNOTSUPP; } - attr->action = action; - - err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack); + err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack); if (err) return err; - if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) - return -EOPNOTSUPP; - - if (attr->dest_chain && decap) { - /* It can be supported if we'll create a mapping for - * the tunnel device only (without tunnel), and set - * this tunnel id with this decap flow. - * - * On restore (miss), we'll just set this saved tunnel - * device. - */ - - NL_SET_ERR_MSG(extack, "Decap with goto isn't supported"); - netdev_warn(priv->netdev, "Decap with goto isn't supported"); + if (!actions_match_supported(priv, flow_action, parse_state->actions, + parse_attr, flow, extack)) return -EOPNOTSUPP; - } - - /* Allocate sample attribute only when there is a sample action and - * no errors after parsing. - */ - if (flow_flag_test(flow, SAMPLE)) { - attr->sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL); - if (!attr->sample_attr) - return -ENOMEM; - *attr->sample_attr = sample_attr; - } return 0; } @@ -4394,14 +4148,14 @@ static const struct rhashtable_params tc_ht_params = { static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, unsigned long flags) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5e_rep_priv *rpriv; if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) { - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - return &uplink_rpriv->uplink_priv.tc_ht; + rpriv = priv->ppriv; + return &rpriv->tc_ht; } else /* NIC offload */ - return &priv->fs.tc.ht; + return &tc->ht; } static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) @@ -4434,7 +4188,12 @@ mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type) sizeof(struct mlx5_nic_flow_attr); struct mlx5_flow_attr *attr; - return kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL); + attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL); + if (!attr) + return attr; + + INIT_LIST_HEAD(&attr->list); + return attr; } static int @@ -4458,7 +4217,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, flow->cookie = f->cookie; flow->priv = priv; - attr = mlx5_alloc_flow_attr(get_flow_name_space(flow)); + attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow)); if (!attr) goto err_free; @@ -4468,6 +4227,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, INIT_LIST_HEAD(&flow->encaps[out_index].list); INIT_LIST_HEAD(&flow->hairpin); INIT_LIST_HEAD(&flow->l3_to_l2_reformat); + INIT_LIST_HEAD(&flow->attrs); refcount_set(&flow->refcnt, 1); init_completion(&flow->init_done); init_completion(&flow->del_hw_done); @@ -4525,6 +4285,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_core_dev *in_mdev) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; @@ -4553,21 +4314,33 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; + /* always set IP version for indirect table handling */ + flow->attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true); + err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); if (err) goto err_free; + if (flow->attr->lag.count) { + err = mlx5_lag_add_mpesw_rule(esw->dev); + if (err) + goto err_free; + } + err = mlx5e_tc_add_fdb_flow(priv, flow, extack); complete_all(&flow->init_done); if (err) { if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev))) - goto err_free; + goto err_lag; add_unready_flow(flow); } return flow; +err_lag: + if (flow->attr->lag.count) + mlx5_lag_del_mpesw_rule(esw->dev); err_free: mlx5e_flow_put(priv, flow); out: @@ -4714,7 +4487,7 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, err_free: flow_flag_set(flow, FAILED); - dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); + mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); mlx5e_flow_put(priv, flow); out: return err; @@ -4766,7 +4539,7 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, int err = 0; if (!mlx5_esw_hold(priv->mdev)) - return -EAGAIN; + return -EBUSY; mlx5_esw_get(priv->mdev); @@ -4965,6 +4738,33 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate, return err; } +int mlx5e_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + return 0; +} + static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct netlink_ext_ack *extack) @@ -4992,10 +4792,16 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_POLICE: - if (act->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second"); + if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not continue"); return -EOPNOTSUPP; } + + err = mlx5e_policer_validate(flow_action, act, extack); + if (err) + return err; + err = apply_police_params(priv, act->police.rate_bytes_ps, extack); if (err) return err; @@ -5014,14 +4820,8 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, struct tc_cls_matchall_offload *ma) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct netlink_ext_ack *extack = ma->common.extack; - if (!mlx5_esw_qos_enabled(esw)) { - NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device"); - return -EOPNOTSUPP; - } - if (ma->common.prio != 1) { NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); return -EINVAL; @@ -5057,22 +4857,23 @@ void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) { + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); struct mlx5_core_dev *peer_mdev = peer_priv->mdev; struct mlx5e_hairpin_entry *hpe, *tmp; LIST_HEAD(init_wait_list); u16 peer_vhca_id; int bkt; - if (!same_hw_devs(priv, peer_priv)) + if (!mlx5e_same_hw_devs(priv, peer_priv)) return; peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id); - mutex_lock(&priv->fs.tc.hairpin_tbl_lock); - hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) + mutex_lock(&tc->hairpin_tbl_lock); + hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist) if (refcount_inc_not_zero(&hpe->refcnt)) list_add(&hpe->dead_peer_wait_list, &init_wait_list); - mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + mutex_unlock(&tc->hairpin_tbl_lock); list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) { wait_for_completion(&hpe->res_ready); @@ -5087,7 +4888,6 @@ static int mlx5e_tc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *ndev = netdev_notifier_info_to_dev(ptr); - struct mlx5e_flow_steering *fs; struct mlx5e_priv *peer_priv; struct mlx5e_tc_table *tc; struct mlx5e_priv *priv; @@ -5098,8 +4898,7 @@ static int mlx5e_tc_netdev_event(struct notifier_block *this, return NOTIFY_DONE; tc = container_of(this, struct mlx5e_tc_table, netdevice_nb); - fs = container_of(tc, struct mlx5e_flow_steering, tc); - priv = container_of(fs, struct mlx5e_priv, fs); + priv = tc->priv; peer_priv = netdev_priv(ndev); if (priv == peer_priv || !(priv->netdev->features & NETIF_F_HW_TC)) @@ -5126,9 +4925,39 @@ static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev) return tc_tbl_size; } +static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_flow_table **ft = &tc->miss_t; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + int err = 0; + + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.level = MLX5E_TC_MISS_LEVEL; + ft_attr.prio = 0; + ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); + + *ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(*ft)) { + err = PTR_ERR(*ft); + netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err); + } + + return err; +} + +static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + + mlx5_destroy_flow_table(tc->miss_t); +} + int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { - struct mlx5e_tc_table *tc = &priv->fs.tc; + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); struct mlx5_core_dev *dev = priv->mdev; struct mapping_ctx *chains_mapping; struct mlx5_chains_attr attr = {}; @@ -5139,6 +4968,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) mutex_init(&tc->t_lock); mutex_init(&tc->hairpin_tbl_lock); hash_init(tc->hairpin_tbl); + tc->priv = priv; err = rhashtable_init(&tc->ht, &tc_ht_params); if (err) @@ -5158,23 +4988,27 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) } tc->mapping = chains_mapping; + err = mlx5e_tc_nic_create_miss_table(priv); + if (err) + goto err_chains; + if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED | MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; attr.ns = MLX5_FLOW_NAMESPACE_KERNEL; attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev); attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS; - attr.default_ft = mlx5e_vlan_get_flowtable(priv->fs.vlan); + attr.default_ft = tc->miss_t; attr.mapping = chains_mapping; tc->chains = mlx5_chains_create(dev, &attr); if (IS_ERR(tc->chains)) { err = PTR_ERR(tc->chains); - goto err_chains; + goto err_miss; } tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL); - tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr, + tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr, MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act); tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; @@ -5193,6 +5027,8 @@ err_reg: mlx5_tc_ct_clean(tc->ct); mlx5e_tc_post_act_destroy(tc->post_act); mlx5_chains_destroy(tc->chains); +err_miss: + mlx5e_tc_nic_destroy_miss_table(priv); err_chains: mapping_destroy(chains_mapping); err_mapping: @@ -5211,7 +5047,7 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg) void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) { - struct mlx5e_tc_table *tc = &priv->fs.tc; + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); if (tc->netdevice_nb.notifier_call) unregister_netdevice_notifier_dev_net(priv->netdev, @@ -5233,12 +5069,30 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) mlx5e_tc_post_act_destroy(tc->post_act); mapping_destroy(tc->mapping); mlx5_chains_destroy(tc->chains); + mlx5e_tc_nic_destroy_miss_table(priv); +} + +int mlx5e_tc_ht_init(struct rhashtable *tc_ht) +{ + int err; + + err = rhashtable_init(tc_ht, &tc_ht_params); + if (err) + return err; + + lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key); + + return 0; } -int mlx5e_tc_esw_init(struct rhashtable *tc_ht) +void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) +{ + rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); +} + +int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) { const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts); - struct mlx5_rep_uplink_priv *uplink_priv; struct mlx5e_rep_priv *rpriv; struct mapping_ctx *mapping; struct mlx5_eswitch *esw; @@ -5246,7 +5100,6 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) u64 mapping_id; int err = 0; - uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); priv = netdev_priv(rpriv->netdev); esw = priv->mdev->priv.eswitch; @@ -5286,12 +5139,6 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) } uplink_priv->tunnel_enc_opts_mapping = mapping; - err = rhashtable_init(tc_ht, &tc_ht_params); - if (err) - goto err_ht_init; - - lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key); - uplink_priv->encap = mlx5e_tc_tun_init(priv); if (IS_ERR(uplink_priv->encap)) { err = PTR_ERR(uplink_priv->encap); @@ -5301,8 +5148,6 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) return 0; err_register_fib_notifier: - rhashtable_destroy(tc_ht); -err_ht_init: mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); err_enc_opts_mapping: mapping_destroy(uplink_priv->tunnel_mapping); @@ -5316,13 +5161,8 @@ err_tun_mapping: return err; } -void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) +void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv) { - struct mlx5_rep_uplink_priv *uplink_priv; - - uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); - - rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); mlx5e_tc_tun_cleanup(uplink_priv->encap); mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); @@ -5331,6 +5171,7 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv); mlx5_tc_ct_clean(uplink_priv->ct_priv); + mlx5e_flow_meters_cleanup(uplink_priv->flow_meters); mlx5e_tc_post_act_destroy(uplink_priv->post_act); } @@ -5411,13 +5252,13 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) u32 chain = 0, chain_tag, reg_b, zone_restore_id; struct mlx5e_priv *priv = netdev_priv(skb->dev); - struct mlx5e_tc_table *tc = &priv->fs.tc; struct mlx5_mapped_obj mapped_obj; struct tc_skb_ext *tc_skb_ext; + struct mlx5e_tc_table *tc; int err; reg_b = be32_to_cpu(cqe->ft_metadata); - + tc = mlx5e_fs_get_tc(priv->fs); chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; err = mapping_find(tc->mapping, chain_tag, &mapped_obj); @@ -5436,7 +5277,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, tc_skb_ext->chain = chain; - zone_restore_id = (reg_b >> REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) & + zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) & ESW_ZONE_ID_MASK; if (!mlx5e_tc_ct_restore_flow(tc->ct, skb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index fdb222793027..48241317a535 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -39,6 +39,7 @@ #include "en/tc_ct.h" #include "en/tc_tun.h" #include "en/tc/int_port.h" +#include "en/tc/meter.h" #include "en_rep.h" #define MLX5E_TC_FLOW_ID_MASK 0x0000ffff @@ -53,7 +54,7 @@ ESW_FLOW_ATTR_SZ :\ NIC_FLOW_ATTR_SZ) - +struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc); int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags); struct mlx5e_tc_update_priv { @@ -71,7 +72,8 @@ struct mlx5_flow_attr { struct mlx5_fc *counter; struct mlx5_modify_hdr *modify_hdr; struct mlx5_ct_attr ct_attr; - struct mlx5e_sample_attr *sample_attr; + struct mlx5e_sample_attr sample_attr; + struct mlx5e_meter_attr meter_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; u32 chain; u16 prio; @@ -82,13 +84,41 @@ struct mlx5_flow_attr { u8 outer_match_level; u8 ip_version; u8 tun_ip_version; + int tunnel_id; /* mapped tunnel id */ u32 flags; + u32 exe_aso_type; + struct list_head list; + struct mlx5e_post_act_handle *post_act_handle; + struct { + /* Indicate whether the parsed flow should be counted for lag mode decision + * making + */ + bool count; + } lag; + /* keep this union last */ union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; }; }; +enum { + MLX5_ATTR_FLAG_VLAN_HANDLED = BIT(0), + MLX5_ATTR_FLAG_SLOW_PATH = BIT(1), + MLX5_ATTR_FLAG_NO_IN_PORT = BIT(2), + MLX5_ATTR_FLAG_SRC_REWRITE = BIT(3), + MLX5_ATTR_FLAG_SAMPLE = BIT(4), + MLX5_ATTR_FLAG_ACCEPT = BIT(5), + MLX5_ATTR_FLAG_CT = BIT(6), +}; + +/* Returns true if any of the flags that require skipping further TC/NF processing are set. */ +static inline bool +mlx5e_tc_attr_flags_skip(u32 attr_flags) +{ + return attr_flags & (MLX5_ATTR_FLAG_SLOW_PATH | MLX5_ATTR_FLAG_ACCEPT); +} + struct mlx5_rx_tun_attr { u16 decap_vport; union { @@ -149,9 +179,11 @@ enum { #define MLX5_TC_FLAG(flag) BIT(MLX5E_TC_FLAG_##flag##_BIT) -int mlx5e_tc_esw_init(struct rhashtable *tc_ht); -void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); -bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow); +int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv); +void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv); + +int mlx5e_tc_ht_init(struct rhashtable *tc_ht); +void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht); int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct flow_cls_offload *f, unsigned long flags); @@ -201,6 +233,7 @@ enum mlx5e_tc_attr_to_reg { FTEID_TO_REG, NIC_CHAIN_TO_REG, NIC_ZONE_RESTORE_TO_REG, + PACKET_COLOR_TO_REG, }; struct mlx5e_tc_attr_to_reg_mapping { @@ -213,6 +246,10 @@ struct mlx5e_tc_attr_to_reg_mapping { extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[]; +#define MLX5_REG_MAPPING_MOFFSET(reg_id) (mlx5e_tc_attr_to_reg_mappings[reg_id].moffset) +#define MLX5_REG_MAPPING_MBITS(reg_id) (mlx5e_tc_attr_to_reg_mappings[reg_id].mlen) +#define MLX5_REG_MAPPING_MASK(reg_id) (GENMASK(mlx5e_tc_attr_to_reg_mappings[reg_id].mlen - 1, 0)) + bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, struct net_device *out_dev); @@ -244,16 +281,8 @@ int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, u32 data); int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow); - -int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, - int namespace, - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); -void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); - -struct mlx5e_tc_flow; -u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow); + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, struct flow_match_basic *match, bool outer, @@ -295,6 +324,8 @@ int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, #else /* CONFIG_MLX5_CLS_ACT */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} +static inline int mlx5e_tc_ht_init(struct rhashtable *tc_ht) { return 0; } +static inline void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) {} static inline int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { return -EOPNOTSUPP; } @@ -326,6 +357,8 @@ mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) #endif #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +struct mlx5e_tc_table *mlx5e_tc_table_alloc(void); +void mlx5e_tc_table_free(struct mlx5e_tc_table *tc); static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) { #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) @@ -346,6 +379,8 @@ static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb); #else /* CONFIG_MLX5_CLS_ACT */ +static inline struct mlx5e_tc_table *mlx5e_tc_table_alloc(void) { return NULL; } +static inline void mlx5e_tc_table_free(struct mlx5e_tc_table *tc) {} static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) { return false; } static inline bool diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 7fd33b356cc8..f7897ddb29c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -39,7 +39,9 @@ #include "ipoib/ipoib.h" #include "en_accel/en_accel.h" #include "en_accel/ipsec_rxtx.h" +#include "en_accel/macsec.h" #include "en/ptp.h" +#include <net/ipv6.h> static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) { @@ -53,117 +55,6 @@ static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) } } -#ifdef CONFIG_MLX5_CORE_EN_DCB -static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb) -{ - int dscp_cp = 0; - - if (skb->protocol == htons(ETH_P_IP)) - dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; - else if (skb->protocol == htons(ETH_P_IPV6)) - dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; - - return priv->dcbx_dp.dscp2prio[dscp_cp]; -} -#endif - -static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - int up = 0; - - if (!netdev_get_num_tc(dev)) - goto return_txq; - -#ifdef CONFIG_MLX5_CORE_EN_DCB - if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP) - up = mlx5e_get_dscp_up(priv, skb); - else -#endif - if (skb_vlan_tag_present(skb)) - up = skb_vlan_tag_get_prio(skb); - -return_txq: - return priv->port_ptp_tc2realtxq[up]; -} - -static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb, - u16 htb_maj_id) -{ - u16 classid; - - if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id) - classid = TC_H_MIN(skb->priority); - else - classid = READ_ONCE(priv->htb.defcls); - - if (!classid) - return 0; - - return mlx5e_get_txq_by_classid(priv, classid); -} - -u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - int num_tc_x_num_ch; - int txq_ix; - int up = 0; - int ch_ix; - - /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */ - num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch); - if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) { - struct mlx5e_ptp *ptp_channel; - - /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */ - u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id); - - if (unlikely(htb_maj_id)) { - txq_ix = mlx5e_select_htb_queue(priv, skb, htb_maj_id); - if (txq_ix > 0) - return txq_ix; - } - - ptp_channel = READ_ONCE(priv->channels.ptp); - if (unlikely(ptp_channel && - test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) && - mlx5e_use_ptpsq(skb))) - return mlx5e_select_ptpsq(dev, skb); - - txq_ix = netdev_pick_tx(dev, skb, NULL); - /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs. - * If they are selected, switch to regular queues. - * Driver to select these queues only at mlx5e_select_ptpsq() - * and mlx5e_select_htb_queue(). - */ - if (unlikely(txq_ix >= num_tc_x_num_ch)) - txq_ix %= num_tc_x_num_ch; - } else { - txq_ix = netdev_pick_tx(dev, skb, NULL); - } - - if (!netdev_get_num_tc(dev)) - return txq_ix; - -#ifdef CONFIG_MLX5_CORE_EN_DCB - if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP) - up = mlx5e_get_dscp_up(priv, skb); - else -#endif - if (skb_vlan_tag_present(skb)) - up = skb_vlan_tag_get_prio(skb); - - /* Normalize any picked txq_ix to [0, num_channels), - * So we can return a txq_ix that matches the channel and - * packet UP. - */ - ch_ix = priv->txq2sq[txq_ix]->ch_ix; - - return priv->channel_tc2realtxq[ch_ix][up]; -} - static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) { #define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN) @@ -202,16 +93,26 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, return min_t(u16, hlen, skb_headlen(skb)); } +#define MLX5_UNSAFE_MEMCPY_DISCLAIMER \ + "This copy has been bounds-checked earlier in " \ + "mlx5i_sq_calc_wqe_attr() and intentionally " \ + "crosses a flex array boundary. Since it is " \ + "performance sensitive, splitting the copy is " \ + "undesirable." + static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) { struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; int cpy1_sz = 2 * ETH_ALEN; int cpy2_sz = ihs - cpy1_sz; - memcpy(vhdr, skb->data, cpy1_sz); + memcpy(&vhdr->addrs, skb->data, cpy1_sz); vhdr->h_vlan_proto = skb->vlan_proto; vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); - memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz); + unsafe_memcpy(&vhdr->h_vlan_encapsulated_proto, + skb->data + cpy1_sz, + cpy2_sz, + MLX5_UNSAFE_MEMCPY_DISCLAIMER); } static inline void @@ -241,23 +142,32 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, sq->stats->csum_none++; } +/* Returns the number of header bytes that we plan + * to inline later in the transmit descriptor + */ static inline u16 -mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb) +mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop) { struct mlx5e_sq_stats *stats = sq->stats; u16 ihs; + *hopbyhop = 0; if (skb->encapsulation) { - ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); + ihs = skb_inner_tcp_all_headers(skb); stats->tso_inner_packets++; stats->tso_inner_bytes += skb->len - ihs; } else { - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { ihs = skb_transport_offset(skb) + sizeof(struct udphdr); - else - ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); + } else { + ihs = skb_tcp_all_headers(skb); + if (ipv6_has_hopopt_jumbo(skb)) { + *hopbyhop = sizeof(struct hop_jumbo_hdr); + ihs -= sizeof(struct hop_jumbo_hdr); + } + } stats->tso_packets++; - stats->tso_bytes += skb->len - ihs; + stats->tso_bytes += skb->len - ihs - *hopbyhop; } return ihs; @@ -319,6 +229,7 @@ struct mlx5e_tx_attr { __be16 mss; u16 insz; u8 opcode; + u8 hopbyhop; }; struct mlx5e_tx_wqe_attr { @@ -355,14 +266,16 @@ static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_sq_stats *stats = sq->stats; if (skb_is_gso(skb)) { - u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb); + int hopbyhop; + u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb, &hopbyhop); *attr = (struct mlx5e_tx_attr) { .opcode = MLX5_OPCODE_LSO, .mss = cpu_to_be16(skb_shinfo(skb)->gso_size), .ihs = ihs, .num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs, - .headlen = skb_headlen(skb) - ihs, + .headlen = skb_headlen(skb) - ihs - hopbyhop, + .hopbyhop = hopbyhop, }; stats->packets += skb_shinfo(skb)->gso_segs; @@ -392,6 +305,8 @@ static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_at u16 ds_cnt_inl = 0; u16 ds_cnt_ids = 0; + /* Sync the calculation with MLX5E_MAX_TX_WQEBBS. */ + if (attr->insz) ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz, MLX5_SEND_WQE_DS); @@ -404,6 +319,9 @@ static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_at inl += VLAN_HLEN; ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS); + if (WARN_ON_ONCE(ds_cnt_inl > MLX5E_MAX_TX_INLINE_DS)) + netdev_warn(skb->dev, "ds_cnt_inl = %u > max %u\n", ds_cnt_inl, + (u16)MLX5E_MAX_TX_INLINE_DS); ds_cnt += ds_cnt_inl; } @@ -429,6 +347,26 @@ static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq) } } +static void mlx5e_tx_flush(struct mlx5e_txqsq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + /* Must not be called when a MPWQE session is active but empty. */ + mlx5e_tx_mpwqe_ensure_complete(sq); + + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + wi = &sq->db.wqe_info[pi]; + + *wi = (struct mlx5e_tx_wqe_info) { + .num_wqebbs = 1, + }; + + wqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); +} + static inline void mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, const struct mlx5e_tx_attr *attr, @@ -459,6 +397,11 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, if (unlikely(sq->ptpsq)) { mlx5e_skb_cb_hwtstamp_init(skb); mlx5e_skb_fifo_push(&sq->ptpsq->skb_fifo, skb); + if (!netif_tx_queue_stopped(sq->txq) && + !mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo)) { + netif_tx_stop_queue(sq->txq); + sq->stats->stopped++; + } skb_get(skb); } @@ -476,7 +419,8 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg; struct mlx5_wqe_data_seg *dseg; struct mlx5e_tx_wqe_info *wi; - + u16 ihs = attr->ihs; + struct ipv6hdr *h6; struct mlx5e_sq_stats *stats = sq->stats; int num_dma; @@ -490,15 +434,40 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, eseg->mss = attr->mss; - if (attr->ihs) { - if (skb_vlan_tag_present(skb)) { - eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs + VLAN_HLEN); - mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs); + if (ihs) { + u8 *start = eseg->inline_hdr.start; + + if (unlikely(attr->hopbyhop)) { + /* remove the HBH header. + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] + */ + if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(start, skb, ETH_HLEN + sizeof(*h6)); + ihs += VLAN_HLEN; + h6 = (struct ipv6hdr *)(start + sizeof(struct vlan_ethhdr)); + } else { + unsafe_memcpy(start, skb->data, + ETH_HLEN + sizeof(*h6), + MLX5_UNSAFE_MEMCPY_DISCLAIMER); + h6 = (struct ipv6hdr *)(start + ETH_HLEN); + } + h6->nexthdr = IPPROTO_TCP; + /* Copy the TCP header after the IPv6 one */ + memcpy(h6 + 1, + skb->data + ETH_HLEN + sizeof(*h6) + + sizeof(struct hop_jumbo_hdr), + tcp_hdrlen(skb)); + /* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */ + } else if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(start, skb, ihs); + ihs += VLAN_HLEN; stats->added_vlan_packets++; } else { - eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs); - memcpy(eseg->inline_hdr.start, skb->data, attr->ihs); + unsafe_memcpy(eseg->inline_hdr.start, skb->data, + attr->ihs, + MLX5_UNSAFE_MEMCPY_DISCLAIMER); } + eseg->inline_hdr.sz |= cpu_to_be16(ihs); dseg += wqe_attr->ds_cnt_inl; } else if (skb_vlan_tag_present(skb)) { eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); @@ -509,7 +478,7 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, } dseg += wqe_attr->ds_cnt_ids; - num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs, + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs + attr->hopbyhop, attr->headlen, dseg); if (unlikely(num_dma < 0)) goto err_drop; @@ -521,12 +490,13 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr) { return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs && - !attr->insz; + !attr->insz && !mlx5e_macsec_skb_is_offload(skb); } static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg) @@ -544,7 +514,7 @@ static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe *wqe; u16 pi; - pi = mlx5e_txqsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS); + pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs); wqe = MLX5E_TX_FETCH_WQE(sq, pi); net_prefetchw(wqe->data); @@ -622,6 +592,13 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_ctrl_seg *cseg; struct mlx5e_xmit_data txd; + txd.data = skb->data; + txd.len = skb->len; + + txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr))) + goto err_unmap; + if (!mlx5e_tx_mpwqe_session_is_active(sq)) { mlx5e_tx_mpwqe_session_start(sq, eseg); } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) { @@ -631,21 +608,12 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, sq->stats->xmit_more += xmit_more; - txd.data = skb->data; - txd.len = skb->len; - - txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr))) - goto err_unmap; mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE); - mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb); - mlx5e_tx_mpwqe_add_dseg(sq, &txd); - mlx5e_tx_skb_update_hwts_flags(skb); - if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) { + if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) { /* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */ cseg = mlx5e_tx_mpwqe_session_complete(sq); @@ -664,6 +632,7 @@ err_unmap: mlx5e_dma_unmap_wqe_err(sq, 1); sq->stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) @@ -673,12 +642,22 @@ void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) mlx5e_tx_mpwqe_session_complete(sq); } +static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ + if (ptpsq->ts_cqe_ctr_mask && unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + eseg->flow_table_metadata = cpu_to_be32(ptpsq->skb_fifo_pc & + ptpsq->ts_cqe_ctr_mask); +} + static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_accel_tx_state *accel, struct mlx5_wqe_eth_seg *eseg, u16 ihs) { mlx5e_accel_tx_eseg(priv, skb, eseg, ihs); mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg); + if (unlikely(sq->ptpsq)) + mlx5e_cqe_ts_id_eseg(sq->ptpsq, skb, eseg); } netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) @@ -691,8 +670,21 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) struct mlx5e_txqsq *sq; u16 pi; + /* All changes to txq2sq are performed in sync with mlx5e_xmit, when the + * queue being changed is disabled, and smp_wmb guarantees that the + * changes are visible before mlx5e_xmit tries to read from txq2sq. It + * guarantees that the value of txq2sq[qid] doesn't change while + * mlx5e_xmit is running on queue number qid. smb_wmb is paired with + * HARD_TX_LOCK around ndo_start_xmit, which serves as an ACQUIRE. + */ sq = priv->txq2sq[skb_get_queue_mapping(skb)]; if (unlikely(!sq)) { + /* Two cases when sq can be NULL: + * 1. The HTB node is registered, and mlx5e_select_queue + * selected its queue ID, but the SQ itself is not yet created. + * 2. HTB SQ creation failed. Similar to the previous case, but + * the SQ won't be created. + */ dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -886,6 +878,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) if (netif_tx_queue_stopped(sq->txq) && mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) && + mlx5e_ptpsq_fifo_has_room(sq) && !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { netif_tx_wake_queue(sq->txq); stats->wake++; @@ -1016,12 +1009,34 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, eseg->mss = attr.mss; if (attr.ihs) { - memcpy(eseg->inline_hdr.start, skb->data, attr.ihs); + if (unlikely(attr.hopbyhop)) { + struct ipv6hdr *h6; + + /* remove the HBH header. + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] + */ + unsafe_memcpy(eseg->inline_hdr.start, skb->data, + ETH_HLEN + sizeof(*h6), + MLX5_UNSAFE_MEMCPY_DISCLAIMER); + h6 = (struct ipv6hdr *)((char *)eseg->inline_hdr.start + ETH_HLEN); + h6->nexthdr = IPPROTO_TCP; + /* Copy the TCP header after the IPv6 one */ + unsafe_memcpy(h6 + 1, + skb->data + ETH_HLEN + sizeof(*h6) + + sizeof(struct hop_jumbo_hdr), + tcp_hdrlen(skb), + MLX5_UNSAFE_MEMCPY_DISCLAIMER); + /* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */ + } else { + unsafe_memcpy(eseg->inline_hdr.start, skb->data, + attr.ihs, + MLX5_UNSAFE_MEMCPY_DISCLAIMER); + } eseg->inline_hdr.sz = cpu_to_be16(attr.ihs); dseg += wqe_attr.ds_cnt_inl; } - num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs, + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs + attr.hopbyhop, attr.headlen, dseg); if (unlikely(num_dma < 0)) goto err_drop; @@ -1033,5 +1048,6 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 833be29170a1..9a458a5d9853 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -31,6 +31,7 @@ */ #include <linux/irq.h> +#include <net/xdp_sock_drv.h> #include "en.h" #include "en/txrx.h" #include "en/xdp.h" @@ -86,26 +87,36 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq) static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskrq) { + bool need_wakeup = xsk_uses_need_wakeup(xskrq->xsk_pool); bool busy_xsk = false, xsk_rx_alloc_err; - /* Handle the race between the application querying need_wakeup and the - * driver setting it: - * 1. Update need_wakeup both before and after the TX. If it goes to - * "yes", it can only happen with the first update. - * 2. If the application queried need_wakeup before we set it, the - * packets will be transmitted anyway, even w/o a wakeup. - * 3. Give a chance to clear need_wakeup after new packets were queued - * for TX. + /* If SQ is empty, there are no TX completions to trigger NAPI, so set + * need_wakeup. Do it before queuing packets for TX to avoid race + * condition with userspace. */ - mlx5e_xsk_update_tx_wakeup(xsksq); + if (need_wakeup && xsksq->pc == xsksq->cc) + xsk_set_tx_need_wakeup(xsksq->xsk_pool); busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); - mlx5e_xsk_update_tx_wakeup(xsksq); + /* If we queued some packets for TX, no need for wakeup anymore. */ + if (need_wakeup && xsksq->pc != xsksq->cc) + xsk_clear_tx_need_wakeup(xsksq->xsk_pool); + /* If WQ is empty, RX won't trigger NAPI, so set need_wakeup. Do it + * before refilling to avoid race condition with userspace. + */ + if (need_wakeup && !mlx5e_rqwq_get_cur_sz(xskrq)) + xsk_set_rx_need_wakeup(xskrq->xsk_pool); xsk_rx_alloc_err = INDIRECT_CALL_2(xskrq->post_wqes, mlx5e_post_rx_mpwqes, mlx5e_post_rx_wqes, xskrq); - busy_xsk |= mlx5e_xsk_update_rx_wakeup(xskrq, xsk_rx_alloc_err); + /* Ask for wakeup if WQ is not full after refill. */ + if (!need_wakeup) + busy_xsk |= xsk_rx_alloc_err; + else if (xsk_rx_alloc_err) + xsk_set_rx_need_wakeup(xskrq->xsk_pool); + else + xsk_clear_rx_need_wakeup(xskrq->xsk_pool); return busy_xsk; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 792e0d6aa861..a0242dc15741 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -5,7 +5,6 @@ #include <linux/interrupt.h> #include <linux/notifier.h> -#include <linux/module.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/vport.h> #include <linux/mlx5/eq.h> @@ -19,6 +18,7 @@ #include "lib/clock.h" #include "diag/fw_tracer.h" #include "mlx5_irq.h" +#include "devlink.h" enum { MLX5_EQE_OWNER_INIT_VAL = 0x1, @@ -58,6 +58,8 @@ struct mlx5_eq_table { struct mutex lock; /* sync async eqs creations */ int num_comp_eqs; struct mlx5_irq_table *irq_table; + struct mlx5_irq **comp_irqs; + struct mlx5_irq *ctrl_irq; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; #endif @@ -265,8 +267,8 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; u8 log_eq_stride = ilog2(MLX5_EQE_SIZE); struct mlx5_priv *priv = &dev->priv; - u16 vecidx = param->irq_index; __be64 *pas; + u16 vecidx; void *eqc; int inlen; u32 *in; @@ -288,20 +290,16 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc); init_eq_buf(eq); - eq->irq = mlx5_irq_request(dev, vecidx, param->affinity); - if (IS_ERR(eq->irq)) { - err = PTR_ERR(eq->irq); - goto err_buf; - } - + eq->irq = param->irq; vecidx = mlx5_irq_get_index(eq->irq); + inlen = MLX5_ST_SZ_BYTES(create_eq_in) + MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages; in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; - goto err_irq; + goto err_buf; } pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas); @@ -345,8 +343,6 @@ err_eq: err_in: kvfree(in); -err_irq: - mlx5_irq_release(eq->irq); err_buf: mlx5_frag_buf_free(dev, &eq->frag_buf); return err; @@ -400,7 +396,6 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); - mlx5_irq_release(eq->irq); mlx5_frag_buf_free(dev, &eq->frag_buf); return err; @@ -443,7 +438,8 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) struct mlx5_eq_table *eq_table; int i; - eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL); + eq_table = kvzalloc_node(sizeof(*eq_table), GFP_KERNEL, + dev->priv.numa_node); if (!eq_table) return -ENOMEM; @@ -579,6 +575,9 @@ static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4]) if (MLX5_CAP_GEN_MAX(dev, vhca_state)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_VHCA_STATE_CHANGE); + if (MLX5_CAP_MACSEC(dev, log_max_macsec_offload)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE); + mask[0] = async_event_mask; if (MLX5_CAP_GEN(dev, event_cap)) @@ -593,11 +592,8 @@ setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq, eq->irq_nb.notifier_call = mlx5_eq_async_int; spin_lock_init(&eq->lock); - if (!zalloc_cpumask_var(¶m->affinity, GFP_KERNEL)) - return -ENOMEM; err = create_async_eq(dev, &eq->core, param); - free_cpumask_var(param->affinity); if (err) { mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err); return err; @@ -622,17 +618,38 @@ static void cleanup_async_eq(struct mlx5_core_dev *dev, name, err); } +static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, + &val); + if (!err) + return val.vu32; + mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err); + return MLX5_NUM_ASYNC_EQE; +} static int create_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_param param = {}; int err; + /* All the async_eqs are using single IRQ, request one IRQ and share its + * index among all the async_eqs of this device. + */ + table->ctrl_irq = mlx5_ctrl_irq_request(dev); + if (IS_ERR(table->ctrl_irq)) + return PTR_ERR(table->ctrl_irq); + MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); mlx5_eq_notifier_register(dev, &table->cq_err_nb); param = (struct mlx5_eq_param) { - .irq_index = MLX5_IRQ_EQ_CTRL, + .irq = table->ctrl_irq, .nent = MLX5_NUM_CMD_EQE, .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, }; @@ -645,8 +662,8 @@ static int create_async_eqs(struct mlx5_core_dev *dev) mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); param = (struct mlx5_eq_param) { - .irq_index = MLX5_IRQ_EQ_CTRL, - .nent = MLX5_NUM_ASYNC_EQE, + .irq = table->ctrl_irq, + .nent = async_eq_depth_devlink_param_get(dev), }; gather_async_events_mask(dev, param.mask); @@ -655,7 +672,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) goto err2; param = (struct mlx5_eq_param) { - .irq_index = MLX5_IRQ_EQ_CTRL, + .irq = table->ctrl_irq, .nent = /* TODO: sriov max_vf + */ 1, .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST, }; @@ -674,6 +691,7 @@ err2: err1: mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); + mlx5_ctrl_irq_release(table->ctrl_irq); return err; } @@ -688,6 +706,7 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) cleanup_async_eq(dev, &table->cmd_eq, "cmd"); mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); + mlx5_ctrl_irq_release(table->ctrl_irq); } struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) @@ -712,15 +731,14 @@ struct mlx5_eq * mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param) { - struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); + struct mlx5_eq *eq = kvzalloc_node(sizeof(*eq), GFP_KERNEL, + dev->priv.numa_node); int err; - if (!cpumask_available(param->affinity)) - return ERR_PTR(-EINVAL); - if (!eq) return ERR_PTR(-ENOMEM); + param->irq = dev->priv.eq_table->ctrl_irq; err = create_async_eq(dev, eq, param); if (err) { kvfree(eq); @@ -780,6 +798,54 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) } EXPORT_SYMBOL(mlx5_eq_update_ci); +static void comp_irqs_release(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + + if (mlx5_core_is_sf(dev)) + mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs); + else + mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs); + kfree(table->comp_irqs); +} + +static int comp_irqs_request(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + int ncomp_eqs = table->num_comp_eqs; + u16 *cpus; + int ret; + int i; + + ncomp_eqs = table->num_comp_eqs; + table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL); + if (!table->comp_irqs) + return -ENOMEM; + if (mlx5_core_is_sf(dev)) { + ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs); + if (ret < 0) + goto free_irqs; + return ret; + } + + cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL); + if (!cpus) { + ret = -ENOMEM; + goto free_irqs; + } + for (i = 0; i < ncomp_eqs; i++) + cpus[i] = cpumask_local_spread(i, dev->priv.numa_node); + ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs); + kfree(cpus); + if (ret < 0) + goto free_irqs; + return ret; + +free_irqs: + kfree(table->comp_irqs); + return ret; +} + static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -794,6 +860,22 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) tasklet_disable(&eq->tasklet_ctx.task); kfree(eq); } + comp_irqs_release(dev); +} + +static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + &val); + if (!err) + return val.vu32; + mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err); + return MLX5_COMP_EQ_SIZE; } static int create_comp_eqs(struct mlx5_core_dev *dev) @@ -805,14 +887,16 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) int err; int i; + ncomp_eqs = comp_irqs_request(dev); + if (ncomp_eqs < 0) + return ncomp_eqs; INIT_LIST_HEAD(&table->comp_eqs_list); - ncomp_eqs = table->num_comp_eqs; - nent = MLX5_COMP_EQ_SIZE; + nent = comp_eq_depth_devlink_param_get(dev); + for (i = 0; i < ncomp_eqs; i++) { struct mlx5_eq_param param = {}; - int vecidx = i; - eq = kzalloc(sizeof(*eq), GFP_KERNEL); + eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node); if (!eq) { err = -ENOMEM; goto clean; @@ -825,18 +909,11 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { - .irq_index = vecidx, + .irq = table->comp_irqs[i], .nent = nent, }; - if (!zalloc_cpumask_var(¶m.affinity, GFP_KERNEL)) { - err = -ENOMEM; - goto clean_eq; - } - cpumask_set_cpu(cpumask_local_spread(i, dev->priv.numa_node), - param.affinity); err = create_map_eq(dev, &eq->core, ¶m); - free_cpumask_var(param.affinity); if (err) goto clean_eq; err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); @@ -850,7 +927,9 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) list_add_tail(&eq->list, &table->comp_eqs_list); } + table->num_comp_eqs = ncomp_eqs; return 0; + clean_eq: kfree(eq); clean: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c index 39e948bc1204..a994e71e05c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c @@ -92,6 +92,7 @@ static int esw_acl_ingress_mod_metadata_create(struct mlx5_eswitch *esw, flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; + flow_act.fg = vport->ingress.offloads.metadata_allmatch_grp; vport->ingress.offloads.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, NULL, 0); @@ -117,6 +118,36 @@ static void esw_acl_ingress_mod_metadata_destroy(struct mlx5_eswitch *esw, vport->ingress.offloads.modify_metadata_rule = NULL; } +static int esw_acl_ingress_src_port_drop_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + int err = 0; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + flow_act.fg = vport->ingress.offloads.drop_grp; + flow_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, NULL, 0); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } + + vport->ingress.offloads.drop_rule = flow_rule; +out: + return err; +} + +static void esw_acl_ingress_src_port_drop_destroy(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!vport->ingress.offloads.drop_rule) + return; + + mlx5_del_flow_rules(vport->ingress.offloads.drop_rule); + vport->ingress.offloads.drop_rule = NULL; +} + static int esw_acl_ingress_ofld_rules_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -154,6 +185,7 @@ static void esw_acl_ingress_ofld_rules_destroy(struct mlx5_eswitch *esw, { esw_acl_ingress_allow_rule_destroy(vport); esw_acl_ingress_mod_metadata_destroy(esw, vport); + esw_acl_ingress_src_port_drop_destroy(esw, vport); } static int esw_acl_ingress_ofld_groups_create(struct mlx5_eswitch *esw, @@ -170,10 +202,29 @@ static int esw_acl_ingress_ofld_groups_create(struct mlx5_eswitch *esw, if (!flow_group_in) return -ENOMEM; + if (vport->vport == MLX5_VPORT_UPLINK) { + /* This group can hold an FTE to drop all traffic. + * Need in case LAG is enabled. + */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + ret = PTR_ERR(g); + esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n", + vport->vport, ret); + goto drop_err; + } + vport->ingress.offloads.drop_grp = g; + flow_index++; + } + if (esw_acl_ingress_prio_tag_enabled(esw, vport)) { /* This group is to hold FTE to match untagged packets when prio_tag * is enabled. */ + memset(flow_group_in, 0, inlen); match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); MLX5_SET(create_flow_group_in, flow_group_in, @@ -221,6 +272,11 @@ metadata_err: vport->ingress.offloads.metadata_prio_tag_grp = NULL; } prio_tag_err: + if (!IS_ERR_OR_NULL(vport->ingress.offloads.drop_grp)) { + mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp); + vport->ingress.offloads.drop_grp = NULL; + } +drop_err: kvfree(flow_group_in); return ret; } @@ -236,6 +292,11 @@ static void esw_acl_ingress_ofld_groups_destroy(struct mlx5_vport *vport) mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp); vport->ingress.offloads.metadata_prio_tag_grp = NULL; } + + if (vport->ingress.offloads.drop_grp) { + mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp); + vport->ingress.offloads.drop_grp = NULL; + } } int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, @@ -252,6 +313,8 @@ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, if (mlx5_eswitch_vport_match_metadata_enabled(esw)) num_ftes++; + if (vport->vport == MLX5_VPORT_UPLINK) + num_ftes++; if (esw_acl_ingress_prio_tag_enabled(esw, vport)) num_ftes++; @@ -320,3 +383,27 @@ out: vport->metadata = vport->default_metadata; return err; } + +int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (IS_ERR(vport)) { + esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num); + return PTR_ERR(vport); + } + + return esw_acl_ingress_src_port_drop_create(esw, vport); +} + +void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (WARN_ON_ONCE(IS_ERR(vport))) { + esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num); + return; + } + + esw_acl_ingress_src_port_drop_destroy(esw, vport); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h index c57869b93d60..11d3d3978848 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h @@ -6,6 +6,7 @@ #include "eswitch.h" +#ifdef CONFIG_MLX5_ESWITCH /* Eswitch acl egress external APIs */ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport); @@ -25,5 +26,19 @@ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vpor void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num, u32 metadata); +void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num); +#else /* CONFIG_MLX5_ESWITCH */ +static void +mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, + u16 vport_num) +{} + +static int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, + u16 vport_num) +{ + return 0; +} +#endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_ACL_OFLD_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index f690f430f40f..4fbff7bcc155 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2021 Mellanox Technologies. */ +#include <linux/build_bug.h> #include <linux/list.h> #include <linux/notifier.h> #include <net/netevent.h> @@ -12,26 +13,57 @@ #define CREATE_TRACE_POINTS #include "diag/bridge_tracepoint.h" -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE 64000 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE 12000 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE 16000 #define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM 0 -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO (MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE / 4 - 1) -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_FILTER_GRP_IDX_FROM \ +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM \ (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO + 1) -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_FILTER_GRP_IDX_TO \ - (MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE / 2 - 1) -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \ - (MLX5_ESW_BRIDGE_INGRESS_TABLE_FILTER_GRP_IDX_TO + 1) -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO (MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE - 1) - -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE 64000 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO + 1) +static_assert(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE == 64000); + +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE 16000 +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE (32000 - 1) #define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM 0 -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE / 2 - 1) -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \ +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM \ (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1) -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE - 2) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE - 1) #define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM \ (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO + 1) -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO \ + MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO + 1) +static_assert(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE == 64000); #define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0 @@ -63,12 +95,14 @@ struct mlx5_esw_bridge { struct mlx5_flow_table *egress_ft; struct mlx5_flow_group *egress_vlan_fg; + struct mlx5_flow_group *egress_qinq_fg; struct mlx5_flow_group *egress_mac_fg; struct mlx5_flow_group *egress_miss_fg; struct mlx5_pkt_reformat *egress_miss_pkt_reformat; struct mlx5_flow_handle *egress_miss_handle; unsigned long ageing_time; u32 flags; + u16 vlan_proto; }; static void @@ -138,7 +172,9 @@ mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw) } static struct mlx5_flow_group * -mlx5_esw_bridge_ingress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *ingress_ft) +mlx5_esw_bridge_ingress_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto, + struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *fg; @@ -154,30 +190,53 @@ mlx5_esw_bridge_ingress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flo MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16); MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0); - MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); + if (vlan_proto == ETH_P_8021Q) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); + else if (vlan_proto == ETH_P_8021AD) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag); MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid); MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); - MLX5_SET(create_flow_group_in, in, start_flow_index, - MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM); - MLX5_SET(create_flow_group_in, in, end_flow_index, - MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO); + MLX5_SET(create_flow_group_in, in, start_flow_index, from); + MLX5_SET(create_flow_group_in, in, end_flow_index, to); fg = mlx5_create_flow_group(ingress_ft, in); kvfree(in); if (IS_ERR(fg)) esw_warn(esw->dev, - "Failed to create VLAN flow group for bridge ingress table (err=%ld)\n", - PTR_ERR(fg)); + "Failed to create VLAN(proto=%x) flow group for bridge ingress table (err=%ld)\n", + vlan_proto, PTR_ERR(fg)); return fg; } static struct mlx5_flow_group * -mlx5_esw_bridge_ingress_filter_fg_create(struct mlx5_eswitch *esw, - struct mlx5_flow_table *ingress_ft) +mlx5_esw_bridge_ingress_vlan_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO; + + return mlx5_esw_bridge_ingress_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, ingress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_qinq_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO; + + return mlx5_esw_bridge_ingress_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw, + ingress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(unsigned int from, unsigned int to, + u16 vlan_proto, struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *fg; @@ -193,27 +252,48 @@ mlx5_esw_bridge_ingress_filter_fg_create(struct mlx5_eswitch *esw, MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16); MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0); - MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); - + if (vlan_proto == ETH_P_8021Q) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); + else if (vlan_proto == ETH_P_8021AD) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag); MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); - MLX5_SET(create_flow_group_in, in, start_flow_index, - MLX5_ESW_BRIDGE_INGRESS_TABLE_FILTER_GRP_IDX_FROM); - MLX5_SET(create_flow_group_in, in, end_flow_index, - MLX5_ESW_BRIDGE_INGRESS_TABLE_FILTER_GRP_IDX_TO); + MLX5_SET(create_flow_group_in, in, start_flow_index, from); + MLX5_SET(create_flow_group_in, in, end_flow_index, to); fg = mlx5_create_flow_group(ingress_ft, in); if (IS_ERR(fg)) esw_warn(esw->dev, "Failed to create bridge ingress table VLAN filter flow group (err=%ld)\n", PTR_ERR(fg)); - kvfree(in); return fg; } static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_vlan_filter_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO; + + return mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(from, to, ETH_P_8021Q, esw, + ingress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_qinq_filter_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO; + + return mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(from, to, ETH_P_8021AD, esw, + ingress_ft); +} + +static struct mlx5_flow_group * mlx5_esw_bridge_ingress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *ingress_ft) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -250,7 +330,9 @@ mlx5_esw_bridge_ingress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow } static struct mlx5_flow_group * -mlx5_esw_bridge_egress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft) +mlx5_esw_bridge_egress_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto, + struct mlx5_eswitch *esw, + struct mlx5_flow_table *egress_ft) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *fg; @@ -265,13 +347,14 @@ mlx5_esw_bridge_egress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_47_16); MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_15_0); - MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); + if (vlan_proto == ETH_P_8021Q) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); + else if (vlan_proto == ETH_P_8021AD) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag); MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid); - MLX5_SET(create_flow_group_in, in, start_flow_index, - MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM); - MLX5_SET(create_flow_group_in, in, end_flow_index, - MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO); + MLX5_SET(create_flow_group_in, in, start_flow_index, from); + MLX5_SET(create_flow_group_in, in, end_flow_index, to); fg = mlx5_create_flow_group(egress_ft, in); if (IS_ERR(fg)) @@ -283,6 +366,25 @@ mlx5_esw_bridge_egress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow } static struct mlx5_flow_group * +mlx5_esw_bridge_egress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO; + + return mlx5_esw_bridge_egress_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, egress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_egress_qinq_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *egress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO; + + return mlx5_esw_bridge_egress_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw, egress_ft); +} + +static struct mlx5_flow_group * mlx5_esw_bridge_egress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -346,7 +448,7 @@ mlx5_esw_bridge_egress_miss_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow static int mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads) { - struct mlx5_flow_group *mac_fg, *filter_fg, *vlan_fg; + struct mlx5_flow_group *mac_fg, *qinq_filter_fg, *qinq_fg, *vlan_filter_fg, *vlan_fg; struct mlx5_flow_table *ingress_ft, *skip_ft; struct mlx5_eswitch *esw = br_offloads->esw; int err; @@ -374,10 +476,22 @@ mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads) goto err_vlan_fg; } - filter_fg = mlx5_esw_bridge_ingress_filter_fg_create(esw, ingress_ft); - if (IS_ERR(filter_fg)) { - err = PTR_ERR(filter_fg); - goto err_filter_fg; + vlan_filter_fg = mlx5_esw_bridge_ingress_vlan_filter_fg_create(esw, ingress_ft); + if (IS_ERR(vlan_filter_fg)) { + err = PTR_ERR(vlan_filter_fg); + goto err_vlan_filter_fg; + } + + qinq_fg = mlx5_esw_bridge_ingress_qinq_fg_create(esw, ingress_ft); + if (IS_ERR(qinq_fg)) { + err = PTR_ERR(qinq_fg); + goto err_qinq_fg; + } + + qinq_filter_fg = mlx5_esw_bridge_ingress_qinq_filter_fg_create(esw, ingress_ft); + if (IS_ERR(qinq_filter_fg)) { + err = PTR_ERR(qinq_filter_fg); + goto err_qinq_filter_fg; } mac_fg = mlx5_esw_bridge_ingress_mac_fg_create(esw, ingress_ft); @@ -389,13 +503,19 @@ mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads) br_offloads->ingress_ft = ingress_ft; br_offloads->skip_ft = skip_ft; br_offloads->ingress_vlan_fg = vlan_fg; - br_offloads->ingress_filter_fg = filter_fg; + br_offloads->ingress_vlan_filter_fg = vlan_filter_fg; + br_offloads->ingress_qinq_fg = qinq_fg; + br_offloads->ingress_qinq_filter_fg = qinq_filter_fg; br_offloads->ingress_mac_fg = mac_fg; return 0; err_mac_fg: - mlx5_destroy_flow_group(filter_fg); -err_filter_fg: + mlx5_destroy_flow_group(qinq_filter_fg); +err_qinq_filter_fg: + mlx5_destroy_flow_group(qinq_fg); +err_qinq_fg: + mlx5_destroy_flow_group(vlan_filter_fg); +err_vlan_filter_fg: mlx5_destroy_flow_group(vlan_fg); err_vlan_fg: mlx5_destroy_flow_table(skip_ft); @@ -409,8 +529,12 @@ mlx5_esw_bridge_ingress_table_cleanup(struct mlx5_esw_bridge_offloads *br_offloa { mlx5_destroy_flow_group(br_offloads->ingress_mac_fg); br_offloads->ingress_mac_fg = NULL; - mlx5_destroy_flow_group(br_offloads->ingress_filter_fg); - br_offloads->ingress_filter_fg = NULL; + mlx5_destroy_flow_group(br_offloads->ingress_qinq_filter_fg); + br_offloads->ingress_qinq_filter_fg = NULL; + mlx5_destroy_flow_group(br_offloads->ingress_qinq_fg); + br_offloads->ingress_qinq_fg = NULL; + mlx5_destroy_flow_group(br_offloads->ingress_vlan_filter_fg); + br_offloads->ingress_vlan_filter_fg = NULL; mlx5_destroy_flow_group(br_offloads->ingress_vlan_fg); br_offloads->ingress_vlan_fg = NULL; mlx5_destroy_flow_table(br_offloads->skip_ft); @@ -428,7 +552,7 @@ static int mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads, struct mlx5_esw_bridge *bridge) { - struct mlx5_flow_group *miss_fg = NULL, *mac_fg, *vlan_fg; + struct mlx5_flow_group *miss_fg = NULL, *mac_fg, *vlan_fg, *qinq_fg; struct mlx5_pkt_reformat *miss_pkt_reformat = NULL; struct mlx5_flow_handle *miss_handle = NULL; struct mlx5_eswitch *esw = br_offloads->esw; @@ -447,6 +571,12 @@ mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads, goto err_vlan_fg; } + qinq_fg = mlx5_esw_bridge_egress_qinq_fg_create(esw, egress_ft); + if (IS_ERR(qinq_fg)) { + err = PTR_ERR(qinq_fg); + goto err_qinq_fg; + } + mac_fg = mlx5_esw_bridge_egress_mac_fg_create(esw, egress_ft); if (IS_ERR(mac_fg)) { err = PTR_ERR(mac_fg); @@ -491,6 +621,7 @@ skip_miss_flow: bridge->egress_ft = egress_ft; bridge->egress_vlan_fg = vlan_fg; + bridge->egress_qinq_fg = qinq_fg; bridge->egress_mac_fg = mac_fg; bridge->egress_miss_fg = miss_fg; bridge->egress_miss_pkt_reformat = miss_pkt_reformat; @@ -498,6 +629,8 @@ skip_miss_flow: return 0; err_mac_fg: + mlx5_destroy_flow_group(qinq_fg); +err_qinq_fg: mlx5_destroy_flow_group(vlan_fg); err_vlan_fg: mlx5_destroy_flow_table(egress_ft); @@ -515,6 +648,7 @@ mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge) if (bridge->egress_miss_fg) mlx5_destroy_flow_group(bridge->egress_miss_fg); mlx5_destroy_flow_group(bridge->egress_mac_fg); + mlx5_destroy_flow_group(bridge->egress_qinq_fg); mlx5_destroy_flow_group(bridge->egress_vlan_fg); mlx5_destroy_flow_table(bridge->egress_ft); } @@ -559,10 +693,17 @@ mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char flow_act.pkt_reformat = vlan->pkt_reformat_push; flow_act.modify_hdr = vlan->pkt_mod_hdr_push_mark; } else if (vlan) { - MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, - outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, - outer_headers.cvlan_tag); + if (bridge->vlan_proto == ETH_P_8021Q) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.cvlan_tag); + } else if (bridge->vlan_proto == ETH_P_8021AD) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.svlan_tag); + } MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.first_vid); MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid, @@ -645,10 +786,17 @@ mlx5_esw_bridge_ingress_filter_flow_create(u16 vport_num, const unsigned char *a MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num)); - MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, - outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, - outer_headers.cvlan_tag); + if (bridge->vlan_proto == ETH_P_8021Q) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.cvlan_tag); + } else if (bridge->vlan_proto == ETH_P_8021AD) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.svlan_tag); + } handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, &dest, 1); @@ -696,10 +844,17 @@ mlx5_esw_bridge_egress_flow_create(u16 vport_num, u16 esw_owner_vhca_id, const u flow_act.pkt_reformat = vlan->pkt_reformat_pop; } - MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, - outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, - outer_headers.cvlan_tag); + if (bridge->vlan_proto == ETH_P_8021Q) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.cvlan_tag); + } else if (bridge->vlan_proto == ETH_P_8021AD) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.svlan_tag); + } MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.first_vid); MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid, @@ -774,6 +929,7 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex, bridge->ifindex = ifindex; bridge->refcnt = 1; bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME); + bridge->vlan_proto = ETH_P_8021Q; list_add(&bridge->list, &br_offloads->bridges); return bridge; @@ -911,12 +1067,13 @@ mlx5_esw_bridge_vlan_lookup(u16 vid, struct mlx5_esw_bridge_port *port) } static int -mlx5_esw_bridge_vlan_push_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +mlx5_esw_bridge_vlan_push_create(u16 vlan_proto, struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_eswitch *esw) { struct { __be16 h_vlan_proto; __be16 h_vlan_TCI; - } vlan_hdr = { htons(ETH_P_8021Q), htons(vlan->vid) }; + } vlan_hdr = { htons(vlan_proto), htons(vlan->vid) }; struct mlx5_pkt_reformat_params reformat_params = {}; struct mlx5_pkt_reformat *pkt_reformat; @@ -1008,36 +1165,58 @@ mlx5_esw_bridge_vlan_push_mark_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct vlan->pkt_mod_hdr_push_mark = NULL; } -static struct mlx5_esw_bridge_vlan * -mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *port, - struct mlx5_eswitch *esw) +static int +mlx5_esw_bridge_vlan_push_pop_create(u16 vlan_proto, u16 flags, struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_eswitch *esw) { - struct mlx5_esw_bridge_vlan *vlan; int err; - vlan = kvzalloc(sizeof(*vlan), GFP_KERNEL); - if (!vlan) - return ERR_PTR(-ENOMEM); - - vlan->vid = vid; - vlan->flags = flags; - INIT_LIST_HEAD(&vlan->fdb_list); - if (flags & BRIDGE_VLAN_INFO_PVID) { - err = mlx5_esw_bridge_vlan_push_create(vlan, esw); + err = mlx5_esw_bridge_vlan_push_create(vlan_proto, vlan, esw); if (err) - goto err_vlan_push; + return err; err = mlx5_esw_bridge_vlan_push_mark_create(vlan, esw); if (err) goto err_vlan_push_mark; } + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) { err = mlx5_esw_bridge_vlan_pop_create(vlan, esw); if (err) goto err_vlan_pop; } + return 0; + +err_vlan_pop: + if (vlan->pkt_mod_hdr_push_mark) + mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw); +err_vlan_push_mark: + if (vlan->pkt_reformat_push) + mlx5_esw_bridge_vlan_push_cleanup(vlan, esw); + return err; +} + +static struct mlx5_esw_bridge_vlan * +mlx5_esw_bridge_vlan_create(u16 vlan_proto, u16 vid, u16 flags, struct mlx5_esw_bridge_port *port, + struct mlx5_eswitch *esw) +{ + struct mlx5_esw_bridge_vlan *vlan; + int err; + + vlan = kvzalloc(sizeof(*vlan), GFP_KERNEL); + if (!vlan) + return ERR_PTR(-ENOMEM); + + vlan->vid = vid; + vlan->flags = flags; + INIT_LIST_HEAD(&vlan->fdb_list); + + err = mlx5_esw_bridge_vlan_push_pop_create(vlan_proto, flags, vlan, esw); + if (err) + goto err_vlan_push_pop; + err = xa_insert(&port->vlans, vid, vlan, GFP_KERNEL); if (err) goto err_xa_insert; @@ -1048,13 +1227,11 @@ mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *por err_xa_insert: if (vlan->pkt_reformat_pop) mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw); -err_vlan_pop: if (vlan->pkt_mod_hdr_push_mark) mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw); -err_vlan_push_mark: if (vlan->pkt_reformat_push) mlx5_esw_bridge_vlan_push_cleanup(vlan, esw); -err_vlan_push: +err_vlan_push_pop: kvfree(vlan); return ERR_PTR(err); } @@ -1102,6 +1279,50 @@ static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port, mlx5_esw_bridge_vlan_cleanup(port, vlan, bridge); } +static int mlx5_esw_bridge_port_vlans_recreate(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; + struct mlx5_esw_bridge_vlan *vlan; + unsigned long i; + int err; + + xa_for_each(&port->vlans, i, vlan) { + mlx5_esw_bridge_vlan_flush(vlan, bridge); + err = mlx5_esw_bridge_vlan_push_pop_create(bridge->vlan_proto, vlan->flags, vlan, + br_offloads->esw); + if (err) { + esw_warn(br_offloads->esw->dev, + "Failed to create VLAN=%u(proto=%x) push/pop actions (vport=%u,err=%d)\n", + vlan->vid, bridge->vlan_proto, port->vport_num, + err); + return err; + } + } + + return 0; +} + +static int +mlx5_esw_bridge_vlans_recreate(struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; + struct mlx5_esw_bridge_port *port; + unsigned long i; + int err; + + xa_for_each(&br_offloads->ports, i, port) { + if (port->bridge != bridge) + continue; + + err = mlx5_esw_bridge_port_vlans_recreate(port, bridge); + if (err) + return err; + } + + return 0; +} + static struct mlx5_esw_bridge_vlan * mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge *bridge, struct mlx5_eswitch *esw) @@ -1287,6 +1508,32 @@ int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, boo return 0; } +int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, + br_offloads); + if (!port) + return -EINVAL; + + bridge = port->bridge; + if (bridge->vlan_proto == proto) + return 0; + if (proto != ETH_P_8021Q && proto != ETH_P_8021AD) { + esw_warn(br_offloads->esw->dev, "Can't set unsupported VLAN protocol %x", proto); + return -EOPNOTSUPP; + } + + mlx5_esw_bridge_fdb_flush(bridge); + bridge->vlan_proto = proto; + mlx5_esw_bridge_vlans_recreate(bridge); + + return 0; +} + static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16 flags, struct mlx5_esw_bridge_offloads *br_offloads, struct mlx5_esw_bridge *bridge) @@ -1434,7 +1681,8 @@ int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge); } - vlan = mlx5_esw_bridge_vlan_create(vid, flags, port, br_offloads->esw); + vlan = mlx5_esw_bridge_vlan_create(port->bridge->vlan_proto, vid, flags, port, + br_offloads->esw); if (IS_ERR(vlan)) { NL_SET_ERR_MSG_MOD(extack, "Failed to create VLAN entry"); return PTR_ERR(vlan); @@ -1574,6 +1822,8 @@ struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw) { struct mlx5_esw_bridge_offloads *br_offloads; + ASSERT_RTNL(); + br_offloads = kvzalloc(sizeof(*br_offloads), GFP_KERNEL); if (!br_offloads) return ERR_PTR(-ENOMEM); @@ -1590,6 +1840,8 @@ void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw) { struct mlx5_esw_bridge_offloads *br_offloads = esw->br_offloads; + ASSERT_RTNL(); + if (!br_offloads) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h index efc39975226e..10851a515bca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h @@ -26,7 +26,9 @@ struct mlx5_esw_bridge_offloads { struct mlx5_flow_table *ingress_ft; struct mlx5_flow_group *ingress_vlan_fg; - struct mlx5_flow_group *ingress_filter_fg; + struct mlx5_flow_group *ingress_vlan_filter_fg; + struct mlx5_flow_group *ingress_qinq_fg; + struct mlx5_flow_group *ingress_qinq_filter_fg; struct mlx5_flow_group *ingress_mac_fg; struct mlx5_flow_table *skip_ft; @@ -60,6 +62,8 @@ int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsign struct mlx5_esw_bridge_offloads *br_offloads); int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable, struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto, + struct mlx5_esw_bridge_offloads *br_offloads); int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c new file mode 100644 index 000000000000..2db13c71e88c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include <linux/debugfs.h> +#include "eswitch.h" + +enum vnic_diag_counter { + MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE, + MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW, + MLX5_VNIC_DIAG_COMP_EQ_OVERRUN, + MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN, + MLX5_VNIC_DIAG_CQ_OVERRUN, + MLX5_VNIC_DIAG_INVALID_COMMAND, + MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND, +}; + +static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter, + u32 *val) +{ + u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; + struct mlx5_core_dev *dev = vport->dev; + u16 vport_num = vport->vport; + void *vnic_diag_out; + int err; + + MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); + MLX5_SET(query_vnic_env_in, in, vport_number, vport_num); + if (!mlx5_esw_is_manager_vport(dev->priv.eswitch, vport_num)) + MLX5_SET(query_vnic_env_in, in, other_vport, 1); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + vnic_diag_out = MLX5_ADDR_OF(query_vnic_env_out, out, vport_env); + switch (counter) { + case MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE: + *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, total_error_queues); + break; + case MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW: + *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, + send_queue_priority_update_flow); + break; + case MLX5_VNIC_DIAG_COMP_EQ_OVERRUN: + *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, comp_eq_overrun); + break; + case MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN: + *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, async_eq_overrun); + break; + case MLX5_VNIC_DIAG_CQ_OVERRUN: + *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, cq_overrun); + break; + case MLX5_VNIC_DIAG_INVALID_COMMAND: + *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, invalid_command); + break; + case MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND: + *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, quota_exceeded_command); + break; + } + + return 0; +} + +static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport, + enum vnic_diag_counter type) +{ + u32 val = 0; + int ret; + + ret = mlx5_esw_query_vnic_diag(vport, type, &val); + if (ret) + return ret; + + seq_printf(file, "%d\n", val); + return 0; +} + +static int total_q_under_processor_handle_show(struct seq_file *file, void *priv) +{ + return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE); +} + +static int send_queue_priority_update_flow_show(struct seq_file *file, void *priv) +{ + return __show_vnic_diag(file, file->private, + MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW); +} + +static int comp_eq_overrun_show(struct seq_file *file, void *priv) +{ + return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_COMP_EQ_OVERRUN); +} + +static int async_eq_overrun_show(struct seq_file *file, void *priv) +{ + return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN); +} + +static int cq_overrun_show(struct seq_file *file, void *priv) +{ + return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_CQ_OVERRUN); +} + +static int invalid_command_show(struct seq_file *file, void *priv) +{ + return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_INVALID_COMMAND); +} + +static int quota_exceeded_command_show(struct seq_file *file, void *priv) +{ + return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND); +} + +DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle); +DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow); +DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun); +DEFINE_SHOW_ATTRIBUTE(async_eq_overrun); +DEFINE_SHOW_ATTRIBUTE(cq_overrun); +DEFINE_SHOW_ATTRIBUTE(invalid_command); +DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command); + +void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + debugfs_remove_recursive(vport->dbgfs); + vport->dbgfs = NULL; +} + +/* vnic diag dir name is "pf", "ecpf" or "{vf/sf}_xxxx" */ +#define VNIC_DIAG_DIR_NAME_MAX_LEN 8 + +void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + struct dentry *vnic_diag; + char dir_name[VNIC_DIAG_DIR_NAME_MAX_LEN]; + int err; + + if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) + return; + + if (vport_num == MLX5_VPORT_PF) { + strcpy(dir_name, "pf"); + } else if (vport_num == MLX5_VPORT_ECPF) { + strcpy(dir_name, "ecpf"); + } else { + err = snprintf(dir_name, VNIC_DIAG_DIR_NAME_MAX_LEN, "%s_%d", is_sf ? "sf" : "vf", + is_sf ? sf_num : vport_num - MLX5_VPORT_FIRST_VF); + if (WARN_ON(err < 0)) + return; + } + + vport->dbgfs = debugfs_create_dir(dir_name, esw->dbgfs); + vnic_diag = debugfs_create_dir("vnic_diag", vport->dbgfs); + + if (MLX5_CAP_GEN(esw->dev, vnic_env_queue_counters)) { + debugfs_create_file("total_q_under_processor_handle", 0444, vnic_diag, vport, + &total_q_under_processor_handle_fops); + debugfs_create_file("send_queue_priority_update_flow", 0444, vnic_diag, vport, + &send_queue_priority_update_flow_fops); + } + + if (MLX5_CAP_GEN(esw->dev, eq_overrun_count)) { + debugfs_create_file("comp_eq_overrun", 0444, vnic_diag, vport, + &comp_eq_overrun_fops); + debugfs_create_file("async_eq_overrun", 0444, vnic_diag, vport, + &async_eq_overrun_fops); + } + + if (MLX5_CAP_GEN(esw->dev, vnic_env_cq_overrun)) + debugfs_create_file("cq_overrun", 0444, vnic_diag, vport, &cq_overrun_fops); + + if (MLX5_CAP_GEN(esw->dev, invalid_command_count)) + debugfs_create_file("invalid_command", 0444, vnic_diag, vport, + &invalid_command_fops); + + if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count)) + debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport, + "a_exceeded_command_fops); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 7f9b96d9537e..9bc7be95db54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -87,11 +87,11 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_ devlink = priv_to_devlink(dev); dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); - err = devlink_port_register(devlink, dl_port, dl_port_index); + err = devl_port_register(devlink, dl_port, dl_port_index); if (err) goto reg_err; - err = devlink_rate_leaf_create(dl_port, vport); + err = devl_rate_leaf_create(dl_port, vport); if (err) goto rate_err; @@ -99,7 +99,7 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_ return 0; rate_err: - devlink_port_unregister(dl_port); + devl_port_unregister(dl_port); reg_err: mlx5_esw_dl_port_free(dl_port); return err; @@ -118,10 +118,10 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo if (vport->dl_port->devlink_rate) { mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL); - devlink_rate_leaf_destroy(vport->dl_port); + devl_rate_leaf_destroy(vport->dl_port); } - devlink_port_unregister(vport->dl_port); + devl_port_unregister(vport->dl_port); mlx5_esw_dl_port_free(vport->dl_port); vport->dl_port = NULL; } @@ -156,11 +156,11 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p devlink_port_attrs_pci_sf_set(dl_port, controller, pfnum, sfnum, !!controller); devlink = priv_to_devlink(dev); dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); - err = devlink_port_register(devlink, dl_port, dl_port_index); + err = devl_port_register(devlink, dl_port, dl_port_index); if (err) return err; - err = devlink_rate_leaf_create(dl_port, vport); + err = devl_rate_leaf_create(dl_port, vport); if (err) goto rate_err; @@ -168,7 +168,7 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p return 0; rate_err: - devlink_port_unregister(dl_port); + devl_port_unregister(dl_port); return err; } @@ -182,9 +182,9 @@ void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num if (vport->dl_port->devlink_rate) { mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL); - devlink_rate_leaf_destroy(vport->dl_port); + devl_rate_leaf_destroy(vport->dl_port); } - devlink_port_unregister(vport->dl_port); + devl_port_unregister(vport->dl_port); vport->dl_port = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h index 3401188e0a60..51ac24e6ec3c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h @@ -21,7 +21,7 @@ DECLARE_EVENT_CLASS(mlx5_esw_bridge_fdb_template, __field(unsigned int, used) ), TP_fast_assign( - strncpy(__entry->dev_name, + strscpy(__entry->dev_name, netdev_name(fdb->dev), IFNAMSIZ); memcpy(__entry->addr, fdb->key.addr, ETH_ALEN); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c index 425c91814b34..c9a91158e99c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c @@ -14,6 +14,7 @@ #include "fs_core.h" #include "esw/indir_table.h" #include "lib/fs_chains.h" +#include "en/mod_hdr.h" #define MLX5_ESW_INDIR_TABLE_SIZE 128 #define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2) @@ -77,15 +78,19 @@ mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, struct mlx5_core_dev *dest_mdev) { struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + bool vf_sf_vport; + + vf_sf_vport = mlx5_eswitch_is_vf_vport(esw, vport_num) || + mlx5_esw_is_sf_vport(esw, vport_num); /* Use indirect table for all IP traffic from UL to VF with vport * destination when source rewrite flag is set. */ return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK && - mlx5_eswitch_is_vf_vport(esw, vport_num) && + vf_sf_vport && esw->dev == dest_mdev && attr->ip_version && - attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE; + attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE; } u16 @@ -226,7 +231,7 @@ static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw, goto err_handle; } - dealloc_mod_hdr_actions(&mod_acts); + mlx5e_mod_hdr_dealloc(&mod_acts); rule->handle = handle; rule->vni = esw_attr->rx_tun_attr->vni; rule->mh = flow_act.modify_hdr; @@ -243,7 +248,7 @@ err_table: mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr); err_mod_hdr_alloc: err_mod_hdr_regc1: - dealloc_mod_hdr_actions(&mod_acts); + mlx5e_mod_hdr_dealloc(&mod_acts); err_mod_hdr_regc0: err_ethertype: kfree(rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index df277a6cddc0..fabe49a35a5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -11,6 +11,7 @@ #include "mlx5_core.h" #include "eswitch.h" #include "fs_core.h" +#include "fs_ft_pool.h" #include "esw/qos.h" enum { @@ -95,8 +96,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) if (!flow_group_in) return -ENOMEM; - table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - ft_attr.max_fte = table_size; + ft_attr.max_fte = POOL_NEXT_SIZE; ft_attr.prio = LEGACY_FDB_PRIO; fdb = mlx5_create_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { @@ -105,6 +105,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) goto out; } esw->fdb_table.legacy.fdb = fdb; + table_size = fdb->max_fte; /* Addresses group : Full match unicast/multicast addresses */ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -431,7 +432,7 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, int err = 0; if (!mlx5_esw_allowed(esw)) - return -EPERM; + return vlan ? -EPERM : 0; if (vlan || qos) set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; @@ -522,9 +523,7 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, return PTR_ERR(evport); mutex_lock(&esw->state_lock); - err = mlx5_esw_qos_set_vport_min_rate(esw, evport, min_rate, NULL); - if (!err) - err = mlx5_esw_qos_set_vport_max_rate(esw, evport, max_rate, NULL); + err = mlx5_esw_qos_set_vport_rate(esw, evport, max_rate, min_rate); mutex_unlock(&esw->state_lock); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index d377ddc70fc7..4f8a24d84a86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -204,10 +204,8 @@ static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divid return 0; } -int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, - struct mlx5_vport *evport, - u32 min_rate, - struct netlink_ext_ack *extack) +static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, + u32 min_rate, struct netlink_ext_ack *extack) { u32 fw_max_bw_share, previous_min_rate; bool min_rate_supported; @@ -231,10 +229,8 @@ int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, return err; } -int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, - struct mlx5_vport *evport, - u32 max_rate, - struct netlink_ext_ack *extack) +static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, + u32 max_rate, struct netlink_ext_ack *extack) { u32 act_max_rate = max_rate; bool max_rate_supported; @@ -432,16 +428,13 @@ static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, } static struct mlx5_esw_rate_group * -esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_esw_rate_group *group; u32 divider; int err; - if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth)) - return ERR_PTR(-EOPNOTSUPP); - group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) return ERR_PTR(-ENOMEM); @@ -482,9 +475,32 @@ err_sched_elem: return ERR_PTR(err); } -static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, - struct mlx5_esw_rate_group *group, - struct netlink_ext_ack *extack) +static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack); +static void esw_qos_put(struct mlx5_eswitch *esw); + +static struct mlx5_esw_rate_group * +esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group; + int err; + + if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth)) + return ERR_PTR(-EOPNOTSUPP); + + err = esw_qos_get(esw, extack); + if (err) + return ERR_PTR(err); + + group = __esw_qos_create_rate_group(esw, extack); + if (IS_ERR(group)) + esw_qos_put(esw); + + return group; +} + +static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) { u32 divider; int err; @@ -503,7 +519,21 @@ static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed"); trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix); + kfree(group); + + return err; +} + +static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + int err; + + err = __esw_qos_destroy_rate_group(esw, group, extack); + esw_qos_put(esw); + return err; } @@ -526,7 +556,7 @@ static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type) return false; } -void mlx5_esw_qos_create(struct mlx5_eswitch *esw) +static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = esw->dev; @@ -534,14 +564,10 @@ void mlx5_esw_qos_create(struct mlx5_eswitch *esw) int err; if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) - return; + return -EOPNOTSUPP; if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) - return; - - mutex_lock(&esw->state_lock); - if (esw->qos.enabled) - goto unlock; + return -EOPNOTSUPP; MLX5_SET(scheduling_context, tsar_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); @@ -555,75 +581,94 @@ void mlx5_esw_qos_create(struct mlx5_eswitch *esw) &esw->qos.root_tsar_ix); if (err) { esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); - goto unlock; + return err; } INIT_LIST_HEAD(&esw->qos.groups); if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { - esw->qos.group0 = esw_qos_create_rate_group(esw, NULL); + esw->qos.group0 = __esw_qos_create_rate_group(esw, extack); if (IS_ERR(esw->qos.group0)) { esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n", PTR_ERR(esw->qos.group0)); + err = PTR_ERR(esw->qos.group0); goto err_group0; } } - esw->qos.enabled = true; -unlock: - mutex_unlock(&esw->state_lock); - return; + refcount_set(&esw->qos.refcnt, 1); + + return 0; err_group0: - err = mlx5_destroy_scheduling_element_cmd(esw->dev, - SCHEDULING_HIERARCHY_E_SWITCH, - esw->qos.root_tsar_ix); - if (err) - esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); - mutex_unlock(&esw->state_lock); + if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, + esw->qos.root_tsar_ix)) + esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n"); + + return err; } -void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw) +static void esw_qos_destroy(struct mlx5_eswitch *esw) { - struct devlink *devlink = priv_to_devlink(esw->dev); int err; - devlink_rate_nodes_destroy(devlink); - mutex_lock(&esw->state_lock); - if (!esw->qos.enabled) - goto unlock; - if (esw->qos.group0) - esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL); + __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL); err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, esw->qos.root_tsar_ix); if (err) esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); +} - esw->qos.enabled = false; -unlock: - mutex_unlock(&esw->state_lock); +static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + int err = 0; + + lockdep_assert_held(&esw->state_lock); + + if (!refcount_inc_not_zero(&esw->qos.refcnt)) { + /* esw_qos_create() set refcount to 1 only on success. + * No need to decrement on failure. + */ + err = esw_qos_create(esw, extack); + } + + return err; +} + +static void esw_qos_put(struct mlx5_eswitch *esw) +{ + lockdep_assert_held(&esw->state_lock); + if (refcount_dec_and_test(&esw->qos.refcnt)) + esw_qos_destroy(esw); } -int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, - u32 max_rate, u32 bw_share) +static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) { int err; lockdep_assert_held(&esw->state_lock); - if (!esw->qos.enabled) + if (vport->qos.enabled) return 0; - if (vport->qos.enabled) - return -EEXIST; + err = esw_qos_get(esw, extack); + if (err) + return err; vport->qos.group = esw->qos.group0; err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share); - if (!err) { - vport->qos.enabled = true; - trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate); - } + if (err) + goto err_out; + + vport->qos.enabled = true; + trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate); + + return 0; + +err_out: + esw_qos_put(esw); return err; } @@ -633,7 +678,7 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo int err; lockdep_assert_held(&esw->state_lock); - if (!esw->qos.enabled || !vport->qos.enabled) + if (!vport->qos.enabled) return; WARN(vport->qos.group && vport->qos.group != esw->qos.group0, "Disabling QoS on port before detaching it from group"); @@ -645,8 +690,27 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", vport->vport, err); - vport->qos.enabled = false; + memset(&vport->qos, 0, sizeof(vport->qos)); trace_mlx5_esw_vport_qos_destroy(vport); + + esw_qos_put(esw); +} + +int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + u32 max_rate, u32 min_rate) +{ + int err; + + lockdep_assert_held(&esw->state_lock); + err = esw_qos_vport_enable(esw, vport, 0, 0, NULL); + if (err) + return err; + + err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL); + if (!err) + err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL); + + return err; } int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps) @@ -654,22 +718,29 @@ int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_vport *vport; u32 bitmask; + int err; vport = mlx5_eswitch_get_vport(esw, vport_num); if (IS_ERR(vport)) return PTR_ERR(vport); - if (!vport->qos.enabled) - return -EOPNOTSUPP; - - MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); - bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + mutex_lock(&esw->state_lock); + if (!vport->qos.enabled) { + /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */ + err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL); + } else { + MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); + + bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + err = mlx5_modify_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + ctx, + vport->qos.esw_tsar_ix, + bitmask); + } + mutex_unlock(&esw->state_lock); - return mlx5_modify_scheduling_element_cmd(esw->dev, - SCHEDULING_HIERARCHY_E_SWITCH, - ctx, - vport->qos.esw_tsar_ix, - bitmask); + return err; } #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */ @@ -728,7 +799,12 @@ int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void return err; mutex_lock(&esw->state_lock); - err = mlx5_esw_qos_set_vport_min_rate(esw, vport, tx_share, extack); + err = esw_qos_vport_enable(esw, vport, 0, 0, extack); + if (err) + goto unlock; + + err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack); +unlock: mutex_unlock(&esw->state_lock); return err; } @@ -749,7 +825,12 @@ int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void * return err; mutex_lock(&esw->state_lock); - err = mlx5_esw_qos_set_vport_max_rate(esw, vport, tx_max, extack); + err = esw_qos_vport_enable(esw, vport, 0, 0, extack); + if (err) + goto unlock; + + err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack); +unlock: mutex_unlock(&esw->state_lock); return err; } @@ -843,10 +924,16 @@ int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, struct netlink_ext_ack *extack) { - int err; + int err = 0; mutex_lock(&esw->state_lock); - err = esw_qos_vport_update_group(esw, vport, group, extack); + if (!vport->qos.enabled && !group) + goto unlock; + + err = esw_qos_vport_enable(esw, vport, 0, 0, extack); + if (!err) + err = esw_qos_vport_update_group(esw, vport, group, extack); +unlock: mutex_unlock(&esw->state_lock); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h index 28451abe2d2f..0141e9d52037 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h @@ -6,18 +6,8 @@ #ifdef CONFIG_MLX5_ESWITCH -int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, - struct mlx5_vport *evport, - u32 min_rate, - struct netlink_ext_ack *extack); -int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, - struct mlx5_vport *evport, - u32 max_rate, - struct netlink_ext_ack *extack); -void mlx5_esw_qos_create(struct mlx5_eswitch *esw); -void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw); -int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, - u32 max_rate, u32 bw_share); +int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, + u32 max_rate, u32 min_rate); void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport); int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 51a8cecc4a7c..2169486c4bfb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -36,6 +36,7 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/mpfs.h> +#include <linux/debugfs.h> #include "esw/acl/lgcy.h" #include "esw/legacy.h" #include "esw/qos.h" @@ -781,9 +782,6 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) if (err) return err; - /* Attach vport to the eswitch rate limiter */ - mlx5_esw_qos_vport_enable(esw, vport, vport->qos.max_rate, vport->qos.bw_share); - if (mlx5_esw_is_manager_vport(esw, vport_num)) return 0; @@ -1005,6 +1003,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, if (err) return err; + mlx5_esw_vport_debugfs_create(esw, vport_num, false, 0); err = esw_offloads_load_rep(esw, vport_num); if (err) goto err_rep; @@ -1012,6 +1011,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, return err; err_rep: + mlx5_esw_vport_debugfs_destroy(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num); return err; } @@ -1019,6 +1019,7 @@ err_rep: void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num) { esw_offloads_unload_rep(esw, vport_num); + mlx5_esw_vport_debugfs_destroy(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num); } @@ -1155,8 +1156,6 @@ mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) { const u32 *out; - WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE); - if (num_vfs < 0) return; @@ -1189,6 +1188,9 @@ static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw) int total_vports; int err; + if (esw->flags & MLX5_ESWITCH_VPORT_ACL_NS_CREATED) + return 0; + total_vports = mlx5_eswitch_get_total_vports(dev); if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { @@ -1206,6 +1208,7 @@ static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw) } else { esw_warn(dev, "ingress ACL is not supported by FW\n"); } + esw->flags |= MLX5_ESWITCH_VPORT_ACL_NS_CREATED; return 0; err: @@ -1218,6 +1221,7 @@ static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw) { struct mlx5_core_dev *dev = esw->dev; + esw->flags &= ~MLX5_ESWITCH_VPORT_ACL_NS_CREATED; if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) mlx5_fs_ingress_acls_cleanup(dev); if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) @@ -1227,7 +1231,6 @@ static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw) /** * mlx5_eswitch_enable_locked - Enable eswitch * @esw: Pointer to eswitch - * @mode: Eswitch mode to enable * @num_vfs: Enable eswitch for given number of VFs. This is optional. * Valid value are 0, > 0 and MLX5_ESWITCH_IGNORE_NUM_VFS. * Caller should pass num_vfs > 0 when enabling eswitch for @@ -1241,7 +1244,7 @@ static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw) * mode. If num_vfs >=0 is provided, it setup VF related eswitch vports. * It returns 0 on success or error code on failure. */ -int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) +int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) { int err; @@ -1260,11 +1263,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) mlx5_eswitch_update_num_of_vfs(esw, num_vfs); - mlx5_esw_qos_create(esw); - - esw->mode = mode; - - if (mode == MLX5_ESWITCH_LEGACY) { + if (esw->mode == MLX5_ESWITCH_LEGACY) { err = esw_legacy_enable(esw); } else { mlx5_rescan_drivers(esw->dev); @@ -1274,23 +1273,19 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) if (err) goto abort; + esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED; + mlx5_eswitch_event_handlers_register(esw); esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n", - mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", + esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", esw->esw_funcs.num_vfs, esw->enabled_vports); - mlx5_esw_mode_change_notify(esw, mode); + mlx5_esw_mode_change_notify(esw, esw->mode); return 0; abort: - esw->mode = MLX5_ESWITCH_NONE; - - if (mode == MLX5_ESWITCH_OFFLOADS) - mlx5_rescan_drivers(esw->dev); - - mlx5_esw_qos_destroy(esw); mlx5_esw_acls_ns_cleanup(esw); return err; } @@ -1311,14 +1306,16 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) if (!mlx5_esw_allowed(esw)) return 0; - toggle_lag = esw->mode == MLX5_ESWITCH_NONE; + devl_assert_locked(priv_to_devlink(esw->dev)); + + toggle_lag = !mlx5_esw_is_fdb_created(esw); if (toggle_lag) mlx5_lag_disable_change(esw->dev); down_write(&esw->mode_lock); - if (esw->mode == MLX5_ESWITCH_NONE) { - ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs); + if (!mlx5_esw_is_fdb_created(esw)) { + ret = mlx5_eswitch_enable_locked(esw, num_vfs); } else { enum mlx5_eswitch_vport_event vport_events; @@ -1336,53 +1333,82 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) return ret; } -void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) +/* When disabling sriov, free driver level resources. */ +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) { - int old_mode; - - lockdep_assert_held_write(&esw->mode_lock); - - if (esw->mode == MLX5_ESWITCH_NONE) + if (!mlx5_esw_allowed(esw)) return; - esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", + devl_assert_locked(priv_to_devlink(esw->dev)); + down_write(&esw->mode_lock); + /* If driver is unloaded, this function is called twice by remove_one() + * and mlx5_unload(). Prevent the second call. + */ + if (!esw->esw_funcs.num_vfs && !clear_vf) + goto unlock; + + esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", esw->esw_funcs.num_vfs, esw->enabled_vports); + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + if (clear_vf) + mlx5_eswitch_clear_vf_vports_info(esw); + /* If disabling sriov in switchdev mode, free meta rules here + * because it depends on num_vfs. + */ + if (esw->mode == MLX5_ESWITCH_OFFLOADS) { + struct devlink *devlink = priv_to_devlink(esw->dev); + + devl_rate_nodes_destroy(devlink); + } + + esw->esw_funcs.num_vfs = 0; + +unlock: + up_write(&esw->mode_lock); +} + +/* Free resources for corresponding eswitch mode. It is called by devlink + * when changing eswitch mode or modprobe when unloading driver. + */ +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) +{ + struct devlink *devlink = priv_to_devlink(esw->dev); + /* Notify eswitch users that it is exiting from current mode. * So that it can do necessary cleanup before the eswitch is disabled. */ - mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_NONE); + mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_LEGACY); mlx5_eswitch_event_handlers_unregister(esw); - if (esw->mode == MLX5_ESWITCH_LEGACY) - esw_legacy_disable(esw); - else if (esw->mode == MLX5_ESWITCH_OFFLOADS) - esw_offloads_disable(esw); - - old_mode = esw->mode; - esw->mode = MLX5_ESWITCH_NONE; - - if (old_mode == MLX5_ESWITCH_OFFLOADS) - mlx5_rescan_drivers(esw->dev); + esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", + esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->enabled_vports); - mlx5_esw_qos_destroy(esw); - mlx5_esw_acls_ns_cleanup(esw); + if (esw->fdb_table.flags & MLX5_ESW_FDB_CREATED) { + esw->fdb_table.flags &= ~MLX5_ESW_FDB_CREATED; + if (esw->mode == MLX5_ESWITCH_OFFLOADS) + esw_offloads_disable(esw); + else if (esw->mode == MLX5_ESWITCH_LEGACY) + esw_legacy_disable(esw); + mlx5_esw_acls_ns_cleanup(esw); + } - if (clear_vf) - mlx5_eswitch_clear_vf_vports_info(esw); + if (esw->mode == MLX5_ESWITCH_OFFLOADS) + devl_rate_nodes_destroy(devlink); } -void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) +void mlx5_eswitch_disable(struct mlx5_eswitch *esw) { if (!mlx5_esw_allowed(esw)) return; + devl_assert_locked(priv_to_devlink(esw->dev)); mlx5_lag_disable_change(esw->dev); down_write(&esw->mode_lock); - mlx5_eswitch_disable_locked(esw, clear_vf); - esw->esw_funcs.num_vfs = 0; + mlx5_eswitch_disable_locked(esw); up_write(&esw->mode_lock); mlx5_lag_enable_change(esw->dev); } @@ -1573,22 +1599,25 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) ida_init(&esw->offloads.vport_metadata_ida); xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC); mutex_init(&esw->state_lock); - lockdep_register_key(&esw->mode_lock_key); init_rwsem(&esw->mode_lock); - lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key); + refcount_set(&esw->qos.refcnt, 0); esw->enabled_vports = 0; - esw->mode = MLX5_ESWITCH_NONE; + esw->mode = MLX5_ESWITCH_LEGACY; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; else esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; + if (MLX5_ESWITCH_MANAGER(dev) && + mlx5_esw_vport_match_metadata_supported(esw)) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; dev->priv.eswitch = esw; BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head); + esw->dbgfs = debugfs_create_dir("esw", mlx5_debugfs_get_dev_root(esw->dev)); esw_info(dev, "Total vports %d, per vport: max uc(%d) max mc(%d)\n", esw->total_vports, @@ -1612,9 +1641,10 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw_info(esw->dev, "cleanup\n"); + debugfs_remove_recursive(esw->dbgfs); esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); - lockdep_unregister_key(&esw->mode_lock_key); + WARN_ON(refcount_read(&esw->qos.refcnt)); mutex_destroy(&esw->state_lock); WARN_ON(!xa_empty(&esw->offloads.vhca_map)); xa_destroy(&esw->offloads.vhca_map); @@ -1703,82 +1733,6 @@ bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num) return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_SF); } -static bool -is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num) -{ - return vport_num == MLX5_VPORT_PF || - mlx5_eswitch_is_vf_vport(esw, vport_num) || - mlx5_esw_is_sf_vport(esw, vport_num); -} - -int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, - u8 *hw_addr, int *hw_addr_len, - struct netlink_ext_ack *extack) -{ - struct mlx5_eswitch *esw; - struct mlx5_vport *vport; - int err = -EOPNOTSUPP; - u16 vport_num; - - esw = mlx5_devlink_eswitch_get(port->devlink); - if (IS_ERR(esw)) - return PTR_ERR(esw); - - vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); - if (!is_port_function_supported(esw, vport_num)) - return -EOPNOTSUPP; - - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) { - NL_SET_ERR_MSG_MOD(extack, "Invalid port"); - return PTR_ERR(vport); - } - - mutex_lock(&esw->state_lock); - if (vport->enabled) { - ether_addr_copy(hw_addr, vport->info.mac); - *hw_addr_len = ETH_ALEN; - err = 0; - } - mutex_unlock(&esw->state_lock); - return err; -} - -int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, - const u8 *hw_addr, int hw_addr_len, - struct netlink_ext_ack *extack) -{ - struct mlx5_eswitch *esw; - struct mlx5_vport *vport; - int err = -EOPNOTSUPP; - u16 vport_num; - - esw = mlx5_devlink_eswitch_get(port->devlink); - if (IS_ERR(esw)) { - NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr"); - return PTR_ERR(esw); - } - - vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); - if (!is_port_function_supported(esw, vport_num)) { - NL_SET_ERR_MSG_MOD(extack, "Port doesn't support set hw_addr"); - return -EINVAL; - } - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) { - NL_SET_ERR_MSG_MOD(extack, "Invalid port"); - return PTR_ERR(vport); - } - - mutex_lock(&esw->state_lock); - if (vport->enabled) - err = mlx5_esw_set_vport_mac_locked(esw, vport, hw_addr); - else - NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled"); - mutex_unlock(&esw->state_lock); - return err; -} - int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { @@ -1835,8 +1789,10 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, ivi->qos = evport->info.qos; ivi->spoofchk = evport->info.spoofchk; ivi->trusted = evport->info.trusted; - ivi->min_tx_rate = evport->qos.min_rate; - ivi->max_tx_rate = evport->qos.max_rate; + if (evport->qos.enabled) { + ivi->min_tx_rate = evport->qos.min_rate; + ivi->max_tx_rate = evport->qos.max_rate; + } mutex_unlock(&esw->state_lock); return 0; @@ -1951,7 +1907,7 @@ u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw = dev->priv.eswitch; - return mlx5_esw_allowed(esw) ? esw->mode : MLX5_ESWITCH_NONE; + return mlx5_esw_allowed(esw) ? esw->mode : MLX5_ESWITCH_LEGACY; } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); @@ -1966,17 +1922,6 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); -bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) -{ - if ((dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE && - dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE) || - (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS && - dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS)) - return true; - - return false; -} - bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { @@ -2082,23 +2027,10 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw) */ void mlx5_esw_unlock(struct mlx5_eswitch *esw) { - if (!mlx5_esw_allowed(esw)) - return; up_write(&esw->mode_lock); } /** - * mlx5_esw_lock() - Take write lock on esw mode lock - * @esw: eswitch device. - */ -void mlx5_esw_lock(struct mlx5_eswitch *esw) -{ - if (!mlx5_esw_allowed(esw)) - return; - down_write(&esw->mode_lock); -} - -/** * mlx5_eswitch_get_total_vports - Get total vports of the eswitch * * @dev: Pointer to core device diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 42f8ee2e5d9f..f68dc2d0dbe6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -113,8 +113,11 @@ struct vport_ingress { * packet with metadata. */ struct mlx5_flow_group *metadata_allmatch_grp; + /* Optional group to add a drop all rule */ + struct mlx5_flow_group *drop_grp; struct mlx5_modify_hdr *modify_metadata; struct mlx5_flow_handle *modify_metadata_rule; + struct mlx5_flow_handle *drop_rule; } offloads; }; @@ -188,6 +191,7 @@ struct mlx5_vport { enum mlx5_eswitch_vport_event enabled_events; int index; struct devlink_port *dl_port; + struct dentry *dbgfs; }; struct mlx5_esw_indir_table; @@ -240,6 +244,8 @@ struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; + struct mlx5_flow_group *vport_rx_drop_group; + struct mlx5_flow_handle *vport_rx_drop_rule; struct xarray vport_reps; struct list_head peer_flows; struct mutex peer_mutex; @@ -279,10 +285,15 @@ struct mlx5_esw_functions { enum { MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED = BIT(1), + MLX5_ESWITCH_VPORT_ACL_NS_CREATED = BIT(2), }; struct mlx5_esw_bridge_offloads; +enum { + MLX5_ESW_FDB_CREATED = BIT(0), +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -308,10 +319,14 @@ struct mlx5_eswitch { atomic64_t user_count; struct { - bool enabled; u32 root_tsar_ix; struct mlx5_esw_rate_group *group0; struct list_head groups; /* Protected by esw->state_lock */ + + /* Protected by esw->state_lock. + * Initially 0, meaning no QoS users and QoS is disabled. + */ + refcount_t refcnt; } qos; struct mlx5_esw_bridge_offloads *br_offloads; @@ -324,7 +339,7 @@ struct mlx5_eswitch { u32 large_group_num; } params; struct blocking_notifier_head n_head; - struct lock_class_key mode_lock_key; + struct dentry *dbgfs; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -332,6 +347,10 @@ int esw_offloads_enable(struct mlx5_eswitch *esw); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); +struct mlx5_flow_handle * +mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule); + bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw); int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable); u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw); @@ -339,18 +358,16 @@ void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata); int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps); -bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw); -int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable); - /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); #define MLX5_ESWITCH_IGNORE_NUM_VFS (-1) -int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs); +int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs); int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs); -void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf); -void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf); +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf); +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw); +void mlx5_eswitch_disable(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u16 vport, const u8 *mac); int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, @@ -447,22 +464,6 @@ enum { MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE = BIT(2), }; -enum { - MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0), - MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1), - MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2), - MLX5_ESW_ATTR_FLAG_SRC_REWRITE = BIT(3), - MLX5_ESW_ATTR_FLAG_SAMPLE = BIT(4), - MLX5_ESW_ATTR_FLAG_ACCEPT = BIT(5), -}; - -/* Returns true if any of the flags that require skipping further TC/NF processing are set. */ -static inline bool -mlx5_esw_attr_flags_skip(u32 attr_flags) -{ - return attr_flags & (MLX5_ESW_ATTR_FLAG_SLOW_PATH | MLX5_ESW_ATTR_FLAG_ACCEPT); -} - struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_core_dev *in_mdev; @@ -486,6 +487,7 @@ struct mlx5_esw_flow_attr { int src_port_rewrite_act_id; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5_rx_tun_attr *rx_tun_attr; + struct ethhdr eth; struct mlx5_pkt_reformat *decap_pkt_reformat; }; @@ -516,11 +518,6 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos, u8 set_flags); -static inline bool mlx5_esw_qos_enabled(struct mlx5_eswitch *esw) -{ - return esw->qos.enabled; -} - static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, u8 vlan_depth) { @@ -534,8 +531,6 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2); } -bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, - struct mlx5_core_dev *dev1); bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1); @@ -594,6 +589,11 @@ mlx5_esw_devlink_port_index_to_vport_num(unsigned int dl_port_index) return dl_port_index & 0xffff; } +static inline bool mlx5_esw_is_fdb_created(struct mlx5_eswitch *esw) +{ + return esw->fdb_table.flags & MLX5_ESW_FDB_CREATED; +} + /* TODO: This mlx5e_tc function shouldn't be called by eswitch */ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); @@ -691,6 +691,9 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num); +void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num); + int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, u16 vport_num, u32 controller, u32 sfnum); void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); @@ -722,7 +725,6 @@ void mlx5_esw_get(struct mlx5_core_dev *dev); void mlx5_esw_put(struct mlx5_core_dev *dev); int mlx5_esw_try_lock(struct mlx5_eswitch *esw); void mlx5_esw_unlock(struct mlx5_eswitch *esw); -void mlx5_esw_lock(struct mlx5_eswitch *esw); void esw_vport_change_handle_locked(struct mlx5_vport *vport); @@ -739,8 +741,8 @@ int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; } -static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {} -static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } +static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {} +static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {} static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; } @@ -749,9 +751,6 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) return ERR_PTR(-EOPNOTSUPP); } -static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; } -static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; } - static inline struct mlx5_flow_handle * esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 32bc08a39925..728ca9f2bb9d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -49,6 +49,7 @@ #include "en_tc.h" #include "en/mapping.h" #include "devlink.h" +#include "lag/lag.h" #define mlx5_esw_for_each_rep(esw, i, rep) \ xa_for_each(&((esw)->offloads.vport_reps), i, rep) @@ -69,6 +70,8 @@ #define MLX5_ESW_VPORT_TBL_SIZE 128 #define MLX5_ESW_VPORT_TBL_NUM_GROUPS 4 +#define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1) + static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = { .max_fte = MLX5_ESW_VPORT_TBL_SIZE, .max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS, @@ -139,7 +142,7 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, if (mlx5_esw_indir_table_decap_vport(attr)) vport = mlx5_esw_indir_table_decap_vport(attr); - if (esw_attr->int_port) + if (attr && !attr->chain && esw_attr->int_port) metadata = mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port); else @@ -180,7 +183,7 @@ esw_setup_decap_indir(struct mlx5_eswitch *esw, { struct mlx5_flow_table *ft; - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) + if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) return -EOPNOTSUPP; ft = mlx5_esw_indir_table_get(esw, attr, spec, @@ -201,12 +204,12 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw, static int esw_setup_sampler_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, - struct mlx5_flow_attr *attr, + u32 sampler_id, int i) { flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; - dest[i].sampler_id = attr->sample_attr->sampler_id; + dest[i].sampler_id = sampler_id; return 0; } @@ -229,10 +232,8 @@ esw_setup_ft_dest(struct mlx5_flow_destination *dest, } static void -esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, - struct mlx5_flow_act *flow_act, - struct mlx5_fs_chains *chains, - int i) +esw_setup_accept_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_fs_chains *chains, int i) { if (mlx5_chains_ignore_flow_level_supported(chains)) flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; @@ -240,6 +241,16 @@ esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, dest[i].ft = mlx5_chains_get_tc_end_ft(chains); } +static void +esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, int i) +{ + if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = esw->fdb_table.offloads.slow_fdb; +} + static int esw_setup_chain_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, @@ -295,26 +306,28 @@ esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest, int *i) { struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; - int j, err; + int err; - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) + if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) return -EOPNOTSUPP; - for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) { - err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i); - if (err) - goto err_setup_chain; + /* flow steering cannot handle more than one dest with the same ft + * in a single flow + */ + if (esw_attr->out_count - esw_attr->split_count > 1) + return -EOPNOTSUPP; - if (esw_attr->dests[j].pkt_reformat) { - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - flow_act->pkt_reformat = esw_attr->dests[j].pkt_reformat; - } + err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i); + if (err) + return err; + + if (esw_attr->dests[esw_attr->split_count].pkt_reformat) { + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = esw_attr->dests[esw_attr->split_count].pkt_reformat; } - return 0; + (*i)++; -err_setup_chain: - esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j); - return err; + return 0; } static void esw_cleanup_chain_src_port_rewrite(struct mlx5_eswitch *esw, @@ -362,7 +375,7 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest, struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; int j, err; - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) + if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) return -EOPNOTSUPP; for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) { @@ -416,6 +429,9 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f dest[dest_idx].vport.vhca_id = MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + if (dest[dest_idx].vport.num == MLX5_VPORT_UPLINK && + mlx5_lag_mpesw_is_activated(esw->dev)) + dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; } if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) { if (pkt_reformat) { @@ -461,20 +477,17 @@ esw_setup_dests(struct mlx5_flow_destination *dest, if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) && esw_src_port_rewrite_supported(esw)) - attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE; + attr->flags |= MLX5_ATTR_FLAG_SRC_REWRITE; - if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) { - esw_setup_sampler_dest(dest, flow_act, attr, *i); + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE && + !(attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)) { + esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i); (*i)++; - } else if (attr->dest_ft) { - esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); + } else if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) { + esw_setup_slow_path_dest(dest, flow_act, esw, *i); (*i)++; - } else if (mlx5_esw_attr_flags_skip(attr->flags)) { - esw_setup_slow_path_dest(dest, flow_act, chains, *i); - (*i)++; - } else if (attr->dest_chain) { - err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, - 1, 0, *i); + } else if (attr->flags & MLX5_ATTR_FLAG_ACCEPT) { + esw_setup_accept_dest(dest, flow_act, chains, *i); (*i)++; } else if (esw_is_indir_table(esw, attr)) { err = esw_setup_indir_table(dest, flow_act, esw, attr, spec, true, i); @@ -482,6 +495,15 @@ esw_setup_dests(struct mlx5_flow_destination *dest, err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i); } else { *i = esw_setup_vport_dests(dest, flow_act, esw, esw_attr, *i); + + if (attr->dest_ft) { + err = esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); + (*i)++; + } else if (attr->dest_chain) { + err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, + 1, 0, *i); + (*i)++; + } } return err; @@ -496,7 +518,7 @@ esw_cleanup_dests(struct mlx5_eswitch *esw, if (attr->dest_ft) { esw_cleanup_decap_indir(esw, attr); - } else if (!mlx5_esw_attr_flags_skip(attr->flags)) { + } else if (!mlx5e_tc_attr_flags_skip(attr->flags)) { if (attr->dest_chain) esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0); else if (esw_is_indir_table(esw, attr)) @@ -506,6 +528,20 @@ esw_cleanup_dests(struct mlx5_eswitch *esw, } } +static void +esw_setup_meter(struct mlx5_flow_attr *attr, struct mlx5_flow_act *flow_act) +{ + struct mlx5e_flow_meter_handle *meter; + + meter = attr->meter_attr.meter; + flow_act->exe_aso.type = attr->exe_aso_type; + flow_act->exe_aso.object_id = meter->obj_id; + flow_act->exe_aso.flow_meter.meter_idx = meter->idx; + flow_act->exe_aso.flow_meter.init_color = MLX5_FLOW_METER_COLOR_GREEN; + /* use metadata reg 5 for packet color */ + flow_act->exe_aso.return_reg_id = 5; +} + struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, @@ -573,6 +609,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_hdr = attr->modify_hdr; + if ((flow_act.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) && + attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER) + esw_setup_meter(attr, &flow_act); + if (split) { fwd_attr.chain = attr->chain; fwd_attr.prio = attr->prio; @@ -587,7 +627,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, else fdb = attr->ft; - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_NO_IN_PORT)) + if (!(attr->flags & MLX5_ATTR_FLAG_NO_IN_PORT)) mlx5_eswitch_set_rule_source_port(esw, spec, attr, esw_attr->in_mdev->priv.eswitch, esw_attr->in_rep->vport); @@ -719,7 +759,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, mlx5_del_flow_rules(rule); - if (!mlx5_esw_attr_flags_skip(attr->flags)) { + if (!mlx5e_tc_attr_flags_skip(attr->flags)) { /* unref the term table */ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { if (esw_attr->dests[i].termtbl) @@ -861,7 +901,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, if (err) goto unlock; - attr->flags &= ~MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; + attr->flags &= ~MLX5_ATTR_FLAG_VLAN_HANDLED; vport = esw_vlan_action_get_vport(esw_attr, push, pop); @@ -869,7 +909,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, /* tracks VF --> wire rules without vlan push action */ if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { vport->vlan_refcount++; - attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; + attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED; } goto unlock; @@ -900,7 +940,7 @@ skip_set_push: } out: if (!err) - attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; + attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED; unlock: mutex_unlock(&esw->state_lock); return err; @@ -919,7 +959,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) return 0; - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_VLAN_HANDLED)) + if (!(attr->flags & MLX5_ATTR_FLAG_VLAN_HANDLED)) return 0; push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); @@ -1022,43 +1062,23 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } -static void mlx5_eswitch_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw) +void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule) { - struct mlx5_flow_handle **flows = esw->fdb_table.offloads.send_to_vport_meta_rules; - int i = 0, num_vfs = esw->esw_funcs.num_vfs; - - if (!num_vfs || !flows) - return; - - for (i = 0; i < num_vfs; i++) - mlx5_del_flow_rules(flows[i]); - - kvfree(flows); + if (rule) + mlx5_del_flow_rules(rule); } -static int -mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw) +struct mlx5_flow_handle * +mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {0}; - int num_vfs, rule_idx = 0, err = 0; struct mlx5_flow_handle *flow_rule; - struct mlx5_flow_handle **flows; struct mlx5_flow_spec *spec; - struct mlx5_vport *vport; - unsigned long i; - u16 vport_num; - - num_vfs = esw->esw_funcs.num_vfs; - flows = kvcalloc(num_vfs, sizeof(*flows), GFP_KERNEL); - if (!flows) - return -ENOMEM; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) { - err = -ENOMEM; - goto alloc_err; - } + if (!spec) + return ERR_PTR(-ENOMEM); MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); @@ -1071,34 +1091,18 @@ mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw) dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { - vport_num = vport->vport; - MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num)); - dest.vport.num = vport_num; - - flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, - spec, &flow_act, &dest, 1); - if (IS_ERR(flow_rule)) { - err = PTR_ERR(flow_rule); - esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule idx %d, err %ld\n", - rule_idx, PTR_ERR(flow_rule)); - goto rule_err; - } - flows[rule_idx++] = flow_rule; - } + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num)); + dest.vport.num = vport_num; - esw->fdb_table.offloads.send_to_vport_meta_rules = flows; - kvfree(spec); - return 0; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule vport %d, err %ld\n", + vport_num, PTR_ERR(flow_rule)); -rule_err: - while (--rule_idx >= 0) - mlx5_del_flow_rules(flows[rule_idx]); kvfree(spec); -alloc_err: - kvfree(flows); - return err; + return flow_rule; } static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw) @@ -1623,18 +1627,200 @@ esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains) #endif +static int +esw_create_send_to_vport_group(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + u32 *flow_group_in, + int *ix) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + void *match_criteria; + int count, err = 0; + + memset(flow_group_in, 0, inlen); + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } + + /* See comment at table_size calculation */ + count = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, *ix + count - 1); + *ix += count; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(esw->dev, "Failed to create send-to-vport flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.offloads.send_to_vport_grp = g; + +out: + return err; +} + +static int +esw_create_meta_send_to_vport_group(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + u32 *flow_group_in, + int *ix) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + void *match_criteria; + int err = 0; + + if (!esw_src_port_rewrite_supported(esw)) + return 0; + + memset(flow_group_in, 0, inlen); + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix); + MLX5_SET(create_flow_group_in, flow_group_in, + end_flow_index, *ix + esw->total_vports - 1); + *ix += esw->total_vports; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(esw->dev, + "Failed to create send-to-vport meta flow group err(%d)\n", err); + goto send_vport_meta_err; + } + esw->fdb_table.offloads.send_to_vport_meta_grp = g; + + return 0; + +send_vport_meta_err: + return err; +} + +static int +esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + u32 *flow_group_in, + int *ix) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + void *match_criteria; + int err = 0; + + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return 0; + + memset(flow_group_in, 0, inlen); + + esw_set_flow_group_source_port(esw, flow_group_in); + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + *ix + esw->total_vports - 1); + *ix += esw->total_vports; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(esw->dev, "Failed to create peer miss flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.offloads.peer_miss_grp = g; + +out: + return err; +} + +static int +esw_create_miss_group(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + u32 *flow_group_in, + int *ix) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + void *match_criteria; + int err = 0; + u8 *dmac; + + memset(flow_group_in, 0, inlen); + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + *ix + MLX5_ESW_MISS_FLOWS); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(esw->dev, "Failed to create miss flow group err(%d)\n", err); + goto miss_err; + } + esw->fdb_table.offloads.miss_grp = g; + + err = esw_add_fdb_miss_rule(esw); + if (err) + goto miss_rule_err; + + return 0; + +miss_rule_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); +miss_err: + return err; +} + static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table_attr ft_attr = {}; - int num_vfs, table_size, ix, err = 0; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; + int table_size, ix = 0, err = 0; u32 flags = 0, *flow_group_in; - struct mlx5_flow_group *g; - void *match_criteria; - u8 *dmac; esw_debug(esw->dev, "Create offloads FDB Tables\n"); @@ -1668,7 +1854,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) * total vports of the peer (currently is also uses esw->total_vports). */ table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) + - MLX5_ESW_MISS_FLOWS + esw->total_vports + esw->esw_funcs.num_vfs; + esw->total_vports * 2 + MLX5_ESW_MISS_FLOWS; /* create the slow path fdb with encap set, so further table instances * can be created at run time while VFs are probed if the FW allows that. @@ -1709,139 +1895,29 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) goto fdb_chains_err; } - /* create send-to-vport group */ - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - - MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_eswitch_owner_vhca_id); - MLX5_SET(create_flow_group_in, flow_group_in, - source_eswitch_owner_vhca_id_valid, 1); - } - - /* See comment above table_size calculation */ - ix = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1); - - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create send-to-vport flow group err(%d)\n", err); + err = esw_create_send_to_vport_group(esw, fdb, flow_group_in, &ix); + if (err) goto send_vport_err; - } - esw->fdb_table.offloads.send_to_vport_grp = g; - - if (esw_src_port_rewrite_supported(esw)) { - /* meta send to vport */ - memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS_2); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - - MLX5_SET(fte_match_param, match_criteria, - misc_parameters_2.metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_mask()); - MLX5_SET(fte_match_param, match_criteria, - misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); - - num_vfs = esw->esw_funcs.num_vfs; - if (num_vfs) { - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, - end_flow_index, ix + num_vfs - 1); - ix += num_vfs; - - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n", - err); - goto send_vport_meta_err; - } - esw->fdb_table.offloads.send_to_vport_meta_grp = g; - - err = mlx5_eswitch_add_send_to_vport_meta_rules(esw); - if (err) - goto meta_rule_err; - } - } - - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { - /* create peer esw miss group */ - memset(flow_group_in, 0, inlen); - - esw_set_flow_group_source_port(esw, flow_group_in); - - if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { - match_criteria = MLX5_ADDR_OF(create_flow_group_in, - flow_group_in, - match_criteria); - - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_eswitch_owner_vhca_id); - - MLX5_SET(create_flow_group_in, flow_group_in, - source_eswitch_owner_vhca_id_valid, 1); - } - - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, - ix + esw->total_vports - 1); - ix += esw->total_vports; - - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err); - goto peer_miss_err; - } - esw->fdb_table.offloads.peer_miss_grp = g; - } - - /* create miss group */ - memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_OUTER_HEADERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, - match_criteria); - dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, - outer_headers.dmac_47_16); - dmac[0] = 0x01; - - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, - ix + MLX5_ESW_MISS_FLOWS); + err = esw_create_meta_send_to_vport_group(esw, fdb, flow_group_in, &ix); + if (err) + goto send_vport_meta_err; - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create miss flow group err(%d)\n", err); - goto miss_err; - } - esw->fdb_table.offloads.miss_grp = g; + err = esw_create_peer_esw_miss_group(esw, fdb, flow_group_in, &ix); + if (err) + goto peer_miss_err; - err = esw_add_fdb_miss_rule(esw); + err = esw_create_miss_group(esw, fdb, flow_group_in, &ix); if (err) - goto miss_rule_err; + goto miss_err; kvfree(flow_group_in); return 0; -miss_rule_err: - mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); miss_err: if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); peer_miss_err: - mlx5_eswitch_del_send_to_vport_meta_rules(esw); -meta_rule_err: if (esw->fdb_table.offloads.send_to_vport_meta_grp) mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp); send_vport_meta_err: @@ -1868,7 +1944,6 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi); mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); - mlx5_eswitch_del_send_to_vport_meta_rules(esw); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); if (esw->fdb_table.offloads.send_to_vport_meta_grp) mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp); @@ -1886,7 +1961,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) atomic64_set(&esw->user_count, 0); } -static int esw_get_offloads_ft_size(struct mlx5_eswitch *esw) +static int esw_get_nr_ft_offloads_steering_src_ports(struct mlx5_eswitch *esw) { int nvports; @@ -1911,7 +1986,8 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw) return -EOPNOTSUPP; } - ft_attr.max_fte = esw_get_offloads_ft_size(esw); + ft_attr.max_fte = esw_get_nr_ft_offloads_steering_src_ports(esw) + + MLX5_ESW_FT_OFFLOADS_DROP_RULE; ft_attr.prio = 1; ft_offloads = mlx5_create_flow_table(ns, &ft_attr); @@ -1940,7 +2016,7 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) int nvports; int err = 0; - nvports = esw_get_offloads_ft_size(esw); + nvports = esw_get_nr_ft_offloads_steering_src_ports(esw); flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; @@ -1970,6 +2046,52 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) mlx5_destroy_flow_group(esw->offloads.vport_rx_group); } +static int esw_create_vport_rx_drop_rule_index(struct mlx5_eswitch *esw) +{ + /* ft_offloads table is enlarged by MLX5_ESW_FT_OFFLOADS_DROP_RULE (1) + * for the drop rule, which is placed at the end of the table. + * So return the total of vport and int_port as rule index. + */ + return esw_get_nr_ft_offloads_steering_src_ports(esw); +} + +static int esw_create_vport_rx_drop_group(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + u32 *flow_group_in; + int flow_index; + int err = 0; + + flow_index = esw_create_vport_rx_drop_rule_index(esw); + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in); + + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_warn(esw->dev, "Failed to create vport rx drop group err %d\n", err); + goto out; + } + + esw->offloads.vport_rx_drop_group = g; +out: + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_vport_rx_drop_group(struct mlx5_eswitch *esw) +{ + if (esw->offloads.vport_rx_drop_group) + mlx5_destroy_flow_group(esw->offloads.vport_rx_drop_group); +} + struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, struct mlx5_flow_destination *dest) @@ -2018,6 +2140,32 @@ out: return flow_rule; } +static int esw_create_vport_rx_drop_rule(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, NULL, + &flow_act, NULL, 0); + if (IS_ERR(flow_rule)) { + esw_warn(esw->dev, + "fs offloads: Failed to add vport rx drop rule err %ld\n", + PTR_ERR(flow_rule)); + return PTR_ERR(flow_rule); + } + + esw->offloads.vport_rx_drop_rule = flow_rule; + + return 0; +} + +static void esw_destroy_vport_rx_drop_rule(struct mlx5_eswitch *esw) +{ + if (esw->offloads.vport_rx_drop_rule) + mlx5_del_flow_rules(esw->offloads.vport_rx_drop_rule); +} + static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode) { u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; @@ -2028,7 +2176,7 @@ static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode) if (!MLX5_CAP_GEN(dev, vport_group_manager)) return -EOPNOTSUPP; - if (esw->mode == MLX5_ESWITCH_NONE) + if (!mlx5_esw_is_fdb_created(esw)) return -EOPNOTSUPP; switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { @@ -2162,20 +2310,15 @@ out_free: static int esw_offloads_start(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { - int err, err1; + int err; - mlx5_eswitch_disable_locked(esw, false); - err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS, - esw->dev->priv.sriov.num_vfs); + esw->mode = MLX5_ESWITCH_OFFLOADS; + err = mlx5_eswitch_enable_locked(esw, esw->dev->priv.sriov.num_vfs); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to offloads"); - err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, - MLX5_ESWITCH_IGNORE_NUM_VFS); - if (err1) { - NL_SET_ERR_MSG_MOD(extack, - "Failed setting eswitch back to legacy"); - } + esw->mode = MLX5_ESWITCH_LEGACY; + mlx5_rescan_drivers(esw->dev); } if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) { if (mlx5_eswitch_inline_mode_get(esw, @@ -2376,60 +2519,6 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); } -static int esw_set_uplink_slave_ingress_root(struct mlx5_core_dev *master, - struct mlx5_core_dev *slave) -{ - u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; - u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {}; - struct mlx5_eswitch *esw; - struct mlx5_flow_root_namespace *root; - struct mlx5_flow_namespace *ns; - struct mlx5_vport *vport; - int err; - - MLX5_SET(set_flow_table_root_in, in, opcode, - MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); - MLX5_SET(set_flow_table_root_in, in, table_type, FS_FT_ESW_INGRESS_ACL); - MLX5_SET(set_flow_table_root_in, in, other_vport, 1); - MLX5_SET(set_flow_table_root_in, in, vport_number, MLX5_VPORT_UPLINK); - - if (master) { - esw = master->priv.eswitch; - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); - MLX5_SET(set_flow_table_root_in, in, table_of_other_vport, 1); - MLX5_SET(set_flow_table_root_in, in, table_vport_number, - MLX5_VPORT_UPLINK); - - ns = mlx5_get_flow_vport_acl_namespace(master, - MLX5_FLOW_NAMESPACE_ESW_INGRESS, - vport->index); - root = find_root(&ns->node); - mutex_lock(&root->chain_lock); - - MLX5_SET(set_flow_table_root_in, in, - table_eswitch_owner_vhca_id_valid, 1); - MLX5_SET(set_flow_table_root_in, in, - table_eswitch_owner_vhca_id, - MLX5_CAP_GEN(master, vhca_id)); - MLX5_SET(set_flow_table_root_in, in, table_id, - root->root_ft->id); - } else { - esw = slave->priv.eswitch; - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); - ns = mlx5_get_flow_vport_acl_namespace(slave, - MLX5_FLOW_NAMESPACE_ESW_INGRESS, - vport->index); - root = find_root(&ns->node); - mutex_lock(&root->chain_lock); - MLX5_SET(set_flow_table_root_in, in, table_id, root->root_ft->id); - } - - err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out)); - mutex_unlock(&root->chain_lock); - - return err; -} - static int esw_set_slave_root_fdb(struct mlx5_core_dev *master, struct mlx5_core_dev *slave) { @@ -2611,15 +2700,10 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, { int err; - err = esw_set_uplink_slave_ingress_root(master_esw->dev, - slave_esw->dev); - if (err) - return -EINVAL; - err = esw_set_slave_root_fdb(master_esw->dev, slave_esw->dev); if (err) - goto err_fdb; + return err; err = esw_set_master_egress_rule(master_esw->dev, slave_esw->dev); @@ -2631,9 +2715,6 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, err_acl: esw_set_slave_root_fdb(NULL, slave_esw->dev); -err_fdb: - esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev); - return err; } @@ -2642,7 +2723,6 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, { esw_unset_master_egress_rule(master_esw->dev); esw_set_slave_root_fdb(NULL, slave_esw->dev); - esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev); } #define ESW_OFFLOADS_DEVCOM_PAIR (0) @@ -2747,9 +2827,6 @@ static int mlx5_esw_offloads_devcom_event(int event, switch (event) { case ESW_OFFLOADS_DEVCOM_PAIR: - if (mlx5_get_next_phys_dev(esw->dev) != peer_esw->dev) - break; - if (mlx5_eswitch_vport_match_metadata_enabled(esw) != mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) break; @@ -2801,6 +2878,9 @@ static void esw_offloads_devcom_init(struct mlx5_eswitch *esw) if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) return; + if (!mlx5_is_lag_supported(esw->dev)) + return; + mlx5_devcom_register_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS, mlx5_esw_offloads_devcom_event, @@ -2818,6 +2898,9 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) return; + if (!mlx5_is_lag_supported(esw->dev)) + return; + mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS, ESW_OFFLOADS_DEVCOM_UNPAIR, esw); @@ -2836,13 +2919,22 @@ bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw) if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) return false; - if (mlx5_core_is_ecpf_esw_manager(esw->dev) || - mlx5_ecpf_vport_exists(esw->dev)) - return false; - return true; } +#define MLX5_ESW_METADATA_RSVD_UPLINK 1 + +/* Share the same metadata for uplink's. This is fine because: + * (a) In shared FDB mode (LAG) both uplink's are treated the + * same and tagged with the same metadata. + * (b) In non shared FDB mode, packets from physical port0 + * cannot hit eswitch of PF1 and vice versa. + */ +static u32 mlx5_esw_match_metadata_reserved(struct mlx5_eswitch *esw) +{ + return MLX5_ESW_METADATA_RSVD_UPLINK; +} + u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw) { u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1; @@ -2857,8 +2949,10 @@ u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw) return 0; /* Metadata is 4 bits of PFNUM and 12 bits of unique id */ - /* Use only non-zero vport_id (1-4095) for all PF's */ - id = ida_alloc_range(&esw->offloads.vport_metadata_ida, 1, vport_end_ida, GFP_KERNEL); + /* Use only non-zero vport_id (2-4095) for all PF's */ + id = ida_alloc_range(&esw->offloads.vport_metadata_ida, + MLX5_ESW_METADATA_RSVD_UPLINK + 1, + vport_end_ida, GFP_KERNEL); if (id < 0) return 0; id = (pf_num << ESW_VPORT_BITS) | id; @@ -2876,7 +2970,11 @@ void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata) static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - vport->default_metadata = mlx5_esw_match_metadata_alloc(esw); + if (vport->vport == MLX5_VPORT_UPLINK) + vport->default_metadata = mlx5_esw_match_metadata_reserved(esw); + else + vport->default_metadata = mlx5_esw_match_metadata_alloc(esw); + vport->metadata = vport->default_metadata; return vport->metadata ? 0 : -ENOSPC; } @@ -2887,6 +2985,9 @@ static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw, if (!vport->default_metadata) return; + if (vport->vport == MLX5_VPORT_UPLINK) + return; + WARN_ON(vport->metadata != vport->default_metadata); mlx5_esw_match_metadata_free(esw, vport->default_metadata); } @@ -2930,7 +3031,7 @@ int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable) int err = 0; down_write(&esw->mode_lock); - if (esw->mode != MLX5_ESWITCH_NONE) { + if (mlx5_esw_is_fdb_created(esw)) { err = -EBUSY; goto done; } @@ -3060,8 +3161,20 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) if (err) goto create_fg_err; + err = esw_create_vport_rx_drop_group(esw); + if (err) + goto create_rx_drop_fg_err; + + err = esw_create_vport_rx_drop_rule(esw); + if (err) + goto create_rx_drop_rule_err; + return 0; +create_rx_drop_rule_err: + esw_destroy_vport_rx_drop_group(esw); +create_rx_drop_fg_err: + esw_destroy_vport_rx_group(esw); create_fg_err: esw_destroy_offloads_fdb_tables(esw); create_fdb_err: @@ -3079,6 +3192,8 @@ create_indir_err: static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) { + esw_destroy_vport_rx_drop_rule(esw); + esw_destroy_vport_rx_drop_group(esw); esw_destroy_vport_rx_group(esw); esw_destroy_offloads_fdb_tables(esw); esw_destroy_restore_table(esw); @@ -3091,6 +3206,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) static void esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) { + struct devlink *devlink; bool host_pf_disabled; u16 new_num_vfs; @@ -3102,6 +3218,8 @@ esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled) return; + devlink = priv_to_devlink(esw->dev); + devl_lock(devlink); /* Number of VFs can only change from "0 to x" or "x to 0". */ if (esw->esw_funcs.num_vfs > 0) { mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); @@ -3110,10 +3228,13 @@ esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) err = mlx5_eswitch_load_vf_vports(esw, new_num_vfs, MLX5_VPORT_UC_ADDR_CHANGE); - if (err) + if (err) { + devl_unlock(devlink); return; + } } esw->esw_funcs.num_vfs = new_num_vfs; + devl_unlock(devlink); } static void esw_functions_changed_event_handler(struct work_struct *work) @@ -3263,20 +3384,12 @@ err_metadata: static int esw_offloads_stop(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { - int err, err1; + int err; - mlx5_eswitch_disable_locked(esw, false); - err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, - MLX5_ESWITCH_IGNORE_NUM_VFS); - if (err) { + esw->mode = MLX5_ESWITCH_LEGACY; + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS); + if (err) NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); - err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS, - MLX5_ESWITCH_IGNORE_NUM_VFS); - if (err1) { - NL_SET_ERR_MSG_MOD(extack, - "Failed setting eswitch back to offloads"); - } - } return err; } @@ -3370,15 +3483,6 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) return 0; } -static int eswitch_devlink_esw_mode_check(const struct mlx5_eswitch *esw) -{ - /* devlink commands in NONE eswitch mode are currently supported only - * on ECPF. - */ - return (esw->mode == MLX5_ESWITCH_NONE && - !mlx5_core_is_ecpf_esw_manager(esw->dev)) ? -EOPNOTSUPP : 0; -} - int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { @@ -3405,6 +3509,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (cur_mlx5_mode == mlx5_mode) goto unlock; + mlx5_eswitch_disable_locked(esw); if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) { if (mlx5_devlink_trap_get_num_active(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, @@ -3415,6 +3520,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, err = esw_offloads_start(esw, extack); } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) { err = esw_offloads_stop(esw, extack); + mlx5_rescan_drivers(esw->dev); } else { err = -EINVAL; } @@ -3436,12 +3542,7 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) return PTR_ERR(esw); down_write(&esw->mode_lock); - err = eswitch_devlink_esw_mode_check(esw); - if (err) - goto unlock; - err = esw_mode_to_devlink(esw->mode, mode); -unlock: up_write(&esw->mode_lock); return err; } @@ -3490,9 +3591,6 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, return PTR_ERR(esw); down_write(&esw->mode_lock); - err = eswitch_devlink_esw_mode_check(esw); - if (err) - goto out; switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: @@ -3544,12 +3642,7 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) return PTR_ERR(esw); down_write(&esw->mode_lock); - err = eswitch_devlink_esw_mode_check(esw); - if (err) - goto unlock; - err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); -unlock: up_write(&esw->mode_lock); return err; } @@ -3560,16 +3653,13 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw; - int err; + int err = 0; esw = mlx5_devlink_eswitch_get(devlink); if (IS_ERR(esw)) return PTR_ERR(esw); down_write(&esw->mode_lock); - err = eswitch_devlink_esw_mode_check(esw); - if (err) - goto unlock; if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) || @@ -3620,22 +3710,15 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, enum devlink_eswitch_encap_mode *encap) { struct mlx5_eswitch *esw; - int err; esw = mlx5_devlink_eswitch_get(devlink); if (IS_ERR(esw)) return PTR_ERR(esw); - down_write(&esw->mode_lock); - err = eswitch_devlink_esw_mode_check(esw); - if (err) - goto unlock; - *encap = esw->offloads.encap; -unlock: up_write(&esw->mode_lock); - return err; + return 0; } static bool @@ -3758,12 +3841,14 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p if (err) goto devlink_err; + mlx5_esw_vport_debugfs_create(esw, vport_num, true, sfnum); err = mlx5_esw_offloads_rep_load(esw, vport_num); if (err) goto rep_err; return 0; rep_err: + mlx5_esw_vport_debugfs_destroy(esw, vport_num); mlx5_esw_devlink_sf_port_unregister(esw, vport_num); devlink_err: mlx5_esw_vport_disable(esw, vport_num); @@ -3773,6 +3858,7 @@ devlink_err: void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) { mlx5_esw_offloads_rep_unload(esw, vport_num); + mlx5_esw_vport_debugfs_destroy(esw, vport_num); mlx5_esw_devlink_sf_port_unregister(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num); } @@ -3867,3 +3953,62 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, return vport->metadata; } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_set); + +static bool +is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num == MLX5_VPORT_PF || + mlx5_eswitch_is_vf_vport(esw, vport_num) || + mlx5_esw_is_sf_vport(esw, vport_num); +} + +int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, + u8 *hw_addr, int *hw_addr_len, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + struct mlx5_vport *vport; + u16 vport_num; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); + if (!is_port_function_supported(esw, vport_num)) + return -EOPNOTSUPP; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid port"); + return PTR_ERR(vport); + } + + mutex_lock(&esw->state_lock); + ether_addr_copy(hw_addr, vport->info.mac); + *hw_addr_len = ETH_ALEN; + mutex_unlock(&esw->state_lock); + return 0; +} + +int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, + const u8 *hw_addr, int hw_addr_len, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + u16 vport_num; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) { + NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr"); + return PTR_ERR(esw); + } + + vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); + if (!is_port_function_supported(esw, vport_num)) { + NL_SET_ERR_MSG_MOD(extack, "Port doesn't support set hw_addr"); + return -EINVAL; + } + + return mlx5_eswitch_set_vport_mac(esw, vport_num, hw_addr); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 182306bbefaa..108a3503f413 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -30,9 +30,9 @@ mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, sizeof(dest->vport.num), hash); hash = jhash((const void *)&dest->vport.vhca_id, sizeof(dest->vport.num), hash); - if (dest->vport.pkt_reformat) - hash = jhash(dest->vport.pkt_reformat, - sizeof(*dest->vport.pkt_reformat), + if (flow_act->pkt_reformat) + hash = jhash(flow_act->pkt_reformat, + sizeof(*flow_act->pkt_reformat), hash); return hash; } @@ -53,9 +53,11 @@ mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, if (ret) return ret; - return dest1->vport.pkt_reformat && dest2->vport.pkt_reformat ? - memcmp(dest1->vport.pkt_reformat, dest2->vport.pkt_reformat, - sizeof(*dest1->vport.pkt_reformat)) : 0; + if (flow_act1->pkt_reformat && flow_act2->pkt_reformat) + return memcmp(flow_act1->pkt_reformat, flow_act2->pkt_reformat, + sizeof(*flow_act1->pkt_reformat)); + + return !(flow_act1->pkt_reformat == flow_act2->pkt_reformat); } static int @@ -219,12 +221,14 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) || - mlx5_esw_attr_flags_skip(attr->flags) || + mlx5e_tc_attr_flags_skip(attr->flags) || (!mlx5_eswitch_offload_is_uplink_port(esw, spec) && !esw_attr->int_port)) return false; /* push vlan on RX */ - if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH && + !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) & + MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX)) return true; /* hairpin */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index a1ac3a654962..9459e56ee90a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -36,6 +36,7 @@ static struct mlx5_nb events_nbs_ref[] = { /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_OBJECT_CHANGE }, /* QP/WQ resource events to forward */ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED }, {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG }, @@ -132,6 +133,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_MONITOR_COUNTER"; case MLX5_EVENT_TYPE_DEVICE_TRACER: return "MLX5_EVENT_TYPE_DEVICE_TRACER"; + case MLX5_EVENT_TYPE_OBJECT_CHANGE: + return "MLX5_EVENT_TYPE_OBJECT_CHANGE"; default: return "Unrecognized event"; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c index 2ce4241459ce..39c03dcbd196 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -30,7 +30,6 @@ * SOFTWARE. */ -#include <linux/module.h> #include <linux/etherdevice.h> #include <linux/mlx5/driver.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h index 2a984e82ae16..750c32050165 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -57,9 +57,6 @@ struct mlx5_fpga_device { u32 mkey; struct mlx5_uars_page *uar; } conn_res; - - struct mlx5_fpga_ipsec *ipsec; - struct mlx5_fpga_tls *tls; }; #define mlx5_fpga_dbg(__adev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c deleted file mode 100644 index 8ec148010d62..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ /dev/null @@ -1,1582 +0,0 @@ -/* - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/rhashtable.h> -#include <linux/mlx5/driver.h> -#include <linux/mlx5/fs_helpers.h> -#include <linux/mlx5/fs.h> -#include <linux/rbtree.h> - -#include "mlx5_core.h" -#include "fs_cmd.h" -#include "fpga/ipsec.h" -#include "fpga/sdk.h" -#include "fpga/core.h" - -enum mlx5_fpga_ipsec_cmd_status { - MLX5_FPGA_IPSEC_CMD_PENDING, - MLX5_FPGA_IPSEC_CMD_SEND_FAIL, - MLX5_FPGA_IPSEC_CMD_COMPLETE, -}; - -struct mlx5_fpga_ipsec_cmd_context { - struct mlx5_fpga_dma_buf buf; - enum mlx5_fpga_ipsec_cmd_status status; - struct mlx5_ifc_fpga_ipsec_cmd_resp resp; - int status_code; - struct completion complete; - struct mlx5_fpga_device *dev; - struct list_head list; /* Item in pending_cmds */ - u8 command[]; -}; - -struct mlx5_fpga_esp_xfrm; - -struct mlx5_fpga_ipsec_sa_ctx { - struct rhash_head hash; - struct mlx5_ifc_fpga_ipsec_sa hw_sa; - u32 sa_handle; - struct mlx5_core_dev *dev; - struct mlx5_fpga_esp_xfrm *fpga_xfrm; -}; - -struct mlx5_fpga_esp_xfrm { - unsigned int num_rules; - struct mlx5_fpga_ipsec_sa_ctx *sa_ctx; - struct mutex lock; /* xfrm lock */ - struct mlx5_accel_esp_xfrm accel_xfrm; -}; - -struct mlx5_fpga_ipsec_rule { - struct rb_node node; - struct fs_fte *fte; - struct mlx5_fpga_ipsec_sa_ctx *ctx; -}; - -static const struct rhashtable_params rhash_sa = { - /* Keep out "cmd" field from the key as it's - * value is not constant during the lifetime - * of the key object. - */ - .key_len = sizeof_field(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) - - sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd), - .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) + - sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd), - .head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash), - .automatic_shrinking = true, - .min_size = 1, -}; - -struct mlx5_fpga_ipsec { - struct mlx5_fpga_device *fdev; - struct list_head pending_cmds; - spinlock_t pending_cmds_lock; /* Protects pending_cmds */ - u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)]; - struct mlx5_fpga_conn *conn; - - struct notifier_block fs_notifier_ingress_bypass; - struct notifier_block fs_notifier_egress; - - /* Map hardware SA --> SA context - * (mlx5_fpga_ipsec_sa) (mlx5_fpga_ipsec_sa_ctx) - * We will use this hash to avoid SAs duplication in fpga which - * aren't allowed - */ - struct rhashtable sa_hash; /* hw_sa -> mlx5_fpga_ipsec_sa_ctx */ - struct mutex sa_hash_lock; - - /* Tree holding all rules for this fpga device - * Key for searching a rule (mlx5_fpga_ipsec_rule) is (ft, id) - */ - struct rb_root rules_rb; - struct mutex rules_rb_lock; /* rules lock */ - - struct ida halloc; -}; - -bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) -{ - if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga)) - return false; - - if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) != - MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX) - return false; - - if (MLX5_CAP_FPGA(mdev, sandbox_product_id) != - MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC) - return false; - - return true; -} - -static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn, - struct mlx5_fpga_device *fdev, - struct mlx5_fpga_dma_buf *buf, - u8 status) -{ - struct mlx5_fpga_ipsec_cmd_context *context; - - if (status) { - context = container_of(buf, struct mlx5_fpga_ipsec_cmd_context, - buf); - mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n", - status); - context->status = MLX5_FPGA_IPSEC_CMD_SEND_FAIL; - complete(&context->complete); - } -} - -static inline -int syndrome_to_errno(enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome) -{ - switch (syndrome) { - case MLX5_FPGA_IPSEC_RESPONSE_SUCCESS: - return 0; - case MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE: - return -EEXIST; - case MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST: - return -EINVAL; - case MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE: - return -EIO; - } - return -EIO; -} - -static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf) -{ - struct mlx5_ifc_fpga_ipsec_cmd_resp *resp = buf->sg[0].data; - struct mlx5_fpga_ipsec_cmd_context *context; - enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome; - struct mlx5_fpga_device *fdev = cb_arg; - unsigned long flags; - - if (buf->sg[0].size < sizeof(*resp)) { - mlx5_fpga_warn(fdev, "Short receive from FPGA IPSec: %u < %zu bytes\n", - buf->sg[0].size, sizeof(*resp)); - return; - } - - mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x\n", - ntohl(resp->syndrome)); - - spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); - context = list_first_entry_or_null(&fdev->ipsec->pending_cmds, - struct mlx5_fpga_ipsec_cmd_context, - list); - if (context) - list_del(&context->list); - spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); - - if (!context) { - mlx5_fpga_warn(fdev, "Received IPSec offload response without pending command request\n"); - return; - } - mlx5_fpga_dbg(fdev, "Handling response for %p\n", context); - - syndrome = ntohl(resp->syndrome); - context->status_code = syndrome_to_errno(syndrome); - context->status = MLX5_FPGA_IPSEC_CMD_COMPLETE; - memcpy(&context->resp, resp, sizeof(*resp)); - - if (context->status_code) - mlx5_fpga_warn(fdev, "IPSec command failed with syndrome %08x\n", - syndrome); - - complete(&context->complete); -} - -static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev, - const void *cmd, int cmd_size) -{ - struct mlx5_fpga_ipsec_cmd_context *context; - struct mlx5_fpga_device *fdev = mdev->fpga; - unsigned long flags; - int res; - - if (!fdev || !fdev->ipsec) - return ERR_PTR(-EOPNOTSUPP); - - if (cmd_size & 3) - return ERR_PTR(-EINVAL); - - context = kzalloc(sizeof(*context) + cmd_size, GFP_ATOMIC); - if (!context) - return ERR_PTR(-ENOMEM); - - context->status = MLX5_FPGA_IPSEC_CMD_PENDING; - context->dev = fdev; - context->buf.complete = mlx5_fpga_ipsec_send_complete; - init_completion(&context->complete); - memcpy(&context->command, cmd, cmd_size); - context->buf.sg[0].size = cmd_size; - context->buf.sg[0].data = &context->command; - - spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); - res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf); - if (!res) - list_add_tail(&context->list, &fdev->ipsec->pending_cmds); - spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); - - if (res) { - mlx5_fpga_warn(fdev, "Failed to send IPSec command: %d\n", res); - kfree(context); - return ERR_PTR(res); - } - - /* Context should be freed by the caller after completion. */ - return context; -} - -static int mlx5_fpga_ipsec_cmd_wait(void *ctx) -{ - struct mlx5_fpga_ipsec_cmd_context *context = ctx; - unsigned long timeout = - msecs_to_jiffies(MLX5_FPGA_CMD_TIMEOUT_MSEC); - int res; - - res = wait_for_completion_timeout(&context->complete, timeout); - if (!res) { - mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n"); - return -ETIMEDOUT; - } - - if (context->status == MLX5_FPGA_IPSEC_CMD_COMPLETE) - res = context->status_code; - else - res = -EIO; - - return res; -} - -static inline bool is_v2_sadb_supported(struct mlx5_fpga_ipsec *fipsec) -{ - if (MLX5_GET(ipsec_extended_cap, fipsec->caps, v2_command)) - return true; - return false; -} - -static int mlx5_fpga_ipsec_update_hw_sa(struct mlx5_fpga_device *fdev, - struct mlx5_ifc_fpga_ipsec_sa *hw_sa, - int opcode) -{ - struct mlx5_core_dev *dev = fdev->mdev; - struct mlx5_ifc_fpga_ipsec_sa *sa; - struct mlx5_fpga_ipsec_cmd_context *cmd_context; - size_t sa_cmd_size; - int err; - - hw_sa->ipsec_sa_v1.cmd = htonl(opcode); - if (is_v2_sadb_supported(fdev->ipsec)) - sa_cmd_size = sizeof(*hw_sa); - else - sa_cmd_size = sizeof(hw_sa->ipsec_sa_v1); - - cmd_context = (struct mlx5_fpga_ipsec_cmd_context *) - mlx5_fpga_ipsec_cmd_exec(dev, hw_sa, sa_cmd_size); - if (IS_ERR(cmd_context)) - return PTR_ERR(cmd_context); - - err = mlx5_fpga_ipsec_cmd_wait(cmd_context); - if (err) - goto out; - - sa = (struct mlx5_ifc_fpga_ipsec_sa *)&cmd_context->command; - if (sa->ipsec_sa_v1.sw_sa_handle != cmd_context->resp.sw_sa_handle) { - mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n", - ntohl(sa->ipsec_sa_v1.sw_sa_handle), - ntohl(cmd_context->resp.sw_sa_handle)); - err = -EIO; - } - -out: - kfree(cmd_context); - return err; -} - -u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - u32 ret = 0; - - if (mlx5_fpga_is_ipsec_device(mdev)) { - ret |= MLX5_ACCEL_IPSEC_CAP_DEVICE; - ret |= MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA; - } else { - return ret; - } - - if (!fdev->ipsec) - return ret; - - if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp)) - ret |= MLX5_ACCEL_IPSEC_CAP_ESP; - - if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6)) - ret |= MLX5_ACCEL_IPSEC_CAP_IPV6; - - if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso)) - ret |= MLX5_ACCEL_IPSEC_CAP_LSO; - - if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer)) - ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER; - - if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esn)) { - ret |= MLX5_ACCEL_IPSEC_CAP_ESN; - ret |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN; - } - - return ret; -} - -static unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - - if (!fdev || !fdev->ipsec) - return 0; - - return MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, - number_of_ipsec_counters); -} - -static int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, - unsigned int counters_count) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - unsigned int i; - __be32 *data; - u32 count; - u64 addr; - int ret; - - if (!fdev || !fdev->ipsec) - return 0; - - addr = (u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, - ipsec_counters_addr_low) + - ((u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, - ipsec_counters_addr_high) << 32); - - count = mlx5_fpga_ipsec_counters_count(mdev); - - data = kzalloc(array3_size(sizeof(*data), count, 2), GFP_KERNEL); - if (!data) { - ret = -ENOMEM; - goto out; - } - - ret = mlx5_fpga_mem_read(fdev, count * sizeof(u64), addr, data, - MLX5_FPGA_ACCESS_TYPE_DONTCARE); - if (ret < 0) { - mlx5_fpga_err(fdev, "Failed to read IPSec counters from HW: %d\n", - ret); - goto out; - } - ret = 0; - - if (count > counters_count) - count = counters_count; - - /* Each counter is low word, then high. But each word is big-endian */ - for (i = 0; i < count; i++) - counters[i] = (u64)ntohl(data[i * 2]) | - ((u64)ntohl(data[i * 2 + 1]) << 32); - -out: - kfree(data); - return ret; -} - -static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags) -{ - struct mlx5_fpga_ipsec_cmd_context *context; - struct mlx5_ifc_fpga_ipsec_cmd_cap cmd = {0}; - int err; - - cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP); - cmd.flags = htonl(flags); - context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd)); - if (IS_ERR(context)) - return PTR_ERR(context); - - err = mlx5_fpga_ipsec_cmd_wait(context); - if (err) - goto out; - - if ((context->resp.flags & cmd.flags) != cmd.flags) { - mlx5_fpga_err(context->dev, "Failed to set capabilities. cmd 0x%08x vs resp 0x%08x\n", - cmd.flags, - context->resp.flags); - err = -EIO; - } - -out: - kfree(context); - return err; -} - -static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev) -{ - u32 dev_caps = mlx5_fpga_ipsec_device_caps(mdev); - u32 flags = 0; - - if (dev_caps & MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER) - flags |= MLX5_FPGA_IPSEC_CAP_NO_TRAILER; - - return mlx5_fpga_ipsec_set_caps(mdev, flags); -} - -static void -mlx5_fpga_ipsec_build_hw_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs, - struct mlx5_ifc_fpga_ipsec_sa *hw_sa) -{ - const struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm; - - /* key */ - memcpy(&hw_sa->ipsec_sa_v1.key_enc, aes_gcm->aes_key, - aes_gcm->key_len / 8); - /* Duplicate 128 bit key twice according to HW layout */ - if (aes_gcm->key_len == 128) - memcpy(&hw_sa->ipsec_sa_v1.key_enc[16], - aes_gcm->aes_key, aes_gcm->key_len / 8); - - /* salt and seq_iv */ - memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, &aes_gcm->seq_iv, - sizeof(aes_gcm->seq_iv)); - memcpy(&hw_sa->ipsec_sa_v1.gcm.salt, &aes_gcm->salt, - sizeof(aes_gcm->salt)); - - /* esn */ - if (xfrm_attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) { - hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_ESN_EN; - hw_sa->ipsec_sa_v1.flags |= - (xfrm_attrs->flags & - MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ? - MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0; - hw_sa->esn = htonl(xfrm_attrs->esn); - } else { - hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_EN; - hw_sa->ipsec_sa_v1.flags &= - ~(xfrm_attrs->flags & - MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ? - MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0; - hw_sa->esn = 0; - } - - /* rx handle */ - hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(xfrm_attrs->sa_handle); - - /* enc mode */ - switch (aes_gcm->key_len) { - case 128: - hw_sa->ipsec_sa_v1.enc_mode = - MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128; - break; - case 256: - hw_sa->ipsec_sa_v1.enc_mode = - MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128; - break; - } - - /* flags */ - hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_SA_VALID | - MLX5_FPGA_IPSEC_SA_SPI_EN | - MLX5_FPGA_IPSEC_SA_IP_ESP; - - if (xfrm_attrs->action & MLX5_ACCEL_ESP_ACTION_ENCRYPT) - hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_DIR_SX; - else - hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_DIR_SX; -} - -static void -mlx5_fpga_ipsec_build_hw_sa(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs, - const __be32 saddr[4], - const __be32 daddr[4], - const __be32 spi, bool is_ipv6, - struct mlx5_ifc_fpga_ipsec_sa *hw_sa) -{ - mlx5_fpga_ipsec_build_hw_xfrm(mdev, xfrm_attrs, hw_sa); - - /* IPs */ - memcpy(hw_sa->ipsec_sa_v1.sip, saddr, sizeof(hw_sa->ipsec_sa_v1.sip)); - memcpy(hw_sa->ipsec_sa_v1.dip, daddr, sizeof(hw_sa->ipsec_sa_v1.dip)); - - /* SPI */ - hw_sa->ipsec_sa_v1.spi = spi; - - /* flags */ - if (is_ipv6) - hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_IPV6; -} - -static bool is_full_mask(const void *p, size_t len) -{ - WARN_ON(len % 4); - - return !memchr_inv(p, 0xff, len); -} - -static bool validate_fpga_full_mask(struct mlx5_core_dev *dev, - const u32 *match_c, - const u32 *match_v) -{ - const void *misc_params_c = MLX5_ADDR_OF(fte_match_param, - match_c, - misc_parameters); - const void *headers_c = MLX5_ADDR_OF(fte_match_param, - match_c, - outer_headers); - const void *headers_v = MLX5_ADDR_OF(fte_match_param, - match_v, - outer_headers); - - if (mlx5_fs_is_outer_ipv4_flow(dev, headers_c, headers_v)) { - const void *s_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, - headers_c, - src_ipv4_src_ipv6.ipv4_layout.ipv4); - const void *d_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, - headers_c, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - - if (!is_full_mask(s_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout, - ipv4)) || - !is_full_mask(d_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout, - ipv4))) - return false; - } else { - const void *s_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, - headers_c, - src_ipv4_src_ipv6.ipv6_layout.ipv6); - const void *d_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, - headers_c, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6); - - if (!is_full_mask(s_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)) || - !is_full_mask(d_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6))) - return false; - } - - if (!is_full_mask(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, - outer_esp_spi), - MLX5_FLD_SZ_BYTES(fte_match_set_misc, outer_esp_spi))) - return false; - - return true; -} - -static bool mlx5_is_fpga_ipsec_rule(struct mlx5_core_dev *dev, - u8 match_criteria_enable, - const u32 *match_c, - const u32 *match_v) -{ - u32 ipsec_dev_caps = mlx5_fpga_ipsec_device_caps(dev); - bool ipv6_flow; - - ipv6_flow = mlx5_fs_is_outer_ipv6_flow(dev, match_c, match_v); - - if (!(match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) || - mlx5_fs_is_outer_udp_flow(match_c, match_v) || - mlx5_fs_is_outer_tcp_flow(match_c, match_v) || - mlx5_fs_is_vxlan_flow(match_c) || - !(mlx5_fs_is_outer_ipv4_flow(dev, match_c, match_v) || - ipv6_flow)) - return false; - - if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_DEVICE)) - return false; - - if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_ESP) && - mlx5_fs_is_outer_ipsec_flow(match_c)) - return false; - - if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_IPV6) && - ipv6_flow) - return false; - - if (!validate_fpga_full_mask(dev, match_c, match_v)) - return false; - - return true; -} - -static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, - u8 match_criteria_enable, - const u32 *match_c, - const u32 *match_v, - struct mlx5_flow_act *flow_act, - struct mlx5_flow_context *flow_context) -{ - const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - bool is_dmac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_47_16) || - MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_15_0); - bool is_smac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_47_16) || - MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_15_0); - int ret; - - ret = mlx5_is_fpga_ipsec_rule(dev, match_criteria_enable, match_c, - match_v); - if (!ret) - return ret; - - if (is_dmac || is_smac || - (match_criteria_enable & - ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || - (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || - (flow_context->flags & FLOW_CONTEXT_HAS_TAG)) - return false; - - return true; -} - -static void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *accel_xfrm, - const __be32 saddr[4], const __be32 daddr[4], - const __be32 spi, bool is_ipv6, u32 *sa_handle) -{ - struct mlx5_fpga_ipsec_sa_ctx *sa_ctx; - struct mlx5_fpga_esp_xfrm *fpga_xfrm = - container_of(accel_xfrm, typeof(*fpga_xfrm), - accel_xfrm); - struct mlx5_fpga_device *fdev = mdev->fpga; - struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; - int opcode, err; - void *context; - - /* alloc SA */ - sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL); - if (!sa_ctx) - return ERR_PTR(-ENOMEM); - - sa_ctx->dev = mdev; - - /* build candidate SA */ - mlx5_fpga_ipsec_build_hw_sa(mdev, &accel_xfrm->attrs, - saddr, daddr, spi, is_ipv6, - &sa_ctx->hw_sa); - - mutex_lock(&fpga_xfrm->lock); - - if (fpga_xfrm->sa_ctx) { /* multiple rules for same accel_xfrm */ - /* all rules must be with same IPs and SPI */ - if (memcmp(&sa_ctx->hw_sa, &fpga_xfrm->sa_ctx->hw_sa, - sizeof(sa_ctx->hw_sa))) { - context = ERR_PTR(-EINVAL); - goto exists; - } - - ++fpga_xfrm->num_rules; - context = fpga_xfrm->sa_ctx; - goto exists; - } - - if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT) { - err = ida_alloc_min(&fipsec->halloc, 1, GFP_KERNEL); - if (err < 0) { - context = ERR_PTR(err); - goto exists; - } - - sa_ctx->sa_handle = err; - if (sa_handle) - *sa_handle = sa_ctx->sa_handle; - } - /* This is unbounded fpga_xfrm, try to add to hash */ - mutex_lock(&fipsec->sa_hash_lock); - - err = rhashtable_lookup_insert_fast(&fipsec->sa_hash, &sa_ctx->hash, - rhash_sa); - if (err) { - /* Can't bound different accel_xfrm to already existing sa_ctx. - * This is because we can't support multiple ketmats for - * same IPs and SPI - */ - context = ERR_PTR(-EEXIST); - goto unlock_hash; - } - - /* Bound accel_xfrm to sa_ctx */ - opcode = is_v2_sadb_supported(fdev->ipsec) ? - MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 : - MLX5_FPGA_IPSEC_CMD_OP_ADD_SA; - err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode); - sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0; - if (err) { - context = ERR_PTR(err); - goto delete_hash; - } - - mutex_unlock(&fipsec->sa_hash_lock); - - ++fpga_xfrm->num_rules; - fpga_xfrm->sa_ctx = sa_ctx; - sa_ctx->fpga_xfrm = fpga_xfrm; - - mutex_unlock(&fpga_xfrm->lock); - - return sa_ctx; - -delete_hash: - WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash, - rhash_sa)); -unlock_hash: - mutex_unlock(&fipsec->sa_hash_lock); - if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT) - ida_free(&fipsec->halloc, sa_ctx->sa_handle); -exists: - mutex_unlock(&fpga_xfrm->lock); - kfree(sa_ctx); - return context; -} - -static void * -mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev, - struct fs_fte *fte, - bool is_egress) -{ - struct mlx5_accel_esp_xfrm *accel_xfrm; - __be32 saddr[4], daddr[4], spi; - struct mlx5_flow_group *fg; - bool is_ipv6 = false; - - fs_get_obj(fg, fte->node.parent); - /* validate */ - if (is_egress && - !mlx5_is_fpga_egress_ipsec_rule(mdev, - fg->mask.match_criteria_enable, - fg->mask.match_criteria, - fte->val, - &fte->action, - &fte->flow_context)) - return ERR_PTR(-EINVAL); - else if (!mlx5_is_fpga_ipsec_rule(mdev, - fg->mask.match_criteria_enable, - fg->mask.match_criteria, - fte->val)) - return ERR_PTR(-EINVAL); - - /* get xfrm context */ - accel_xfrm = - (struct mlx5_accel_esp_xfrm *)fte->action.esp_id; - - /* IPs */ - if (mlx5_fs_is_outer_ipv4_flow(mdev, fg->mask.match_criteria, - fte->val)) { - memcpy(&saddr[3], - MLX5_ADDR_OF(fte_match_set_lyr_2_4, - fte->val, - src_ipv4_src_ipv6.ipv4_layout.ipv4), - sizeof(saddr[3])); - memcpy(&daddr[3], - MLX5_ADDR_OF(fte_match_set_lyr_2_4, - fte->val, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - sizeof(daddr[3])); - } else { - memcpy(saddr, - MLX5_ADDR_OF(fte_match_param, - fte->val, - outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), - sizeof(saddr)); - memcpy(daddr, - MLX5_ADDR_OF(fte_match_param, - fte->val, - outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - sizeof(daddr)); - is_ipv6 = true; - } - - /* SPI */ - spi = MLX5_GET_BE(typeof(spi), - fte_match_param, fte->val, - misc_parameters.outer_esp_spi); - - /* create */ - return mlx5_fpga_ipsec_create_sa_ctx(mdev, accel_xfrm, - saddr, daddr, - spi, is_ipv6, NULL); -} - -static void -mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx) -{ - struct mlx5_fpga_device *fdev = sa_ctx->dev->fpga; - struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; - int opcode = is_v2_sadb_supported(fdev->ipsec) ? - MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 : - MLX5_FPGA_IPSEC_CMD_OP_DEL_SA; - int err; - - err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode); - sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0; - if (err) { - WARN_ON(err); - return; - } - - if (sa_ctx->fpga_xfrm->accel_xfrm.attrs.action == - MLX5_ACCEL_ESP_ACTION_DECRYPT) - ida_free(&fipsec->halloc, sa_ctx->sa_handle); - - mutex_lock(&fipsec->sa_hash_lock); - WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash, - rhash_sa)); - mutex_unlock(&fipsec->sa_hash_lock); -} - -static void mlx5_fpga_ipsec_delete_sa_ctx(void *context) -{ - struct mlx5_fpga_esp_xfrm *fpga_xfrm = - ((struct mlx5_fpga_ipsec_sa_ctx *)context)->fpga_xfrm; - - mutex_lock(&fpga_xfrm->lock); - if (!--fpga_xfrm->num_rules) { - mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx); - kfree(fpga_xfrm->sa_ctx); - fpga_xfrm->sa_ctx = NULL; - } - mutex_unlock(&fpga_xfrm->lock); -} - -static inline struct mlx5_fpga_ipsec_rule * -_rule_search(struct rb_root *root, struct fs_fte *fte) -{ - struct rb_node *node = root->rb_node; - - while (node) { - struct mlx5_fpga_ipsec_rule *rule = - container_of(node, struct mlx5_fpga_ipsec_rule, - node); - - if (rule->fte < fte) - node = node->rb_left; - else if (rule->fte > fte) - node = node->rb_right; - else - return rule; - } - return NULL; -} - -static struct mlx5_fpga_ipsec_rule * -rule_search(struct mlx5_fpga_ipsec *ipsec_dev, struct fs_fte *fte) -{ - struct mlx5_fpga_ipsec_rule *rule; - - mutex_lock(&ipsec_dev->rules_rb_lock); - rule = _rule_search(&ipsec_dev->rules_rb, fte); - mutex_unlock(&ipsec_dev->rules_rb_lock); - - return rule; -} - -static inline int _rule_insert(struct rb_root *root, - struct mlx5_fpga_ipsec_rule *rule) -{ - struct rb_node **new = &root->rb_node, *parent = NULL; - - /* Figure out where to put new node */ - while (*new) { - struct mlx5_fpga_ipsec_rule *this = - container_of(*new, struct mlx5_fpga_ipsec_rule, - node); - - parent = *new; - if (rule->fte < this->fte) - new = &((*new)->rb_left); - else if (rule->fte > this->fte) - new = &((*new)->rb_right); - else - return -EEXIST; - } - - /* Add new node and rebalance tree. */ - rb_link_node(&rule->node, parent, new); - rb_insert_color(&rule->node, root); - - return 0; -} - -static int rule_insert(struct mlx5_fpga_ipsec *ipsec_dev, - struct mlx5_fpga_ipsec_rule *rule) -{ - int ret; - - mutex_lock(&ipsec_dev->rules_rb_lock); - ret = _rule_insert(&ipsec_dev->rules_rb, rule); - mutex_unlock(&ipsec_dev->rules_rb_lock); - - return ret; -} - -static inline void _rule_delete(struct mlx5_fpga_ipsec *ipsec_dev, - struct mlx5_fpga_ipsec_rule *rule) -{ - struct rb_root *root = &ipsec_dev->rules_rb; - - mutex_lock(&ipsec_dev->rules_rb_lock); - rb_erase(&rule->node, root); - mutex_unlock(&ipsec_dev->rules_rb_lock); -} - -static void rule_delete(struct mlx5_fpga_ipsec *ipsec_dev, - struct mlx5_fpga_ipsec_rule *rule) -{ - _rule_delete(ipsec_dev, rule); - kfree(rule); -} - -struct mailbox_mod { - uintptr_t saved_esp_id; - u32 saved_action; - u32 saved_outer_esp_spi_value; -}; - -static void restore_spec_mailbox(struct fs_fte *fte, - struct mailbox_mod *mbox_mod) -{ - char *misc_params_v = MLX5_ADDR_OF(fte_match_param, - fte->val, - misc_parameters); - - MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, - mbox_mod->saved_outer_esp_spi_value); - fte->action.action |= mbox_mod->saved_action; - fte->action.esp_id = (uintptr_t)mbox_mod->saved_esp_id; -} - -static void modify_spec_mailbox(struct mlx5_core_dev *mdev, - struct fs_fte *fte, - struct mailbox_mod *mbox_mod) -{ - char *misc_params_v = MLX5_ADDR_OF(fte_match_param, - fte->val, - misc_parameters); - - mbox_mod->saved_esp_id = fte->action.esp_id; - mbox_mod->saved_action = fte->action.action & - (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT); - mbox_mod->saved_outer_esp_spi_value = - MLX5_GET(fte_match_set_misc, misc_params_v, - outer_esp_spi); - - fte->action.esp_id = 0; - fte->action.action &= ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT); - if (!MLX5_CAP_FLOWTABLE(mdev, - flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) - MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, 0); -} - -static enum fs_flow_table_type egress_to_fs_ft(bool egress) -{ - return egress ? FS_FT_NIC_TX : FS_FT_NIC_RX; -} - -static int fpga_ipsec_fs_create_flow_group(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - u32 *in, - struct mlx5_flow_group *fg, - bool is_egress) -{ - int (*create_flow_group)(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, u32 *in, - struct mlx5_flow_group *fg) = - mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group; - char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in, - match_criteria.misc_parameters); - struct mlx5_core_dev *dev = ns->dev; - u32 saved_outer_esp_spi_mask; - u8 match_criteria_enable; - int ret; - - if (MLX5_CAP_FLOWTABLE(dev, - flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) - return create_flow_group(ns, ft, in, fg); - - match_criteria_enable = - MLX5_GET(create_flow_group_in, in, match_criteria_enable); - saved_outer_esp_spi_mask = - MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi); - if (!match_criteria_enable || !saved_outer_esp_spi_mask) - return create_flow_group(ns, ft, in, fg); - - MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0); - - if (!(*misc_params_c) && - !memcmp(misc_params_c, misc_params_c + 1, MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1)) - MLX5_SET(create_flow_group_in, in, match_criteria_enable, - match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS); - - ret = create_flow_group(ns, ft, in, fg); - - MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask); - MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable); - - return ret; -} - -static int fpga_ipsec_fs_create_fte(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - struct fs_fte *fte, - bool is_egress) -{ - int (*create_fte)(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - struct fs_fte *fte) = - mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte; - struct mlx5_core_dev *dev = ns->dev; - struct mlx5_fpga_device *fdev = dev->fpga; - struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; - struct mlx5_fpga_ipsec_rule *rule; - bool is_esp = fte->action.esp_id; - struct mailbox_mod mbox_mod; - int ret; - - if (!is_esp || - !(fte->action.action & - (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) - return create_fte(ns, ft, fg, fte); - - rule = kzalloc(sizeof(*rule), GFP_KERNEL); - if (!rule) - return -ENOMEM; - - rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress); - if (IS_ERR(rule->ctx)) { - int err = PTR_ERR(rule->ctx); - - kfree(rule); - return err; - } - - rule->fte = fte; - WARN_ON(rule_insert(fipsec, rule)); - - modify_spec_mailbox(dev, fte, &mbox_mod); - ret = create_fte(ns, ft, fg, fte); - restore_spec_mailbox(fte, &mbox_mod); - if (ret) { - _rule_delete(fipsec, rule); - mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx); - kfree(rule); - } - - return ret; -} - -static int fpga_ipsec_fs_update_fte(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - int modify_mask, - struct fs_fte *fte, - bool is_egress) -{ - int (*update_fte)(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - int modify_mask, - struct fs_fte *fte) = - mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->update_fte; - struct mlx5_core_dev *dev = ns->dev; - bool is_esp = fte->action.esp_id; - struct mailbox_mod mbox_mod; - int ret; - - if (!is_esp || - !(fte->action.action & - (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) - return update_fte(ns, ft, fg, modify_mask, fte); - - modify_spec_mailbox(dev, fte, &mbox_mod); - ret = update_fte(ns, ft, fg, modify_mask, fte); - restore_spec_mailbox(fte, &mbox_mod); - - return ret; -} - -static int fpga_ipsec_fs_delete_fte(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct fs_fte *fte, - bool is_egress) -{ - int (*delete_fte)(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct fs_fte *fte) = - mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte; - struct mlx5_core_dev *dev = ns->dev; - struct mlx5_fpga_device *fdev = dev->fpga; - struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; - struct mlx5_fpga_ipsec_rule *rule; - bool is_esp = fte->action.esp_id; - struct mailbox_mod mbox_mod; - int ret; - - if (!is_esp || - !(fte->action.action & - (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) - return delete_fte(ns, ft, fte); - - rule = rule_search(fipsec, fte); - if (!rule) - return -ENOENT; - - mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx); - rule_delete(fipsec, rule); - - modify_spec_mailbox(dev, fte, &mbox_mod); - ret = delete_fte(ns, ft, fte); - restore_spec_mailbox(fte, &mbox_mod); - - return ret; -} - -static int -mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - u32 *in, - struct mlx5_flow_group *fg) -{ - return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, true); -} - -static int -mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, true); -} - -static int -mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - int modify_mask, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte, - true); -} - -static int -mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_delete_fte(ns, ft, fte, true); -} - -static int -mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - u32 *in, - struct mlx5_flow_group *fg) -{ - return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, false); -} - -static int -mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, false); -} - -static int -mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - int modify_mask, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte, - false); -} - -static int -mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_delete_fte(ns, ft, fte, false); -} - -static struct mlx5_flow_cmds fpga_ipsec_ingress; -static struct mlx5_flow_cmds fpga_ipsec_egress; - -const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) -{ - switch (type) { - case FS_FT_NIC_RX: - return &fpga_ipsec_ingress; - case FS_FT_NIC_TX: - return &fpga_ipsec_egress; - default: - WARN_ON(true); - return NULL; - } -} - -static int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_conn_attr init_attr = {0}; - struct mlx5_fpga_device *fdev = mdev->fpga; - struct mlx5_fpga_conn *conn; - int err; - - if (!mlx5_fpga_is_ipsec_device(mdev)) - return 0; - - fdev->ipsec = kzalloc(sizeof(*fdev->ipsec), GFP_KERNEL); - if (!fdev->ipsec) - return -ENOMEM; - - fdev->ipsec->fdev = fdev; - - err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps), - fdev->ipsec->caps); - if (err) { - mlx5_fpga_err(fdev, "Failed to retrieve IPSec extended capabilities: %d\n", - err); - goto error; - } - - INIT_LIST_HEAD(&fdev->ipsec->pending_cmds); - spin_lock_init(&fdev->ipsec->pending_cmds_lock); - - init_attr.rx_size = SBU_QP_QUEUE_SIZE; - init_attr.tx_size = SBU_QP_QUEUE_SIZE; - init_attr.recv_cb = mlx5_fpga_ipsec_recv; - init_attr.cb_arg = fdev; - conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr); - if (IS_ERR(conn)) { - err = PTR_ERR(conn); - mlx5_fpga_err(fdev, "Error creating IPSec command connection %d\n", - err); - goto error; - } - fdev->ipsec->conn = conn; - - err = rhashtable_init(&fdev->ipsec->sa_hash, &rhash_sa); - if (err) - goto err_destroy_conn; - mutex_init(&fdev->ipsec->sa_hash_lock); - - fdev->ipsec->rules_rb = RB_ROOT; - mutex_init(&fdev->ipsec->rules_rb_lock); - - err = mlx5_fpga_ipsec_enable_supported_caps(mdev); - if (err) { - mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n", - err); - goto err_destroy_hash; - } - - ida_init(&fdev->ipsec->halloc); - - return 0; - -err_destroy_hash: - rhashtable_destroy(&fdev->ipsec->sa_hash); - -err_destroy_conn: - mlx5_fpga_sbu_conn_destroy(conn); - -error: - kfree(fdev->ipsec); - fdev->ipsec = NULL; - return err; -} - -static void destroy_rules_rb(struct rb_root *root) -{ - struct mlx5_fpga_ipsec_rule *r, *tmp; - - rbtree_postorder_for_each_entry_safe(r, tmp, root, node) { - rb_erase(&r->node, root); - mlx5_fpga_ipsec_delete_sa_ctx(r->ctx); - kfree(r); - } -} - -static void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - - if (!mlx5_fpga_is_ipsec_device(mdev)) - return; - - ida_destroy(&fdev->ipsec->halloc); - destroy_rules_rb(&fdev->ipsec->rules_rb); - rhashtable_destroy(&fdev->ipsec->sa_hash); - - mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn); - kfree(fdev->ipsec); - fdev->ipsec = NULL; -} - -void mlx5_fpga_ipsec_build_fs_cmds(void) -{ - /* ingress */ - fpga_ipsec_ingress.create_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->create_flow_table; - fpga_ipsec_ingress.destroy_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_table; - fpga_ipsec_ingress.modify_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->modify_flow_table; - fpga_ipsec_ingress.create_flow_group = - mlx5_fpga_ipsec_fs_create_flow_group_ingress; - fpga_ipsec_ingress.destroy_flow_group = - mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_group; - fpga_ipsec_ingress.create_fte = - mlx5_fpga_ipsec_fs_create_fte_ingress; - fpga_ipsec_ingress.update_fte = - mlx5_fpga_ipsec_fs_update_fte_ingress; - fpga_ipsec_ingress.delete_fte = - mlx5_fpga_ipsec_fs_delete_fte_ingress; - fpga_ipsec_ingress.update_root_ft = - mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->update_root_ft; - - /* egress */ - fpga_ipsec_egress.create_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->create_flow_table; - fpga_ipsec_egress.destroy_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_table; - fpga_ipsec_egress.modify_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->modify_flow_table; - fpga_ipsec_egress.create_flow_group = - mlx5_fpga_ipsec_fs_create_flow_group_egress; - fpga_ipsec_egress.destroy_flow_group = - mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_group; - fpga_ipsec_egress.create_fte = - mlx5_fpga_ipsec_fs_create_fte_egress; - fpga_ipsec_egress.update_fte = - mlx5_fpga_ipsec_fs_update_fte_egress; - fpga_ipsec_egress.delete_fte = - mlx5_fpga_ipsec_fs_delete_fte_egress; - fpga_ipsec_egress.update_root_ft = - mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->update_root_ft; -} - -static int -mlx5_fpga_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs) -{ - if (attrs->tfc_pad) { - mlx5_core_err(mdev, "Cannot offload xfrm states with tfc padding\n"); - return -EOPNOTSUPP; - } - - if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) { - mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay\n"); - return -EOPNOTSUPP; - } - - if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) { - mlx5_core_err(mdev, "Only aes gcm keymat is supported\n"); - return -EOPNOTSUPP; - } - - if (attrs->keymat.aes_gcm.iv_algo != - MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) { - mlx5_core_err(mdev, "Only iv sequence algo is supported\n"); - return -EOPNOTSUPP; - } - - if (attrs->keymat.aes_gcm.icv_len != 128) { - mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n"); - return -EOPNOTSUPP; - } - - if (attrs->keymat.aes_gcm.key_len != 128 && - attrs->keymat.aes_gcm.key_len != 256) { - mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); - return -EOPNOTSUPP; - } - - if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) && - (!MLX5_GET(ipsec_extended_cap, mdev->fpga->ipsec->caps, - v2_command))) { - mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); - return -EOPNOTSUPP; - } - - return 0; -} - -static struct mlx5_accel_esp_xfrm * -mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) -{ - struct mlx5_fpga_esp_xfrm *fpga_xfrm; - - if (!(flags & MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA)) { - mlx5_core_warn(mdev, "Tried to create an esp action without metadata\n"); - return ERR_PTR(-EINVAL); - } - - if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { - mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n"); - return ERR_PTR(-EOPNOTSUPP); - } - - fpga_xfrm = kzalloc(sizeof(*fpga_xfrm), GFP_KERNEL); - if (!fpga_xfrm) - return ERR_PTR(-ENOMEM); - - mutex_init(&fpga_xfrm->lock); - memcpy(&fpga_xfrm->accel_xfrm.attrs, attrs, - sizeof(fpga_xfrm->accel_xfrm.attrs)); - - return &fpga_xfrm->accel_xfrm; -} - -static void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) -{ - struct mlx5_fpga_esp_xfrm *fpga_xfrm = - container_of(xfrm, struct mlx5_fpga_esp_xfrm, - accel_xfrm); - /* assuming no sa_ctx are connected to this xfrm_ctx */ - kfree(fpga_xfrm); -} - -static int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs) -{ - struct mlx5_core_dev *mdev = xfrm->mdev; - struct mlx5_fpga_device *fdev = mdev->fpga; - struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; - struct mlx5_fpga_esp_xfrm *fpga_xfrm; - struct mlx5_ifc_fpga_ipsec_sa org_hw_sa; - - int err = 0; - - if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs))) - return 0; - - if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { - mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n"); - return -EOPNOTSUPP; - } - - if (is_v2_sadb_supported(fipsec)) { - mlx5_core_warn(mdev, "Modify esp is not supported\n"); - return -EOPNOTSUPP; - } - - fpga_xfrm = container_of(xfrm, struct mlx5_fpga_esp_xfrm, accel_xfrm); - - mutex_lock(&fpga_xfrm->lock); - - if (!fpga_xfrm->sa_ctx) - /* Unbounded xfrm, change only sw attrs */ - goto change_sw_xfrm_attrs; - - /* copy original hw sa */ - memcpy(&org_hw_sa, &fpga_xfrm->sa_ctx->hw_sa, sizeof(org_hw_sa)); - mutex_lock(&fipsec->sa_hash_lock); - /* remove original hw sa from hash */ - WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, - &fpga_xfrm->sa_ctx->hash, rhash_sa)); - /* update hw_sa with new xfrm attrs*/ - mlx5_fpga_ipsec_build_hw_xfrm(xfrm->mdev, attrs, - &fpga_xfrm->sa_ctx->hw_sa); - /* try to insert new hw_sa to hash */ - err = rhashtable_insert_fast(&fipsec->sa_hash, - &fpga_xfrm->sa_ctx->hash, rhash_sa); - if (err) - goto rollback_sa; - - /* modify device with new hw_sa */ - err = mlx5_fpga_ipsec_update_hw_sa(fdev, &fpga_xfrm->sa_ctx->hw_sa, - MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2); - fpga_xfrm->sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0; - if (err) - WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, - &fpga_xfrm->sa_ctx->hash, - rhash_sa)); -rollback_sa: - if (err) { - /* return original hw_sa to hash */ - memcpy(&fpga_xfrm->sa_ctx->hw_sa, &org_hw_sa, - sizeof(org_hw_sa)); - WARN_ON(rhashtable_insert_fast(&fipsec->sa_hash, - &fpga_xfrm->sa_ctx->hash, - rhash_sa)); - } - mutex_unlock(&fipsec->sa_hash_lock); - -change_sw_xfrm_attrs: - if (!err) - memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs)); - mutex_unlock(&fpga_xfrm->lock); - return err; -} - -static const struct mlx5_accel_ipsec_ops fpga_ipsec_ops = { - .device_caps = mlx5_fpga_ipsec_device_caps, - .counters_count = mlx5_fpga_ipsec_counters_count, - .counters_read = mlx5_fpga_ipsec_counters_read, - .create_hw_context = mlx5_fpga_ipsec_create_sa_ctx, - .free_hw_context = mlx5_fpga_ipsec_delete_sa_ctx, - .init = mlx5_fpga_ipsec_init, - .cleanup = mlx5_fpga_ipsec_cleanup, - .esp_create_xfrm = mlx5_fpga_esp_create_xfrm, - .esp_modify_xfrm = mlx5_fpga_esp_modify_xfrm, - .esp_destroy_xfrm = mlx5_fpga_esp_destroy_xfrm, -}; - -const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev) -{ - if (!mlx5_fpga_is_ipsec_device(mdev)) - return NULL; - - return &fpga_ipsec_ops; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h deleted file mode 100644 index 8931b5584477..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef __MLX5_FPGA_IPSEC_H__ -#define __MLX5_FPGA_IPSEC_H__ - -#include "accel/ipsec.h" -#include "fs_cmd.h" - -#ifdef CONFIG_MLX5_FPGA_IPSEC -const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev); -u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev); -const struct mlx5_flow_cmds * -mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type); -void mlx5_fpga_ipsec_build_fs_cmds(void); -bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev); -#else -static inline -const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev) -{ return NULL; } -static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; } -static inline const struct mlx5_flow_cmds * -mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) -{ - return mlx5_fs_cmd_get_default(type); -} - -static inline void mlx5_fpga_ipsec_build_fs_cmds(void) {}; -static inline bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) { return false; } - -#endif /* CONFIG_MLX5_FPGA_IPSEC */ -#endif /* __MLX5_FPGA_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c deleted file mode 100644 index 29b7339ebfa3..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ /dev/null @@ -1,622 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/mlx5/device.h> -#include "fpga/tls.h" -#include "fpga/cmd.h" -#include "fpga/sdk.h" -#include "fpga/core.h" -#include "accel/tls.h" - -struct mlx5_fpga_tls_command_context; - -typedef void (*mlx5_fpga_tls_command_complete) - (struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev, - struct mlx5_fpga_tls_command_context *ctx, - struct mlx5_fpga_dma_buf *resp); - -struct mlx5_fpga_tls_command_context { - struct list_head list; - /* There is no guarantee on the order between the TX completion - * and the command response. - * The TX completion is going to touch cmd->buf even in - * the case of successful transmission. - * So instead of requiring separate allocations for cmd - * and cmd->buf we've decided to use a reference counter - */ - refcount_t ref; - struct mlx5_fpga_dma_buf buf; - mlx5_fpga_tls_command_complete complete; -}; - -static void -mlx5_fpga_tls_put_command_ctx(struct mlx5_fpga_tls_command_context *ctx) -{ - if (refcount_dec_and_test(&ctx->ref)) - kfree(ctx); -} - -static void mlx5_fpga_tls_cmd_complete(struct mlx5_fpga_device *fdev, - struct mlx5_fpga_dma_buf *resp) -{ - struct mlx5_fpga_conn *conn = fdev->tls->conn; - struct mlx5_fpga_tls_command_context *ctx; - struct mlx5_fpga_tls *tls = fdev->tls; - unsigned long flags; - - spin_lock_irqsave(&tls->pending_cmds_lock, flags); - ctx = list_first_entry(&tls->pending_cmds, - struct mlx5_fpga_tls_command_context, list); - list_del(&ctx->list); - spin_unlock_irqrestore(&tls->pending_cmds_lock, flags); - ctx->complete(conn, fdev, ctx, resp); -} - -static void mlx5_fpga_cmd_send_complete(struct mlx5_fpga_conn *conn, - struct mlx5_fpga_device *fdev, - struct mlx5_fpga_dma_buf *buf, - u8 status) -{ - struct mlx5_fpga_tls_command_context *ctx = - container_of(buf, struct mlx5_fpga_tls_command_context, buf); - - mlx5_fpga_tls_put_command_ctx(ctx); - - if (unlikely(status)) - mlx5_fpga_tls_cmd_complete(fdev, NULL); -} - -static void mlx5_fpga_tls_cmd_send(struct mlx5_fpga_device *fdev, - struct mlx5_fpga_tls_command_context *cmd, - mlx5_fpga_tls_command_complete complete) -{ - struct mlx5_fpga_tls *tls = fdev->tls; - unsigned long flags; - int ret; - - refcount_set(&cmd->ref, 2); - cmd->complete = complete; - cmd->buf.complete = mlx5_fpga_cmd_send_complete; - - spin_lock_irqsave(&tls->pending_cmds_lock, flags); - /* mlx5_fpga_sbu_conn_sendmsg is called under pending_cmds_lock - * to make sure commands are inserted to the tls->pending_cmds list - * and the command QP in the same order. - */ - ret = mlx5_fpga_sbu_conn_sendmsg(tls->conn, &cmd->buf); - if (likely(!ret)) - list_add_tail(&cmd->list, &tls->pending_cmds); - else - complete(tls->conn, fdev, cmd, NULL); - spin_unlock_irqrestore(&tls->pending_cmds_lock, flags); -} - -/* Start of context identifiers range (inclusive) */ -#define SWID_START 0 -/* End of context identifiers range (exclusive) */ -#define SWID_END BIT(24) - -static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock, - void *ptr) -{ - unsigned long flags; - int ret; - - /* TLS metadata format is 1 byte for syndrome followed - * by 3 bytes of swid (software ID) - * swid must not exceed 3 bytes. - * See tls_rxtx.c:insert_pet() for details - */ - BUILD_BUG_ON((SWID_END - 1) & 0xFF000000); - - idr_preload(GFP_KERNEL); - spin_lock_irqsave(idr_spinlock, flags); - ret = idr_alloc(idr, ptr, SWID_START, SWID_END, GFP_ATOMIC); - spin_unlock_irqrestore(idr_spinlock, flags); - idr_preload_end(); - - return ret; -} - -static void *mlx5_fpga_tls_release_swid(struct idr *idr, - spinlock_t *idr_spinlock, u32 swid) -{ - unsigned long flags; - void *ptr; - - spin_lock_irqsave(idr_spinlock, flags); - ptr = idr_remove(idr, swid); - spin_unlock_irqrestore(idr_spinlock, flags); - return ptr; -} - -static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, - struct mlx5_fpga_device *fdev, - struct mlx5_fpga_dma_buf *buf, u8 status) -{ - kfree(buf); -} - -static void -mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, - struct mlx5_fpga_device *fdev, - struct mlx5_fpga_tls_command_context *cmd, - struct mlx5_fpga_dma_buf *resp) -{ - if (resp) { - u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); - - if (syndrome) - mlx5_fpga_err(fdev, - "Teardown stream failed with syndrome = %d", - syndrome); - } - mlx5_fpga_tls_put_command_ctx(cmd); -} - -static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd) -{ - memcpy(MLX5_ADDR_OF(tls_cmd, cmd, src_port), flow, - MLX5_BYTE_OFF(tls_flow, ipv6)); - - MLX5_SET(tls_cmd, cmd, ipv6, MLX5_GET(tls_flow, flow, ipv6)); - MLX5_SET(tls_cmd, cmd, direction_sx, - MLX5_GET(tls_flow, flow, direction_sx)); -} - -int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, - u32 seq, __be64 rcd_sn) -{ - struct mlx5_fpga_dma_buf *buf; - int size = sizeof(*buf) + MLX5_TLS_COMMAND_SIZE; - void *flow; - void *cmd; - int ret; - - buf = kzalloc(size, GFP_ATOMIC); - if (!buf) - return -ENOMEM; - - cmd = (buf + 1); - - rcu_read_lock(); - flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); - if (unlikely(!flow)) { - rcu_read_unlock(); - WARN_ONCE(1, "Received NULL pointer for handle\n"); - kfree(buf); - return -EINVAL; - } - mlx5_fpga_tls_flow_to_cmd(flow, cmd); - rcu_read_unlock(); - - MLX5_SET(tls_cmd, cmd, swid, ntohl(handle)); - MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn)); - MLX5_SET(tls_cmd, cmd, tcp_sn, seq); - MLX5_SET(tls_cmd, cmd, command_type, CMD_RESYNC_RX); - - buf->sg[0].data = cmd; - buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; - buf->complete = mlx_tls_kfree_complete; - - ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf); - if (ret < 0) - kfree(buf); - - return ret; -} - -static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, - void *flow, u32 swid, gfp_t flags) -{ - struct mlx5_fpga_tls_command_context *ctx; - struct mlx5_fpga_dma_buf *buf; - void *cmd; - - ctx = kzalloc(sizeof(*ctx) + MLX5_TLS_COMMAND_SIZE, flags); - if (!ctx) - return; - - buf = &ctx->buf; - cmd = (ctx + 1); - MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM); - MLX5_SET(tls_cmd, cmd, swid, swid); - - mlx5_fpga_tls_flow_to_cmd(flow, cmd); - kfree(flow); - - buf->sg[0].data = cmd; - buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; - - mlx5_fpga_tls_cmd_send(mdev->fpga, ctx, - mlx5_fpga_tls_teardown_completion); -} - -void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, - gfp_t flags, bool direction_sx) -{ - struct mlx5_fpga_tls *tls = mdev->fpga->tls; - void *flow; - - if (direction_sx) - flow = mlx5_fpga_tls_release_swid(&tls->tx_idr, - &tls->tx_idr_spinlock, - swid); - else - flow = mlx5_fpga_tls_release_swid(&tls->rx_idr, - &tls->rx_idr_spinlock, - swid); - - if (!flow) { - mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n", - swid); - return; - } - - synchronize_rcu(); /* before kfree(flow) */ - mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags); -} - -enum mlx5_fpga_setup_stream_status { - MLX5_FPGA_CMD_PENDING, - MLX5_FPGA_CMD_SEND_FAILED, - MLX5_FPGA_CMD_RESPONSE_RECEIVED, - MLX5_FPGA_CMD_ABANDONED, -}; - -struct mlx5_setup_stream_context { - struct mlx5_fpga_tls_command_context cmd; - atomic_t status; - u32 syndrome; - struct completion comp; -}; - -static void -mlx5_fpga_tls_setup_completion(struct mlx5_fpga_conn *conn, - struct mlx5_fpga_device *fdev, - struct mlx5_fpga_tls_command_context *cmd, - struct mlx5_fpga_dma_buf *resp) -{ - struct mlx5_setup_stream_context *ctx = - container_of(cmd, struct mlx5_setup_stream_context, cmd); - int status = MLX5_FPGA_CMD_SEND_FAILED; - void *tls_cmd = ctx + 1; - - /* If we failed to send to command resp == NULL */ - if (resp) { - ctx->syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); - status = MLX5_FPGA_CMD_RESPONSE_RECEIVED; - } - - status = atomic_xchg_release(&ctx->status, status); - if (likely(status != MLX5_FPGA_CMD_ABANDONED)) { - complete(&ctx->comp); - return; - } - - mlx5_fpga_err(fdev, "Command was abandoned, syndrome = %u\n", - ctx->syndrome); - - if (!ctx->syndrome) { - /* The process was killed while waiting for the context to be - * added, and the add completed successfully. - * We need to destroy the HW context, and we can't can't reuse - * the command context because we might not have received - * the tx completion yet. - */ - mlx5_fpga_tls_del_flow(fdev->mdev, - MLX5_GET(tls_cmd, tls_cmd, swid), - GFP_ATOMIC, - MLX5_GET(tls_cmd, tls_cmd, - direction_sx)); - } - - mlx5_fpga_tls_put_command_ctx(cmd); -} - -static int mlx5_fpga_tls_setup_stream_cmd(struct mlx5_core_dev *mdev, - struct mlx5_setup_stream_context *ctx) -{ - struct mlx5_fpga_dma_buf *buf; - void *cmd = ctx + 1; - int status, ret = 0; - - buf = &ctx->cmd.buf; - buf->sg[0].data = cmd; - buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; - MLX5_SET(tls_cmd, cmd, command_type, CMD_SETUP_STREAM); - - init_completion(&ctx->comp); - atomic_set(&ctx->status, MLX5_FPGA_CMD_PENDING); - ctx->syndrome = -1; - - mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd, - mlx5_fpga_tls_setup_completion); - wait_for_completion_killable(&ctx->comp); - - status = atomic_xchg_acquire(&ctx->status, MLX5_FPGA_CMD_ABANDONED); - if (unlikely(status == MLX5_FPGA_CMD_PENDING)) - /* ctx is going to be released in mlx5_fpga_tls_setup_completion */ - return -EINTR; - - if (unlikely(ctx->syndrome)) - ret = -ENOMEM; - - mlx5_fpga_tls_put_command_ctx(&ctx->cmd); - return ret; -} - -static void mlx5_fpga_tls_hw_qp_recv_cb(void *cb_arg, - struct mlx5_fpga_dma_buf *buf) -{ - struct mlx5_fpga_device *fdev = (struct mlx5_fpga_device *)cb_arg; - - mlx5_fpga_tls_cmd_complete(fdev, buf); -} - -bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev) -{ - if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga)) - return false; - - if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) != - MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX) - return false; - - if (MLX5_CAP_FPGA(mdev, sandbox_product_id) != - MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS) - return false; - - if (MLX5_CAP_FPGA(mdev, sandbox_product_version) != 0) - return false; - - return true; -} - -static int mlx5_fpga_tls_get_caps(struct mlx5_fpga_device *fdev, - u32 *p_caps) -{ - int err, cap_size = MLX5_ST_SZ_BYTES(tls_extended_cap); - u32 caps = 0; - void *buf; - - buf = kzalloc(cap_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - err = mlx5_fpga_get_sbu_caps(fdev, cap_size, buf); - if (err) - goto out; - - if (MLX5_GET(tls_extended_cap, buf, tx)) - caps |= MLX5_ACCEL_TLS_TX; - if (MLX5_GET(tls_extended_cap, buf, rx)) - caps |= MLX5_ACCEL_TLS_RX; - if (MLX5_GET(tls_extended_cap, buf, tls_v12)) - caps |= MLX5_ACCEL_TLS_V12; - if (MLX5_GET(tls_extended_cap, buf, tls_v13)) - caps |= MLX5_ACCEL_TLS_V13; - if (MLX5_GET(tls_extended_cap, buf, lro)) - caps |= MLX5_ACCEL_TLS_LRO; - if (MLX5_GET(tls_extended_cap, buf, ipv6)) - caps |= MLX5_ACCEL_TLS_IPV6; - - if (MLX5_GET(tls_extended_cap, buf, aes_gcm_128)) - caps |= MLX5_ACCEL_TLS_AES_GCM128; - if (MLX5_GET(tls_extended_cap, buf, aes_gcm_256)) - caps |= MLX5_ACCEL_TLS_AES_GCM256; - - *p_caps = caps; - err = 0; -out: - kfree(buf); - return err; -} - -int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - struct mlx5_fpga_conn_attr init_attr = {0}; - struct mlx5_fpga_conn *conn; - struct mlx5_fpga_tls *tls; - int err = 0; - - if (!mlx5_fpga_is_tls_device(mdev) || !fdev) - return 0; - - tls = kzalloc(sizeof(*tls), GFP_KERNEL); - if (!tls) - return -ENOMEM; - - err = mlx5_fpga_tls_get_caps(fdev, &tls->caps); - if (err) - goto error; - - if (!(tls->caps & (MLX5_ACCEL_TLS_V12 | MLX5_ACCEL_TLS_AES_GCM128))) { - err = -ENOTSUPP; - goto error; - } - - init_attr.rx_size = SBU_QP_QUEUE_SIZE; - init_attr.tx_size = SBU_QP_QUEUE_SIZE; - init_attr.recv_cb = mlx5_fpga_tls_hw_qp_recv_cb; - init_attr.cb_arg = fdev; - conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr); - if (IS_ERR(conn)) { - err = PTR_ERR(conn); - mlx5_fpga_err(fdev, "Error creating TLS command connection %d\n", - err); - goto error; - } - - tls->conn = conn; - spin_lock_init(&tls->pending_cmds_lock); - INIT_LIST_HEAD(&tls->pending_cmds); - - idr_init(&tls->tx_idr); - idr_init(&tls->rx_idr); - spin_lock_init(&tls->tx_idr_spinlock); - spin_lock_init(&tls->rx_idr_spinlock); - fdev->tls = tls; - return 0; - -error: - kfree(tls); - return err; -} - -void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - - if (!fdev || !fdev->tls) - return; - - mlx5_fpga_sbu_conn_destroy(fdev->tls->conn); - kfree(fdev->tls); - fdev->tls = NULL; -} - -static void mlx5_fpga_tls_set_aes_gcm128_ctx(void *cmd, - struct tls_crypto_info *info, - __be64 *rcd_sn) -{ - struct tls12_crypto_info_aes_gcm_128 *crypto_info = - (struct tls12_crypto_info_aes_gcm_128 *)info; - - memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_rcd_sn), crypto_info->rec_seq, - TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); - - memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_implicit_iv), - crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); - memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key), - crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); - - /* in AES-GCM 128 we need to write the key twice */ - memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key) + - TLS_CIPHER_AES_GCM_128_KEY_SIZE, - crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); - - MLX5_SET(tls_cmd, cmd, alg, MLX5_TLS_ALG_AES_GCM_128); -} - -static int mlx5_fpga_tls_set_key_material(void *cmd, u32 caps, - struct tls_crypto_info *crypto_info) -{ - __be64 rcd_sn; - - switch (crypto_info->cipher_type) { - case TLS_CIPHER_AES_GCM_128: - if (!(caps & MLX5_ACCEL_TLS_AES_GCM128)) - return -EINVAL; - mlx5_fpga_tls_set_aes_gcm128_ctx(cmd, crypto_info, &rcd_sn); - break; - default: - return -EINVAL; - } - - return 0; -} - -static int _mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 swid, u32 tcp_sn) -{ - u32 caps = mlx5_fpga_tls_device_caps(mdev); - struct mlx5_setup_stream_context *ctx; - int ret = -ENOMEM; - size_t cmd_size; - void *cmd; - - cmd_size = MLX5_TLS_COMMAND_SIZE + sizeof(*ctx); - ctx = kzalloc(cmd_size, GFP_KERNEL); - if (!ctx) - goto out; - - cmd = ctx + 1; - ret = mlx5_fpga_tls_set_key_material(cmd, caps, crypto_info); - if (ret) - goto free_ctx; - - mlx5_fpga_tls_flow_to_cmd(flow, cmd); - - MLX5_SET(tls_cmd, cmd, swid, swid); - MLX5_SET(tls_cmd, cmd, tcp_sn, tcp_sn); - - return mlx5_fpga_tls_setup_stream_cmd(mdev, ctx); - -free_ctx: - kfree(ctx); -out: - return ret; -} - -int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn, u32 *p_swid, - bool direction_sx) -{ - struct mlx5_fpga_tls *tls = mdev->fpga->tls; - int ret = -ENOMEM; - u32 swid; - - if (direction_sx) - ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr, - &tls->tx_idr_spinlock, flow); - else - ret = mlx5_fpga_tls_alloc_swid(&tls->rx_idr, - &tls->rx_idr_spinlock, flow); - - if (ret < 0) - return ret; - - swid = ret; - MLX5_SET(tls_flow, flow, direction_sx, direction_sx ? 1 : 0); - - ret = _mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid, - start_offload_tcp_sn); - if (ret && ret != -EINTR) - goto free_swid; - - *p_swid = swid; - return 0; -free_swid: - if (direction_sx) - mlx5_fpga_tls_release_swid(&tls->tx_idr, - &tls->tx_idr_spinlock, swid); - else - mlx5_fpga_tls_release_swid(&tls->rx_idr, - &tls->rx_idr_spinlock, swid); - - return ret; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h deleted file mode 100644 index 5714cf391d1b..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef __MLX5_FPGA_TLS_H__ -#define __MLX5_FPGA_TLS_H__ - -#include <linux/mlx5/driver.h> - -#include <net/tls.h> -#include "fpga/core.h" - -struct mlx5_fpga_tls { - struct list_head pending_cmds; - spinlock_t pending_cmds_lock; /* Protects pending_cmds */ - u32 caps; - struct mlx5_fpga_conn *conn; - - struct idr tx_idr; - struct idr rx_idr; - spinlock_t tx_idr_spinlock; /* protects the IDR */ - spinlock_t rx_idr_spinlock; /* protects the IDR */ -}; - -int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn, u32 *p_swid, - bool direction_sx); - -void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, - gfp_t flags, bool direction_sx); - -bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev); -int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev); -void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev); - -static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev) -{ - return mdev->fpga->tls->caps; -} - -int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, - u32 seq, __be64 rcd_sn); - -#endif /* __MLX5_FPGA_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 750b21124a1a..32d4c967469c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -50,10 +50,12 @@ static int mlx5_cmd_stub_update_root_ft(struct mlx5_flow_root_namespace *ns, static int mlx5_cmd_stub_create_flow_table(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, - unsigned int size, + struct mlx5_flow_table_attr *ft_attr, struct mlx5_flow_table *next_ft) { - ft->max_fte = size ? roundup_pow_of_two(size) : 1; + int max_fte = ft_attr->max_fte; + + ft->max_fte = max_fte ? roundup_pow_of_two(max_fte) : 1; return 0; } @@ -152,6 +154,12 @@ static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns) return 0; } +static u32 mlx5_cmd_stub_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + return 0; +} + static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master, struct mlx5_core_dev *slave, bool ft_id_valid, @@ -252,7 +260,7 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, - unsigned int size, + struct mlx5_flow_table_attr *ft_attr, struct mlx5_flow_table *next_ft) { int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); @@ -261,17 +269,19 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {}; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {}; struct mlx5_core_dev *dev = ns->dev; + unsigned int size; int err; - if (size != POOL_NEXT_SIZE) - size = roundup_pow_of_two(size); - size = mlx5_ft_pool_get_avail_sz(dev, ft->type, size); + if (ft_attr->max_fte != POOL_NEXT_SIZE) + size = roundup_pow_of_two(ft_attr->max_fte); + size = mlx5_ft_pool_get_avail_sz(dev, ft->type, ft_attr->max_fte); if (!size) return -ENOSPC; MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); + MLX5_SET(create_flow_table_in, in, uid, ft_attr->uid); MLX5_SET(create_flow_table_in, in, table_type, ft->type); MLX5_SET(create_flow_table_in, in, flow_table_context.level, ft->level); MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, size ? ilog2(size) : 0); @@ -449,9 +459,11 @@ static int mlx5_set_extended_dest(struct mlx5_core_dev *dev, return 0; list_for_each_entry(dst, &fte->node.children, node.list) { - if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER || + dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_NONE) continue; - if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + if ((dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) num_encap++; num_fwd_destinations++; @@ -471,6 +483,30 @@ static int mlx5_set_extended_dest(struct mlx5_core_dev *dev, return 0; } + +static void +mlx5_cmd_set_fte_flow_meter(struct fs_fte *fte, void *in_flow_context) +{ + void *exe_aso_ctrl; + void *execute_aso; + + execute_aso = MLX5_ADDR_OF(flow_context, in_flow_context, + execute_aso[0]); + MLX5_SET(execute_aso, execute_aso, valid, 1); + MLX5_SET(execute_aso, execute_aso, aso_object_id, + fte->action.exe_aso.object_id); + + exe_aso_ctrl = MLX5_ADDR_OF(execute_aso, execute_aso, exe_aso_ctrl); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, return_reg_id, + fte->action.exe_aso.return_reg_id); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, aso_type, + fte->action.exe_aso.type); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, init_color, + fte->action.exe_aso.flow_meter.init_color); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, meter_id, + fte->action.exe_aso.flow_meter.meter_idx); +} + static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, int opmod, int modify_mask, struct mlx5_flow_table *ft, @@ -541,7 +577,10 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->action.modify_hdr->id); - MLX5_SET(flow_context, in_flow_context, ipsec_obj_id, fte->action.ipsec_obj_id); + MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_type, + fte->action.crypto.type); + MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_obj_id, + fte->action.crypto.obj_id); vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan); @@ -564,18 +603,23 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, int list_size = 0; list_for_each_entry(dst, &fte->node.children, node.list) { - unsigned int id, type = dst->dest_attr.type; + enum mlx5_flow_destination_type type = dst->dest_attr.type; + enum mlx5_ifc_flow_destination_type ifc_type; + unsigned int id; if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) continue; switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_NONE: + continue; case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: id = dst->dest_attr.ft_num; - type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE; break; case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: id = dst->dest_attr.ft->id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE; break; case MLX5_FLOW_DESTINATION_TYPE_UPLINK: case MLX5_FLOW_DESTINATION_TYPE_VPORT: @@ -589,8 +633,10 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, if (type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) { /* destination_id is reserved */ id = 0; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK; break; } + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT; id = dst->dest_attr.vport.num; if (extended_dest && dst->dest_attr.vport.pkt_reformat) { @@ -605,13 +651,15 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, break; case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER: id = dst->dest_attr.sampler_id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; break; default: id = dst->dest_attr.tir_num; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR; } MLX5_SET(dest_format_struct, in_dests, destination_type, - type); + ifc_type); MLX5_SET(dest_format_struct, in_dests, destination_id, id); in_dests += dst_cnt_size; list_size++; @@ -646,6 +694,15 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, list_size); } + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) { + if (fte->action.exe_aso.type == MLX5_EXE_ASO_FLOW_METER) { + mlx5_cmd_set_fte_flow_meter(fte, in_flow_context); + } else { + err = -EOPNOTSUPP; + goto err_out; + } + } + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); err_out: kvfree(in); @@ -788,7 +845,8 @@ static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, int err; u32 *in; - if (namespace == MLX5_FLOW_NAMESPACE_FDB) + if (namespace == MLX5_FLOW_NAMESPACE_FDB || + namespace == MLX5_FLOW_NAMESPACE_FDB_BYPASS) max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size); else max_encap_size = MLX5_CAP_FLOWTABLE(dev, max_encap_header_size); @@ -860,18 +918,19 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns, switch (namespace) { case MLX5_FLOW_NAMESPACE_FDB: + case MLX5_FLOW_NAMESPACE_FDB_BYPASS: max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, max_modify_header_actions); table_type = FS_FT_FDB; break; + case MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC: case MLX5_FLOW_NAMESPACE_KERNEL: case MLX5_FLOW_NAMESPACE_BYPASS: max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions); table_type = FS_FT_NIC_RX; break; case MLX5_FLOW_NAMESPACE_EGRESS: -#ifdef CONFIG_MLX5_IPSEC - case MLX5_FLOW_NAMESPACE_EGRESS_KERNEL: -#endif + case MLX5_FLOW_NAMESPACE_EGRESS_IPSEC: + case MLX5_FLOW_NAMESPACE_EGRESS_MACSEC: max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); table_type = FS_FT_NIC_TX; break; @@ -968,6 +1027,12 @@ static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns, return err ? err : MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); } +static u32 mlx5_cmd_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + return 0; +} + static const struct mlx5_flow_cmds mlx5_flow_cmds = { .create_flow_table = mlx5_cmd_create_flow_table, .destroy_flow_table = mlx5_cmd_destroy_flow_table, @@ -987,6 +1052,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = { .set_peer = mlx5_cmd_stub_set_peer, .create_ns = mlx5_cmd_stub_create_ns, .destroy_ns = mlx5_cmd_stub_destroy_ns, + .get_capabilities = mlx5_cmd_get_capabilities, }; static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { @@ -1008,6 +1074,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { .set_peer = mlx5_cmd_stub_set_peer, .create_ns = mlx5_cmd_stub_create_ns, .destroy_ns = mlx5_cmd_stub_destroy_ns, + .get_capabilities = mlx5_cmd_stub_get_capabilities, }; const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 220ec632d35a..8ef4254b9ea1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -38,7 +38,7 @@ struct mlx5_flow_cmds { int (*create_flow_table)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, - unsigned int size, + struct mlx5_flow_table_attr *ft_attr, struct mlx5_flow_table *next_ft); int (*destroy_flow_table)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft); @@ -101,6 +101,9 @@ struct mlx5_flow_cmds { u16 format_id, u32 *match_mask); int (*destroy_match_definer)(struct mlx5_flow_root_namespace *ns, int definer_id); + + u32 (*get_capabilities)(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type); }; int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 386ab9a2d490..d53749248fa0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -40,8 +40,6 @@ #include "fs_cmd.h" #include "fs_ft_pool.h" #include "diag/fs_tracepoint.h" -#include "accel/ipsec.h" -#include "fpga/ipsec.h" #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) @@ -106,6 +104,10 @@ #define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ LEFTOVERS_NUM_PRIOS) +#define KERNEL_RX_MACSEC_NUM_PRIOS 1 +#define KERNEL_RX_MACSEC_NUM_LEVELS 2 +#define KERNEL_RX_MACSEC_MIN_LEVEL (BY_PASS_MIN_LEVEL + KERNEL_RX_MACSEC_NUM_PRIOS) + #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) @@ -116,7 +118,7 @@ #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) #define KERNEL_NIC_TC_NUM_PRIOS 1 -#define KERNEL_NIC_TC_NUM_LEVELS 2 +#define KERNEL_NIC_TC_NUM_LEVELS 3 #define ANCHOR_NUM_LEVELS 1 #define ANCHOR_NUM_PRIOS 1 @@ -128,11 +130,15 @@ #define LAG_PRIO_NUM_LEVELS 1 #define LAG_NUM_PRIOS 1 -#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1) +#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + KERNEL_RX_MACSEC_MIN_LEVEL + 1) #define KERNEL_TX_IPSEC_NUM_PRIOS 1 #define KERNEL_TX_IPSEC_NUM_LEVELS 1 -#define KERNEL_TX_MIN_LEVEL (KERNEL_TX_IPSEC_NUM_LEVELS) +#define KERNEL_TX_IPSEC_MIN_LEVEL (KERNEL_TX_IPSEC_NUM_LEVELS) + +#define KERNEL_TX_MACSEC_NUM_PRIOS 1 +#define KERNEL_TX_MACSEC_NUM_LEVELS 2 +#define KERNEL_TX_MACSEC_MIN_LEVEL (KERNEL_TX_IPSEC_MIN_LEVEL + KERNEL_TX_MACSEC_NUM_PRIOS) struct node_caps { size_t arr_sz; @@ -151,12 +157,16 @@ static struct init_tree_node { enum mlx5_flow_table_miss_action def_miss_action; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 7, + .ar_size = 8, .children = (struct init_tree_node[]){ ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, KERNEL_RX_MACSEC_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(KERNEL_RX_MACSEC_NUM_PRIOS, + KERNEL_RX_MACSEC_NUM_LEVELS))), ADD_PRIO(0, LAG_MIN_LEVEL, 0, FS_CHAINING_CAPS, ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS, @@ -188,24 +198,23 @@ static struct init_tree_node { static struct init_tree_node egress_root_fs = { .type = FS_TYPE_NAMESPACE, -#ifdef CONFIG_MLX5_IPSEC - .ar_size = 2, -#else - .ar_size = 1, -#endif + .ar_size = 3, .children = (struct init_tree_node[]) { ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0, FS_CHAINING_CAPS_EGRESS, ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_NUM_LEVELS))), -#ifdef CONFIG_MLX5_IPSEC - ADD_PRIO(0, KERNEL_TX_MIN_LEVEL, 0, + ADD_PRIO(0, KERNEL_TX_IPSEC_MIN_LEVEL, 0, FS_CHAINING_CAPS_EGRESS, ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(KERNEL_TX_IPSEC_NUM_PRIOS, KERNEL_TX_IPSEC_NUM_LEVELS))), -#endif + ADD_PRIO(0, KERNEL_TX_MACSEC_MIN_LEVEL, 0, + FS_CHAINING_CAPS_EGRESS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(KERNEL_TX_MACSEC_NUM_PRIOS, + KERNEL_TX_MACSEC_NUM_LEVELS))), } }; @@ -432,6 +441,16 @@ static bool is_fwd_next_action(u32 action) MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS); } +static bool is_fwd_dest_type(enum mlx5_flow_destination_type type) +{ + return type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM || + type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE || + type == MLX5_FLOW_DESTINATION_TYPE_UPLINK || + type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER || + type == MLX5_FLOW_DESTINATION_TYPE_TIR; +} + static bool check_valid_spec(const struct mlx5_flow_spec *spec) { int i; @@ -558,8 +577,8 @@ static void del_sw_hw_rule(struct fs_node *node) mutex_unlock(&rule->dest_attr.ft->lock); } - if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER && - --fte->dests_size) { + if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) { + --fte->dests_size; fte->modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); @@ -567,17 +586,23 @@ static void del_sw_hw_rule(struct fs_node *node) goto out; } - if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_PORT && - --fte->dests_size) { + if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_PORT) { + --fte->dests_size; fte->modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION); fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_ALLOW; goto out; } - if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && - --fte->dests_size) { + if (is_fwd_dest_type(rule->dest_attr.type)) { + --fte->dests_size; + --fte->fwd_dests; + + if (!fte->fwd_dests) + fte->action.action &= + ~MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; fte->modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); + goto out; } out: kfree(rule); @@ -597,6 +622,7 @@ static void del_hw_fte(struct fs_node *node) fs_get_obj(ft, fg->node.parent); trace_mlx5_fs_del_fte(fte); + WARN_ON(fte->dests_size); dev = get_dev(&ft->node); root = find_root(&ft->node); if (node->active) { @@ -1146,7 +1172,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa find_next_chained_ft(fs_prio); ft->def_miss_action = ns->def_miss_action; ft->ns = ns; - err = root->cmds->create_flow_table(root, ft, ft_attr->max_fte, next_ft); + err = root->cmds->create_flow_table(root, ft, ft_attr, next_ft); if (err) goto free_ft; @@ -1186,6 +1212,12 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, } EXPORT_SYMBOL(mlx5_create_flow_table); +u32 mlx5_flow_table_id(struct mlx5_flow_table *ft) +{ + return ft->id; +} +EXPORT_SYMBOL(mlx5_flow_table_id); + struct mlx5_flow_table * mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, struct mlx5_flow_table_attr *ft_attr, u16 vport) @@ -1296,6 +1328,8 @@ static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest) rule->node.type = FS_TYPE_FLOW_DEST; if (dest) memcpy(&rule->dest_attr, dest, sizeof(*dest)); + else + rule->dest_attr.type = MLX5_FLOW_DESTINATION_TYPE_NONE; return rule; } @@ -1372,6 +1406,9 @@ create_flow_handle(struct fs_fte *fte, if (dest) { fte->dests_size++; + if (is_fwd_dest_type(dest[i].type)) + fte->fwd_dests++; + type = dest[i].type == MLX5_FLOW_DESTINATION_TYPE_COUNTER; *modify_mask |= type ? count : dst; @@ -1525,7 +1562,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, struct mlx5_flow_destination *d2) { if (d1->type == d2->type) { - if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + if (((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + d1->type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && d1->vport.num == d2->vport.num && d1->vport.flags == d2->vport.flags && ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ? @@ -1559,9 +1597,22 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, return NULL; } -static bool check_conflicting_actions(u32 action1, u32 action2) +static bool check_conflicting_actions_vlan(const struct mlx5_fs_vlan *vlan0, + const struct mlx5_fs_vlan *vlan1) { - u32 xored_actions = action1 ^ action2; + return vlan0->ethtype != vlan1->ethtype || + vlan0->vid != vlan1->vid || + vlan0->prio != vlan1->prio; +} + +static bool check_conflicting_actions(const struct mlx5_flow_act *act1, + const struct mlx5_flow_act *act2) +{ + u32 action1 = act1->action; + u32 action2 = act2->action; + u32 xored_actions; + + xored_actions = action1 ^ action2; /* if one rule only wants to count, it's ok */ if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT || @@ -1578,6 +1629,22 @@ static bool check_conflicting_actions(u32 action1, u32 action2) MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2)) return true; + if (action1 & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT && + act1->pkt_reformat != act2->pkt_reformat) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + act1->modify_hdr != act2->modify_hdr) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH && + check_conflicting_actions_vlan(&act1->vlan[0], &act2->vlan[0])) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 && + check_conflicting_actions_vlan(&act1->vlan[1], &act2->vlan[1])) + return true; + return false; } @@ -1585,7 +1652,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_context *flow_context, const struct mlx5_flow_act *flow_act) { - if (check_conflicting_actions(flow_act->action, fte->action.action)) { + if (check_conflicting_actions(flow_act, &fte->action)) { mlx5_core_warn(get_dev(&fte->node), "Found two FTEs with conflicting actions\n"); return -EEXIST; @@ -1695,6 +1762,7 @@ static void free_match_list(struct match_list *head, bool ft_locked) static int build_match_list(struct match_list *match_head, struct mlx5_flow_table *ft, const struct mlx5_flow_spec *spec, + struct mlx5_flow_group *fg, bool ft_locked) { struct rhlist_head *tmp, *list; @@ -1709,6 +1777,9 @@ static int build_match_list(struct match_list *match_head, rhl_for_each_entry_rcu(g, tmp, list, hash) { struct match_list *curr_match; + if (fg && fg != g) + continue; + if (unlikely(!tree_get_node(&g->node))) continue; @@ -1888,6 +1959,9 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, if (!check_valid_spec(spec)) return ERR_PTR(-EINVAL); + if (flow_act->fg && ft->autogroup.active) + return ERR_PTR(-EINVAL); + for (i = 0; i < dest_num; i++) { if (!dest_is_valid(&dest[i], flow_act, ft)) return ERR_PTR(-EINVAL); @@ -1897,7 +1971,7 @@ search_again_locked: version = atomic_read(&ft->node.version); /* Collect all fgs which has a matching match_criteria */ - err = build_match_list(&match_head, ft, spec, take_write); + err = build_match_list(&match_head, ft, spec, flow_act->fg, take_write); if (err) { if (take_write) up_write_ref_node(&ft->node, false); @@ -2063,16 +2137,18 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) down_write_ref_node(&fte->node, false); for (i = handle->num_rules - 1; i >= 0; i--) tree_remove_node(&handle->rule[i]->node, true); - if (fte->dests_size) { - if (fte->modify_mask) - modify_fte(fte); - up_write_ref_node(&fte->node, false); - } else if (list_empty(&fte->node.children)) { - del_hw_fte(&fte->node); + if (list_empty(&fte->node.children)) { + fte->node.del_hw_func(&fte->node); /* Avoid double call to del_hw_fte */ fte->node.del_hw_func = NULL; up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); + } else if (fte->dests_size) { + if (fte->modify_mask) + modify_fte(fte); + up_write_ref_node(&fte->node, false); + } else { + up_write_ref_node(&fte->node, false); } kfree(handle); } @@ -2206,6 +2282,23 @@ struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, } EXPORT_SYMBOL(mlx5_get_fdb_sub_ns); +static bool is_nic_rx_ns(enum mlx5_flow_namespace_type type) +{ + switch (type) { + case MLX5_FLOW_NAMESPACE_BYPASS: + case MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC: + case MLX5_FLOW_NAMESPACE_LAG: + case MLX5_FLOW_NAMESPACE_OFFLOADS: + case MLX5_FLOW_NAMESPACE_ETHTOOL: + case MLX5_FLOW_NAMESPACE_KERNEL: + case MLX5_FLOW_NAMESPACE_LEFTOVERS: + case MLX5_FLOW_NAMESPACE_ANCHOR: + return true; + default: + return false; + } +} + struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) { @@ -2235,31 +2328,40 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, if (steering->sniffer_tx_root_ns) return &steering->sniffer_tx_root_ns->ns; return NULL; - default: + case MLX5_FLOW_NAMESPACE_FDB_BYPASS: + root_ns = steering->fdb_root_ns; + prio = FDB_BYPASS_PATH; break; - } - - if (type == MLX5_FLOW_NAMESPACE_EGRESS || - type == MLX5_FLOW_NAMESPACE_EGRESS_KERNEL) { + case MLX5_FLOW_NAMESPACE_EGRESS: + case MLX5_FLOW_NAMESPACE_EGRESS_IPSEC: + case MLX5_FLOW_NAMESPACE_EGRESS_MACSEC: root_ns = steering->egress_root_ns; prio = type - MLX5_FLOW_NAMESPACE_EGRESS; - } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX) { + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX: root_ns = steering->rdma_rx_root_ns; prio = RDMA_RX_BYPASS_PRIO; - } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL) { + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL: root_ns = steering->rdma_rx_root_ns; prio = RDMA_RX_KERNEL_PRIO; - } else if (type == MLX5_FLOW_NAMESPACE_RDMA_TX) { + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX: root_ns = steering->rdma_tx_root_ns; - } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS) { + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS: root_ns = steering->rdma_rx_root_ns; prio = RDMA_RX_COUNTERS_PRIO; - } else if (type == MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS) { + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS: root_ns = steering->rdma_tx_root_ns; prio = RDMA_TX_COUNTERS_PRIO; - } else { /* Must be NIC RX */ + break; + default: /* Must be NIC RX */ + WARN_ON(!is_nic_rx_ns(type)); root_ns = steering->root_ns; prio = type; + break; } if (!root_ns) @@ -2485,10 +2587,6 @@ static struct mlx5_flow_root_namespace struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_namespace *ns; - if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE && - (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX)) - cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type); - /* Create the root namespace */ root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL); if (!root_ns) @@ -2629,28 +2727,6 @@ static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns) clean_tree(&root_ns->ns.node); } -void mlx5_cleanup_fs(struct mlx5_core_dev *dev) -{ - struct mlx5_flow_steering *steering = dev->priv.steering; - - cleanup_root_ns(steering->root_ns); - cleanup_root_ns(steering->fdb_root_ns); - steering->fdb_root_ns = NULL; - kfree(steering->fdb_sub_ns); - steering->fdb_sub_ns = NULL; - cleanup_root_ns(steering->port_sel_root_ns); - cleanup_root_ns(steering->sniffer_rx_root_ns); - cleanup_root_ns(steering->sniffer_tx_root_ns); - cleanup_root_ns(steering->rdma_rx_root_ns); - cleanup_root_ns(steering->rdma_tx_root_ns); - cleanup_root_ns(steering->egress_root_ns); - mlx5_cleanup_fc_stats(dev); - kmem_cache_destroy(steering->ftes_cache); - kmem_cache_destroy(steering->fgs_cache); - mlx5_ft_pool_destroy(dev); - kfree(steering); -} - static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering) { struct fs_prio *prio; @@ -2822,6 +2898,36 @@ static int create_fdb_fast_path(struct mlx5_flow_steering *steering) return 0; } +static int create_fdb_bypass(struct mlx5_flow_steering *steering) +{ + struct mlx5_flow_namespace *ns; + struct fs_prio *prio; + int i; + + prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH, 0); + if (IS_ERR(prio)) + return PTR_ERR(prio); + + ns = fs_create_namespace(prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF); + if (IS_ERR(ns)) + return PTR_ERR(ns); + + for (i = 0; i < MLX5_BY_PASS_NUM_REGULAR_PRIOS; i++) { + prio = fs_create_prio(ns, i, 1); + if (IS_ERR(prio)) + return PTR_ERR(prio); + } + return 0; +} + +static void cleanup_fdb_root_ns(struct mlx5_flow_steering *steering) +{ + cleanup_root_ns(steering->fdb_root_ns); + steering->fdb_root_ns = NULL; + kfree(steering->fdb_sub_ns); + steering->fdb_sub_ns = NULL; +} + static int init_fdb_root_ns(struct mlx5_flow_steering *steering) { struct fs_prio *maj_prio; @@ -2831,12 +2937,10 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) if (!steering->fdb_root_ns) return -ENOMEM; - maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH, - 1); - if (IS_ERR(maj_prio)) { - err = PTR_ERR(maj_prio); + err = create_fdb_bypass(steering); + if (err) goto out_err; - } + err = create_fdb_fast_path(steering); if (err) goto out_err; @@ -2874,10 +2978,7 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) return 0; out_err: - cleanup_root_ns(steering->fdb_root_ns); - kfree(steering->fdb_sub_ns); - steering->fdb_sub_ns = NULL; - steering->fdb_root_ns = NULL; + cleanup_fdb_root_ns(steering); return err; } @@ -2995,6 +3096,22 @@ void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev) steering->esw_ingress_root_ns = NULL; } +u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_namespace *ns; + + ns = mlx5_get_flow_namespace(dev, type); + if (!ns) + return 0; + + root = find_root(&ns->node); + if (!root) + return 0; + + return root->cmds->get_capabilities(root, root->table_type); +} + static int init_egress_root_ns(struct mlx5_flow_steering *steering) { int err; @@ -3016,37 +3133,24 @@ cleanup: return err; } -int mlx5_init_fs(struct mlx5_core_dev *dev) +void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev) { - struct mlx5_flow_steering *steering; - int err = 0; - - err = mlx5_init_fc_stats(dev); - if (err) - return err; - - err = mlx5_ft_pool_init(dev); - if (err) - return err; - - steering = kzalloc(sizeof(*steering), GFP_KERNEL); - if (!steering) { - err = -ENOMEM; - goto err; - } + struct mlx5_flow_steering *steering = dev->priv.steering; - steering->dev = dev; - dev->priv.steering = steering; + cleanup_root_ns(steering->root_ns); + cleanup_fdb_root_ns(steering); + cleanup_root_ns(steering->port_sel_root_ns); + cleanup_root_ns(steering->sniffer_rx_root_ns); + cleanup_root_ns(steering->sniffer_tx_root_ns); + cleanup_root_ns(steering->rdma_rx_root_ns); + cleanup_root_ns(steering->rdma_tx_root_ns); + cleanup_root_ns(steering->egress_root_ns); +} - steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs", - sizeof(struct mlx5_flow_group), 0, - 0, NULL); - steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0, - 0, NULL); - if (!steering->ftes_cache || !steering->fgs_cache) { - err = -ENOMEM; - goto err; - } +int mlx5_fs_core_init(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int err = 0; if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && (MLX5_CAP_GEN(dev, nic_flow_table))) || @@ -3097,16 +3201,71 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) goto err; } - if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE || - MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) { + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) { err = init_egress_root_ns(steering); if (err) goto err; } return 0; + +err: + mlx5_fs_core_cleanup(dev); + return err; +} + +void mlx5_fs_core_free(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + kmem_cache_destroy(steering->ftes_cache); + kmem_cache_destroy(steering->fgs_cache); + kfree(steering); + mlx5_ft_pool_destroy(dev); + mlx5_cleanup_fc_stats(dev); +} + +int mlx5_fs_core_alloc(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering; + int err = 0; + + err = mlx5_init_fc_stats(dev); + if (err) + return err; + + err = mlx5_ft_pool_init(dev); + if (err) + goto err; + + steering = kzalloc(sizeof(*steering), GFP_KERNEL); + if (!steering) { + err = -ENOMEM; + goto err; + } + + steering->dev = dev; + dev->priv.steering = steering; + + if (mlx5_fs_dr_is_supported(dev)) + steering->mode = MLX5_FLOW_STEERING_MODE_SMFS; + else + steering->mode = MLX5_FLOW_STEERING_MODE_DMFS; + + steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs", + sizeof(struct mlx5_flow_group), 0, + 0, NULL); + steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0, + 0, NULL); + if (!steering->ftes_cache || !steering->fgs_cache) { + err = -ENOMEM; + goto err; + } + + return 0; + err: - mlx5_cleanup_fs(dev); + mlx5_fs_core_free(dev); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 7711db245c63..3af50fd04d28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -120,6 +120,11 @@ enum mlx5_flow_steering_mode { MLX5_FLOW_STEERING_MODE_SMFS }; +enum mlx5_flow_steering_capabilty { + MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX = 1UL << 0, + MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX = 1UL << 1, +}; + struct mlx5_flow_steering { struct mlx5_core_dev *dev; enum mlx5_flow_steering_mode mode; @@ -203,7 +208,7 @@ struct mlx5_ft_underlay_qp { u32 qpn; }; -#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_c00 +#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_e00 /* Calculate the fte_match_param length and without the reserved length. * Make sure the reserved field is the last. */ @@ -221,6 +226,7 @@ struct fs_fte { struct mlx5_fs_dr_rule fs_dr_rule; u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; + u32 fwd_dests; u32 index; struct mlx5_flow_context flow_context; struct mlx5_flow_act action; @@ -293,14 +299,18 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, enum mlx5_flow_steering_mode mode); -int mlx5_init_fs(struct mlx5_core_dev *dev); -void mlx5_cleanup_fs(struct mlx5_core_dev *dev); +int mlx5_fs_core_alloc(struct mlx5_core_dev *dev); +void mlx5_fs_core_free(struct mlx5_core_dev *dev); +int mlx5_fs_core_init(struct mlx5_core_dev *dev); +void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev); int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports); void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev); int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports); void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev); +u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); + struct mlx5_flow_root_namespace *find_root(struct fs_node *node); #define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 7e0e04cf26f8..b406e0367af6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -38,9 +38,10 @@ #include "fs_cmd.h" #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) +#define MLX5_FC_BULK_QUERY_ALLOC_PERIOD msecs_to_jiffies(180 * 1000) /* Max number of counters to query in bulk read is 32K */ #define MLX5_SW_MAX_COUNTERS_BULK BIT(15) -#define MLX5_SF_NUM_COUNTERS_BULK 8 +#define MLX5_INIT_COUNTERS_BULK 8 #define MLX5_FC_POOL_MAX_THRESHOLD BIT(18) #define MLX5_FC_POOL_USED_BUFF_RATIO 10 @@ -145,13 +146,15 @@ static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev, spin_unlock(&fc_stats->counters_idr_lock); } -static int get_max_bulk_query_len(struct mlx5_core_dev *dev) +static int get_init_bulk_query_len(struct mlx5_core_dev *dev) { - int num_counters_bulk = mlx5_core_is_sf(dev) ? - MLX5_SF_NUM_COUNTERS_BULK : - MLX5_SW_MAX_COUNTERS_BULK; + return min_t(int, MLX5_INIT_COUNTERS_BULK, + (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); +} - return min_t(int, num_counters_bulk, +static int get_max_bulk_query_len(struct mlx5_core_dev *dev) +{ + return min_t(int, MLX5_SW_MAX_COUNTERS_BULK, (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); } @@ -177,7 +180,7 @@ static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev, { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; bool query_more_counters = (first->id <= last_id); - int max_bulk_len = get_max_bulk_query_len(dev); + int cur_bulk_len = fc_stats->bulk_query_len; u32 *data = fc_stats->bulk_query_out; struct mlx5_fc *counter = first; u32 bulk_base_id; @@ -189,7 +192,7 @@ static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev, bulk_base_id = counter->id & ~0x3; /* number of counters to query inc. the last counter */ - bulk_len = min_t(int, max_bulk_len, + bulk_len = min_t(int, cur_bulk_len, ALIGN(last_id - bulk_base_id + 1, 4)); err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len, @@ -230,6 +233,41 @@ static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter) mlx5_fc_free(dev, counter); } +static void mlx5_fc_stats_bulk_query_size_increase(struct mlx5_core_dev *dev) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + int max_bulk_len = get_max_bulk_query_len(dev); + unsigned long now = jiffies; + u32 *bulk_query_out_tmp; + int max_out_len; + + if (fc_stats->bulk_query_alloc_failed && + time_before(now, fc_stats->next_bulk_query_alloc)) + return; + + max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len); + bulk_query_out_tmp = kzalloc(max_out_len, GFP_KERNEL); + if (!bulk_query_out_tmp) { + mlx5_core_warn_once(dev, + "Can't increase flow counters bulk query buffer size, insufficient memory, bulk_size(%d)\n", + max_bulk_len); + fc_stats->bulk_query_alloc_failed = true; + fc_stats->next_bulk_query_alloc = + now + MLX5_FC_BULK_QUERY_ALLOC_PERIOD; + return; + } + + kfree(fc_stats->bulk_query_out); + fc_stats->bulk_query_out = bulk_query_out_tmp; + fc_stats->bulk_query_len = max_bulk_len; + if (fc_stats->bulk_query_alloc_failed) { + mlx5_core_info(dev, + "Flow counters bulk query buffer size increased, bulk_size(%d)\n", + max_bulk_len); + fc_stats->bulk_query_alloc_failed = false; + } +} + static void mlx5_fc_stats_work(struct work_struct *work) { struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, @@ -247,15 +285,22 @@ static void mlx5_fc_stats_work(struct work_struct *work) queue_delayed_work(fc_stats->wq, &fc_stats->work, fc_stats->sampling_interval); - llist_for_each_entry(counter, addlist, addlist) + llist_for_each_entry(counter, addlist, addlist) { mlx5_fc_stats_insert(dev, counter); + fc_stats->num_counters++; + } llist_for_each_entry_safe(counter, tmp, dellist, dellist) { mlx5_fc_stats_remove(dev, counter); mlx5_fc_release(dev, counter); + fc_stats->num_counters--; } + if (fc_stats->bulk_query_len < get_max_bulk_query_len(dev) && + fc_stats->num_counters > get_init_bulk_query_len(dev)) + mlx5_fc_stats_bulk_query_size_increase(dev); + if (time_before(now, fc_stats->next_query) || list_empty(&fc_stats->counters)) return; @@ -378,8 +423,8 @@ EXPORT_SYMBOL(mlx5_fc_destroy); int mlx5_init_fc_stats(struct mlx5_core_dev *dev) { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; - int max_bulk_len; - int max_out_len; + int init_bulk_len; + int init_out_len; spin_lock_init(&fc_stats->counters_idr_lock); idr_init(&fc_stats->counters_idr); @@ -387,11 +432,12 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) init_llist_head(&fc_stats->addlist); init_llist_head(&fc_stats->dellist); - max_bulk_len = get_max_bulk_query_len(dev); - max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len); - fc_stats->bulk_query_out = kzalloc(max_out_len, GFP_KERNEL); + init_bulk_len = get_init_bulk_query_len(dev); + init_out_len = mlx5_cmd_fc_get_bulk_query_out_len(init_bulk_len); + fc_stats->bulk_query_out = kzalloc(init_out_len, GFP_KERNEL); if (!fc_stats->bulk_query_out) return -ENOMEM; + fc_stats->bulk_query_len = init_bulk_len; fc_stats->wq = create_singlethread_workqueue("mlx5_fc"); if (!fc_stats->wq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 2d8406fab844..f34e758a2f1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -32,11 +32,9 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/eswitch.h> -#include <linux/module.h> #include "mlx5_core.h" #include "../../mlxfw/mlxfw.h" #include "lib/tout.h" -#include "accel/tls.h" enum { MCQS_IDENTIFIER_BOOT_IMG = 0x1, @@ -250,7 +248,7 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } - if (mlx5_accel_is_ktls_tx(dev) || mlx5_accel_is_ktls_rx(dev)) { + if (MLX5_CAP_GEN(dev, tls_tx) || MLX5_CAP_GEN(dev, tls_rx)) { err = mlx5_core_get_caps(dev, MLX5_CAP_TLS); if (err) return err; @@ -275,6 +273,19 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN_64(dev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD) { + err = mlx5_core_get_caps(dev, MLX5_CAP_MACSEC); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, adv_virtualization)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ADV_VIRTUALIZATION); + if (err) + return err; + } + return 0; } @@ -291,6 +302,10 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id) sw_owner_id[i]); } + if (MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) && + dev->priv.sw_vhca_id > 0) + MLX5_SET(init_hca_in, in, sw_vhca_id, dev->priv.sw_vhca_id); + return mlx5_cmd_exec_in(dev, init_hca, in); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 0b0234f9d694..9d908a0ccfef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -8,7 +8,8 @@ enum { MLX5_FW_RESET_FLAGS_RESET_REQUESTED, MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, - MLX5_FW_RESET_FLAGS_PENDING_COMP + MLX5_FW_RESET_FLAGS_PENDING_COMP, + MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS }; struct mlx5_fw_reset { @@ -57,7 +58,8 @@ static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level, return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 1); } -static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) +static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, + u8 *reset_type, u8 *reset_state) { u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; @@ -71,25 +73,67 @@ static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *r *reset_level = MLX5_GET(mfrl_reg, out, reset_level); if (reset_type) *reset_type = MLX5_GET(mfrl_reg, out, reset_type); + if (reset_state) + *reset_state = MLX5_GET(mfrl_reg, out, reset_state); return 0; } int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) { - return mlx5_reg_mfrl_query(dev, reset_level, reset_type); + return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL); } -int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel) +static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev, + struct netlink_ext_ack *extack) +{ + u8 reset_state; + + if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state)) + goto out; + + switch (reset_state) { + case MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION: + case MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS: + NL_SET_ERR_MSG_MOD(extack, "Sync reset was already triggered"); + return -EBUSY; + case MLX5_MFRL_REG_RESET_STATE_TIMEOUT: + NL_SET_ERR_MSG_MOD(extack, "Sync reset got timeout"); + return -ETIMEDOUT; + case MLX5_MFRL_REG_RESET_STATE_NACK: + NL_SET_ERR_MSG_MOD(extack, "One of the hosts disabled reset"); + return -EPERM; + } + +out: + NL_SET_ERR_MSG_MOD(extack, "Sync reset failed"); + return -EIO; +} + +int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, + struct netlink_ext_ack *extack) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; int err; set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); - err = mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, reset_type_sel, 0, true); - if (err) - clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); - return err; + + MLX5_SET(mfrl_reg, in, reset_level, MLX5_MFRL_REG_RESET_LEVEL3); + MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel); + MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, 1); + err = mlx5_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MFRL, 0, 1, false); + if (!err) + return 0; + + clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); + if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state)) + return mlx5_fw_reset_get_reset_state_err(dev, extack); + + NL_SET_ERR_MSG_MOD(extack, "Sync reset command failed"); + return mlx5_cmd_check(dev, err, in, out); } int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) @@ -105,44 +149,48 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { complete(&fw_reset->done); } else { - mlx5_load_one(dev); + mlx5_unload_one(dev); + if (mlx5_health_wait_pci_up(dev)) + mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); + else + mlx5_load_one(dev, false); devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); } } -static void mlx5_sync_reset_reload_work(struct work_struct *work) -{ - struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, - reset_reload_work); - struct mlx5_core_dev *dev = fw_reset->dev; - int err; - - mlx5_enter_error_state(dev, true); - mlx5_unload_one(dev); - err = mlx5_health_wait_pci_up(dev); - if (err) - mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); - fw_reset->ret = err; - mlx5_fw_reset_complete_reload(dev); -} - static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; - del_timer(&fw_reset->timer); + del_timer_sync(&fw_reset->timer); } -static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) +static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) { + mlx5_core_warn(dev, "Reset request was already cleared\n"); + return -EALREADY; + } + mlx5_stop_sync_reset_poll(dev); - clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags); if (poll_health) mlx5_start_health_poll(dev); + return 0; +} + +static void mlx5_sync_reset_reload_work(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_reload_work); + struct mlx5_core_dev *dev = fw_reset->dev; + + mlx5_sync_reset_clear_reset_requested(dev, false); + mlx5_enter_error_state(dev, true); + mlx5_fw_reset_complete_reload(dev); } #define MLX5_RESET_POLL_INTERVAL (HZ / 10) @@ -159,8 +207,10 @@ static void poll_sync_reset(struct timer_list *t) if (fatal_error) { mlx5_core_warn(dev, "Got Device Reset\n"); - mlx5_sync_reset_clear_reset_requested(dev, false); - queue_work(fw_reset->wq, &fw_reset->reset_reload_work); + if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags)) + queue_work(fw_reset->wq, &fw_reset->reset_reload_work); + else + mlx5_core_err(dev, "Device is being removed, Drop new reset work\n"); return; } @@ -186,13 +236,17 @@ static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev) return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false); } -static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev) +static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) { + mlx5_core_warn(dev, "Reset request was already set\n"); + return -EALREADY; + } mlx5_stop_health_poll(dev, true); - set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags); mlx5_start_sync_reset_poll(dev); + return 0; } static void mlx5_fw_live_patch_event(struct work_struct *work) @@ -221,7 +275,9 @@ static void mlx5_sync_reset_request_event(struct work_struct *work) err ? "Failed" : "Sent"); return; } - mlx5_sync_reset_set_reset_requested(dev); + if (mlx5_sync_reset_set_reset_requested(dev)) + return; + err = mlx5_fw_reset_set_reset_sync_ack(dev); if (err) mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err); @@ -303,6 +359,23 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) err = -ETIMEDOUT; } + do { + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, ®16); + if (err) + return err; + if (reg16 == dev_id) + break; + msleep(20); + } while (!time_after(jiffies, timeout)); + + if (reg16 == dev_id) { + mlx5_core_info(dev, "Firmware responds to PCI config cycles again\n"); + } else { + mlx5_core_err(dev, "Firmware is not responsive (0x%04x) after %llu ms\n", + reg16, mlx5_tout_ms(dev, PCI_TOGGLE)); + err = -ETIMEDOUT; + } + restore: list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { pci_cfg_access_unlock(sdev); @@ -319,7 +392,8 @@ static void mlx5_sync_reset_now_event(struct work_struct *work) struct mlx5_core_dev *dev = fw_reset->dev; int err; - mlx5_sync_reset_clear_reset_requested(dev, false); + if (mlx5_sync_reset_clear_reset_requested(dev, false)) + return; mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n"); @@ -336,7 +410,6 @@ static void mlx5_sync_reset_now_event(struct work_struct *work) } mlx5_enter_error_state(dev, true); - mlx5_unload_one(dev); done: fw_reset->ret = err; mlx5_fw_reset_complete_reload(dev); @@ -348,10 +421,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work) reset_abort_work); struct mlx5_core_dev *dev = fw_reset->dev; - if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) + if (mlx5_sync_reset_clear_reset_requested(dev, true)) return; - - mlx5_sync_reset_clear_reset_requested(dev, true); mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n"); } @@ -380,9 +451,12 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb); struct mlx5_eqe *eqe = data; + if (test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags)) + return NOTIFY_DONE; + switch (eqe->sub_type) { case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT: - queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work); + queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work); break; case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT: mlx5_sync_reset_events_handle(fw_reset, eqe); @@ -426,6 +500,18 @@ void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev) mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb); } +void mlx5_drain_fw_reset(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags); + cancel_work_sync(&fw_reset->fw_live_patch_work); + cancel_work_sync(&fw_reset->reset_request_work); + cancel_work_sync(&fw_reset->reset_reload_work); + cancel_work_sync(&fw_reset->reset_now_work); + cancel_work_sync(&fw_reset->reset_abort_work); +} + int mlx5_fw_reset_init(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h index 7761ee5fc7d0..dc141c7e641a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -9,12 +9,14 @@ void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable); bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev); int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type); -int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel); +int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, + struct netlink_ext_ack *extack); int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev); void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev); +void mlx5_drain_fw_reset(struct mlx5_core_dev *dev); int mlx5_fw_reset_init(struct mlx5_core_dev *dev); void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 3ca998874c50..86ed87d704f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -31,7 +31,6 @@ */ #include <linux/kernel.h> -#include <linux/module.h> #include <linux/random.h> #include <linux/vmalloc.h> #include <linux/hardirq.h> @@ -420,6 +419,11 @@ static void print_health_info(struct mlx5_core_dev *dev) if (!ioread8(&h->synd)) return; + if (ioread32be(&h->fw_ver) == 0xFFFFFFFF) { + mlx5_log(dev, LOGLEVEL_ERR, "PCI slot is unavailable\n"); + return; + } + rfr_severity = ioread8(&h->rfr_severity); severity = mlx5_health_get_severity(rfr_severity); mlx5_log(dev, severity, "Health issue observed, %s, severity(%d) %s:\n", @@ -597,7 +601,7 @@ static void mlx5_fw_reporter_err_work(struct work_struct *work) fw_reporter_ctx.miss_counter = health->miss_counter; if (fw_reporter_ctx.err_synd) { devlink_health_report(health->fw_reporter, - "FW syndrom reported", &fw_reporter_ctx); + "FW syndrome reported", &fw_reporter_ctx); return; } if (fw_reporter_ctx.miss_counter) @@ -662,16 +666,20 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) struct mlx5_fw_reporter_ctx fw_reporter_ctx; struct mlx5_core_health *health; struct mlx5_core_dev *dev; + struct devlink *devlink; struct mlx5_priv *priv; health = container_of(work, struct mlx5_core_health, fatal_report_work); priv = container_of(health, struct mlx5_priv, health); dev = container_of(priv, struct mlx5_core_dev, priv); + devlink = priv_to_devlink(dev); enter_error_state(dev, false); if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { + devl_lock(devlink); if (mlx5_health_try_recover(dev)) mlx5_core_err(dev, "health recovery failed\n"); + devl_unlock(devlink); return; } fw_reporter_ctx.err_synd = health->synd; @@ -694,11 +702,25 @@ static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { .dump = mlx5_fw_fatal_reporter_dump, }; -#define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000 +#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000 +#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000 +#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000 +#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD + static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; struct devlink *devlink = priv_to_devlink(dev); + u64 grace_period; + + if (mlx5_core_is_ecpf(dev)) { + grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD; + } else if (mlx5_core_is_pf(dev)) { + grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD; + } else { + /* VF or SF */ + grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD; + } health->fw_reporter = devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops, @@ -710,7 +732,7 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) health->fw_fatal_reporter = devlink_health_reporter_create(devlink, &mlx5_fw_fatal_reporter_ops, - MLX5_REPORTER_FW_GRACEFUL_PERIOD, + grace_period, dev); if (IS_ERR(health->fw_fatal_reporter)) mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", @@ -835,9 +857,6 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms); add_timer(&health->timer); - - if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc)) - queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); } void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) @@ -854,6 +873,14 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) del_timer_sync(&health->timer); } +void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc)) + queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); +} + void mlx5_drain_health_wq(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; @@ -867,13 +894,6 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev) cancel_work_sync(&health->fatal_report_work); } -void mlx5_health_flush(struct mlx5_core_dev *dev) -{ - struct mlx5_core_health *health = &dev->priv.health; - - flush_workqueue(health->wq); -} - void mlx5_health_cleanup(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 962d41418ce7..c247cca154e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -32,6 +32,7 @@ #include "en.h" #include "ipoib.h" +#include "en/fs_ethtool.h" static void mlx5i_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) @@ -39,7 +40,7 @@ static void mlx5i_get_drvinfo(struct net_device *dev, struct mlx5e_priv *priv = mlx5i_epriv(dev); mlx5e_ethtool_get_drvinfo(priv, drvinfo); - strlcpy(drvinfo->driver, KBUILD_MODNAME "[ib_ipoib]", + strscpy(drvinfo->driver, KBUILD_MODNAME "[ib_ipoib]", sizeof(drvinfo->driver)); } @@ -67,7 +68,9 @@ static void mlx5i_get_ethtool_stats(struct net_device *dev, } static int mlx5i_set_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = mlx5i_epriv(dev); @@ -75,11 +78,13 @@ static int mlx5i_set_ringparam(struct net_device *dev, } static void mlx5i_get_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = mlx5i_epriv(dev); - mlx5e_ethtool_get_ringparam(priv, param); + mlx5e_ethtool_get_ringparam(priv, param, kernel_param); } static int mlx5i_set_channels(struct net_device *dev, @@ -105,7 +110,7 @@ static int mlx5i_set_coalesce(struct net_device *netdev, { struct mlx5e_priv *priv = mlx5i_epriv(netdev); - return mlx5e_ethtool_set_coalesce(priv, coal); + return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack); } static int mlx5i_get_coalesce(struct net_device *netdev, @@ -115,7 +120,7 @@ static int mlx5i_get_coalesce(struct net_device *netdev, { struct mlx5e_priv *priv = mlx5i_epriv(netdev); - return mlx5e_ethtool_get_coalesce(priv, coal); + return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal); } static int mlx5i_get_ts_info(struct net_device *netdev, @@ -217,7 +222,6 @@ static int mlx5i_get_link_ksettings(struct net_device *netdev, return 0; } -#ifdef CONFIG_MLX5_EN_RXNFC static u32 mlx5i_flow_type_mask(u32 flow_type) { return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS); @@ -239,9 +243,18 @@ static int mlx5i_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, { struct mlx5e_priv *priv = mlx5i_epriv(dev); + /* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part + * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc, + * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc + * is compiled out via CONFIG_MLX5_EN_RXNFC=n. + */ + if (info->cmd == ETHTOOL_GRXRINGS) { + info->data = priv->channels.params.num_channels; + return 0; + } + return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs); } -#endif const struct ethtool_ops mlx5i_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | @@ -259,10 +272,8 @@ const struct ethtool_ops mlx5i_ethtool_ops = { .get_coalesce = mlx5i_get_coalesce, .set_coalesce = mlx5i_set_coalesce, .get_ts_info = mlx5i_get_ts_info, -#ifdef CONFIG_MLX5_EN_RXNFC .get_rxnfc = mlx5i_get_rxnfc, .set_rxnfc = mlx5i_set_rxnfc, -#endif .get_link_ksettings = mlx5i_get_link_ksettings, .get_link = ethtool_op_get_link, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index ea1efdecc88c..4e3a75496dd9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -35,6 +35,7 @@ #include "en.h" #include "en/params.h" #include "ipoib.h" +#include "en/fs_ethtool.h" #define IB_DEFAULT_Q_KEY 0xb1b #define MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE 9 @@ -110,14 +111,14 @@ void mlx5i_cleanup(struct mlx5e_priv *priv) static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv) { - struct mlx5e_sw_stats s = { 0 }; + struct rtnl_link_stats64 s = {}; int i, j; for (i = 0; i < priv->stats_nch; i++) { struct mlx5e_channel_stats *channel_stats; struct mlx5e_rq_stats *rq_stats; - channel_stats = &priv->channel_stats[i]; + channel_stats = priv->channel_stats[i]; rq_stats = &channel_stats->rq; s.rx_packets += rq_stats->packets; @@ -128,11 +129,17 @@ static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv) s.tx_packets += sq_stats->packets; s.tx_bytes += sq_stats->bytes; - s.tx_queue_dropped += sq_stats->dropped; + s.tx_dropped += sq_stats->dropped; } } - memcpy(&priv->stats.sw, &s, sizeof(s)); + memset(&priv->stats.sw, 0, sizeof(s)); + + priv->stats.sw.rx_packets = s.rx_packets; + priv->stats.sw.rx_bytes = s.rx_bytes; + priv->stats.sw.tx_packets = s.tx_packets; + priv->stats.sw.tx_bytes = s.tx_bytes; + priv->stats.sw.tx_queue_dropped = s.tx_dropped; } void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) @@ -314,43 +321,47 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) { + struct mlx5_flow_namespace *ns = + mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); int err; - priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, - MLX5_FLOW_NAMESPACE_KERNEL); - if (!priv->fs.ns) + if (!ns) return -EINVAL; - err = mlx5e_arfs_create_tables(priv); + mlx5e_fs_set_ns(priv->fs, ns, false); + err = mlx5e_arfs_create_tables(priv->fs, priv->rx_res, + !!(priv->netdev->hw_features & NETIF_F_NTUPLE)); if (err) { netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n", err); priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - err = mlx5e_create_ttc_table(priv); + err = mlx5e_create_ttc_table(priv->fs, priv->rx_res); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); goto err_destroy_arfs_tables; } - mlx5e_ethtool_init_steering(priv); + mlx5e_ethtool_init_steering(priv->fs); return 0; err_destroy_arfs_tables: - mlx5e_arfs_destroy_tables(priv); + mlx5e_arfs_destroy_tables(priv->fs, + !!(priv->netdev->hw_features & NETIF_F_NTUPLE)); return err; } static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) { - mlx5e_destroy_ttc_table(priv); - mlx5e_arfs_destroy_tables(priv); - mlx5e_ethtool_cleanup_steering(priv); + mlx5e_destroy_ttc_table(priv->fs); + mlx5e_arfs_destroy_tables(priv->fs, + !!(priv->netdev->hw_features & NETIF_F_NTUPLE)); + mlx5e_ethtool_cleanup_steering(priv->fs); } static int mlx5i_init_rx(struct mlx5e_priv *priv) @@ -358,9 +369,18 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) struct mlx5_core_dev *mdev = priv->mdev; int err; - priv->rx_res = mlx5e_rx_res_alloc(); - if (!priv->rx_res) + priv->fs = mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + if (!priv->fs) { + netdev_err(priv->netdev, "FS allocation failed\n"); return -ENOMEM; + } + + priv->rx_res = mlx5e_rx_res_alloc(); + if (!priv->rx_res) { + err = -ENOMEM; + goto err_free_fs; + } mlx5e_create_q_counters(priv); @@ -391,6 +411,8 @@ err_destroy_q_counters: mlx5e_destroy_q_counters(priv); mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; +err_free_fs: + mlx5e_fs_cleanup(priv->fs); return err; } @@ -402,6 +424,7 @@ static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) mlx5e_destroy_q_counters(priv); mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; + mlx5e_fs_cleanup(priv->fs); } /* The stats groups order is opposite to the update_stats() order calls */ @@ -440,10 +463,8 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .update_carrier = NULL, /* no HW update in IB link */ .rx_handlers = &mlx5i_rx_handlers, .max_tc = MLX5I_MAX_NUM_TC, - .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5i_stats_grps, .stats_grps_num = mlx5i_stats_grps_num, - .rx_ptp_support = false, }; /* mlx5i netdev NDos */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 5308f23702bc..0227a521d301 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -349,8 +349,6 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = { .update_stats = NULL, .rx_handlers = &mlx5i_rx_handlers, .max_tc = MLX5I_MAX_NUM_TC, - .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), - .rx_ptp_support = false, }; const struct mlx5e_profile *mlx5i_pkey_get_profile(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c new file mode 100644 index 000000000000..380a208ab137 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "mlx5_core.h" +#include "mlx5_irq.h" +#include "pci_irq.h" + +static void cpu_put(struct mlx5_irq_pool *pool, int cpu) +{ + pool->irqs_per_cpu[cpu]--; +} + +static void cpu_get(struct mlx5_irq_pool *pool, int cpu) +{ + pool->irqs_per_cpu[cpu]++; +} + +/* Gets the least loaded CPU. e.g.: the CPU with least IRQs bound to it */ +static int cpu_get_least_loaded(struct mlx5_irq_pool *pool, + const struct cpumask *req_mask) +{ + int best_cpu = -1; + int cpu; + + for_each_cpu_and(cpu, req_mask, cpu_online_mask) { + /* CPU has zero IRQs on it. No need to search any more CPUs. */ + if (!pool->irqs_per_cpu[cpu]) { + best_cpu = cpu; + break; + } + if (best_cpu < 0) + best_cpu = cpu; + if (pool->irqs_per_cpu[cpu] < pool->irqs_per_cpu[best_cpu]) + best_cpu = cpu; + } + if (best_cpu == -1) { + /* There isn't online CPUs in req_mask */ + mlx5_core_err(pool->dev, "NO online CPUs in req_mask (%*pbl)\n", + cpumask_pr_args(req_mask)); + best_cpu = cpumask_first(cpu_online_mask); + } + pool->irqs_per_cpu[best_cpu]++; + return best_cpu; +} + +/* Creating an IRQ from irq_pool */ +static struct mlx5_irq * +irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + cpumask_var_t auto_mask; + struct mlx5_irq *irq; + u32 irq_index; + int err; + + if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL)) + return ERR_PTR(-ENOMEM); + err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL); + if (err) + return ERR_PTR(err); + if (pool->irqs_per_cpu) { + if (cpumask_weight(req_mask) > 1) + /* if req_mask contain more then one CPU, set the least loadad CPU + * of req_mask + */ + cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask); + else + cpu_get(pool, cpumask_first(req_mask)); + } + irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask); + free_cpumask_var(auto_mask); + return irq; +} + +/* Looking for the IRQ with the smallest refcount that fits req_mask. + * If pool is sf_comp_pool, then we are looking for an IRQ with any of the + * requested CPUs in req_mask. + * for example: req_mask = 0xf, irq0_mask = 0x10, irq1_mask = 0x1. irq0_mask + * isn't subset of req_mask, so we will skip it. irq1_mask is subset of req_mask, + * we don't skip it. + * If pool is sf_ctrl_pool, then all IRQs have the same mask, so any IRQ will + * fit. And since mask is subset of itself, we will pass the first if bellow. + */ +static struct mlx5_irq * +irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + int start = pool->xa_num_irqs.min; + int end = pool->xa_num_irqs.max; + struct mlx5_irq *irq = NULL; + struct mlx5_irq *iter; + int irq_refcount = 0; + unsigned long index; + + lockdep_assert_held(&pool->lock); + xa_for_each_range(&pool->irqs, index, iter, start, end) { + struct cpumask *iter_mask = mlx5_irq_get_affinity_mask(iter); + int iter_refcount = mlx5_irq_read_locked(iter); + + if (!cpumask_subset(iter_mask, req_mask)) + /* skip IRQs with a mask which is not subset of req_mask */ + continue; + if (iter_refcount < pool->min_threshold) + /* If we found an IRQ with less than min_thres, return it */ + return iter; + if (!irq || iter_refcount < irq_refcount) { + /* In case we won't find an IRQ with less than min_thres, + * keep a pointer to the least used IRQ + */ + irq_refcount = iter_refcount; + irq = iter; + } + } + return irq; +} + +/** + * mlx5_irq_affinity_request - request an IRQ according to the given mask. + * @pool: IRQ pool to request from. + * @req_mask: cpumask requested for this IRQ. + * + * This function returns a pointer to IRQ, or ERR_PTR in case of error. + */ +struct mlx5_irq * +mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + struct mlx5_irq *least_loaded_irq, *new_irq; + + mutex_lock(&pool->lock); + least_loaded_irq = irq_pool_find_least_loaded(pool, req_mask); + if (least_loaded_irq && + mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold) + goto out; + /* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */ + new_irq = irq_pool_request_irq(pool, req_mask); + if (IS_ERR(new_irq)) { + if (!least_loaded_irq) { + /* We failed to create an IRQ and we didn't find an IRQ */ + mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n", + PTR_ERR(new_irq)); + mutex_unlock(&pool->lock); + return new_irq; + } + /* We failed to create a new IRQ for the requested affinity, + * sharing existing IRQ. + */ + goto out; + } + least_loaded_irq = new_irq; + goto unlock; +out: + mlx5_irq_get_locked(least_loaded_irq); + if (mlx5_irq_read_locked(least_loaded_irq) > pool->max_threshold) + mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n", + pci_irq_vector(pool->dev->pdev, + mlx5_irq_get_index(least_loaded_irq)), pool->name, + mlx5_irq_read_locked(least_loaded_irq) / MLX5_EQ_REFS_PER_IRQ); +unlock: + mutex_unlock(&pool->lock); + return least_loaded_irq; +} + +void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs, + int num_irqs) +{ + struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + int i; + + for (i = 0; i < num_irqs; i++) { + int cpu = cpumask_first(mlx5_irq_get_affinity_mask(irqs[i])); + + synchronize_irq(pci_irq_vector(pool->dev->pdev, + mlx5_irq_get_index(irqs[i]))); + if (mlx5_irq_put(irqs[i])) + if (pool->irqs_per_cpu) + cpu_put(pool, cpu); + } +} + +/** + * mlx5_irq_affinity_irqs_request_auto - request one or more IRQs for mlx5 device. + * @dev: mlx5 device that is requesting the IRQs. + * @nirqs: number of IRQs to request. + * @irqs: an output array of IRQs pointers. + * + * Each IRQ is bounded to at most 1 CPU. + * This function is requesting IRQs according to the default assignment. + * The default assignment policy is: + * - in each iteration, request the least loaded IRQ which is not bound to any + * CPU of the previous IRQs requested. + * + * This function returns the number of IRQs requested, (which might be smaller than + * @nirqs), if successful, or a negative error code in case of an error. + */ +int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, + struct mlx5_irq **irqs) +{ + struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + cpumask_var_t req_mask; + struct mlx5_irq *irq; + int i = 0; + + if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_copy(req_mask, cpu_online_mask); + for (i = 0; i < nirqs; i++) { + if (mlx5_irq_pool_is_sf_pool(pool)) + irq = mlx5_irq_affinity_request(pool, req_mask); + else + /* In case SF pool doesn't exists, fallback to the PF IRQs. + * The PF IRQs are already allocated and binded to CPU + * at this point. Hence, only an index is needed. + */ + irq = mlx5_irq_request(dev, i, NULL); + if (IS_ERR(irq)) + break; + irqs[i] = irq; + cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask); + mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", + pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), + cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), + mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); + } + free_cpumask_var(req_mask); + if (!i) + return PTR_ERR(irq); + return i; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c new file mode 100644 index 000000000000..b8feaf0f5c4c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "lag.h" + +static char *get_str_mode_type(struct mlx5_lag *ldev) +{ + switch (ldev->mode) { + case MLX5_LAG_MODE_ROCE: return "roce"; + case MLX5_LAG_MODE_SRIOV: return "switchdev"; + case MLX5_LAG_MODE_MULTIPATH: return "multipath"; + case MLX5_LAG_MODE_MPESW: return "multiport_eswitch"; + default: return "invalid"; + } + + return NULL; +} + +static int type_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + char *mode = NULL; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + if (__mlx5_lag_is_active(ldev)) + mode = get_str_mode_type(ldev); + mutex_unlock(&ldev->lock); + if (!mode) + return -EINVAL; + seq_printf(file, "%s\n", mode); + + return 0; +} + +static int port_sel_mode_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + int ret = 0; + char *mode; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + if (__mlx5_lag_is_active(ldev)) + mode = mlx5_get_str_port_sel_mode(ldev->mode, ldev->mode_flags); + else + ret = -EINVAL; + mutex_unlock(&ldev->lock); + if (ret) + return ret; + + seq_printf(file, "%s\n", mode); + return 0; +} + +static int state_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + bool active; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + active = __mlx5_lag_is_active(ldev); + mutex_unlock(&ldev->lock); + seq_printf(file, "%s\n", active ? "active" : "disabled"); + return 0; +} + +static int flags_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + bool fdb_sel_mode_native; + struct mlx5_lag *ldev; + bool shared_fdb; + bool lag_active; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + lag_active = __mlx5_lag_is_active(ldev); + if (!lag_active) + goto unlock; + + shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); + fdb_sel_mode_native = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, + &ldev->mode_flags); + +unlock: + mutex_unlock(&ldev->lock); + if (!lag_active) + return -EINVAL; + + seq_printf(file, "%s:%s\n", "shared_fdb", shared_fdb ? "on" : "off"); + seq_printf(file, "%s:%s\n", "fdb_selection_mode", + fdb_sel_mode_native ? "native" : "affinity"); + return 0; +} + +static int mapping_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + u8 ports[MLX5_MAX_PORTS] = {}; + struct mlx5_lag *ldev; + bool hash = false; + bool lag_active; + int num_ports; + int i; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + lag_active = __mlx5_lag_is_active(ldev); + if (lag_active) { + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) { + mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports, + &num_ports); + hash = true; + } else { + for (i = 0; i < ldev->ports; i++) + ports[i] = ldev->v2p_map[i]; + num_ports = ldev->ports; + } + } + mutex_unlock(&ldev->lock); + if (!lag_active) + return -EINVAL; + + for (i = 0; i < num_ports; i++) { + if (hash) + seq_printf(file, "%d\n", ports[i] + 1); + else + seq_printf(file, "%d:%d\n", i + 1, ports[i]); + } + + return 0; +} + +static int members_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + int i; + + ldev = dev->priv.lag; + mutex_lock(&ldev->lock); + for (i = 0; i < ldev->ports; i++) { + if (!ldev->pf[i].dev) + continue; + seq_printf(file, "%s\n", dev_name(ldev->pf[i].dev->device)); + } + mutex_unlock(&ldev->lock); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(type); +DEFINE_SHOW_ATTRIBUTE(port_sel_mode); +DEFINE_SHOW_ATTRIBUTE(state); +DEFINE_SHOW_ATTRIBUTE(flags); +DEFINE_SHOW_ATTRIBUTE(mapping); +DEFINE_SHOW_ATTRIBUTE(members); + +void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev) +{ + struct dentry *dbg; + + dbg = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev)); + dev->priv.dbg.lag_debugfs = dbg; + + debugfs_create_file("type", 0444, dbg, dev, &type_fops); + debugfs_create_file("port_sel_mode", 0444, dbg, dev, &port_sel_mode_fops); + debugfs_create_file("state", 0444, dbg, dev, &state_fops); + debugfs_create_file("flags", 0444, dbg, dev, &flags_fops); + debugfs_create_file("mapping", 0444, dbg, dev, &mapping_fops); + debugfs_create_file("members", 0444, dbg, dev, &members_fops); +} + +void mlx5_ldev_remove_debugfs(struct dentry *dbg) +{ + debugfs_remove_recursive(dbg); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 4ddf6b330a44..a9f4ede4a9bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -31,14 +31,22 @@ */ #include <linux/netdevice.h> +#include <net/bonding.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/eswitch.h> #include <linux/mlx5/vport.h> #include "lib/devcom.h" #include "mlx5_core.h" #include "eswitch.h" +#include "esw/acl/ofld.h" #include "lag.h" #include "mp.h" +#include "mpesw.h" + +enum { + MLX5_LAG_EGRESS_PORT_1 = 1, + MLX5_LAG_EGRESS_PORT_2, +}; /* General purpose, use for short periods of time. * Beware of lock dependencies (preferably, no locks should be acquired @@ -46,28 +54,67 @@ */ static DEFINE_SPINLOCK(lag_lock); -static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, - u8 remap_port2, bool shared_fdb, u8 flags) +static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) { + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) + return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT; + + if (mode == MLX5_LAG_MODE_MPESW) + return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW; + + return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY; +} + +static u8 lag_active_port_bits(struct mlx5_lag *ldev) +{ + u8 enabled_ports[MLX5_MAX_PORTS] = {}; + u8 active_port = 0; + int num_enabled; + int idx; + + mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports, + &num_enabled); + for (idx = 0; idx < num_enabled; idx++) + active_port |= BIT_MASK(enabled_ports[idx]); + + return active_port; +} + +static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode, + unsigned long flags) +{ + bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, + &flags); + int port_sel_mode = get_port_sel_mode(mode, flags); u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; - void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); + void *lag_ctx; + lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); + MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode); - MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb); - if (!(flags & MLX5_LAG_FLAG_HASH_BASED)) { - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); - } else { - MLX5_SET(lagc, lag_ctx, port_select_mode, - MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT); + switch (port_sel_mode) { + case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); + break; + case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: + if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass)) + break; + + MLX5_SET(lagc, lag_ctx, active_port, + lag_active_port_bits(mlx5_lag_dev(dev))); + break; + default: + break; } + MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode); return mlx5_cmd_exec_in(dev, create_lag, in); } -static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1, - u8 remap_port2) +static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports, + u8 *ports) { u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); @@ -75,8 +122,8 @@ static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1, MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); MLX5_SET(modify_lag_in, in, field_select, 0x1); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); return mlx5_cmd_exec_in(dev, modify_lag, in); } @@ -101,6 +148,75 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); +static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_disabled) +{ + int i; + + *num_disabled = 0; + for (i = 0; i < num_ports; i++) { + if (!tracker->netdev_state[i].tx_enabled || + !tracker->netdev_state[i].link_up) + ports[(*num_disabled)++] = i; + } +} + +void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_enabled) +{ + int i; + + *num_enabled = 0; + for (i = 0; i < num_ports; i++) { + if (tracker->netdev_state[i].tx_enabled && + tracker->netdev_state[i].link_up) + ports[(*num_enabled)++] = i; + } + + if (*num_enabled == 0) + mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled); +} + +static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev, + struct mlx5_lag *ldev, + struct lag_tracker *tracker, + unsigned long flags) +{ + char buf[MLX5_MAX_PORTS * 10 + 1] = {}; + u8 enabled_ports[MLX5_MAX_PORTS] = {}; + int written = 0; + int num_enabled; + int idx; + int err; + int i; + int j; + + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { + mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports, + &num_enabled); + for (i = 0; i < num_enabled; i++) { + err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1); + if (err != 3) + return; + written += err; + } + buf[written - 2] = 0; + mlx5_core_info(dev, "lag map active ports: %s\n", buf); + } else { + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + err = scnprintf(buf + written, 10, + " port %d:%d", i + 1, ldev->v2p_map[idx]); + if (err != 9) + return; + written += err; + } + } + mlx5_core_info(dev, "lag map:%s\n", buf); + } +} + static int mlx5_lag_netdev_event(struct notifier_block *this, unsigned long event, void *ptr); static void mlx5_do_bond_work(struct work_struct *work); @@ -112,8 +228,10 @@ static void mlx5_ldev_free(struct kref *ref) if (ldev->nb.notifier_call) unregister_netdevice_notifier_net(&init_net, &ldev->nb); mlx5_lag_mp_cleanup(ldev); - cancel_delayed_work_sync(&ldev->bond_work); + mlx5_lag_mpesw_cleanup(ldev); + cancel_work_sync(&ldev->mpesw_work); destroy_workqueue(ldev->wq); + mutex_destroy(&ldev->lock); kfree(ldev); } @@ -143,6 +261,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) } kref_init(&ldev->ref); + mutex_init(&ldev->lock); INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); ldev->nb.notifier_call = mlx5_lag_netdev_event; @@ -150,12 +269,17 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) ldev->nb.notifier_call = NULL; mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); } + ldev->mode = MLX5_LAG_MODE_NONE; err = mlx5_lag_mp_init(ldev); if (err) mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", err); + mlx5_lag_mpesw_init(ldev); + ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports); + ldev->buckets = 1; + return ldev; } @@ -164,7 +288,7 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, { int i; - for (i = 0; i < MLX5_MAX_PORTS; i++) + for (i = 0; i < ldev->ports; i++) if (ldev->pf[i].netdev == ndev) return i; @@ -173,108 +297,278 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) { - return !!(ldev->flags & MLX5_LAG_FLAG_ROCE); + return ldev->mode == MLX5_LAG_MODE_ROCE; } static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) { - return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV); + return ldev->mode == MLX5_LAG_MODE_SRIOV; } +/* Create a mapping between steering slots and active ports. + * As we have ldev->buckets slots per port first assume the native + * mapping should be used. + * If there are ports that are disabled fill the relevant slots + * with mapping that points to active ports. + */ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, - u8 *port1, u8 *port2) + u8 num_ports, + u8 buckets, + u8 *ports) { - bool p1en; - bool p2en; + int disabled[MLX5_MAX_PORTS] = {}; + int enabled[MLX5_MAX_PORTS] = {}; + int disabled_ports_num = 0; + int enabled_ports_num = 0; + int idx; + u32 rand; + int i; + int j; - p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled && - tracker->netdev_state[MLX5_LAG_P1].link_up; + for (i = 0; i < num_ports; i++) { + if (tracker->netdev_state[i].tx_enabled && + tracker->netdev_state[i].link_up) + enabled[enabled_ports_num++] = i; + else + disabled[disabled_ports_num++] = i; + } - p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled && - tracker->netdev_state[MLX5_LAG_P2].link_up; + /* Use native mapping by default where each port's buckets + * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc + */ + for (i = 0; i < num_ports; i++) + for (j = 0; j < buckets; j++) { + idx = i * buckets + j; + ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i; + } - *port1 = 1; - *port2 = 2; - if ((!p1en && !p2en) || (p1en && p2en)) + /* If all ports are disabled/enabled keep native mapping */ + if (enabled_ports_num == num_ports || + disabled_ports_num == num_ports) return; - if (p1en) - *port2 = 1; - else - *port1 = 2; + /* Go over the disabled ports and for each assign a random active port */ + for (i = 0; i < disabled_ports_num; i++) { + for (j = 0; j < buckets; j++) { + get_random_bytes(&rand, 4); + ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1; + } + } +} + +static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < ldev->ports; i++) + if (ldev->pf[i].has_drop) + return true; + return false; } -static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 v2p_port1, u8 v2p_port2) +static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < ldev->ports; i++) { + if (!ldev->pf[i].has_drop) + continue; + + mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch, + MLX5_VPORT_UPLINK); + ldev->pf[i].has_drop = false; + } +} + +static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, + struct lag_tracker *tracker) +{ + u8 disabled_ports[MLX5_MAX_PORTS] = {}; + struct mlx5_core_dev *dev; + int disabled_index; + int num_disabled; + int err; + int i; + + /* First delete the current drop rule so there won't be any dropped + * packets + */ + mlx5_lag_drop_rule_cleanup(ldev); + + if (!ldev->tracker.has_inactive) + return; + + mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled); + + for (i = 0; i < num_disabled; i++) { + disabled_index = disabled_ports[i]; + dev = ldev->pf[disabled_index].dev; + err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch, + MLX5_VPORT_UPLINK); + if (!err) + ldev->pf[disabled_index].has_drop = true; + else + mlx5_core_err(dev, + "Failed to create lag drop rule, error: %d", err); + } +} + +static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports) +{ + u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; + void *lag_ctx; + + lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); + + MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); + MLX5_SET(modify_lag_in, in, field_select, 0x2); + + MLX5_SET(lagc, lag_ctx, active_port, ports); + + return mlx5_cmd_exec_in(dev, modify_lag, in); +} + +static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + u8 active_ports; + int ret; + + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) { + ret = mlx5_lag_port_sel_modify(ldev, ports); + if (ret || + !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass)) + return ret; - if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED) - return mlx5_lag_port_sel_modify(ldev, v2p_port1, v2p_port2); - return mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); + active_ports = lag_active_port_bits(ldev); + + return mlx5_cmd_modify_active_port(dev0, active_ports); + } + return mlx5_cmd_modify_lag(dev0, ldev->ports, ports); } void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker) { + u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {}; struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - u8 v2p_port1, v2p_port2; + int idx; int err; + int i; + int j; - mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, - &v2p_port2); + mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports); - if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] || - v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) { - err = _mlx5_modify_lag(ldev, v2p_port1, v2p_port2); - if (err) { - mlx5_core_err(dev0, - "Failed to modify LAG (%d)\n", - err); - return; + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + if (ports[idx] == ldev->v2p_map[idx]) + continue; + err = _mlx5_modify_lag(ldev, ports); + if (err) { + mlx5_core_err(dev0, + "Failed to modify LAG (%d)\n", + err); + return; + } + memcpy(ldev->v2p_map, ports, sizeof(ports)); + + mlx5_lag_print_mapping(dev0, ldev, tracker, + ldev->mode_flags); + break; } - ldev->v2p_map[MLX5_LAG_P1] = v2p_port1; - ldev->v2p_map[MLX5_LAG_P2] = v2p_port2; - mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d", - ldev->v2p_map[MLX5_LAG_P1], - ldev->v2p_map[MLX5_LAG_P2]); } + + if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && + !(ldev->mode == MLX5_LAG_MODE_ROCE)) + mlx5_lag_drop_rule_setup(ldev, tracker); +} + +static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev, + unsigned long *flags) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + + if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) { + if (ldev->ports > 2) + return -EINVAL; + return 0; + } + + if (ldev->ports > 2) + ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS; + + set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags); + + return 0; } -static void mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev, - struct lag_tracker *tracker, u8 *flags) +static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + enum mlx5_lag_mode mode, + unsigned long *flags) { - bool roce_lag = !!(*flags & MLX5_LAG_FLAG_ROCE); struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1]; - if (roce_lag || - !MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) || - tracker->tx_type != NETDEV_LAG_TX_TYPE_HASH) + if (mode == MLX5_LAG_MODE_MPESW) return; - *flags |= MLX5_LAG_FLAG_HASH_BASED; + + if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) && + tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) + set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags); +} + +static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, + struct lag_tracker *tracker, bool shared_fdb, + unsigned long *flags) +{ + bool roce_lag = mode == MLX5_LAG_MODE_ROCE; + + *flags = 0; + if (shared_fdb) { + set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags); + set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags); + } + + if (mode == MLX5_LAG_MODE_MPESW) + set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags); + + if (roce_lag) + return mlx5_lag_set_port_sel_mode_roce(ldev, flags); + + mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags); + return 0; } -static char *get_str_port_sel_mode(u8 flags) +char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) { - if (flags & MLX5_LAG_FLAG_HASH_BASED) - return "hash"; - return "queue_affinity"; + int port_sel_mode = get_port_sel_mode(mode, flags); + + switch (port_sel_mode) { + case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity"; + case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash"; + case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw"; + default: return "invalid"; + } } static int mlx5_create_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker, - bool shared_fdb, u8 flags) + enum mlx5_lag_mode mode, + unsigned long flags) { + bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; int err; - mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d mode:%s", - ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2], - shared_fdb, get_str_port_sel_mode(flags)); + if (tracker) + mlx5_lag_print_mapping(dev0, ldev, tracker, flags); + mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n", + shared_fdb, mlx5_get_str_port_sel_mode(mode, flags)); - err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1], - ldev->v2p_map[MLX5_LAG_P2], shared_fdb, flags); + err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags); if (err) { mlx5_core_err(dev0, "Failed to create LAG (%d)\n", @@ -303,31 +597,35 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, int mlx5_activate_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker, - u8 flags, + enum mlx5_lag_mode mode, bool shared_fdb) { - bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE); + bool roce_lag = mode == MLX5_LAG_MODE_ROCE; struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + unsigned long flags = 0; int err; - mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1], - &ldev->v2p_map[MLX5_LAG_P2]); - mlx5_lag_set_port_sel_mode(ldev, tracker, &flags); - if (flags & MLX5_LAG_FLAG_HASH_BASED) { - err = mlx5_lag_port_sel_create(ldev, tracker->hash_type, - ldev->v2p_map[MLX5_LAG_P1], - ldev->v2p_map[MLX5_LAG_P2]); - if (err) { - mlx5_core_err(dev0, - "Failed to create LAG port selection(%d)\n", - err); - return err; + err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags); + if (err) + return err; + + if (mode != MLX5_LAG_MODE_MPESW) { + mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map); + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { + err = mlx5_lag_port_sel_create(ldev, tracker->hash_type, + ldev->v2p_map); + if (err) { + mlx5_core_err(dev0, + "Failed to create LAG port selection(%d)\n", + err); + return err; + } } } - err = mlx5_create_lag(ldev, tracker, shared_fdb, flags); + err = mlx5_create_lag(ldev, tracker, mode, flags); if (err) { - if (flags & MLX5_LAG_FLAG_HASH_BASED) + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) mlx5_lag_port_sel_destroy(ldev); if (roce_lag) mlx5_core_err(dev0, @@ -339,26 +637,32 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, return err; } - ldev->flags |= flags; - ldev->shared_fdb = shared_fdb; + if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && + !roce_lag) + mlx5_lag_drop_rule_setup(ldev, tracker); + + ldev->mode = mode; + ldev->mode_flags = flags; return 0; } static int mlx5_deactivate_lag(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; bool roce_lag = __mlx5_lag_is_roce(ldev); - u8 flags = ldev->flags; + unsigned long flags = ldev->mode_flags; int err; - ldev->flags &= ~MLX5_LAG_MODE_FLAGS; + ldev->mode = MLX5_LAG_MODE_NONE; + ldev->mode_flags = 0; mlx5_lag_mp_reset(ldev); - if (ldev->shared_fdb) { - mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch, - ldev->pf[MLX5_LAG_P2].dev->priv.eswitch); - ldev->shared_fdb = false; + if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) { + mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch, + dev1->priv.eswitch); + clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); } MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); @@ -372,32 +676,55 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) "Failed to deactivate VF LAG; driver restart required\n" "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); } - } else if (flags & MLX5_LAG_FLAG_HASH_BASED) { - mlx5_lag_port_sel_destroy(ldev); + return err; } - return err; + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) + mlx5_lag_port_sel_destroy(ldev); + if (mlx5_lag_has_drop_rule(ldev)) + mlx5_lag_drop_rule_cleanup(ldev); + + return 0; } +#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2 static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) { - if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev) - return false; +#ifdef CONFIG_MLX5_ESWITCH + struct mlx5_core_dev *dev; + u8 mode; +#endif + int i; + + for (i = 0; i < ldev->ports; i++) + if (!ldev->pf[i].dev) + return false; #ifdef CONFIG_MLX5_ESWITCH - return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev, - ldev->pf[MLX5_LAG_P2].dev); + dev = ldev->pf[MLX5_LAG_P1].dev; + if ((mlx5_sriov_is_enabled(dev)) && !is_mdev_switchdev_mode(dev)) + return false; + + mode = mlx5_eswitch_mode(dev); + for (i = 0; i < ldev->ports; i++) + if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) + return false; + + if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS) + return false; #else - return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) && - !mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev)); + for (i = 0; i < ldev->ports; i++) + if (mlx5_sriov_is_enabled(ldev->pf[i].dev)) + return false; #endif + return true; } static void mlx5_lag_add_devices(struct mlx5_lag *ldev) { int i; - for (i = 0; i < MLX5_MAX_PORTS; i++) { + for (i = 0; i < ldev->ports; i++) { if (!ldev->pf[i].dev) continue; @@ -414,7 +741,7 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev) { int i; - for (i = 0; i < MLX5_MAX_PORTS; i++) { + for (i = 0; i < ldev->ports; i++) { if (!ldev->pf[i].dev) continue; @@ -427,13 +754,14 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev) } } -static void mlx5_disable_lag(struct mlx5_lag *ldev) +void mlx5_disable_lag(struct mlx5_lag *ldev) { + bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; - bool shared_fdb = ldev->shared_fdb; bool roce_lag; int err; + int i; roce_lag = __mlx5_lag_is_roce(ldev); @@ -444,7 +772,8 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev) dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); } - mlx5_nic_vport_disable_roce(dev1); + for (i = 1; i < ldev->ports; i++) + mlx5_nic_vport_disable_roce(ldev->pf[i].dev); } err = mlx5_deactivate_lag(ldev); @@ -462,7 +791,7 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev) } } -static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) +bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; @@ -481,13 +810,42 @@ static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) return false; } +static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev) +{ + bool roce_lag = true; + int i; + + for (i = 0; i < ldev->ports; i++) + roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev); + +#ifdef CONFIG_MLX5_ESWITCH + for (i = 0; i < ldev->ports; i++) + roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev); +#endif + + return roce_lag; +} + +static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond) +{ + return do_bond && __mlx5_lag_is_active(ldev) && + ldev->mode != MLX5_LAG_MODE_MPESW; +} + +static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond) +{ + return !do_bond && __mlx5_lag_is_active(ldev) && + ldev->mode != MLX5_LAG_MODE_MPESW; +} + static void mlx5_do_bond(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; - struct lag_tracker tracker; + struct lag_tracker tracker = { }; bool do_bond, roce_lag; int err; + int i; if (!mlx5_lag_is_ready(ldev)) { do_bond = false; @@ -504,21 +862,14 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) if (do_bond && !__mlx5_lag_is_active(ldev)) { bool shared_fdb = mlx5_shared_fdb_supported(ldev); - roce_lag = !mlx5_sriov_is_enabled(dev0) && - !mlx5_sriov_is_enabled(dev1); - -#ifdef CONFIG_MLX5_ESWITCH - roce_lag = roce_lag && - dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE && - dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE; -#endif + roce_lag = mlx5_lag_is_roce_lag(ldev); if (shared_fdb || roce_lag) mlx5_lag_remove_devices(ldev); err = mlx5_activate_lag(ldev, &tracker, - roce_lag ? MLX5_LAG_FLAG_ROCE : - MLX5_LAG_FLAG_SRIOV, + roce_lag ? MLX5_LAG_MODE_ROCE : + MLX5_LAG_MODE_SRIOV, shared_fdb); if (err) { if (shared_fdb || roce_lag) @@ -528,7 +879,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) } else if (roce_lag) { dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); - mlx5_nic_vport_enable_roce(dev1); + for (i = 1; i < ldev->ports; i++) + mlx5_nic_vport_enable_roce(ldev->pf[i].dev); } else if (shared_fdb) { dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); @@ -548,9 +900,9 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) return; } } - } else if (do_bond && __mlx5_lag_is_active(ldev)) { + } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) { mlx5_modify_lag(ldev, &tracker); - } else if (!do_bond && __mlx5_lag_is_active(ldev)) { + } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) { mlx5_disable_lag(ldev); } } @@ -560,31 +912,11 @@ static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) queue_delayed_work(ldev->wq, &ldev->bond_work, delay); } -static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0, - struct mlx5_core_dev *dev1) -{ - if (dev0) - mlx5_esw_lock(dev0->priv.eswitch); - if (dev1) - mlx5_esw_lock(dev1->priv.eswitch); -} - -static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0, - struct mlx5_core_dev *dev1) -{ - if (dev1) - mlx5_esw_unlock(dev1->priv.eswitch); - if (dev0) - mlx5_esw_unlock(dev0->priv.eswitch); -} - static void mlx5_do_bond_work(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag, bond_work); - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; int status; status = mlx5_dev_list_trylock(); @@ -593,27 +925,29 @@ static void mlx5_do_bond_work(struct work_struct *work) return; } + mutex_lock(&ldev->lock); if (ldev->mode_changes_in_progress) { + mutex_unlock(&ldev->lock); mlx5_dev_list_unlock(); mlx5_queue_bond_work(ldev, HZ); return; } - mlx5_lag_lock_eswitches(dev0, dev1); mlx5_do_bond(ldev); - mlx5_lag_unlock_eswitches(dev0, dev1); + mutex_unlock(&ldev->lock); mlx5_dev_list_unlock(); } static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, struct lag_tracker *tracker, - struct net_device *ndev, struct netdev_notifier_changeupper_info *info) { struct net_device *upper = info->upper_dev, *ndev_tmp; struct netdev_lag_upper_info *lag_upper_info = NULL; bool is_bonded, is_in_lag, mode_supported; - int bond_status = 0; + bool has_inactive = 0; + struct slave *slave; + u8 bond_status = 0; int num_slaves = 0; int changed = 0; int idx; @@ -632,15 +966,19 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, rcu_read_lock(); for_each_netdev_in_bond_rcu(upper, ndev_tmp) { idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); - if (idx >= 0) + if (idx >= 0) { + slave = bond_slave_get_rcu(ndev_tmp); + if (slave) + has_inactive |= bond_is_slave_inactive(slave); bond_status |= (1 << idx); + } num_slaves++; } rcu_read_unlock(); /* None of this lagdev's netdevs are slaves of this master. */ - if (!(bond_status & 0x3)) + if (!(bond_status & GENMASK(ldev->ports - 1, 0))) return 0; if (lag_upper_info) { @@ -648,11 +986,13 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, tracker->hash_type = lag_upper_info->hash_type; } + tracker->has_inactive = has_inactive; /* Determine bonding status: * A device is considered bonded if both its physical ports are slaves * of the same lag master, and only them. */ - is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3; + is_in_lag = num_slaves == ldev->ports && + bond_status == GENMASK(ldev->ports - 1, 0); /* Lag mode must be activebackup or hash. */ mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP || @@ -704,6 +1044,39 @@ static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, return 1; } +static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct net_device *ndev) +{ + struct net_device *ndev_tmp; + struct slave *slave; + bool has_inactive = 0; + int idx; + + if (!netif_is_lag_master(ndev)) + return 0; + + rcu_read_lock(); + for_each_netdev_in_bond_rcu(ndev, ndev_tmp) { + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); + if (idx < 0) + continue; + + slave = bond_slave_get_rcu(ndev_tmp); + if (slave) + has_inactive |= bond_is_slave_inactive(slave); + } + rcu_read_unlock(); + + if (tracker->has_inactive == has_inactive) + return 0; + + tracker->has_inactive = has_inactive; + + return 1; +} + +/* this handler is always registered to netdev events */ static int mlx5_lag_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -712,7 +1085,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, struct mlx5_lag *ldev; int changed = 0; - if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE)) + if (event != NETDEV_CHANGEUPPER && + event != NETDEV_CHANGELOWERSTATE && + event != NETDEV_CHANGEINFODATA) return NOTIFY_DONE; ldev = container_of(this, struct mlx5_lag, nb); @@ -721,13 +1096,15 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, switch (event) { case NETDEV_CHANGEUPPER: - changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev, - ptr); + changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr); break; case NETDEV_CHANGELOWERSTATE: changed = mlx5_handle_changelowerstate_event(ldev, &tracker, ndev, ptr); break; + case NETDEV_CHANGEINFODATA: + changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev); + break; } ldev->tracker = tracker; @@ -743,30 +1120,32 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, struct net_device *netdev) { unsigned int fn = mlx5_get_dev_index(dev); + unsigned long flags; - if (fn >= MLX5_MAX_PORTS) + if (fn >= ldev->ports) return; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev->pf[fn].netdev = netdev; ldev->tracker.netdev_state[fn].link_up = 0; ldev->tracker.netdev_state[fn].tx_enabled = 0; - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); } static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, struct net_device *netdev) { + unsigned long flags; int i; - spin_lock(&lag_lock); - for (i = 0; i < MLX5_MAX_PORTS; i++) { + spin_lock_irqsave(&lag_lock, flags); + for (i = 0; i < ldev->ports; i++) { if (ldev->pf[i].netdev == netdev) { ldev->pf[i].netdev = NULL; break; } } - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); } static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, @@ -774,24 +1153,23 @@ static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, { unsigned int fn = mlx5_get_dev_index(dev); - if (fn >= MLX5_MAX_PORTS) + if (fn >= ldev->ports) return; ldev->pf[fn].dev = dev; dev->priv.lag = ldev; } -/* Must be called with intf_mutex held */ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev) { int i; - for (i = 0; i < MLX5_MAX_PORTS; i++) + for (i = 0; i < ldev->ports; i++) if (ldev->pf[i].dev == dev) break; - if (i == MLX5_MAX_PORTS) + if (i == ldev->ports) return; ldev->pf[i].dev = NULL; @@ -804,12 +1182,7 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; - if (!MLX5_CAP_GEN(dev, vport_group_manager) || - !MLX5_CAP_GEN(dev, lag_master) || - MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS) - return 0; - - tmp_dev = mlx5_get_next_phys_dev(dev); + tmp_dev = mlx5_get_next_phys_dev_lag(dev); if (tmp_dev) ldev = tmp_dev->priv.lag; @@ -819,13 +1192,18 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) mlx5_core_err(dev, "Failed to alloc lag dev\n"); return 0; } - } else { - if (ldev->mode_changes_in_progress) - return -EAGAIN; - mlx5_ldev_get(ldev); + mlx5_ldev_add_mdev(ldev, dev); + return 0; } + mutex_lock(&ldev->lock); + if (ldev->mode_changes_in_progress) { + mutex_unlock(&ldev->lock); + return -EAGAIN; + } + mlx5_ldev_get(ldev); mlx5_ldev_add_mdev(ldev, dev); + mutex_unlock(&ldev->lock); return 0; } @@ -838,15 +1216,19 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) if (!ldev) return; + /* mdev is being removed, might as well remove debugfs + * as early as possible. + */ + mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs); recheck: - mlx5_dev_list_lock(); + mutex_lock(&ldev->lock); if (ldev->mode_changes_in_progress) { - mlx5_dev_list_unlock(); + mutex_unlock(&ldev->lock); msleep(100); goto recheck; } mlx5_ldev_remove_mdev(ldev, dev); - mlx5_dev_list_unlock(); + mutex_unlock(&ldev->lock); mlx5_ldev_put(ldev); } @@ -854,35 +1236,45 @@ void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) { int err; + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS || + MLX5_CAP_GEN(dev, num_lag_ports) <= 1)) + return; + recheck: mlx5_dev_list_lock(); err = __mlx5_lag_dev_add_mdev(dev); + mlx5_dev_list_unlock(); + if (err) { - mlx5_dev_list_unlock(); msleep(100); goto recheck; } - mlx5_dev_list_unlock(); + mlx5_ldev_add_debugfs(dev); } -/* Must be called with intf_mutex held */ void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev) { struct mlx5_lag *ldev; + bool lag_is_active; ldev = mlx5_lag_dev(dev); if (!ldev) return; + mutex_lock(&ldev->lock); mlx5_ldev_remove_netdev(ldev, netdev); - ldev->flags &= ~MLX5_LAG_FLAG_READY; + clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); - if (__mlx5_lag_is_active(ldev)) + lag_is_active = __mlx5_lag_is_active(ldev); + mutex_unlock(&ldev->lock); + + if (lag_is_active) mlx5_queue_bond_work(ldev, 0); } -/* Must be called with intf_mutex held */ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev) { @@ -893,26 +1285,29 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, if (!ldev) return; + mutex_lock(&ldev->lock); mlx5_ldev_add_netdev(ldev, dev, netdev); - for (i = 0; i < MLX5_MAX_PORTS; i++) - if (!ldev->pf[i].dev) + for (i = 0; i < ldev->ports; i++) + if (!ldev->pf[i].netdev) break; - if (i >= MLX5_MAX_PORTS) - ldev->flags |= MLX5_LAG_FLAG_READY; + if (i >= ldev->ports) + set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); + mutex_unlock(&ldev->lock); mlx5_queue_bond_work(ldev, 0); } bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; + unsigned long flags; bool res; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); res = ldev && __mlx5_lag_is_roce(ldev); - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); return res; } @@ -921,27 +1316,45 @@ EXPORT_SYMBOL(mlx5_lag_is_roce); bool mlx5_lag_is_active(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; + unsigned long flags; bool res; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); res = ldev && __mlx5_lag_is_active(ldev); - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); return res; } EXPORT_SYMBOL(mlx5_lag_is_active); +bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + unsigned long flags; + bool res = 0; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + if (ldev) + res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags); + spin_unlock_irqrestore(&lag_lock, flags); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_mode_is_hash); + bool mlx5_lag_is_master(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; + unsigned long flags; bool res; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); res = ldev && __mlx5_lag_is_active(ldev) && dev == ldev->pf[MLX5_LAG_P1].dev; - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); return res; } @@ -950,12 +1363,13 @@ EXPORT_SYMBOL(mlx5_lag_is_master); bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; + unsigned long flags; bool res; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); res = ldev && __mlx5_lag_is_sriov(ldev); - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); return res; } @@ -964,12 +1378,14 @@ EXPORT_SYMBOL(mlx5_lag_is_sriov); bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; + unsigned long flags; bool res; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); - res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb; - spin_unlock(&lag_lock); + res = ldev && __mlx5_lag_is_sriov(ldev) && + test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); + spin_unlock_irqrestore(&lag_lock, flags); return res; } @@ -977,8 +1393,6 @@ EXPORT_SYMBOL(mlx5_lag_is_shared_fdb); void mlx5_lag_disable_change(struct mlx5_core_dev *dev) { - struct mlx5_core_dev *dev0; - struct mlx5_core_dev *dev1; struct mlx5_lag *ldev; ldev = mlx5_lag_dev(dev); @@ -986,16 +1400,13 @@ void mlx5_lag_disable_change(struct mlx5_core_dev *dev) return; mlx5_dev_list_lock(); - - dev0 = ldev->pf[MLX5_LAG_P1].dev; - dev1 = ldev->pf[MLX5_LAG_P2].dev; + mutex_lock(&ldev->lock); ldev->mode_changes_in_progress++; - if (__mlx5_lag_is_active(ldev)) { - mlx5_lag_lock_eswitches(dev0, dev1); + if (__mlx5_lag_is_active(ldev)) mlx5_disable_lag(ldev); - mlx5_lag_unlock_eswitches(dev0, dev1); - } + + mutex_unlock(&ldev->lock); mlx5_dev_list_unlock(); } @@ -1007,9 +1418,9 @@ void mlx5_lag_enable_change(struct mlx5_core_dev *dev) if (!ldev) return; - mlx5_dev_list_lock(); + mutex_lock(&ldev->lock); ldev->mode_changes_in_progress--; - mlx5_dev_list_unlock(); + mutex_unlock(&ldev->lock); mlx5_queue_bond_work(ldev, 0); } @@ -1017,17 +1428,21 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) { struct net_device *ndev = NULL; struct mlx5_lag *ldev; + unsigned long flags; + int i; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); if (!(ldev && __mlx5_lag_is_roce(ldev))) goto unlock; if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { - ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ? - ldev->pf[MLX5_LAG_P1].netdev : - ldev->pf[MLX5_LAG_P2].netdev; + for (i = 0; i < ldev->ports; i++) + if (ldev->tracker.netdev_state[i].tx_enabled) + ndev = ldev->pf[i].netdev; + if (!ndev) + ndev = ldev->pf[ldev->ports - 1].netdev; } else { ndev = ldev->pf[MLX5_LAG_P1].netdev; } @@ -1035,7 +1450,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) dev_hold(ndev); unlock: - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); return ndev; } @@ -1045,32 +1460,49 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, struct net_device *slave) { struct mlx5_lag *ldev; + unsigned long flags; u8 port = 0; + int i; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); if (!(ldev && __mlx5_lag_is_roce(ldev))) goto unlock; - if (ldev->pf[MLX5_LAG_P1].netdev == slave) - port = MLX5_LAG_P1; - else - port = MLX5_LAG_P2; + for (i = 0; i < ldev->ports; i++) { + if (ldev->pf[MLX5_LAG_P1].netdev == slave) { + port = i; + break; + } + } - port = ldev->v2p_map[port]; + port = ldev->v2p_map[port * ldev->buckets]; unlock: - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); return port; } EXPORT_SYMBOL(mlx5_lag_get_slave_port); +u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return 0; + + return ldev->ports; +} +EXPORT_SYMBOL(mlx5_lag_get_num_ports); + struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev) { struct mlx5_core_dev *peer_dev = NULL; struct mlx5_lag *ldev; + unsigned long flags; - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); if (!ldev) goto unlock; @@ -1080,7 +1512,7 @@ struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev) ldev->pf[MLX5_LAG_P1].dev; unlock: - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); return peer_dev; } EXPORT_SYMBOL(mlx5_lag_get_peer_mdev); @@ -1091,8 +1523,9 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, size_t *offsets) { int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); - struct mlx5_core_dev *mdev[MLX5_MAX_PORTS]; + struct mlx5_core_dev **mdev; struct mlx5_lag *ldev; + unsigned long flags; int num_ports; int ret, i, j; void *out; @@ -1101,19 +1534,25 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, if (!out) return -ENOMEM; + mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL); + if (!mdev) { + ret = -ENOMEM; + goto free_out; + } + memset(values, 0, sizeof(*values) * num_counters); - spin_lock(&lag_lock); + spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); if (ldev && __mlx5_lag_is_active(ldev)) { - num_ports = MLX5_MAX_PORTS; - mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev; - mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev; + num_ports = ldev->ports; + for (i = 0; i < ldev->ports; i++) + mdev[i] = ldev->pf[i].dev; } else { num_ports = 1; mdev[MLX5_LAG_P1] = dev; } - spin_unlock(&lag_lock); + spin_unlock_irqrestore(&lag_lock, flags); for (i = 0; i < num_ports; ++i) { u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {}; @@ -1123,13 +1562,15 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in, out); if (ret) - goto free; + goto free_mdev; for (j = 0; j < num_counters; ++j) values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); } -free: +free_mdev: + kvfree(mdev); +free_out: kvfree(out); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index e5d231c31b54..ce2ce8ccbd70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -4,9 +4,13 @@ #ifndef __MLX5_LAG_H__ #define __MLX5_LAG_H__ +#include <linux/debugfs.h> + +#define MLX5_LAG_MAX_HASH_BUCKETS 16 #include "mlx5_core.h" #include "mp.h" #include "port_sel.h" +#include "mpesw.h" enum { MLX5_LAG_P1, @@ -14,20 +18,27 @@ enum { }; enum { - MLX5_LAG_FLAG_ROCE = 1 << 0, - MLX5_LAG_FLAG_SRIOV = 1 << 1, - MLX5_LAG_FLAG_MULTIPATH = 1 << 2, - MLX5_LAG_FLAG_READY = 1 << 3, - MLX5_LAG_FLAG_HASH_BASED = 1 << 4, + MLX5_LAG_FLAG_NDEVS_READY, }; -#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\ - MLX5_LAG_FLAG_MULTIPATH | \ - MLX5_LAG_FLAG_HASH_BASED) +enum { + MLX5_LAG_MODE_FLAG_HASH_BASED, + MLX5_LAG_MODE_FLAG_SHARED_FDB, + MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, +}; + +enum mlx5_lag_mode { + MLX5_LAG_MODE_NONE, + MLX5_LAG_MODE_ROCE, + MLX5_LAG_MODE_SRIOV, + MLX5_LAG_MODE_MULTIPATH, + MLX5_LAG_MODE_MPESW, +}; struct lag_func { struct mlx5_core_dev *dev; struct net_device *netdev; + bool has_drop; }; /* Used for collection of netdev event info. */ @@ -35,6 +46,7 @@ struct lag_tracker { enum netdev_lag_tx_type tx_type; struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS]; unsigned int is_bonded:1; + unsigned int has_inactive:1; enum netdev_lag_hash hash_type; }; @@ -42,20 +54,37 @@ struct lag_tracker { * It serves both its phys functions. */ struct mlx5_lag { - u8 flags; + enum mlx5_lag_mode mode; + unsigned long mode_flags; + unsigned long state_flags; + u8 ports; + u8 buckets; int mode_changes_in_progress; - bool shared_fdb; - u8 v2p_map[MLX5_MAX_PORTS]; + u8 v2p_map[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS]; struct kref ref; struct lag_func pf[MLX5_MAX_PORTS]; struct lag_tracker tracker; struct workqueue_struct *wq; struct delayed_work bond_work; + struct work_struct mpesw_work; struct notifier_block nb; struct lag_mp lag_mp; struct mlx5_lag_port_sel port_sel; + /* Protect lag fields/state changes */ + struct mutex lock; + struct lag_mpesw lag_mpesw; }; +static inline bool mlx5_is_lag_supported(struct mlx5_core_dev *dev) +{ + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + MLX5_CAP_GEN(dev, num_lag_ports) < 2 || + MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS) + return false; + return true; +} + static inline struct mlx5_lag * mlx5_lag_dev(struct mlx5_core_dev *dev) { @@ -65,22 +94,33 @@ mlx5_lag_dev(struct mlx5_core_dev *dev) static inline bool __mlx5_lag_is_active(struct mlx5_lag *ldev) { - return !!(ldev->flags & MLX5_LAG_MODE_FLAGS); + return ldev->mode != MLX5_LAG_MODE_NONE; } static inline bool mlx5_lag_is_ready(struct mlx5_lag *ldev) { - return ldev->flags & MLX5_LAG_FLAG_READY; + return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); } void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker); int mlx5_activate_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker, - u8 flags, + enum mlx5_lag_mode mode, bool shared_fdb); int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, struct net_device *ndev); +bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev); +void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev); +int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev); + +char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags); +void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_enabled); + +void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev); +void mlx5_ldev_remove_debugfs(struct dentry *dbg); +void mlx5_disable_lag(struct mlx5_lag *ldev); #endif /* __MLX5_LAG_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index bf4d3cbefa63..0259a149a64c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -11,7 +11,7 @@ static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev) { - return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH); + return ldev->mode == MLX5_LAG_MODE_MULTIPATH; } static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) @@ -50,7 +50,7 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, enum mlx5_lag_port_affinity port) { - struct lag_tracker tracker; + struct lag_tracker tracker = {}; if (!__mlx5_lag_is_multipath(ldev)) return; @@ -100,6 +100,14 @@ static void mlx5_lag_fib_event_flush(struct notifier_block *nb) flush_workqueue(mp->wq); } +static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len) +{ + mp->fib.mfi = fi; + mp->fib.priority = fi->fib_priority; + mp->fib.dst = dst; + mp->fib.dst_len = dst_len; +} + struct mlx5_fib_event_work { struct work_struct work; struct mlx5_lag *ldev; @@ -110,10 +118,10 @@ struct mlx5_fib_event_work { }; }; -static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, - unsigned long event, - struct fib_info *fi) +static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, + struct fib_entry_notifier_info *fen_info) { + struct fib_info *fi = fen_info->fi; struct lag_mp *mp = &ldev->lag_mp; struct fib_nh *fib_nh0, *fib_nh1; unsigned int nhs; @@ -121,11 +129,17 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, /* Handle delete event */ if (event == FIB_EVENT_ENTRY_DEL) { /* stop track */ - if (mp->mfi == fi) - mp->mfi = NULL; + if (mp->fib.mfi == fi) + mp->fib.mfi = NULL; return; } + /* Handle multipath entry with lower priority value */ + if (mp->fib.mfi && mp->fib.mfi != fi && + (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) && + fi->fib_priority >= mp->fib.priority) + return; + /* Handle add/replace event */ nhs = fib_info_num_path(fi); if (nhs == 1) { @@ -135,12 +149,13 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev); if (i < 0) - i = MLX5_LAG_NORMAL_AFFINITY; - else - ++i; + return; + i++; mlx5_lag_set_port_affinity(ldev, i); + mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len); } + return; } @@ -160,15 +175,15 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } /* First time we see multipath route */ - if (!mp->mfi && !__mlx5_lag_is_active(ldev)) { + if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) { struct lag_tracker tracker; tracker = ldev->tracker; - mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false); + mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false); } mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); - mp->mfi = fi; + mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len); } static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, @@ -179,7 +194,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, struct lag_mp *mp = &ldev->lag_mp; /* Check the nh event is related to the route */ - if (!mp->mfi || mp->mfi != fi) + if (!mp->fib.mfi || mp->fib.mfi != fi) return; /* nh added/removed */ @@ -209,7 +224,7 @@ static void mlx5_lag_fib_update(struct work_struct *work) case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_DEL: mlx5_lag_fib_route_event(ldev, fib_work->event, - fib_work->fen_info.fi); + &fib_work->fen_info); fib_info_put(fib_work->fen_info.fi); break; case FIB_EVENT_NH_ADD: @@ -268,10 +283,8 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, fen_info = container_of(info, struct fib_entry_notifier_info, info); fi = fen_info->fi; - if (fi->nh) { - NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); - return notifier_from_errno(-EINVAL); - } + if (fi->nh) + return NOTIFY_DONE; fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev && fib_dev != ldev->pf[MLX5_LAG_P2].netdev) { @@ -310,7 +323,7 @@ void mlx5_lag_mp_reset(struct mlx5_lag *ldev) /* Clear mfi, as it might become stale when a route delete event * has been missed, see mlx5_lag_fib_route_event(). */ - ldev->lag_mp.mfi = NULL; + ldev->lag_mp.fib.mfi = NULL; } int mlx5_lag_mp_init(struct mlx5_lag *ldev) @@ -321,7 +334,7 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev) /* always clear mfi, as it might become stale when a route delete event * has been missed */ - mp->mfi = NULL; + mp->fib.mfi = NULL; if (mp->fib_nb.notifier_call) return 0; @@ -351,5 +364,5 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) unregister_fib_notifier(&init_net, &mp->fib_nb); destroy_workqueue(mp->wq); mp->fib_nb.notifier_call = NULL; - mp->mfi = NULL; + mp->fib.mfi = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h index 57af962cad29..056a066da604 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h @@ -15,7 +15,12 @@ enum mlx5_lag_port_affinity { struct lag_mp { struct notifier_block fib_nb; - struct fib_info *mfi; /* used in tracking fib events */ + struct { + const void *mfi; /* used in tracking fib events */ + u32 priority; + u32 dst; + int dst_len; + } fib; struct workqueue_struct *wq; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c new file mode 100644 index 000000000000..f643202b29c6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include <linux/netdevice.h> +#include <net/nexthop.h> +#include "lag/lag.h" +#include "eswitch.h" +#include "lib/mlx5.h" + +void mlx5_mpesw_work(struct work_struct *work) +{ + struct mlx5_lag *ldev = container_of(work, struct mlx5_lag, mpesw_work); + + mutex_lock(&ldev->lock); + mlx5_disable_lag(ldev); + mutex_unlock(&ldev->lock); +} + +static void mlx5_lag_disable_mpesw(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev = dev->priv.lag; + + if (!queue_work(ldev->wq, &ldev->mpesw_work)) + mlx5_core_warn(dev, "failed to queue work\n"); +} + +void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev = dev->priv.lag; + + if (!ldev) + return; + + mutex_lock(&ldev->lock); + if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) && + ldev->mode == MLX5_LAG_MODE_MPESW) + mlx5_lag_disable_mpesw(dev); + mutex_unlock(&ldev->lock); +} + +int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev = dev->priv.lag; + int err = 0; + + if (!ldev) + return 0; + + mutex_lock(&ldev->lock); + if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1) + goto out; + + if (ldev->mode != MLX5_LAG_MODE_NONE) { + err = -EINVAL; + goto out; + } + + err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); + if (err) + mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); + +out: + mutex_unlock(&ldev->lock); + return err; +} + +int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev) +{ + struct mlx5_lag *ldev = mdev->priv.lag; + + if (!netif_is_bond_master(out_dev) || !ldev) + return 0; + + mutex_lock(&ldev->lock); + if (ldev->mode == MLX5_LAG_MODE_MPESW) { + mutex_unlock(&ldev->lock); + return -EOPNOTSUPP; + } + mutex_unlock(&ldev->lock); + return 0; +} + +bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev) +{ + bool ret; + + ret = dev->priv.lag && dev->priv.lag->mode == MLX5_LAG_MODE_MPESW; + return ret; +} + +void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) +{ + INIT_WORK(&ldev->mpesw_work, mlx5_mpesw_work); + atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0); +} + +void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) +{ + cancel_delayed_work_sync(&ldev->bond_work); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h new file mode 100644 index 000000000000..be4abcb8fcd5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_LAG_MPESW_H__ +#define __MLX5_LAG_MPESW_H__ + +#include "lag.h" +#include "mlx5_core.h" + +struct lag_mpesw { + struct work_struct mpesw_work; + atomic_t mpesw_rule_count; +}; + +void mlx5_mpesw_work(struct work_struct *work); +int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev); +bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev); +#if IS_ENABLED(CONFIG_MLX5_ESWITCH) +void mlx5_lag_mpesw_init(struct mlx5_lag *ldev); +void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev); +#else +static inline void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) {} +static inline void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) {} +#endif + +#endif /* __MLX5_LAG_MPESW_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index a6592f9c3c05..d3a3fe4ce670 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -12,7 +12,8 @@ enum { static struct mlx5_flow_group * mlx5_create_hash_flow_group(struct mlx5_flow_table *ft, - struct mlx5_flow_definer *definer) + struct mlx5_flow_definer *definer, + u8 rules) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *fg; @@ -25,7 +26,7 @@ mlx5_create_hash_flow_group(struct mlx5_flow_table *ft, MLX5_SET(create_flow_group_in, in, match_definer_id, mlx5_get_match_definer_id(definer)); MLX5_SET(create_flow_group_in, in, start_flow_index, 0); - MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_MAX_PORTS - 1); + MLX5_SET(create_flow_group_in, in, end_flow_index, rules - 1); MLX5_SET(create_flow_group_in, in, group_type, MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_HASH_SPLIT); @@ -36,7 +37,7 @@ mlx5_create_hash_flow_group(struct mlx5_flow_table *ft, static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, struct mlx5_lag_definer *lag_definer, - u8 port1, u8 port2) + u8 *ports) { struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_flow_table_attr ft_attr = {}; @@ -44,8 +45,10 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_namespace *ns; int err, i; + int idx; + int j; - ft_attr.max_fte = MLX5_MAX_PORTS; + ft_attr.max_fte = ldev->ports * ldev->buckets; ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER; ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_PORT_SEL); @@ -61,7 +64,8 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, } lag_definer->fg = mlx5_create_hash_flow_group(lag_definer->ft, - lag_definer->definer); + lag_definer->definer, + ft_attr.max_fte); if (IS_ERR(lag_definer->fg)) { err = PTR_ERR(lag_definer->fg); goto destroy_ft; @@ -70,19 +74,25 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; flow_act.flags |= FLOW_ACT_NO_APPEND; - for (i = 0; i < MLX5_MAX_PORTS; i++) { - u8 affinity = i == 0 ? port1 : port2; - - dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev, - vhca_id); - lag_definer->rules[i] = mlx5_add_flow_rules(lag_definer->ft, - NULL, &flow_act, - &dest, 1); - if (IS_ERR(lag_definer->rules[i])) { - err = PTR_ERR(lag_definer->rules[i]); - while (i--) - mlx5_del_flow_rules(lag_definer->rules[i]); - goto destroy_fg; + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + u8 affinity; + + idx = i * ldev->buckets + j; + affinity = ports[idx]; + + dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev, + vhca_id); + lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft, + NULL, &flow_act, + &dest, 1); + if (IS_ERR(lag_definer->rules[idx])) { + err = PTR_ERR(lag_definer->rules[idx]); + while (i--) + while (j--) + mlx5_del_flow_rules(lag_definer->rules[idx]); + goto destroy_fg; + } } } @@ -279,8 +289,7 @@ static int mlx5_lag_set_definer(u32 *match_definer_mask, static struct mlx5_lag_definer * mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash, - enum mlx5_traffic_types tt, bool tunnel, u8 port1, - u8 port2) + enum mlx5_traffic_types tt, bool tunnel, u8 *ports) { struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_lag_definer *lag_definer; @@ -308,7 +317,7 @@ mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash, goto free_mask; } - err = mlx5_lag_create_port_sel_table(ldev, lag_definer, port1, port2); + err = mlx5_lag_create_port_sel_table(ldev, lag_definer, ports); if (err) goto destroy_match_definer; @@ -329,10 +338,16 @@ static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev, struct mlx5_lag_definer *lag_definer) { struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int idx; int i; + int j; - for (i = 0; i < MLX5_MAX_PORTS; i++) - mlx5_del_flow_rules(lag_definer->rules[i]); + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + mlx5_del_flow_rules(lag_definer->rules[idx]); + } + } mlx5_destroy_flow_group(lag_definer->fg); mlx5_destroy_flow_table(lag_definer->ft); mlx5_destroy_match_definer(dev, lag_definer->definer); @@ -356,7 +371,7 @@ static void mlx5_lag_destroy_definers(struct mlx5_lag *ldev) static int mlx5_lag_create_definers(struct mlx5_lag *ldev, enum netdev_lag_hash hash_type, - u8 port1, u8 port2) + u8 *ports) { struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; struct mlx5_lag_definer *lag_definer; @@ -364,7 +379,7 @@ static int mlx5_lag_create_definers(struct mlx5_lag *ldev, for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { lag_definer = mlx5_lag_create_definer(ldev, hash_type, tt, - false, port1, port2); + false, ports); if (IS_ERR(lag_definer)) { err = PTR_ERR(lag_definer); goto destroy_definers; @@ -376,7 +391,7 @@ static int mlx5_lag_create_definers(struct mlx5_lag *ldev, lag_definer = mlx5_lag_create_definer(ldev, hash_type, tt, - true, port1, port2); + true, ports); if (IS_ERR(lag_definer)) { err = PTR_ERR(lag_definer); goto destroy_definers; @@ -505,7 +520,7 @@ static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev) struct ttc_params ttc_params = {}; mlx5_lag_set_inner_ttc_params(ldev, &ttc_params); - port_sel->inner.ttc = mlx5_create_ttc_table(dev, &ttc_params); + port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params); if (IS_ERR(port_sel->inner.ttc)) return PTR_ERR(port_sel->inner.ttc); @@ -513,13 +528,13 @@ static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev) } int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, - enum netdev_lag_hash hash_type, u8 port1, u8 port2) + enum netdev_lag_hash hash_type, u8 *ports) { struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; int err; set_tt_map(port_sel, hash_type); - err = mlx5_lag_create_definers(ldev, hash_type, port1, port2); + err = mlx5_lag_create_definers(ldev, hash_type, ports); if (err) return err; @@ -543,52 +558,62 @@ destroy_definers: return err; } -static int -mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, - struct mlx5_lag_definer **definers, - u8 port1, u8 port2) +static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, + struct mlx5_lag_definer *def, + u8 *ports) { - struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; struct mlx5_flow_destination dest = {}; + int idx; int err; - int tt; + int i; + int j; dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; - for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { - struct mlx5_flow_handle **rules = definers[tt]->rules; + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + if (ldev->v2p_map[i] == ports[i]) + continue; - if (ldev->v2p_map[MLX5_LAG_P1] != port1) { - dest.vport.vhca_id = - MLX5_CAP_GEN(ldev->pf[port1 - 1].dev, vhca_id); - err = mlx5_modify_rule_destination(rules[MLX5_LAG_P1], - &dest, NULL); + dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev, + vhca_id); + err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL); if (err) return err; } + } - if (ldev->v2p_map[MLX5_LAG_P2] != port2) { - dest.vport.vhca_id = - MLX5_CAP_GEN(ldev->pf[port2 - 1].dev, vhca_id); - err = mlx5_modify_rule_destination(rules[MLX5_LAG_P2], - &dest, NULL); - if (err) - return err; - } + return 0; +} + +static int +mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, + struct mlx5_lag_definer **definers, + u8 *ports) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + int err; + int tt; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + err = __mlx5_lag_modify_definers_destinations(ldev, definers[tt], ports); + if (err) + return err; } return 0; } -int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2) +int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports) { struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; int err; err = mlx5_lag_modify_definers_destinations(ldev, port_sel->outer.definers, - port1, port2); + ports); if (err) return err; @@ -597,7 +622,7 @@ int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2) return mlx5_lag_modify_definers_destinations(ldev, port_sel->inner.definers, - port1, port2); + ports); } void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h index 6d15b28a42fc..5ec3af2a3ecd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h @@ -10,7 +10,10 @@ struct mlx5_lag_definer { struct mlx5_flow_definer *definer; struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; - struct mlx5_flow_handle *rules[MLX5_MAX_PORTS]; + /* Each port has ldev->buckets number of rules and they are arrange in + * [port * buckets .. port * buckets + buckets) locations + */ + struct mlx5_flow_handle *rules[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS]; }; struct mlx5_lag_ttc { @@ -27,22 +30,20 @@ struct mlx5_lag_port_sel { #ifdef CONFIG_MLX5_ESWITCH -int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2); +int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports); void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev); int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, - enum netdev_lag_hash hash_type, u8 port1, - u8 port2); + enum netdev_lag_hash hash_type, u8 *ports); #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, enum netdev_lag_hash hash_type, - u8 port1, u8 port2) + u8 *ports) { return 0; } -static inline int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, - u8 port2) +static inline int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c new file mode 100644 index 000000000000..c971ff04dd04 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c @@ -0,0 +1,432 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/mlx5/device.h> +#include <linux/mlx5/transobj.h> +#include "clock.h" +#include "aso.h" +#include "wq.h" + +struct mlx5_aso_cq { + /* data path - accessed per cqe */ + struct mlx5_cqwq wq; + + /* data path - accessed per napi poll */ + struct mlx5_core_cq mcq; + + /* control */ + struct mlx5_core_dev *mdev; + struct mlx5_wq_ctrl wq_ctrl; +} ____cacheline_aligned_in_smp; + +struct mlx5_aso { + /* data path */ + u16 cc; + u16 pc; + + struct mlx5_wqe_ctrl_seg *doorbell_cseg; + struct mlx5_aso_cq cq; + + /* read only */ + struct mlx5_wq_cyc wq; + void __iomem *uar_map; + u32 sqn; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + +} ____cacheline_aligned_in_smp; + +static void mlx5_aso_free_cq(struct mlx5_aso_cq *cq) +{ + mlx5_wq_destroy(&cq->wq_ctrl); +} + +static int mlx5_aso_alloc_cq(struct mlx5_core_dev *mdev, int numa_node, + void *cqc_data, struct mlx5_aso_cq *cq) +{ + struct mlx5_core_cq *mcq = &cq->mcq; + struct mlx5_wq_param param; + int err; + u32 i; + + param.buf_numa_node = numa_node; + param.db_numa_node = numa_node; + + err = mlx5_cqwq_create(mdev, ¶m, cqc_data, &cq->wq, &cq->wq_ctrl); + if (err) + return err; + + mcq->cqe_sz = 64; + mcq->set_ci_db = cq->wq_ctrl.db.db; + mcq->arm_db = cq->wq_ctrl.db.db + 1; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + + cqe->op_own = 0xf1; + } + + cq->mdev = mdev; + + return 0; +} + +static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data) +{ + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_core_dev *mdev = cq->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + void *in, *cqc; + int inlen, eqn; + int err; + + err = mlx5_vector2eqn(mdev, 0, &eqn); + if (err) + return err; + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + + memcpy(cqc, cqc_data, MLX5_ST_SZ_BYTES(cqc)); + + mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); + + MLX5_SET(cqc, cqc, cq_period_mode, DIM_CQ_PERIOD_MODE_START_FROM_EQE); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out)); + + kvfree(in); + + return err; +} + +static void mlx5_aso_destroy_cq(struct mlx5_aso_cq *cq) +{ + mlx5_core_destroy_cq(cq->mdev, &cq->mcq); + mlx5_wq_destroy(&cq->wq_ctrl); +} + +static int mlx5_aso_create_cq(struct mlx5_core_dev *mdev, int numa_node, + struct mlx5_aso_cq *cq) +{ + void *cqc_data; + int err; + + cqc_data = kvzalloc(MLX5_ST_SZ_BYTES(cqc), GFP_KERNEL); + if (!cqc_data) + return -ENOMEM; + + MLX5_SET(cqc, cqc_data, log_cq_size, 1); + MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index); + if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128) + MLX5_SET(cqc, cqc_data, cqe_sz, CQE_STRIDE_128_PAD); + + err = mlx5_aso_alloc_cq(mdev, numa_node, cqc_data, cq); + if (err) { + mlx5_core_err(mdev, "Failed to alloc aso wq cq, err=%d\n", err); + goto err_out; + } + + err = create_aso_cq(cq, cqc_data); + if (err) { + mlx5_core_err(mdev, "Failed to create aso wq cq, err=%d\n", err); + goto err_free_cq; + } + + kvfree(cqc_data); + return 0; + +err_free_cq: + mlx5_aso_free_cq(cq); +err_out: + kvfree(cqc_data); + return err; +} + +static int mlx5_aso_alloc_sq(struct mlx5_core_dev *mdev, int numa_node, + void *sqc_data, struct mlx5_aso *sq) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, sqc_data, wq); + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5_wq_param param; + int err; + + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + + param.db_numa_node = numa_node; + param.buf_numa_node = numa_node; + err = mlx5_wq_cyc_create(mdev, ¶m, sqc_wq, wq, &sq->wq_ctrl); + if (err) + return err; + wq->db = &wq->db[MLX5_SND_DBR]; + + return 0; +} + +static int create_aso_sq(struct mlx5_core_dev *mdev, int pdn, + void *sqc_data, struct mlx5_aso *sq) +{ + void *in, *sqc, *wq; + int inlen, err; + u8 ts_format; + + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * sq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + memcpy(sqc, sqc_data, MLX5_ST_SZ_BYTES(sqc)); + MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); + + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, flush_in_error_en, 1); + + ts_format = mlx5_is_real_time_sq(mdev) ? + MLX5_TIMESTAMP_FORMAT_REAL_TIME : + MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; + MLX5_SET(sqc, sqc, ts_format, ts_format); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index); + MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); + + mlx5_fill_page_frag_array(&sq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn); + + kvfree(in); + + return err; +} + +static int mlx5_aso_set_sq_rdy(struct mlx5_core_dev *mdev, u32 sqn) +{ + void *in, *sqc; + int inlen, err; + + inlen = MLX5_ST_SZ_BYTES(modify_sq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST); + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY); + + err = mlx5_core_modify_sq(mdev, sqn, in); + + kvfree(in); + + return err; +} + +static int mlx5_aso_create_sq_rdy(struct mlx5_core_dev *mdev, u32 pdn, + void *sqc_data, struct mlx5_aso *sq) +{ + int err; + + err = create_aso_sq(mdev, pdn, sqc_data, sq); + if (err) + return err; + + err = mlx5_aso_set_sq_rdy(mdev, sq->sqn); + if (err) + mlx5_core_destroy_sq(mdev, sq->sqn); + + return err; +} + +static void mlx5_aso_free_sq(struct mlx5_aso *sq) +{ + mlx5_wq_destroy(&sq->wq_ctrl); +} + +static void mlx5_aso_destroy_sq(struct mlx5_aso *sq) +{ + mlx5_core_destroy_sq(sq->cq.mdev, sq->sqn); + mlx5_aso_free_sq(sq); +} + +static int mlx5_aso_create_sq(struct mlx5_core_dev *mdev, int numa_node, + u32 pdn, struct mlx5_aso *sq) +{ + void *sqc_data, *wq; + int err; + + sqc_data = kvzalloc(MLX5_ST_SZ_BYTES(sqc), GFP_KERNEL); + if (!sqc_data) + return -ENOMEM; + + wq = MLX5_ADDR_OF(sqc, sqc_data, wq); + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); + MLX5_SET(wq, wq, pd, pdn); + MLX5_SET(wq, wq, log_wq_sz, 1); + + err = mlx5_aso_alloc_sq(mdev, numa_node, sqc_data, sq); + if (err) { + mlx5_core_err(mdev, "Failed to alloc aso wq sq, err=%d\n", err); + goto err_out; + } + + err = mlx5_aso_create_sq_rdy(mdev, pdn, sqc_data, sq); + if (err) { + mlx5_core_err(mdev, "Failed to open aso wq sq, err=%d\n", err); + goto err_free_asosq; + } + + mlx5_core_dbg(mdev, "aso sq->sqn = 0x%x\n", sq->sqn); + + kvfree(sqc_data); + return 0; + +err_free_asosq: + mlx5_aso_free_sq(sq); +err_out: + kvfree(sqc_data); + return err; +} + +struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn) +{ + int numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); + struct mlx5_aso *aso; + int err; + + aso = kzalloc(sizeof(*aso), GFP_KERNEL); + if (!aso) + return ERR_PTR(-ENOMEM); + + err = mlx5_aso_create_cq(mdev, numa_node, &aso->cq); + if (err) + goto err_cq; + + err = mlx5_aso_create_sq(mdev, numa_node, pdn, aso); + if (err) + goto err_sq; + + return aso; + +err_sq: + mlx5_aso_destroy_cq(&aso->cq); +err_cq: + kfree(aso); + return ERR_PTR(err); +} + +void mlx5_aso_destroy(struct mlx5_aso *aso) +{ + if (IS_ERR_OR_NULL(aso)) + return; + + mlx5_aso_destroy_sq(aso); + mlx5_aso_destroy_cq(&aso->cq); + kfree(aso); +} + +void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt, + struct mlx5_aso_wqe *aso_wqe, + u32 obj_id, u32 opc_mode) +{ + struct mlx5_wqe_ctrl_seg *cseg = &aso_wqe->ctrl; + + cseg->opmod_idx_opcode = cpu_to_be32((opc_mode << MLX5_WQE_CTRL_WQE_OPC_MOD_SHIFT) | + (aso->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_ACCESS_ASO); + cseg->qpn_ds = cpu_to_be32((aso->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | ds_cnt); + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + cseg->general_id = cpu_to_be32(obj_id); +} + +void *mlx5_aso_get_wqe(struct mlx5_aso *aso) +{ + u16 pi; + + pi = mlx5_wq_cyc_ctr2ix(&aso->wq, aso->pc); + return mlx5_wq_cyc_get_wqe(&aso->wq, pi); +} + +void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data, + struct mlx5_wqe_ctrl_seg *doorbell_cseg) +{ + doorbell_cseg->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE; + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + if (with_data) + aso->pc += MLX5_ASO_WQEBBS_DATA; + else + aso->pc += MLX5_ASO_WQEBBS; + *aso->wq.db = cpu_to_be32(aso->pc); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + mlx5_write64((__be32 *)doorbell_cseg, aso->uar_map); + + /* Ensure doorbell is written on uar_page before poll_cq */ + WRITE_ONCE(doorbell_cseg, NULL); +} + +int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data) +{ + struct mlx5_aso_cq *cq = &aso->cq; + struct mlx5_cqe64 *cqe; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return -ETIMEDOUT; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + mlx5_cqwq_pop(&cq->wq); + + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { + struct mlx5_err_cqe *err_cqe; + + mlx5_core_err(cq->mdev, "Bad OP in ASOSQ CQE: 0x%x\n", + get_cqe_opcode(cqe)); + + err_cqe = (struct mlx5_err_cqe *)cqe; + mlx5_core_err(cq->mdev, "vendor_err_synd=%x\n", + err_cqe->vendor_err_synd); + mlx5_core_err(cq->mdev, "syndrome=%x\n", + err_cqe->syndrome); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, + 16, 1, err_cqe, + sizeof(*err_cqe), false); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + if (with_data) + aso->cc += MLX5_ASO_WQEBBS_DATA; + else + aso->cc += MLX5_ASO_WQEBBS; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h new file mode 100644 index 000000000000..2d40dcf9d42e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_LIB_ASO_H__ +#define __MLX5_LIB_ASO_H__ + +#include <linux/mlx5/qp.h> +#include "mlx5_core.h" + +#define MLX5_ASO_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe), MLX5_SEND_WQE_BB)) +#define MLX5_ASO_WQEBBS_DATA \ + (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe_data), MLX5_SEND_WQE_BB)) +#define ASO_CTRL_READ_EN BIT(0) +#define MLX5_WQE_CTRL_WQE_OPC_MOD_SHIFT 24 +#define MLX5_MACSEC_ASO_DS_CNT (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe), MLX5_SEND_WQE_DS)) + +struct mlx5_wqe_aso_ctrl_seg { + __be32 va_h; + __be32 va_l; /* include read_enable */ + __be32 l_key; + u8 data_mask_mode; + u8 condition_1_0_operand; + u8 condition_1_0_offset; + u8 data_offset_condition_operand; + __be32 condition_0_data; + __be32 condition_0_mask; + __be32 condition_1_data; + __be32 condition_1_mask; + __be64 bitwise_data; + __be64 data_mask; +}; + +struct mlx5_wqe_aso_data_seg { + __be32 bytewise_data[16]; +}; + +struct mlx5_aso_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_aso_ctrl_seg aso_ctrl; +}; + +struct mlx5_aso_wqe_data { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_aso_ctrl_seg aso_ctrl; + struct mlx5_wqe_aso_data_seg aso_data; +}; + +enum { + MLX5_ASO_LOGICAL_AND, + MLX5_ASO_LOGICAL_OR, +}; + +enum { + MLX5_ASO_ALWAYS_FALSE, + MLX5_ASO_ALWAYS_TRUE, + MLX5_ASO_EQUAL, + MLX5_ASO_NOT_EQUAL, + MLX5_ASO_GREATER_OR_EQUAL, + MLX5_ASO_LESSER_OR_EQUAL, + MLX5_ASO_LESSER, + MLX5_ASO_GREATER, + MLX5_ASO_CYCLIC_GREATER, + MLX5_ASO_CYCLIC_LESSER, +}; + +enum { + MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT, + MLX5_ASO_DATA_MASK_MODE_BYTEWISE_64BYTE, + MLX5_ASO_DATA_MASK_MODE_CALCULATED_64BYTE, +}; + +enum { + MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER = 0x2, + MLX5_ACCESS_ASO_OPC_MOD_MACSEC = 0x5, +}; + +struct mlx5_aso; + +void *mlx5_aso_get_wqe(struct mlx5_aso *aso); +void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt, + struct mlx5_aso_wqe *aso_wqe, + u32 obj_id, u32 opc_mode); +void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data, + struct mlx5_wqe_ctrl_seg *doorbell_cseg); +int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data); + +struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn); +void mlx5_aso_destroy(struct mlx5_aso *aso); +#endif /* __MLX5_LIB_ASO_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 91e806c1aa21..d3a9ae80fd30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -65,6 +65,8 @@ enum { MLX5_MTPPS_FS_TIME_STAMP = BIT(0x4), MLX5_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5), MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), + MLX5_MTPPS_FS_NPPS_PERIOD = BIT(0x9), + MLX5_MTPPS_FS_OUT_PULSE_DURATION_NS = BIT(0xa), }; static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev) @@ -72,6 +74,13 @@ static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev) return (mlx5_is_real_time_rq(mdev) || mlx5_is_real_time_sq(mdev)); } +static bool mlx5_npps_real_time_supported(struct mlx5_core_dev *mdev) +{ + return (mlx5_real_time_mode(mdev) && + MLX5_CAP_MCAM_FEATURE(mdev, npps_period) && + MLX5_CAP_MCAM_FEATURE(mdev, out_pulse_duration_ns)); +} + static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev) { return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify); @@ -459,9 +468,95 @@ static u64 perout_conf_internal_timer(struct mlx5_core_dev *mdev, s64 sec) return find_target_cycles(mdev, target_ns); } -static u64 perout_conf_real_time(s64 sec) +static u64 perout_conf_real_time(s64 sec, u32 nsec) +{ + return (u64)nsec | (u64)sec << 32; +} + +static int perout_conf_1pps(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq, + u64 *time_stamp, bool real_time) +{ + struct timespec64 ts; + s64 ns; + + ts.tv_nsec = rq->perout.period.nsec; + ts.tv_sec = rq->perout.period.sec; + ns = timespec64_to_ns(&ts); + + if ((ns >> 1) != 500000000LL) + return -EINVAL; + + *time_stamp = real_time ? perout_conf_real_time(rq->perout.start.sec, 0) : + perout_conf_internal_timer(mdev, rq->perout.start.sec); + + return 0; +} + +#define MLX5_MAX_PULSE_DURATION (BIT(__mlx5_bit_sz(mtpps_reg, out_pulse_duration_ns)) - 1) +static int mlx5_perout_conf_out_pulse_duration(struct mlx5_core_dev *mdev, + struct ptp_clock_request *rq, + u32 *out_pulse_duration_ns) { - return (u64)sec << 32; + struct mlx5_pps *pps_info = &mdev->clock.pps_info; + u32 out_pulse_duration; + struct timespec64 ts; + + if (rq->perout.flags & PTP_PEROUT_DUTY_CYCLE) { + ts.tv_sec = rq->perout.on.sec; + ts.tv_nsec = rq->perout.on.nsec; + out_pulse_duration = (u32)timespec64_to_ns(&ts); + } else { + /* out_pulse_duration_ns should be up to 50% of the + * pulse period as default + */ + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + out_pulse_duration = (u32)timespec64_to_ns(&ts) >> 1; + } + + if (out_pulse_duration < pps_info->min_out_pulse_duration_ns || + out_pulse_duration > MLX5_MAX_PULSE_DURATION) { + mlx5_core_err(mdev, "NPPS pulse duration %u is not in [%llu, %lu]\n", + out_pulse_duration, pps_info->min_out_pulse_duration_ns, + MLX5_MAX_PULSE_DURATION); + return -EINVAL; + } + *out_pulse_duration_ns = out_pulse_duration; + + return 0; +} + +static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq, + u32 *field_select, u32 *out_pulse_duration_ns, + u64 *period, u64 *time_stamp) +{ + struct mlx5_pps *pps_info = &mdev->clock.pps_info; + struct ptp_clock_time *time = &rq->perout.start; + struct timespec64 ts; + + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + if (timespec64_to_ns(&ts) < pps_info->min_npps_period) { + mlx5_core_err(mdev, "NPPS period is lower than minimal npps period %llu\n", + pps_info->min_npps_period); + return -EINVAL; + } + *period = perout_conf_real_time(rq->perout.period.sec, rq->perout.period.nsec); + + if (mlx5_perout_conf_out_pulse_duration(mdev, rq, out_pulse_duration_ns)) + return -EINVAL; + + *time_stamp = perout_conf_real_time(time->sec, time->nsec); + *field_select |= MLX5_MTPPS_FS_NPPS_PERIOD | + MLX5_MTPPS_FS_OUT_PULSE_DURATION_NS; + + return 0; +} + +static bool mlx5_perout_verify_flags(struct mlx5_core_dev *mdev, unsigned int flags) +{ + return ((!mlx5_npps_real_time_supported(mdev) && flags) || + (mlx5_npps_real_time_supported(mdev) && flags & ~PTP_PEROUT_DUTY_CYCLE)); } static int mlx5_perout_configure(struct ptp_clock_info *ptp, @@ -474,20 +569,20 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, container_of(clock, struct mlx5_core_dev, clock); bool rt_mode = mlx5_real_time_mode(mdev); u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; - struct timespec64 ts; + u32 out_pulse_duration_ns = 0; u32 field_select = 0; + u64 npps_period = 0; u64 time_stamp = 0; u8 pin_mode = 0; u8 pattern = 0; int pin = -1; int err = 0; - s64 ns; if (!MLX5_PPS_CAP(mdev)) return -EOPNOTSUPP; /* Reject requests with unsupported flags */ - if (rq->perout.flags) + if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) return -EOPNOTSUPP; if (rq->perout.index >= clock->ptp_info.n_pins) @@ -500,29 +595,25 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, if (on) { bool rt_mode = mlx5_real_time_mode(mdev); - s64 sec = rq->perout.start.sec; - - if (rq->perout.start.nsec) - return -EINVAL; pin_mode = MLX5_PIN_MODE_OUT; pattern = MLX5_OUT_PATTERN_PERIODIC; - ts.tv_sec = rq->perout.period.sec; - ts.tv_nsec = rq->perout.period.nsec; - ns = timespec64_to_ns(&ts); - if ((ns >> 1) != 500000000LL) + if (rt_mode && rq->perout.start.sec > U32_MAX) return -EINVAL; - if (rt_mode && sec > U32_MAX) - return -EINVAL; - - time_stamp = rt_mode ? perout_conf_real_time(sec) : - perout_conf_internal_timer(mdev, sec); - field_select |= MLX5_MTPPS_FS_PIN_MODE | MLX5_MTPPS_FS_PATTERN | MLX5_MTPPS_FS_TIME_STAMP; + + if (mlx5_npps_real_time_supported(mdev)) + err = perout_conf_npps_real_time(mdev, rq, &field_select, + &out_pulse_duration_ns, &npps_period, + &time_stamp); + else + err = perout_conf_1pps(mdev, rq, &time_stamp, rt_mode); + if (err) + return err; } MLX5_SET(mtpps_reg, in, pin, pin); @@ -531,7 +622,8 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, MLX5_SET(mtpps_reg, in, enable, on); MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); MLX5_SET(mtpps_reg, in, field_select, field_select); - + MLX5_SET64(mtpps_reg, in, npps_period, npps_period); + MLX5_SET(mtpps_reg, in, out_pulse_duration_ns, out_pulse_duration_ns); err = mlx5_set_mtpps(mdev, in, sizeof(in)); if (err) return err; @@ -687,6 +779,13 @@ static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) clock->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out, cap_max_num_of_pps_out_pins); + if (MLX5_CAP_MCAM_FEATURE(mdev, npps_period)) + clock->pps_info.min_npps_period = 1 << MLX5_GET(mtpps_reg, out, + cap_log_min_npps_period); + if (MLX5_CAP_MCAM_FEATURE(mdev, out_pulse_duration_ns)) + clock->pps_info.min_out_pulse_duration_ns = 1 << MLX5_GET(mtpps_reg, out, + cap_log_min_out_pulse_duration_ns); + clock->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); clock->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); clock->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index bced2efe9bef..adefde3ea941 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -14,7 +14,7 @@ static LIST_HEAD(devcom_list); struct mlx5_devcom_component { struct { void *data; - } device[MLX5_MAX_PORTS]; + } device[MLX5_DEVCOM_PORTS_SUPPORTED]; mlx5_devcom_event_handler_t handler; struct rw_semaphore sem; @@ -25,7 +25,7 @@ struct mlx5_devcom_list { struct list_head list; struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS]; - struct mlx5_core_dev *devs[MLX5_MAX_PORTS]; + struct mlx5_core_dev *devs[MLX5_DEVCOM_PORTS_SUPPORTED]; }; struct mlx5_devcom { @@ -74,13 +74,15 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return NULL; + if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED) + return NULL; sguid0 = mlx5_query_nic_system_image_guid(dev); list_for_each_entry(iter, &devcom_list, list) { struct mlx5_core_dev *tmp_dev = NULL; idx = -1; - for (i = 0; i < MLX5_MAX_PORTS; i++) { + for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) { if (iter->devs[i]) tmp_dev = iter->devs[i]; else @@ -134,11 +136,11 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom) kfree(devcom); - for (i = 0; i < MLX5_MAX_PORTS; i++) + for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) if (priv->devs[i]) break; - if (i != MLX5_MAX_PORTS) + if (i != MLX5_DEVCOM_PORTS_SUPPORTED) return; list_del(&priv->list); @@ -191,7 +193,7 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom, comp = &devcom->priv->components[id]; down_write(&comp->sem); - for (i = 0; i < MLX5_MAX_PORTS; i++) + for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) if (i != devcom->idx && comp->device[i].data) { err = comp->handler(event, comp->device[i].data, event_data); @@ -239,7 +241,7 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, return NULL; } - for (i = 0; i < MLX5_MAX_PORTS; i++) + for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) if (i != devcom->idx) break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index 939d5bf1581b..94313c18bb64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -6,6 +6,8 @@ #include <linux/mlx5/driver.h> +#define MLX5_DEVCOM_PORTS_SUPPORTED 2 + enum mlx5_devcom_components { MLX5_DEVCOM_ESW_OFFLOADS, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c index 3d5e57ff558c..9482e51ac82a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c @@ -12,13 +12,16 @@ struct mlx5_dm { spinlock_t lock; unsigned long *steering_sw_icm_alloc_blocks; unsigned long *header_modify_sw_icm_alloc_blocks; + unsigned long *header_modify_pattern_sw_icm_alloc_blocks; }; struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) { + u64 header_modify_pattern_icm_blocks = 0; u64 header_modify_icm_blocks = 0; u64 steering_icm_blocks = 0; struct mlx5_dm *dm; + bool support_v2; if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)) return NULL; @@ -35,8 +38,7 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); dm->steering_sw_icm_alloc_blocks = - kcalloc(BITS_TO_LONGS(steering_icm_blocks), - sizeof(unsigned long), GFP_KERNEL); + bitmap_zalloc(steering_icm_blocks, GFP_KERNEL); if (!dm->steering_sw_icm_alloc_blocks) goto err_steering; } @@ -47,16 +49,33 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); dm->header_modify_sw_icm_alloc_blocks = - kcalloc(BITS_TO_LONGS(header_modify_icm_blocks), - sizeof(unsigned long), GFP_KERNEL); + bitmap_zalloc(header_modify_icm_blocks, GFP_KERNEL); if (!dm->header_modify_sw_icm_alloc_blocks) goto err_modify_hdr; } + support_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) && + MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2) && + MLX5_CAP64_DEV_MEM(dev, header_modify_pattern_sw_icm_start_address); + + if (support_v2) { + header_modify_pattern_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_pattern_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->header_modify_pattern_sw_icm_alloc_blocks = + bitmap_zalloc(header_modify_pattern_icm_blocks, GFP_KERNEL); + if (!dm->header_modify_pattern_sw_icm_alloc_blocks) + goto err_pattern; + } + return dm; +err_pattern: + bitmap_free(dm->header_modify_sw_icm_alloc_blocks); + err_modify_hdr: - kfree(dm->steering_sw_icm_alloc_blocks); + bitmap_free(dm->steering_sw_icm_alloc_blocks); err_steering: kfree(dm); @@ -75,7 +94,7 @@ void mlx5_dm_cleanup(struct mlx5_core_dev *dev) WARN_ON(!bitmap_empty(dm->steering_sw_icm_alloc_blocks, BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); - kfree(dm->steering_sw_icm_alloc_blocks); + bitmap_free(dm->steering_sw_icm_alloc_blocks); } if (dm->header_modify_sw_icm_alloc_blocks) { @@ -83,7 +102,15 @@ void mlx5_dm_cleanup(struct mlx5_core_dev *dev) BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_sw_icm_size) - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); - kfree(dm->header_modify_sw_icm_alloc_blocks); + bitmap_free(dm->header_modify_sw_icm_alloc_blocks); + } + + if (dm->header_modify_pattern_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->header_modify_pattern_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, + log_header_modify_pattern_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + bitmap_free(dm->header_modify_pattern_sw_icm_alloc_blocks); } kfree(dm); @@ -130,6 +157,13 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, log_header_modify_sw_icm_size); block_map = dm->header_modify_sw_icm_alloc_blocks; break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + header_modify_pattern_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, + log_header_modify_pattern_sw_icm_size); + block_map = dm->header_modify_pattern_sw_icm_alloc_blocks; + break; default: return -EINVAL; } @@ -203,6 +237,11 @@ int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address); block_map = dm->header_modify_sw_icm_alloc_blocks; break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + header_modify_pattern_sw_icm_start_address); + block_map = dm->header_modify_pattern_sw_icm_alloc_blocks; + break; default: return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index 97e5845b4cfd..df58cba37930 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -124,6 +124,10 @@ u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) if (mlx5_chains_ignore_flow_level_supported(chains)) return UINT_MAX; + if (!chains->dev->priv.eswitch || + chains->dev->priv.eswitch->mode != MLX5_ESWITCH_OFFLOADS) + return 1; + /* We should get here only for eswitch case */ return FDB_TC_MAX_PRIO; } @@ -208,7 +212,7 @@ static int create_chain_restore(struct fs_chain *chain) { struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch; - char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)]; + u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; struct mlx5_fs_chains *chains = chain->chains; enum mlx5e_tc_attr_to_reg chain_to_reg; struct mlx5_modify_hdr *mod_hdr; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c index b63dec24747a..b78f2ba25c19 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -408,6 +408,8 @@ static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev, for (tt = 0; tt < MLX5_NUM_TT; tt++) { struct mlx5_ttc_rule *rule = &rules[tt]; + if (test_bit(tt, params->ignore_dests)) + continue; rule->rule = mlx5_generate_inner_ttc_rule(dev, ft, ¶ms->dests[tt], ttc_rules[tt].etype, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h index 4bad6a5fde56..f240ffe5116c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h @@ -92,13 +92,6 @@ mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent) { } - -static inline int -mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent, - void *buf, int len) -{ - return 0; -} #endif #endif /* __LIB_HV_VHCA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 2f536c5d30b1..032adb21ad4b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -83,6 +83,7 @@ int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, voi enum { MLX5_ACCEL_OBJ_TLS_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS, MLX5_ACCEL_OBJ_IPSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_IPSEC, + MLX5_ACCEL_OBJ_MACSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_MACSEC, }; int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index 839a01da110f..8ff16318e32d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -122,7 +122,7 @@ void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) { struct mlx5_mpfs *mpfs = dev->priv.mpfs; - if (!MLX5_ESWITCH_MANAGER(dev)) + if (!mpfs) return; WARN_ON(!hlist_empty(mpfs->hash)); @@ -137,7 +137,7 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) int err = 0; u32 index; - if (!MLX5_ESWITCH_MANAGER(dev)) + if (!mpfs) return 0; mutex_lock(&mpfs->lock); @@ -185,7 +185,7 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) int err = 0; u32 index; - if (!MLX5_ESWITCH_MANAGER(dev)) + if (!mpfs) return 0; mutex_lock(&mpfs->lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c index e042e0924079..4571c56ec3c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* Copyright (c) 2019 Mellanox Technologies. */ -#include <linux/module.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/port.h> #include "mlx5_core.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c new file mode 100644 index 000000000000..9b8c051ccf65 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#include <linux/kernel.h> +#include <linux/mlx5/driver.h> + +#include "smfs.h" + +struct mlx5dr_matcher * +mlx5_smfs_matcher_create(struct mlx5dr_table *table, u32 priority, struct mlx5_flow_spec *spec) +{ + struct mlx5dr_match_parameters matcher_mask = {}; + + matcher_mask.match_buf = (u64 *)&spec->match_criteria; + matcher_mask.match_sz = DR_SZ_MATCH_PARAM; + + return mlx5dr_matcher_create(table, priority, spec->match_criteria_enable, &matcher_mask); +} + +void +mlx5_smfs_matcher_destroy(struct mlx5dr_matcher *matcher) +{ + mlx5dr_matcher_destroy(matcher); +} + +struct mlx5dr_table * +mlx5_smfs_table_get_from_fs_ft(struct mlx5_flow_table *ft) +{ + return mlx5dr_table_get_from_fs_ft(ft); +} + +struct mlx5dr_action * +mlx5_smfs_action_create_dest_table(struct mlx5dr_table *table) +{ + return mlx5dr_action_create_dest_table(table); +} + +struct mlx5dr_action * +mlx5_smfs_action_create_flow_counter(u32 counter_id) +{ + return mlx5dr_action_create_flow_counter(counter_id); +} + +void +mlx5_smfs_action_destroy(struct mlx5dr_action *action) +{ + mlx5dr_action_destroy(action); +} + +struct mlx5dr_rule * +mlx5_smfs_rule_create(struct mlx5dr_matcher *matcher, struct mlx5_flow_spec *spec, + size_t num_actions, struct mlx5dr_action *actions[], + u32 flow_source) +{ + struct mlx5dr_match_parameters value = {}; + + value.match_buf = (u64 *)spec->match_value; + value.match_sz = DR_SZ_MATCH_PARAM; + + return mlx5dr_rule_create(matcher, &value, num_actions, actions, flow_source); +} + +void +mlx5_smfs_rule_destroy(struct mlx5dr_rule *rule) +{ + mlx5dr_rule_destroy(rule); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h new file mode 100644 index 000000000000..452d0df339ac --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __MLX5_LIB_SMFS_H__ +#define __MLX5_LIB_SMFS_H__ + +#include "steering/mlx5dr.h" +#include "steering/dr_types.h" + +struct mlx5dr_matcher * +mlx5_smfs_matcher_create(struct mlx5dr_table *table, u32 priority, struct mlx5_flow_spec *spec); + +void +mlx5_smfs_matcher_destroy(struct mlx5dr_matcher *matcher); + +struct mlx5dr_table * +mlx5_smfs_table_get_from_fs_ft(struct mlx5_flow_table *ft); + +struct mlx5dr_action * +mlx5_smfs_action_create_dest_table(struct mlx5dr_table *table); + +struct mlx5dr_action * +mlx5_smfs_action_create_flow_counter(u32 counter_id); + +void +mlx5_smfs_action_destroy(struct mlx5dr_action *action); + +struct mlx5dr_rule * +mlx5_smfs_rule_create(struct mlx5dr_matcher *matcher, struct mlx5_flow_spec *spec, + size_t num_actions, struct mlx5dr_action *actions[], + u32 flow_source); + +void +mlx5_smfs_rule_destroy(struct mlx5dr_rule *rule); + +#endif /* __MLX5_LIB_SMFS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c index c1df0d3595d8..696e45e2bd06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -10,6 +10,7 @@ struct mlx5_timeouts { static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = { [MLX5_TO_FW_PRE_INIT_TIMEOUT_MS] = 120000, + [MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS] = 7200000, [MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000, [MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2, [MLX5_TO_FW_INIT_MS] = 2000, @@ -31,20 +32,17 @@ static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_type dev->timeouts->to[type] = val; } -void mlx5_tout_set_def_val(struct mlx5_core_dev *dev) +int mlx5_tout_init(struct mlx5_core_dev *dev) { int i; - for (i = 0; i < MAX_TIMEOUT_TYPES; i++) - tout_set(dev, tout_def_sw_val[i], i); -} - -int mlx5_tout_init(struct mlx5_core_dev *dev) -{ dev->timeouts = kmalloc(sizeof(*dev->timeouts), GFP_KERNEL); if (!dev->timeouts) return -ENOMEM; + for (i = 0; i < MAX_TIMEOUT_TYPES; i++) + tout_set(dev, tout_def_sw_val[i], i); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h index 1c42ead782fa..bc9e9aeda847 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h @@ -7,6 +7,7 @@ enum mlx5_timeouts_types { /* pre init timeouts (not read from FW) */ MLX5_TO_FW_PRE_INIT_TIMEOUT_MS, + MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS, MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS, MLX5_TO_FW_PRE_INIT_WAIT_MS, @@ -34,7 +35,6 @@ int mlx5_tout_init(struct mlx5_core_dev *dev); void mlx5_tout_cleanup(struct mlx5_core_dev *dev); void mlx5_tout_query_iseg(struct mlx5_core_dev *dev); int mlx5_tout_query_dtor(struct mlx5_core_dev *dev); -void mlx5_tout_set_def_val(struct mlx5_core_dev *dev); u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type); #define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c index e3b0a131c3e1..d55e15c1f380 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c @@ -31,7 +31,6 @@ */ #include <linux/kernel.h> -#include <linux/module.h> #include <linux/refcount.h> #include <linux/mlx5/driver.h> #include <net/vxlan.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 7df9c7f8d9c8..283c4cc28944 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -62,9 +62,7 @@ #include "lib/mlx5.h" #include "lib/tout.h" #include "fpga/core.h" -#include "fpga/ipsec.h" -#include "accel/ipsec.h" -#include "accel/tls.h" +#include "en_accel/ipsec.h" #include "lib/clock.h" #include "lib/vxlan.h" #include "lib/geneve.h" @@ -92,12 +90,16 @@ module_param_named(prof_sel, prof_sel, uint, 0444); MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); static u32 sw_owner_id[4]; +#define MAX_SW_VHCA_ID (BIT(__mlx5_bit_sz(cmd_hca_cap_2, sw_vhca_id)) - 1) +static DEFINE_IDA(sw_vhca_ida); enum { MLX5_ATOMIC_REQ_MODE_BE = 0x0, MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, }; +#define LOG_MAX_SUPPORTED_QPS 0xff + static struct mlx5_profile profile[] = { [0] = { .mask = 0, @@ -109,7 +111,7 @@ static struct mlx5_profile profile[] = { [2] = { .mask = MLX5_PROF_MASK_QP_SIZE | MLX5_PROF_MASK_MR_CACHE, - .log_max_qp = 18, + .log_max_qp = LOG_MAX_SUPPORTED_QPS, .mr_cache[0] = { .size = 500, .limit = 250 @@ -177,30 +179,30 @@ static struct mlx5_profile profile[] = { }, }; -static int fw_initializing(struct mlx5_core_dev *dev) -{ - return ioread32be(&dev->iseg->initializing) >> 31; -} - static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, u32 warn_time_mili) { unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili); unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); + u32 fw_initializing; int err = 0; - while (fw_initializing(dev)) { - if (time_after(jiffies, end)) { + do { + fw_initializing = ioread32be(&dev->iseg->initializing); + if (!(fw_initializing >> 31)) + break; + if (time_after(jiffies, end) || + test_and_clear_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) { err = -EBUSY; break; } if (warn_time_mili && time_after(jiffies, warn)) { - mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n", - jiffies_to_msecs(end - warn) / 1000); + mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds (0x%x)\n", + jiffies_to_msecs(end - warn) / 1000, fw_initializing); warn = jiffies + msecs_to_jiffies(warn_time_mili); } msleep(mlx5_tout_ms(dev, FW_PRE_INIT_WAIT)); - } + } while (true); return err; } @@ -314,13 +316,6 @@ struct mlx5_reg_host_endianness { u8 rsvd[15]; }; -#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos)) - -enum { - MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) | - MLX5_DEV_CAP_FLAG_DCT, -}; - static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size) { switch (size) { @@ -484,10 +479,69 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx) return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP); } +static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + &val); + if (!err) + return val.vu32; + mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); + return err; +} + +bool mlx5_is_roce_on(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + &val); + + if (!err) + return val.vbool; + + mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); + return MLX5_CAP_GEN(dev, roce); +} +EXPORT_SYMBOL(mlx5_is_roce_on); + +static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx) +{ + void *set_hca_cap; + int err; + + if (!MLX5_CAP_GEN_MAX(dev, hca_cap_2)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2); + if (err) + return err; + + if (!MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) || + !(dev->priv.sw_vhca_id > 0)) + return 0; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, + capability); + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL_2]->cur, + MLX5_ST_SZ_BYTES(cmd_hca_cap_2)); + MLX5_SET(cmd_hca_cap_2, set_hca_cap, sw_vhca_id_valid, 1); + + return set_caps(dev, set_ctx, MLX5_CAP_GENERAL_2); +} + static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) { struct mlx5_profile *prof = &dev->profile; void *set_hca_cap; + int max_uc_list; int err; err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); @@ -507,7 +561,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) to_fw_pkey_sz(dev, 128)); /* Check log_max_qp from HCA caps to set in current profile */ - if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { + if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) { + prof->log_max_qp = min_t(u8, 18, MLX5_CAP_GEN_MAX(dev, log_max_qp)); + } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", prof->log_max_qp, MLX5_CAP_GEN_MAX(dev, log_max_qp)); @@ -559,7 +615,13 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); if (MLX5_CAP_GEN(dev, roce_rw_supported)) - MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev)); + MLX5_SET(cmd_hca_cap, set_hca_cap, roce, + mlx5_is_roce_on(dev)); + + max_uc_list = max_uc_list_get_devlink_param(dev); + if (max_uc_list > 0) + MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_current_uc_list, + ilog2(max_uc_list)); return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); } @@ -580,7 +642,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) */ static bool is_roce_fw_disabled(struct mlx5_core_dev *dev) { - return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_init_enabled(dev)) || + return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) || (!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce)); } @@ -609,6 +671,33 @@ static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx) return err; } +static int handle_hca_cap_port_selection(struct mlx5_core_dev *dev, + void *set_ctx) +{ + void *set_hca_cap; + int err; + + if (!MLX5_CAP_GEN(dev, port_selection_cap)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_PORT_SELECTION); + if (err) + return err; + + if (MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass) || + !MLX5_CAP_PORT_SELECTION_MAX(dev, port_select_flow_table_bypass)) + return 0; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_PORT_SELECTION]->cur, + MLX5_ST_SZ_BYTES(port_selection_cap)); + MLX5_SET(port_selection_cap, set_hca_cap, port_select_flow_table_bypass, 1); + + err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION); + + return err; +} + static int set_hca_cap(struct mlx5_core_dev *dev) { int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); @@ -646,6 +735,20 @@ static int set_hca_cap(struct mlx5_core_dev *dev) goto out; } + memset(set_ctx, 0, set_sz); + err = handle_hca_cap_2(dev, set_ctx); + if (err) { + mlx5_core_err(dev, "handle_hca_cap_2 failed\n"); + goto out; + } + + memset(set_ctx, 0, set_sz); + err = handle_hca_cap_port_selection(dev, set_ctx); + if (err) { + mlx5_core_err(dev, "handle_hca_cap_port_selection failed\n"); + goto out; + } + out: kfree(set_ctx); return err; @@ -711,10 +814,9 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI); err = mlx5_cmd_exec_inout(dev, query_issi, query_in, query_out); if (err) { - u32 syndrome; - u8 status; + u32 syndrome = MLX5_GET(query_issi_out, query_out, syndrome); + u8 status = MLX5_GET(query_issi_out, query_out, status); - mlx5_cmd_mbox_status(query_out, &status, &syndrome); if (!status || syndrome == MLX5_DRIVER_SYND) { mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n", err, status, syndrome); @@ -914,6 +1016,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_sf_table_cleanup; } + err = mlx5_fs_core_alloc(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc flow steering\n"); + goto err_fs; + } + dev->dm = mlx5_dm_create(dev); if (IS_ERR(dev->dm)) mlx5_core_warn(dev, "Failed to init device memory%d\n", err); @@ -924,6 +1032,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) return 0; +err_fs: + mlx5_sf_table_cleanup(dev); err_sf_table_cleanup: mlx5_sf_hw_table_cleanup(dev); err_sf_hw_table_cleanup: @@ -961,6 +1071,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_hv_vhca_destroy(dev->hv_vhca); mlx5_fw_tracer_destroy(dev->tracer); mlx5_dm_cleanup(dev); + mlx5_fs_core_free(dev); mlx5_sf_table_cleanup(dev); mlx5_sf_hw_table_cleanup(dev); mlx5_vhca_event_cleanup(dev); @@ -981,7 +1092,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_devcom_unregister_device(dev->priv.devcom); } -static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) +static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout) { int err; @@ -992,15 +1103,13 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) if (mlx5_core_is_pf(dev)) pcie_print_link_status(dev->pdev); - mlx5_tout_set_def_val(dev); - /* wait for firmware to accept initialization segments configurations */ - err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT), + err = wait_fw_init(dev, timeout, mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL)); if (err) { mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n", - mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); + timeout); return err; } @@ -1021,10 +1130,12 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP); + mlx5_start_health_poll(dev); + err = mlx5_core_enable_hca(dev, 0); if (err) { mlx5_core_err(dev, "enable hca failed\n"); - goto err_cmd_cleanup; + goto stop_health_poll; } err = mlx5_core_set_issi(dev); @@ -1076,8 +1187,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) mlx5_core_err(dev, "query hca failed\n"); goto reclaim_boot_pages; } - - mlx5_start_health_poll(dev); + mlx5_start_health_fw_log_up(dev); return 0; @@ -1085,6 +1195,8 @@ reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); err_disable_hca: mlx5_core_disable_hca(dev, 0); +stop_health_poll: + mlx5_stop_health_poll(dev, boot); err_cmd_cleanup: mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); @@ -1096,7 +1208,6 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) { int err; - mlx5_stop_health_poll(dev, boot); err = mlx5_cmd_teardown_hca(dev); if (err) { mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n"); @@ -1104,6 +1215,7 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) } mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev, 0); + mlx5_stop_health_poll(dev, boot); mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); @@ -1159,15 +1271,7 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_fpga_start; } - mlx5_accel_ipsec_init(dev); - - err = mlx5_accel_tls_init(dev); - if (err) { - mlx5_core_err(dev, "TLS device start failed %d\n", err); - goto err_tls_start; - } - - err = mlx5_init_fs(dev); + err = mlx5_fs_core_init(dev); if (err) { mlx5_core_err(dev, "Failed to init flow steering\n"); goto err_fs; @@ -1212,11 +1316,8 @@ err_ec: err_vhca: mlx5_vhca_event_stop(dev); err_set_hca: - mlx5_cleanup_fs(dev); + mlx5_fs_core_cleanup(dev); err_fs: - mlx5_accel_tls_cleanup(dev); -err_tls_start: - mlx5_accel_ipsec_cleanup(dev); mlx5_fpga_device_stop(dev); err_fpga_start: mlx5_rsc_dump_cleanup(dev); @@ -1237,13 +1338,12 @@ static void mlx5_unload(struct mlx5_core_dev *dev) { mlx5_sf_dev_table_destroy(dev); mlx5_sriov_detach(dev); + mlx5_eswitch_disable(dev->priv.eswitch); mlx5_lag_remove_mdev(dev); mlx5_ec_cleanup(dev); mlx5_sf_hw_table_destroy(dev); mlx5_vhca_event_stop(dev); - mlx5_cleanup_fs(dev); - mlx5_accel_ipsec_cleanup(dev); - mlx5_accel_tls_cleanup(dev); + mlx5_fs_core_cleanup(dev); mlx5_fpga_device_stop(dev); mlx5_rsc_dump_cleanup(dev); mlx5_hv_vhca_cleanup(dev->hv_vhca); @@ -1258,12 +1358,14 @@ static void mlx5_unload(struct mlx5_core_dev *dev) int mlx5_init_one(struct mlx5_core_dev *dev) { + struct devlink *devlink = priv_to_devlink(dev); int err = 0; + devl_lock(devlink); mutex_lock(&dev->intf_state_mutex); dev->state = MLX5_DEVICE_STATE_UP; - err = mlx5_function_setup(dev, true); + err = mlx5_function_setup(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); if (err) goto err_function; @@ -1288,6 +1390,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev) goto err_register; mutex_unlock(&dev->intf_state_mutex); + devl_unlock(devlink); return 0; err_register: @@ -1302,11 +1405,15 @@ function_teardown: err_function: dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mutex_unlock(&dev->intf_state_mutex); + devl_unlock(devlink); return err; } void mlx5_uninit_one(struct mlx5_core_dev *dev) { + struct devlink *devlink = priv_to_devlink(dev); + + devl_lock(devlink); mutex_lock(&dev->intf_state_mutex); mlx5_unregister_device(dev); @@ -1325,12 +1432,15 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev) mlx5_function_teardown(dev, true); out: mutex_unlock(&dev->intf_state_mutex); + devl_unlock(devlink); } -int mlx5_load_one(struct mlx5_core_dev *dev) +int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery) { int err = 0; + u64 timeout; + devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { mlx5_core_warn(dev, "interface is up, NOP\n"); @@ -1339,7 +1449,11 @@ int mlx5_load_one(struct mlx5_core_dev *dev) /* remove any previous indication of internal error */ dev->state = MLX5_DEVICE_STATE_UP; - err = mlx5_function_setup(dev, false); + if (recovery) + timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT); + else + timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT); + err = mlx5_function_setup(dev, false, timeout); if (err) goto err_function; @@ -1368,8 +1482,20 @@ out: return err; } -void mlx5_unload_one(struct mlx5_core_dev *dev) +int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery) +{ + struct devlink *devlink = priv_to_devlink(dev); + int ret; + + devl_lock(devlink); + ret = mlx5_load_one_devl_locked(dev, recovery); + devl_unlock(devlink); + return ret; +} + +void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev) { + devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&dev->intf_state_mutex); mlx5_detach_device(dev); @@ -1387,6 +1513,15 @@ out: mutex_unlock(&dev->intf_state_mutex); } +void mlx5_unload_one(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + + devl_lock(devlink); + mlx5_unload_one_devl_locked(dev); + devl_unlock(devlink); +} + static const int types[] = { MLX5_CAP_GENERAL, MLX5_CAP_GENERAL_2, @@ -1409,6 +1544,8 @@ static const int types[] = { MLX5_CAP_IPSEC, MLX5_CAP_PORT_SELECTION, MLX5_CAP_DEV_SHAMPO, + MLX5_CAP_MACSEC, + MLX5_CAP_ADV_VIRTUALIZATION, }; static void mlx5_hca_caps_free(struct mlx5_core_dev *dev) @@ -1451,7 +1588,9 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile)); INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); + lockdep_register_key(&dev->lock_key); mutex_init(&dev->intf_state_mutex); + lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key); mutex_init(&priv->bfregs.reg_head.lock); mutex_init(&priv->bfregs.wc_head.lock); @@ -1463,8 +1602,8 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) INIT_LIST_HEAD(&priv->pgdir_list); priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev)); - priv->dbg_root = debugfs_create_dir(dev_name(dev->device), - mlx5_debugfs_root); + priv->dbg.dbg_root = debugfs_create_dir(dev_name(dev->device), + mlx5_debugfs_root); INIT_LIST_HEAD(&priv->traps); err = mlx5_tout_init(dev); @@ -1489,6 +1628,18 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) if (err) goto err_hca_caps; + /* The conjunction of sw_vhca_id with sw_owner_id will be a global + * unique id per function which uses mlx5_core. + * Those values are supplied to FW as part of the init HCA command to + * be used by both driver and FW when it's applicable. + */ + dev->priv.sw_vhca_id = ida_alloc_range(&sw_vhca_ida, 1, + MAX_SW_VHCA_ID, + GFP_KERNEL); + if (dev->priv.sw_vhca_id < 0) + mlx5_core_err(dev, "failed to allocate sw_vhca_id, err=%d\n", + dev->priv.sw_vhca_id); + return 0; err_hca_caps: @@ -1500,12 +1651,13 @@ err_pagealloc_init: err_health_init: mlx5_tout_cleanup(dev); err_timeout_init: - debugfs_remove(dev->priv.dbg_root); + debugfs_remove(dev->priv.dbg.dbg_root); mutex_destroy(&priv->pgdir_mutex); mutex_destroy(&priv->alloc_mutex); mutex_destroy(&priv->bfregs.wc_head.lock); mutex_destroy(&priv->bfregs.reg_head.lock); mutex_destroy(&dev->intf_state_mutex); + lockdep_unregister_key(&dev->lock_key); return err; } @@ -1513,17 +1665,21 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; + if (priv->sw_vhca_id > 0) + ida_free(&sw_vhca_ida, dev->priv.sw_vhca_id); + mlx5_hca_caps_free(dev); mlx5_adev_cleanup(dev); mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); mlx5_tout_cleanup(dev); - debugfs_remove_recursive(dev->priv.dbg_root); + debugfs_remove_recursive(dev->priv.dbg.dbg_root); mutex_destroy(&priv->pgdir_mutex); mutex_destroy(&priv->alloc_mutex); mutex_destroy(&priv->bfregs.wc_head.lock); mutex_destroy(&priv->bfregs.reg_head.lock); mutex_destroy(&dev->intf_state_mutex); + lockdep_unregister_key(&dev->lock_key); } static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) @@ -1594,7 +1750,13 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); + /* mlx5_drain_fw_reset() is using devlink APIs. Hence, we must drain + * fw_reset before unregistering the devlink. + */ + mlx5_drain_fw_reset(dev); + set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); devlink_unregister(devlink); + mlx5_sriov_disable(pdev); mlx5_crdump_disable(dev); mlx5_drain_health_wq(dev); mlx5_uninit_one(dev); @@ -1604,12 +1766,28 @@ static void remove_one(struct pci_dev *pdev) mlx5_devlink_free(devlink); } +#define mlx5_pci_trace(dev, fmt, ...) ({ \ + struct mlx5_core_dev *__dev = (dev); \ + mlx5_core_info(__dev, "%s Device state = %d health sensors: %d pci_status: %d. " fmt, \ + __func__, __dev->state, mlx5_health_check_fatal_sensors(__dev), \ + __dev->pci_status, ##__VA_ARGS__); \ +}) + +static const char *result2str(enum pci_ers_result result) +{ + return result == PCI_ERS_RESULT_NEED_RESET ? "need reset" : + result == PCI_ERS_RESULT_DISCONNECT ? "disconnect" : + result == PCI_ERS_RESULT_RECOVERED ? "recovered" : + "unknown"; +} + static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + enum pci_ers_result res; - mlx5_core_info(dev, "%s was called\n", __func__); + mlx5_pci_trace(dev, "Enter, pci channel state = %d\n", state); mlx5_enter_error_state(dev, false); mlx5_error_sw_reset(dev); @@ -1617,8 +1795,11 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); - return state == pci_channel_io_perm_failure ? + res = state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; + + mlx5_pci_trace(dev, "Exit, result = %d, %s\n", res, result2str(res)); + return res; } /* wait for the device to show vital signs by waiting @@ -1652,28 +1833,34 @@ static int wait_vital(struct pci_dev *pdev) static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) { + enum pci_ers_result res = PCI_ERS_RESULT_DISCONNECT; struct mlx5_core_dev *dev = pci_get_drvdata(pdev); int err; - mlx5_core_info(dev, "%s was called\n", __func__); + mlx5_pci_trace(dev, "Enter\n"); err = mlx5_pci_enable_device(dev); if (err) { mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n", __func__, err); - return PCI_ERS_RESULT_DISCONNECT; + goto out; } pci_set_master(pdev); pci_restore_state(pdev); pci_save_state(pdev); - if (wait_vital(pdev)) { - mlx5_core_err(dev, "%s: wait_vital timed out\n", __func__); - return PCI_ERS_RESULT_DISCONNECT; + err = wait_vital(pdev); + if (err) { + mlx5_core_err(dev, "%s: wait vital failed with error code: %d\n", + __func__, err); + goto out; } - return PCI_ERS_RESULT_RECOVERED; + res = PCI_ERS_RESULT_RECOVERED; +out: + mlx5_pci_trace(dev, "Exit, err = %d, result = %d, %s\n", err, res, result2str(res)); + return res; } static void mlx5_pci_resume(struct pci_dev *pdev) @@ -1681,14 +1868,16 @@ static void mlx5_pci_resume(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); int err; - mlx5_core_info(dev, "%s was called\n", __func__); + mlx5_pci_trace(dev, "Enter, loading driver..\n"); - err = mlx5_load_one(dev); - if (err) - mlx5_core_err(dev, "%s: mlx5_load_one failed with error code: %d\n", - __func__, err); - else - mlx5_core_info(dev, "%s: device recovered\n", __func__); + err = mlx5_load_one(dev, false); + + if (!err) + devlink_health_reporter_state_update(dev->priv.health.fw_fatal_reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); + + mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err, + !err ? "recovered" : "Failed"); } static const struct pci_error_handlers mlx5_err_handler = { @@ -1717,7 +1906,7 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) } /* Panic tear down fw command will stop the PCI bus communication - * with the HCA, so the health polll is no longer needed. + * with the HCA, so the health poll is no longer needed. */ mlx5_drain_health_wq(dev); mlx5_stop_health_poll(dev, false); @@ -1753,6 +1942,7 @@ static void shutdown(struct pci_dev *pdev) int err; mlx5_core_info(dev, "Shutdown was called\n"); + set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); err = mlx5_try_fast_unload(dev); if (err) mlx5_unload_one(dev); @@ -1772,7 +1962,7 @@ static int mlx5_resume(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - return mlx5_load_one(dev); + return mlx5_load_one(dev, false); } static const struct pci_device_id mlx5_core_pci_table[] = { @@ -1792,10 +1982,12 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */ { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */ { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */ + { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ { PCI_VDEVICE(MELLANOX, 0xa2dc) }, /* BlueField-3 integrated ConnectX-7 network controller */ + { PCI_VDEVICE(MELLANOX, 0xa2df) }, /* BlueField-4 integrated ConnectX-8 network controller */ { 0, } }; @@ -1804,17 +1996,18 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); void mlx5_disable_device(struct mlx5_core_dev *dev) { mlx5_error_sw_reset(dev); - mlx5_unload_one(dev); + mlx5_unload_one_devl_locked(dev); } int mlx5_recover_device(struct mlx5_core_dev *dev) { - int ret = -EIO; + if (!mlx5_core_is_sf(dev)) { + mlx5_pci_disable_device(dev); + if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED) + return -EIO; + } - mlx5_pci_disable_device(dev); - if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED) - ret = mlx5_load_one(dev); - return ret; + return mlx5_load_one_devl_locked(dev, true); } static struct pci_driver mlx5_core_driver = { @@ -1831,6 +2024,48 @@ static struct pci_driver mlx5_core_driver = { .sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count, }; +/** + * mlx5_vf_get_core_dev - Get the mlx5 core device from a given VF PCI device if + * mlx5_core is its driver. + * @pdev: The associated PCI device. + * + * Upon return the interface state lock stay held to let caller uses it safely. + * Caller must ensure to use the returned mlx5 device for a narrow window + * and put it back with mlx5_vf_put_core_dev() immediately once usage was over. + * + * Return: Pointer to the associated mlx5_core_dev or NULL. + */ +struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev) +{ + struct mlx5_core_dev *mdev; + + mdev = pci_iov_get_pf_drvdata(pdev, &mlx5_core_driver); + if (IS_ERR(mdev)) + return NULL; + + mutex_lock(&mdev->intf_state_mutex); + if (!test_bit(MLX5_INTERFACE_STATE_UP, &mdev->intf_state)) { + mutex_unlock(&mdev->intf_state_mutex); + return NULL; + } + + return mdev; +} +EXPORT_SYMBOL(mlx5_vf_get_core_dev); + +/** + * mlx5_vf_put_core_dev - Put the mlx5 core device back. + * @mdev: The mlx5 core device. + * + * Upon return the interface state lock is unlocked and caller should not + * access the mdev any more. + */ +void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev) +{ + mutex_unlock(&mdev->intf_state_mutex); +} +EXPORT_SYMBOL(mlx5_vf_put_core_dev); + static void mlx5_core_verify_params(void) { if (prof_sel >= ARRAY_SIZE(profile)) { @@ -1852,7 +2087,6 @@ static int __init init(void) get_random_bytes(&sw_owner_id, sizeof(sw_owner_id)); mlx5_core_verify_params(); - mlx5_fpga_ipsec_build_fs_cmds(); mlx5_register_debugfs(); err = pci_register_driver(&mlx5_core_driver); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c index e019d68062d8..495cca58dccc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c @@ -31,7 +31,6 @@ */ #include <linux/kernel.h> -#include <linux/module.h> #include <linux/mlx5/driver.h> #include <rdma/ib_verbs.h> #include "mlx5_core.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index bb677329ea08..a806e3de7b7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -143,6 +143,36 @@ enum mlx5_semaphore_space_address { #define MLX5_DEFAULT_PROF 2 +static inline int mlx5_flexible_inlen(struct mlx5_core_dev *dev, size_t fixed, + size_t item_size, size_t num_items, + const char *func, int line) +{ + int inlen; + + if (fixed > INT_MAX || item_size > INT_MAX || num_items > INT_MAX) { + mlx5_core_err(dev, "%s: %s:%d: input values too big: %zu + %zu * %zu\n", + __func__, func, line, fixed, item_size, num_items); + return -ENOMEM; + } + + if (check_mul_overflow((int)item_size, (int)num_items, &inlen)) { + mlx5_core_err(dev, "%s: %s:%d: multiplication overflow: %zu + %zu * %zu\n", + __func__, func, line, fixed, item_size, num_items); + return -ENOMEM; + } + + if (check_add_overflow((int)fixed, inlen, &inlen)) { + mlx5_core_err(dev, "%s: %s:%d: addition overflow: %zu + %zu * %zu\n", + __func__, func, line, fixed, item_size, num_items); + return -ENOMEM; + } + + return inlen; +} + +#define MLX5_FLEXIBLE_INLEN(dev, fixed, item_size, num_items) \ + mlx5_flexible_inlen(dev, fixed, item_size, num_items, __func__, __LINE__) + int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init(struct mlx5_core_dev *dev); @@ -164,6 +194,7 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); int mlx5_sriov_attach(struct mlx5_core_dev *dev); void mlx5_sriov_detach(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); +void mlx5_sriov_disable(struct pci_dev *pdev); int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); @@ -176,7 +207,6 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 element_id); int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages); -void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev); void mlx5_cmd_flush(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); @@ -209,7 +239,7 @@ int mlx5_attach_device(struct mlx5_core_dev *dev); void mlx5_detach_device(struct mlx5_core_dev *dev); int mlx5_register_device(struct mlx5_core_dev *dev); void mlx5_unregister_device(struct mlx5_core_dev *dev); -struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev); +struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev); void mlx5_dev_list_lock(void); void mlx5_dev_list_unlock(void); int mlx5_dev_list_trylock(void); @@ -290,7 +320,9 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev); int mlx5_init_one(struct mlx5_core_dev *dev); void mlx5_uninit_one(struct mlx5_core_dev *dev); void mlx5_unload_one(struct mlx5_core_dev *dev); -int mlx5_load_one(struct mlx5_core_dev *dev); +void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev); +int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery); +int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery); int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out); @@ -305,5 +337,6 @@ static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) bool mlx5_eth_supported(struct mlx5_core_dev *dev); bool mlx5_rdma_supported(struct mlx5_core_dev *dev); bool mlx5_vnet_supported(struct mlx5_core_dev *dev); +bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev); #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h index 8116815663a7..23cb63fa4588 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h @@ -22,12 +22,40 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn, int msix_vec_count); int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs); +struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev); +void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq); struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, struct cpumask *affinity); -void mlx5_irq_release(struct mlx5_irq *irq); +int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, + struct mlx5_irq **irqs); +void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs); int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb); int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb); struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq); int mlx5_irq_get_index(struct mlx5_irq *irq); +struct mlx5_irq_pool; +#ifdef CONFIG_MLX5_SF +int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, + struct mlx5_irq **irqs); +struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, + const struct cpumask *req_mask); +void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs, + int num_irqs); +#else +static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, + struct mlx5_irq **irqs) +{ + return -EOPNOTSUPP; +} + +static inline struct mlx5_irq * +mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, + struct mlx5_irq **irqs, int num_irqs) {} +#endif #endif /* __MLX5_IRQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index f099a087400e..9d735c343a3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -31,7 +31,6 @@ */ #include <linux/kernel.h> -#include <linux/module.h> #include <linux/mlx5/driver.h> #include "mlx5_core.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index f6b5451328fc..60596357bfc7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -32,7 +32,6 @@ #include <linux/highmem.h> #include <linux/kernel.h> -#include <linux/module.h> #include <linux/delay.h> #include <linux/mlx5/driver.h> #include <linux/xarray.h> @@ -327,11 +326,12 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id, } static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, - int notify_fail, bool ec_function) + int event, bool ec_function) { u32 function = get_function(func_id, ec_function); u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); + int notify_fail = event; u64 addr; int err; u32 *in; @@ -351,8 +351,10 @@ retry: if (err) { if (err == -ENOMEM) err = alloc_system_page(dev, function); - if (err) + if (err) { + dev->priv.fw_pages_alloc_failed += (npages - i); goto out_4k; + } goto retry; } @@ -365,11 +367,20 @@ retry: MLX5_SET(manage_pages_in, in, input_num_entries, npages); MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_do(dev, in, inlen, out, sizeof(out)); + if (err == -EREMOTEIO) { + notify_fail = 0; + /* if triggered by FW and failed by FW ignore */ + if (event) { + err = 0; + goto out_dropped; + } + } + err = mlx5_cmd_check(dev, err, in, out); if (err) { mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err); - goto out_4k; + goto out_dropped; } dev->priv.fw_pages += npages; @@ -384,6 +395,8 @@ retry: kvfree(in); return 0; +out_dropped: + dev->priv.give_pages_dropped += npages; out_4k: for (i--; i >= 0; i--) free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), function); @@ -455,7 +468,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, u32 i = 0; if (!mlx5_cmd_is_down(dev)) - return mlx5_cmd_exec(dev, in, in_size, out, out_size); + return mlx5_cmd_do(dev, in, in_size, out, out_size); /* No hard feelings, we want our pages back! */ npages = MLX5_GET(manage_pages_in, in, input_num_entries); @@ -479,7 +492,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, } static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, - int *nclaimed, bool ec_function) + int *nclaimed, bool event, bool ec_function) { u32 function = get_function(func_id, ec_function); int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); @@ -507,6 +520,17 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, func_id, npages, outlen); err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen); if (err) { + npages = MLX5_GET(manage_pages_in, in, input_num_entries); + dev->priv.reclaim_pages_discard += npages; + } + /* if triggered by FW event and failed by FW then ignore */ + if (event && err == -EREMOTEIO) { + err = 0; + goto out_free; + } + + err = mlx5_cmd_check(dev, err, in, out); + if (err) { mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err); goto out_free; } @@ -546,7 +570,7 @@ static void pages_work_handler(struct work_struct *work) release_all_pages(dev, req->func_id, req->ec_function); else if (req->npages < 0) err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL, - req->ec_function); + true, req->ec_function); else if (req->npages > 0) err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function); @@ -645,7 +669,7 @@ static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev, int err; err = reclaim_pages(dev, func_id, optimal_reclaimed_pages(), - &nclaimed, mlx5_core_is_ecpf(dev)); + &nclaimed, false, mlx5_core_is_ecpf(dev)); if (err) { mlx5_core_warn(dev, "failed reclaiming pages (%d) for func id 0x%x\n", err, func_id); @@ -700,12 +724,14 @@ int mlx5_pagealloc_init(struct mlx5_core_dev *dev) return -ENOMEM; xa_init(&dev->priv.page_root_xa); + mlx5_pages_debugfs_init(dev); return 0; } void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) { + mlx5_pages_debugfs_cleanup(dev); xa_destroy(&dev->priv.page_root_xa); destroy_workqueue(dev->priv.pg_wq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 830444f927d4..662f1d55e30e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -3,19 +3,15 @@ #include <linux/interrupt.h> #include <linux/notifier.h> -#include <linux/module.h> #include <linux/mlx5/driver.h> #include "mlx5_core.h" #include "mlx5_irq.h" +#include "pci_irq.h" #include "lib/sf.h" #ifdef CONFIG_RFS_ACCEL #include <linux/cpu_rmap.h> #endif -#define MLX5_MAX_IRQ_NAME (32) -/* max irq_index is 2047, so four chars */ -#define MLX5_MAX_IRQ_IDX_CHARS (4) - #define MLX5_SFS_PER_CTRL_IRQ 64 #define MLX5_IRQ_CTRL_SF_MAX 8 /* min num of vectors for SFs to be enabled */ @@ -25,7 +21,6 @@ #define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX) #define MLX5_EQ_SHARE_IRQ_MIN_COMP (1) #define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4) -#define MLX5_EQ_REFS_PER_IRQ (2) struct mlx5_irq { struct atomic_notifier_head nh; @@ -37,16 +32,6 @@ struct mlx5_irq { int irqn; }; -struct mlx5_irq_pool { - char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS]; - struct xa_limit xa_num_irqs; - struct mutex lock; /* sync IRQs creations */ - struct xarray irqs; - u32 max_threshold; - u32 min_threshold; - struct mlx5_core_dev *dev; -}; - struct mlx5_irq_table { struct mlx5_irq_pool *pf_pool; struct mlx5_irq_pool *sf_ctrl_pool; @@ -109,8 +94,8 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, if (msix_vec_count > max_msix) return -EOVERFLOW; - query_cap = kzalloc(query_sz, GFP_KERNEL); - hca_cap = kzalloc(set_sz, GFP_KERNEL); + query_cap = kvzalloc(query_sz, GFP_KERNEL); + hca_cap = kvzalloc(set_sz, GFP_KERNEL); if (!hca_cap || !query_cap) { ret = -ENOMEM; goto out; @@ -133,8 +118,8 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1); ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap); out: - kfree(hca_cap); - kfree(query_cap); + kvfree(hca_cap); + kvfree(query_cap); return ret; } @@ -143,28 +128,38 @@ static void irq_release(struct mlx5_irq *irq) struct mlx5_irq_pool *pool = irq->pool; xa_erase(&pool->irqs, irq->index); - /* free_irq requires that affinity and rmap will be cleared + /* free_irq requires that affinity_hint and rmap will be cleared * before calling it. This is why there is asymmetry with set_rmap * which should be called after alloc_irq but before request_irq. */ - irq_set_affinity_hint(irq->irqn, NULL); + irq_update_affinity_hint(irq->irqn, NULL); free_cpumask_var(irq->mask); free_irq(irq->irqn, &irq->nh); kfree(irq); } -static void irq_put(struct mlx5_irq *irq) +int mlx5_irq_put(struct mlx5_irq *irq) { struct mlx5_irq_pool *pool = irq->pool; + int ret = 0; mutex_lock(&pool->lock); irq->refcount--; - if (!irq->refcount) + if (!irq->refcount) { irq_release(irq); + ret = 1; + } mutex_unlock(&pool->lock); + return ret; +} + +int mlx5_irq_read_locked(struct mlx5_irq *irq) +{ + lockdep_assert_held(&irq->pool->lock); + return irq->refcount; } -static int irq_get_locked(struct mlx5_irq *irq) +int mlx5_irq_get_locked(struct mlx5_irq *irq) { lockdep_assert_held(&irq->pool->lock); if (WARN_ON_ONCE(!irq->refcount)) @@ -178,7 +173,7 @@ static int irq_get(struct mlx5_irq *irq) int err; mutex_lock(&irq->pool->lock); - err = irq_get_locked(irq); + err = mlx5_irq_get_locked(irq); mutex_unlock(&irq->pool->lock); return err; } @@ -210,12 +205,8 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx) snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx); } -static bool irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) -{ - return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf")); -} - -static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) +struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, + const struct cpumask *affinity) { struct mlx5_core_dev *dev = pool->dev; char name[MLX5_MAX_IRQ_NAME]; @@ -226,7 +217,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) if (!irq) return ERR_PTR(-ENOMEM); irq->irqn = pci_irq_vector(dev->pdev, i); - if (!irq_pool_is_sf_pool(pool)) + if (!mlx5_irq_pool_is_sf_pool(pool)) irq_set_name(pool, name, i); else irq_sf_set_name(pool, name, i); @@ -244,6 +235,10 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) err = -ENOMEM; goto err_cpumask; } + if (affinity) { + cpumask_copy(irq->mask, affinity); + irq_set_affinity_and_hint(irq->irqn, irq->mask); + } irq->pool = pool; irq->refcount = 1; irq->index = i; @@ -255,6 +250,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) } return irq; err_xa: + irq_update_affinity_hint(irq->irqn, NULL); free_cpumask_var(irq->mask); err_cpumask: free_irq(irq->irqn, &irq->nh); @@ -275,7 +271,7 @@ int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb) return -ENOENT; ret = atomic_notifier_chain_register(&irq->nh, nb); if (ret) - irq_put(irq); + mlx5_irq_put(irq); return ret; } @@ -284,7 +280,7 @@ int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb) int err = 0; err = atomic_notifier_chain_unregister(&irq->nh, nb); - irq_put(irq); + mlx5_irq_put(irq); return err; } @@ -300,131 +296,121 @@ int mlx5_irq_get_index(struct mlx5_irq *irq) /* irq_pool API */ -/* creating an irq from irq_pool */ -static struct mlx5_irq *irq_pool_create_irq(struct mlx5_irq_pool *pool, - struct cpumask *affinity) +/* requesting an irq from a given pool according to given index */ +static struct mlx5_irq * +irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, + struct cpumask *affinity) { struct mlx5_irq *irq; - u32 irq_index; - int err; - err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, - GFP_KERNEL); - if (err) - return ERR_PTR(err); - irq = irq_request(pool, irq_index); - if (IS_ERR(irq)) - return irq; - cpumask_copy(irq->mask, affinity); - irq_set_affinity_hint(irq->irqn, irq->mask); + mutex_lock(&pool->lock); + irq = xa_load(&pool->irqs, vecidx); + if (irq) { + mlx5_irq_get_locked(irq); + goto unlock; + } + irq = mlx5_irq_alloc(pool, vecidx, affinity); +unlock: + mutex_unlock(&pool->lock); return irq; } -/* looking for the irq with the smallest refcount and the same affinity */ -static struct mlx5_irq *irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, - struct cpumask *affinity) +static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table) { - int start = pool->xa_num_irqs.min; - int end = pool->xa_num_irqs.max; - struct mlx5_irq *irq = NULL; - struct mlx5_irq *iter; - unsigned long index; + return irq_table->sf_ctrl_pool; +} - lockdep_assert_held(&pool->lock); - xa_for_each_range(&pool->irqs, index, iter, start, end) { - if (!cpumask_equal(iter->mask, affinity)) - continue; - if (iter->refcount < pool->min_threshold) - return iter; - if (!irq || iter->refcount < irq->refcount) - irq = iter; - } - return irq; +static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table) +{ + return irq_table->sf_comp_pool; } -/* requesting an irq from a given pool according to given affinity */ -static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool, - struct cpumask *affinity) +struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev) { - struct mlx5_irq *least_loaded_irq, *new_irq; + struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool = NULL; - mutex_lock(&pool->lock); - least_loaded_irq = irq_pool_find_least_loaded(pool, affinity); - if (least_loaded_irq && - least_loaded_irq->refcount < pool->min_threshold) - goto out; - new_irq = irq_pool_create_irq(pool, affinity); - if (IS_ERR(new_irq)) { - if (!least_loaded_irq) { - mlx5_core_err(pool->dev, "Didn't find IRQ for cpu = %u\n", - cpumask_first(affinity)); - mutex_unlock(&pool->lock); - return new_irq; - } - /* We failed to create a new IRQ for the requested affinity, - * sharing existing IRQ. - */ - goto out; - } - least_loaded_irq = new_irq; - goto unlock; -out: - irq_get_locked(least_loaded_irq); - if (least_loaded_irq->refcount > pool->max_threshold) - mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n", - least_loaded_irq->irqn, pool->name, - least_loaded_irq->refcount / MLX5_EQ_REFS_PER_IRQ); -unlock: - mutex_unlock(&pool->lock); - return least_loaded_irq; + if (mlx5_core_is_sf(dev)) + pool = sf_irq_pool_get(irq_table); + + /* In some configs, there won't be a pool of SFs IRQs. Hence, returning + * the PF IRQs pool in case the SF pool doesn't exist. + */ + return pool ? pool : irq_table->pf_pool; } -/* requesting an irq from a given pool according to given index */ -static struct mlx5_irq * -irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, - struct cpumask *affinity) +static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev) { - struct mlx5_irq *irq; + struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool = NULL; - mutex_lock(&pool->lock); - irq = xa_load(&pool->irqs, vecidx); - if (irq) { - irq_get_locked(irq); - goto unlock; + if (mlx5_core_is_sf(dev)) + pool = sf_ctrl_irq_pool_get(irq_table); + + /* In some configs, there won't be a pool of SFs IRQs. Hence, returning + * the PF IRQs pool in case the SF pool doesn't exist. + */ + return pool ? pool : irq_table->pf_pool; +} + +/** + * mlx5_irqs_release - release one or more IRQs back to the system. + * @irqs: IRQs to be released. + * @nirqs: number of IRQs to be released. + */ +static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs) +{ + int i; + + for (i = 0; i < nirqs; i++) { + synchronize_irq(irqs[i]->irqn); + mlx5_irq_put(irqs[i]); } - irq = irq_request(pool, vecidx); - if (IS_ERR(irq) || !affinity) - goto unlock; - cpumask_copy(irq->mask, affinity); - if (!irq_pool_is_sf_pool(pool) && !pool->xa_num_irqs.max && - cpumask_empty(irq->mask)) - cpumask_set_cpu(0, irq->mask); - irq_set_affinity_hint(irq->irqn, irq->mask); -unlock: - mutex_unlock(&pool->lock); - return irq; } -static struct mlx5_irq_pool *find_sf_irq_pool(struct mlx5_irq_table *irq_table, - int i, struct cpumask *affinity) +/** + * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system. + * @ctrl_irq: ctrl IRQ to be released. + */ +void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq) { - if (cpumask_empty(affinity) && i == MLX5_IRQ_EQ_CTRL) - return irq_table->sf_ctrl_pool; - return irq_table->sf_comp_pool; + mlx5_irqs_release(&ctrl_irq, 1); } /** - * mlx5_irq_release - release an IRQ back to the system. - * @irq: irq to be released. + * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device. + * @dev: mlx5 device that requesting the IRQ. + * + * This function returns a pointer to IRQ, or ERR_PTR in case of error. */ -void mlx5_irq_release(struct mlx5_irq *irq) +struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev) { - synchronize_irq(irq->irqn); - irq_put(irq); + struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev); + cpumask_var_t req_mask; + struct mlx5_irq *irq; + + if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) + return ERR_PTR(-ENOMEM); + cpumask_copy(req_mask, cpu_online_mask); + if (!mlx5_irq_pool_is_sf_pool(pool)) { + /* In case we are allocating a control IRQ for PF/VF */ + if (!pool->xa_num_irqs.max) { + cpumask_clear(req_mask); + /* In case we only have a single IRQ for PF/VF */ + cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask); + } + /* Allocate the IRQ in the last index of the pool */ + irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask); + } else { + irq = mlx5_irq_affinity_request(pool, req_mask); + } + + free_cpumask_var(req_mask); + return irq; } /** - * mlx5_irq_request - request an IRQ for mlx5 device. + * mlx5_irq_request - request an IRQ for mlx5 PF/VF device. * @dev: mlx5 device that requesting the IRQ. * @vecidx: vector index of the IRQ. This argument is ignore if affinity is * provided. @@ -439,23 +425,8 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, struct mlx5_irq_pool *pool; struct mlx5_irq *irq; - if (mlx5_core_is_sf(dev)) { - pool = find_sf_irq_pool(irq_table, vecidx, affinity); - if (!pool) - /* we don't have IRQs for SFs, using the PF IRQs */ - goto pf_irq; - if (cpumask_empty(affinity) && !strcmp(pool->name, "mlx5_sf_comp")) - /* In case an SF user request IRQ with vecidx */ - irq = irq_pool_request_vector(pool, vecidx, NULL); - else - irq = irq_pool_request_affinity(pool, affinity); - goto out; - } -pf_irq: pool = irq_table->pf_pool; - vecidx = (vecidx == MLX5_IRQ_EQ_CTRL) ? pool->xa_num_irqs.max : vecidx; irq = irq_pool_request_vector(pool, vecidx, affinity); -out: if (IS_ERR(irq)) return irq; mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n", @@ -464,6 +435,51 @@ out: return irq; } +/** + * mlx5_irqs_release_vectors - release one or more IRQs back to the system. + * @irqs: IRQs to be released. + * @nirqs: number of IRQs to be released. + */ +void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs) +{ + mlx5_irqs_release(irqs, nirqs); +} + +/** + * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device. + * @dev: mlx5 device that is requesting the IRQs. + * @cpus: CPUs array for binding the IRQs + * @nirqs: number of IRQs to request. + * @irqs: an output array of IRQs pointers. + * + * Each IRQ is bound to at most 1 CPU. + * This function is requests nirqs IRQs, starting from @vecidx. + * + * This function returns the number of IRQs requested, (which might be smaller than + * @nirqs), if successful, or a negative error code in case of an error. + */ +int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, + struct mlx5_irq **irqs) +{ + cpumask_var_t req_mask; + struct mlx5_irq *irq; + int i; + + if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) + return -ENOMEM; + for (i = 0; i < nirqs; i++) { + cpumask_set_cpu(cpus[i], req_mask); + irq = mlx5_irq_request(dev, i, req_mask); + if (IS_ERR(irq)) + break; + cpumask_clear(req_mask); + irqs[i] = irq; + } + + free_cpumask_var(req_mask); + return i ? i : PTR_ERR(irq); +} + static struct mlx5_irq_pool * irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name, u32 min_threshold, u32 max_threshold) @@ -479,7 +495,7 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name, pool->xa_num_irqs.max = start + size - 1; if (name) snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS, - name); + "%s", name); pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ; pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ; mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d", @@ -500,6 +516,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool) irq_release(irq); xa_destroy(&pool->irqs); mutex_destroy(&pool->lock); + kfree(pool->irqs_per_cpu); kvfree(pool); } @@ -547,7 +564,17 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec) err = PTR_ERR(table->sf_comp_pool); goto err_sf_ctrl; } + + table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL); + if (!table->sf_comp_pool->irqs_per_cpu) { + err = -ENOMEM; + goto err_irqs_per_cpu; + } + return 0; + +err_irqs_per_cpu: + irq_pool_free(table->sf_comp_pool); err_sf_ctrl: irq_pool_free(table->sf_ctrl_pool); err_pf: @@ -573,7 +600,8 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev) if (mlx5_core_is_sf(dev)) return 0; - irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL); + irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL, + dev->priv.numa_node); if (!irq_table) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h new file mode 100644 index 000000000000..5c7e68bee43a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __PCI_IRQ_H__ +#define __PCI_IRQ_H__ + +#include <linux/mlx5/driver.h> + +#define MLX5_MAX_IRQ_NAME (32) +/* max irq_index is 2047, so four chars */ +#define MLX5_MAX_IRQ_IDX_CHARS (4) +#define MLX5_EQ_REFS_PER_IRQ (2) + +struct mlx5_irq; + +struct mlx5_irq_pool { + char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS]; + struct xa_limit xa_num_irqs; + struct mutex lock; /* sync IRQs creations */ + struct xarray irqs; + u32 max_threshold; + u32 min_threshold; + u16 *irqs_per_cpu; + struct mlx5_core_dev *dev; +}; + +struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev); +static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) +{ + return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf")); +} + +struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, + const struct cpumask *affinity); +int mlx5_irq_get_locked(struct mlx5_irq *irq); +int mlx5_irq_read_locked(struct mlx5_irq *irq); +int mlx5_irq_put(struct mlx5_irq *irq); + +#endif /* __PCI_IRQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c index aabc53ad8bdd..ee5ffdeb9015 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c @@ -31,7 +31,6 @@ */ #include <linux/kernel.h> -#include <linux/module.h> #include <linux/mlx5/driver.h> #include "mlx5_core.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 1ef2b6a848c1..a1548e6bfb35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -33,9 +33,10 @@ #include <linux/mlx5/port.h> #include "mlx5_core.h" -int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, - int size_in, void *data_out, int size_out, - u16 reg_id, int arg, int write) +/* calling with verbose false will not print error to log */ +int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, + void *data_out, int size_out, u16 reg_id, int arg, + int write, bool verbose) { int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out; int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in; @@ -57,7 +58,9 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, MLX5_SET(access_register_in, in, argument, arg); MLX5_SET(access_register_in, in, register_id, reg_id); - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + err = mlx5_cmd_do(dev, in, inlen, out, outlen); + if (verbose) + err = mlx5_cmd_check(dev, err, in, out); if (err) goto out; @@ -69,6 +72,15 @@ out: kvfree(in); return err; } +EXPORT_SYMBOL_GPL(mlx5_access_reg); + +int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, + int size_in, void *data_out, int size_out, + u16 reg_id, int arg, int write) +{ + return mlx5_access_reg(dev, data_in, size_in, data_out, size_out, + reg_id, arg, write, true); +} EXPORT_SYMBOL_GPL(mlx5_core_access_reg); int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, @@ -263,7 +275,6 @@ static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num) { u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pmlp_reg)]; - int module_mapping; int err; MLX5_SET(pmlp_reg, in, local_port, 1); @@ -272,8 +283,9 @@ static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num) if (err) return err; - module_mapping = MLX5_GET(pmlp_reg, out, lane0_module_mapping); - *module_num = module_mapping & MLX5_EEPROM_IDENTIFIER_BYTE_MASK; + *module_num = MLX5_GET(lane_2_module_mapping, + MLX5_ADDR_OF(pmlp_reg, out, lane0_module_mapping), + module); return 0; } @@ -353,6 +365,12 @@ static void mlx5_sfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset *offset -= MLX5_EEPROM_PAGE_LENGTH; } +static int mlx5_mcia_max_bytes(struct mlx5_core_dev *dev) +{ + /* mcia supports either 12 dwords or 32 dwords */ + return (MLX5_CAP_MCAM_FEATURE(dev, mcia_32dwords) ? 32 : 12) * sizeof(u32); +} + static int mlx5_query_mcia(struct mlx5_core_dev *dev, struct mlx5_module_eeprom_query_params *params, u8 *data) { @@ -362,7 +380,7 @@ static int mlx5_query_mcia(struct mlx5_core_dev *dev, void *ptr; u16 size; - size = min_t(int, params->size, MLX5_EEPROM_MAX_BYTES); + size = min_t(int, params->size, mlx5_mcia_max_bytes(dev)); MLX5_SET(mcia_reg, in, l, 0); MLX5_SET(mcia_reg, in, size, size); @@ -406,23 +424,24 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, switch (module_id) { case MLX5_MODULE_ID_SFP: - mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset); + mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &offset); break; case MLX5_MODULE_ID_QSFP: case MLX5_MODULE_ID_QSFP_PLUS: case MLX5_MODULE_ID_QSFP28: - mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset); + mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset); break; default: mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id); return -EINVAL; } - if (query.offset + size > MLX5_EEPROM_PAGE_LENGTH) + if (offset + size > MLX5_EEPROM_PAGE_LENGTH) /* Cross pages read, read until offset 256 in low page */ - size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; + size = MLX5_EEPROM_PAGE_LENGTH - offset; query.size = size; + query.offset = offset; return mlx5_query_mcia(dev, &query, data); } @@ -432,35 +451,12 @@ int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, struct mlx5_module_eeprom_query_params *params, u8 *data) { - u8 module_id; int err; err = mlx5_query_module_num(dev, ¶ms->module_number); if (err) return err; - err = mlx5_query_module_id(dev, params->module_number, &module_id); - if (err) - return err; - - switch (module_id) { - case MLX5_MODULE_ID_SFP: - if (params->page > 0) - return -EINVAL; - break; - case MLX5_MODULE_ID_QSFP: - case MLX5_MODULE_ID_QSFP28: - case MLX5_MODULE_ID_QSFP_PLUS: - if (params->page > 3) - return -EINVAL; - break; - case MLX5_MODULE_ID_DSFP: - break; - default: - mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id); - return -EINVAL; - } - if (params->i2c_address != MLX5_I2C_ADDR_HIGH && params->i2c_address != MLX5_I2C_ADDR_LOW) { mlx5_core_err(dev, "I2C address not recognized: 0x%x\n", params->i2c_address); @@ -497,29 +493,6 @@ int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap); -int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, - u8 port_num, void *out, size_t sz) -{ - u32 *in; - int err; - - in = kvzalloc(sz, GFP_KERNEL); - if (!in) { - err = -ENOMEM; - return err; - } - - MLX5_SET(ppcnt_reg, in, local_port, port_num); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP); - err = mlx5_core_access_reg(dev, in, sz, out, - sz, MLX5_REG_PPCNT, 0, 0); - - kvfree(in); - return err; -} -EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt); - static int mlx5_query_pfcc_reg(struct mlx5_core_dev *dev, u32 *out, u32 out_size) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index 7161220afe30..9f8b4005f4bd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -31,7 +31,6 @@ */ #include <linux/kernel.h> -#include <linux/module.h> #include <linux/mlx5/driver.h> #include "mlx5_core.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c index f37db7cc32a6..7da012ff0d41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c @@ -30,10 +30,7 @@ bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev) { struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; - if (!mlx5_sf_dev_supported(dev)) - return false; - - return !xa_empty(&table->devices); + return table && !xa_empty(&table->devices); } static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 3be659cd91f1..7d955a4d9f14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -501,7 +501,7 @@ static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, voi case MLX5_ESWITCH_OFFLOADS: mlx5_sf_table_enable(table); break; - case MLX5_ESWITCH_NONE: + case MLX5_ESWITCH_LEGACY: mlx5_sf_table_disable(table); break; default: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c index 252d6017387d..17aa348989cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c @@ -247,7 +247,7 @@ int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev) { struct mlx5_sf_hw_table *table; u16 max_ext_fn = 0; - u16 ext_base_id; + u16 ext_base_id = 0; u16 max_fn = 0; u16 base_id; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index e8185b69ac6c..c0e6c487c63c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -87,6 +87,11 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) enable_vfs_hca: num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs); for (vf = 0; vf < num_vfs; vf++) { + /* Notify the VF before its enablement to let it set + * some stuff. + */ + blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier, + MLX5_PF_NOTIFY_ENABLE_VF, dev); err = mlx5_core_enable_hca(dev, vf + 1); if (err) { mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err); @@ -127,6 +132,11 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) for (vf = num_vfs - 1; vf >= 0; vf--) { if (!sriov->vfs_ctx[vf].enabled) continue; + /* Notify the VF before its disablement to let it clean + * some resources. + */ + blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier, + MLX5_PF_NOTIFY_DISABLE_VF, dev); err = mlx5_core_disable_hca(dev, vf + 1); if (err) { mlx5_core_warn(dev, "failed to disable VF %d\n", vf); @@ -135,8 +145,7 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) sriov->vfs_ctx[vf].enabled = 0; } - if (MLX5_ESWITCH_MANAGER(dev)) - mlx5_eswitch_disable(dev->priv.eswitch, clear_vf); + mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf); if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); @@ -145,9 +154,12 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); int err; + devl_lock(devlink); err = mlx5_device_enable_sriov(dev, num_vfs); + devl_unlock(devlink); if (err) { mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err); return err; @@ -161,13 +173,16 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) return err; } -static void mlx5_sriov_disable(struct pci_dev *pdev) +void mlx5_sriov_disable(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); int num_vfs = pci_num_vf(dev->pdev); pci_disable_sriov(pdev); + devl_lock(devlink); mlx5_device_disable_sriov(dev, num_vfs, true); + devl_unlock(devlink); } int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) @@ -205,19 +220,8 @@ int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count) mlx5_get_default_msix_vec_count(dev, pci_num_vf(pf)); sriov = &dev->priv.sriov; - - /* Reversed translation of PCI VF function number to the internal - * function_id, which exists in the name of virtfn symlink. - */ - for (id = 0; id < pci_num_vf(pf); id++) { - if (!sriov->vfs_ctx[id].enabled) - continue; - - if (vf->devfn == pci_iov_virtfn_devfn(pf, id)) - break; - } - - if (id == pci_num_vf(pf) || !sriov->vfs_ctx[id].enabled) + id = pci_iov_vf_id(vf); + if (id < 0 || !sriov->vfs_ctx[id].enabled) return -EINVAL; return mlx5_set_msix_vec_count(dev, id + 1, msix_vec_count); @@ -268,7 +272,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; struct pci_dev *pdev = dev->pdev; - int total_vfs; + int total_vfs, i; if (!mlx5_core_is_pf(dev)) return 0; @@ -280,6 +284,9 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) if (!sriov->vfs_ctx) return -ENOMEM; + for (i = 0; i < total_vfs; i++) + BLOCKING_INIT_NOTIFIER_HEAD(&sriov->vfs_ctx[i].notifier); + return 0; } @@ -292,3 +299,53 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) kfree(sriov->vfs_ctx); } + +/** + * mlx5_sriov_blocking_notifier_unregister - Unregister a VF from + * a notification block chain. + * + * @mdev: The mlx5 core device. + * @vf_id: The VF id. + * @nb: The notifier block to be unregistered. + */ +void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev, + int vf_id, + struct notifier_block *nb) +{ + struct mlx5_vf_context *vfs_ctx; + struct mlx5_core_sriov *sriov; + + sriov = &mdev->priv.sriov; + if (WARN_ON(vf_id < 0 || vf_id >= sriov->num_vfs)) + return; + + vfs_ctx = &sriov->vfs_ctx[vf_id]; + blocking_notifier_chain_unregister(&vfs_ctx->notifier, nb); +} +EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_unregister); + +/** + * mlx5_sriov_blocking_notifier_register - Register a VF notification + * block chain. + * + * @mdev: The mlx5 core device. + * @vf_id: The VF id. + * @nb: The notifier block to be called upon the VF events. + * + * Returns 0 on success or an error code. + */ +int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev, + int vf_id, + struct notifier_block *nb) +{ + struct mlx5_vf_context *vfs_ctx; + struct mlx5_core_sriov *sriov; + + sriov = &mdev->priv.sriov; + if (vf_id < 0 || vf_id >= sriov->num_vfs) + return -EINVAL; + + vfs_ctx = &sriov->vfs_ctx[vf_id]; + return blocking_notifier_chain_register(&vfs_ctx->notifier, nb); +} +EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_register); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 07936841ce99..b1dfad274a39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -22,6 +22,7 @@ enum dr_action_valid_state { DR_ACTION_STATE_PUSH_VLAN, DR_ACTION_STATE_NON_TERM, DR_ACTION_STATE_TERM, + DR_ACTION_STATE_ASO, DR_ACTION_STATE_MAX, }; @@ -42,6 +43,7 @@ static const char * const action_type_to_str[] = { [DR_ACTION_TYP_SAMPLER] = "DR_ACTION_TYP_SAMPLER", [DR_ACTION_TYP_INSERT_HDR] = "DR_ACTION_TYP_INSERT_HDR", [DR_ACTION_TYP_REMOVE_HDR] = "DR_ACTION_TYP_REMOVE_HDR", + [DR_ACTION_TYP_ASO_FLOW_METER] = "DR_ACTION_TYP_ASO_FLOW_METER", [DR_ACTION_TYP_MAX] = "DR_ACTION_UNKNOWN", }; @@ -71,6 +73,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_DECAP] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -85,6 +88,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_ENCAP] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -93,6 +97,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_MODIFY_HDR] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -105,6 +110,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_POP_VLAN] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -118,6 +124,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_PUSH_VLAN] = { [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, @@ -128,6 +135,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_NON_TERM] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -145,6 +153,13 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ASO] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_TERM] = { [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, @@ -163,18 +178,21 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_DECAP] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_ENCAP] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_MODIFY_HDR] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -185,6 +203,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_POP_VLAN] = { [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, @@ -196,6 +215,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_PUSH_VLAN] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -206,6 +226,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_NON_TERM] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -219,6 +240,16 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ASO] = { + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, }, [DR_ACTION_STATE_TERM] = { [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, @@ -240,6 +271,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_DECAP] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -253,6 +285,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_ENCAP] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -261,6 +294,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_MODIFY_HDR] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -272,6 +306,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_POP_VLAN] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -284,6 +319,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_PUSH_VLAN] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -296,6 +332,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_NON_TERM] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -312,6 +349,13 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ASO] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_TERM] = { [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, @@ -331,6 +375,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_DECAP] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -338,6 +383,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP, [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_ENCAP] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -345,6 +391,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_MODIFY_HDR] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -356,6 +403,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_POP_VLAN] = { [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, @@ -368,6 +416,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_PUSH_VLAN] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -379,6 +428,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_NON_TERM] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, @@ -393,6 +443,17 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ASO] = { + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_TERM] = { [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, @@ -530,6 +591,37 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn, return 0; } +static void dr_action_modify_ttl_adjust(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_actions_attr *attr, + bool rx_rule, + bool *recalc_cs_required) +{ + *recalc_cs_required = false; + + /* if device supports csum recalculation - no adjustment needed */ + if (mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps)) + return; + + /* no adjustment needed on TX rules */ + if (!rx_rule) + return; + + if (!MLX5_CAP_ESW_FLOWTABLE(dmn->mdev, fdb_ipv4_ttl_modify)) { + /* Ignore the modify TTL action. + * It is always kept as last HW action. + */ + attr->modify_actions--; + return; + } + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB) + /* Due to a HW bug on some devices, modifying TTL on RX flows + * will cause an incorrect checksum calculation. In such cases + * we will use a FW table to recalculate the checksum. + */ + *recalc_cs_required = true; +} + static void dr_action_print_sequence(struct mlx5dr_domain *dmn, struct mlx5dr_action *actions[], int last_idx) @@ -570,6 +662,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, for (i = 0; i < num_actions; i++) { struct mlx5dr_action_dest_tbl *dest_tbl; + struct mlx5dr_icm_chunk *chunk; struct mlx5dr_action *action; int max_actions_type = 1; u32 action_type; @@ -598,9 +691,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, matcher->tbl->level, dest_tbl->tbl->level); } - attr.final_icm_addr = rx_rule ? - dest_tbl->tbl->rx.s_anchor->chunk->icm_addr : - dest_tbl->tbl->tx.s_anchor->chunk->icm_addr; + chunk = rx_rule ? dest_tbl->tbl->rx.s_anchor->chunk : + dest_tbl->tbl->tx.s_anchor->chunk; + attr.final_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk); } else { struct mlx5dr_cmd_query_flow_table_details output; int ret; @@ -649,8 +742,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, case DR_ACTION_TYP_MODIFY_HDR: attr.modify_index = action->rewrite->index; attr.modify_actions = action->rewrite->num_of_actions; - recalc_cs_required = action->rewrite->modify_ttl && - !mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps); + if (action->rewrite->modify_ttl) + dr_action_modify_ttl_adjust(dmn, &attr, rx_rule, + &recalc_cs_required); break; case DR_ACTION_TYP_L2_TO_TNL_L2: case DR_ACTION_TYP_L2_TO_TNL_L3: @@ -669,15 +763,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, case DR_ACTION_TYP_VPORT: attr.hit_gvmi = action->vport->caps->vhca_gvmi; dest_action = action; - if (rx_rule) { - if (action->vport->caps->num == MLX5_VPORT_UPLINK) { - mlx5dr_dbg(dmn, "Device doesn't support Loopback on WIRE vport\n"); - return -EOPNOTSUPP; - } - attr.final_icm_addr = action->vport->caps->icm_address_rx; - } else { - attr.final_icm_addr = action->vport->caps->icm_address_tx; - } + attr.final_icm_addr = rx_rule ? + action->vport->caps->icm_address_rx : + action->vport->caps->icm_address_tx; break; case DR_ACTION_TYP_POP_VLAN: if (!rx_rule && !(dmn->ste_ctx->actions_caps & @@ -711,6 +799,12 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, attr.reformat.param_0 = action->reformat->param_0; attr.reformat.param_1 = action->reformat->param_1; break; + case DR_ACTION_TYP_ASO_FLOW_METER: + attr.aso_flow_meter.obj_id = action->aso->obj_id; + attr.aso_flow_meter.offset = action->aso->offset; + attr.aso_flow_meter.dest_reg_id = action->aso->dest_reg_id; + attr.aso_flow_meter.init_color = action->aso->init_color; + break; default: mlx5dr_err(dmn, "Unsupported action type %d\n", action_type); return -EINVAL; @@ -737,12 +831,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, *new_hw_ste_arr_sz = nic_matcher->num_of_builders; last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1); - /* Due to a HW bug in some devices, modifying TTL on RX flows will - * cause an incorrect checksum calculation. In this case we will - * use a FW table to recalculate. - */ - if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB && - rx_rule && recalc_cs_required && dest_action) { + if (recalc_cs_required && dest_action) { ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr); if (ret) { mlx5dr_err(dmn, @@ -776,6 +865,7 @@ static unsigned int action_size[DR_ACTION_TYP_MAX] = { [DR_ACTION_TYP_INSERT_HDR] = sizeof(struct mlx5dr_action_reformat), [DR_ACTION_TYP_REMOVE_HDR] = sizeof(struct mlx5dr_action_reformat), [DR_ACTION_TYP_SAMPLER] = sizeof(struct mlx5dr_action_sampler), + [DR_ACTION_TYP_ASO_FLOW_METER] = sizeof(struct mlx5dr_action_aso_flow_meter), }; static struct mlx5dr_action * @@ -847,7 +937,8 @@ struct mlx5dr_action * mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, struct mlx5dr_action_dest *dests, u32 num_of_dests, - bool ignore_flow_level) + bool ignore_flow_level, + u32 flow_source) { struct mlx5dr_cmd_flow_destination_hw_info *hw_dests; struct mlx5dr_action **ref_actions; @@ -919,7 +1010,8 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, reformat_req, &action->dest_tbl->fw_tbl.id, &action->dest_tbl->fw_tbl.group_id, - ignore_flow_level); + ignore_flow_level, + flow_source); if (ret) goto free_action; @@ -1129,7 +1221,8 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn, } action->rewrite->data = (void *)hw_actions; - action->rewrite->index = (action->rewrite->chunk->icm_addr - + action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr + (action->rewrite->chunk) - dmn->info.caps.hdr_modify_icm_addr) / ACTION_CACHE_LINE_SIZE; @@ -1571,6 +1664,7 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action, const struct mlx5dr_ste_action_modify_field *hw_dst_action_info; const struct mlx5dr_ste_action_modify_field *hw_src_action_info; struct mlx5dr_domain *dmn = action->rewrite->dmn; + __be64 *modify_ttl_sw_action = NULL; int ret, i, hw_idx = 0; __be64 *sw_action; __be64 hw_action; @@ -1583,16 +1677,26 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action, action->rewrite->allow_rx = 1; action->rewrite->allow_tx = 1; - for (i = 0; i < num_sw_actions; i++) { - sw_action = &sw_actions[i]; + for (i = 0; i < num_sw_actions || modify_ttl_sw_action; i++) { + /* modify TTL is handled separately, as a last action */ + if (i == num_sw_actions) { + sw_action = modify_ttl_sw_action; + modify_ttl_sw_action = NULL; + } else { + sw_action = &sw_actions[i]; + } ret = dr_action_modify_check_field_limitation(action, sw_action); if (ret) return ret; - if (!(*modify_ttl)) - *modify_ttl = dr_action_modify_check_is_ttl_modify(sw_action); + if (!(*modify_ttl) && + dr_action_modify_check_is_ttl_modify(sw_action)) { + modify_ttl_sw_action = sw_action; + *modify_ttl = true; + continue; + } /* Convert SW action to HW action */ ret = dr_action_modify_sw_to_hw(dmn, @@ -1631,7 +1735,7 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action, * modify actions doesn't exceeds the limit */ hw_idx++; - if ((num_sw_actions + hw_idx - i) >= max_hw_actions) { + if (hw_idx >= max_hw_actions) { mlx5dr_dbg(dmn, "Modify header action number exceeds HW limit\n"); return -EINVAL; } @@ -1642,6 +1746,10 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action, hw_idx++; } + /* if the resulting HW actions list is empty, add NOP action */ + if (!hw_idx) + hw_idx++; + *num_hw_actions = hw_idx; return 0; @@ -1693,7 +1801,7 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn, action->rewrite->modify_ttl = modify_ttl; action->rewrite->data = (u8 *)hw_actions; action->rewrite->num_of_actions = num_hw_actions; - action->rewrite->index = (chunk->icm_addr - + action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr(chunk) - dmn->info.caps.hdr_modify_icm_addr) / ACTION_CACHE_LINE_SIZE; @@ -1790,9 +1898,37 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn, return action; } +struct mlx5dr_action * +mlx5dr_action_create_aso(struct mlx5dr_domain *dmn, u32 obj_id, + u8 dest_reg_id, u8 aso_type, + u8 init_color, u8 meter_id) +{ + struct mlx5dr_action *action; + + if (aso_type != MLX5_EXE_ASO_FLOW_METER) + return NULL; + + if (init_color > MLX5_FLOW_METER_COLOR_UNDEFINED) + return NULL; + + action = dr_action_create_generic(DR_ACTION_TYP_ASO_FLOW_METER); + if (!action) + return NULL; + + action->aso->obj_id = obj_id; + action->aso->offset = meter_id; + action->aso->dest_reg_id = dest_reg_id; + action->aso->init_color = init_color; + action->aso->dmn = dmn; + + refcount_inc(&dmn->refcount); + + return action; +} + int mlx5dr_action_destroy(struct mlx5dr_action *action) { - if (refcount_read(&action->refcount) > 1) + if (WARN_ON_ONCE(refcount_read(&action->refcount) > 1)) return -EBUSY; switch (action->action_type) { @@ -1841,6 +1977,9 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action) case DR_ACTION_TYP_SAMPLER: refcount_dec(&action->sampler->dmn->refcount); break; + case DR_ACTION_TYP_ASO_FLOW_METER: + refcount_dec(&action->aso->dmn->refcount); + break; default: break; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 1d8febed0d76..16d65fe4f654 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -132,6 +132,13 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, caps->isolate_vl_tc = MLX5_CAP_GEN(mdev, isolate_vl_tc_new); + /* geneve_tlv_option_0_exist is the indication of + * STE support for lookup type flex_parser_ok + */ + caps->flex_parser_ok_bits_supp = + MLX5_CAP_FLOWTABLE(mdev, + flow_table_properties_nic_receive.ft_field_support.geneve_tlv_option_0_exist); + if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED) { caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0); caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1); @@ -152,7 +159,7 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, caps->flex_parser_id_mpls_over_gre = MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_gre); - if (caps->flex_protocols & mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED) + if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED) caps->flex_parser_id_mpls_over_udp = MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_udp_label); @@ -304,7 +311,7 @@ int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev, in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); MLX5_SET(dest_format_struct, in_dests, destination_type, - MLX5_FLOW_DESTINATION_TYPE_VPORT); + MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT); MLX5_SET(dest_format_struct, in_dests, destination_id, vport); err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); @@ -432,6 +439,7 @@ int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); MLX5_SET(create_flow_table_in, in, table_type, attr->table_type); + MLX5_SET(create_flow_table_in, in, uid, attr->uid); ft_mdev = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context); MLX5_SET(flow_table_context, ft_mdev, termination_table, attr->term_tbl); @@ -597,9 +605,11 @@ static int mlx5dr_cmd_set_extended_dest(struct mlx5_core_dev *dev, if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return 0; for (i = 0; i < fte->dests_size; i++) { - if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_COUNTER || + fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_NONE) continue; - if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + if ((fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && fte->dest_arr[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) num_encap++; num_fwd_destinations++; @@ -711,25 +721,40 @@ int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev, int list_size = 0; for (i = 0; i < fte->dests_size; i++) { - unsigned int id, type = fte->dest_arr[i].type; + enum mlx5_flow_destination_type type = fte->dest_arr[i].type; + enum mlx5_ifc_flow_destination_type ifc_type; + unsigned int id; if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) continue; switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_NONE: + continue; case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: id = fte->dest_arr[i].ft_num; - type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE; break; case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: id = fte->dest_arr[i].ft_id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE; + break; + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: case MLX5_FLOW_DESTINATION_TYPE_VPORT: - id = fte->dest_arr[i].vport.num; - MLX5_SET(dest_format_struct, in_dests, - destination_eswitch_owner_vhca_id_valid, - !!(fte->dest_arr[i].vport.flags & - MLX5_FLOW_DEST_VPORT_VHCA_ID)); + if (type == MLX5_FLOW_DESTINATION_TYPE_VPORT) { + id = fte->dest_arr[i].vport.num; + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id_valid, + !!(fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID)); + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT; + } else { + id = 0; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK; + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id_valid, 1); + } MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id, fte->dest_arr[i].vport.vhca_id); @@ -746,13 +771,15 @@ int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev, break; case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER: id = fte->dest_arr[i].sampler_id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; break; default: id = fte->dest_arr[i].tir_num; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR; } MLX5_SET(dest_format_struct, in_dests, destination_type, - type); + ifc_type); MLX5_SET(dest_format_struct, in_dests, destination_id, id); in_dests += dst_cnt_size; list_size++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c new file mode 100644 index 000000000000..7adcf0eec13b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c @@ -0,0 +1,657 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/debugfs.h> +#include <linux/kernel.h> +#include <linux/seq_file.h> +#include "dr_types.h" + +#define DR_DBG_PTR_TO_ID(p) ((u64)(uintptr_t)(p) & 0xFFFFFFFFULL) + +enum dr_dump_rec_type { + DR_DUMP_REC_TYPE_DOMAIN = 3000, + DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER = 3001, + DR_DUMP_REC_TYPE_DOMAIN_INFO_DEV_ATTR = 3002, + DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT = 3003, + DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS = 3004, + DR_DUMP_REC_TYPE_DOMAIN_SEND_RING = 3005, + + DR_DUMP_REC_TYPE_TABLE = 3100, + DR_DUMP_REC_TYPE_TABLE_RX = 3101, + DR_DUMP_REC_TYPE_TABLE_TX = 3102, + + DR_DUMP_REC_TYPE_MATCHER = 3200, + DR_DUMP_REC_TYPE_MATCHER_MASK_DEPRECATED = 3201, + DR_DUMP_REC_TYPE_MATCHER_RX = 3202, + DR_DUMP_REC_TYPE_MATCHER_TX = 3203, + DR_DUMP_REC_TYPE_MATCHER_BUILDER = 3204, + DR_DUMP_REC_TYPE_MATCHER_MASK = 3205, + + DR_DUMP_REC_TYPE_RULE = 3300, + DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 = 3301, + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V0 = 3302, + DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V1 = 3303, + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V1 = 3304, + + DR_DUMP_REC_TYPE_ACTION_ENCAP_L2 = 3400, + DR_DUMP_REC_TYPE_ACTION_ENCAP_L3 = 3401, + DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR = 3402, + DR_DUMP_REC_TYPE_ACTION_DROP = 3403, + DR_DUMP_REC_TYPE_ACTION_QP = 3404, + DR_DUMP_REC_TYPE_ACTION_FT = 3405, + DR_DUMP_REC_TYPE_ACTION_CTR = 3406, + DR_DUMP_REC_TYPE_ACTION_TAG = 3407, + DR_DUMP_REC_TYPE_ACTION_VPORT = 3408, + DR_DUMP_REC_TYPE_ACTION_DECAP_L2 = 3409, + DR_DUMP_REC_TYPE_ACTION_DECAP_L3 = 3410, + DR_DUMP_REC_TYPE_ACTION_DEVX_TIR = 3411, + DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN = 3412, + DR_DUMP_REC_TYPE_ACTION_POP_VLAN = 3413, + DR_DUMP_REC_TYPE_ACTION_SAMPLER = 3415, + DR_DUMP_REC_TYPE_ACTION_INSERT_HDR = 3420, + DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR = 3421 +}; + +void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl) +{ + mutex_lock(&tbl->dmn->dump_info.dbg_mutex); + list_add_tail(&tbl->dbg_node, &tbl->dmn->dbg_tbl_list); + mutex_unlock(&tbl->dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_tbl_del(struct mlx5dr_table *tbl) +{ + mutex_lock(&tbl->dmn->dump_info.dbg_mutex); + list_del(&tbl->dbg_node); + mutex_unlock(&tbl->dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_rule_add(struct mlx5dr_rule *rule) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + + mutex_lock(&dmn->dump_info.dbg_mutex); + list_add_tail(&rule->dbg_node, &rule->matcher->dbg_rule_list); + mutex_unlock(&dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_rule_del(struct mlx5dr_rule *rule) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + + mutex_lock(&dmn->dump_info.dbg_mutex); + list_del(&rule->dbg_node); + mutex_unlock(&dmn->dump_info.dbg_mutex); +} + +static u64 dr_dump_icm_to_idx(u64 icm_addr) +{ + return (icm_addr >> 6) & 0xffffffff; +} + +#define DR_HEX_SIZE 256 + +static void +dr_dump_hex_print(char hex[DR_HEX_SIZE], char *src, u32 size) +{ + if (WARN_ON_ONCE(DR_HEX_SIZE < 2 * size + 1)) + size = DR_HEX_SIZE / 2 - 1; /* truncate */ + + bin2hex(hex, src, size); + hex[2 * size] = 0; /* NULL-terminate */ +} + +static int +dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id, + struct mlx5dr_rule_action_member *action_mem) +{ + struct mlx5dr_action *action = action_mem->action; + const u64 action_id = DR_DBG_PTR_TO_ID(action); + + switch (action->action_type) { + case DR_ACTION_TYP_DROP: + seq_printf(file, "%d,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_DROP, action_id, rule_id); + break; + case DR_ACTION_TYP_FT: + if (action->dest_tbl->is_fw_tbl) + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_FT, action_id, + rule_id, action->dest_tbl->fw_tbl.id, + -1); + else + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_FT, action_id, + rule_id, action->dest_tbl->tbl->table_id, + DR_DBG_PTR_TO_ID(action->dest_tbl->tbl)); + + break; + case DR_ACTION_TYP_CTR: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_CTR, action_id, rule_id, + action->ctr->ctr_id + action->ctr->offset); + break; + case DR_ACTION_TYP_TAG: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_TAG, action_id, rule_id, + action->flow_tag->flow_tag); + break; + case DR_ACTION_TYP_MODIFY_HDR: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR, action_id, + rule_id, action->rewrite->index); + break; + case DR_ACTION_TYP_VPORT: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_VPORT, action_id, rule_id, + action->vport->caps->num); + break; + case DR_ACTION_TYP_TNL_L2_TO_L2: + seq_printf(file, "%d,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_DECAP_L2, action_id, + rule_id); + break; + case DR_ACTION_TYP_TNL_L3_TO_L2: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_DECAP_L3, action_id, + rule_id, action->rewrite->index); + break; + case DR_ACTION_TYP_L2_TO_TNL_L2: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_ENCAP_L2, action_id, + rule_id, action->reformat->id); + break; + case DR_ACTION_TYP_L2_TO_TNL_L3: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_ENCAP_L3, action_id, + rule_id, action->reformat->id); + break; + case DR_ACTION_TYP_POP_VLAN: + seq_printf(file, "%d,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_POP_VLAN, action_id, + rule_id); + break; + case DR_ACTION_TYP_PUSH_VLAN: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN, action_id, + rule_id, action->push_vlan->vlan_hdr); + break; + case DR_ACTION_TYP_INSERT_HDR: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_INSERT_HDR, action_id, + rule_id, action->reformat->id, + action->reformat->param_0, + action->reformat->param_1); + break; + case DR_ACTION_TYP_REMOVE_HDR: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR, action_id, + rule_id, action->reformat->id, + action->reformat->param_0, + action->reformat->param_1); + break; + case DR_ACTION_TYP_SAMPLER: + seq_printf(file, + "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_SAMPLER, action_id, rule_id, + 0, 0, action->sampler->sampler_id, + action->sampler->rx_icm_addr, + action->sampler->tx_icm_addr); + break; + default: + return 0; + } + + return 0; +} + +static int +dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste, + bool is_rx, const u64 rule_id, u8 format_ver) +{ + char hw_ste_dump[DR_HEX_SIZE]; + u32 mem_rec_type; + + if (format_ver == MLX5_STEERING_FORMAT_CONNECTX_5) { + mem_rec_type = is_rx ? DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 : + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V0; + } else { + mem_rec_type = is_rx ? DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V1 : + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V1; + } + + dr_dump_hex_print(hw_ste_dump, (char *)mlx5dr_ste_get_hw_ste(ste), + DR_STE_SIZE_REDUCED); + + seq_printf(file, "%d,0x%llx,0x%llx,%s\n", mem_rec_type, + dr_dump_icm_to_idx(mlx5dr_ste_get_icm_addr(ste)), rule_id, + hw_ste_dump); + + return 0; +} + +static int +dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx, + bool is_rx, const u64 rule_id, u8 format_ver) +{ + struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES]; + struct mlx5dr_ste *curr_ste = rule_rx_tx->last_rule_ste; + int ret, i; + + if (mlx5dr_rule_get_reverse_rule_members(ste_arr, curr_ste, &i)) + return 0; + + while (i--) { + ret = dr_dump_rule_mem(file, ste_arr[i], is_rx, rule_id, + format_ver); + if (ret < 0) + return ret; + } + + return 0; +} + +static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule) +{ + struct mlx5dr_rule_action_member *action_mem; + const u64 rule_id = DR_DBG_PTR_TO_ID(rule); + struct mlx5dr_rule_rx_tx *rx = &rule->rx; + struct mlx5dr_rule_rx_tx *tx = &rule->tx; + u8 format_ver; + int ret; + + format_ver = rule->matcher->tbl->dmn->info.caps.sw_format_ver; + + seq_printf(file, "%d,0x%llx,0x%llx\n", DR_DUMP_REC_TYPE_RULE, rule_id, + DR_DBG_PTR_TO_ID(rule->matcher)); + + if (rx->nic_matcher) { + ret = dr_dump_rule_rx_tx(file, rx, true, rule_id, format_ver); + if (ret < 0) + return ret; + } + + if (tx->nic_matcher) { + ret = dr_dump_rule_rx_tx(file, tx, false, rule_id, format_ver); + if (ret < 0) + return ret; + } + + list_for_each_entry(action_mem, &rule->rule_actions_list, list) { + ret = dr_dump_rule_action_mem(file, rule_id, action_mem); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask, + u8 criteria, const u64 matcher_id) +{ + char dump[DR_HEX_SIZE]; + + seq_printf(file, "%d,0x%llx,", DR_DUMP_REC_TYPE_MATCHER_MASK, + matcher_id); + + if (criteria & DR_MATCHER_CRITERIA_OUTER) { + dr_dump_hex_print(dump, (char *)&mask->outer, sizeof(mask->outer)); + seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_INNER) { + dr_dump_hex_print(dump, (char *)&mask->inner, sizeof(mask->inner)); + seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_MISC) { + dr_dump_hex_print(dump, (char *)&mask->misc, sizeof(mask->misc)); + seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_MISC2) { + dr_dump_hex_print(dump, (char *)&mask->misc2, sizeof(mask->misc2)); + seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_MISC3) { + dr_dump_hex_print(dump, (char *)&mask->misc3, sizeof(mask->misc3)); + seq_printf(file, "%s\n", dump); + } else { + seq_puts(file, ",\n"); + } + + return 0; +} + +static int +dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder, + u32 index, bool is_rx, const u64 matcher_id) +{ + seq_printf(file, "%d,0x%llx,%d,%d,0x%x\n", + DR_DUMP_REC_TYPE_MATCHER_BUILDER, matcher_id, index, is_rx, + builder->lu_type); + + return 0; +} + +static int +dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx, + struct mlx5dr_matcher_rx_tx *matcher_rx_tx, + const u64 matcher_id) +{ + enum dr_dump_rec_type rec_type; + u64 s_icm_addr, e_icm_addr; + int i, ret; + + rec_type = is_rx ? DR_DUMP_REC_TYPE_MATCHER_RX : + DR_DUMP_REC_TYPE_MATCHER_TX; + + s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->s_htbl->chunk); + e_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->e_anchor->chunk); + seq_printf(file, "%d,0x%llx,0x%llx,%d,0x%llx,0x%llx\n", + rec_type, DR_DBG_PTR_TO_ID(matcher_rx_tx), + matcher_id, matcher_rx_tx->num_of_builders, + dr_dump_icm_to_idx(s_icm_addr), + dr_dump_icm_to_idx(e_icm_addr)); + + for (i = 0; i < matcher_rx_tx->num_of_builders; i++) { + ret = dr_dump_matcher_builder(file, + &matcher_rx_tx->ste_builder[i], + i, is_rx, matcher_id); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_matcher_rx_tx *rx = &matcher->rx; + struct mlx5dr_matcher_rx_tx *tx = &matcher->tx; + u64 matcher_id; + int ret; + + matcher_id = DR_DBG_PTR_TO_ID(matcher); + + seq_printf(file, "%d,0x%llx,0x%llx,%d\n", DR_DUMP_REC_TYPE_MATCHER, + matcher_id, DR_DBG_PTR_TO_ID(matcher->tbl), matcher->prio); + + ret = dr_dump_matcher_mask(file, &matcher->mask, + matcher->match_criteria, matcher_id); + if (ret < 0) + return ret; + + if (rx->nic_tbl) { + ret = dr_dump_matcher_rx_tx(file, true, rx, matcher_id); + if (ret < 0) + return ret; + } + + if (tx->nic_tbl) { + ret = dr_dump_matcher_rx_tx(file, false, tx, matcher_id); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_matcher_all(struct seq_file *file, struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_rule *rule; + int ret; + + ret = dr_dump_matcher(file, matcher); + if (ret < 0) + return ret; + + list_for_each_entry(rule, &matcher->dbg_rule_list, dbg_node) { + ret = dr_dump_rule(file, rule); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_table_rx_tx(struct seq_file *file, bool is_rx, + struct mlx5dr_table_rx_tx *table_rx_tx, + const u64 table_id) +{ + enum dr_dump_rec_type rec_type; + u64 s_icm_addr; + + rec_type = is_rx ? DR_DUMP_REC_TYPE_TABLE_RX : + DR_DUMP_REC_TYPE_TABLE_TX; + + s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(table_rx_tx->s_anchor->chunk); + seq_printf(file, "%d,0x%llx,0x%llx\n", rec_type, table_id, + dr_dump_icm_to_idx(s_icm_addr)); + + return 0; +} + +static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table) +{ + struct mlx5dr_table_rx_tx *rx = &table->rx; + struct mlx5dr_table_rx_tx *tx = &table->tx; + int ret; + + seq_printf(file, "%d,0x%llx,0x%llx,%d,%d\n", DR_DUMP_REC_TYPE_TABLE, + DR_DBG_PTR_TO_ID(table), DR_DBG_PTR_TO_ID(table->dmn), + table->table_type, table->level); + + if (rx->nic_dmn) { + ret = dr_dump_table_rx_tx(file, true, rx, + DR_DBG_PTR_TO_ID(table)); + if (ret < 0) + return ret; + } + + if (tx->nic_dmn) { + ret = dr_dump_table_rx_tx(file, false, tx, + DR_DBG_PTR_TO_ID(table)); + if (ret < 0) + return ret; + } + return 0; +} + +static int dr_dump_table_all(struct seq_file *file, struct mlx5dr_table *tbl) +{ + struct mlx5dr_matcher *matcher; + int ret; + + ret = dr_dump_table(file, tbl); + if (ret < 0) + return ret; + + list_for_each_entry(matcher, &tbl->matcher_list, list_node) { + ret = dr_dump_matcher_all(file, matcher); + if (ret < 0) + return ret; + } + return 0; +} + +static int +dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring, + const u64 domain_id) +{ + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n", + DR_DUMP_REC_TYPE_DOMAIN_SEND_RING, DR_DBG_PTR_TO_ID(ring), + domain_id, ring->cq->mcq.cqn, ring->qp->qpn); + return 0; +} + +static int +dr_dump_domain_info_flex_parser(struct seq_file *file, + const char *flex_parser_name, + const u8 flex_parser_value, + const u64 domain_id) +{ + seq_printf(file, "%d,0x%llx,%s,0x%x\n", + DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER, domain_id, + flex_parser_name, flex_parser_value); + return 0; +} + +static int +dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps, + const u64 domain_id) +{ + struct mlx5dr_cmd_vport_cap *vport_caps; + unsigned long i, vports_num; + + xa_for_each(&caps->vports.vports_caps_xa, vports_num, vport_caps) + ; /* count the number of vports in xarray */ + + seq_printf(file, "%d,0x%llx,0x%x,0x%llx,0x%llx,0x%x,%lu,%d\n", + DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS, domain_id, caps->gvmi, + caps->nic_rx_drop_address, caps->nic_tx_drop_address, + caps->flex_protocols, vports_num, caps->eswitch_manager); + + xa_for_each(&caps->vports.vports_caps_xa, i, vport_caps) { + vport_caps = xa_load(&caps->vports.vports_caps_xa, i); + + seq_printf(file, "%d,0x%llx,%lu,0x%x,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT, domain_id, i, + vport_caps->vport_gvmi, vport_caps->icm_address_rx, + vport_caps->icm_address_tx); + } + return 0; +} + +static int +dr_dump_domain_info(struct seq_file *file, struct mlx5dr_domain_info *info, + const u64 domain_id) +{ + int ret; + + ret = dr_dump_domain_info_caps(file, &info->caps, domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmp_dw0", + info->caps.flex_parser_id_icmp_dw0, + domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmp_dw1", + info->caps.flex_parser_id_icmp_dw1, + domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw0", + info->caps.flex_parser_id_icmpv6_dw0, + domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw1", + info->caps.flex_parser_id_icmpv6_dw1, + domain_id); + if (ret < 0) + return ret; + + return 0; +} + +static int +dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn) +{ + u64 domain_id = DR_DBG_PTR_TO_ID(dmn); + int ret; + + seq_printf(file, "%d,0x%llx,%d,0%x,%d,%s\n", DR_DUMP_REC_TYPE_DOMAIN, + domain_id, dmn->type, dmn->info.caps.gvmi, + dmn->info.supp_sw_steering, pci_name(dmn->mdev->pdev)); + + ret = dr_dump_domain_info(file, &dmn->info, domain_id); + if (ret < 0) + return ret; + + if (dmn->info.supp_sw_steering) { + ret = dr_dump_send_ring(file, dmn->send_ring, domain_id); + if (ret < 0) + return ret; + } + + return 0; +} + +static int dr_dump_domain_all(struct seq_file *file, struct mlx5dr_domain *dmn) +{ + struct mlx5dr_table *tbl; + int ret; + + mutex_lock(&dmn->dump_info.dbg_mutex); + mlx5dr_domain_lock(dmn); + + ret = dr_dump_domain(file, dmn); + if (ret < 0) + goto unlock_mutex; + + list_for_each_entry(tbl, &dmn->dbg_tbl_list, dbg_node) { + ret = dr_dump_table_all(file, tbl); + if (ret < 0) + break; + } + +unlock_mutex: + mlx5dr_domain_unlock(dmn); + mutex_unlock(&dmn->dump_info.dbg_mutex); + return ret; +} + +static int dr_dump_show(struct seq_file *file, void *priv) +{ + return dr_dump_domain_all(file, file->private); +} +DEFINE_SHOW_ATTRIBUTE(dr_dump); + +void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn) +{ + struct mlx5_core_dev *dev = dmn->mdev; + char file_name[128]; + + if (dmn->type != MLX5DR_DOMAIN_TYPE_FDB) { + mlx5_core_warn(dev, + "Steering dump is not supported for NIC RX/TX domains\n"); + return; + } + + dmn->dump_info.steering_debugfs = + debugfs_create_dir("steering", mlx5_debugfs_get_dev_root(dev)); + dmn->dump_info.fdb_debugfs = + debugfs_create_dir("fdb", dmn->dump_info.steering_debugfs); + + sprintf(file_name, "dmn_%p", dmn); + debugfs_create_file(file_name, 0444, dmn->dump_info.fdb_debugfs, + dmn, &dr_dump_fops); + + INIT_LIST_HEAD(&dmn->dbg_tbl_list); + mutex_init(&dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_uninit_dump(struct mlx5dr_domain *dmn) +{ + debugfs_remove_recursive(dmn->dump_info.steering_debugfs); + mutex_destroy(&dmn->dump_info.dbg_mutex); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h new file mode 100644 index 000000000000..def6cf853eea --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +struct mlx5dr_dbg_dump_info { + struct mutex dbg_mutex; /* protect dbg lists */ + struct dentry *steering_debugfs; + struct dentry *fdb_debugfs; +}; + +void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn); +void mlx5dr_dbg_uninit_dump(struct mlx5dr_domain *dmn); +void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl); +void mlx5dr_dbg_tbl_del(struct mlx5dr_table *tbl); +void mlx5dr_dbg_rule_add(struct mlx5dr_rule *rule); +void mlx5dr_dbg_rule_del(struct mlx5dr_rule *rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 8cbd36c82b3b..fc6ae49b5ecc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -2,12 +2,13 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include <linux/mlx5/eswitch.h> +#include <linux/err.h> #include "dr_types.h" #define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \ ((dmn)->info.caps.dmn_type##_sw_owner || \ ((dmn)->info.caps.dmn_type##_sw_owner_v2 && \ - (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_6DX)) + (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_7)) static void dr_domain_init_csum_recalc_fts(struct mlx5dr_domain *dmn) { @@ -72,9 +73,9 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn) } dmn->uar = mlx5_get_uars_page(dmn->mdev); - if (!dmn->uar) { + if (IS_ERR(dmn->uar)) { mlx5dr_err(dmn, "Couldn't allocate UAR\n"); - ret = -ENOMEM; + ret = PTR_ERR(dmn->uar); goto clean_pd; } @@ -163,9 +164,7 @@ static int dr_domain_query_vport(struct mlx5dr_domain *dmn, static int dr_domain_query_esw_mngr(struct mlx5dr_domain *dmn) { - return dr_domain_query_vport(dmn, - dmn->info.caps.is_ecpf ? MLX5_VPORT_ECPF : 0, - false, + return dr_domain_query_vport(dmn, 0, false, &dmn->info.caps.vports.esw_manager_caps); } @@ -396,7 +395,7 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) } dr_domain_init_csum_recalc_fts(dmn); - + mlx5dr_dbg_init_dump(dmn); return dmn; uninit_caps: @@ -432,11 +431,12 @@ int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags) int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) { - if (refcount_read(&dmn->refcount) > 1) + if (WARN_ON_ONCE(refcount_read(&dmn->refcount) > 1)) return -EBUSY; /* make sure resources are not used by the hardware */ mlx5dr_cmd_sync_steering(dmn->mdev); + mlx5dr_dbg_uninit_dump(dmn); dr_domain_uninit_csum_recalc_fts(dmn); dr_domain_uninit_resources(dmn); dr_domain_caps_uninit(dmn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c index 68a4c32d5f34..f05ef0cd54ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c @@ -104,7 +104,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, bool reformat_req, u32 *tbl_id, u32 *group_id, - bool ignore_flow_level) + bool ignore_flow_level, + u32 flow_source) { struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; struct mlx5dr_cmd_fte_info fte_info = {}; @@ -139,6 +140,7 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, fte_info.val = val; fte_info.dest_arr = dest; fte_info.ignore_flow_level = ignore_flow_level; + fte_info.flow_context.flow_source = flow_source; ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info); if (ret) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c index 7f6fd9c5e371..4ca67fa24cc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -4,7 +4,6 @@ #include "dr_types.h" #define DR_ICM_MODIFY_HDR_ALIGN_BASE 64 -#define DR_ICM_SYNC_THRESHOLD_POOL (64 * 1024 * 1024) struct mlx5dr_icm_pool { enum mlx5dr_icm_type icm_type; @@ -58,6 +57,36 @@ static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev, return mlx5_core_create_mkey(mdev, mkey, in, inlen); } +u64 mlx5dr_icm_pool_get_chunk_mr_addr(struct mlx5dr_icm_chunk *chunk) +{ + u32 offset = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type); + + return (u64)offset * chunk->seg; +} + +u32 mlx5dr_icm_pool_get_chunk_rkey(struct mlx5dr_icm_chunk *chunk) +{ + return chunk->buddy_mem->icm_mr->mkey; +} + +u64 mlx5dr_icm_pool_get_chunk_icm_addr(struct mlx5dr_icm_chunk *chunk) +{ + u32 size = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type); + + return (u64)chunk->buddy_mem->icm_mr->icm_start_addr + size * chunk->seg; +} + +u32 mlx5dr_icm_pool_get_chunk_byte_size(struct mlx5dr_icm_chunk *chunk) +{ + return mlx5dr_icm_pool_chunk_size_to_byte(chunk->size, + chunk->buddy_mem->pool->icm_type); +} + +u32 mlx5dr_icm_pool_get_chunk_num_of_entries(struct mlx5dr_icm_chunk *chunk) +{ + return mlx5dr_icm_pool_chunk_size_to_entries(chunk->size); +} + static struct mlx5dr_icm_mr * dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool) { @@ -136,37 +165,36 @@ static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr) kvfree(icm_mr); } -static int dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk) +static int dr_icm_buddy_get_ste_size(struct mlx5dr_icm_buddy_mem *buddy) { - chunk->ste_arr = kvzalloc(chunk->num_of_entries * - sizeof(chunk->ste_arr[0]), GFP_KERNEL); - if (!chunk->ste_arr) - return -ENOMEM; - - chunk->hw_ste_arr = kvzalloc(chunk->num_of_entries * - DR_STE_SIZE_REDUCED, GFP_KERNEL); - if (!chunk->hw_ste_arr) - goto out_free_ste_arr; - - chunk->miss_list = kvmalloc(chunk->num_of_entries * - sizeof(chunk->miss_list[0]), GFP_KERNEL); - if (!chunk->miss_list) - goto out_free_hw_ste_arr; + /* We support only one type of STE size, both for ConnectX-5 and later + * devices. Once the support for match STE which has a larger tag is + * added (32B instead of 16B), the STE size for devices later than + * ConnectX-5 needs to account for that. + */ + return DR_STE_SIZE_REDUCED; +} - return 0; +static void dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk, int offset) +{ + struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem; + int index = offset / DR_STE_SIZE; -out_free_hw_ste_arr: - kvfree(chunk->hw_ste_arr); -out_free_ste_arr: - kvfree(chunk->ste_arr); - return -ENOMEM; + chunk->ste_arr = &buddy->ste_arr[index]; + chunk->miss_list = &buddy->miss_list[index]; + chunk->hw_ste_arr = buddy->hw_ste_arr + + index * dr_icm_buddy_get_ste_size(buddy); } static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk) { - kvfree(chunk->miss_list); - kvfree(chunk->hw_ste_arr); - kvfree(chunk->ste_arr); + int num_of_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk); + struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem; + + memset(chunk->hw_ste_arr, 0, + num_of_entries * dr_icm_buddy_get_ste_size(buddy)); + memset(chunk->ste_arr, 0, + num_of_entries * sizeof(chunk->ste_arr[0])); } static enum mlx5dr_icm_type @@ -180,7 +208,7 @@ static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk, { enum mlx5dr_icm_type icm_type = get_chunk_icm_type(chunk); - buddy->used_memory -= chunk->byte_size; + buddy->used_memory -= mlx5dr_icm_pool_get_chunk_byte_size(chunk); list_del(&chunk->chunk_list); if (icm_type == DR_ICM_TYPE_STE) @@ -189,6 +217,44 @@ static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk, kvfree(chunk); } +static int dr_icm_buddy_init_ste_cache(struct mlx5dr_icm_buddy_mem *buddy) +{ + int num_of_entries = + mlx5dr_icm_pool_chunk_size_to_entries(buddy->pool->max_log_chunk_sz); + + buddy->ste_arr = kvcalloc(num_of_entries, + sizeof(struct mlx5dr_ste), GFP_KERNEL); + if (!buddy->ste_arr) + return -ENOMEM; + + /* Preallocate full STE size on non-ConnectX-5 devices since + * we need to support both full and reduced with the same cache. + */ + buddy->hw_ste_arr = kvcalloc(num_of_entries, + dr_icm_buddy_get_ste_size(buddy), GFP_KERNEL); + if (!buddy->hw_ste_arr) + goto free_ste_arr; + + buddy->miss_list = kvmalloc(num_of_entries * sizeof(struct list_head), GFP_KERNEL); + if (!buddy->miss_list) + goto free_hw_ste_arr; + + return 0; + +free_hw_ste_arr: + kvfree(buddy->hw_ste_arr); +free_ste_arr: + kvfree(buddy->ste_arr); + return -ENOMEM; +} + +static void dr_icm_buddy_cleanup_ste_cache(struct mlx5dr_icm_buddy_mem *buddy) +{ + kvfree(buddy->ste_arr); + kvfree(buddy->hw_ste_arr); + kvfree(buddy->miss_list); +} + static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool) { struct mlx5dr_icm_buddy_mem *buddy; @@ -208,11 +274,19 @@ static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool) buddy->icm_mr = icm_mr; buddy->pool = pool; + if (pool->icm_type == DR_ICM_TYPE_STE) { + /* Reduce allocations by preallocating and reusing the STE structures */ + if (dr_icm_buddy_init_ste_cache(buddy)) + goto err_cleanup_buddy; + } + /* add it to the -start- of the list in order to search in it first */ list_add(&buddy->list_node, &pool->buddy_mem_list); return 0; +err_cleanup_buddy: + mlx5dr_buddy_cleanup(buddy); err_free_buddy: kvfree(buddy); free_mr: @@ -234,6 +308,9 @@ static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy) mlx5dr_buddy_cleanup(buddy); + if (buddy->pool->icm_type == DR_ICM_TYPE_STE) + dr_icm_buddy_cleanup_ste_cache(buddy); + kvfree(buddy); } @@ -252,48 +329,38 @@ dr_icm_chunk_create(struct mlx5dr_icm_pool *pool, offset = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type) * seg; - chunk->rkey = buddy_mem_pool->icm_mr->mkey; - chunk->mr_addr = offset; - chunk->icm_addr = - (uintptr_t)buddy_mem_pool->icm_mr->icm_start_addr + offset; - chunk->num_of_entries = - mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); - chunk->byte_size = - mlx5dr_icm_pool_chunk_size_to_byte(chunk_size, pool->icm_type); chunk->seg = seg; + chunk->size = chunk_size; + chunk->buddy_mem = buddy_mem_pool; - if (pool->icm_type == DR_ICM_TYPE_STE && dr_icm_chunk_ste_init(chunk)) { - mlx5dr_err(pool->dmn, - "Failed to init ste arrays (order: %d)\n", - chunk_size); - goto out_free_chunk; - } + if (pool->icm_type == DR_ICM_TYPE_STE) + dr_icm_chunk_ste_init(chunk, offset); - buddy_mem_pool->used_memory += chunk->byte_size; - chunk->buddy_mem = buddy_mem_pool; + buddy_mem_pool->used_memory += mlx5dr_icm_pool_get_chunk_byte_size(chunk); INIT_LIST_HEAD(&chunk->chunk_list); /* chunk now is part of the used_list */ list_add_tail(&chunk->chunk_list, &buddy_mem_pool->used_list); return chunk; - -out_free_chunk: - kvfree(chunk); - return NULL; } static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool) { - if (pool->hot_memory_size > DR_ICM_SYNC_THRESHOLD_POOL) - return true; + int allow_hot_size; + + /* sync when hot memory reaches half of the pool size */ + allow_hot_size = + mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) / 2; - return false; + return pool->hot_memory_size > allow_hot_size; } static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool) { struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy; + u32 num_entries; int err; err = mlx5dr_cmd_sync_steering(pool->dmn->mdev); @@ -306,9 +373,9 @@ static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool) struct mlx5dr_icm_chunk *chunk, *tmp_chunk; list_for_each_entry_safe(chunk, tmp_chunk, &buddy->hot_list, chunk_list) { - mlx5dr_buddy_free_mem(buddy, chunk->seg, - ilog2(chunk->num_of_entries)); - pool->hot_memory_size -= chunk->byte_size; + num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk); + mlx5dr_buddy_free_mem(buddy, chunk->seg, ilog2(num_entries)); + pool->hot_memory_size -= mlx5dr_icm_pool_get_chunk_byte_size(chunk); dr_icm_chunk_destroy(chunk, buddy); } @@ -406,7 +473,7 @@ void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk) /* move the memory to the waiting list AKA "hot" */ mutex_lock(&pool->mutex); list_move_tail(&chunk->chunk_list, &buddy->hot_list); - pool->hot_memory_size += chunk->byte_size; + pool->hot_memory_size += mlx5dr_icm_pool_get_chunk_byte_size(chunk); /* Check if we have chunks that are waiting for sync-ste */ if (dr_icm_pool_is_sync_required(pool)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index 793365242e85..0726848eb3ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -13,18 +13,6 @@ static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec) return (spec->dmac_47_16 || spec->dmac_15_0); } -static bool dr_mask_is_src_addr_set(struct mlx5dr_match_spec *spec) -{ - return (spec->src_ip_127_96 || spec->src_ip_95_64 || - spec->src_ip_63_32 || spec->src_ip_31_0); -} - -static bool dr_mask_is_dst_addr_set(struct mlx5dr_match_spec *spec) -{ - return (spec->dst_ip_127_96 || spec->dst_ip_95_64 || - spec->dst_ip_63_32 || spec->dst_ip_31_0); -} - static bool dr_mask_is_l3_base_set(struct mlx5dr_match_spec *spec) { return (spec->ip_protocol || spec->frag || spec->tcp_flags || @@ -59,6 +47,11 @@ static bool dr_mask_is_ttl_set(struct mlx5dr_match_spec *spec) return spec->ttl_hoplimit; } +static bool dr_mask_is_ipv4_ihl_set(struct mlx5dr_match_spec *spec) +{ + return spec->ipv4_ihl; +} + #define DR_MASK_IS_L2_DST(_spec, _misc, _inner_outer) (_spec.first_vid || \ (_spec).first_cfi || (_spec).first_prio || (_spec).cvlan_tag || \ (_spec).svlan_tag || (_spec).dmac_47_16 || (_spec).dmac_15_0 || \ @@ -115,7 +108,7 @@ dr_mask_is_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3) static bool dr_matcher_supp_vxlan_gpe(struct mlx5dr_cmd_caps *caps) { - return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) || + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || (caps->flex_protocols & MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED); } @@ -141,9 +134,22 @@ static bool dr_mask_is_tnl_geneve_tlv_opt(struct mlx5dr_match_misc3 *misc3) } static bool +dr_matcher_supp_flex_parser_ok(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_parser_ok_bits_supp; +} + +static bool dr_mask_is_tnl_geneve_tlv_opt_exist_set(struct mlx5dr_match_misc *misc, + struct mlx5dr_domain *dmn) +{ + return dr_matcher_supp_flex_parser_ok(&dmn->info.caps) && + misc->geneve_tlv_option_0_exist; +} + +static bool dr_matcher_supp_tnl_geneve(struct mlx5dr_cmd_caps *caps) { - return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) || + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || (caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_ENABLED); } @@ -260,13 +266,13 @@ static bool dr_mask_is_tnl_gtpu_any(struct mlx5dr_match_param *mask, static int dr_matcher_supp_icmp_v4(struct mlx5dr_cmd_caps *caps) { - return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) || + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED); } static int dr_matcher_supp_icmp_v6(struct mlx5dr_cmd_caps *caps) { - return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) || + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED); } @@ -359,7 +365,7 @@ static bool dr_mask_is_tnl_mpls_over_gre(struct mlx5dr_match_param *mask, static int dr_matcher_supp_tnl_mpls_over_udp(struct mlx5dr_cmd_caps *caps) { - return caps->flex_protocols & mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED; + return caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED; } static bool dr_mask_is_tnl_mpls_over_udp(struct mlx5dr_match_param *mask, @@ -368,6 +374,12 @@ static bool dr_mask_is_tnl_mpls_over_udp(struct mlx5dr_match_param *mask, return DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(&mask->misc2) && dr_matcher_supp_tnl_mpls_over_udp(&dmn->info.caps); } + +static bool dr_mask_is_tnl_header_0_1_set(struct mlx5dr_match_misc5 *misc5) +{ + return misc5->tunnel_header_0 || misc5->tunnel_header_1; +} + int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, struct mlx5dr_matcher_rx_tx *nic_matcher, enum mlx5dr_ipv outer_ipv, @@ -424,6 +436,9 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4) mask.misc4 = matcher->mask.misc4; + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC5) + mask.misc5 = matcher->mask.misc5; + ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria, &matcher->mask, NULL); if (ret) @@ -443,7 +458,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (matcher->match_criteria & (DR_MATCHER_CRITERIA_OUTER | DR_MATCHER_CRITERIA_MISC | DR_MATCHER_CRITERIA_MISC2 | - DR_MATCHER_CRITERIA_MISC3)) { + DR_MATCHER_CRITERIA_MISC3 | + DR_MATCHER_CRITERIA_MISC5)) { inner = false; if (dr_mask_is_wqe_metadata_set(&mask.misc2)) @@ -480,11 +496,11 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, &mask, inner, rx); if (outer_ipv == DR_RULE_IPV6) { - if (dr_mask_is_dst_addr_set(&mask.outer)) + if (DR_MASK_IS_DST_IP_SET(&mask.outer)) mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++], &mask, inner, rx); - if (dr_mask_is_src_addr_set(&mask.outer)) + if (DR_MASK_IS_SRC_IP_SET(&mask.outer)) mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++], &mask, inner, rx); @@ -496,7 +512,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, mlx5dr_ste_build_eth_l3_ipv4_5_tuple(ste_ctx, &sb[idx++], &mask, inner, rx); - if (dr_mask_is_ttl_set(&mask.outer)) + if (dr_mask_is_ttl_set(&mask.outer) || + dr_mask_is_ipv4_ihl_set(&mask.outer)) mlx5dr_ste_build_eth_l3_ipv4_misc(ste_ctx, &sb[idx++], &mask, inner, rx); } @@ -511,6 +528,10 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, mlx5dr_ste_build_tnl_geneve_tlv_opt(ste_ctx, &sb[idx++], &mask, &dmn->info.caps, inner, rx); + if (dr_mask_is_tnl_geneve_tlv_opt_exist_set(&mask.misc, dmn)) + mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); } else if (dr_mask_is_tnl_gtpu_any(&mask, dmn)) { if (dr_mask_is_tnl_gtpu_flex_parser_0(&mask, dmn)) mlx5dr_ste_build_tnl_gtpu_flex_parser_0(ste_ctx, &sb[idx++], @@ -525,6 +546,9 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (dr_mask_is_tnl_gtpu(&mask, dmn)) mlx5dr_ste_build_tnl_gtpu(ste_ctx, &sb[idx++], &mask, inner, rx); + } else if (dr_mask_is_tnl_header_0_1_set(&mask.misc5)) { + mlx5dr_ste_build_tnl_header_0_1(ste_ctx, &sb[idx++], + &mask, inner, rx); } if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer)) @@ -580,11 +604,11 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, &mask, inner, rx); if (inner_ipv == DR_RULE_IPV6) { - if (dr_mask_is_dst_addr_set(&mask.inner)) + if (DR_MASK_IS_DST_IP_SET(&mask.inner)) mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++], &mask, inner, rx); - if (dr_mask_is_src_addr_set(&mask.inner)) + if (DR_MASK_IS_SRC_IP_SET(&mask.inner)) mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++], &mask, inner, rx); @@ -596,7 +620,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, mlx5dr_ste_build_eth_l3_ipv4_5_tuple(ste_ctx, &sb[idx++], &mask, inner, rx); - if (dr_mask_is_ttl_set(&mask.inner)) + if (dr_mask_is_ttl_set(&mask.inner) || + dr_mask_is_ipv4_ihl_set(&mask.inner)) mlx5dr_ste_build_eth_l3_ipv4_misc(ste_ctx, &sb[idx++], &mask, inner, rx); } @@ -653,10 +678,10 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, return 0; } -static int dr_matcher_connect(struct mlx5dr_domain *dmn, - struct mlx5dr_matcher_rx_tx *curr_nic_matcher, - struct mlx5dr_matcher_rx_tx *next_nic_matcher, - struct mlx5dr_matcher_rx_tx *prev_nic_matcher) +static int dr_nic_matcher_connect(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *curr_nic_matcher, + struct mlx5dr_matcher_rx_tx *next_nic_matcher, + struct mlx5dr_matcher_rx_tx *prev_nic_matcher) { struct mlx5dr_table_rx_tx *nic_tbl = curr_nic_matcher->nic_tbl; struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; @@ -680,7 +705,7 @@ static int dr_matcher_connect(struct mlx5dr_domain *dmn, /* Connect start hash table to end anchor */ info.type = CONNECT_MISS; - info.miss_icm_addr = curr_nic_matcher->e_anchor->chunk->icm_addr; + info.miss_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(curr_nic_matcher->e_anchor->chunk); ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, curr_nic_matcher->s_htbl, &info, false); @@ -701,69 +726,63 @@ static int dr_matcher_connect(struct mlx5dr_domain *dmn, return ret; /* Update the pointing ste and next hash table */ - curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->ste_arr; - prev_htbl->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl; + curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->chunk->ste_arr; + prev_htbl->chunk->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl; if (next_nic_matcher) { - next_nic_matcher->s_htbl->pointing_ste = curr_nic_matcher->e_anchor->ste_arr; - curr_nic_matcher->e_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl; + next_nic_matcher->s_htbl->pointing_ste = + curr_nic_matcher->e_anchor->chunk->ste_arr; + curr_nic_matcher->e_anchor->chunk->ste_arr[0].next_htbl = + next_nic_matcher->s_htbl; } return 0; } -static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher) +int mlx5dr_matcher_add_to_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher) { - struct mlx5dr_matcher *next_matcher, *prev_matcher, *tmp_matcher; - struct mlx5dr_table *tbl = matcher->tbl; - struct mlx5dr_domain *dmn = tbl->dmn; + struct mlx5dr_matcher_rx_tx *next_nic_matcher, *prev_nic_matcher, *tmp_nic_matcher; + struct mlx5dr_table_rx_tx *nic_tbl = nic_matcher->nic_tbl; bool first = true; int ret; - next_matcher = NULL; - list_for_each_entry(tmp_matcher, &tbl->matcher_list, matcher_list) { - if (tmp_matcher->prio >= matcher->prio) { - next_matcher = tmp_matcher; + /* If the nic matcher is already on its parent nic table list, + * then it is already connected to the chain of nic matchers. + */ + if (!list_empty(&nic_matcher->list_node)) + return 0; + + next_nic_matcher = NULL; + list_for_each_entry(tmp_nic_matcher, &nic_tbl->nic_matcher_list, list_node) { + if (tmp_nic_matcher->prio >= nic_matcher->prio) { + next_nic_matcher = tmp_nic_matcher; break; } first = false; } - prev_matcher = NULL; - if (next_matcher && !first) - prev_matcher = list_prev_entry(next_matcher, matcher_list); + prev_nic_matcher = NULL; + if (next_nic_matcher && !first) + prev_nic_matcher = list_prev_entry(next_nic_matcher, list_node); else if (!first) - prev_matcher = list_last_entry(&tbl->matcher_list, - struct mlx5dr_matcher, - matcher_list); - - if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || - dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) { - ret = dr_matcher_connect(dmn, &matcher->rx, - next_matcher ? &next_matcher->rx : NULL, - prev_matcher ? &prev_matcher->rx : NULL); - if (ret) - return ret; - } + prev_nic_matcher = list_last_entry(&nic_tbl->nic_matcher_list, + struct mlx5dr_matcher_rx_tx, + list_node); - if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || - dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) { - ret = dr_matcher_connect(dmn, &matcher->tx, - next_matcher ? &next_matcher->tx : NULL, - prev_matcher ? &prev_matcher->tx : NULL); - if (ret) - return ret; - } + ret = dr_nic_matcher_connect(dmn, nic_matcher, + next_nic_matcher, prev_nic_matcher); + if (ret) + return ret; - if (prev_matcher) - list_add(&matcher->matcher_list, &prev_matcher->matcher_list); - else if (next_matcher) - list_add_tail(&matcher->matcher_list, - &next_matcher->matcher_list); + if (prev_nic_matcher) + list_add(&nic_matcher->list_node, &prev_nic_matcher->list_node); + else if (next_nic_matcher) + list_add_tail(&nic_matcher->list_node, &next_nic_matcher->list_node); else - list_add(&matcher->matcher_list, &tbl->matcher_list); + list_add(&nic_matcher->list_node, &nic_matcher->nic_tbl->nic_matcher_list); - return 0; + return ret; } static void dr_matcher_uninit_nic(struct mlx5dr_matcher_rx_tx *nic_matcher) @@ -822,6 +841,9 @@ static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher, struct mlx5dr_domain *dmn = matcher->tbl->dmn; int ret; + nic_matcher->prio = matcher->prio; + INIT_LIST_HEAD(&nic_matcher->list_node); + ret = dr_matcher_set_all_ste_builders(matcher, nic_matcher); if (ret) return ret; @@ -872,13 +894,12 @@ uninit_nic_rx: return ret; } -static int dr_matcher_init(struct mlx5dr_matcher *matcher, - struct mlx5dr_match_parameters *mask) +static int dr_matcher_copy_param(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *mask) { + struct mlx5dr_domain *dmn = matcher->tbl->dmn; struct mlx5dr_match_parameters consumed_mask; - struct mlx5dr_table *tbl = matcher->tbl; - struct mlx5dr_domain *dmn = tbl->dmn; - int i, ret; + int i, ret = 0; if (matcher->match_criteria >= DR_MATCHER_CRITERIA_MAX) { mlx5dr_err(dmn, "Invalid match criteria attribute\n"); @@ -898,10 +919,36 @@ static int dr_matcher_init(struct mlx5dr_matcher *matcher, consumed_mask.match_sz = mask->match_sz; memcpy(consumed_mask.match_buf, mask->match_buf, mask->match_sz); mlx5dr_ste_copy_param(matcher->match_criteria, - &matcher->mask, &consumed_mask, - true); + &matcher->mask, &consumed_mask, true); + + /* Check that all mask data was consumed */ + for (i = 0; i < consumed_mask.match_sz; i++) { + if (!((u8 *)consumed_mask.match_buf)[i]) + continue; + + mlx5dr_dbg(dmn, + "Match param mask contains unsupported parameters\n"); + ret = -EOPNOTSUPP; + break; + } + + kfree(consumed_mask.match_buf); } + return ret; +} + +static int dr_matcher_init(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *mask) +{ + struct mlx5dr_table *tbl = matcher->tbl; + struct mlx5dr_domain *dmn = tbl->dmn; + int ret; + + ret = dr_matcher_copy_param(matcher, mask); + if (ret) + return ret; + switch (dmn->type) { case MLX5DR_DOMAIN_TYPE_NIC_RX: matcher->rx.nic_tbl = &tbl->rx; @@ -919,23 +966,23 @@ static int dr_matcher_init(struct mlx5dr_matcher *matcher, default: WARN_ON(true); ret = -EINVAL; - goto free_consumed_mask; } - /* Check that all mask data was consumed */ - for (i = 0; i < consumed_mask.match_sz; i++) { - if (!((u8 *)consumed_mask.match_buf)[i]) - continue; + return ret; +} - mlx5dr_dbg(dmn, "Match param mask contains unsupported parameters\n"); - ret = -EOPNOTSUPP; - goto free_consumed_mask; - } +static void dr_matcher_add_to_dbg_list(struct mlx5dr_matcher *matcher) +{ + mutex_lock(&matcher->tbl->dmn->dump_info.dbg_mutex); + list_add(&matcher->list_node, &matcher->tbl->matcher_list); + mutex_unlock(&matcher->tbl->dmn->dump_info.dbg_mutex); +} - ret = 0; -free_consumed_mask: - kfree(consumed_mask.match_buf); - return ret; +static void dr_matcher_remove_from_dbg_list(struct mlx5dr_matcher *matcher) +{ + mutex_lock(&matcher->tbl->dmn->dump_info.dbg_mutex); + list_del(&matcher->list_node); + mutex_unlock(&matcher->tbl->dmn->dump_info.dbg_mutex); } struct mlx5dr_matcher * @@ -957,7 +1004,8 @@ mlx5dr_matcher_create(struct mlx5dr_table *tbl, matcher->prio = priority; matcher->match_criteria = match_criteria_enable; refcount_set(&matcher->refcount, 1); - INIT_LIST_HEAD(&matcher->matcher_list); + INIT_LIST_HEAD(&matcher->list_node); + INIT_LIST_HEAD(&matcher->dbg_rule_list); mlx5dr_domain_lock(tbl->dmn); @@ -965,16 +1013,12 @@ mlx5dr_matcher_create(struct mlx5dr_table *tbl, if (ret) goto free_matcher; - ret = dr_matcher_add_to_tbl(matcher); - if (ret) - goto matcher_uninit; + dr_matcher_add_to_dbg_list(matcher); mlx5dr_domain_unlock(tbl->dmn); return matcher; -matcher_uninit: - dr_matcher_uninit(matcher); free_matcher: mlx5dr_domain_unlock(tbl->dmn); kfree(matcher); @@ -983,10 +1027,10 @@ dec_ref: return NULL; } -static int dr_matcher_disconnect(struct mlx5dr_domain *dmn, - struct mlx5dr_table_rx_tx *nic_tbl, - struct mlx5dr_matcher_rx_tx *next_nic_matcher, - struct mlx5dr_matcher_rx_tx *prev_nic_matcher) +static int dr_matcher_disconnect_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl, + struct mlx5dr_matcher_rx_tx *next_nic_matcher, + struct mlx5dr_matcher_rx_tx *prev_nic_matcher) { struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; struct mlx5dr_htbl_connect_info info; @@ -1001,55 +1045,46 @@ static int dr_matcher_disconnect(struct mlx5dr_domain *dmn, if (next_nic_matcher) { info.type = CONNECT_HIT; info.hit_next_htbl = next_nic_matcher->s_htbl; - next_nic_matcher->s_htbl->pointing_ste = prev_anchor->ste_arr; - prev_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl; + next_nic_matcher->s_htbl->pointing_ste = prev_anchor->chunk->ste_arr; + prev_anchor->chunk->ste_arr[0].next_htbl = next_nic_matcher->s_htbl; } else { info.type = CONNECT_MISS; info.miss_icm_addr = nic_tbl->default_icm_addr; - prev_anchor->ste_arr[0].next_htbl = NULL; + prev_anchor->chunk->ste_arr[0].next_htbl = NULL; } return mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_anchor, &info, true); } -static int dr_matcher_remove_from_tbl(struct mlx5dr_matcher *matcher) +int mlx5dr_matcher_remove_from_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher) { - struct mlx5dr_matcher *prev_matcher, *next_matcher; - struct mlx5dr_table *tbl = matcher->tbl; - struct mlx5dr_domain *dmn = tbl->dmn; - int ret = 0; + struct mlx5dr_matcher_rx_tx *prev_nic_matcher, *next_nic_matcher; + struct mlx5dr_table_rx_tx *nic_tbl = nic_matcher->nic_tbl; + int ret; - if (list_is_last(&matcher->matcher_list, &tbl->matcher_list)) - next_matcher = NULL; - else - next_matcher = list_next_entry(matcher, matcher_list); + /* If the nic matcher is not on its parent nic table list, + * then it is detached - no need to disconnect it. + */ + if (list_empty(&nic_matcher->list_node)) + return 0; - if (matcher->matcher_list.prev == &tbl->matcher_list) - prev_matcher = NULL; + if (list_is_last(&nic_matcher->list_node, &nic_tbl->nic_matcher_list)) + next_nic_matcher = NULL; else - prev_matcher = list_prev_entry(matcher, matcher_list); - - if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || - dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) { - ret = dr_matcher_disconnect(dmn, &tbl->rx, - next_matcher ? &next_matcher->rx : NULL, - prev_matcher ? &prev_matcher->rx : NULL); - if (ret) - return ret; - } + next_nic_matcher = list_next_entry(nic_matcher, list_node); - if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || - dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) { - ret = dr_matcher_disconnect(dmn, &tbl->tx, - next_matcher ? &next_matcher->tx : NULL, - prev_matcher ? &prev_matcher->tx : NULL); - if (ret) - return ret; - } + if (nic_matcher->list_node.prev == &nic_tbl->nic_matcher_list) + prev_nic_matcher = NULL; + else + prev_nic_matcher = list_prev_entry(nic_matcher, list_node); - list_del(&matcher->matcher_list); + ret = dr_matcher_disconnect_nic(dmn, nic_tbl, next_nic_matcher, prev_nic_matcher); + if (ret) + return ret; + list_del_init(&nic_matcher->list_node); return 0; } @@ -1057,12 +1092,12 @@ int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) { struct mlx5dr_table *tbl = matcher->tbl; - if (refcount_read(&matcher->refcount) > 1) + if (WARN_ON_ONCE(refcount_read(&matcher->refcount) > 1)) return -EBUSY; mlx5dr_domain_lock(tbl->dmn); - dr_matcher_remove_from_tbl(matcher); + dr_matcher_remove_from_dbg_list(matcher); dr_matcher_uninit(matcher); refcount_dec(&matcher->tbl->refcount); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 6a390e981b09..91ff19f67695 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -5,11 +5,6 @@ #define DR_RULE_MAX_STE_CHAIN (DR_RULE_MAX_STES + DR_ACTION_MAX_STES) -struct mlx5dr_rule_action_member { - struct mlx5dr_action *action; - struct list_head list; -}; - static int dr_rule_append_to_miss_list(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste *new_last_ste, struct list_head *miss_list, @@ -26,12 +21,12 @@ static int dr_rule_append_to_miss_list(struct mlx5dr_ste_ctx *ste_ctx, if (!ste_info_last) return -ENOMEM; - mlx5dr_ste_set_miss_addr(ste_ctx, last_ste->hw_ste, + mlx5dr_ste_set_miss_addr(ste_ctx, mlx5dr_ste_get_hw_ste(last_ste), mlx5dr_ste_get_icm_addr(new_last_ste)); list_add_tail(&new_last_ste->miss_list_node, miss_list); mlx5dr_send_fill_and_append_ste_send_info(last_ste, DR_STE_SIZE_CTRL, - 0, last_ste->hw_ste, + 0, mlx5dr_ste_get_hw_ste(last_ste), ste_info_last, send_list, true); return 0; @@ -46,6 +41,7 @@ dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher, struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; struct mlx5dr_ste_htbl *new_htbl; struct mlx5dr_ste *ste; + u64 icm_addr; /* Create new table for miss entry */ new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, @@ -58,9 +54,9 @@ dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher, } /* One and only entry, never grows */ - ste = new_htbl->ste_arr; - mlx5dr_ste_set_miss_addr(ste_ctx, hw_ste, - nic_matcher->e_anchor->chunk->icm_addr); + ste = new_htbl->chunk->ste_arr; + icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + mlx5dr_ste_set_miss_addr(ste_ctx, hw_ste, icm_addr); mlx5dr_htbl_get(new_htbl); return ste; @@ -84,7 +80,7 @@ dr_rule_create_collision_entry(struct mlx5dr_matcher *matcher, ste->htbl->pointing_ste = orig_ste->htbl->pointing_ste; /* In collision entry, all members share the same miss_list_head */ - ste->htbl->miss_list = mlx5dr_ste_get_miss_list(orig_ste); + ste->htbl->chunk->miss_list = mlx5dr_ste_get_miss_list(orig_ste); /* Next table */ if (mlx5dr_ste_create_next_htbl(matcher, nic_matcher, ste, hw_ste, @@ -112,9 +108,11 @@ dr_rule_handle_one_ste_in_update_list(struct mlx5dr_ste_send_info *ste_info, * is already written to the hw. */ if (ste_info->size == DR_STE_SIZE_CTRL) - memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_CTRL); + memcpy(mlx5dr_ste_get_hw_ste(ste_info->ste), + ste_info->data, DR_STE_SIZE_CTRL); else - memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_REDUCED); + memcpy(mlx5dr_ste_get_hw_ste(ste_info->ste), + ste_info->data, DR_STE_SIZE_REDUCED); ret = mlx5dr_send_postsend_ste(dmn, ste_info->ste, ste_info->data, ste_info->size, ste_info->offset); @@ -164,7 +162,7 @@ dr_rule_find_ste_in_miss_list(struct list_head *miss_list, u8 *hw_ste) /* Check if hw_ste is present in the list */ list_for_each_entry(ste, miss_list, miss_list_node) { - if (mlx5dr_ste_equal_tag(ste->hw_ste, hw_ste)) + if (mlx5dr_ste_equal_tag(mlx5dr_ste_get_hw_ste(ste), hw_ste)) return ste; } @@ -190,7 +188,7 @@ dr_rule_rehash_handle_collision(struct mlx5dr_matcher *matcher, new_ste->htbl->pointing_ste = col_ste->htbl->pointing_ste; /* In collision entry, all members share the same miss_list_head */ - new_ste->htbl->miss_list = mlx5dr_ste_get_miss_list(col_ste); + new_ste->htbl->chunk->miss_list = mlx5dr_ste_get_miss_list(col_ste); /* Update the previous from the list */ ret = dr_rule_append_to_miss_list(dmn->ste_ctx, new_ste, @@ -240,6 +238,7 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher, bool use_update_list = false; u8 hw_ste[DR_STE_SIZE] = {}; struct mlx5dr_ste *new_ste; + u64 icm_addr; int new_idx; u8 sb_idx; @@ -248,12 +247,12 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher, mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask); /* Copy STE control and tag */ - memcpy(hw_ste, cur_ste->hw_ste, DR_STE_SIZE_REDUCED); - mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, - nic_matcher->e_anchor->chunk->icm_addr); + icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + memcpy(hw_ste, mlx5dr_ste_get_hw_ste(cur_ste), DR_STE_SIZE_REDUCED); + mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr); new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl); - new_ste = &new_htbl->ste_arr[new_idx]; + new_ste = &new_htbl->chunk->ste_arr[new_idx]; if (mlx5dr_ste_is_not_used(new_ste)) { mlx5dr_htbl_get(new_htbl); @@ -274,7 +273,7 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher, use_update_list = true; } - memcpy(new_ste->hw_ste, hw_ste, DR_STE_SIZE_REDUCED); + memcpy(mlx5dr_ste_get_hw_ste(new_ste), hw_ste, DR_STE_SIZE_REDUCED); new_htbl->ctrl.num_of_valid_entries++; @@ -339,7 +338,7 @@ static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher, int err = 0; int i; - cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk_size); + cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk->size); if (cur_entries < 1) { mlx5dr_dbg(matcher->tbl->dmn, "Invalid number of entries\n"); @@ -347,7 +346,7 @@ static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher, } for (i = 0; i < cur_entries; i++) { - cur_ste = &cur_htbl->ste_arr[i]; + cur_ste = &cur_htbl->chunk->ste_arr[i]; if (mlx5dr_ste_is_not_used(cur_ste)) /* Empty, nothing to copy */ continue; @@ -403,7 +402,7 @@ dr_rule_rehash_htbl(struct mlx5dr_rule *rule, /* Write new table to HW */ info.type = CONNECT_MISS; - info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr; + info.miss_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); mlx5dr_ste_set_formatted_ste(dmn->ste_ctx, dmn->info.caps.gvmi, nic_dmn->type, @@ -451,21 +450,21 @@ dr_rule_rehash_htbl(struct mlx5dr_rule *rule, * (48B len) which works only on first 32B */ mlx5dr_ste_set_hit_addr(dmn->ste_ctx, - prev_htbl->ste_arr[0].hw_ste, - new_htbl->chunk->icm_addr, - new_htbl->chunk->num_of_entries); + prev_htbl->chunk->hw_ste_arr, + mlx5dr_icm_pool_get_chunk_icm_addr(new_htbl->chunk), + mlx5dr_icm_pool_get_chunk_num_of_entries(new_htbl->chunk)); - ste_to_update = &prev_htbl->ste_arr[0]; + ste_to_update = &prev_htbl->chunk->ste_arr[0]; } else { mlx5dr_ste_set_hit_addr_by_next_htbl(dmn->ste_ctx, - cur_htbl->pointing_ste->hw_ste, + mlx5dr_ste_get_hw_ste(cur_htbl->pointing_ste), new_htbl); ste_to_update = cur_htbl->pointing_ste; } mlx5dr_send_fill_and_append_ste_send_info(ste_to_update, DR_STE_SIZE_CTRL, - 0, ste_to_update->hw_ste, ste_info, - update_list, false); + 0, mlx5dr_ste_get_hw_ste(ste_to_update), + ste_info, update_list, false); return new_htbl; @@ -494,10 +493,10 @@ static struct mlx5dr_ste_htbl *dr_rule_rehash(struct mlx5dr_rule *rule, struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; enum mlx5dr_icm_chunk_size new_size; - new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk_size); + new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk->size); new_size = min_t(u32, new_size, dmn->info.max_log_sw_icm_sz); - if (new_size == cur_htbl->chunk_size) + if (new_size == cur_htbl->chunk->size) return NULL; /* Skip rehash, we already at the max size */ return dr_rule_rehash_htbl(rule, nic_rule, cur_htbl, ste_location, @@ -664,13 +663,13 @@ static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl, struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl; int threshold; - if (dmn->info.max_log_sw_icm_sz <= htbl->chunk_size) + if (dmn->info.max_log_sw_icm_sz <= htbl->chunk->size) return false; if (!mlx5dr_ste_htbl_may_grow(htbl)) return false; - if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk_size) + if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk->size) return false; threshold = mlx5dr_ste_htbl_increase_threshold(htbl); @@ -760,6 +759,7 @@ static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher, { struct mlx5dr_domain *dmn = matcher->tbl->dmn; struct mlx5dr_ste_send_info *ste_info; + u64 icm_addr; /* Take ref on table, only on first time this ste is used */ mlx5dr_htbl_get(cur_htbl); @@ -767,8 +767,8 @@ static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher, /* new entry -> new branch */ list_add_tail(&ste->miss_list_node, miss_list); - mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, - nic_matcher->e_anchor->chunk->icm_addr); + icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr); ste->ste_chain_location = ste_location; @@ -827,7 +827,7 @@ dr_rule_handle_ste_branch(struct mlx5dr_rule *rule, again: index = mlx5dr_ste_calc_hash_index(hw_ste, cur_htbl); miss_list = &cur_htbl->chunk->miss_list[index]; - ste = &cur_htbl->ste_arr[index]; + ste = &cur_htbl->chunk->ste_arr[index]; if (mlx5dr_ste_is_not_used(ste)) { if (dr_rule_handle_empty_entry(matcher, nic_matcher, cur_htbl, @@ -863,7 +863,7 @@ again: ste_location, send_ste_list); if (!new_htbl) { mlx5dr_err(dmn, "Failed creating rehash table, htbl-log_size: %d\n", - cur_htbl->chunk_size); + cur_htbl->chunk->size); mlx5dr_htbl_put(cur_htbl); } else { cur_htbl = new_htbl; @@ -979,14 +979,36 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, return false; } } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC5) { + s_idx = offsetof(struct mlx5dr_match_param, misc5); + e_idx = min(s_idx + sizeof(param->misc5), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, "Rule misc5 parameters contains a value not specified by mask\n"); + return false; + } + } return true; } static int dr_rule_destroy_rule_nic(struct mlx5dr_rule *rule, struct mlx5dr_rule_rx_tx *nic_rule) { + /* Check if this nic rule was actually created, or was it skipped + * and only the other type of the RX/TX nic rule was created. + */ + if (!nic_rule->last_rule_ste) + return 0; + mlx5dr_domain_nic_lock(nic_rule->nic_matcher->nic_tbl->nic_dmn); dr_rule_clean_rule_members(rule, nic_rule); + + nic_rule->nic_matcher->rules--; + if (!nic_rule->nic_matcher->rules) + mlx5dr_matcher_remove_from_tbl_nic(rule->matcher->tbl->dmn, + nic_rule->nic_matcher); + mlx5dr_domain_nic_unlock(nic_rule->nic_matcher->nic_tbl->nic_dmn); return 0; @@ -1003,6 +1025,8 @@ static int dr_rule_destroy_rule(struct mlx5dr_rule *rule) { struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + mlx5dr_dbg_rule_del(rule); + switch (dmn->type) { case MLX5DR_DOMAIN_TYPE_NIC_RX: dr_rule_destroy_rule_nic(rule, &rule->rx); @@ -1091,24 +1115,28 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, mlx5dr_domain_nic_lock(nic_dmn); + ret = mlx5dr_matcher_add_to_tbl_nic(dmn, nic_matcher); + if (ret) + goto free_hw_ste; + ret = mlx5dr_matcher_select_builders(matcher, nic_matcher, dr_rule_get_ipv(¶m->outer), dr_rule_get_ipv(¶m->inner)); if (ret) - goto free_hw_ste; + goto remove_from_nic_tbl; /* Set the tag values inside the ste array */ ret = mlx5dr_ste_build_ste_arr(matcher, nic_matcher, param, hw_ste_arr); if (ret) - goto free_hw_ste; + goto remove_from_nic_tbl; /* Set the actions values/addresses inside the ste array */ ret = mlx5dr_actions_build_ste_arr(matcher, nic_matcher, actions, num_actions, hw_ste_arr, &new_hw_ste_arr_sz); if (ret) - goto free_hw_ste; + goto remove_from_nic_tbl; cur_htbl = nic_matcher->s_htbl; @@ -1155,6 +1183,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, if (htbl) mlx5dr_htbl_put(htbl); + nic_matcher->rules++; + mlx5dr_domain_nic_unlock(nic_dmn); kfree(hw_ste_arr); @@ -1168,6 +1198,11 @@ free_rule: list_del(&ste_info->send_list); kfree(ste_info); } + +remove_from_nic_tbl: + if (!nic_matcher->rules) + mlx5dr_matcher_remove_from_tbl_nic(dmn, nic_matcher); + free_hw_ste: mlx5dr_domain_nic_unlock(nic_dmn); kfree(hw_ste_arr); @@ -1257,6 +1292,8 @@ dr_rule_create_rule(struct mlx5dr_matcher *matcher, if (ret) goto remove_action_members; + INIT_LIST_HEAD(&rule->dbg_node); + mlx5dr_dbg_rule_add(rule); return rule; remove_action_members: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 00aef47d7682..ef19a66f5233 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -407,17 +407,17 @@ static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn, int *iterations, int *num_stes) { + u32 chunk_byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk); int alloc_size; - if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) { - *iterations = htbl->chunk->byte_size / - dmn->send_ring->max_post_send_size; + if (chunk_byte_size > dmn->send_ring->max_post_send_size) { + *iterations = chunk_byte_size / dmn->send_ring->max_post_send_size; *byte_size = dmn->send_ring->max_post_send_size; alloc_size = *byte_size; *num_stes = *byte_size / DR_STE_SIZE; } else { *iterations = 1; - *num_stes = htbl->chunk->num_of_entries; + *num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk); alloc_size = *num_stes * DR_STE_SIZE; } @@ -453,7 +453,7 @@ int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste, send_info.write.length = size; send_info.write.lkey = 0; send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset; - send_info.rkey = ste->htbl->chunk->rkey; + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk); return dr_postsend_icm_data(dmn, &send_info); } @@ -462,7 +462,7 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, struct mlx5dr_ste_htbl *htbl, u8 *formatted_ste, u8 *mask) { - u32 byte_size = htbl->chunk->byte_size; + u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk); int num_stes_per_iter; int iterations; u8 *data; @@ -486,7 +486,7 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, * need to add the bit_mask */ for (j = 0; j < num_stes_per_iter; j++) { - struct mlx5dr_ste *ste = &htbl->ste_arr[ste_index + j]; + struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[ste_index + j]; u32 ste_off = j * DR_STE_SIZE; if (mlx5dr_ste_is_not_used(ste)) { @@ -495,7 +495,8 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, } else { /* Copy data */ memcpy(data + ste_off, - htbl->ste_arr[ste_index + j].hw_ste, + htbl->chunk->hw_ste_arr + + DR_STE_SIZE_REDUCED * (ste_index + j), DR_STE_SIZE_REDUCED); /* Copy bit_mask */ memcpy(data + ste_off + DR_STE_SIZE_REDUCED, @@ -511,8 +512,8 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, send_info.write.length = byte_size; send_info.write.lkey = 0; send_info.remote_addr = - mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index); - send_info.rkey = htbl->chunk->rkey; + mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index); + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk); ret = dr_postsend_icm_data(dmn, &send_info); if (ret) @@ -530,7 +531,7 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, u8 *ste_init_data, bool update_hw_ste) { - u32 byte_size = htbl->chunk->byte_size; + u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk); int iterations; int num_stes; u8 *copy_dst; @@ -546,7 +547,7 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, if (update_hw_ste) { /* Copy the reduced STE to hash table ste_arr */ for (i = 0; i < num_stes; i++) { - copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED; + copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED; memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED); } } @@ -568,8 +569,8 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, send_info.write.length = byte_size; send_info.write.lkey = 0; send_info.remote_addr = - mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index); - send_info.rkey = htbl->chunk->rkey; + mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index); + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk); ret = dr_postsend_icm_data(dmn, &send_info); if (ret) @@ -591,8 +592,9 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, send_info.write.length = action->rewrite->num_of_actions * DR_MODIFY_ACTION_SIZE; send_info.write.lkey = 0; - send_info.remote_addr = action->rewrite->chunk->mr_addr; - send_info.rkey = action->rewrite->chunk->rkey; + send_info.remote_addr = + mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk); + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk); ret = dr_postsend_icm_data(dmn, &send_info); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 219a5474a8a4..09ebd3088857 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -25,6 +25,7 @@ bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps) u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl) { + u32 num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk); struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; u8 masked[DR_STE_SIZE_TAG] = {}; u32 crc32, index; @@ -32,7 +33,7 @@ u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl) int i; /* Don't calculate CRC if the result is predicted */ - if (htbl->chunk->num_of_entries == 1 || htbl->byte_mask == 0) + if (num_entries == 1 || htbl->byte_mask == 0) return 0; /* Mask tag using byte mask, bit per byte */ @@ -45,7 +46,7 @@ u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl) } crc32 = dr_ste_crc32_calc(masked, DR_STE_SIZE_TAG); - index = crc32 & (htbl->chunk->num_of_entries - 1); + index = crc32 & (num_entries - 1); return index; } @@ -96,13 +97,11 @@ void mlx5dr_ste_set_miss_addr(struct mlx5dr_ste_ctx *ste_ctx, } static void dr_ste_always_miss_addr(struct mlx5dr_ste_ctx *ste_ctx, - struct mlx5dr_ste *ste, u64 miss_addr) + u8 *hw_ste, u64 miss_addr) { - u8 *hw_ste_p = ste->hw_ste; - - ste_ctx->set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE); - ste_ctx->set_miss_addr(hw_ste_p, miss_addr); - dr_ste_set_always_miss((struct dr_hw_ste_format *)ste->hw_ste); + ste_ctx->set_next_lu_type(hw_ste, MLX5DR_STE_LU_TYPE_DONT_CARE); + ste_ctx->set_miss_addr(hw_ste, miss_addr); + dr_ste_set_always_miss((struct dr_hw_ste_format *)hw_ste); } void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx, @@ -113,37 +112,45 @@ void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx, u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste) { - u32 index = ste - ste->htbl->ste_arr; + u64 base_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(ste->htbl->chunk); + u32 index = ste - ste->htbl->chunk->ste_arr; - return ste->htbl->chunk->icm_addr + DR_STE_SIZE * index; + return base_icm_addr + DR_STE_SIZE * index; } u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste) { - u32 index = ste - ste->htbl->ste_arr; + u32 index = ste - ste->htbl->chunk->ste_arr; + + return mlx5dr_icm_pool_get_chunk_mr_addr(ste->htbl->chunk) + DR_STE_SIZE * index; +} + +u8 *mlx5dr_ste_get_hw_ste(struct mlx5dr_ste *ste) +{ + u64 index = ste - ste->htbl->chunk->ste_arr; - return ste->htbl->chunk->mr_addr + DR_STE_SIZE * index; + return ste->htbl->chunk->hw_ste_arr + DR_STE_SIZE_REDUCED * index; } struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste) { - u32 index = ste - ste->htbl->ste_arr; + u32 index = ste - ste->htbl->chunk->ste_arr; - return &ste->htbl->miss_list[index]; + return &ste->htbl->chunk->miss_list[index]; } static void dr_ste_always_hit_htbl(struct mlx5dr_ste_ctx *ste_ctx, - struct mlx5dr_ste *ste, + u8 *hw_ste, struct mlx5dr_ste_htbl *next_htbl) { struct mlx5dr_icm_chunk *chunk = next_htbl->chunk; - u8 *hw_ste = ste->hw_ste; ste_ctx->set_byte_mask(hw_ste, next_htbl->byte_mask); ste_ctx->set_next_lu_type(hw_ste, next_htbl->lu_type); - ste_ctx->set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries); + ste_ctx->set_hit_addr(hw_ste, mlx5dr_icm_pool_get_chunk_icm_addr(chunk), + mlx5dr_icm_pool_get_chunk_num_of_entries(chunk)); - dr_ste_set_always_hit((struct dr_hw_ste_format *)ste->hw_ste); + dr_ste_set_always_hit((struct dr_hw_ste_format *)hw_ste); } bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, @@ -166,7 +173,8 @@ bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, */ static void dr_ste_replace(struct mlx5dr_ste *dst, struct mlx5dr_ste *src) { - memcpy(dst->hw_ste, src->hw_ste, DR_STE_SIZE_REDUCED); + memcpy(mlx5dr_ste_get_hw_ste(dst), mlx5dr_ste_get_hw_ste(src), + DR_STE_SIZE_REDUCED); dst->next_htbl = src->next_htbl; if (dst->next_htbl) dst->next_htbl->pointing_ste = dst; @@ -184,18 +192,17 @@ dr_ste_remove_head_ste(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_htbl *stats_tbl) { u8 tmp_data_ste[DR_STE_SIZE] = {}; - struct mlx5dr_ste tmp_ste = {}; u64 miss_addr; - tmp_ste.hw_ste = tmp_data_ste; + miss_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); /* Use temp ste because dr_ste_always_miss_addr * touches bit_mask area which doesn't exist at ste->hw_ste. + * Need to use a full-sized (DR_STE_SIZE) hw_ste. */ - memcpy(tmp_ste.hw_ste, ste->hw_ste, DR_STE_SIZE_REDUCED); - miss_addr = nic_matcher->e_anchor->chunk->icm_addr; - dr_ste_always_miss_addr(ste_ctx, &tmp_ste, miss_addr); - memcpy(ste->hw_ste, tmp_ste.hw_ste, DR_STE_SIZE_REDUCED); + memcpy(tmp_data_ste, mlx5dr_ste_get_hw_ste(ste), DR_STE_SIZE_REDUCED); + dr_ste_always_miss_addr(ste_ctx, tmp_data_ste, miss_addr); + memcpy(mlx5dr_ste_get_hw_ste(ste), tmp_data_ste, DR_STE_SIZE_REDUCED); list_del_init(&ste->miss_list_node); @@ -237,7 +244,7 @@ dr_ste_replace_head_ste(struct mlx5dr_matcher_rx_tx *nic_matcher, mlx5dr_rule_set_last_member(next_ste->rule_rx_tx, ste, false); /* Copy all 64 hw_ste bytes */ - memcpy(hw_ste, ste->hw_ste, DR_STE_SIZE_REDUCED); + memcpy(hw_ste, mlx5dr_ste_get_hw_ste(ste), DR_STE_SIZE_REDUCED); sb_idx = ste->ste_chain_location - 1; mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask); @@ -273,12 +280,13 @@ static void dr_ste_remove_middle_ste(struct mlx5dr_ste_ctx *ste_ctx, if (WARN_ON(!prev_ste)) return; - miss_addr = ste_ctx->get_miss_addr(ste->hw_ste); - ste_ctx->set_miss_addr(prev_ste->hw_ste, miss_addr); + miss_addr = ste_ctx->get_miss_addr(mlx5dr_ste_get_hw_ste(ste)); + ste_ctx->set_miss_addr(mlx5dr_ste_get_hw_ste(prev_ste), miss_addr); mlx5dr_send_fill_and_append_ste_send_info(prev_ste, DR_STE_SIZE_CTRL, 0, - prev_ste->hw_ste, ste_info, - send_ste_list, true /* Copy data*/); + mlx5dr_ste_get_hw_ste(prev_ste), + ste_info, send_ste_list, + true /* Copy data*/); list_del_init(&ste->miss_list_node); @@ -364,9 +372,11 @@ void mlx5dr_ste_set_hit_addr_by_next_htbl(struct mlx5dr_ste_ctx *ste_ctx, u8 *hw_ste, struct mlx5dr_ste_htbl *next_htbl) { - struct mlx5dr_icm_chunk *chunk = next_htbl->chunk; + u64 icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(next_htbl->chunk); + u32 num_entries = + mlx5dr_icm_pool_get_chunk_num_of_entries(next_htbl->chunk); - ste_ctx->set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries); + ste_ctx->set_hit_addr(hw_ste, icm_addr, num_entries); } void mlx5dr_ste_prepare_for_postsend(struct mlx5dr_ste_ctx *ste_ctx, @@ -385,15 +395,22 @@ void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_htbl_connect_info *connect_info) { bool is_rx = nic_type == DR_DOMAIN_NIC_TYPE_RX; - struct mlx5dr_ste ste = {}; + u8 tmp_hw_ste[DR_STE_SIZE] = {0}; ste_ctx->ste_init(formatted_ste, htbl->lu_type, is_rx, gvmi); - ste.hw_ste = formatted_ste; + /* Use temp ste because dr_ste_always_miss_addr/hit_htbl + * touches bit_mask area which doesn't exist at ste->hw_ste. + * Need to use a full-sized (DR_STE_SIZE) hw_ste. + */ + memcpy(tmp_hw_ste, formatted_ste, DR_STE_SIZE_REDUCED); if (connect_info->type == CONNECT_HIT) - dr_ste_always_hit_htbl(ste_ctx, &ste, connect_info->hit_next_htbl); + dr_ste_always_hit_htbl(ste_ctx, tmp_hw_ste, + connect_info->hit_next_htbl); else - dr_ste_always_miss_addr(ste_ctx, &ste, connect_info->miss_icm_addr); + dr_ste_always_miss_addr(ste_ctx, tmp_hw_ste, + connect_info->miss_icm_addr); + memcpy(formatted_ste, tmp_hw_ste, DR_STE_SIZE_REDUCED); } int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn, @@ -444,7 +461,8 @@ int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher, /* Write new table to HW */ info.type = CONNECT_MISS; - info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr; + info.miss_icm_addr = + mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); if (mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, next_htbl, &info, false)) { mlx5dr_info(dmn, "Failed writing table to HW\n"); @@ -470,6 +488,7 @@ struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool, { struct mlx5dr_icm_chunk *chunk; struct mlx5dr_ste_htbl *htbl; + u32 num_entries; int i; htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); @@ -483,22 +502,18 @@ struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool, htbl->chunk = chunk; htbl->lu_type = lu_type; htbl->byte_mask = byte_mask; - htbl->ste_arr = chunk->ste_arr; - htbl->hw_ste_arr = chunk->hw_ste_arr; - htbl->miss_list = chunk->miss_list; htbl->refcount = 0; + num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk); - for (i = 0; i < chunk->num_of_entries; i++) { - struct mlx5dr_ste *ste = &htbl->ste_arr[i]; + for (i = 0; i < num_entries; i++) { + struct mlx5dr_ste *ste = &chunk->ste_arr[i]; - ste->hw_ste = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED; ste->htbl = htbl; ste->refcount = 0; INIT_LIST_HEAD(&ste->miss_list_node); - INIT_LIST_HEAD(&htbl->miss_list[i]); + INIT_LIST_HEAD(&chunk->miss_list[i]); } - htbl->chunk_size = chunk_size; return htbl; out_free_htbl: @@ -523,8 +538,8 @@ void mlx5dr_ste_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes) { - ste_ctx->set_actions_tx(dmn, action_type_set, hw_ste_arr, - attr, added_stes); + ste_ctx->set_actions_tx(dmn, action_type_set, ste_ctx->actions_caps, + hw_ste_arr, attr, added_stes); } void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, @@ -534,8 +549,8 @@ void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes) { - ste_ctx->set_actions_rx(dmn, action_type_set, hw_ste_arr, - attr, added_stes); + ste_ctx->set_actions_rx(dmn, action_type_set, ste_ctx->actions_caps, + hw_ste_arr, attr, added_stes); } const struct mlx5dr_ste_action_modify_field * @@ -602,12 +617,34 @@ int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx, used_hw_action_num); } +static int dr_ste_build_pre_check_spec(struct mlx5dr_domain *dmn, + struct mlx5dr_match_spec *spec) +{ + if (spec->ip_version) { + if (spec->ip_version != 0xf) { + mlx5dr_err(dmn, + "Partial ip_version mask with src/dst IP is not supported\n"); + return -EINVAL; + } + } else if (spec->ethertype != 0xffff && + (DR_MASK_IS_SRC_IP_SET(spec) || DR_MASK_IS_DST_IP_SET(spec))) { + mlx5dr_err(dmn, + "Partial/no ethertype mask with src/dst IP is not supported\n"); + return -EINVAL; + } + + return 0; +} + int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, u8 match_criteria, struct mlx5dr_match_param *mask, struct mlx5dr_match_param *value) { - if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) { + if (value) + return 0; + + if (match_criteria & DR_MATCHER_CRITERIA_MISC) { if (mask->misc.source_port && mask->misc.source_port != 0xffff) { mlx5dr_err(dmn, "Partial mask source_port is not supported\n"); @@ -621,6 +658,14 @@ int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, } } + if ((match_criteria & DR_MATCHER_CRITERIA_OUTER) && + dr_ste_build_pre_check_spec(dmn, &mask->outer)) + return -EINVAL; + + if ((match_criteria & DR_MATCHER_CRITERIA_INNER) && + dr_ste_build_pre_check_spec(dmn, &mask->inner)) + return -EINVAL; + return 0; } @@ -719,6 +764,8 @@ static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec, bo spec->vxlan_vni = IFC_GET_CLR(fte_match_set_misc, mask, vxlan_vni, clr); spec->geneve_vni = IFC_GET_CLR(fte_match_set_misc, mask, geneve_vni, clr); + spec->geneve_tlv_option_0_exist = + IFC_GET_CLR(fte_match_set_misc, mask, geneve_tlv_option_0_exist, clr); spec->geneve_oam = IFC_GET_CLR(fte_match_set_misc, mask, geneve_oam, clr); spec->outer_ipv6_flow_label = @@ -761,6 +808,7 @@ static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec, bo spec->tcp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_sport, clr); spec->tcp_dport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_dport, clr); + spec->ipv4_ihl = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ipv4_ihl, clr); spec->ttl_hoplimit = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ttl_hoplimit, clr); spec->udp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, udp_sport, clr); @@ -880,6 +928,26 @@ static void dr_ste_copy_mask_misc4(char *mask, struct mlx5dr_match_misc4 *spec, IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_3, clr); } +static void dr_ste_copy_mask_misc5(char *mask, struct mlx5dr_match_misc5 *spec, bool clr) +{ + spec->macsec_tag_0 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_0, clr); + spec->macsec_tag_1 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_1, clr); + spec->macsec_tag_2 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_2, clr); + spec->macsec_tag_3 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_3, clr); + spec->tunnel_header_0 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_0, clr); + spec->tunnel_header_1 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_1, clr); + spec->tunnel_header_2 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_2, clr); + spec->tunnel_header_3 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_3, clr); +} + void mlx5dr_ste_copy_param(u8 match_criteria, struct mlx5dr_match_param *set_param, struct mlx5dr_match_parameters *mask, @@ -966,6 +1034,20 @@ void mlx5dr_ste_copy_param(u8 match_criteria, } dr_ste_copy_mask_misc4(buff, &set_param->misc4, clr); } + + param_location += sizeof(struct mlx5dr_match_misc4); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC5) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc5)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc5(buff, &set_param->misc5, clr); + } } void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_ctx *ste_ctx, @@ -1180,6 +1262,21 @@ void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx, ste_ctx->build_tnl_geneve_tlv_opt_init(sb, mask); } +void mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + if (!ste_ctx->build_tnl_geneve_tlv_opt_exist_init) + return; + + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_geneve_tlv_opt_exist_init(sb, mask); +} + void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, @@ -1269,15 +1366,24 @@ void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, ste_ctx->build_flex_parser_1_init(sb, mask); } -static struct mlx5dr_ste_ctx *mlx5dr_ste_ctx_arr[] = { - [MLX5_STEERING_FORMAT_CONNECTX_5] = &ste_ctx_v0, - [MLX5_STEERING_FORMAT_CONNECTX_6DX] = &ste_ctx_v1, -}; +void mlx5dr_ste_build_tnl_header_0_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_tnl_header_0_1_init(sb, mask); +} struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx(u8 version) { - if (version > MLX5_STEERING_FORMAT_CONNECTX_6DX) - return NULL; + if (version == MLX5_STEERING_FORMAT_CONNECTX_5) + return mlx5dr_ste_get_ctx_v0(); + else if (version == MLX5_STEERING_FORMAT_CONNECTX_6DX) + return mlx5dr_ste_get_ctx_v1(); + else if (version == MLX5_STEERING_FORMAT_CONNECTX_7) + return mlx5dr_ste_get_ctx_v2(); - return mlx5dr_ste_ctx_arr[version]; + return NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h index 2d52d065dc8b..17513baff9b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h @@ -135,12 +135,14 @@ struct mlx5dr_ste_ctx { void DR_STE_CTX_BUILDER(tnl_vxlan_gpe); void DR_STE_CTX_BUILDER(tnl_geneve); void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt); + void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt_exist); void DR_STE_CTX_BUILDER(register_0); void DR_STE_CTX_BUILDER(register_1); void DR_STE_CTX_BUILDER(src_gvmi_qpn); void DR_STE_CTX_BUILDER(flex_parser_0); void DR_STE_CTX_BUILDER(flex_parser_1); void DR_STE_CTX_BUILDER(tnl_gtpu); + void DR_STE_CTX_BUILDER(tnl_header_0_1); void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_0); void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_1); @@ -159,11 +161,13 @@ struct mlx5dr_ste_ctx { u32 actions_caps; void (*set_actions_rx)(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *hw_ste_arr, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); void (*set_actions_tx)(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *hw_ste_arr, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); @@ -195,7 +199,8 @@ struct mlx5dr_ste_ctx { void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size); }; -extern struct mlx5dr_ste_ctx ste_ctx_v0; -extern struct mlx5dr_ste_ctx ste_ctx_v1; +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void); +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void); +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void); #endif /* _DR_STE_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c index b0649c2877dd..2010d4ac6519 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -80,6 +80,7 @@ enum { DR_STE_V0_LU_TYPE_GENERAL_PURPOSE = 0x18, DR_STE_V0_LU_TYPE_STEERING_REGISTERS_0 = 0x2f, DR_STE_V0_LU_TYPE_STEERING_REGISTERS_1 = 0x30, + DR_STE_V0_LU_TYPE_TUNNEL_HEADER = 0x34, DR_STE_V0_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE, }; @@ -407,6 +408,7 @@ static void dr_ste_v0_arr_init_next(u8 **last_ste, static void dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *last_ste, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes) @@ -418,7 +420,7 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn, * encapsulation. The reason for that is that we support * modify headers for outer headers only */ - if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) { dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT); dr_ste_v0_set_rewrite_actions(last_ste, attr->modify_actions, @@ -476,6 +478,7 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn, static void dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *last_ste, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes) @@ -510,7 +513,7 @@ dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn, } } - if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) { if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT) dr_ste_v0_arr_init_next(&last_ste, added_stes, @@ -1151,6 +1154,7 @@ dr_ste_v0_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value, struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit); + DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, ihl, spec, ipv4_ihl); return 0; } @@ -1704,7 +1708,7 @@ static void dr_ste_v0_set_flex_parser(u32 *misc4_field_id, u32 id = *misc4_field_id; u8 *parser_ptr; - if (parser_is_used[id]) + if (id >= DR_NUM_OF_FLEX_PARSERS || parser_is_used[id]) return; parser_is_used[id] = true; @@ -1875,7 +1879,28 @@ dr_ste_v0_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag; } -struct mlx5dr_ste_ctx ste_ctx_v0 = { +static int dr_ste_v0_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + struct mlx5dr_match_misc5 *misc5 = &value->misc5; + + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_0, misc5, tunnel_header_0); + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_1, misc5, tunnel_header_1); + + return 0; +} + +static void dr_ste_v0_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V0_LU_TYPE_TUNNEL_HEADER; + dr_ste_v0_build_tnl_header_0_1_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_header_0_1_tag; +} + +static struct mlx5dr_ste_ctx ste_ctx_v0 = { /* Builders */ .build_eth_l2_src_dst_init = &dr_ste_v0_build_eth_l2_src_dst_init, .build_eth_l3_ipv6_src_init = &dr_ste_v0_build_eth_l3_ipv6_src_init, @@ -1903,6 +1928,7 @@ struct mlx5dr_ste_ctx ste_ctx_v0 = { .build_flex_parser_0_init = &dr_ste_v0_build_flex_parser_0_init, .build_flex_parser_1_init = &dr_ste_v0_build_flex_parser_1_init, .build_tnl_gtpu_init = &dr_ste_v0_build_flex_parser_tnl_gtpu_init, + .build_tnl_header_0_1_init = &dr_ste_v0_build_tnl_header_0_1_init, .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_init, .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_init, @@ -1927,3 +1953,8 @@ struct mlx5dr_ste_ctx ste_ctx_v0 = { .set_action_copy = &dr_ste_v0_set_action_copy, .set_action_decap_l3_list = &dr_ste_v0_set_action_decap_l3_list, }; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void) +{ + return &ste_ctx_v0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index cb9cf67b0a02..ee677a5c76be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -3,7 +3,7 @@ #include <linux/types.h> #include "mlx5_ifc_dr_ste_v1.h" -#include "dr_ste.h" +#include "dr_ste_v1.h" #define DR_STE_CALC_DFNR_TYPE(lookup_type, inner) \ ((inner) ? DR_STE_V1_LU_TYPE_##lookup_type##_I : \ @@ -47,6 +47,7 @@ enum { DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_I = 0x000f, DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0 = 0x010f, DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1 = 0x0110, + DR_STE_V1_LU_TYPE_FLEX_PARSER_OK = 0x0011, DR_STE_V1_LU_TYPE_FLEX_PARSER_0 = 0x0111, DR_STE_V1_LU_TYPE_FLEX_PARSER_1 = 0x0112, DR_STE_V1_LU_TYPE_ETHL4_MISC_O = 0x0113, @@ -88,6 +89,7 @@ enum dr_ste_v1_action_id { DR_STE_V1_ACTION_ID_QUEUE_ID_SEL = 0x0d, DR_STE_V1_ACTION_ID_ACCELERATED_LIST = 0x0e, DR_STE_V1_ACTION_ID_MODIFY_LIST = 0x0f, + DR_STE_V1_ACTION_ID_ASO = 0x12, DR_STE_V1_ACTION_ID_TRAILER = 0x13, DR_STE_V1_ACTION_ID_COUNTER_ID = 0x14, DR_STE_V1_ACTION_ID_MAX = 0x21, @@ -120,12 +122,16 @@ enum { DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2 = 0x8c, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_3 = 0x8d, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_4 = 0x8e, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_5 = 0x8f, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_6 = 0x90, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_7 = 0x91, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0 = 0x8c, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1 = 0x8d, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0 = 0x8e, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1 = 0x8f, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0 = 0x90, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1 = 0x91, +}; + +enum dr_ste_v1_aso_ctx_type { + DR_STE_V1_ASO_CTX_TYPE_POLICERS = 0x2, }; static const struct mlx5dr_ste_action_modify_field dr_ste_v1_action_modify_field_arr[] = { @@ -222,22 +228,22 @@ static const struct mlx5dr_ste_action_modify_field dr_ste_v1_action_modify_field .hw_field = DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31, }, [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { - .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_6, .start = 0, .end = 31, + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31, }, [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { - .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_7, .start = 0, .end = 31, + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31, }, [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { - .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_4, .start = 0, .end = 31, + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31, }, [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { - .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_5, .start = 0, .end = 31, + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31, }, [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { - .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2, .start = 0, .end = 31, + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31, }, [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { - .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_3, .start = 0, .end = 31, + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31, }, [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { .hw_field = DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31, @@ -261,7 +267,7 @@ static void dr_ste_v1_set_entry_type(u8 *hw_ste_p, u8 entry_type) MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, entry_type); } -static void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) +void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) { u64 index = miss_addr >> 6; @@ -269,7 +275,7 @@ static void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6, index); } -static u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p) +u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p) { u64 index = ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) | @@ -278,12 +284,12 @@ static u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p) return index << 6; } -static void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask) +void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask) { MLX5_SET(ste_match_bwc_v1, hw_ste_p, byte_mask, byte_mask); } -static u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p) +u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p) { return MLX5_GET(ste_match_bwc_v1, hw_ste_p, byte_mask); } @@ -294,13 +300,13 @@ static void dr_ste_v1_set_lu_type(u8 *hw_ste_p, u16 lu_type) MLX5_SET(ste_match_bwc_v1, hw_ste_p, match_definer_ctx_idx, lu_type & 0xFF); } -static void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type) +void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type) { MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_entry_format, lu_type >> 8); MLX5_SET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx, lu_type & 0xFF); } -static u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p) +u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p) { u8 mode = MLX5_GET(ste_match_bwc_v1, hw_ste_p, next_entry_format); u8 index = MLX5_GET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx); @@ -313,7 +319,7 @@ static void dr_ste_v1_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi) MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_63_48, gvmi); } -static void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size) +void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size) { u64 index = (icm_addr >> 5) | ht_size; @@ -321,8 +327,7 @@ static void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size) MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_31_5_size, index); } -static void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, - bool is_rx, u16 gvmi) +void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi) { dr_ste_v1_set_lu_type(hw_ste_p, lu_type); dr_ste_v1_set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE); @@ -332,8 +337,7 @@ static void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_63_48, gvmi); } -static void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, - u32 ste_size) +void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size) { u8 *tag = hw_ste_p + DR_STE_SIZE_CTRL; u8 *mask = tag + DR_STE_SIZE_TAG; @@ -495,6 +499,27 @@ static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p, dr_ste_v1_set_reparse(hw_ste_p); } +static void dr_ste_v1_set_aso_flow_meter(u8 *d_action, + u32 object_id, + u32 offset, + u8 dest_reg_id, + u8 init_color) +{ + MLX5_SET(ste_double_action_aso_v1, d_action, action_id, + DR_STE_V1_ACTION_ID_ASO); + MLX5_SET(ste_double_action_aso_v1, d_action, aso_context_number, + object_id + (offset / MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ)); + /* Convert reg_c index to HW 64bit index */ + MLX5_SET(ste_double_action_aso_v1, d_action, dest_reg_id, + (dest_reg_id - 1) / 2); + MLX5_SET(ste_double_action_aso_v1, d_action, aso_context_type, + DR_STE_V1_ASO_CTX_TYPE_POLICERS); + MLX5_SET(ste_double_action_aso_v1, d_action, flow_meter.line_id, + offset % MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ); + MLX5_SET(ste_double_action_aso_v1, d_action, flow_meter.initial_color, + init_color); +} + static void dr_ste_v1_arr_init_next_match(u8 **last_ste, u32 *added_stes, u16 gvmi) @@ -510,11 +535,12 @@ static void dr_ste_v1_arr_init_next_match(u8 **last_ste, memset(action, 0, MLX5_FLD_SZ_BYTES(ste_mask_and_match_v1, action)); } -static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, - u8 *action_type_set, - u8 *last_ste, - struct mlx5dr_ste_actions_attr *attr, - u32 *added_stes) +void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) { u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action); u8 action_sz = DR_STE_ACTION_DOUBLE_SZ; @@ -532,7 +558,10 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; - allow_modify_hdr = false; + + /* Check if vlan_pop and modify_hdr on same STE is supported */ + if (!(actions_caps & DR_STE_CTX_ACTION_CAP_POP_MDFY)) + allow_modify_hdr = false; } if (action_type_set[DR_ACTION_TYP_CTR]) @@ -626,15 +655,31 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, action += DR_STE_ACTION_SINGLE_SZ; } + if (action_type_set[DR_ACTION_TYP_ASO_FLOW_METER]) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_aso_flow_meter(action, + attr->aso_flow_meter.obj_id, + attr->aso_flow_meter.offset, + attr->aso_flow_meter.dest_reg_id, + attr->aso_flow_meter.init_color); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } + dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi); dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); } -static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, - u8 *action_type_set, - u8 *last_ste, - struct mlx5dr_ste_actions_attr *attr, - u32 *added_stes) +void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) { u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action); u8 action_sz = DR_STE_ACTION_DOUBLE_SZ; @@ -676,13 +721,16 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; - allow_modify_hdr = false; - allow_ctr = false; } dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; + allow_ctr = false; + + /* Check if vlan_pop and modify_hdr on same STE is supported */ + if (!(actions_caps & DR_STE_CTX_ACTION_CAP_POP_MDFY)) + allow_modify_hdr = false; } if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { @@ -730,9 +778,9 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; allow_modify_hdr = true; - allow_ctr = false; } dr_ste_v1_set_counter_id(last_ste, attr->ctr_id); + allow_ctr = false; } if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2]) { @@ -795,15 +843,30 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, action += DR_STE_ACTION_SINGLE_SZ; } + if (action_type_set[DR_ACTION_TYP_ASO_FLOW_METER]) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_aso_flow_meter(action, + attr->aso_flow_meter.obj_id, + attr->aso_flow_meter.offset, + attr->aso_flow_meter.dest_reg_id, + attr->aso_flow_meter.init_color); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } + dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi); dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); } -static void dr_ste_v1_set_action_set(u8 *d_action, - u8 hw_field, - u8 shifter, - u8 length, - u32 data) +void dr_ste_v1_set_action_set(u8 *d_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) { shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; MLX5_SET(ste_double_action_set_v1, d_action, action_id, DR_STE_V1_ACTION_ID_SET); @@ -813,11 +876,11 @@ static void dr_ste_v1_set_action_set(u8 *d_action, MLX5_SET(ste_double_action_set_v1, d_action, inline_data, data); } -static void dr_ste_v1_set_action_add(u8 *d_action, - u8 hw_field, - u8 shifter, - u8 length, - u32 data) +void dr_ste_v1_set_action_add(u8 *d_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) { shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; MLX5_SET(ste_double_action_add_v1, d_action, action_id, DR_STE_V1_ACTION_ID_ADD); @@ -827,12 +890,12 @@ static void dr_ste_v1_set_action_add(u8 *d_action, MLX5_SET(ste_double_action_add_v1, d_action, add_value, data); } -static void dr_ste_v1_set_action_copy(u8 *d_action, - u8 dst_hw_field, - u8 dst_shifter, - u8 dst_len, - u8 src_hw_field, - u8 src_shifter) +void dr_ste_v1_set_action_copy(u8 *d_action, + u8 dst_hw_field, + u8 dst_shifter, + u8 dst_len, + u8 src_hw_field, + u8 src_shifter) { dst_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; src_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; @@ -847,11 +910,11 @@ static void dr_ste_v1_set_action_copy(u8 *d_action, #define DR_STE_DECAP_L3_ACTION_NUM 8 #define DR_STE_L2_HDR_MAX_SZ 20 -static int dr_ste_v1_set_action_decap_l3_list(void *data, - u32 data_sz, - u8 *hw_action, - u32 hw_action_sz, - u16 *used_hw_action_num) +int dr_ste_v1_set_action_decap_l3_list(void *data, + u32 data_sz, + u8 *hw_action, + u32 hw_action_sz, + u16 *used_hw_action_num) { u8 padded_data[DR_STE_L2_HDR_MAX_SZ] = {}; void *data_ptr = padded_data; @@ -976,8 +1039,8 @@ static int dr_ste_v1_build_eth_l2_src_dst_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l2_src_dst_bit_mask(mask, sb->inner, sb->bit_mask); @@ -1000,8 +1063,8 @@ static int dr_ste_v1_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l3_ipv6_dst_tag(mask, sb, sb->bit_mask); @@ -1024,8 +1087,8 @@ static int dr_ste_v1_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l3_ipv6_src_tag(mask, sb, sb->bit_mask); @@ -1059,8 +1122,8 @@ static int dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *va return 0; } -static void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(mask, sb, sb->bit_mask); @@ -1078,8 +1141,8 @@ static void dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_vlan_id, mask, first_vid); DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_cfi, mask, first_cfi); DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_priority, mask, first_prio); - DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, ip_fragmented, mask, frag); // ? - DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, l3_ethertype, mask, ethertype); // ? + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, l3_ethertype, mask, ethertype); DR_STE_SET_ONES(eth_l2_src_v1, bit_mask, l3_type, mask, ip_version); if (mask->svlan_tag || mask->cvlan_tag) { @@ -1200,8 +1263,8 @@ static int dr_ste_v1_build_eth_l2_src_tag(struct mlx5dr_match_param *value, return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); } -static void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l2_src_bit_mask(mask, sb->inner, sb->bit_mask); @@ -1233,8 +1296,8 @@ static int dr_ste_v1_build_eth_l2_dst_tag(struct mlx5dr_match_param *value, return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); } -static void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l2_dst_bit_mask(mask, sb->inner, sb->bit_mask); @@ -1313,8 +1376,8 @@ static int dr_ste_v1_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l2_tnl_bit_mask(mask, sb->inner, sb->bit_mask); @@ -1330,12 +1393,13 @@ static int dr_ste_v1_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; DR_STE_SET_TAG(eth_l3_ipv4_misc_v1, tag, time_to_live, spec, ttl_hoplimit); + DR_STE_SET_TAG(eth_l3_ipv4_misc_v1, tag, ihl, spec, ipv4_ihl); return 0; } -static void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l3_ipv4_misc_tag(mask, sb, sb->bit_mask); @@ -1374,8 +1438,8 @@ static int dr_ste_v1_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_ipv6_l3_l4_tag(mask, sb, sb->bit_mask); @@ -1398,8 +1462,8 @@ static int dr_ste_v1_build_mpls_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_mpls_tag(mask, sb, sb->bit_mask); @@ -1425,8 +1489,8 @@ static int dr_ste_v1_build_tnl_gre_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_tnl_gre_tag(mask, sb, sb->bit_mask); @@ -1470,8 +1534,8 @@ static int dr_ste_v1_build_tnl_mpls_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_tnl_mpls_tag(mask, sb, sb->bit_mask); @@ -1505,8 +1569,8 @@ static int dr_ste_v1_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *valu return 0; } -static void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask); @@ -1546,8 +1610,8 @@ static int dr_ste_v1_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *valu return 0; } -static void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask); @@ -1593,8 +1657,8 @@ static int dr_ste_v1_build_icmp_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_icmp_tag(mask, sb, sb->bit_mask); @@ -1615,8 +1679,8 @@ static int dr_ste_v1_build_general_purpose_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_general_purpose_tag(mask, sb, sb->bit_mask); @@ -1642,8 +1706,8 @@ static int dr_ste_v1_build_eth_l4_misc_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l4_misc_tag(mask, sb, sb->bit_mask); @@ -1672,9 +1736,8 @@ dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value, return 0; } -static void -dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(mask, sb, sb->bit_mask); @@ -1702,9 +1765,8 @@ dr_ste_v1_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value, return 0; } -static void -dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_flex_parser_tnl_geneve_tag(mask, sb, sb->bit_mask); @@ -1713,6 +1775,27 @@ dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tag; } +static int dr_ste_v1_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + struct mlx5dr_match_misc5 *misc5 = &value->misc5; + + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_0, misc5, tunnel_header_0); + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_1, misc5, tunnel_header_1); + + return 0; +} + +void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER; + dr_ste_v1_build_tnl_header_0_1_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_header_0_1_tag; +} + static int dr_ste_v1_build_register_0_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, u8 *tag) @@ -1727,8 +1810,8 @@ static int dr_ste_v1_build_register_0_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_register_0_tag(mask, sb, sb->bit_mask); @@ -1751,8 +1834,8 @@ static int dr_ste_v1_build_register_1_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_register_1_tag(mask, sb, sb->bit_mask); @@ -1815,8 +1898,8 @@ static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask); @@ -1833,7 +1916,7 @@ static void dr_ste_v1_set_flex_parser(u32 *misc4_field_id, u32 id = *misc4_field_id; u8 *parser_ptr; - if (parser_is_used[id]) + if (id >= DR_NUM_OF_FLEX_PARSERS || parser_is_used[id]) return; parser_is_used[id] = true; @@ -1870,8 +1953,8 @@ static int dr_ste_v1_build_felx_parser_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0; dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask); @@ -1879,8 +1962,8 @@ static void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag; } -static void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1; dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask); @@ -1904,7 +1987,7 @@ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *va return 0; } -static void +void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask) { @@ -1921,6 +2004,32 @@ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag; } +static int +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0; + struct mlx5dr_match_misc *misc = &value->misc; + + if (misc->geneve_tlv_option_0_exist) { + MLX5_SET(ste_flex_parser_ok, tag, flex_parsers_ok, 1 << parser_id); + misc->geneve_tlv_option_0_exist = 0; + } + + return 0; +} + +void +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_OK; + dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag; +} + static int dr_ste_v1_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, u8 *tag) @@ -1934,8 +2043,8 @@ static int dr_ste_v1_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *v return 0; } -static void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask); @@ -1960,7 +2069,7 @@ dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value, return 0; } -static void +void dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask) { @@ -1987,7 +2096,7 @@ dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value, return 0; } -static void +void dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask) { @@ -1998,7 +2107,7 @@ dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag; } -struct mlx5dr_ste_ctx ste_ctx_v1 = { +static struct mlx5dr_ste_ctx ste_ctx_v1 = { /* Builders */ .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init, .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init, @@ -2020,12 +2129,14 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = { .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init, .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init, .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init, + .build_tnl_geneve_tlv_opt_exist_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init, .build_register_0_init = &dr_ste_v1_build_register_0_init, .build_register_1_init = &dr_ste_v1_build_register_1_init, .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init, .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init, .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init, .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init, + .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init, .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init, .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init, @@ -2041,7 +2152,8 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = { /* Actions */ .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP | DR_STE_CTX_ACTION_CAP_RX_PUSH | - DR_STE_CTX_ACTION_CAP_RX_ENCAP, + DR_STE_CTX_ACTION_CAP_RX_ENCAP | + DR_STE_CTX_ACTION_CAP_POP_MDFY, .set_actions_rx = &dr_ste_v1_set_actions_rx, .set_actions_tx = &dr_ste_v1_set_actions_tx, .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v1_action_modify_field_arr), @@ -2053,3 +2165,8 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = { /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void) +{ + return &ste_ctx_v1; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h new file mode 100644 index 000000000000..8a1d49790c6e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef _DR_STE_V1_ +#define _DR_STE_V1_ + +#include "dr_types.h" +#include "dr_ste.h" + +void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr); +u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p); +void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask); +u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p); +void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type); +u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p); +void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size); +void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi); +void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size); +void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); +void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); +void dr_ste_v1_set_action_set(u8 *d_action, u8 hw_field, u8 shifter, + u8 length, u32 data); +void dr_ste_v1_set_action_add(u8 *d_action, u8 hw_field, u8 shifter, + u8 length, u32 data); +void dr_ste_v1_set_action_copy(u8 *d_action, u8 dst_hw_field, u8 dst_shifter, + u8 dst_len, u8 src_hw_field, u8 src_shifter); +int dr_ste_v1_set_action_decap_l3_list(void *data, u32 data_sz, u8 *hw_action, + u32 hw_action_sz, u16 *used_hw_action_num); +void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); + +#endif /* _DR_STE_V1_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c new file mode 100644 index 000000000000..c60fddd125d2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "dr_ste_v1.h" + +enum { + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0 = 0x00, + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1 = 0x01, + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2 = 0x02, + DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08, + DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09, + DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e, + DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0 = 0x18, + DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1 = 0x19, + DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40, + DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, + DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, + DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, + DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, + DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, + DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, + DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0 = 0x90, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1 = 0x91, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0 = 0x92, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1 = 0x93, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0 = 0x94, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1 = 0x95, +}; + +static const struct mlx5dr_ste_action_modify_field dr_ste_v2_action_modify_field_arr[] = { + [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15, + }, +}; + +static struct mlx5dr_ste_ctx ste_ctx_v2 = { + /* Builders */ + .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init, + .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init, + .build_eth_l3_ipv6_dst_init = &dr_ste_v1_build_eth_l3_ipv6_dst_init, + .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_init, + .build_eth_l2_src_init = &dr_ste_v1_build_eth_l2_src_init, + .build_eth_l2_dst_init = &dr_ste_v1_build_eth_l2_dst_init, + .build_eth_l2_tnl_init = &dr_ste_v1_build_eth_l2_tnl_init, + .build_eth_l3_ipv4_misc_init = &dr_ste_v1_build_eth_l3_ipv4_misc_init, + .build_eth_ipv6_l3_l4_init = &dr_ste_v1_build_eth_ipv6_l3_l4_init, + .build_mpls_init = &dr_ste_v1_build_mpls_init, + .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init, + .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init, + .build_tnl_mpls_over_udp_init = &dr_ste_v1_build_tnl_mpls_over_udp_init, + .build_tnl_mpls_over_gre_init = &dr_ste_v1_build_tnl_mpls_over_gre_init, + .build_icmp_init = &dr_ste_v1_build_icmp_init, + .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init, + .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init, + .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init, + .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init, + .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init, + .build_tnl_geneve_tlv_opt_exist_init = + &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init, + .build_register_0_init = &dr_ste_v1_build_register_0_init, + .build_register_1_init = &dr_ste_v1_build_register_1_init, + .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init, + .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init, + .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init, + .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init, + .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init, + .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init, + .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init, + + /* Getters and Setters */ + .ste_init = &dr_ste_v1_init, + .set_next_lu_type = &dr_ste_v1_set_next_lu_type, + .get_next_lu_type = &dr_ste_v1_get_next_lu_type, + .set_miss_addr = &dr_ste_v1_set_miss_addr, + .get_miss_addr = &dr_ste_v1_get_miss_addr, + .set_hit_addr = &dr_ste_v1_set_hit_addr, + .set_byte_mask = &dr_ste_v1_set_byte_mask, + .get_byte_mask = &dr_ste_v1_get_byte_mask, + + /* Actions */ + .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP | + DR_STE_CTX_ACTION_CAP_RX_PUSH | + DR_STE_CTX_ACTION_CAP_RX_ENCAP, + .set_actions_rx = &dr_ste_v1_set_actions_rx, + .set_actions_tx = &dr_ste_v1_set_actions_tx, + .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v2_action_modify_field_arr), + .modify_field_arr = dr_ste_v2_action_modify_field_arr, + .set_action_set = &dr_ste_v1_set_action_set, + .set_action_add = &dr_ste_v1_set_action_add, + .set_action_copy = &dr_ste_v1_set_action_copy, + .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list, + + /* Send */ + .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, +}; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void) +{ + return &ste_ctx_v2; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index 30ae3cda6d2e..31d443dd8386 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -3,12 +3,49 @@ #include "dr_types.h" -int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, - struct mlx5dr_action *action) +static int dr_table_set_miss_action_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl, + struct mlx5dr_action *action) { - struct mlx5dr_matcher *last_matcher = NULL; + struct mlx5dr_matcher_rx_tx *last_nic_matcher = NULL; struct mlx5dr_htbl_connect_info info; struct mlx5dr_ste_htbl *last_htbl; + struct mlx5dr_icm_chunk *chunk; + int ret; + + if (!list_empty(&nic_tbl->nic_matcher_list)) + last_nic_matcher = list_last_entry(&nic_tbl->nic_matcher_list, + struct mlx5dr_matcher_rx_tx, + list_node); + + if (last_nic_matcher) + last_htbl = last_nic_matcher->e_anchor; + else + last_htbl = nic_tbl->s_anchor; + + if (action) { + chunk = nic_tbl->nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX ? + action->dest_tbl->tbl->rx.s_anchor->chunk : + action->dest_tbl->tbl->tx.s_anchor->chunk; + nic_tbl->default_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk); + } else { + nic_tbl->default_icm_addr = nic_tbl->nic_dmn->default_icm_addr; + } + + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_tbl->default_icm_addr; + + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_tbl->nic_dmn, + last_htbl, &info, true); + if (ret) + mlx5dr_dbg(dmn, "Failed to set NIC RX/TX miss action, ret %d\n", ret); + + return ret; +} + +int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_action *action) +{ int ret; if (action && action->action_type != DR_ACTION_TYP_FT) @@ -16,56 +53,18 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, mlx5dr_domain_lock(tbl->dmn); - if (!list_empty(&tbl->matcher_list)) - last_matcher = list_last_entry(&tbl->matcher_list, - struct mlx5dr_matcher, - matcher_list); - if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX || tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) { - if (last_matcher) - last_htbl = last_matcher->rx.e_anchor; - else - last_htbl = tbl->rx.s_anchor; - - tbl->rx.default_icm_addr = action ? - action->dest_tbl->tbl->rx.s_anchor->chunk->icm_addr : - tbl->rx.nic_dmn->default_icm_addr; - - info.type = CONNECT_MISS; - info.miss_icm_addr = tbl->rx.default_icm_addr; - - ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn, - tbl->rx.nic_dmn, - last_htbl, - &info, true); - if (ret) { - mlx5dr_dbg(tbl->dmn, "Failed to set RX miss action, ret %d\n", ret); + ret = dr_table_set_miss_action_nic(tbl->dmn, &tbl->rx, action); + if (ret) goto out; - } } if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX || tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) { - if (last_matcher) - last_htbl = last_matcher->tx.e_anchor; - else - last_htbl = tbl->tx.s_anchor; - - tbl->tx.default_icm_addr = action ? - action->dest_tbl->tbl->tx.s_anchor->chunk->icm_addr : - tbl->tx.nic_dmn->default_icm_addr; - - info.type = CONNECT_MISS; - info.miss_icm_addr = tbl->tx.default_icm_addr; - - ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn, - tbl->tx.nic_dmn, - last_htbl, &info, true); - if (ret) { - mlx5dr_dbg(tbl->dmn, "Failed to set TX miss action, ret %d\n", ret); + ret = dr_table_set_miss_action_nic(tbl->dmn, &tbl->tx, action); + if (ret) goto out; - } } /* Release old action */ @@ -122,6 +121,8 @@ static int dr_table_init_nic(struct mlx5dr_domain *dmn, struct mlx5dr_htbl_connect_info info; int ret; + INIT_LIST_HEAD(&nic_tbl->nic_matcher_list); + nic_tbl->default_icm_addr = nic_dmn->default_icm_addr; nic_tbl->s_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, @@ -213,7 +214,7 @@ static int dr_table_destroy_sw_owned_tbl(struct mlx5dr_table *tbl) tbl->table_type); } -static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl) +static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl, u16 uid) { bool en_encap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); bool en_decap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); @@ -223,10 +224,10 @@ static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl) int ret; if (tbl->rx.s_anchor) - icm_addr_rx = tbl->rx.s_anchor->chunk->icm_addr; + icm_addr_rx = mlx5dr_icm_pool_get_chunk_icm_addr(tbl->rx.s_anchor->chunk); if (tbl->tx.s_anchor) - icm_addr_tx = tbl->tx.s_anchor->chunk->icm_addr; + icm_addr_tx = mlx5dr_icm_pool_get_chunk_icm_addr(tbl->tx.s_anchor->chunk); ft_attr.table_type = tbl->table_type; ft_attr.icm_addr_rx = icm_addr_rx; @@ -235,6 +236,7 @@ static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl) ft_attr.sw_owner = true; ft_attr.decap_en = en_decap; ft_attr.reformat_en = en_encap; + ft_attr.uid = uid; ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev, &ft_attr, NULL, &tbl->table_id); @@ -242,7 +244,8 @@ static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl) return ret; } -struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, u32 flags) +struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, + u32 flags, u16 uid) { struct mlx5dr_table *tbl; int ret; @@ -262,10 +265,12 @@ struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, u if (ret) goto free_tbl; - ret = dr_table_create_sw_owned_tbl(tbl); + ret = dr_table_create_sw_owned_tbl(tbl, uid); if (ret) goto uninit_tbl; + INIT_LIST_HEAD(&tbl->dbg_node); + mlx5dr_dbg_tbl_add(tbl); return tbl; uninit_tbl: @@ -281,9 +286,10 @@ int mlx5dr_table_destroy(struct mlx5dr_table *tbl) { int ret; - if (refcount_read(&tbl->refcount) > 1) + if (WARN_ON_ONCE(refcount_read(&tbl->refcount) > 1)) return -EBUSY; + mlx5dr_dbg_tbl_del(tbl); ret = dr_table_destroy_sw_owned_tbl(tbl); if (ret) return ret; @@ -303,3 +309,8 @@ u32 mlx5dr_table_get_id(struct mlx5dr_table *tbl) { return tbl->table_id; } + +struct mlx5dr_table *mlx5dr_table_get_from_fs_ft(struct mlx5_flow_table *ft) +{ + return ft->fs_dr_table.dr_table; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 2333c2439c28..1777a1e508e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -11,6 +11,7 @@ #include "lib/mlx5.h" #include "mlx5_ifc_dr.h" #include "mlx5dr.h" +#include "dr_dbg.h" #define DR_RULE_MAX_STES 18 #define DR_ACTION_MAX_STES 5 @@ -90,6 +91,7 @@ enum mlx5dr_ste_ctx_action_cap { DR_STE_CTX_ACTION_CAP_TX_POP = 1 << 0, DR_STE_CTX_ACTION_CAP_RX_PUSH = 1 << 1, DR_STE_CTX_ACTION_CAP_RX_ENCAP = 1 << 2, + DR_STE_CTX_ACTION_CAP_POP_MDFY = 1 << 3, }; enum { @@ -104,7 +106,8 @@ enum mlx5dr_matcher_criteria { DR_MATCHER_CRITERIA_MISC2 = 1 << 3, DR_MATCHER_CRITERIA_MISC3 = 1 << 4, DR_MATCHER_CRITERIA_MISC4 = 1 << 5, - DR_MATCHER_CRITERIA_MAX = 1 << 6, + DR_MATCHER_CRITERIA_MISC5 = 1 << 6, + DR_MATCHER_CRITERIA_MAX = 1 << 7, }; enum mlx5dr_action_type { @@ -124,6 +127,7 @@ enum mlx5dr_action_type { DR_ACTION_TYP_INSERT_HDR, DR_ACTION_TYP_REMOVE_HDR, DR_ACTION_TYP_SAMPLER, + DR_ACTION_TYP_ASO_FLOW_METER, DR_ACTION_TYP_MAX, }; @@ -144,10 +148,12 @@ struct mlx5dr_matcher_rx_tx; struct mlx5dr_ste_ctx; struct mlx5dr_ste { - u8 *hw_ste; /* refcount: indicates the num of rules that using this ste */ u32 refcount; + /* this ste is part of a rule, located in ste's chain */ + u8 ste_chain_location; + /* attached to the miss_list head at each htbl entry */ struct list_head miss_list_node; @@ -158,9 +164,6 @@ struct mlx5dr_ste { /* The rule this STE belongs to */ struct mlx5dr_rule_rx_tx *rule_rx_tx; - - /* this ste is part of a rule, located in ste's chain */ - u8 ste_chain_location; }; struct mlx5dr_ste_htbl_ctrl { @@ -178,14 +181,7 @@ struct mlx5dr_ste_htbl { u16 byte_mask; u32 refcount; struct mlx5dr_icm_chunk *chunk; - struct mlx5dr_ste *ste_arr; - u8 *hw_ste_arr; - - struct list_head *miss_list; - - enum mlx5dr_icm_chunk_size chunk_size; struct mlx5dr_ste *pointing_ste; - struct mlx5dr_ste_htbl_ctrl ctrl; }; @@ -276,6 +272,13 @@ struct mlx5dr_ste_actions_attr { int count; u32 headers[MLX5DR_MAX_VLANS]; } vlans; + + struct { + u32 obj_id; + u32 offset; + u8 dest_reg_id; + u8 init_color; + } aso_flow_meter; }; void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, @@ -440,6 +443,11 @@ void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_match_param *mask, struct mlx5dr_cmd_caps *caps, bool inner, bool rx); +void mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, @@ -454,6 +462,10 @@ void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_match_param *mask, struct mlx5dr_cmd_caps *caps, bool inner, bool rx); +void mlx5dr_ste_build_tnl_header_0_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, @@ -494,57 +506,66 @@ struct mlx5dr_match_spec { /* Incoming packet Ethertype - this is the Ethertype * following the last VLAN tag of the packet */ - u32 ethertype:16; u32 smac_15_0:16; /* Source MAC address of incoming packet */ + u32 ethertype:16; + u32 dmac_47_16; /* Destination MAC address of incoming packet */ - /* VLAN ID of first VLAN tag in the incoming packet. + + u32 dmac_15_0:16; /* Destination MAC address of incoming packet */ + /* Priority of first VLAN tag in the incoming packet. * Valid only when cvlan_tag==1 or svlan_tag==1 */ - u32 first_vid:12; + u32 first_prio:3; /* CFI bit of first VLAN tag in the incoming packet. * Valid only when cvlan_tag==1 or svlan_tag==1 */ u32 first_cfi:1; - /* Priority of first VLAN tag in the incoming packet. + /* VLAN ID of first VLAN tag in the incoming packet. * Valid only when cvlan_tag==1 or svlan_tag==1 */ - u32 first_prio:3; - u32 dmac_15_0:16; /* Destination MAC address of incoming packet */ - /* TCP flags. ;Bit 0: FIN;Bit 1: SYN;Bit 2: RST;Bit 3: PSH;Bit 4: ACK; - * Bit 5: URG;Bit 6: ECE;Bit 7: CWR;Bit 8: NS + u32 first_vid:12; + + u32 ip_protocol:8; /* IP protocol */ + /* Differentiated Services Code Point derived from + * Traffic Class/TOS field of IPv6/v4 */ - u32 tcp_flags:9; - u32 ip_version:4; /* IP version */ - u32 frag:1; /* Packet is an IP fragment */ - /* The first vlan in the packet is s-vlan (0x8a88). - * cvlan_tag and svlan_tag cannot be set together + u32 ip_dscp:6; + /* Explicit Congestion Notification derived from + * Traffic Class/TOS field of IPv6/v4 */ - u32 svlan_tag:1; + u32 ip_ecn:2; /* The first vlan in the packet is c-vlan (0x8100). * cvlan_tag and svlan_tag cannot be set together */ u32 cvlan_tag:1; - /* Explicit Congestion Notification derived from - * Traffic Class/TOS field of IPv6/v4 + /* The first vlan in the packet is s-vlan (0x8a88). + * cvlan_tag and svlan_tag cannot be set together */ - u32 ip_ecn:2; - /* Differentiated Services Code Point derived from - * Traffic Class/TOS field of IPv6/v4 + u32 svlan_tag:1; + u32 frag:1; /* Packet is an IP fragment */ + u32 ip_version:4; /* IP version */ + /* TCP flags. ;Bit 0: FIN;Bit 1: SYN;Bit 2: RST;Bit 3: PSH;Bit 4: ACK; + * Bit 5: URG;Bit 6: ECE;Bit 7: CWR;Bit 8: NS */ - u32 ip_dscp:6; - u32 ip_protocol:8; /* IP protocol */ + u32 tcp_flags:9; + + /* TCP source port.;tcp and udp sport/dport are mutually exclusive */ + u32 tcp_sport:16; /* TCP destination port. * tcp and udp sport/dport are mutually exclusive */ u32 tcp_dport:16; - /* TCP source port.;tcp and udp sport/dport are mutually exclusive */ - u32 tcp_sport:16; + + u32 reserved_auto1:16; + u32 ipv4_ihl:4; + u32 reserved_auto2:4; u32 ttl_hoplimit:8; - u32 reserved:24; - /* UDP destination port.;tcp and udp sport/dport are mutually exclusive */ - u32 udp_dport:16; + /* UDP source port.;tcp and udp sport/dport are mutually exclusive */ u32 udp_sport:16; + /* UDP destination port.;tcp and udp sport/dport are mutually exclusive */ + u32 udp_dport:16; + /* IPv6 source address of incoming packets * For IPv4 address use bits 31:0 (rest of the bits are reserved) * This field should be qualified by an appropriate ethertype @@ -588,96 +609,114 @@ struct mlx5dr_match_spec { }; struct mlx5dr_match_misc { - u32 source_sqn:24; /* Source SQN */ - u32 source_vhca_port:4; - /* used with GRE, sequence number exist when gre_s_present == 1 */ - u32 gre_s_present:1; - /* used with GRE, key exist when gre_k_present == 1 */ - u32 gre_k_present:1; - u32 reserved_auto1:1; /* used with GRE, checksum exist when gre_c_present == 1 */ u32 gre_c_present:1; + u32 reserved_auto1:1; + /* used with GRE, key exist when gre_k_present == 1 */ + u32 gre_k_present:1; + /* used with GRE, sequence number exist when gre_s_present == 1 */ + u32 gre_s_present:1; + u32 source_vhca_port:4; + u32 source_sqn:24; /* Source SQN */ + + u32 source_eswitch_owner_vhca_id:16; /* Source port.;0xffff determines wire port */ u32 source_port:16; - u32 source_eswitch_owner_vhca_id:16; - /* VLAN ID of first VLAN tag the inner header of the incoming packet. - * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 - */ - u32 inner_second_vid:12; - /* CFI bit of first VLAN tag in the inner header of the incoming packet. - * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 - */ - u32 inner_second_cfi:1; - /* Priority of second VLAN tag in the inner header of the incoming packet. - * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 - */ - u32 inner_second_prio:3; - /* VLAN ID of first VLAN tag the outer header of the incoming packet. + + /* Priority of second VLAN tag in the outer header of the incoming packet. * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 */ - u32 outer_second_vid:12; + u32 outer_second_prio:3; /* CFI bit of first VLAN tag in the outer header of the incoming packet. * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 */ u32 outer_second_cfi:1; - /* Priority of second VLAN tag in the outer header of the incoming packet. + /* VLAN ID of first VLAN tag the outer header of the incoming packet. * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 */ - u32 outer_second_prio:3; - u32 gre_protocol:16; /* GRE Protocol (outer) */ - u32 reserved_auto3:12; - /* The second vlan in the inner header of the packet is s-vlan (0x8a88). - * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together + u32 outer_second_vid:12; + /* Priority of second VLAN tag in the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 */ - u32 inner_second_svlan_tag:1; - /* The second vlan in the outer header of the packet is s-vlan (0x8a88). + u32 inner_second_prio:3; + /* CFI bit of first VLAN tag in the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_cfi:1; + /* VLAN ID of first VLAN tag the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_vid:12; + + u32 outer_second_cvlan_tag:1; + u32 inner_second_cvlan_tag:1; + /* The second vlan in the outer header of the packet is c-vlan (0x8100). * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together */ u32 outer_second_svlan_tag:1; /* The second vlan in the inner header of the packet is c-vlan (0x8100). * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together */ - u32 inner_second_cvlan_tag:1; - /* The second vlan in the outer header of the packet is c-vlan (0x8100). + u32 inner_second_svlan_tag:1; + /* The second vlan in the outer header of the packet is s-vlan (0x8a88). * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together */ - u32 outer_second_cvlan_tag:1; - u32 gre_key_l:8; /* GRE Key [7:0] (outer) */ + u32 reserved_auto2:12; + /* The second vlan in the inner header of the packet is s-vlan (0x8a88). + * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together + */ + u32 gre_protocol:16; /* GRE Protocol (outer) */ + u32 gre_key_h:24; /* GRE Key[31:8] (outer) */ - u32 reserved_auto4:8; + u32 gre_key_l:8; /* GRE Key [7:0] (outer) */ + u32 vxlan_vni:24; /* VXLAN VNI (outer) */ - u32 geneve_oam:1; /* GENEVE OAM field (outer) */ - u32 reserved_auto5:7; + u32 reserved_auto3:8; + u32 geneve_vni:24; /* GENEVE VNI field (outer) */ + u32 reserved_auto4:6; + u32 geneve_tlv_option_0_exist:1; + u32 geneve_oam:1; /* GENEVE OAM field (outer) */ + + u32 reserved_auto5:12; u32 outer_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (outer) */ + u32 reserved_auto6:12; u32 inner_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (inner) */ - u32 reserved_auto7:12; - u32 geneve_protocol_type:16; /* GENEVE protocol type (outer) */ + + u32 reserved_auto7:10; u32 geneve_opt_len:6; /* GENEVE OptLen (outer) */ - u32 reserved_auto8:10; + u32 geneve_protocol_type:16; /* GENEVE protocol type (outer) */ + + u32 reserved_auto8:8; u32 bth_dst_qp:24; /* Destination QP in BTH header */ - u32 reserved_auto9:8; - u8 reserved_auto10[20]; + + u32 reserved_auto9; + u32 outer_esp_spi; + u32 reserved_auto10[3]; }; struct mlx5dr_match_misc2 { - u32 outer_first_mpls_ttl:8; /* First MPLS TTL (outer) */ - u32 outer_first_mpls_s_bos:1; /* First MPLS S_BOS (outer) */ - u32 outer_first_mpls_exp:3; /* First MPLS EXP (outer) */ u32 outer_first_mpls_label:20; /* First MPLS LABEL (outer) */ - u32 inner_first_mpls_ttl:8; /* First MPLS TTL (inner) */ - u32 inner_first_mpls_s_bos:1; /* First MPLS S_BOS (inner) */ - u32 inner_first_mpls_exp:3; /* First MPLS EXP (inner) */ + u32 outer_first_mpls_exp:3; /* First MPLS EXP (outer) */ + u32 outer_first_mpls_s_bos:1; /* First MPLS S_BOS (outer) */ + u32 outer_first_mpls_ttl:8; /* First MPLS TTL (outer) */ + u32 inner_first_mpls_label:20; /* First MPLS LABEL (inner) */ - u32 outer_first_mpls_over_gre_ttl:8; /* last MPLS TTL (outer) */ - u32 outer_first_mpls_over_gre_s_bos:1; /* last MPLS S_BOS (outer) */ - u32 outer_first_mpls_over_gre_exp:3; /* last MPLS EXP (outer) */ + u32 inner_first_mpls_exp:3; /* First MPLS EXP (inner) */ + u32 inner_first_mpls_s_bos:1; /* First MPLS S_BOS (inner) */ + u32 inner_first_mpls_ttl:8; /* First MPLS TTL (inner) */ + u32 outer_first_mpls_over_gre_label:20; /* last MPLS LABEL (outer) */ - u32 outer_first_mpls_over_udp_ttl:8; /* last MPLS TTL (outer) */ - u32 outer_first_mpls_over_udp_s_bos:1; /* last MPLS S_BOS (outer) */ - u32 outer_first_mpls_over_udp_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_gre_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_gre_s_bos:1; /* last MPLS S_BOS (outer) */ + u32 outer_first_mpls_over_gre_ttl:8; /* last MPLS TTL (outer) */ + u32 outer_first_mpls_over_udp_label:20; /* last MPLS LABEL (outer) */ + u32 outer_first_mpls_over_udp_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_udp_s_bos:1; /* last MPLS S_BOS (outer) */ + u32 outer_first_mpls_over_udp_ttl:8; /* last MPLS TTL (outer) */ + u32 metadata_reg_c_7; /* metadata_reg_c_7 */ u32 metadata_reg_c_6; /* metadata_reg_c_6 */ u32 metadata_reg_c_5; /* metadata_reg_c_5 */ @@ -687,7 +726,7 @@ struct mlx5dr_match_misc2 { u32 metadata_reg_c_1; /* metadata_reg_c_1 */ u32 metadata_reg_c_0; /* metadata_reg_c_0 */ u32 metadata_reg_a; /* metadata_reg_a */ - u8 reserved_auto2[12]; + u32 reserved_auto1[3]; }; struct mlx5dr_match_misc3 { @@ -695,24 +734,34 @@ struct mlx5dr_match_misc3 { u32 outer_tcp_seq_num; u32 inner_tcp_ack_num; u32 outer_tcp_ack_num; - u32 outer_vxlan_gpe_vni:24; + u32 reserved_auto1:8; - u32 reserved_auto2:16; - u32 outer_vxlan_gpe_flags:8; + u32 outer_vxlan_gpe_vni:24; + u32 outer_vxlan_gpe_next_protocol:8; + u32 outer_vxlan_gpe_flags:8; + u32 reserved_auto2:16; + u32 icmpv4_header_data; u32 icmpv6_header_data; - u8 icmpv6_code; - u8 icmpv6_type; - u8 icmpv4_code; + u8 icmpv4_type; + u8 icmpv4_code; + u8 icmpv6_type; + u8 icmpv6_code; + u32 geneve_tlv_option_0_data; - u8 gtpu_msg_flags; - u8 gtpu_msg_type; + u32 gtpu_teid; + + u8 gtpu_msg_type; + u8 gtpu_msg_flags; + u32 reserved_auto3:16; + u32 gtpu_dw_2; u32 gtpu_first_ext_dw_0; u32 gtpu_dw_0; + u32 reserved_auto4; }; struct mlx5dr_match_misc4 { @@ -724,6 +773,18 @@ struct mlx5dr_match_misc4 { u32 prog_sample_field_id_2; u32 prog_sample_field_value_3; u32 prog_sample_field_id_3; + u32 reserved_auto1[8]; +}; + +struct mlx5dr_match_misc5 { + u32 macsec_tag_0; + u32 macsec_tag_1; + u32 macsec_tag_2; + u32 macsec_tag_3; + u32 tunnel_header_0; + u32 tunnel_header_1; + u32 tunnel_header_2; + u32 tunnel_header_3; }; struct mlx5dr_match_param { @@ -733,12 +794,23 @@ struct mlx5dr_match_param { struct mlx5dr_match_misc2 misc2; struct mlx5dr_match_misc3 misc3; struct mlx5dr_match_misc4 misc4; + struct mlx5dr_match_misc5 misc5; }; #define DR_MASK_IS_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \ (_misc3)->icmpv4_code || \ (_misc3)->icmpv4_header_data) +#define DR_MASK_IS_SRC_IP_SET(_spec) ((_spec)->src_ip_127_96 || \ + (_spec)->src_ip_95_64 || \ + (_spec)->src_ip_63_32 || \ + (_spec)->src_ip_31_0) + +#define DR_MASK_IS_DST_IP_SET(_spec) ((_spec)->dst_ip_127_96 || \ + (_spec)->dst_ip_95_64 || \ + (_spec)->dst_ip_63_32 || \ + (_spec)->dst_ip_31_0) + struct mlx5dr_esw_caps { u64 drop_icm_address_rx; u64 drop_icm_address_tx; @@ -789,6 +861,7 @@ struct mlx5dr_cmd_caps { u8 flex_parser_id_gtpu_teid; u8 flex_parser_id_gtpu_dw_2; u8 flex_parser_id_gtpu_first_ext_dw_0; + u8 flex_parser_ok_bits_supp; u8 max_ft_level; u16 roce_min_src_udp; u8 sw_format_ver; @@ -843,12 +916,15 @@ struct mlx5dr_domain { struct mlx5dr_domain_info info; struct xarray csum_fts_xa; struct mlx5dr_ste_ctx *ste_ctx; + struct list_head dbg_tbl_list; + struct mlx5dr_dbg_dump_info dump_info; }; struct mlx5dr_table_rx_tx { struct mlx5dr_ste_htbl *s_anchor; struct mlx5dr_domain_rx_tx *nic_dmn; u64 default_icm_addr; + struct list_head nic_matcher_list; }; struct mlx5dr_table { @@ -862,6 +938,7 @@ struct mlx5dr_table { struct list_head matcher_list; struct mlx5dr_action *miss_action; refcount_t refcount; + struct list_head dbg_node; }; struct mlx5dr_matcher_rx_tx { @@ -875,18 +952,21 @@ struct mlx5dr_matcher_rx_tx { u8 num_of_builders_arr[DR_RULE_IPV_MAX][DR_RULE_IPV_MAX]; u64 default_icm_addr; struct mlx5dr_table_rx_tx *nic_tbl; + u32 prio; + struct list_head list_node; + u32 rules; }; struct mlx5dr_matcher { struct mlx5dr_table *tbl; struct mlx5dr_matcher_rx_tx rx; struct mlx5dr_matcher_rx_tx tx; - struct list_head matcher_list; + struct list_head list_node; /* Used for both matchers and dbg managing */ u32 prio; struct mlx5dr_match_param mask; u8 match_criteria; refcount_t refcount; - struct mlx5dv_flow_matcher *dv_matcher; + struct list_head dbg_rule_list; }; struct mlx5dr_ste_action_modify_field { @@ -958,6 +1038,19 @@ struct mlx5dr_action_flow_tag { u32 flow_tag; }; +struct mlx5dr_rule_action_member { + struct mlx5dr_action *action; + struct list_head list; +}; + +struct mlx5dr_action_aso_flow_meter { + struct mlx5dr_domain *dmn; + u32 obj_id; + u32 offset; + u8 dest_reg_id; + u8 init_color; +}; + struct mlx5dr_action { enum mlx5dr_action_type action_type; refcount_t refcount; @@ -972,6 +1065,7 @@ struct mlx5dr_action { struct mlx5dr_action_vport *vport; struct mlx5dr_action_push_vlan *push_vlan; struct mlx5dr_action_flow_tag *flow_tag; + struct mlx5dr_action_aso_flow_meter *aso; }; }; @@ -998,6 +1092,7 @@ struct mlx5dr_rule { struct mlx5dr_rule_rx_tx rx; struct mlx5dr_rule_rx_tx tx; struct list_head rule_actions_list; + struct list_head dbg_node; u32 flow_source; }; @@ -1011,16 +1106,12 @@ int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr, struct mlx5dr_icm_chunk { struct mlx5dr_icm_buddy_mem *buddy_mem; struct list_head chunk_list; - u32 rkey; - u32 num_of_entries; - u32 byte_size; - u64 icm_addr; - u64 mr_addr; /* indicates the index of this chunk in the whole memory, * used for deleting the chunk from the buddy */ unsigned int seg; + enum mlx5dr_icm_chunk_size size; /* Memory optimisation */ struct mlx5dr_ste *ste_arr; @@ -1050,11 +1141,23 @@ static inline void mlx5dr_domain_unlock(struct mlx5dr_domain *dmn) mlx5dr_domain_nic_unlock(&dmn->info.rx); } +int mlx5dr_matcher_add_to_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher); +int mlx5dr_matcher_remove_from_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher); + int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, struct mlx5dr_matcher_rx_tx *nic_matcher, enum mlx5dr_ipv outer_ipv, enum mlx5dr_ipv inner_ipv); +u64 mlx5dr_icm_pool_get_chunk_mr_addr(struct mlx5dr_icm_chunk *chunk); +u32 mlx5dr_icm_pool_get_chunk_rkey(struct mlx5dr_icm_chunk *chunk); +u64 mlx5dr_icm_pool_get_chunk_icm_addr(struct mlx5dr_icm_chunk *chunk); +u32 mlx5dr_icm_pool_get_chunk_num_of_entries(struct mlx5dr_icm_chunk *chunk); +u32 mlx5dr_icm_pool_get_chunk_byte_size(struct mlx5dr_icm_chunk *chunk); +u8 *mlx5dr_ste_get_hw_ste(struct mlx5dr_ste *ste); + static inline int mlx5dr_icm_pool_dm_type_to_entry_size(enum mlx5dr_icm_type icm_type) { @@ -1087,7 +1190,7 @@ static inline int mlx5dr_ste_htbl_increase_threshold(struct mlx5dr_ste_htbl *htbl) { int num_of_entries = - mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk_size); + mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk->size); /* Threshold is 50%, one is added to table of size 1 */ return (num_of_entries + 1) / 2; @@ -1096,7 +1199,7 @@ mlx5dr_ste_htbl_increase_threshold(struct mlx5dr_ste_htbl *htbl) static inline bool mlx5dr_ste_htbl_may_grow(struct mlx5dr_ste_htbl *htbl) { - if (htbl->chunk_size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask) + if (htbl->chunk->size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask) return false; return true; @@ -1114,6 +1217,7 @@ struct mlx5dr_cmd_query_flow_table_details { struct mlx5dr_cmd_create_flow_table_attr { u32 table_type; + u16 uid; u64 icm_addr_rx; u64 icm_addr_tx; u8 level; @@ -1190,20 +1294,6 @@ struct mlx5dr_cmd_gid_attr { u32 roce_ver; }; -struct mlx5dr_cmd_qp_create_attr { - u32 page_id; - u32 pdn; - u32 cqn; - u32 pm_state; - u32 service_type; - u32 buff_umem_id; - u32 db_umem_id; - u32 sq_wqe_cnt; - u32 rq_wqe_cnt; - u32 rq_wqe_shift; - u8 isolate_vl_tc:1; -}; - int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, u16 index, struct mlx5dr_cmd_gid_attr *attr); @@ -1375,7 +1465,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, bool reformat_req, u32 *tbl_id, u32 *group_id, - bool ignore_flow_level); + bool ignore_flow_level, + u32 flow_source); void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id, u32 group_id); #endif /* _DR_TYPES_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 2632d5ae9bc0..13b6d4721e17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2019 Mellanox Technologies */ +#include <linux/mlx5/vport.h> #include "mlx5_core.h" #include "fs_core.h" #include "fs_cmd.h" @@ -43,11 +44,10 @@ static int set_miss_action(struct mlx5_flow_root_namespace *ns, err = mlx5dr_table_set_miss_action(ft->fs_dr_table.dr_table, action); if (err && action) { err = mlx5dr_action_destroy(action); - if (err) { - action = NULL; - mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n", - err); - } + if (err) + mlx5_core_err(ns->dev, + "Failed to destroy action (%d)\n", err); + action = NULL; } ft->fs_dr_table.miss_action = action; if (old_miss_action) { @@ -62,7 +62,7 @@ static int set_miss_action(struct mlx5_flow_root_namespace *ns, static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, - unsigned int size, + struct mlx5_flow_table_attr *ft_attr, struct mlx5_flow_table *next_ft) { struct mlx5dr_table *tbl; @@ -71,7 +71,7 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns, if (mlx5_dr_is_fw_table(ft->flags)) return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft, - size, + ft_attr, next_ft); flags = ft->flags; /* turn off encap/decap if not supported for sw-str by fw */ @@ -79,7 +79,8 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns, flags = ft->flags & ~(MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags); + tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags, + ft_attr->uid); if (!tbl) { mlx5_core_err(ns->dev, "Failed creating dr flow_table\n"); return -EINVAL; @@ -194,6 +195,15 @@ static struct mlx5dr_action *create_vport_action(struct mlx5dr_domain *domain, dest_attr->vport.vhca_id); } +static struct mlx5dr_action *create_uplink_action(struct mlx5dr_domain *domain, + struct mlx5_flow_rule *dst) +{ + struct mlx5_flow_destination *dest_attr = &dst->dest_attr; + + return mlx5dr_action_create_dest_vport(domain, MLX5_VPORT_UPLINK, 1, + dest_attr->vport.vhca_id); +} + static struct mlx5dr_action *create_ft_action(struct mlx5dr_domain *domain, struct mlx5_flow_rule *dst) { @@ -218,11 +228,16 @@ static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domai static bool contain_vport_reformat_action(struct mlx5_flow_rule *dst) { - return dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + return (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID; } -#define MLX5_FLOW_CONTEXT_ACTION_MAX 32 +/* We want to support a rule with 32 destinations, which means we need to + * account for 32 destinations plus usually a counter plus one more action + * for a multi-destination flow table. + */ +#define MLX5_FLOW_CONTEXT_ACTION_MAX 34 static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_group *group, @@ -392,9 +407,9 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, enum mlx5_flow_destination_type type = dst->dest_attr.type; u32 id; - if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || - num_term_actions >= MLX5_FLOW_CONTEXT_ACTION_MAX) { - err = -ENOSPC; + if (fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + num_term_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; goto free_actions; } @@ -411,8 +426,11 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, fs_dr_actions[fs_dr_num_actions++] = tmp_action; term_actions[num_term_actions++].dest = tmp_action; break; + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: case MLX5_FLOW_DESTINATION_TYPE_VPORT: - tmp_action = create_vport_action(domain, dst); + tmp_action = type == MLX5_FLOW_DESTINATION_TYPE_VPORT ? + create_vport_action(domain, dst) : + create_uplink_action(domain, dst); if (!tmp_action) { err = -ENOMEM; goto free_actions; @@ -464,8 +482,9 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, MLX5_FLOW_DESTINATION_TYPE_COUNTER) continue; - if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { - err = -ENOSPC; + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; goto free_actions; } @@ -482,21 +501,58 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, } } + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) { + if (fte->action.exe_aso.type != MLX5_EXE_ASO_FLOW_METER) { + err = -EOPNOTSUPP; + goto free_actions; + } + + tmp_action = + mlx5dr_action_create_aso(domain, + fte->action.exe_aso.object_id, + fte->action.exe_aso.return_reg_id, + fte->action.exe_aso.type, + fte->action.exe_aso.flow_meter.init_color, + fte->action.exe_aso.flow_meter.meter_idx); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + params.match_sz = match_sz; params.match_buf = (u64 *)fte->val; if (num_term_actions == 1) { - if (term_actions->reformat) + if (term_actions->reformat) { + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } actions[num_actions++] = term_actions->reformat; + } + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } actions[num_actions++] = term_actions->dest; } else if (num_term_actions > 1) { bool ignore_flow_level = !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL); + u32 flow_source = fte->flow_context.flow_source; + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } tmp_action = mlx5dr_action_create_mult_dest_tbl(domain, term_actions, num_term_actions, - ignore_flow_level); + ignore_flow_level, + flow_source); if (!tmp_action) { err = -EOPNOTSUPP; goto free_actions; @@ -721,6 +777,16 @@ static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns) return mlx5dr_domain_destroy(ns->fs_dr_domain.dr_domain); } +static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + if (ft_type != FS_FT_FDB || + MLX5_CAP_GEN(ns->dev, steering_format_version) == MLX5_STEERING_FORMAT_CONNECTX_5) + return 0; + + return MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX | MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX; +} + bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) { return mlx5dr_is_supported(dev); @@ -745,6 +811,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = { .set_peer = mlx5_cmd_dr_set_peer, .create_ns = mlx5_cmd_dr_create_ns, .destroy_ns = mlx5_cmd_dr_destroy_ns, + .get_capabilities = mlx5_cmd_dr_get_capabilities, }; const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h index 1fb185d6ac7f..d168622063d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h @@ -14,10 +14,6 @@ struct mlx5_fs_dr_action { struct mlx5dr_action *dr_action; }; -struct mlx5_fs_dr_ns { - struct mlx5_dr_ns *dr_ns; -}; - struct mlx5_fs_dr_rule { struct mlx5dr_rule *dr_rule; /* Only actions created by fs_dr */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h index d2a937f69784..fb078fa0f0cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h @@ -447,6 +447,14 @@ struct mlx5_ifc_ste_flex_parser_1_bits { u8 flex_parser_4[0x20]; }; +struct mlx5_ifc_ste_flex_parser_ok_bits { + u8 flex_parser_3[0x20]; + u8 flex_parser_2[0x20]; + u8 flex_parsers_ok[0x8]; + u8 reserved_at_48[0x18]; + u8 flex_parser_0[0x20]; +}; + struct mlx5_ifc_ste_flex_parser_tnl_bits { u8 flex_parser_tunneling_header_63_32[0x20]; @@ -490,6 +498,14 @@ struct mlx5_ifc_ste_flex_parser_tnl_gtpu_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_ste_tunnel_header_bits { + u8 tunnel_header_0[0x20]; + + u8 tunnel_header_1[0x20]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_ste_general_purpose_bits { u8 general_purpose_lookup_field[0x20]; @@ -558,4 +574,30 @@ struct mlx5_ifc_dr_action_hw_copy_bits { u8 reserved_at_38[0x8]; }; +enum { + MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ = 2, +}; + +struct mlx5_ifc_ste_aso_flow_meter_action_bits { + u8 reserved_at_0[0xc]; + u8 action[0x1]; + u8 initial_color[0x2]; + u8 line_id[0x1]; +}; + +struct mlx5_ifc_ste_double_action_aso_v1_bits { + u8 action_id[0x8]; + u8 aso_context_number[0x18]; + + u8 dest_reg_id[0x2]; + u8 change_ordering_tag[0x1]; + u8 aso_check_ordering[0x1]; + u8 aso_context_type[0x4]; + u8 reserved_at_28[0x8]; + union { + u8 aso_fields[0x10]; + struct mlx5_ifc_ste_aso_flow_meter_action_bits flow_meter; + }; +}; + #endif /* MLX5_IFC_DR_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index c7c93131b762..226a0d7bb06d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -51,7 +51,11 @@ void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, struct mlx5dr_domain *peer_dmn); struct mlx5dr_table * -mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags); +mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags, + u16 uid); + +struct mlx5dr_table * +mlx5dr_table_get_from_fs_ft(struct mlx5_flow_table *ft); int mlx5dr_table_destroy(struct mlx5dr_table *table); @@ -96,7 +100,8 @@ struct mlx5dr_action * mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, struct mlx5dr_action_dest *dests, u32 num_of_dests, - bool ignore_flow_level); + bool ignore_flow_level, + u32 flow_source); struct mlx5dr_action *mlx5dr_action_create_drop(void); @@ -127,6 +132,14 @@ struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void); struct mlx5dr_action * mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, __be32 vlan_hdr); +struct mlx5dr_action * +mlx5dr_action_create_aso(struct mlx5dr_domain *dmn, + u32 obj_id, + u8 return_reg_id, + u8 aso_type, + u8 init_color, + u8 meter_id); + int mlx5dr_action_destroy(struct mlx5dr_action *action); static inline bool @@ -136,7 +149,7 @@ mlx5dr_is_supported(struct mlx5_core_dev *dev) (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) || (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) && (MLX5_CAP_GEN(dev, steering_format_version) <= - MLX5_STEERING_FORMAT_CONNECTX_6DX))); + MLX5_STEERING_FORMAT_CONNECTX_7))); } /* buddy functions & structure */ @@ -160,6 +173,11 @@ struct mlx5dr_icm_buddy_mem { * sync_ste command sets them free. */ struct list_head hot_list; + + /* Memory optimisation */ + struct mlx5dr_ste *ste_arr; + struct list_head *miss_list; + u8 *hw_ste_arr; }; int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 01e9c412977c..8455e79bc44a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -31,7 +31,6 @@ */ #include <linux/kernel.h> -#include <linux/module.h> #include <linux/io-mapping.h> #include <linux/mlx5/driver.h> #include "mlx5_core.h" @@ -100,19 +99,21 @@ static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev, int err = -ENOMEM; phys_addr_t pfn; int bfregs; + int node; int i; bfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR; - up = kzalloc(sizeof(*up), GFP_KERNEL); + node = mdev->priv.numa_node; + up = kzalloc_node(sizeof(*up), GFP_KERNEL, node); if (!up) return ERR_PTR(err); up->mdev = mdev; - up->reg_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL); + up->reg_bitmap = bitmap_zalloc_node(bfregs, GFP_KERNEL, node); if (!up->reg_bitmap) goto error1; - up->fp_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL); + up->fp_bitmap = bitmap_zalloc_node(bfregs, GFP_KERNEL, node); if (!up->fp_bitmap) goto error1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 8846d30a380a..d5c317325030 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -280,7 +280,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_in) + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); - out = kzalloc(out_sz, GFP_KERNEL); + out = kvzalloc(out_sz, GFP_KERNEL); if (!out) return -ENOMEM; @@ -307,7 +307,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, ether_addr_copy(addr_list[i], mac_addr); } out: - kfree(out); + kvfree(out); return err; } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list); @@ -335,7 +335,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + list_size * MLX5_ST_SZ_BYTES(mac_address_layout); - in = kzalloc(in_sz, GFP_KERNEL); + in = kvzalloc(in_sz, GFP_KERNEL); if (!in) return -ENOMEM; @@ -360,7 +360,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, } err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); - kfree(in); + kvfree(in); return err; } EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); @@ -386,7 +386,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, list_size * MLX5_ST_SZ_BYTES(vlan_layout); memset(out, 0, sizeof(out)); - in = kzalloc(in_sz, GFP_KERNEL); + in = kvzalloc(in_sz, GFP_KERNEL); if (!in) return -ENOMEM; @@ -411,7 +411,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, } err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); - kfree(in); + kvfree(in); return err; } EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans); @@ -542,8 +542,8 @@ int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, out_sz += nout * sizeof(*gid); - in = kzalloc(in_sz, GFP_KERNEL); - out = kzalloc(out_sz, GFP_KERNEL); + in = kvzalloc(in_sz, GFP_KERNEL); + out = kvzalloc(out_sz, GFP_KERNEL); if (!in || !out) { err = -ENOMEM; goto out; @@ -573,8 +573,8 @@ int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, gid->global.interface_id = tmp->global.interface_id; out: - kfree(in); - kfree(out); + kvfree(in); + kvfree(out); return err; } EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_gid); @@ -607,8 +607,8 @@ int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport, out_sz += nout * MLX5_ST_SZ_BYTES(pkey); - in = kzalloc(in_sz, GFP_KERNEL); - out = kzalloc(out_sz, GFP_KERNEL); + in = kvzalloc(in_sz, GFP_KERNEL); + out = kvzalloc(out_sz, GFP_KERNEL); if (!in || !out) { err = -ENOMEM; goto out; @@ -638,8 +638,8 @@ int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport, *pkey = MLX5_GET_PR(pkey, pkarr, pkey); out: - kfree(in); - kfree(out); + kvfree(in); + kvfree(out); return err; } EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_pkey); @@ -658,7 +658,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); - out = kzalloc(out_sz, GFP_KERNEL); + out = kvzalloc(out_sz, GFP_KERNEL); if (!out) return -ENOMEM; @@ -717,7 +717,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, system_image_guid); ex: - kfree(out); + kvfree(out); return err; } EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context); @@ -728,7 +728,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, struct mlx5_hca_vport_context *rep; int err; - rep = kzalloc(sizeof(*rep), GFP_KERNEL); + rep = kvzalloc(sizeof(*rep), GFP_KERNEL); if (!rep) return -ENOMEM; @@ -736,7 +736,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, if (!err) *sys_image_guid = rep->sys_image_guid; - kfree(rep); + kvfree(rep); return err; } EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_system_image_guid); @@ -747,7 +747,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, struct mlx5_hca_vport_context *rep; int err; - rep = kzalloc(sizeof(*rep), GFP_KERNEL); + rep = kvzalloc(sizeof(*rep), GFP_KERNEL); if (!rep) return -ENOMEM; @@ -755,7 +755,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, if (!err) *node_guid = rep->node_guid; - kfree(rep); + kvfree(rep); return err; } EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); @@ -770,7 +770,7 @@ int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); int err; - out = kzalloc(outlen, GFP_KERNEL); + out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; @@ -786,7 +786,7 @@ int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, nic_vport_context.promisc_all); out: - kfree(out); + kvfree(out); return err; } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc); @@ -874,7 +874,7 @@ int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status) int value; int err; - out = kzalloc(outlen, GFP_KERNEL); + out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; @@ -891,7 +891,7 @@ int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status) *status = !value; out: - kfree(out); + kvfree(out); return err; } EXPORT_SYMBOL_GPL(mlx5_nic_vport_query_local_lb); @@ -1033,7 +1033,7 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, mlx5_core_dbg(dev, "vf %d\n", vf); is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); - in = kzalloc(in_sz, GFP_KERNEL); + in = kvzalloc(in_sz, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1065,7 +1065,7 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, req->cap_mask1_perm); err = mlx5_cmd_exec_in(dev, modify_hca_vport_context, in); ex: - kfree(in); + kvfree(in); return err; } EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context); @@ -1086,9 +1086,17 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, goto free; MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1); - MLX5_SET(modify_nic_vport_context_in, in, - nic_vport_context.affiliated_vhca_id, - MLX5_CAP_GEN(master_mdev, vhca_id)); + if (MLX5_CAP_GEN_2(master_mdev, sw_vhca_id_valid)) { + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.vhca_id_type, VHCA_ID_TYPE_SW); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, + MLX5_CAP_GEN_2(master_mdev, sw_vhca_id)); + } else { + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, + MLX5_CAP_GEN(master_mdev, vhca_id)); + } MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.affiliation_criteria, MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index e5c4dcd1425e..4d629e5ddbc7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -123,7 +123,7 @@ static inline void mlx5_wq_cyc_push(struct mlx5_wq_cyc *wq) wq->cur_sz++; } -static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u8 n) +static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u16 n) { wq->wqe_ctr += n; wq->cur_sz += n; |