Diffstat (limited to 'drivers/infiniband/hw/mlx5/main.c')
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 4460
1 file changed, 819 insertions, 3641 deletions
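Much of this commit is mechanical conversion: RDMA port numbers widen from u8 to u32, the old field_avail() macro gives way to open-coded offsetofend() comparisons, and kernel-only device capabilities move to the new IBK_* namespace. As a minimal sketch of the offsetofend() idiom used below to decide whether an extensible uverbs response field fits in the user buffer (struct demo_resp and demo_field_fits are hypothetical illustration names; offsetofend() itself is the stock helper from <linux/stddef.h>):

#include <linux/stddef.h>	/* offsetofend() */
#include <linux/types.h>

/* Hypothetical extensible response layout, for illustration only. */
struct demo_resp {
	__u32 response_length;
	__u32 tso_caps;
};

/*
 * A field is reported only when the caller's output buffer can hold
 * it. offsetofend(TYPE, MEMBER) is offsetof() plus the member's size,
 * so this is the same bound the diff writes as
 * "offsetofend(typeof(resp), tso_caps) <= uhw_outlen".
 */
static bool demo_field_fits(size_t uhw_outlen)
{
	return offsetofend(struct demo_resp, tso_caps) <= uhw_outlen;
}

Compared with field_avail(), which open-coded offsetof() + sizeof() at each call site, the offsetofend() form expresses the same bound with a standard helper.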
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e4bcfa81b70a..c669ef6e47e7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1,33 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. 
*/ #include <linux/debugfs.h> @@ -39,9 +13,6 @@ #include <linux/dma-mapping.h> #include <linux/slab.h> #include <linux/bitmap.h> -#if defined(CONFIG_X86) -#include <asm/memtype.h> -#endif #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> @@ -55,34 +26,34 @@ #include <linux/mlx5/eswitch.h> #include <linux/list.h> #include <rdma/ib_smi.h> -#include <rdma/ib_umem.h> +#include <rdma/ib_umem_odp.h> +#include <rdma/lag.h> #include <linux/in.h> #include <linux/etherdevice.h> #include "mlx5_ib.h" #include "ib_rep.h" #include "cmd.h" +#include "devx.h" +#include "dm.h" +#include "fs.h" #include "srq.h" -#include <linux/mlx5/fs_helpers.h> -#include <linux/mlx5/accel.h> +#include "qp.h" +#include "wr.h" +#include "restrack.h" +#include "counters.h" +#include "umr.h" #include <rdma/uverbs_std_types.h> +#include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_verbs.h> #include <rdma/mlx5_user_ioctl_cmds.h> -#include <rdma/ib_umem_odp.h> #define UVERBS_MODULE_NAME mlx5_ib #include <rdma/uverbs_named_ioctl.h> -#define DRIVER_NAME "mlx5_ib" -#define DRIVER_VERSION "5.0-0" - MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); -MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver"); +MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) IB driver"); MODULE_LICENSE("Dual BSD/GPL"); -static char mlx5_version[] = - DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" - DRIVER_VERSION "\n"; - struct mlx5_ib_event_work { struct work_struct work; union { @@ -106,12 +77,6 @@ static LIST_HEAD(mlx5_ib_dev_list); */ static DEFINE_MUTEX(mlx5_ib_multiport_mutex); -/* We can't use an array for xlt_emergency_page because dma_map_single - * doesn't work on kernel modules memory - */ -static unsigned long xlt_emergency_page; -static struct mutex xlt_emergency_page_mutex; - struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi) { struct mlx5_ib_dev *dev; @@ -136,7 +101,7 @@ mlx5_port_type_cap_to_rdma_ll(int port_type_cap) } static enum rdma_link_layer -mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num) +mlx5_ib_port_link_layer(struct ib_device *device, u32 port_num) { struct mlx5_ib_dev *dev = to_mdev(device); int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type); @@ -145,7 +110,7 @@ mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num) } static int get_port_state(struct ib_device *ibdev, - u8 port_num, + u32 port_num, enum ib_port_state *state) { struct ib_port_attr attr; @@ -160,9 +125,9 @@ static int get_port_state(struct ib_device *ibdev, static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev, struct net_device *ndev, - u8 *port_num) + struct net_device *upper, + u32 *port_num) { - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; struct net_device *rep_ndev; struct mlx5_ib_port *port; int i; @@ -172,8 +137,16 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev, if (!port->rep) continue; + if (upper == ndev && port->rep->vport == MLX5_VPORT_UPLINK) { + *port_num = i + 1; + return &port->roce; + } + + if (upper && port->rep->vport == MLX5_VPORT_UPLINK) + continue; + read_lock(&port->roce.netdev_lock); - rep_ndev = mlx5_ib_get_rep_netdev(esw, + rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw, port->rep->vport); if (rep_ndev == ndev) { read_unlock(&port->roce.netdev_lock); @@ -191,7 +164,7 @@ static int mlx5_netdev_event(struct notifier_block *this, { struct mlx5_roce *roce = container_of(this, struct mlx5_roce, nb); struct net_device *ndev = netdev_notifier_info_to_dev(ptr); - u8 
port_num = roce->native_port_num; + u32 port_num = roce->native_port_num; struct mlx5_core_dev *mdev; struct mlx5_ib_dev *ibdev; @@ -231,11 +204,12 @@ static int mlx5_netdev_event(struct notifier_block *this, } if (ibdev->is_rep) - roce = mlx5_get_rep_roce(ibdev, ndev, &port_num); + roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num); if (!roce) return NOTIFY_DONE; - if ((upper == ndev || (!upper && ndev == roce->netdev)) - && ibdev->ib_active) { + if ((upper == ndev || + ((!upper || ibdev->is_rep) && ndev == roce->netdev)) && + ibdev->ib_active) { struct ib_event ibev = { }; enum ib_port_state port_state; @@ -270,7 +244,7 @@ done: } static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, - u8 port_num) + u32 port_num) { struct mlx5_ib_dev *ibdev = to_mdev(device); struct net_device *ndev; @@ -298,8 +272,8 @@ out: } struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev, - u8 ib_port_num, - u8 *native_port_num) + u32 ib_port_num, + u32 *native_port_num) { enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev, ib_port_num); @@ -318,9 +292,6 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev, *native_port_num = 1; port = &ibdev->port[ib_port_num - 1]; - if (!port) - return NULL; - spin_lock(&port->mp.mpi_lock); mpi = ibdev->port[ib_port_num - 1].mp.mpi; if (mpi && !mpi->unaffiliate) { @@ -336,7 +307,7 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev, return mdev; } -void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *ibdev, u8 port_num) +void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *ibdev, u32 port_num) { enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev, port_num); @@ -360,8 +331,8 @@ out: spin_unlock(&port->mp.mpi_lock); } -static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed, - u8 *active_width) +static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, + u16 *active_speed, u8 *active_width) { switch (eth_proto_oper) { case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII): @@ -418,7 +389,7 @@ static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed, return 0; } -static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, +static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed, u8 *active_width) { switch (eth_proto_oper) { @@ -459,10 +430,22 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, *active_width = IB_WIDTH_2X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_100GAUI_1_100GBASE_CR_KR): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_NDR; + break; case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4): *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_200GAUI_2_200GBASE_CR2_KR2): + *active_width = IB_WIDTH_2X; + *active_speed = IB_SPEED_NDR; + break; + case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4): + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_NDR; + break; default: return -EINVAL; } @@ -470,7 +453,7 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, return 0; } -static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, +static int translate_eth_proto_oper(u32 eth_proto_oper, u16 *active_speed, u8 *active_width, bool ext) { return ext ? 
@@ -480,7 +463,7 @@ static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, active_width); } -static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, +static int mlx5_query_port_roce(struct ib_device *device, u32 port_num, struct ib_port_attr *props) { struct mlx5_ib_dev *dev = to_mdev(device); @@ -489,9 +472,8 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, struct net_device *ndev, *upper; enum ib_mtu ndev_ib_mtu; bool put_mdev = true; - u16 qkey_viol_cntr; u32 eth_prot_oper; - u8 mdev_port_num; + u32 mdev_port_num; bool ext; int err; @@ -518,7 +500,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, mdev_port_num); if (err) goto out; - ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet); + ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability); eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); props->active_width = IB_WIDTH_4X; @@ -527,20 +509,22 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, translate_eth_proto_oper(eth_prot_oper, &props->active_speed, &props->active_width, ext); - props->port_cap_flags |= IB_PORT_CM_SUP; - props->ip_gids = true; + if (!dev->is_rep && dev->mdev->roce.roce_en) { + u16 qkey_viol_cntr; - props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, - roce_address_table_size); + props->port_cap_flags |= IB_PORT_CM_SUP; + props->ip_gids = true; + props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, + roce_address_table_size); + mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr); + props->qkey_viol_cntr = qkey_viol_cntr; + } props->max_mtu = IB_MTU_4096; props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg); props->pkey_tbl_len = 1; props->state = IB_PORT_DOWN; props->phys_state = IB_PORT_PHYS_STATE_DISABLED; - mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr); - props->qkey_viol_cntr = qkey_viol_cntr; - /* If this is a stub query for an unaffiliated port stop here */ if (!put_mdev) goto out; @@ -576,31 +560,31 @@ out: return err; } -static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, +static int set_roce_addr(struct mlx5_ib_dev *dev, u32 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr) { - enum ib_gid_type gid_type = IB_GID_TYPE_IB; + enum ib_gid_type gid_type; u16 vlan_id = 0xffff; u8 roce_version = 0; u8 roce_l3_type = 0; u8 mac[ETH_ALEN]; int ret; + gid_type = attr->gid_type; if (gid) { - gid_type = attr->gid_type; ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]); if (ret) return ret; } switch (gid_type) { - case IB_GID_TYPE_IB: + case IB_GID_TYPE_ROCE: roce_version = MLX5_ROCE_VERSION_1; break; case IB_GID_TYPE_ROCE_UDP_ENCAP: roce_version = MLX5_ROCE_VERSION_2; - if (ipv6_addr_v4mapped((void *)gid)) + if (gid && ipv6_addr_v4mapped((void *)gid)) roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4; else roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6; @@ -627,11 +611,11 @@ static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, __always_unused void **context) { return set_roce_addr(to_mdev(attr->device), attr->port_num, - attr->index, NULL, NULL); + attr->index, NULL, attr); } -__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, - const struct ib_gid_attr *attr) +__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev, + const struct ib_gid_attr *attr) { if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) return 0; @@ -843,9 +827,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (err) return err; - err = mlx5_query_max_pkeys(ibdev, 
&props->max_pkeys); - if (err) - return err; + props->max_pkeys = dev->pkey_table_len; err = mlx5_query_vendor_id(ibdev, &props->vendor_id); if (err) @@ -872,11 +854,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, IB_DEVICE_MEM_WINDOW_TYPE_2B; props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); /* We support 'Gappy' memory registration too */ - props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG; + props->kernel_cap_flags |= IBK_SG_GAPS_REG; } - props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + /* IB_WR_REG_MR always requires changing the entity size with UMR */ + if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { - props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER; + props->kernel_cap_flags |= IBK_INTEGRITY_HANDOVER; /* At this stage no support for signature handover */ props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | IB_PROT_T10DIF_TYPE_2 | @@ -885,7 +869,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, IB_GUARD_T10DIF_CSUM; } if (MLX5_CAP_GEN(mdev, block_lb_mc)) - props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + props->kernel_cap_flags |= IBK_BLOCK_MULTICAST_LOOPBACK; if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && raw_support) { if (MLX5_CAP_ETH(mdev, csum_cap)) { @@ -898,7 +882,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->raw_packet_caps |= IB_RAW_PACKET_CAP_CVLAN_STRIPPING; - if (field_avail(typeof(resp), tso_caps, uhw_outlen)) { + if (offsetofend(typeof(resp), tso_caps) <= uhw_outlen) { max_tso = MLX5_CAP_ETH(mdev, max_lso_cap); if (max_tso) { resp.tso_caps.max_tso = 1 << max_tso; @@ -908,7 +892,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), rss_caps, uhw_outlen)) { + if (offsetofend(typeof(resp), rss_caps) <= uhw_outlen) { resp.rss_caps.rx_hash_function = MLX5_RX_HASH_FUNC_TOEPLITZ; resp.rss_caps.rx_hash_fields_mask = @@ -921,22 +905,18 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_RX_HASH_SRC_PORT_UDP | MLX5_RX_HASH_DST_PORT_UDP | MLX5_RX_HASH_INNER; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) - resp.rss_caps.rx_hash_fields_mask |= - MLX5_RX_HASH_IPSEC_SPI; resp.response_length += sizeof(resp.rss_caps); } } else { - if (field_avail(typeof(resp), tso_caps, uhw_outlen)) + if (offsetofend(typeof(resp), tso_caps) <= uhw_outlen) resp.response_length += sizeof(resp.tso_caps); - if (field_avail(typeof(resp), rss_caps, uhw_outlen)) + if (offsetofend(typeof(resp), rss_caps) <= uhw_outlen) resp.response_length += sizeof(resp.rss_caps); } if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; - props->device_cap_flags |= IB_DEVICE_UD_TSO; + props->kernel_cap_flags |= IBK_UD_TSO; } if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) && @@ -1006,14 +986,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; - props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ props->max_ah = INT_MAX; props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz); props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) - props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; + props->kernel_cap_flags |= IBK_ON_DEMAND_PAGING; 
props->odp_caps = dev->odp_caps; if (!uhw) { /* ODP for kernel QPs is not implemented for receive @@ -1034,11 +1013,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (MLX5_CAP_GEN(mdev, cd)) - props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; - if (mlx5_core_is_vf(mdev)) - props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; + props->kernel_cap_flags |= IBK_VIRTUAL_FUNCTION; if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET && raw_support) { @@ -1072,7 +1048,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_MAX_CQ_PERIOD; } - if (field_avail(typeof(resp), cqe_comp_caps, uhw_outlen)) { + if (offsetofend(typeof(resp), cqe_comp_caps) <= uhw_outlen) { resp.response_length += sizeof(resp.cqe_comp_caps); if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) { @@ -1090,7 +1066,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), packet_pacing_caps, uhw_outlen) && + if (offsetofend(typeof(resp), packet_pacing_caps) <= uhw_outlen && raw_support) { if (MLX5_CAP_QOS(mdev, packet_pacing) && MLX5_CAP_GEN(mdev, qos)) { @@ -1108,8 +1084,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.response_length += sizeof(resp.packet_pacing_caps); } - if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes, - uhw_outlen)) { + if (offsetofend(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes) <= + uhw_outlen) { if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) resp.mlx5_ib_support_multi_pkt_send_wqes = MLX5_IB_ALLOW_MPW; @@ -1122,7 +1098,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } - if (field_avail(typeof(resp), flags, uhw_outlen)) { + if (offsetofend(typeof(resp), flags) <= uhw_outlen) { resp.response_length += sizeof(resp.flags); if (MLX5_CAP_GEN(mdev, cqe_compression_128)) @@ -1138,7 +1114,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT; } - if (field_avail(typeof(resp), sw_parsing_caps, uhw_outlen)) { + if (offsetofend(typeof(resp), sw_parsing_caps) <= uhw_outlen) { resp.response_length += sizeof(resp.sw_parsing_caps); if (MLX5_CAP_ETH(mdev, swp)) { resp.sw_parsing_caps.sw_parsing_offloads |= @@ -1158,7 +1134,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), striding_rq_caps, uhw_outlen) && + if (offsetofend(typeof(resp), striding_rq_caps) <= uhw_outlen && raw_support) { resp.response_length += sizeof(resp.striding_rq_caps); if (MLX5_CAP_GEN(mdev, striding_rq)) { @@ -1181,7 +1157,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), tunnel_offloads_caps, uhw_outlen)) { + if (offsetofend(typeof(resp), tunnel_offloads_caps) <= uhw_outlen) { resp.response_length += sizeof(resp.tunnel_offloads_caps); if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan)) resp.tunnel_offloads_caps |= @@ -1192,16 +1168,24 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_GRE; - if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & - MLX5_FLEX_PROTO_CW_MPLS_GRE) + if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE; - if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & - MLX5_FLEX_PROTO_CW_MPLS_UDP) + if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_udp)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP; } + if 
(offsetofend(typeof(resp), dci_streams_caps) <= uhw_outlen) { + resp.response_length += sizeof(resp.dci_streams_caps); + + resp.dci_streams_caps.max_log_num_concurent = + MLX5_CAP_GEN(mdev, log_max_dci_stream_channels); + + resp.dci_streams_caps.max_log_num_errored = + MLX5_CAP_GEN(mdev, log_max_dci_errored_streams); + } + if (uhw_outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); @@ -1212,32 +1196,24 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, return 0; } -enum mlx5_ib_width { - MLX5_IB_WIDTH_1X = 1 << 0, - MLX5_IB_WIDTH_2X = 1 << 1, - MLX5_IB_WIDTH_4X = 1 << 2, - MLX5_IB_WIDTH_8X = 1 << 3, - MLX5_IB_WIDTH_12X = 1 << 4 -}; - -static void translate_active_width(struct ib_device *ibdev, u8 active_width, - u8 *ib_width) +static void translate_active_width(struct ib_device *ibdev, u16 active_width, + u8 *ib_width) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - if (active_width & MLX5_IB_WIDTH_1X) + if (active_width & MLX5_PTYS_WIDTH_1X) *ib_width = IB_WIDTH_1X; - else if (active_width & MLX5_IB_WIDTH_2X) + else if (active_width & MLX5_PTYS_WIDTH_2X) *ib_width = IB_WIDTH_2X; - else if (active_width & MLX5_IB_WIDTH_4X) + else if (active_width & MLX5_PTYS_WIDTH_4X) *ib_width = IB_WIDTH_4X; - else if (active_width & MLX5_IB_WIDTH_8X) + else if (active_width & MLX5_PTYS_WIDTH_8X) *ib_width = IB_WIDTH_8X; - else if (active_width & MLX5_IB_WIDTH_12X) + else if (active_width & MLX5_PTYS_WIDTH_12X) *ib_width = IB_WIDTH_12X; else { mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n", - (int)active_width); + active_width); *ib_width = IB_WIDTH_4X; } @@ -1305,7 +1281,7 @@ static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap, return 0; } -static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, +static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port, struct ib_port_attr *props) { struct mlx5_ib_dev *dev = to_mdev(ibdev); @@ -1314,7 +1290,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, u16 max_mtu; u16 oper_mtu; int err; - u8 ib_link_width_oper; + u16 ib_link_width_oper; u8 vl_hw_cap; rep = kzalloc(sizeof(*rep), GFP_KERNEL); @@ -1347,16 +1323,13 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) props->port_cap_flags2 = rep->cap_mask2; - err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); + err = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, + &props->active_speed, port); if (err) goto out; translate_active_width(ibdev, ib_link_width_oper, &props->active_width); - err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port); - if (err) - goto out; - mlx5_query_port_max_mtu(mdev, &max_mtu, port); props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu); @@ -1376,7 +1349,7 @@ out: return err; } -int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, +int mlx5_ib_query_port(struct ib_device *ibdev, u32 port, struct ib_port_attr *props) { unsigned int count; @@ -1421,25 +1394,23 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, return ret; } -static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port, +static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u32 port, struct ib_port_attr *props) { - int ret; + return mlx5_query_port_roce(ibdev, port, props); +} - /* Only link layer == ethernet is valid for representors - * and we always use port 1 +static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u32 port, u16 index, + u16 *pkey) +{ + /* Default special Pkey for representor 
device port as per the + * IB specification 1.3 section 10.9.1.2. */ - ret = mlx5_query_port_roce(ibdev, port, props); - if (ret || !props) - return ret; - - /* We don't support GIDS */ - props->gid_tbl_len = 0; - - return ret; + *pkey = 0xffff; + return 0; } -static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, +static int mlx5_ib_query_gid(struct ib_device *ibdev, u32 port, int index, union ib_gid *gid) { struct mlx5_ib_dev *dev = to_mdev(ibdev); @@ -1458,13 +1429,13 @@ static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, } -static int mlx5_query_hca_nic_pkey(struct ib_device *ibdev, u8 port, +static int mlx5_query_hca_nic_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev; bool put_mdev = true; - u8 mdev_port_num; + u32 mdev_port_num; int err; mdev = mlx5_ib_get_native_port_mdev(dev, port, &mdev_port_num); @@ -1485,7 +1456,7 @@ static int mlx5_query_hca_nic_pkey(struct ib_device *ibdev, u8 port, return err; } -static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, +static int mlx5_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey) { switch (mlx5_get_vport_access_method(ibdev)) { @@ -1529,12 +1500,12 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask, return err; } -static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask, +static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u32 port_num, u32 mask, u32 value) { struct mlx5_hca_vport_context ctx = {}; struct mlx5_core_dev *mdev; - u8 mdev_port_num; + u32 mdev_port_num; int err; mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num); @@ -1563,7 +1534,7 @@ out: return err; } -static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, +static int mlx5_ib_modify_port(struct ib_device *ibdev, u32 port, int mask, struct ib_port_modify *props) { struct mlx5_ib_dev *dev = to_mdev(ibdev); @@ -1664,7 +1635,8 @@ static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *conte bfregi = &context->bfregi; for (i = 0; i < bfregi->num_static_sys_pages; i++) { - err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]); + err = mlx5_cmd_uar_alloc(dev->mdev, &bfregi->sys_pages[i], + context->devx_uid); if (err) goto error; @@ -1678,7 +1650,8 @@ static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *conte error: for (--i; i >= 0; i--) - if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i])) + if (mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i], + context->devx_uid)) mlx5_ib_warn(dev, "failed to free uar %d\n", i); return err; @@ -1694,7 +1667,8 @@ static void deallocate_uars(struct mlx5_ib_dev *dev, for (i = 0; i < bfregi->num_sys_pages; i++) if (i < bfregi->num_static_sys_pages || bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) - mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); + mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i], + context->devx_uid); } int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) @@ -1775,6 +1749,88 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn, mlx5_ib_disable_lb(dev, true, false); } +static int set_ucontext_resp(struct ib_ucontext *uctx, + struct mlx5_ib_alloc_ucontext_resp *resp) +{ + struct ib_device *ibdev = uctx->device; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_ucontext *context = to_mucontext(uctx); + struct mlx5_bfreg_info *bfregi = &context->bfregi; + int err; + + if 
(MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { + err = mlx5_cmd_dump_fill_mkey(dev->mdev, + &resp->dump_fill_mkey); + if (err) + return err; + resp->comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; + } + + resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); + if (dev->wc_support) + resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, + log_bf_reg_size); + resp->cache_line_size = cache_line_size(); + resp->max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); + resp->max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); + resp->max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp->max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp->max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); + resp->cqe_version = context->cqe_version; + resp->log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ? + MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT; + resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? + MLX5_CAP_GEN(dev->mdev, + num_of_uars_per_page) : 1; + resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 : + bfregi->total_num_bfregs - bfregi->num_dyn_bfregs; + resp->num_ports = dev->num_ports; + resp->cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | + MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; + + if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) { + mlx5_query_min_inline(dev->mdev, &resp->eth_min_inline); + resp->eth_min_inline++; + } + + if (dev->mdev->clock_info) + resp->clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1); + + /* + * We don't want to expose information from the PCI bar that is located + * after 4096 bytes, so if the arch only supports larger pages, let's + * pretend we don't support reading the HCA's core clock. This is also + * forced by mmap function. + */ + if (PAGE_SIZE <= 4096) { + resp->comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; + resp->hca_core_clock_offset = + offsetof(struct mlx5_init_seg, + internal_timer_h) % PAGE_SIZE; + } + + if (MLX5_CAP_GEN(dev->mdev, ece_support)) + resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE; + + if (rt_supported(MLX5_CAP_GEN(dev->mdev, sq_ts_format)) && + rt_supported(MLX5_CAP_GEN(dev->mdev, rq_ts_format)) && + rt_supported(MLX5_CAP_ROCE(dev->mdev, qp_ts_format))) + resp->comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_REAL_TIME_TS; + + resp->num_dyn_bfregs = bfregi->num_dyn_bfregs; + + if (MLX5_CAP_GEN(dev->mdev, drain_sigerr)) + resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS; + + resp->comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_MKEY_UPDATE_TAG; + + return 0; +} + static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { @@ -1782,15 +1838,14 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_alloc_ucontext_req_v2 req = {}; struct mlx5_ib_alloc_ucontext_resp resp = {}; - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_ucontext *context = to_mucontext(uctx); struct mlx5_bfreg_info *bfregi; int ver; int err; size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, max_cqe_version); - u32 dump_fill_mkey; bool lib_uar_4k; + bool lib_uar_dyn; if (!dev->ib_active) return -EAGAIN; @@ -1817,44 +1872,26 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, if (req.num_low_latency_bfregs > req.total_num_bfregs - 1) return -EINVAL; - resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); - if (dev->wc_support) - resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); - resp.cache_line_size = 
cache_line_size(); - resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); - resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); - resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); - resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); - resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); - resp.cqe_version = min_t(__u8, - (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version), - req.max_cqe_version); - resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ? - MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT; - resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? - MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1; - resp.response_length = min(offsetof(typeof(resp), response_length) + - sizeof(resp.response_length), udata->outlen); - - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) { - if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS)) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA; - if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN; - /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ + if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { + err = mlx5_ib_devx_create(dev, true); + if (err < 0) + goto out_ctx; + context->devx_uid = err; } lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR; + lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR; bfregi = &context->bfregi; + if (lib_uar_dyn) { + bfregi->lib_uar_dyn = lib_uar_dyn; + goto uar_done; + } + /* updates req->total_num_bfregs */ err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi); if (err) - goto out_ctx; + goto out_devx; mutex_init(&bfregi->lock); bfregi->lib_uar_4k = lib_uar_4k; @@ -1862,7 +1899,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, GFP_KERNEL); if (!bfregi->count) { err = -ENOMEM; - goto out_ctx; + goto out_devx; } bfregi->sys_pages = kcalloc(bfregi->num_sys_pages, @@ -1877,101 +1914,35 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, if (err) goto out_sys_pages; - if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { - err = mlx5_ib_devx_create(dev, true); - if (err < 0) - goto out_uars; - context->devx_uid = err; - } - +uar_done: err = mlx5_ib_alloc_transport_domain(dev, &context->tdn, context->devx_uid); if (err) - goto out_devx; - - if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { - err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey); - if (err) - goto out_mdev; - } + goto out_uars; INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); - resp.tot_bfregs = req.total_num_bfregs; - resp.num_ports = dev->num_ports; - - if (field_avail(typeof(resp), cqe_version, udata->outlen)) - resp.response_length += sizeof(resp.cqe_version); - - if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) { - resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | - MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; - resp.response_length += sizeof(resp.cmds_supp_uhw); - } - - if (field_avail(typeof(resp), eth_min_inline, udata->outlen)) { - 
if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) { - mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline); - resp.eth_min_inline++; - } - resp.response_length += sizeof(resp.eth_min_inline); - } - - if (field_avail(typeof(resp), clock_info_versions, udata->outlen)) { - if (mdev->clock_info) - resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1); - resp.response_length += sizeof(resp.clock_info_versions); - } - - /* - * We don't want to expose information from the PCI bar that is located - * after 4096 bytes, so if the arch only supports larger pages, let's - * pretend we don't support reading the HCA's core clock. This is also - * forced by mmap function. - */ - if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) { - if (PAGE_SIZE <= 4096) { - resp.comp_mask |= - MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; - resp.hca_core_clock_offset = - offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE; - } - resp.response_length += sizeof(resp.hca_core_clock_offset); - } - - if (field_avail(typeof(resp), log_uar_size, udata->outlen)) - resp.response_length += sizeof(resp.log_uar_size); - - if (field_avail(typeof(resp), num_uars_per_page, udata->outlen)) - resp.response_length += sizeof(resp.num_uars_per_page); - - if (field_avail(typeof(resp), num_dyn_bfregs, udata->outlen)) { - resp.num_dyn_bfregs = bfregi->num_dyn_bfregs; - resp.response_length += sizeof(resp.num_dyn_bfregs); - } + context->cqe_version = min_t(__u8, + (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version), + req.max_cqe_version); - if (field_avail(typeof(resp), dump_fill_mkey, udata->outlen)) { - if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { - resp.dump_fill_mkey = dump_fill_mkey; - resp.comp_mask |= - MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; - } - resp.response_length += sizeof(resp.dump_fill_mkey); - } + err = set_ucontext_resp(uctx, &resp); + if (err) + goto out_mdev; + resp.response_length = min(udata->outlen, sizeof(resp)); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) goto out_mdev; bfregi->ver = ver; bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs; - context->cqe_version = resp.cqe_version; context->lib_caps = req.lib_caps; print_lib_caps(dev, context->lib_caps); - if (dev->lag_active) { - u8 port = mlx5_core_native_port_num(dev->mdev) - 1; + if (mlx5_ib_lag_should_assign_affinity(dev)) { + u32 port = mlx5_core_native_port_num(dev->mdev) - 1; atomic_set(&context->tx_port_affinity, atomic_add_return( @@ -1982,9 +1953,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, out_mdev: mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); -out_devx: - if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) - mlx5_ib_devx_destroy(dev, context->devx_uid); out_uars: deallocate_uars(dev, context); @@ -1995,10 +1963,37 @@ out_sys_pages: out_count: kfree(bfregi->count); +out_devx: + if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) + mlx5_ib_devx_destroy(dev, context->devx_uid); + out_ctx: return err; } +static int mlx5_ib_query_ucontext(struct ib_ucontext *ibcontext, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_alloc_ucontext_resp uctx_resp = {}; + int ret; + + ret = set_ucontext_resp(ibcontext, &uctx_resp); + if (ret) + return ret; + + uctx_resp.response_length = + min_t(size_t, + uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX), + sizeof(uctx_resp)); + + ret = uverbs_copy_to_struct_or_zero(attrs, + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX, + &uctx_resp, + sizeof(uctx_resp)); + return ret; +} + static void 
mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); @@ -2008,12 +2003,12 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) bfregi = &context->bfregi; mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); - if (context->devx_uid) - mlx5_ib_devx_destroy(dev, context->devx_uid); - deallocate_uars(dev, context); kfree(bfregi->sys_pages); kfree(bfregi->count); + + if (context->devx_uid) + mlx5_ib_devx_destroy(dev, context->devx_uid); } static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, @@ -2026,6 +2021,17 @@ static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, return (dev->mdev->bar_addr >> PAGE_SHIFT) + uar_idx / fw_uars_per_page; } +static u64 uar_index2paddress(struct mlx5_ib_dev *dev, + int uar_idx) +{ + unsigned int fw_uars_per_page; + + fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? + MLX5_UARS_IN_PAGE : 1; + + return (dev->mdev->bar_addr + (uar_idx / fw_uars_per_page) * PAGE_SIZE); +} + static int get_command(unsigned long offset) { return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK; @@ -2095,14 +2101,12 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry) struct mlx5_user_mmap_entry *mentry = to_mmmap(entry); struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device); struct mlx5_var_table *var_table = &dev->var_table; - struct mlx5_ib_dm *mdm; + struct mlx5_ib_ucontext *context = to_mucontext(entry->ucontext); switch (mentry->mmap_flag) { case MLX5_IB_MMAP_TYPE_MEMIC: - mdm = container_of(mentry, struct mlx5_ib_dm, mentry); - mlx5_cmd_dealloc_memic(&dev->dm, mdm->dev_addr, - mdm->size); - kfree(mdm); + case MLX5_IB_MMAP_TYPE_MEMIC_OP: + mlx5_ib_dm_mmap_free(dev, mentry); break; case MLX5_IB_MMAP_TYPE_VAR: mutex_lock(&var_table->bitmap_lock); @@ -2110,6 +2114,12 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry) mutex_unlock(&var_table->bitmap_lock); kfree(mentry); break; + case MLX5_IB_MMAP_TYPE_UAR_WC: + case MLX5_IB_MMAP_TYPE_UAR_NC: + mlx5_cmd_uar_dealloc(dev->mdev, mentry->page_idx, + context->devx_uid); + kfree(mentry); + break; default: WARN_ON(true); } @@ -2130,6 +2140,9 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, int max_valid_idx = dyn_uar ? 
bfregi->num_sys_pages : bfregi->num_static_sys_pages; + if (bfregi->lib_uar_dyn) + return -EINVAL; + if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; @@ -2147,14 +2160,6 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, switch (cmd) { case MLX5_IB_MMAP_WC_PAGE: case MLX5_IB_MMAP_ALLOC_WC: -/* Some architectures don't support WC memory */ -#if defined(CONFIG_X86) - if (!pat_enabled()) - return -EPERM; -#elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU))) - return -EPERM; -#endif - /* fall through */ case MLX5_IB_MMAP_REGULAR_PAGE: /* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */ prot = pgprot_writecombine(vma->vm_page_prot); @@ -2190,7 +2195,8 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, bfregi->count[bfreg_dyn_idx]++; mutex_unlock(&bfregi->lock); - err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index); + err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index, + context->devx_uid); if (err) { mlx5_ib_warn(dev, "UAR alloc failed\n"); goto free_bfreg; @@ -2219,7 +2225,7 @@ err: if (!dyn_uar) return err; - mlx5_cmd_free_uar(dev->mdev, idx); + mlx5_cmd_uar_dealloc(dev->mdev, idx, context->devx_uid); free_bfreg: mlx5_ib_free_bfreg(dev, bfregi, bfreg_dyn_idx); @@ -2227,19 +2233,6 @@ free_bfreg: return err; } -static int add_dm_mmap_entry(struct ib_ucontext *context, - struct mlx5_ib_dm *mdm, - u64 address) -{ - mdm->mentry.mmap_flag = MLX5_IB_MMAP_TYPE_MEMIC; - mdm->mentry.address = address; - return rdma_user_mmap_entry_insert_range( - context, &mdm->mentry.rdma_entry, - mdm->size, - MLX5_IB_MMAP_DEVICE_MEM << 16, - (MLX5_IB_MMAP_DEVICE_MEM << 16) + (1UL << 16) - 1); -} - static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma) { unsigned long idx; @@ -2269,7 +2262,8 @@ static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev, mentry = to_mmmap(entry); pfn = (mentry->address >> PAGE_SHIFT); - if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR) + if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR || + mentry->mmap_flag == MLX5_IB_MMAP_TYPE_UAR_NC) prot = pgprot_noncached(vma->vm_page_prot); else prot = pgprot_writecombine(vma->vm_page_prot); @@ -2300,9 +2294,12 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm command = get_command(vma->vm_pgoff); switch (command) { case MLX5_IB_MMAP_WC_PAGE: + case MLX5_IB_MMAP_ALLOC_WC: + if (!dev->wc_support) + return -EPERM; + fallthrough; case MLX5_IB_MMAP_NC_PAGE: case MLX5_IB_MMAP_REGULAR_PAGE: - case MLX5_IB_MMAP_ALLOC_WC: return uar_mmap(dev, command, vma, context); case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: @@ -2337,204 +2334,6 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm return 0; } -static inline int check_dm_type_support(struct mlx5_ib_dev *dev, - u32 type) -{ - switch (type) { - case MLX5_IB_UAPI_DM_TYPE_MEMIC: - if (!MLX5_CAP_DEV_MEM(dev->mdev, memic)) - return -EOPNOTSUPP; - break; - case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: - case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: - if (!capable(CAP_SYS_RAWIO) || - !capable(CAP_NET_RAW)) - return -EPERM; - - if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) || - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner))) - return -EOPNOTSUPP; - break; - } - - return 0; -} - -static int handle_alloc_dm_memic(struct ib_ucontext *ctx, - struct mlx5_ib_dm *dm, - struct ib_dm_alloc_attr *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm; - u64 start_offset; - u16 page_idx; - int err; - u64 address; - - 
dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE); - - err = mlx5_cmd_alloc_memic(dm_db, &dm->dev_addr, - dm->size, attr->alignment); - if (err) - return err; - - address = dm->dev_addr & PAGE_MASK; - err = add_dm_mmap_entry(ctx, dm, address); - if (err) - goto err_dealloc; - - page_idx = dm->mentry.rdma_entry.start_pgoff & 0xFFFF; - err = uverbs_copy_to(attrs, - MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, - &page_idx, - sizeof(page_idx)); - if (err) - goto err_copy; - - start_offset = dm->dev_addr & ~PAGE_MASK; - err = uverbs_copy_to(attrs, - MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, - &start_offset, sizeof(start_offset)); - if (err) - goto err_copy; - - return 0; - -err_copy: - rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry); -err_dealloc: - mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size); - - return err; -} - -static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, - struct mlx5_ib_dm *dm, - struct ib_dm_alloc_attr *attr, - struct uverbs_attr_bundle *attrs, - int type) -{ - struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev; - u64 act_size; - int err; - - /* Allocation size must a multiple of the basic block size - * and a power of 2. - */ - act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dev)); - act_size = roundup_pow_of_two(act_size); - - dm->size = act_size; - err = mlx5_dm_sw_icm_alloc(dev, type, act_size, - to_mucontext(ctx)->devx_uid, &dm->dev_addr, - &dm->icm_dm.obj_id); - if (err) - return err; - - err = uverbs_copy_to(attrs, - MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, - &dm->dev_addr, sizeof(dm->dev_addr)); - if (err) - mlx5_dm_sw_icm_dealloc(dev, type, dm->size, - to_mucontext(ctx)->devx_uid, dm->dev_addr, - dm->icm_dm.obj_id); - - return err; -} - -struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_dm_alloc_attr *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_dm *dm; - enum mlx5_ib_uapi_dm_type type; - int err; - - err = uverbs_get_const_default(&type, attrs, - MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE, - MLX5_IB_UAPI_DM_TYPE_MEMIC); - if (err) - return ERR_PTR(err); - - mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n", - type, attr->length, attr->alignment); - - err = check_dm_type_support(to_mdev(ibdev), type); - if (err) - return ERR_PTR(err); - - dm = kzalloc(sizeof(*dm), GFP_KERNEL); - if (!dm) - return ERR_PTR(-ENOMEM); - - dm->type = type; - - switch (type) { - case MLX5_IB_UAPI_DM_TYPE_MEMIC: - err = handle_alloc_dm_memic(context, dm, - attr, - attrs); - break; - case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: - err = handle_alloc_dm_sw_icm(context, dm, - attr, attrs, - MLX5_SW_ICM_TYPE_STEERING); - break; - case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: - err = handle_alloc_dm_sw_icm(context, dm, - attr, attrs, - MLX5_SW_ICM_TYPE_HEADER_MODIFY); - break; - default: - err = -EOPNOTSUPP; - } - - if (err) - goto err_free; - - return &dm->ibdm; - -err_free: - kfree(dm); - return ERR_PTR(err); -} - -int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context( - &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); - struct mlx5_core_dev *dev = to_mdev(ibdm->device)->mdev; - struct mlx5_ib_dm *dm = to_mdm(ibdm); - int ret; - - switch (dm->type) { - case MLX5_IB_UAPI_DM_TYPE_MEMIC: - rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry); - return 0; - case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: - ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING, - dm->size, 
ctx->devx_uid, dm->dev_addr, - dm->icm_dm.obj_id); - if (ret) - return ret; - break; - case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: - ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_HEADER_MODIFY, - dm->size, ctx->devx_uid, dm->dev_addr, - dm->icm_dm.obj_id); - if (ret) - return ret; - break; - default: - return -EOPNOTSUPP; - } - - kfree(dm); - - return 0; -} - static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct mlx5_ib_pd *pd = to_mpd(ibpd); @@ -2542,7 +2341,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) struct mlx5_ib_alloc_pd_resp resp; int err; u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; - u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; u16 uid = 0; struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); @@ -2550,8 +2349,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) uid = context ? context->devx_uid : 0; MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); MLX5_SET(alloc_pd_in, in, uid, uid); - err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in), - out, sizeof(out)); + err = mlx5_cmd_exec_inout(to_mdev(ibdev)->mdev, alloc_pd, in, out); if (err) return err; @@ -2568,1846 +2366,12 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +static int mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); - mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); -} - -enum { - MATCH_CRITERIA_ENABLE_OUTER_BIT, - MATCH_CRITERIA_ENABLE_MISC_BIT, - MATCH_CRITERIA_ENABLE_INNER_BIT, - MATCH_CRITERIA_ENABLE_MISC2_BIT -}; - -#define HEADER_IS_ZERO(match_criteria, headers) \ - !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ - 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ - -static u8 get_match_criteria_enable(u32 *match_criteria) -{ - u8 match_criteria_enable; - - match_criteria_enable = - (!HEADER_IS_ZERO(match_criteria, outer_headers)) << - MATCH_CRITERIA_ENABLE_OUTER_BIT; - match_criteria_enable |= - (!HEADER_IS_ZERO(match_criteria, misc_parameters)) << - MATCH_CRITERIA_ENABLE_MISC_BIT; - match_criteria_enable |= - (!HEADER_IS_ZERO(match_criteria, inner_headers)) << - MATCH_CRITERIA_ENABLE_INNER_BIT; - match_criteria_enable |= - (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) << - MATCH_CRITERIA_ENABLE_MISC2_BIT; - - return match_criteria_enable; -} - -static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) -{ - u8 entry_mask; - u8 entry_val; - int err = 0; - - if (!mask) - goto out; - - entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c, - ip_protocol); - entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v, - ip_protocol); - if (!entry_mask) { - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); - goto out; - } - /* Don't override existing ip protocol */ - if (mask != entry_mask || val != entry_val) - err = -EINVAL; -out: - return err; -} - -static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val, - bool inner) -{ - if (inner) { - MLX5_SET(fte_match_set_misc, - misc_c, inner_ipv6_flow_label, mask); - MLX5_SET(fte_match_set_misc, - misc_v, inner_ipv6_flow_label, val); - } else { - MLX5_SET(fte_match_set_misc, - misc_c, outer_ipv6_flow_label, mask); - MLX5_SET(fte_match_set_misc, - 
misc_v, outer_ipv6_flow_label, val); - } -} - -static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) -{ - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val); - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2); - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); -} - -static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) -{ - if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL)) - return -EOPNOTSUPP; - - if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP)) - return -EOPNOTSUPP; - - if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS)) - return -EOPNOTSUPP; - - if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL)) - return -EOPNOTSUPP; - - return 0; -} - -#define LAST_ETH_FIELD vlan_tag -#define LAST_IB_FIELD sl -#define LAST_IPV4_FIELD tos -#define LAST_IPV6_FIELD traffic_class -#define LAST_TCP_UDP_FIELD src_port -#define LAST_TUNNEL_FIELD tunnel_id -#define LAST_FLOW_TAG_FIELD tag_id -#define LAST_DROP_FIELD size -#define LAST_COUNTERS_FIELD counters - -/* Field is the last supported field */ -#define FIELDS_NOT_SUPPORTED(filter, field)\ - memchr_inv((void *)&filter.field +\ - sizeof(filter.field), 0,\ - sizeof(filter) -\ - offsetof(typeof(filter), field) -\ - sizeof(filter.field)) - -int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, - bool is_egress, - struct mlx5_flow_act *action) -{ - - switch (maction->ib_action.type) { - case IB_FLOW_ACTION_ESP: - if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)) - return -EINVAL; - /* Currently only AES_GCM keymat is supported by the driver */ - action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; - action->action |= is_egress ? 
- MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : - MLX5_FLOW_CONTEXT_ACTION_DECRYPT; - return 0; - case IB_FLOW_ACTION_UNSPECIFIED: - if (maction->flow_action_raw.sub_type == - MLX5_IB_FLOW_ACTION_MODIFY_HEADER) { - if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - return -EINVAL; - action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - action->modify_hdr = - maction->flow_action_raw.modify_hdr; - return 0; - } - if (maction->flow_action_raw.sub_type == - MLX5_IB_FLOW_ACTION_DECAP) { - if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) - return -EINVAL; - action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; - return 0; - } - if (maction->flow_action_raw.sub_type == - MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) { - if (action->action & - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) - return -EINVAL; - action->action |= - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - action->pkt_reformat = - maction->flow_action_raw.pkt_reformat; - return 0; - } - /* fall through */ - default: - return -EOPNOTSUPP; - } -} - -static int parse_flow_attr(struct mlx5_core_dev *mdev, - struct mlx5_flow_spec *spec, - const union ib_flow_spec *ib_spec, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_act *action, u32 prev_type) -{ - struct mlx5_flow_context *flow_context = &spec->flow_context; - u32 *match_c = spec->match_criteria; - u32 *match_v = spec->match_value; - void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, - misc_parameters); - void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, - misc_parameters); - void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c, - misc_parameters_2); - void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v, - misc_parameters_2); - void *headers_c; - void *headers_v; - int match_ipv; - int ret; - - if (ib_spec->type & IB_FLOW_SPEC_INNER) { - headers_c = MLX5_ADDR_OF(fte_match_param, match_c, - inner_headers); - headers_v = MLX5_ADDR_OF(fte_match_param, match_v, - inner_headers); - match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.inner_ip_version); - } else { - headers_c = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - headers_v = MLX5_ADDR_OF(fte_match_param, match_v, - outer_headers); - match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_ip_version); - } - - switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { - case IB_FLOW_SPEC_ETH: - if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) - return -EOPNOTSUPP; - - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dmac_47_16), - ib_spec->eth.mask.dst_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dmac_47_16), - ib_spec->eth.val.dst_mac); - - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - smac_47_16), - ib_spec->eth.mask.src_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - smac_47_16), - ib_spec->eth.val.src_mac); - - if (ib_spec->eth.mask.vlan_tag) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - cvlan_tag, 1); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - cvlan_tag, 1); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - first_vid, ntohs(ib_spec->eth.val.vlan_tag)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - first_cfi, - ntohs(ib_spec->eth.mask.vlan_tag) >> 12); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - first_cfi, - ntohs(ib_spec->eth.val.vlan_tag) >> 12); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - first_prio, - ntohs(ib_spec->eth.mask.vlan_tag) >> 13); - 
MLX5_SET(fte_match_set_lyr_2_4, headers_v, - first_prio, - ntohs(ib_spec->eth.val.vlan_tag) >> 13); - } - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ethertype, ntohs(ib_spec->eth.mask.ether_type)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ethertype, ntohs(ib_spec->eth.val.ether_type)); - break; - case IB_FLOW_SPEC_IPV4: - if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) - return -EOPNOTSUPP; - - if (match_ipv) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ip_version, 0xf); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ip_version, MLX5_FS_IPV4_VERSION); - } else { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ethertype, ETH_P_IP); - } - - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.mask.src_ip, - sizeof(ib_spec->ipv4.mask.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.val.src_ip, - sizeof(ib_spec->ipv4.val.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.mask.dst_ip, - sizeof(ib_spec->ipv4.mask.dst_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.val.dst_ip, - sizeof(ib_spec->ipv4.val.dst_ip)); - - set_tos(headers_c, headers_v, - ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); - - if (set_proto(headers_c, headers_v, - ib_spec->ipv4.mask.proto, - ib_spec->ipv4.val.proto)) - return -EINVAL; - break; - case IB_FLOW_SPEC_IPV6: - if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) - return -EOPNOTSUPP; - - if (match_ipv) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ip_version, 0xf); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ip_version, MLX5_FS_IPV6_VERSION); - } else { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ethertype, ETH_P_IPV6); - } - - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &ib_spec->ipv6.mask.src_ip, - sizeof(ib_spec->ipv6.mask.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &ib_spec->ipv6.val.src_ip, - sizeof(ib_spec->ipv6.val.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &ib_spec->ipv6.mask.dst_ip, - sizeof(ib_spec->ipv6.mask.dst_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &ib_spec->ipv6.val.dst_ip, - sizeof(ib_spec->ipv6.val.dst_ip)); - - set_tos(headers_c, headers_v, - ib_spec->ipv6.mask.traffic_class, - ib_spec->ipv6.val.traffic_class); - - if (set_proto(headers_c, headers_v, - ib_spec->ipv6.mask.next_hdr, - ib_spec->ipv6.val.next_hdr)) - return -EINVAL; - - set_flow_label(misc_params_c, misc_params_v, - ntohl(ib_spec->ipv6.mask.flow_label), - ntohl(ib_spec->ipv6.val.flow_label), - ib_spec->type & IB_FLOW_SPEC_INNER); - break; - case IB_FLOW_SPEC_ESP: - if (ib_spec->esp.mask.seq) - return -EOPNOTSUPP; - - MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, - ntohl(ib_spec->esp.mask.spi)); - MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, - ntohl(ib_spec->esp.val.spi)); - break; - case IB_FLOW_SPEC_TCP: - if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, - LAST_TCP_UDP_FIELD)) - return -EOPNOTSUPP; - - if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP)) - return -EINVAL; - - 
MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, - ntohs(ib_spec->tcp_udp.mask.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport, - ntohs(ib_spec->tcp_udp.val.src_port)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport, - ntohs(ib_spec->tcp_udp.mask.dst_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport, - ntohs(ib_spec->tcp_udp.val.dst_port)); - break; - case IB_FLOW_SPEC_UDP: - if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, - LAST_TCP_UDP_FIELD)) - return -EOPNOTSUPP; - - if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP)) - return -EINVAL; - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, - ntohs(ib_spec->tcp_udp.mask.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, - ntohs(ib_spec->tcp_udp.val.src_port)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, - ntohs(ib_spec->tcp_udp.mask.dst_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, - ntohs(ib_spec->tcp_udp.val.dst_port)); - break; - case IB_FLOW_SPEC_GRE: - if (ib_spec->gre.mask.c_ks_res0_ver) - return -EOPNOTSUPP; - - if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE)) - return -EINVAL; - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, - 0xff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, - IPPROTO_GRE); - - MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, - ntohs(ib_spec->gre.mask.protocol)); - MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, - ntohs(ib_spec->gre.val.protocol)); - - memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, - gre_key.nvgre.hi), - &ib_spec->gre.mask.key, - sizeof(ib_spec->gre.mask.key)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, - gre_key.nvgre.hi), - &ib_spec->gre.val.key, - sizeof(ib_spec->gre.val.key)); - break; - case IB_FLOW_SPEC_MPLS: - switch (prev_type) { - case IB_FLOW_SPEC_UDP: - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_first_mpls_over_udp), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - outer_first_mpls_over_udp), - &ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - outer_first_mpls_over_udp), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - break; - case IB_FLOW_SPEC_GRE: - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_first_mpls_over_gre), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - outer_first_mpls_over_gre), - &ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - outer_first_mpls_over_gre), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - break; - default: - if (ib_spec->type & IB_FLOW_SPEC_INNER) { - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.inner_first_mpls), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - inner_first_mpls), - &ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - inner_first_mpls), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - } else { - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_first_mpls), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - 
outer_first_mpls), - &ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - outer_first_mpls), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - } - } - break; - case IB_FLOW_SPEC_VXLAN_TUNNEL: - if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, - LAST_TUNNEL_FIELD)) - return -EOPNOTSUPP; - - MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni, - ntohl(ib_spec->tunnel.mask.tunnel_id)); - MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni, - ntohl(ib_spec->tunnel.val.tunnel_id)); - break; - case IB_FLOW_SPEC_ACTION_TAG: - if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag, - LAST_FLOW_TAG_FIELD)) - return -EOPNOTSUPP; - if (ib_spec->flow_tag.tag_id >= BIT(24)) - return -EINVAL; - - flow_context->flow_tag = ib_spec->flow_tag.tag_id; - flow_context->flags |= FLOW_CONTEXT_HAS_TAG; - break; - case IB_FLOW_SPEC_ACTION_DROP: - if (FIELDS_NOT_SUPPORTED(ib_spec->drop, - LAST_DROP_FIELD)) - return -EOPNOTSUPP; - action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; - break; - case IB_FLOW_SPEC_ACTION_HANDLE: - ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act), - flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action); - if (ret) - return ret; - break; - case IB_FLOW_SPEC_ACTION_COUNT: - if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count, - LAST_COUNTERS_FIELD)) - return -EOPNOTSUPP; - - /* for now support only one counters spec per flow */ - if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) - return -EINVAL; - - action->counters = ib_spec->flow_count.counters; - action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - break; - default: - return -EINVAL; - } - - return 0; -} - -/* If a flow could catch both multicast and unicast packets, - * it won't fall into the multicast flow steering table and this rule - * could steal other multicast packets. - */ -static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) -{ - union ib_flow_spec *flow_spec; - - if (ib_attr->type != IB_FLOW_ATTR_NORMAL || - ib_attr->num_of_specs < 1) - return false; - - flow_spec = (union ib_flow_spec *)(ib_attr + 1); - if (flow_spec->type == IB_FLOW_SPEC_IPV4) { - struct ib_flow_spec_ipv4 *ipv4_spec; - - ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec; - if (ipv4_is_multicast(ipv4_spec->val.dst_ip)) - return true; - - return false; - } - - if (flow_spec->type == IB_FLOW_SPEC_ETH) { - struct ib_flow_spec_eth *eth_spec; - - eth_spec = (struct ib_flow_spec_eth *)flow_spec; - return is_multicast_ether_addr(eth_spec->mask.dst_mac) && - is_multicast_ether_addr(eth_spec->val.dst_mac); - } - - return false; -} - -enum valid_spec { - VALID_SPEC_INVALID, - VALID_SPEC_VALID, - VALID_SPEC_NA, -}; - -static enum valid_spec -is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, - const struct mlx5_flow_spec *spec, - const struct mlx5_flow_act *flow_act, - bool egress) -{ - const u32 *match_c = spec->match_criteria; - bool is_crypto = - (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)); - bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c); - bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP; - - /* - * Currently only crypto is supported in egress, when regular egress - * rules would be supported, always return VALID_SPEC_NA. - */ - if (!is_crypto) - return VALID_SPEC_NA; - - return is_crypto && is_ipsec && - (!egress || (!is_drop && - !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ? 
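/*
 * Illustrative sketch, not part of this diff: flow_is_multicast_only()
 * above classifies a rule by its first spec -- an IPv4 destination in
 * 224.0.0.0/4 or a multicast destination MAC. The two predicates it
 * leans on are trivial bit tests; standalone versions follow (host
 * byte order assumed for the IPv4 form, unlike the kernel's __be32
 * helpers):
 */
#include <stdint.h>

/* Ethernet: an address is multicast when bit 0 of octet 0 is set. */
static int eth_is_multicast(const uint8_t mac[6])
{
	return mac[0] & 0x01;
}

/* IPv4: multicast is 224.0.0.0/4, i.e. the top nibble is 0xe. */
static int ipv4_is_multicast_host(uint32_t addr)
{
	return (addr & 0xf0000000u) == 0xe0000000u;
}

/*
 * Note the driver also requires the MAC *mask* to be multicast, so a
 * rule matching "any address" is not mistaken for multicast-only.
 */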
- VALID_SPEC_VALID : VALID_SPEC_INVALID; -} - -static bool is_valid_spec(struct mlx5_core_dev *mdev, - const struct mlx5_flow_spec *spec, - const struct mlx5_flow_act *flow_act, - bool egress) -{ - /* We curretly only support ipsec egress flow */ - return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID; -} - -static bool is_valid_ethertype(struct mlx5_core_dev *mdev, - const struct ib_flow_attr *flow_attr, - bool check_inner) -{ - union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); - int match_ipv = check_inner ? - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.inner_ip_version) : - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_ip_version); - int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0; - bool ipv4_spec_valid, ipv6_spec_valid; - unsigned int ip_spec_type = 0; - bool has_ethertype = false; - unsigned int spec_index; - bool mask_valid = true; - u16 eth_type = 0; - bool type_valid; - - /* Validate that ethertype is correct */ - for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { - if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) && - ib_spec->eth.mask.ether_type) { - mask_valid = (ib_spec->eth.mask.ether_type == - htons(0xffff)); - has_ethertype = true; - eth_type = ntohs(ib_spec->eth.val.ether_type); - } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) || - (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) { - ip_spec_type = ib_spec->type; - } - ib_spec = (void *)ib_spec + ib_spec->size; - } - - type_valid = (!has_ethertype) || (!ip_spec_type); - if (!type_valid && mask_valid) { - ipv4_spec_valid = (eth_type == ETH_P_IP) && - (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit)); - ipv6_spec_valid = (eth_type == ETH_P_IPV6) && - (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit)); - - type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) || - (((eth_type == ETH_P_MPLS_UC) || - (eth_type == ETH_P_MPLS_MC)) && match_ipv); - } - - return type_valid; -} - -static bool is_valid_attr(struct mlx5_core_dev *mdev, - const struct ib_flow_attr *flow_attr) -{ - return is_valid_ethertype(mdev, flow_attr, false) && - is_valid_ethertype(mdev, flow_attr, true); -} - -static void put_flow_table(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *prio, bool ft_added) -{ - prio->refcount -= !!ft_added; - if (!prio->refcount) { - mlx5_destroy_flow_table(prio->flow_table); - prio->flow_table = NULL; - } -} - -static void counters_clear_description(struct ib_counters *counters) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - - mutex_lock(&mcounters->mcntrs_mutex); - kfree(mcounters->counters_data); - mcounters->counters_data = NULL; - mcounters->cntrs_max_index = 0; - mutex_unlock(&mcounters->mcntrs_mutex); -} - -static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) -{ - struct mlx5_ib_flow_handler *handler = container_of(flow_id, - struct mlx5_ib_flow_handler, - ibflow); - struct mlx5_ib_flow_handler *iter, *tmp; - struct mlx5_ib_dev *dev = handler->dev; - - mutex_lock(&dev->flow_db->lock); - - list_for_each_entry_safe(iter, tmp, &handler->list, list) { - mlx5_del_flow_rules(iter->rule); - put_flow_table(dev, iter->prio, true); - list_del(&iter->list); - kfree(iter); - } - - mlx5_del_flow_rules(handler->rule); - put_flow_table(dev, handler->prio, true); - if (handler->ibcounters && - atomic_read(&handler->ibcounters->usecnt) == 1) - counters_clear_description(handler->ibcounters); - - mutex_unlock(&dev->flow_db->lock); - if (handler->flow_matcher) - 
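/*
 * Illustrative sketch, not part of this diff: is_valid_ethertype()
 * above walks the spec list that follows struct ib_flow_attr -- each
 * entry begins with a type and a size, and
 * "ib_spec = (void *)ib_spec + ib_spec->size" advances to the next
 * one. A generic, bounds-checked walk over such variable-size records
 * (the record layout here is made up):
 */
#include <stddef.h>
#include <stdint.h>

struct rec_hdr {
	uint16_t type;
	uint16_t size;	/* total bytes in this record, header included */
};

static int walk_records(const void *buf, size_t len,
			void (*visit)(const struct rec_hdr *))
{
	const uint8_t *p = buf;
	const uint8_t *end = p + len;

	while (p < end) {
		const struct rec_hdr *h = (const struct rec_hdr *)p;

		if ((size_t)(end - p) < sizeof(*h) ||
		    h->size < sizeof(*h) ||
		    h->size > (size_t)(end - p))
			return -1;	/* record overruns the buffer */
		visit(h);
		p += h->size;
	}
	return 0;
}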
atomic_dec(&handler->flow_matcher->usecnt); - kfree(handler); - - return 0; -} - -static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) -{ - priority *= 2; - if (!dont_trap) - priority++; - return priority; -} - -enum flow_table_type { - MLX5_IB_FT_RX, - MLX5_IB_FT_TX -}; - -#define MLX5_FS_MAX_TYPES 6 -#define MLX5_FS_MAX_ENTRIES BIT(16) - -static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, - struct mlx5_ib_flow_prio *prio, - int priority, - int num_entries, int num_groups, - u32 flags) -{ - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_flow_table *ft; - - ft_attr.prio = priority; - ft_attr.max_fte = num_entries; - ft_attr.flags = flags; - ft_attr.autogroup.max_num_groups = num_groups; - ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); - if (IS_ERR(ft)) - return ERR_CAST(ft); - - prio->flow_table = ft; - prio->refcount = 0; - return prio; -} - -static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, - struct ib_flow_attr *flow_attr, - enum flow_table_type ft_type) -{ - bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; - struct mlx5_flow_namespace *ns = NULL; - struct mlx5_ib_flow_prio *prio; - struct mlx5_flow_table *ft; - int max_table_size; - int num_entries; - int num_groups; - bool esw_encap; - u32 flags = 0; - int priority; - - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - log_max_ft_size)); - esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != - DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - enum mlx5_flow_namespace_type fn_type; - - if (flow_is_multicast_only(flow_attr) && - !dont_trap) - priority = MLX5_IB_FLOW_MCAST_PRIO; - else - priority = ib_prio_to_core_prio(flow_attr->priority, - dont_trap); - if (ft_type == MLX5_IB_FT_RX) { - fn_type = MLX5_FLOW_NAMESPACE_BYPASS; - prio = &dev->flow_db->prios[priority]; - if (!dev->is_rep && !esw_encap && - MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (!dev->is_rep && !esw_encap && - MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - reformat_l3_tunnel_to_l2)) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, - log_max_ft_size)); - fn_type = MLX5_FLOW_NAMESPACE_EGRESS; - prio = &dev->flow_db->egress_prios[priority]; - if (!dev->is_rep && !esw_encap && - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } - ns = mlx5_get_flow_namespace(dev->mdev, fn_type); - num_entries = MLX5_FS_MAX_ENTRIES; - num_groups = MLX5_FS_MAX_TYPES; - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { - ns = mlx5_get_flow_namespace(dev->mdev, - MLX5_FLOW_NAMESPACE_LEFTOVERS); - build_leftovers_ft_param(&priority, - &num_entries, - &num_groups); - prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { - if (!MLX5_CAP_FLOWTABLE(dev->mdev, - allow_sniffer_and_nic_rx_shared_tir)) - return ERR_PTR(-ENOTSUPP); - - ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? 
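/*
 * Illustrative sketch, not part of this diff: get_flow_table(),
 * _get_prio() and put_flow_table() above implement a lazily created,
 * refcounted table per priority -- the first rule creates the table,
 * later rules share it, and removing the last rule tears it down. The
 * lifetime pattern reduced to a standalone pthread example (the driver
 * serializes with dev->flow_db->lock in the callers instead):
 */
#include <pthread.h>
#include <stdlib.h>

struct prio {
	pthread_mutex_t lock;
	void *table;		/* created on first use */
	unsigned int refcount;
};

static void *prio_get(struct prio *p)
{
	void *t;

	pthread_mutex_lock(&p->lock);
	if (!p->table)
		p->table = malloc(64);	/* stand-in for table create */
	t = p->table;
	if (t)
		p->refcount++;
	pthread_mutex_unlock(&p->lock);
	return t;
}

static void prio_put(struct prio *p)
{
	pthread_mutex_lock(&p->lock);
	if (--p->refcount == 0) {	/* last user frees the table */
		free(p->table);
		p->table = NULL;
	}
	pthread_mutex_unlock(&p->lock);
}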
- MLX5_FLOW_NAMESPACE_SNIFFER_RX : - MLX5_FLOW_NAMESPACE_SNIFFER_TX); - - prio = &dev->flow_db->sniffer[ft_type]; - priority = 0; - num_entries = 1; - num_groups = 1; - } - - if (!ns) - return ERR_PTR(-ENOTSUPP); - - max_table_size = min_t(int, num_entries, max_table_size); - - ft = prio->flow_table; - if (!ft) - return _get_prio(ns, prio, priority, max_table_size, num_groups, - flags); - - return prio; -} - -static void set_underlay_qp(struct mlx5_ib_dev *dev, - struct mlx5_flow_spec *spec, - u32 underlay_qpn) -{ - void *misc_params_c = MLX5_ADDR_OF(fte_match_param, - spec->match_criteria, - misc_parameters); - void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - - if (underlay_qpn && - MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - ft_field_support.bth_dst_qp)) { - MLX5_SET(fte_match_set_misc, - misc_params_v, bth_dst_qp, underlay_qpn); - MLX5_SET(fte_match_set_misc, - misc_params_c, bth_dst_qp, 0xffffff); - } -} - -static int read_flow_counters(struct ib_device *ibdev, - struct mlx5_read_counters_attr *read_attr) -{ - struct mlx5_fc *fc = read_attr->hw_cntrs_hndl; - struct mlx5_ib_dev *dev = to_mdev(ibdev); - - return mlx5_fc_query(dev->mdev, fc, - &read_attr->out[IB_COUNTER_PACKETS], - &read_attr->out[IB_COUNTER_BYTES]); -} - -/* flow counters currently expose two counters packets and bytes */ -#define FLOW_COUNTERS_NUM 2 -static int counters_set_description(struct ib_counters *counters, - enum mlx5_ib_counters_type counters_type, - struct mlx5_ib_flow_counters_desc *desc_data, - u32 ncounters) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - u32 cntrs_max_index = 0; - int i; - - if (counters_type != MLX5_IB_COUNTERS_FLOW) - return -EINVAL; - - /* init the fields for the object */ - mcounters->type = counters_type; - mcounters->read_counters = read_flow_counters; - mcounters->counters_num = FLOW_COUNTERS_NUM; - mcounters->ncounters = ncounters; - /* each counter entry have both description and index pair */ - for (i = 0; i < ncounters; i++) { - if (desc_data[i].description > IB_COUNTER_BYTES) - return -EINVAL; - - if (cntrs_max_index <= desc_data[i].index) - cntrs_max_index = desc_data[i].index + 1; - } - - mutex_lock(&mcounters->mcntrs_mutex); - mcounters->counters_data = desc_data; - mcounters->cntrs_max_index = cntrs_max_index; - mutex_unlock(&mcounters->mcntrs_mutex); - - return 0; -} - -#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2)) -static int flow_counters_set_data(struct ib_counters *ibcounters, - struct mlx5_ib_create_flow *ucmd) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters); - struct mlx5_ib_flow_counters_data *cntrs_data = NULL; - struct mlx5_ib_flow_counters_desc *desc_data = NULL; - bool hw_hndl = false; - int ret = 0; - - if (ucmd && ucmd->ncounters_data != 0) { - cntrs_data = ucmd->data; - if (cntrs_data->ncounters > MAX_COUNTERS_NUM) - return -EINVAL; - - desc_data = kcalloc(cntrs_data->ncounters, - sizeof(*desc_data), - GFP_KERNEL); - if (!desc_data) - return -ENOMEM; - - if (copy_from_user(desc_data, - u64_to_user_ptr(cntrs_data->counters_data), - sizeof(*desc_data) * cntrs_data->ncounters)) { - ret = -EFAULT; - goto free; - } - } - - if (!mcounters->hw_cntrs_hndl) { - mcounters->hw_cntrs_hndl = mlx5_fc_create( - to_mdev(ibcounters->device)->mdev, false); - if (IS_ERR(mcounters->hw_cntrs_hndl)) { - ret = PTR_ERR(mcounters->hw_cntrs_hndl); - goto free; - } - hw_hndl = true; - } - - if (desc_data) { - /* counters already bound to at least one flow */ - if (mcounters->cntrs_max_index) { - 
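/*
 * Illustrative sketch, not part of this diff: flow_counters_set_data()
 * above shows the standard way to import a user-sized array -- cap the
 * element count (MAX_COUNTERS_NUM there) *before* computing
 * count * element_size, allocate, then copy. A userspace analogue with
 * memcpy standing in for copy_from_user(); all names are invented:
 */
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define MAX_ELEMS 4096		/* illustrative cap */

struct desc {
	uint32_t description;
	uint32_t index;
};

static int import_descs(const void *user_buf, size_t n, struct desc **out)
{
	struct desc *d;

	if (!n || n > MAX_ELEMS)
		return -EINVAL;	/* bound first: no overflow below */

	d = calloc(n, sizeof(*d));
	if (!d)
		return -ENOMEM;

	memcpy(d, user_buf, n * sizeof(*d));	/* copy_from_user() here */
	*out = d;
	return 0;
}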
ret = -EINVAL; - goto free_hndl; - } - - ret = counters_set_description(ibcounters, - MLX5_IB_COUNTERS_FLOW, - desc_data, - cntrs_data->ncounters); - if (ret) - goto free_hndl; - - } else if (!mcounters->cntrs_max_index) { - /* counters not bound yet, must have udata passed */ - ret = -EINVAL; - goto free_hndl; - } - - return 0; - -free_hndl: - if (hw_hndl) { - mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev, - mcounters->hw_cntrs_hndl); - mcounters->hw_cntrs_hndl = NULL; - } -free: - kfree(desc_data); - return ret; -} - -static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev, - struct mlx5_flow_spec *spec, - struct mlx5_eswitch_rep *rep) -{ - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; - void *misc; - - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters_2); - - MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_for_match(esw, - rep->vport)); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters_2); - - MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); - } else { - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - - MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport); - - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - } -} - -static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst, - u32 underlay_qpn, - struct mlx5_ib_create_flow *ucmd) -{ - struct mlx5_flow_table *ft = ft_prio->flow_table; - struct mlx5_ib_flow_handler *handler; - struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_spec *spec; - struct mlx5_flow_destination dest_arr[2] = {}; - struct mlx5_flow_destination *rule_dst = dest_arr; - const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); - unsigned int spec_index; - u32 prev_type = 0; - int err = 0; - int dest_num = 0; - bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; - - if (!is_valid_attr(dev->mdev, flow_attr)) - return ERR_PTR(-EINVAL); - - if (dev->is_rep && is_egress) - return ERR_PTR(-EINVAL); - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - handler = kzalloc(sizeof(*handler), GFP_KERNEL); - if (!handler || !spec) { - err = -ENOMEM; - goto free; - } - - INIT_LIST_HEAD(&handler->list); - - for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { - err = parse_flow_attr(dev->mdev, spec, - ib_flow, flow_attr, &flow_act, - prev_type); - if (err < 0) - goto free; - - prev_type = ((union ib_flow_spec *)ib_flow)->type; - ib_flow += ((union ib_flow_spec *)ib_flow)->size; - } - - if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) { - memcpy(&dest_arr[0], dst, sizeof(*dst)); - dest_num++; - } - - if (!flow_is_multicast_only(flow_attr)) - set_underlay_qp(dev, spec, underlay_qpn); - - if (dev->is_rep) { - struct mlx5_eswitch_rep *rep; - - rep = dev->port[flow_attr->port - 1].rep; - if (!rep) { - err = -EINVAL; - goto free; - } - - mlx5_ib_set_rule_source_port(dev, spec, rep); - } - - spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); - - if (is_egress && - !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) { - err = -EINVAL; - goto free; - } - - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - struct mlx5_ib_mcounters *mcounters; - - err = 
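/*
 * Illustrative sketch, not part of this diff: the free_hndl:/free:
 * labels above are the kernel's goto-unwind idiom -- acquire resources
 * in order and, on failure, jump to the label that releases exactly
 * what already exists, in reverse order. Reduced to its skeleton:
 */
#include <errno.h>
#include <stdlib.h>

struct ctx {
	void *a;
	void *b;
};

static int ctx_init(struct ctx *c)
{
	int err;

	c->a = malloc(64);
	if (!c->a)
		return -ENOMEM;		/* nothing to unwind yet */

	c->b = malloc(64);
	if (!c->b) {
		err = -ENOMEM;
		goto free_a;		/* undo only what exists */
	}
	return 0;

free_a:
	free(c->a);
	c->a = NULL;
	return err;
}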
flow_counters_set_data(flow_act.counters, ucmd); - if (err) - goto free; - - mcounters = to_mcounters(flow_act.counters); - handler->ibcounters = flow_act.counters; - dest_arr[dest_num].type = - MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest_arr[dest_num].counter_id = - mlx5_fc_id(mcounters->hw_cntrs_hndl); - dest_num++; - } - - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { - if (!dest_num) - rule_dst = NULL; - } else { - if (is_egress) - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; - else - flow_act.action |= - dest_num ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : - MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; - } - - if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) && - (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { - mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", - spec->flow_context.flow_tag, flow_attr->type); - err = -EINVAL; - goto free; - } - handler->rule = mlx5_add_flow_rules(ft, spec, - &flow_act, - rule_dst, dest_num); - - if (IS_ERR(handler->rule)) { - err = PTR_ERR(handler->rule); - goto free; - } - - ft_prio->refcount++; - handler->prio = ft_prio; - handler->dev = dev; - - ft_prio->flow_table = ft; -free: - if (err && handler) { - if (handler->ibcounters && - atomic_read(&handler->ibcounters->usecnt) == 1) - counters_clear_description(handler->ibcounters); - kfree(handler); - } - kvfree(spec); - return err ? ERR_PTR(err) : handler; -} - -static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst) -{ - return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL); -} - -static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst) -{ - struct mlx5_ib_flow_handler *handler_dst = NULL; - struct mlx5_ib_flow_handler *handler = NULL; - - handler = create_flow_rule(dev, ft_prio, flow_attr, NULL); - if (!IS_ERR(handler)) { - handler_dst = create_flow_rule(dev, ft_prio, - flow_attr, dst); - if (IS_ERR(handler_dst)) { - mlx5_del_flow_rules(handler->rule); - ft_prio->refcount--; - kfree(handler); - handler = handler_dst; - } else { - list_add(&handler_dst->list, &handler->list); - } - } - - return handler; -} -enum { - LEFTOVERS_MC, - LEFTOVERS_UC, -}; - -static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst) -{ - struct mlx5_ib_flow_handler *handler_ucast = NULL; - struct mlx5_ib_flow_handler *handler = NULL; - - static struct { - struct ib_flow_attr flow_attr; - struct ib_flow_spec_eth eth_flow; - } leftovers_specs[] = { - [LEFTOVERS_MC] = { - .flow_attr = { - .num_of_specs = 1, - .size = sizeof(leftovers_specs[0]) - }, - .eth_flow = { - .type = IB_FLOW_SPEC_ETH, - .size = sizeof(struct ib_flow_spec_eth), - .mask = {.dst_mac = {0x1} }, - .val = {.dst_mac = {0x1} } - } - }, - [LEFTOVERS_UC] = { - .flow_attr = { - .num_of_specs = 1, - .size = sizeof(leftovers_specs[0]) - }, - .eth_flow = { - .type = IB_FLOW_SPEC_ETH, - .size = sizeof(struct ib_flow_spec_eth), - .mask = {.dst_mac = {0x1} }, - .val = {.dst_mac = {} } - } - } - }; - - handler = create_flow_rule(dev, ft_prio, - &leftovers_specs[LEFTOVERS_MC].flow_attr, - dst); - if (!IS_ERR(handler) && - flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { - 
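/*
 * Illustrative sketch, not part of this diff: the
 * "return err ? ERR_PTR(err) : handler" above packs failure and
 * success into one pointer -- small negative errnos live in the top
 * pages of the address space, where no valid object can be. A
 * standalone re-creation of the ERR_PTR()/IS_ERR()/PTR_ERR() trick
 * (platform-dependent, shown purely for illustration):
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(long err) { return (void *)err; }
static long PTR_ERR(const void *p) { return (long)p; }
static int IS_ERR(const void *p)
{
	return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

static void *make_obj(int fail)
{
	return fail ? ERR_PTR(-ENOMEM) : malloc(32);
}

int main(void)
{
	void *obj = make_obj(1);

	if (IS_ERR(obj))
		printf("failed: %ld\n", PTR_ERR(obj));	/* prints -12 */
	else
		free(obj);
	return 0;
}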
handler_ucast = create_flow_rule(dev, ft_prio, - &leftovers_specs[LEFTOVERS_UC].flow_attr, - dst); - if (IS_ERR(handler_ucast)) { - mlx5_del_flow_rules(handler->rule); - ft_prio->refcount--; - kfree(handler); - handler = handler_ucast; - } else { - list_add(&handler_ucast->list, &handler->list); - } - } - - return handler; -} - -static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_rx, - struct mlx5_ib_flow_prio *ft_tx, - struct mlx5_flow_destination *dst) -{ - struct mlx5_ib_flow_handler *handler_rx; - struct mlx5_ib_flow_handler *handler_tx; - int err; - static const struct ib_flow_attr flow_attr = { - .num_of_specs = 0, - .size = sizeof(flow_attr) - }; - - handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst); - if (IS_ERR(handler_rx)) { - err = PTR_ERR(handler_rx); - goto err; - } - - handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst); - if (IS_ERR(handler_tx)) { - err = PTR_ERR(handler_tx); - goto err_tx; - } - - list_add(&handler_tx->list, &handler_rx->list); - - return handler_rx; - -err_tx: - mlx5_del_flow_rules(handler_rx->rule); - ft_rx->refcount--; - kfree(handler_rx); -err: - return ERR_PTR(err); -} - -static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, - int domain, - struct ib_udata *udata) -{ - struct mlx5_ib_dev *dev = to_mdev(qp->device); - struct mlx5_ib_qp *mqp = to_mqp(qp); - struct mlx5_ib_flow_handler *handler = NULL; - struct mlx5_flow_destination *dst = NULL; - struct mlx5_ib_flow_prio *ft_prio_tx = NULL; - struct mlx5_ib_flow_prio *ft_prio; - bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; - struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr; - size_t min_ucmd_sz, required_ucmd_sz; - int err; - int underlay_qpn; - - if (udata && udata->inlen) { - min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) + - sizeof(ucmd_hdr.reserved); - if (udata->inlen < min_ucmd_sz) - return ERR_PTR(-EOPNOTSUPP); - - err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz); - if (err) - return ERR_PTR(err); - - /* currently supports only one counters data */ - if (ucmd_hdr.ncounters_data > 1) - return ERR_PTR(-EINVAL); - - required_ucmd_sz = min_ucmd_sz + - sizeof(struct mlx5_ib_flow_counters_data) * - ucmd_hdr.ncounters_data; - if (udata->inlen > required_ucmd_sz && - !ib_is_udata_cleared(udata, required_ucmd_sz, - udata->inlen - required_ucmd_sz)) - return ERR_PTR(-EOPNOTSUPP); - - ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL); - if (!ucmd) - return ERR_PTR(-ENOMEM); - - err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz); - if (err) - goto free_ucmd; - } - - if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) { - err = -ENOMEM; - goto free_ucmd; - } - - if (domain != IB_FLOW_DOMAIN_USER || - flow_attr->port > dev->num_ports || - (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | - IB_FLOW_ATTR_FLAGS_EGRESS))) { - err = -EINVAL; - goto free_ucmd; - } - - if (is_egress && - (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { - err = -EINVAL; - goto free_ucmd; - } - - dst = kzalloc(sizeof(*dst), GFP_KERNEL); - if (!dst) { - err = -ENOMEM; - goto free_ucmd; - } - - mutex_lock(&dev->flow_db->lock); - - ft_prio = get_flow_table(dev, flow_attr, - is_egress ? 
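/*
 * Illustrative sketch, not part of this diff: the udata handling above
 * is the forward-compatible uABI rule -- newer userspace may hand in a
 * larger command struct, and the kernel accepts the extra bytes only
 * if they are all zero (what ib_is_udata_cleared() verifies), so
 * requests using unknown fields fail instead of being silently
 * ignored. Standalone version:
 */
#include <errno.h>
#include <stddef.h>

static int tail_is_zero(const unsigned char *buf, size_t from, size_t len)
{
	size_t i;

	for (i = from; i < len; i++)
		if (buf[i])
			return 0;
	return 1;
}

/* known_sz: the command size this (older) kernel understands. */
static int validate_ucmd(const void *buf, size_t inlen, size_t known_sz)
{
	if (inlen < known_sz)
		return -EOPNOTSUPP;		/* too short to parse */
	if (inlen > known_sz &&
	    !tail_is_zero(buf, known_sz, inlen))
		return -EOPNOTSUPP;		/* unknown fields are set */
	return 0;
}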
MLX5_IB_FT_TX : MLX5_IB_FT_RX); - if (IS_ERR(ft_prio)) { - err = PTR_ERR(ft_prio); - goto unlock; - } - if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { - ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX); - if (IS_ERR(ft_prio_tx)) { - err = PTR_ERR(ft_prio_tx); - ft_prio_tx = NULL; - goto destroy_ft; - } - } - - if (is_egress) { - dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; - } else { - dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - if (mqp->flags & MLX5_IB_QP_RSS) - dst->tir_num = mqp->rss_qp.tirn; - else - dst->tir_num = mqp->raw_packet_qp.rq.tirn; - } - - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { - handler = create_dont_trap_rule(dev, ft_prio, - flow_attr, dst); - } else { - underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ? - mqp->underlay_qpn : 0; - handler = _create_flow_rule(dev, ft_prio, flow_attr, - dst, underlay_qpn, ucmd); - } - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { - handler = create_leftovers_rule(dev, ft_prio, flow_attr, - dst); - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { - handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); - } else { - err = -EINVAL; - goto destroy_ft; - } - - if (IS_ERR(handler)) { - err = PTR_ERR(handler); - handler = NULL; - goto destroy_ft; - } - - mutex_unlock(&dev->flow_db->lock); - kfree(dst); - kfree(ucmd); - - return &handler->ibflow; - -destroy_ft: - put_flow_table(dev, ft_prio, false); - if (ft_prio_tx) - put_flow_table(dev, ft_prio_tx, false); -unlock: - mutex_unlock(&dev->flow_db->lock); - kfree(dst); -free_ucmd: - kfree(ucmd); - return ERR_PTR(err); -} - -static struct mlx5_ib_flow_prio * -_get_flow_table(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_matcher *fs_matcher, - bool mcast) -{ - struct mlx5_flow_namespace *ns = NULL; - struct mlx5_ib_flow_prio *prio = NULL; - int max_table_size = 0; - bool esw_encap; - u32 flags = 0; - int priority; - - if (mcast) - priority = MLX5_IB_FLOW_MCAST_PRIO; - else - priority = ib_prio_to_core_prio(fs_matcher->priority, false); - - esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != - DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - reformat_l3_tunnel_to_l2) && - !esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { - max_table_size = BIT( - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { - max_table_size = BIT( - MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) && - esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - priority = FDB_BYPASS_PATH; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, - log_max_ft_size)); - priority = fs_matcher->priority; - } - - max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); - - ns = 
mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); - if (!ns) - return ERR_PTR(-ENOTSUPP); - - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) - prio = &dev->flow_db->prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) - prio = &dev->flow_db->egress_prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) - prio = &dev->flow_db->fdb; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) - prio = &dev->flow_db->rdma_rx[priority]; - - if (!prio) - return ERR_PTR(-EINVAL); - - if (prio->flow_table) - return prio; - - return _get_prio(ns, prio, priority, max_table_size, - MLX5_FS_MAX_TYPES, flags); -} - -static struct mlx5_ib_flow_handler * -_create_raw_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - struct mlx5_flow_destination *dst, - struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_context *flow_context, - struct mlx5_flow_act *flow_act, - void *cmd_in, int inlen, - int dst_num) -{ - struct mlx5_ib_flow_handler *handler; - struct mlx5_flow_spec *spec; - struct mlx5_flow_table *ft = ft_prio->flow_table; - int err = 0; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - handler = kzalloc(sizeof(*handler), GFP_KERNEL); - if (!handler || !spec) { - err = -ENOMEM; - goto free; - } - - INIT_LIST_HEAD(&handler->list); - - memcpy(spec->match_value, cmd_in, inlen); - memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params, - fs_matcher->mask_len); - spec->match_criteria_enable = fs_matcher->match_criteria_enable; - spec->flow_context = *flow_context; - - handler->rule = mlx5_add_flow_rules(ft, spec, - flow_act, dst, dst_num); - - if (IS_ERR(handler->rule)) { - err = PTR_ERR(handler->rule); - goto free; - } - - ft_prio->refcount++; - handler->prio = ft_prio; - handler->dev = dev; - ft_prio->flow_table = ft; - -free: - if (err) - kfree(handler); - kvfree(spec); - return err ? 
ERR_PTR(err) : handler; -} - -static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, - void *match_v) -{ - void *match_c; - void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4; - void *dmac, *dmac_mask; - void *ipv4, *ipv4_mask; - - if (!(fs_matcher->match_criteria_enable & - (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT))) - return false; - - match_c = fs_matcher->matcher_mask.match_params; - match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v, - outer_headers); - match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - - dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, - dmac_47_16); - dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, - dmac_47_16); - - if (is_multicast_ether_addr(dmac) && - is_multicast_ether_addr(dmac_mask)) - return true; - - ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - - ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - - if (ipv4_is_multicast(*(__be32 *)(ipv4)) && - ipv4_is_multicast(*(__be32 *)(ipv4_mask))) - return true; - - return false; -} - -struct mlx5_ib_flow_handler * -mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_context *flow_context, - struct mlx5_flow_act *flow_act, - u32 counter_id, - void *cmd_in, int inlen, int dest_id, - int dest_type) -{ - struct mlx5_flow_destination *dst; - struct mlx5_ib_flow_prio *ft_prio; - struct mlx5_ib_flow_handler *handler; - int dst_num = 0; - bool mcast; - int err; - - if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL) - return ERR_PTR(-EOPNOTSUPP); - - if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) - return ERR_PTR(-ENOMEM); - - dst = kcalloc(2, sizeof(*dst), GFP_KERNEL); - if (!dst) - return ERR_PTR(-ENOMEM); - - mcast = raw_fs_is_multicast(fs_matcher, cmd_in); - mutex_lock(&dev->flow_db->lock); - - ft_prio = _get_flow_table(dev, fs_matcher, mcast); - if (IS_ERR(ft_prio)) { - err = PTR_ERR(ft_prio); - goto unlock; - } - - if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { - dst[dst_num].type = dest_type; - dst[dst_num].tir_num = dest_id; - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { - dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; - dst[dst_num].ft_num = dest_id; - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else { - dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_PORT; - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; - } - - dst_num++; - - if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dst[dst_num].counter_id = counter_id; - dst_num++; - } - - handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, - flow_context, flow_act, - cmd_in, inlen, dst_num); - - if (IS_ERR(handler)) { - err = PTR_ERR(handler); - goto destroy_ft; - } - - mutex_unlock(&dev->flow_db->lock); - atomic_inc(&fs_matcher->usecnt); - handler->flow_matcher = fs_matcher; - - kfree(dst); - - return handler; - -destroy_ft: - put_flow_table(dev, ft_prio, false); -unlock: - mutex_unlock(&dev->flow_db->lock); - kfree(dst); - - return ERR_PTR(err); -} - -static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) -{ - u32 flags = 0; - - if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA) - flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA; - - return flags; -} - -#define 
MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA -static struct ib_flow_action * -mlx5_ib_create_flow_action_esp(struct ib_device *device, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_dev *mdev = to_mdev(device); - struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm; - struct mlx5_accel_esp_xfrm_attrs accel_attrs = {}; - struct mlx5_ib_flow_action *action; - u64 action_flags; - u64 flags; - int err = 0; - - err = uverbs_get_flags64( - &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1)); - if (err) - return ERR_PTR(err); - - flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); - - /* We current only support a subset of the standard features. Only a - * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn - * (with overlap). Full offload mode isn't supported. - */ - if (!attr->keymat || attr->replay || attr->encap || - attr->spi || attr->seq || attr->tfc_pad || - attr->hard_limit_pkts || - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT))) - return ERR_PTR(-EOPNOTSUPP); - - if (attr->keymat->protocol != - IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM) - return ERR_PTR(-EOPNOTSUPP); - - aes_gcm = &attr->keymat->keymat.aes_gcm; - - if (aes_gcm->icv_len != 16 || - aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) - return ERR_PTR(-EOPNOTSUPP); - - action = kmalloc(sizeof(*action), GFP_KERNEL); - if (!action) - return ERR_PTR(-ENOMEM); - - action->esp_aes_gcm.ib_flags = attr->flags; - memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key, - sizeof(accel_attrs.keymat.aes_gcm.aes_key)); - accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8; - memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt, - sizeof(accel_attrs.keymat.aes_gcm.salt)); - memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv, - sizeof(accel_attrs.keymat.aes_gcm.seq_iv)); - accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8; - accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ; - accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; - - accel_attrs.esn = attr->esn; - if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT) - accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT; - - action->esp_aes_gcm.ctx = - mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags); - if (IS_ERR(action->esp_aes_gcm.ctx)) { - err = PTR_ERR(action->esp_aes_gcm.ctx); - goto err_parse; - } - - action->esp_aes_gcm.ib_flags = attr->flags; - - return &action->ib_action; - -err_parse: - kfree(action); - return ERR_PTR(err); -} - -static int -mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_flow_action *maction = to_mflow_act(action); - struct mlx5_accel_esp_xfrm_attrs accel_attrs; - int err = 0; - - if (attr->keymat || attr->replay || attr->encap || - attr->spi || attr->seq || attr->tfc_pad || - attr->hard_limit_pkts || - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))) - return 
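/*
 * Illustrative sketch, not part of this diff: the uverbs_get_flags64()
 * call above passes ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1)
 * - 1) as the set of allowed bits -- shifting the highest supported
 * flag left once and subtracting one yields that bit plus every bit
 * below it. Demonstrated with made-up flag values:
 */
#include <assert.h>
#include <stdint.h>

#define FLAG_A		(1ULL << 0)
#define FLAG_B		(1ULL << 1)
#define FLAG_LAST	(1ULL << 2)	/* highest supported bit */

int main(void)
{
	uint64_t supported = (FLAG_LAST << 1) - 1;	/* 0b111 */
	uint64_t bad_req = FLAG_B | (1ULL << 5);	/* unknown bit 5 */

	assert(supported == (FLAG_A | FLAG_B | FLAG_LAST));
	assert(bad_req & ~supported);	/* driver rejects: -EOPNOTSUPP */
	return 0;
}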
-EOPNOTSUPP; - - /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can - * be modified. - */ - if (!(maction->esp_aes_gcm.ib_flags & - IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) && - attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)) - return -EINVAL; - - memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs, - sizeof(accel_attrs)); - - accel_attrs.esn = attr->esn; - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - else - accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - - err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx, - &accel_attrs); - if (err) - return err; - - maction->esp_aes_gcm.ib_flags &= - ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; - maction->esp_aes_gcm.ib_flags |= - attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; - - return 0; -} - -static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) -{ - struct mlx5_ib_flow_action *maction = to_mflow_act(action); - - switch (action->type) { - case IB_FLOW_ACTION_ESP: - /* - * We only support aes_gcm by now, so we implicitly know this is - * the underline crypto. - */ - mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); - break; - case IB_FLOW_ACTION_UNSPECIFIED: - mlx5_ib_destroy_flow_action_raw(maction); - break; - default: - WARN_ON(true); - break; - } - - kfree(maction); - return 0; + return mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); } static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) @@ -4420,7 +2384,7 @@ static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) uid = ibqp->pd ? to_mpd(ibqp->pd)->uid : 0; - if (mqp->flags & MLX5_IB_QP_UNDERLAY) { + if (mqp->flags & IB_QP_CREATE_SOURCE_QPN) { mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n"); return -EOPNOTSUPP; } @@ -4468,7 +2432,7 @@ static ssize_t fw_pages_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); + return sysfs_emit(buf, "%d\n", dev->mdev->priv.fw_pages); } static DEVICE_ATTR_RO(fw_pages); @@ -4478,7 +2442,7 @@ static ssize_t reg_pages_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); + return sysfs_emit(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); } static DEVICE_ATTR_RO(reg_pages); @@ -4488,7 +2452,7 @@ static ssize_t hca_type_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); + return sysfs_emit(buf, "MT%d\n", dev->mdev->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -4498,7 +2462,7 @@ static ssize_t hw_rev_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%x\n", dev->mdev->rev_id); + return sysfs_emit(buf, "%x\n", dev->mdev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -4508,8 +2472,8 @@ static ssize_t board_id_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, - dev->mdev->board_id); + return sysfs_emit(buf, "%.*s\n", MLX5_BOARD_ID_LEN, + dev->mdev->board_id); } static 
DEVICE_ATTR_RO(board_id); @@ -4532,9 +2496,14 @@ static void pkey_change_handler(struct work_struct *work) container_of(work, struct mlx5_ib_port_resources, pkey_change_work); - mutex_lock(&ports->devr->mutex); + if (!ports->gsi) + /* + * We got this event before device was fully configured + * and MAD registration code wasn't called/finished yet. + */ + return; + mlx5_ib_gsi_pkey_change(ports->gsi); - mutex_unlock(&ports->devr->mutex); } static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev) @@ -4606,8 +2575,7 @@ static void delay_drop_handler(struct work_struct *work) atomic_inc(&delay_drop->events_cnt); mutex_lock(&delay_drop->lock); - err = mlx5_core_set_delay_drop(delay_drop->dev->mdev, - delay_drop->timeout); + err = mlx5_core_set_delay_drop(delay_drop->dev, delay_drop->timeout); if (err) { mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n", delay_drop->timeout); @@ -4619,7 +2587,7 @@ static void delay_drop_handler(struct work_struct *work) static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe, struct ib_event *ibev) { - u8 port = (eqe->data.port.port >> 4) & 0xf; + u32 port = (eqe->data.port.port >> 4) & 0xf; switch (eqe->sub_type) { case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: @@ -4635,7 +2603,7 @@ static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe, struct ib_event *ibev) { - u8 port = (eqe->data.port.port >> 4) & 0xf; + u32 port = (eqe->data.port.port >> 4) & 0xf; ibev->element.port_num = port; @@ -4706,7 +2674,7 @@ static void mlx5_ib_handle_event(struct work_struct *_work) break; case MLX5_EVENT_TYPE_GENERAL_EVENT: handle_general_event(ibdev, work->param, &ibev); - /* fall through */ + fallthrough; default: goto out; } @@ -4772,219 +2740,35 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) int err; int port; - for (port = 1; port <= ARRAY_SIZE(dev->mdev->port_caps); port++) { - dev->mdev->port_caps[port - 1].has_smi = false; - if (MLX5_CAP_GEN(dev->mdev, port_type) == - MLX5_CAP_PORT_TYPE_IB) { - if (MLX5_CAP_GEN(dev->mdev, ib_virt)) { - err = mlx5_query_hca_vport_context(dev->mdev, 0, - port, 0, - &vport_ctx); - if (err) { - mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n", - port, err); - return err; - } - dev->mdev->port_caps[port - 1].has_smi = - vport_ctx.has_smi; - } else { - dev->mdev->port_caps[port - 1].has_smi = true; - } + if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB) + return 0; + + for (port = 1; port <= dev->num_ports; port++) { + if (!MLX5_CAP_GEN(dev->mdev, ib_virt)) { + dev->port_caps[port - 1].has_smi = true; + continue; } + err = mlx5_query_hca_vport_context(dev->mdev, 0, port, 0, + &vport_ctx); + if (err) { + mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n", + port, err); + return err; + } + dev->port_caps[port - 1].has_smi = vport_ctx.has_smi; } + return 0; } static void get_ext_port_caps(struct mlx5_ib_dev *dev) { - int port; + unsigned int port; - for (port = 1; port <= dev->num_ports; port++) + rdma_for_each_port (&dev->ib_dev, port) mlx5_query_ext_port_caps(dev, port); } -static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) -{ - struct ib_device_attr *dprops = NULL; - struct ib_port_attr *pprops = NULL; - int err = -ENOMEM; - - pprops = kzalloc(sizeof(*pprops), GFP_KERNEL); - if (!pprops) - goto out; - - dprops = kmalloc(sizeof(*dprops), GFP_KERNEL); - if (!dprops) - goto out; - - err = mlx5_ib_query_device(&dev->ib_dev, 
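/*
 * Illustrative sketch, not part of this diff: handle_port_change() and
 * handle_general_event() above recover the port with
 * (eqe->data.port.port >> 4) & 0xf -- the firmware packs the port
 * index into the high nibble of one event byte. The extraction,
 * isolated:
 */
#include <stdint.h>

static uint32_t event_port(uint8_t raw)
{
	return (raw >> 4) & 0xf;	/* high nibble = port number */
}
/* event_port(0x20) == 2; event_port(0x1f) == 1. */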
dprops, NULL); - if (err) { - mlx5_ib_warn(dev, "query_device failed %d\n", err); - goto out; - } - - err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); - if (err) { - mlx5_ib_warn(dev, "query_port %d failed %d\n", - port, err); - goto out; - } - - dev->mdev->port_caps[port - 1].pkey_table_len = - dprops->max_pkeys; - dev->mdev->port_caps[port - 1].gid_table_len = - pprops->gid_tbl_len; - mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n", - port, dprops->max_pkeys, pprops->gid_tbl_len); - -out: - kfree(pprops); - kfree(dprops); - - return err; -} - -static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) -{ - /* For representors use port 1, is this is the only native - * port - */ - if (dev->is_rep) - return __get_port_caps(dev, 1); - return __get_port_caps(dev, port); -} - -static void destroy_umrc_res(struct mlx5_ib_dev *dev) -{ - int err; - - err = mlx5_mr_cache_cleanup(dev); - if (err) - mlx5_ib_warn(dev, "mr cache cleanup failed\n"); - - if (dev->umrc.qp) - mlx5_ib_destroy_qp(dev->umrc.qp, NULL); - if (dev->umrc.cq) - ib_free_cq(dev->umrc.cq); - if (dev->umrc.pd) - ib_dealloc_pd(dev->umrc.pd); -} - -enum { - MAX_UMR_WR = 128, -}; - -static int create_umr_res(struct mlx5_ib_dev *dev) -{ - struct ib_qp_init_attr *init_attr = NULL; - struct ib_qp_attr *attr = NULL; - struct ib_pd *pd; - struct ib_cq *cq; - struct ib_qp *qp; - int ret; - - attr = kzalloc(sizeof(*attr), GFP_KERNEL); - init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); - if (!attr || !init_attr) { - ret = -ENOMEM; - goto error_0; - } - - pd = ib_alloc_pd(&dev->ib_dev, 0); - if (IS_ERR(pd)) { - mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); - ret = PTR_ERR(pd); - goto error_0; - } - - cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); - if (IS_ERR(cq)) { - mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); - ret = PTR_ERR(cq); - goto error_2; - } - - init_attr->send_cq = cq; - init_attr->recv_cq = cq; - init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; - init_attr->cap.max_send_wr = MAX_UMR_WR; - init_attr->cap.max_send_sge = 1; - init_attr->qp_type = MLX5_IB_QPT_REG_UMR; - init_attr->port_num = 1; - qp = mlx5_ib_create_qp(pd, init_attr, NULL); - if (IS_ERR(qp)) { - mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); - ret = PTR_ERR(qp); - goto error_3; - } - qp->device = &dev->ib_dev; - qp->real_qp = qp; - qp->uobject = NULL; - qp->qp_type = MLX5_IB_QPT_REG_UMR; - qp->send_cq = init_attr->send_cq; - qp->recv_cq = init_attr->recv_cq; - - attr->qp_state = IB_QPS_INIT; - attr->port_num = 1; - ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | - IB_QP_PORT, NULL); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); - goto error_4; - } - - memset(attr, 0, sizeof(*attr)); - attr->qp_state = IB_QPS_RTR; - attr->path_mtu = IB_MTU_256; - - ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); - goto error_4; - } - - memset(attr, 0, sizeof(*attr)); - attr->qp_state = IB_QPS_RTS; - ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); - goto error_4; - } - - dev->umrc.qp = qp; - dev->umrc.cq = cq; - dev->umrc.pd = pd; - - sema_init(&dev->umrc.sem, MAX_UMR_WR); - ret = mlx5_mr_cache_init(dev); - if (ret) { - mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); - goto error_4; - } - - kfree(attr); - kfree(init_attr); - - return 0; - -error_4: - mlx5_ib_destroy_qp(qp, NULL); - dev->umrc.qp = NULL; - -error_3: - ib_free_cq(cq); - 
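/*
 * Illustrative sketch, not part of this diff: create_umr_res() above
 * walks its QP through the mandatory RESET -> INIT -> RTR -> RTS
 * ladder, one modify call per hop. The legality rule, reduced to a toy
 * state machine (real verbs transitions also carry per-state attribute
 * masks, omitted here):
 */
#include <errno.h>

enum qp_state { QPS_RESET, QPS_INIT, QPS_RTR, QPS_RTS };

struct toy_qp {
	enum qp_state state;
};

static int toy_modify(struct toy_qp *qp, enum qp_state next)
{
	if (next != qp->state + 1)	/* only single forward hops */
		return -EINVAL;
	qp->state = next;
	return 0;
}

static int toy_bring_up(struct toy_qp *qp)
{
	int err;

	if ((err = toy_modify(qp, QPS_INIT)) ||
	    (err = toy_modify(qp, QPS_RTR)) ||
	    (err = toy_modify(qp, QPS_RTS)))
		return err;		/* fail fast, like the driver */
	return 0;
}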
dev->umrc.cq = NULL; - -error_2: - ib_dealloc_pd(pd); - dev->umrc.pd = NULL; - -error_0: - kfree(attr); - kfree(init_attr); - return ret; -} - static u8 mlx5_get_umr_fence(u8 umr_fence_cap) { switch (umr_fence_cap) { @@ -4997,163 +2781,100 @@ static u8 mlx5_get_umr_fence(u8 umr_fence_cap) } } -static int create_dev_resources(struct mlx5_ib_resources *devr) +static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev) { + struct mlx5_ib_resources *devr = &dev->devr; struct ib_srq_init_attr attr; - struct mlx5_ib_dev *dev; struct ib_device *ibdev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; int port; int ret = 0; - dev = container_of(devr, struct mlx5_ib_dev, devr); ibdev = &dev->ib_dev; - mutex_init(&devr->mutex); - - devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd); - if (!devr->p0) - return -ENOMEM; - - devr->p0->device = ibdev; - devr->p0->uobject = NULL; - atomic_set(&devr->p0->usecnt, 0); + if (!MLX5_CAP_GEN(dev->mdev, xrc)) + return -EOPNOTSUPP; - ret = mlx5_ib_alloc_pd(devr->p0, NULL); - if (ret) - goto error0; + devr->p0 = ib_alloc_pd(ibdev, 0); + if (IS_ERR(devr->p0)) + return PTR_ERR(devr->p0); - devr->c0 = rdma_zalloc_drv_obj(ibdev, ib_cq); - if (!devr->c0) { - ret = -ENOMEM; + devr->c0 = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr); + if (IS_ERR(devr->c0)) { + ret = PTR_ERR(devr->c0); goto error1; } - devr->c0->device = &dev->ib_dev; - atomic_set(&devr->c0->usecnt, 0); - - ret = mlx5_ib_create_cq(devr->c0, &cq_attr, NULL); + ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0); if (ret) - goto err_create_cq; - - devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); - if (IS_ERR(devr->x0)) { - ret = PTR_ERR(devr->x0); goto error2; - } - devr->x0->device = &dev->ib_dev; - devr->x0->inode = NULL; - atomic_set(&devr->x0->usecnt, 0); - mutex_init(&devr->x0->tgt_qp_mutex); - INIT_LIST_HEAD(&devr->x0->tgt_qp_list); - - devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); - if (IS_ERR(devr->x1)) { - ret = PTR_ERR(devr->x1); + + ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0); + if (ret) goto error3; - } - devr->x1->device = &dev->ib_dev; - devr->x1->inode = NULL; - atomic_set(&devr->x1->usecnt, 0); - mutex_init(&devr->x1->tgt_qp_mutex); - INIT_LIST_HEAD(&devr->x1->tgt_qp_list); memset(&attr, 0, sizeof(attr)); attr.attr.max_sge = 1; attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_XRC; attr.ext.cq = devr->c0; - attr.ext.xrc.xrcd = devr->x0; - - devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq); - if (!devr->s0) { - ret = -ENOMEM; - goto error4; - } - devr->s0->device = &dev->ib_dev; - devr->s0->pd = devr->p0; - devr->s0->srq_type = IB_SRQT_XRC; - devr->s0->ext.xrc.xrcd = devr->x0; - devr->s0->ext.cq = devr->c0; - ret = mlx5_ib_create_srq(devr->s0, &attr, NULL); - if (ret) + devr->s0 = ib_create_srq(devr->p0, &attr); + if (IS_ERR(devr->s0)) { + ret = PTR_ERR(devr->s0); goto err_create; - - atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); - atomic_inc(&devr->s0->ext.cq->usecnt); - atomic_inc(&devr->p0->usecnt); - atomic_set(&devr->s0->usecnt, 0); + } memset(&attr, 0, sizeof(attr)); attr.attr.max_sge = 1; attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_BASIC; - devr->s1 = rdma_zalloc_drv_obj(ibdev, ib_srq); - if (!devr->s1) { - ret = -ENOMEM; - goto error5; - } - - devr->s1->device = &dev->ib_dev; - devr->s1->pd = devr->p0; - devr->s1->srq_type = IB_SRQT_BASIC; - devr->s1->ext.cq = devr->c0; - ret = mlx5_ib_create_srq(devr->s1, &attr, NULL); - if (ret) + devr->s1 = ib_create_srq(devr->p0, &attr); + if (IS_ERR(devr->s1)) { + ret = PTR_ERR(devr->s1); goto error6; + } - atomic_inc(&devr->p0->usecnt); - 
atomic_set(&devr->s1->usecnt, 0); - - for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) { + for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) INIT_WORK(&devr->ports[port].pkey_change_work, pkey_change_handler); - devr->ports[port].devr = devr; - } return 0; error6: - kfree(devr->s1); -error5: - mlx5_ib_destroy_srq(devr->s0, NULL); + ib_destroy_srq(devr->s0); err_create: - kfree(devr->s0); -error4: - mlx5_ib_dealloc_xrcd(devr->x1, NULL); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0); error3: - mlx5_ib_dealloc_xrcd(devr->x0, NULL); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0); error2: - mlx5_ib_destroy_cq(devr->c0, NULL); -err_create_cq: - kfree(devr->c0); + ib_destroy_cq(devr->c0); error1: - mlx5_ib_dealloc_pd(devr->p0, NULL); -error0: - kfree(devr->p0); + ib_dealloc_pd(devr->p0); return ret; } -static void destroy_dev_resources(struct mlx5_ib_resources *devr) +static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev) { + struct mlx5_ib_resources *devr = &dev->devr; int port; - mlx5_ib_destroy_srq(devr->s1, NULL); - kfree(devr->s1); - mlx5_ib_destroy_srq(devr->s0, NULL); - kfree(devr->s0); - mlx5_ib_dealloc_xrcd(devr->x0, NULL); - mlx5_ib_dealloc_xrcd(devr->x1, NULL); - mlx5_ib_destroy_cq(devr->c0, NULL); - kfree(devr->c0); - mlx5_ib_dealloc_pd(devr->p0, NULL); - kfree(devr->p0); - - /* Make sure no change P_Key work items are still executing */ + /* + * Make sure no change P_Key work items are still executing. + * + * At this stage, the mlx5_ib_event should be unregistered + * and it ensures that no new works are added. + */ for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) cancel_work_sync(&devr->ports[port].pkey_change_work); + + ib_destroy_srq(devr->s1); + ib_destroy_srq(devr->s0); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0); + ib_destroy_cq(devr->c0); + ib_dealloc_pd(devr->p0); } static u32 get_core_cap_flags(struct ib_device *ibdev, @@ -5190,7 +2911,7 @@ static u32 get_core_cap_flags(struct ib_device *ibdev, return ret; } -static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, +static int mlx5_port_immutable(struct ib_device *ibdev, u32 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; @@ -5218,7 +2939,7 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num, +static int mlx5_port_rep_immutable(struct ib_device *ibdev, u32 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; @@ -5254,7 +2975,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) struct mlx5_flow_table *ft; int err; - if (!ns || !mlx5_lag_is_roce(mdev)) + if (!ns || !mlx5_lag_is_active(mdev)) return 0; err = mlx5_cmd_create_vport_lag(mdev); @@ -5268,6 +2989,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) } dev->flow_db->lag_demux_ft = ft; + dev->lag_ports = mlx5_lag_get_num_ports(mdev); dev->lag_active = true; return 0; @@ -5290,7 +3012,7 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev) } } -static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) +static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u32 port_num) { int err; @@ -5304,7 +3026,7 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) return 0; } -static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) +static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u32 port_num) { if 
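/*
 * Illustrative sketch, not part of this diff:
 * mlx5_ib_dev_res_cleanup() above runs cancel_work_sync() *before*
 * destroying the SRQs/CQ/PD the pkey_change work touches -- quiesce
 * asynchronous users first, free second. The same ordering with a
 * pthread worker and GCC-style atomics:
 */
#include <pthread.h>
#include <stdlib.h>

struct worker {
	pthread_t thread;
	int stop;
	int *shared;	/* resource the worker uses */
};

static void *work_fn(void *arg)
{
	struct worker *w = arg;

	while (!__atomic_load_n(&w->stop, __ATOMIC_ACQUIRE))
		__atomic_fetch_add(w->shared, 1, __ATOMIC_RELAXED);
	return NULL;
}

static void worker_teardown(struct worker *w)
{
	/* Step 1: stop and wait, like cancel_work_sync(). */
	__atomic_store_n(&w->stop, 1, __ATOMIC_RELEASE);
	pthread_join(w->thread, NULL);
	/* Step 2: only now is freeing the shared state safe. */
	free(w->shared);
}
/* Swapping the two steps is a use-after-free race. */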
(dev->port[port_num].roce.nb.notifier_call) { unregister_netdevice_notifier(&dev->port[port_num].roce.nb); @@ -5316,9 +3038,11 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) { int err; - err = mlx5_nic_vport_enable_roce(dev->mdev); - if (err) - return err; + if (!dev->is_rep && dev->profile != &raw_eth_profile) { + err = mlx5_nic_vport_enable_roce(dev->mdev); + if (err) + return err; + } err = mlx5_eth_lag_init(dev); if (err) @@ -5327,7 +3051,8 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) return 0; err_disable_roce: - mlx5_nic_vport_disable_roce(dev->mdev); + if (!dev->is_rep && dev->profile != &raw_eth_profile) + mlx5_nic_vport_disable_roce(dev->mdev); return err; } @@ -5335,451 +3060,11 @@ err_disable_roce: static void mlx5_disable_eth(struct mlx5_ib_dev *dev) { mlx5_eth_lag_cleanup(dev); - mlx5_nic_vport_disable_roce(dev->mdev); + if (!dev->is_rep && dev->profile != &raw_eth_profile) + mlx5_nic_vport_disable_roce(dev->mdev); } -struct mlx5_ib_counter { - const char *name; - size_t offset; -}; - -#define INIT_Q_COUNTER(_name) \ - { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)} - -static const struct mlx5_ib_counter basic_q_cnts[] = { - INIT_Q_COUNTER(rx_write_requests), - INIT_Q_COUNTER(rx_read_requests), - INIT_Q_COUNTER(rx_atomic_requests), - INIT_Q_COUNTER(out_of_buffer), -}; - -static const struct mlx5_ib_counter out_of_seq_q_cnts[] = { - INIT_Q_COUNTER(out_of_sequence), -}; - -static const struct mlx5_ib_counter retrans_q_cnts[] = { - INIT_Q_COUNTER(duplicate_request), - INIT_Q_COUNTER(rnr_nak_retry_err), - INIT_Q_COUNTER(packet_seq_err), - INIT_Q_COUNTER(implied_nak_seq_err), - INIT_Q_COUNTER(local_ack_timeout_err), -}; - -#define INIT_CONG_COUNTER(_name) \ - { .name = #_name, .offset = \ - MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)} - -static const struct mlx5_ib_counter cong_cnts[] = { - INIT_CONG_COUNTER(rp_cnp_ignored), - INIT_CONG_COUNTER(rp_cnp_handled), - INIT_CONG_COUNTER(np_ecn_marked_roce_packets), - INIT_CONG_COUNTER(np_cnp_sent), -}; - -static const struct mlx5_ib_counter extended_err_cnts[] = { - INIT_Q_COUNTER(resp_local_length_error), - INIT_Q_COUNTER(resp_cqe_error), - INIT_Q_COUNTER(req_cqe_error), - INIT_Q_COUNTER(req_remote_invalid_request), - INIT_Q_COUNTER(req_remote_access_errors), - INIT_Q_COUNTER(resp_remote_access_errors), - INIT_Q_COUNTER(resp_cqe_flush_error), - INIT_Q_COUNTER(req_cqe_flush_error), -}; - -static const struct mlx5_ib_counter roce_accl_cnts[] = { - INIT_Q_COUNTER(roce_adp_retrans), - INIT_Q_COUNTER(roce_adp_retrans_to), - INIT_Q_COUNTER(roce_slow_restart), - INIT_Q_COUNTER(roce_slow_restart_cnps), - INIT_Q_COUNTER(roce_slow_restart_trans), -}; - -#define INIT_EXT_PPCNT_COUNTER(_name) \ - { .name = #_name, .offset = \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_extended_cntrs_grp_data_layout._name##_high)} - -static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { - INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), -}; - -static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev) -{ - return MLX5_ESWITCH_MANAGER(mdev) && - mlx5_ib_eswitch_mode(mdev->priv.eswitch) == - MLX5_ESWITCH_OFFLOADS; -} - -static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) -{ - int num_cnt_ports; - int i; - - num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 
1 : dev->num_ports; - - for (i = 0; i < num_cnt_ports; i++) { - if (dev->port[i].cnts.set_id_valid) - mlx5_core_dealloc_q_counter(dev->mdev, - dev->port[i].cnts.set_id); - kfree(dev->port[i].cnts.names); - kfree(dev->port[i].cnts.offsets); - } -} - -static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, - struct mlx5_ib_counters *cnts) -{ - u32 num_counters; - - num_counters = ARRAY_SIZE(basic_q_cnts); - - if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) - num_counters += ARRAY_SIZE(out_of_seq_q_cnts); - - if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) - num_counters += ARRAY_SIZE(retrans_q_cnts); - - if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) - num_counters += ARRAY_SIZE(extended_err_cnts); - - if (MLX5_CAP_GEN(dev->mdev, roce_accl)) - num_counters += ARRAY_SIZE(roce_accl_cnts); - - cnts->num_q_counters = num_counters; - - if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); - num_counters += ARRAY_SIZE(cong_cnts); - } - if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { - cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); - num_counters += ARRAY_SIZE(ext_ppcnt_cnts); - } - cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); - if (!cnts->names) - return -ENOMEM; - - cnts->offsets = kcalloc(num_counters, - sizeof(cnts->offsets), GFP_KERNEL); - if (!cnts->offsets) - goto err_names; - - return 0; - -err_names: - kfree(cnts->names); - cnts->names = NULL; - return -ENOMEM; -} - -static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, - const char **names, - size_t *offsets) -{ - int i; - int j = 0; - - for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { - names[j] = basic_q_cnts[i].name; - offsets[j] = basic_q_cnts[i].offset; - } - - if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { - for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { - names[j] = out_of_seq_q_cnts[i].name; - offsets[j] = out_of_seq_q_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { - for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { - names[j] = retrans_q_cnts[i].name; - offsets[j] = retrans_q_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { - for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { - names[j] = extended_err_cnts[i].name; - offsets[j] = extended_err_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { - for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { - names[j] = roce_accl_cnts[i].name; - offsets[j] = roce_accl_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { - names[j] = cong_cnts[i].name; - offsets[j] = cong_cnts[i].offset; - } - } - - if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { - for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { - names[j] = ext_ppcnt_cnts[i].name; - offsets[j] = ext_ppcnt_cnts[i].offset; - } - } -} - -static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) -{ - int num_cnt_ports; - int err = 0; - int i; - bool is_shared; - - is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; - num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 
1 : dev->num_ports; - - for (i = 0; i < num_cnt_ports; i++) { - err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); - if (err) - goto err_alloc; - - mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, - dev->port[i].cnts.offsets); - - err = mlx5_cmd_alloc_q_counter(dev->mdev, - &dev->port[i].cnts.set_id, - is_shared ? - MLX5_SHARED_RESOURCE_UID : 0); - if (err) { - mlx5_ib_warn(dev, - "couldn't allocate queue counter for port %d, err %d\n", - i + 1, err); - goto err_alloc; - } - dev->port[i].cnts.set_id_valid = true; - } - return 0; - -err_alloc: - mlx5_ib_dealloc_counters(dev); - return err; -} - -static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, - u8 port_num) -{ - return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts : - &dev->port[port_num].cnts; -} - -/** - * mlx5_ib_get_counters_id - Returns counters id to use for device+port - * @dev: Pointer to mlx5 IB device - * @port_num: Zero based port number - * - * mlx5_ib_get_counters_id() Returns counters set id to use for given - * device port combination in switchdev and non switchdev mode of the - * parent device. - */ -u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num) -{ - const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); - - return cnts->set_id; -} - -static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, - u8 port_num) -{ - struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct mlx5_ib_counters *cnts; - bool is_switchdev = is_mdev_switchdev_mode(dev->mdev); - - if ((is_switchdev && port_num) || (!is_switchdev && !port_num)) - return NULL; - - cnts = get_counters(dev, port_num - 1); - - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); -} - -static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, - const struct mlx5_ib_counters *cnts, - struct rdma_hw_stats *stats, - u16 set_id) -{ - int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); - void *out; - __be32 val; - int ret, i; - - out = kvzalloc(outlen, GFP_KERNEL); - if (!out) - return -ENOMEM; - - ret = mlx5_core_query_q_counter(mdev, set_id, 0, out, outlen); - if (ret) - goto free; - - for (i = 0; i < cnts->num_q_counters; i++) { - val = *(__be32 *)(out + cnts->offsets[i]); - stats->value[i] = (u64)be32_to_cpu(val); - } - -free: - kvfree(out); - return ret; -} - -static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, - const struct mlx5_ib_counters *cnts, - struct rdma_hw_stats *stats) -{ - int offset = cnts->num_q_counters + cnts->num_cong_counters; - int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - int ret, i; - void *out; - - out = kvzalloc(sz, GFP_KERNEL); - if (!out) - return -ENOMEM; - - ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out); - if (ret) - goto free; - - for (i = 0; i < cnts->num_ext_ppcnt_counters; i++) - stats->value[i + offset] = - be64_to_cpup((__be64 *)(out + - cnts->offsets[i + offset])); -free: - kvfree(out); - return ret; -} - -static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, - struct rdma_hw_stats *stats, - u8 port_num, int index) -{ - struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); - struct mlx5_core_dev *mdev; - int ret, num_counters; - u8 mdev_port_num; - - if (!stats) - return -EINVAL; - - num_counters = cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters; - - /* q_counters are per IB device, query the master mdev */ - ret = 
mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id); - if (ret) - return ret; - - if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { - ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats); - if (ret) - return ret; - } - - if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - mdev = mlx5_ib_get_native_port_mdev(dev, port_num, - &mdev_port_num); - if (!mdev) { - /* If port is not affiliated yet, its in down state - * which doesn't have any counters yet, so it would be - * zero. So no need to read from the HCA. - */ - goto done; - } - ret = mlx5_lag_query_cong_counters(dev->mdev, - stats->value + - cnts->num_q_counters, - cnts->num_cong_counters, - cnts->offsets + - cnts->num_q_counters); - - mlx5_ib_put_native_port_mdev(dev, port_num); - if (ret) - return ret; - } - -done: - return num_counters; -} - -static struct rdma_hw_stats * -mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) -{ - struct mlx5_ib_dev *dev = to_mdev(counter->device); - const struct mlx5_ib_counters *cnts = - get_counters(dev, counter->port - 1); - - /* Q counters are in the beginning of all counters */ - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); -} - -static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) -{ - struct mlx5_ib_dev *dev = to_mdev(counter->device); - const struct mlx5_ib_counters *cnts = - get_counters(dev, counter->port - 1); - - return mlx5_ib_query_q_counters(dev->mdev, cnts, - counter->stats, counter->id); -} - -static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, - struct ib_qp *qp) -{ - struct mlx5_ib_dev *dev = to_mdev(qp->device); - u16 cnt_set_id = 0; - int err; - - if (!counter->id) { - err = mlx5_cmd_alloc_q_counter(dev->mdev, - &cnt_set_id, - MLX5_SHARED_RESOURCE_UID); - if (err) - return err; - counter->id = cnt_set_id; - } - - err = mlx5_ib_qp_set_counter(qp, counter); - if (err) - goto fail_set_counter; - - return 0; - -fail_set_counter: - mlx5_core_dealloc_q_counter(dev->mdev, cnt_set_id); - counter->id = 0; - - return err; -} - -static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) -{ - return mlx5_ib_qp_set_counter(qp, NULL); -} - -static int mlx5_ib_counter_dealloc(struct rdma_counter *counter) -{ - struct mlx5_ib_dev *dev = to_mdev(counter->device); - - return mlx5_core_dealloc_q_counter(dev->mdev, counter->id); -} - -static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num, +static int mlx5_ib_rn_get_params(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, struct rdma_netdev_alloc_params *params) { @@ -5789,23 +3074,6 @@ static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num, return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params); } -static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev) -{ - if (!dev->delay_drop.dir_debugfs) - return; - debugfs_remove_recursive(dev->delay_drop.dir_debugfs); - dev->delay_drop.dir_debugfs = NULL; -} - -static void cancel_delay_drop(struct mlx5_ib_dev *dev) -{ - if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) - return; - - cancel_work_sync(&dev->delay_drop.delay_drop_work); - delay_drop_debugfs_cleanup(dev); -} - static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { @@ -5845,44 +3113,10 @@ static const struct file_operations fops_delay_drop_timeout = { .read = delay_drop_timeout_read, }; -static void delay_drop_debugfs_init(struct mlx5_ib_dev *dev) -{ - struct dentry *root; - - if 
(!mlx5_debugfs_root) - return; - - root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root); - dev->delay_drop.dir_debugfs = root; - - debugfs_create_atomic_t("num_timeout_events", 0400, root, - &dev->delay_drop.events_cnt); - debugfs_create_atomic_t("num_rqs", 0400, root, - &dev->delay_drop.rqs_cnt); - debugfs_create_file("timeout", 0600, root, &dev->delay_drop, - &fops_delay_drop_timeout); -} - -static void init_delay_drop(struct mlx5_ib_dev *dev) -{ - if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) - return; - - mutex_init(&dev->delay_drop.lock); - dev->delay_drop.dev = dev; - dev->delay_drop.activate = false; - dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; - INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); - atomic_set(&dev->delay_drop.rqs_cnt, 0); - atomic_set(&dev->delay_drop.events_cnt, 0); - - delay_drop_debugfs_init(dev); -} - static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, struct mlx5_ib_multiport_info *mpi) { - u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; + u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; struct mlx5_ib_port *port = &ibdev->port[port_num]; int comps; int err; @@ -5922,13 +3156,11 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, port->mp.mpi = NULL; - list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list); - spin_unlock(&port->mp.mpi_lock); err = mlx5_nic_vport_unaffiliate_multiport(mpi->mdev); - mlx5_ib_dbg(ibdev, "unaffiliated port %d\n", port_num + 1); + mlx5_ib_dbg(ibdev, "unaffiliated port %u\n", port_num + 1); /* Log an error, still needed to cleanup the pointers and add * it back to the list. */ @@ -5942,14 +3174,14 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, struct mlx5_ib_multiport_info *mpi) { - u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; + u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; int err; lockdep_assert_held(&mlx5_ib_multiport_mutex); spin_lock(&ibdev->port[port_num].mp.mpi_lock); if (ibdev->port[port_num].mp.mpi) { - mlx5_ib_dbg(ibdev, "port %d already affiliated.\n", + mlx5_ib_dbg(ibdev, "port %u already affiliated.\n", port_num + 1); spin_unlock(&ibdev->port[port_num].mp.mpi_lock); return false; @@ -5964,10 +3196,6 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, if (err) goto unbind; - err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev)); - if (err) - goto unbind; - err = mlx5_add_netdev_notifier(ibdev, port_num); if (err) { mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n", @@ -5989,12 +3217,12 @@ unbind: static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev) { - int port_num = mlx5_core_native_port_num(dev->mdev) - 1; + u32 port_num = mlx5_core_native_port_num(dev->mdev) - 1; enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, port_num + 1); struct mlx5_ib_multiport_info *mpi; int err; - int i; + u32 i; if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET) return 0; @@ -6045,11 +3273,9 @@ static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev) break; } } - if (!bound) { - get_port_caps(dev, i + 1); + if (!bound) mlx5_ib_dbg(dev, "no free port found for port %d\n", i + 1); - } } list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list); @@ -6059,10 +3285,10 @@ static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev) static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) { - int port_num = 
mlx5_core_native_port_num(dev->mdev) - 1; + u32 port_num = mlx5_core_native_port_num(dev->mdev) - 1; enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, port_num + 1); - int i; + u32 i; if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET) return; @@ -6075,8 +3301,12 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) kfree(dev->port[i].mp.mpi); dev->port[i].mp.mpi = NULL; } else { - mlx5_ib_dbg(dev, "unbinding port_num: %d\n", i + 1); - mlx5_ib_unbind_slave_port(dev, dev->port[i].mp.mpi); + mlx5_ib_dbg(dev, "unbinding port_num: %u\n", + i + 1); + list_add_tail(&dev->port[i].mp.mpi->list, + &mlx5_ib_unaffiliated_port_list); + mlx5_ib_unbind_slave_port(dev, + dev->port[i].mp.mpi); } } } @@ -6088,9 +3318,9 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } -static int var_obj_cleanup(struct ib_uobject *uobject, - enum rdma_remove_reason why, - struct uverbs_attr_bundle *attrs) +static int mmap_obj_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct mlx5_user_mmap_entry *obj = uobject->object; @@ -6098,6 +3328,16 @@ static int var_obj_cleanup(struct ib_uobject *uobject, return 0; } +static int mlx5_rdma_user_mmap_entry_insert(struct mlx5_ib_ucontext *c, + struct mlx5_user_mmap_entry *entry, + size_t length) +{ + return rdma_user_mmap_entry_insert_range( + &c->ibucontext, &entry->rdma_entry, length, + (MLX5_IB_MMAP_OFFSET_START << 16), + ((MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1)); +} + static struct mlx5_user_mmap_entry * alloc_var_entry(struct mlx5_ib_ucontext *c) { @@ -6128,10 +3368,8 @@ alloc_var_entry(struct mlx5_ib_ucontext *c) entry->page_idx = page_idx; entry->mmap_flag = MLX5_IB_MMAP_TYPE_VAR; - err = rdma_user_mmap_entry_insert_range( - &c->ibucontext, &entry->rdma_entry, var_table->stride_size, - MLX5_IB_MMAP_OFFSET_START << 16, - (MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1); + err = mlx5_rdma_user_mmap_entry_insert(c, entry, + var_table->stride_size); if (err) goto err_insert; @@ -6168,26 +3406,20 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_VAR_OBJ_ALLOC)( mmap_offset = mlx5_entry_to_mmap_offset(entry); length = entry->rdma_entry.npages * PAGE_SIZE; uobj->object = entry; + uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE); err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET, &mmap_offset, sizeof(mmap_offset)); if (err) - goto err; + return err; err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID, &entry->page_idx, sizeof(entry->page_idx)); if (err) - goto err; + return err; err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH, &length, sizeof(length)); - if (err) - goto err; - - return 0; - -err: - rdma_user_mmap_entry_remove(&entry->rdma_entry); return err; } @@ -6215,7 +3447,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY( UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_VAR, - UVERBS_TYPE_ALLOC_IDR(var_obj_cleanup), + UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup), &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_ALLOC), &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_DESTROY)); @@ -6227,115 +3459,157 @@ static bool var_is_supported(struct ib_device *device) MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q); } -ADD_UVERBS_ATTRIBUTES_SIMPLE( - mlx5_ib_dm, - UVERBS_OBJECT_DM, - UVERBS_METHOD_DM_ALLOC, - UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, - UVERBS_ATTR_TYPE(u64), - UA_MANDATORY), - UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, - 
UVERBS_ATTR_TYPE(u16), - UA_OPTIONAL), - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE, - enum mlx5_ib_uapi_dm_type, - UA_OPTIONAL)); +static struct mlx5_user_mmap_entry * +alloc_uar_entry(struct mlx5_ib_ucontext *c, + enum mlx5_ib_uapi_uar_alloc_type alloc_type) +{ + struct mlx5_user_mmap_entry *entry; + struct mlx5_ib_dev *dev; + u32 uar_index; + int err; -ADD_UVERBS_ATTRIBUTES_SIMPLE( - mlx5_ib_flow_action, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - enum mlx5_ib_uapi_flow_action_flags)); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); -static const struct uapi_definition mlx5_ib_defs[] = { - UAPI_DEF_CHAIN(mlx5_ib_devx_defs), - UAPI_DEF_CHAIN(mlx5_ib_flow_defs), + dev = to_mdev(c->ibucontext.device); + err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index, c->devx_uid); + if (err) + goto end; - UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, - &mlx5_ib_flow_action), - UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm), - UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR, - UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)), - {} -}; + entry->page_idx = uar_index; + entry->address = uar_index2paddress(dev, uar_index); + if (alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF) + entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_WC; + else + entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_NC; + + err = mlx5_rdma_user_mmap_entry_insert(c, entry, PAGE_SIZE); + if (err) + goto err_insert; -static int mlx5_ib_read_counters(struct ib_counters *counters, - struct ib_counters_read_attr *read_attr, - struct uverbs_attr_bundle *attrs) + return entry; + +err_insert: + mlx5_cmd_uar_dealloc(dev->mdev, uar_index, c->devx_uid); +end: + kfree(entry); + return ERR_PTR(err); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)( + struct uverbs_attr_bundle *attrs) { - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - struct mlx5_read_counters_attr mread_attr = {}; - struct mlx5_ib_flow_counters_desc *desc; - int ret, i; + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE); + enum mlx5_ib_uapi_uar_alloc_type alloc_type; + struct mlx5_ib_ucontext *c; + struct mlx5_user_mmap_entry *entry; + u64 mmap_offset; + u32 length; + int err; - mutex_lock(&mcounters->mcntrs_mutex); - if (mcounters->cntrs_max_index > read_attr->ncounters) { - ret = -EINVAL; - goto err_bound; - } + c = to_mucontext(ib_uverbs_get_ucontext(attrs)); + if (IS_ERR(c)) + return PTR_ERR(c); - mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64), - GFP_KERNEL); - if (!mread_attr.out) { - ret = -ENOMEM; - goto err_bound; - } + err = uverbs_get_const(&alloc_type, attrs, + MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE); + if (err) + return err; - mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl; - mread_attr.flags = read_attr->flags; - ret = mcounters->read_counters(counters->device, &mread_attr); - if (ret) - goto err_read; + if (alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF && + alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC) + return -EOPNOTSUPP; - /* do the pass over the counters data array to assign according to the - * descriptions and indexing pairs - */ - desc = mcounters->counters_data; - for (i = 0; i < mcounters->ncounters; i++) - read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description]; - -err_read: - kfree(mread_attr.out); -err_bound: - mutex_unlock(&mcounters->mcntrs_mutex); - return ret; -} + if (!to_mdev(c->ibucontext.device)->wc_support && + 
alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF) + return -EOPNOTSUPP; -static int mlx5_ib_destroy_counters(struct ib_counters *counters) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + entry = alloc_uar_entry(c, alloc_type); + if (IS_ERR(entry)) + return PTR_ERR(entry); - counters_clear_description(counters); - if (mcounters->hw_cntrs_hndl) - mlx5_fc_destroy(to_mdev(counters->device)->mdev, - mcounters->hw_cntrs_hndl); + mmap_offset = mlx5_entry_to_mmap_offset(entry); + length = entry->rdma_entry.npages * PAGE_SIZE; + uobj->object = entry; + uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE); - kfree(mcounters); + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET, + &mmap_offset, sizeof(mmap_offset)); + if (err) + return err; - return 0; + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID, + &entry->page_idx, sizeof(entry->page_idx)); + if (err) + return err; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH, + &length, sizeof(length)); + return err; } -static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_mcounters *mcounters; +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_UAR_OBJ_ALLOC, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE, + MLX5_IB_OBJECT_UAR, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE, + enum mlx5_ib_uapi_uar_alloc_type, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY)); - mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL); - if (!mcounters) - return ERR_PTR(-ENOMEM); +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_UAR_OBJ_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_DESTROY_HANDLE, + MLX5_IB_OBJECT_UAR, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); - mutex_init(&mcounters->mcntrs_mutex); +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_UAR, + UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_ALLOC), + &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_DESTROY)); - return &mcounters->ibcntrs; -} +ADD_UVERBS_ATTRIBUTES_SIMPLE( + mlx5_ib_query_context, + UVERBS_OBJECT_DEVICE, + UVERBS_METHOD_QUERY_CONTEXT, + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX, + UVERBS_ATTR_STRUCT(struct mlx5_ib_alloc_ucontext_resp, + dump_fill_mkey), + UA_MANDATORY)); + +static const struct uapi_definition mlx5_ib_defs[] = { + UAPI_DEF_CHAIN(mlx5_ib_devx_defs), + UAPI_DEF_CHAIN(mlx5_ib_flow_defs), + UAPI_DEF_CHAIN(mlx5_ib_qos_defs), + UAPI_DEF_CHAIN(mlx5_ib_std_types_defs), + UAPI_DEF_CHAIN(mlx5_ib_dm_defs), + + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR, + UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR), + {} +}; static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); WARN_ON(!xa_empty(&dev->odp_mkeys)); - cleanup_srcu_struct(&dev->odp_srcu); - + mutex_destroy(&dev->cap_mask_mutex); WARN_ON(!xa_empty(&dev->sig_mrs)); WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); } @@ -6346,6 +3620,12 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) int err; int i; + 
dev->ib_dev.node_type = RDMA_NODE_IB_CA; + dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; + dev->ib_dev.phys_port_cnt = dev->num_ports; + dev->ib_dev.dev.parent = mdev->device; + dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; + for (i = 0; i < dev->num_ports; i++) { spin_lock_init(&dev->port[i].mp.mpi_lock); rwlock_init(&dev->port[i].roce.netdev_lock); @@ -6354,73 +3634,49 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->port[i].roce.last_port_state = IB_PORT_DOWN; } - mlx5_ib_internal_fill_odp_caps(dev); - err = mlx5_ib_init_multiport_master(dev); if (err) return err; err = set_has_smi_cap(dev); if (err) - return err; + goto err_mp; - if (!mlx5_core_mp_enabled(mdev)) { - for (i = 1; i <= dev->num_ports; i++) { - err = get_port_caps(dev, i); - if (err) - break; - } - } else { - err = get_port_caps(dev, mlx5_core_native_port_num(mdev)); - } + err = mlx5_query_max_pkeys(&dev->ib_dev, &dev->pkey_table_len); if (err) goto err_mp; if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); - dev->ib_dev.node_type = RDMA_NODE_IB_CA; - dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; - dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); - dev->ib_dev.dev.parent = mdev->device; mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); xa_init(&dev->odp_mkeys); xa_init(&dev->sig_mrs); + atomic_set(&dev->mkey_var, 0); spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; - - err = init_srcu_struct(&dev->odp_srcu); - if (err) - goto err_mp; - return 0; err_mp: mlx5_ib_cleanup_multiport_master(dev); - - return -ENOMEM; + return err; } -static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_enable_driver(struct ib_device *dev) { - dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); - - if (!dev->flow_db) - return -ENOMEM; - - mutex_init(&dev->flow_db->lock); + struct mlx5_ib_dev *mdev = to_mdev(dev); + int ret; - return 0; -} + ret = mlx5_ib_test_wc(mdev); + mlx5_ib_dbg(mdev, "Write-Combining %s", + mdev->wc_support ? 
"supported" : "not supported"); -static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) -{ - kfree(dev->flow_db); + return ret; } static const struct ib_device_ops mlx5_ib_dev_ops = { @@ -6436,29 +3692,24 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .attach_mcast = mlx5_ib_mcg_attach, .check_mr_status = mlx5_ib_check_mr_status, .create_ah = mlx5_ib_create_ah, - .create_counters = mlx5_ib_create_counters, .create_cq = mlx5_ib_create_cq, - .create_flow = mlx5_ib_create_flow, .create_qp = mlx5_ib_create_qp, .create_srq = mlx5_ib_create_srq, + .create_user_ah = mlx5_ib_create_ah, .dealloc_pd = mlx5_ib_dealloc_pd, .dealloc_ucontext = mlx5_ib_dealloc_ucontext, .del_gid = mlx5_ib_del_gid, .dereg_mr = mlx5_ib_dereg_mr, .destroy_ah = mlx5_ib_destroy_ah, - .destroy_counters = mlx5_ib_destroy_counters, .destroy_cq = mlx5_ib_destroy_cq, - .destroy_flow = mlx5_ib_destroy_flow, - .destroy_flow_action = mlx5_ib_destroy_flow_action, .destroy_qp = mlx5_ib_destroy_qp, .destroy_srq = mlx5_ib_destroy_srq, .detach_mcast = mlx5_ib_mcg_detach, .disassociate_ucontext = mlx5_ib_disassociate_ucontext, .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, + .device_group = &mlx5_attr_group, .enable_driver = mlx5_ib_enable_driver, - .fill_res_entry = mlx5_ib_fill_res_entry, - .fill_stat_entry = mlx5_ib_fill_stat_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, @@ -6472,8 +3723,8 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .modify_qp = mlx5_ib_modify_qp, .modify_srq = mlx5_ib_modify_srq, .poll_cq = mlx5_ib_poll_cq, - .post_recv = mlx5_ib_post_recv, - .post_send = mlx5_ib_post_send, + .post_recv = mlx5_ib_post_recv_nodrain, + .post_send = mlx5_ib_post_send_nodrain, .post_srq_recv = mlx5_ib_post_srq_recv, .process_mad = mlx5_ib_process_mad, .query_ah = mlx5_ib_query_ah, @@ -6482,24 +3733,22 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .query_pkey = mlx5_ib_query_pkey, .query_qp = mlx5_ib_query_qp, .query_srq = mlx5_ib_query_srq, - .read_counters = mlx5_ib_read_counters, + .query_ucontext = mlx5_ib_query_ucontext, .reg_user_mr = mlx5_ib_reg_user_mr, + .reg_user_mr_dmabuf = mlx5_ib_reg_user_mr_dmabuf, .req_notify_cq = mlx5_ib_arm_cq, .rereg_user_mr = mlx5_ib_rereg_user_mr, .resize_cq = mlx5_ib_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs), INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_qp, mlx5_ib_qp, ibqp), INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext), }; -static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = { - .create_flow_action_esp = mlx5_ib_create_flow_action_esp, - .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, -}; - static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = { .rdma_netdev_get_params = mlx5_ib_rn_get_params, }; @@ -6515,17 +3764,15 @@ static const struct ib_device_ops mlx5_ib_dev_sriov_ops = { static const struct ib_device_ops mlx5_ib_dev_mw_ops = { .alloc_mw = mlx5_ib_alloc_mw, .dealloc_mw = mlx5_ib_dealloc_mw, + + INIT_RDMA_OBJ_SIZE(ib_mw, mlx5_ib_mw, ibmw), }; static const struct ib_device_ops mlx5_ib_dev_xrc_ops = { .alloc_xrcd = mlx5_ib_alloc_xrcd, .dealloc_xrcd = mlx5_ib_dealloc_xrcd, -}; -static const struct ib_device_ops mlx5_ib_dev_dm_ops = { - .alloc_dm = mlx5_ib_alloc_dm, - .dealloc_dm = mlx5_ib_dealloc_dm, - .reg_dm_mr = 
mlx5_ib_reg_dm_mr, + INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx5_ib_xrcd, ibxrcd), }; static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev) @@ -6545,7 +3792,8 @@ static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev) doorbell_bar_offset); bar_size = (1ULL << log_doorbell_bar_size) * 4096; var_table->stride_size = 1ULL << log_doorbell_stride; - var_table->num_var_hw_entries = div64_u64(bar_size, var_table->stride_size); + var_table->num_var_hw_entries = div_u64(bar_size, + var_table->stride_size); mutex_init(&var_table->bitmap_lock); var_table->bitmap = bitmap_zalloc(var_table->num_var_hw_entries, GFP_KERNEL); @@ -6562,42 +3810,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; int err; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); - dev->ib_dev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); - if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) ib_set_device_ops(&dev->ib_dev, @@ -6608,28 +3820,17 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); - if (MLX5_CAP_GEN(mdev, imaicl)) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + if (MLX5_CAP_GEN(mdev, imaicl)) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops); - } - if (MLX5_CAP_GEN(mdev, xrc)) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | - (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + if (MLX5_CAP_GEN(mdev, xrc)) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); - } if (MLX5_CAP_DEV_MEM(mdev, memic) || MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops); - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops); ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops); if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) @@ -6670,6 +3871,7 @@ static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev) static const struct ib_device_ops 
mlx5_ib_dev_port_rep_ops = { .get_port_immutable = mlx5_port_rep_immutable, .query_port = mlx5_ib_rep_query_port, + .query_pkey = mlx5_ib_rep_query_pkey, }; static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev) @@ -6685,66 +3887,29 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = { .destroy_wq = mlx5_ib_destroy_wq, .get_netdev = mlx5_ib_get_netdev, .modify_wq = mlx5_ib_modify_wq, -}; - -static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) -{ - u8 port_num; - - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); - - port_num = mlx5_core_native_port_num(dev->mdev) - 1; - - /* Register only for native ports */ - return mlx5_add_netdev_notifier(dev, port_num); -} - -static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev) -{ - u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1; - - mlx5_remove_netdev_notifier(dev, port_num); -} - -static int mlx5_ib_stage_raw_eth_roce_init(struct mlx5_ib_dev *dev) -{ - struct mlx5_core_dev *mdev = dev->mdev; - enum rdma_link_layer ll; - int port_type_cap; - int err = 0; - - port_type_cap = MLX5_CAP_GEN(mdev, port_type); - ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); - - if (ll == IB_LINK_LAYER_ETHERNET) - err = mlx5_ib_stage_common_roce_init(dev); - return err; -} - -static void mlx5_ib_stage_raw_eth_roce_cleanup(struct mlx5_ib_dev *dev) -{ - mlx5_ib_stage_common_roce_cleanup(dev); -} + INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx5_ib_rwq_ind_table, + ib_rwq_ind_tbl), +}; -static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; + u32 port_num = 0; int err; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - err = mlx5_ib_stage_common_roce_init(dev); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); + + port_num = mlx5_core_native_port_num(dev->mdev) - 1; + + /* Register only for native ports */ + err = mlx5_add_netdev_notifier(dev, port_num); if (err) return err; @@ -6755,71 +3920,26 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) return 0; cleanup: - mlx5_ib_stage_common_roce_cleanup(dev); - + mlx5_remove_netdev_notifier(dev, port_num); return err; } -static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; + u32 port_num; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { mlx5_disable_eth(dev); - mlx5_ib_stage_common_roce_cleanup(dev); - } -} - -static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev) -{ - return create_dev_resources(&dev->devr); -} - -static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) -{ - destroy_dev_resources(&dev->devr); -} - -static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) -{ - return mlx5_ib_odp_init_one(dev); -} - -static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) -{ - mlx5_ib_odp_cleanup_one(dev); -} - -static const struct ib_device_ops 
mlx5_ib_dev_hw_stats_ops = { - .alloc_hw_stats = mlx5_ib_alloc_hw_stats, - .get_hw_stats = mlx5_ib_get_hw_stats, - .counter_bind_qp = mlx5_ib_counter_bind_qp, - .counter_unbind_qp = mlx5_ib_counter_unbind_qp, - .counter_dealloc = mlx5_ib_counter_dealloc, - .counter_alloc_stats = mlx5_ib_counter_alloc_stats, - .counter_update_stats = mlx5_ib_counter_update_stats, -}; -static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) -{ - if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops); - - return mlx5_ib_alloc_counters(dev); + port_num = mlx5_core_native_port_num(dev->mdev) - 1; + mlx5_remove_netdev_notifier(dev, port_num); } - - return 0; -} - -static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) -{ - if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) - mlx5_ib_dealloc_counters(dev); } static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev) @@ -6856,7 +3976,7 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true); if (err) - mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); + mlx5_free_bfreg(dev->mdev, &dev->bfreg); return err; } @@ -6871,17 +3991,22 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { const char *name; - rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group); - if (!mlx5_lag_is_roce(dev->mdev)) + if (!mlx5_lag_is_active(dev->mdev)) name = "mlx5_%d"; else name = "mlx5_bond_%d"; - return ib_register_device(&dev->ib_dev, name); + return ib_register_device(&dev->ib_dev, name, &dev->mdev->pdev->dev); } static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { - destroy_umrc_res(dev); + int err; + + err = mlx5_mkey_cache_cleanup(dev); + if (err) + mlx5_ib_warn(dev, "mr cache cleanup failed\n"); + + mlx5r_umr_resource_cleanup(dev); } static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) @@ -6891,19 +4016,61 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) { - return create_umr_res(dev); + int ret; + + ret = mlx5r_umr_resource_init(dev); + if (ret) + return ret; + + ret = mlx5_mkey_cache_init(dev); + if (ret) { + mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); + mlx5r_umr_resource_cleanup(dev); + } + return ret; } static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev) { - init_delay_drop(dev); + struct dentry *root; + + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return 0; + mutex_init(&dev->delay_drop.lock); + dev->delay_drop.dev = dev; + dev->delay_drop.activate = false; + dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; + INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); + atomic_set(&dev->delay_drop.rqs_cnt, 0); + atomic_set(&dev->delay_drop.events_cnt, 0); + + if (!mlx5_debugfs_root) + return 0; + + root = debugfs_create_dir("delay_drop", mlx5_debugfs_get_dev_root(dev->mdev)); + dev->delay_drop.dir_debugfs = root; + + debugfs_create_atomic_t("num_timeout_events", 0400, root, + &dev->delay_drop.events_cnt); + debugfs_create_atomic_t("num_rqs", 0400, root, + &dev->delay_drop.rqs_cnt); + debugfs_create_file("timeout", 0600, root, &dev->delay_drop, + &fops_delay_drop_timeout); return 0; } static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev) { - cancel_delay_drop(dev); + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return; + + cancel_work_sync(&dev->delay_drop.delay_drop_work); + if 
(!dev->delay_drop.dir_debugfs) + return; + + debugfs_remove_recursive(dev->delay_drop.dir_debugfs); + dev->delay_drop.dir_debugfs = NULL; } static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev) @@ -6918,38 +4085,6 @@ static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev) mlx5_notifier_unregister(dev->mdev, &dev->mdev_events); } -static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev) -{ - int uid; - - uid = mlx5_ib_devx_create(dev, false); - if (uid > 0) { - dev->devx_whitelist_uid = uid; - mlx5_ib_devx_init_event_table(dev); - } - - return 0; -} -static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) -{ - if (dev->devx_whitelist_uid) { - mlx5_ib_devx_cleanup_event_table(dev); - mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); - } -} - -int mlx5_ib_enable_driver(struct ib_device *dev) -{ - struct mlx5_ib_dev *mdev = to_mdev(dev); - int ret; - - ret = mlx5_ib_test_wc(mdev); - mlx5_ib_dbg(mdev, "Write-Combining %s", - mdev->wc_support ? "supported" : "not supported"); - - return ret; -} - void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) @@ -6967,12 +4102,14 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, ib_dealloc_device(&dev->ib_dev); } -void *__mlx5_ib_add(struct mlx5_ib_dev *dev, - const struct mlx5_ib_profile *profile) +int __mlx5_ib_add(struct mlx5_ib_dev *dev, + const struct mlx5_ib_profile *profile) { int err; int i; + dev->profile = profile; + for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { if (profile->stage[i].init) { err = profile->stage[i].init(dev); @@ -6981,24 +4118,26 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, } } - dev->profile = profile; dev->ib_active = true; - - return dev; + return 0; err_out: - __mlx5_ib_remove(dev, profile, i); - - return NULL; + /* Clean up stages which were initialized */ + while (i) { + i--; + if (profile->stage[i].cleanup) + profile->stage[i].cleanup(dev); + } + return -ENOMEM; } static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, - mlx5_ib_stage_flow_db_init, - mlx5_ib_stage_flow_db_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_FS, + mlx5_ib_fs_init, + mlx5_ib_fs_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CAPS, mlx5_ib_stage_caps_init, mlx5_ib_stage_caps_cleanup), @@ -7006,23 +4145,26 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_non_default_cb, NULL), STAGE_CREATE(MLX5_IB_STAGE_ROCE, - mlx5_ib_stage_roce_init, - mlx5_ib_stage_roce_cleanup), + mlx5_ib_roce_init, + mlx5_ib_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_QP, + mlx5_init_qp_table, + mlx5_cleanup_qp_table), STAGE_CREATE(MLX5_IB_STAGE_SRQ, mlx5_init_srq_table, mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, - mlx5_ib_stage_dev_res_init, - mlx5_ib_stage_dev_res_cleanup), + mlx5_ib_dev_res_init, + mlx5_ib_dev_res_cleanup), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, mlx5_ib_stage_dev_notifier_init, mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_ODP, - mlx5_ib_stage_odp_init, - mlx5_ib_stage_odp_cleanup), + mlx5_ib_odp_init_one, + mlx5_ib_odp_cleanup_one), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, - mlx5_ib_stage_counters_init, - mlx5_ib_stage_counters_cleanup), + mlx5_ib_counters_init, + mlx5_ib_counters_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS, mlx5_ib_stage_cong_debugfs_init, mlx5_ib_stage_cong_debugfs_cleanup), @@ -7036,8 +4178,8 @@ static const struct mlx5_ib_profile pf_profile = { NULL, 
mlx5_ib_stage_pre_ib_reg_umr_cleanup), STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, - mlx5_ib_stage_devx_init, - mlx5_ib_stage_devx_cleanup), + mlx5_ib_devx_init, + mlx5_ib_devx_cleanup), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -7047,15 +4189,18 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP, mlx5_ib_stage_delay_drop_init, mlx5_ib_stage_delay_drop_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_RESTRACK, + mlx5_ib_restrack_init, + NULL), }; const struct mlx5_ib_profile raw_eth_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, - mlx5_ib_stage_flow_db_init, - mlx5_ib_stage_flow_db_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_FS, + mlx5_ib_fs_init, + mlx5_ib_fs_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CAPS, mlx5_ib_stage_caps_init, mlx5_ib_stage_caps_cleanup), @@ -7063,20 +4208,26 @@ const struct mlx5_ib_profile raw_eth_profile = { mlx5_ib_stage_raw_eth_non_default_cb, NULL), STAGE_CREATE(MLX5_IB_STAGE_ROCE, - mlx5_ib_stage_raw_eth_roce_init, - mlx5_ib_stage_raw_eth_roce_cleanup), + mlx5_ib_roce_init, + mlx5_ib_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_QP, + mlx5_init_qp_table, + mlx5_cleanup_qp_table), STAGE_CREATE(MLX5_IB_STAGE_SRQ, mlx5_init_srq_table, mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, - mlx5_ib_stage_dev_res_init, - mlx5_ib_stage_dev_res_cleanup), + mlx5_ib_dev_res_init, + mlx5_ib_dev_res_cleanup), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, mlx5_ib_stage_dev_notifier_init, mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, - mlx5_ib_stage_counters_init, - mlx5_ib_stage_counters_cleanup), + mlx5_ib_counters_init, + mlx5_ib_counters_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS, + mlx5_ib_stage_cong_debugfs_init, + mlx5_ib_stage_cong_debugfs_cleanup), STAGE_CREATE(MLX5_IB_STAGE_UAR, mlx5_ib_stage_uar_init, mlx5_ib_stage_uar_cleanup), @@ -7087,18 +4238,24 @@ const struct mlx5_ib_profile raw_eth_profile = { NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, - mlx5_ib_stage_devx_init, - mlx5_ib_stage_devx_cleanup), + mlx5_ib_devx_init, + mlx5_ib_devx_cleanup), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, mlx5_ib_stage_post_ib_reg_umr_init, NULL), + STAGE_CREATE(MLX5_IB_STAGE_RESTRACK, + mlx5_ib_restrack_init, + NULL), }; -static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) +static int mlx5r_mp_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) { + struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = idev->mdev; struct mlx5_ib_multiport_info *mpi; struct mlx5_ib_dev *dev; bool bound = false; @@ -7106,15 +4263,14 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) mpi = kzalloc(sizeof(*mpi), GFP_KERNEL); if (!mpi) - return NULL; + return -ENOMEM; mpi->mdev = mdev; - err = mlx5_query_nic_vport_system_image_guid(mdev, &mpi->sys_image_guid); if (err) { kfree(mpi); - return NULL; + return err; } mutex_lock(&mlx5_ib_multiport_mutex); @@ -7124,6 +4280,7 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) if (bound) { rdma_roce_rescan_device(&dev->ib_dev); + mpi->ibdev->ib_active = true; break; } } @@ -7135,126 +4292,147 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) } mutex_unlock(&mlx5_ib_multiport_mutex); - 
return mpi; + auxiliary_set_drvdata(adev, mpi); + return 0; +} + +static void mlx5r_mp_remove(struct auxiliary_device *adev) +{ + struct mlx5_ib_multiport_info *mpi; + + mpi = auxiliary_get_drvdata(adev); + mutex_lock(&mlx5_ib_multiport_mutex); + if (mpi->ibdev) + mlx5_ib_unbind_slave_port(mpi->ibdev, mpi); + else + list_del(&mpi->list); + mutex_unlock(&mlx5_ib_multiport_mutex); + kfree(mpi); } -static void *mlx5_ib_add(struct mlx5_core_dev *mdev) +static int mlx5r_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) { + struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = idev->mdev; const struct mlx5_ib_profile *profile; + int port_type_cap, num_ports, ret; enum rdma_link_layer ll; struct mlx5_ib_dev *dev; - int port_type_cap; - int num_ports; - - printk_once(KERN_INFO "%s", mlx5_version); - - if (MLX5_ESWITCH_MANAGER(mdev) && - mlx5_ib_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) { - if (!mlx5_core_mp_enabled(mdev)) - mlx5_ib_register_vport_reps(mdev); - return mdev; - } port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); - if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) - return mlx5_ib_add_slave_port(mdev); - num_ports = max(MLX5_CAP_GEN(mdev, num_ports), MLX5_CAP_GEN(mdev, num_vhca_ports)); dev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!dev) - return NULL; + return -ENOMEM; dev->port = kcalloc(num_ports, sizeof(*dev->port), GFP_KERNEL); if (!dev->port) { ib_dealloc_device(&dev->ib_dev); - return NULL; + return -ENOMEM; } dev->mdev = mdev; dev->num_ports = num_ports; - if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_enabled(mdev)) + if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_get_roce_state(mdev)) profile = &raw_eth_profile; else profile = &pf_profile; - return __mlx5_ib_add(dev, profile); + ret = __mlx5_ib_add(dev, profile); + if (ret) { + kfree(dev->port); + ib_dealloc_device(&dev->ib_dev); + return ret; + } + + auxiliary_set_drvdata(adev, dev); + return 0; } -static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) +static void mlx5r_remove(struct auxiliary_device *adev) { - struct mlx5_ib_multiport_info *mpi; struct mlx5_ib_dev *dev; - if (MLX5_ESWITCH_MANAGER(mdev) && context == mdev) { - mlx5_ib_unregister_vport_reps(mdev); - return; - } - - if (mlx5_core_is_mp_slave(mdev)) { - mpi = context; - mutex_lock(&mlx5_ib_multiport_mutex); - if (mpi->ibdev) - mlx5_ib_unbind_slave_port(mpi->ibdev, mpi); - list_del(&mpi->list); - mutex_unlock(&mlx5_ib_multiport_mutex); - kfree(mpi); - return; - } - - dev = context; + dev = auxiliary_get_drvdata(adev); __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); } -static struct mlx5_interface mlx5_ib_interface = { - .add = mlx5_ib_add, - .remove = mlx5_ib_remove, - .protocol = MLX5_INTERFACE_PROTOCOL_IB, +static const struct auxiliary_device_id mlx5r_mp_id_table[] = { + { .name = MLX5_ADEV_NAME ".multiport", }, + {}, }; -unsigned long mlx5_ib_get_xlt_emergency_page(void) -{ - mutex_lock(&xlt_emergency_page_mutex); - return xlt_emergency_page; -} +static const struct auxiliary_device_id mlx5r_id_table[] = { + { .name = MLX5_ADEV_NAME ".rdma", }, + {}, +}; -void mlx5_ib_put_xlt_emergency_page(void) -{ - mutex_unlock(&xlt_emergency_page_mutex); -} +MODULE_DEVICE_TABLE(auxiliary, mlx5r_mp_id_table); +MODULE_DEVICE_TABLE(auxiliary, mlx5r_id_table); + +static struct auxiliary_driver mlx5r_mp_driver = { + .name = "multiport", + .probe = mlx5r_mp_probe, + .remove = mlx5r_mp_remove, + 
.id_table = mlx5r_mp_id_table, +}; + +static struct auxiliary_driver mlx5r_driver = { + .name = "rdma", + .probe = mlx5r_probe, + .remove = mlx5r_remove, + .id_table = mlx5r_id_table, +}; static int __init mlx5_ib_init(void) { - int err; + int ret; - xlt_emergency_page = __get_free_page(GFP_KERNEL); + xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL); if (!xlt_emergency_page) return -ENOMEM; - mutex_init(&xlt_emergency_page_mutex); - mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0); if (!mlx5_ib_event_wq) { - free_page(xlt_emergency_page); + free_page((unsigned long)xlt_emergency_page); return -ENOMEM; } mlx5_ib_odp_init(); + ret = mlx5r_rep_init(); + if (ret) + goto rep_err; + ret = auxiliary_driver_register(&mlx5r_mp_driver); + if (ret) + goto mp_err; + ret = auxiliary_driver_register(&mlx5r_driver); + if (ret) + goto drv_err; + return 0; - err = mlx5_register_interface(&mlx5_ib_interface); - - return err; +drv_err: + auxiliary_driver_unregister(&mlx5r_mp_driver); +mp_err: + mlx5r_rep_cleanup(); +rep_err: + destroy_workqueue(mlx5_ib_event_wq); + free_page((unsigned long)xlt_emergency_page); + return ret; } static void __exit mlx5_ib_cleanup(void) { - mlx5_unregister_interface(&mlx5_ib_interface); + auxiliary_driver_unregister(&mlx5r_driver); + auxiliary_driver_unregister(&mlx5r_mp_driver); + mlx5r_rep_cleanup(); + destroy_workqueue(mlx5_ib_event_wq); - mutex_destroy(&xlt_emergency_page_mutex); - free_page(xlt_emergency_page); + free_page((unsigned long)xlt_emergency_page); } module_init(mlx5_ib_init);
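/*
 * The pf_profile/raw_eth_profile tables above and __mlx5_ib_add()/
 * __mlx5_ib_remove() follow one pattern: an ordered array of init/cleanup
 * pairs that is walked forward on probe and unwound in reverse on failure
 * or removal, so teardown order never drifts out of sync with bring-up
 * order.  Below is a self-contained sketch of that pattern; the toy_*
 * names and stage bodies are hypothetical, not the mlx5 definitions.
 */
#include <stdio.h>

struct toy_dev { int flags; };

struct toy_stage {
	int  (*init)(struct toy_dev *dev);
	void (*cleanup)(struct toy_dev *dev);
};

static int  caps_init(struct toy_dev *dev)    { puts("caps init");      return 0; }
static void caps_cleanup(struct toy_dev *dev) { puts("caps cleanup"); }
static int  roce_init(struct toy_dev *dev)    { puts("roce init");      return 0; }
static void roce_cleanup(struct toy_dev *dev) { puts("roce cleanup"); }
static int  reg_init(struct toy_dev *dev)     { puts("ib reg init");    return 0; }
static void reg_cleanup(struct toy_dev *dev)  { puts("ib reg cleanup"); }

/* Analogue of STAGE_CREATE(): a stage may carry only an init or only a cleanup. */
static const struct toy_stage toy_profile[] = {
	{ caps_init, caps_cleanup },
	{ roce_init, roce_cleanup },
	{ reg_init,  reg_cleanup  },
};
#define TOY_STAGE_MAX (sizeof(toy_profile) / sizeof(toy_profile[0]))

/* Analogue of __mlx5_ib_remove(): unwind stages [0, stage) in reverse. */
static void toy_remove(struct toy_dev *dev, unsigned int stage)
{
	while (stage) {
		stage--;
		if (toy_profile[stage].cleanup)
			toy_profile[stage].cleanup(dev);
	}
}

/* Analogue of __mlx5_ib_add(): run stages forward, unwind on the first failure. */
static int toy_add(struct toy_dev *dev)
{
	unsigned int i;
	int err;

	for (i = 0; i < TOY_STAGE_MAX; i++) {
		if (!toy_profile[i].init)
			continue;
		err = toy_profile[i].init(dev);
		if (err) {
			toy_remove(dev, i);	/* only stages before i were initialized */
			return err;
		}
	}
	return 0;
}

int main(void)
{
	struct toy_dev dev = { 0 };

	if (toy_add(&dev))
		return 1;
	toy_remove(&dev, TOY_STAGE_MAX);	/* full removal reuses the same table */
	return 0;
}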