Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile  |   2
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.c     |  20
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.h     |   4
-rw-r--r--  drivers/infiniband/hw/mlx5/cong.c    | 421
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c      |   8
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_virt.c |   9
-rw-r--r--  drivers/infiniband/hw/mlx5/mad.c     |   4
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c    | 464
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h |  86
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c      | 121
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c     |   2
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c      | 172
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c     |  33
13 files changed, 1187 insertions, 159 deletions
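Most of the new code below (the 421-line cong.c) exposes RoCE congestion-control parameters as debugfs files under a cc_params/ directory, wired to the new MODIFY/QUERY_CONG_PARAMS firmware commands in cmd.c. A minimal userspace sketch of how those files would be exercised once the module is loaded follows; the mount point /sys/kernel/debug, the per-device directory 0000:01:00.0 (the PCI address under mlx5_debugfs_root), and the choice of rp_time_reset are illustrative assumptions, not taken from the diff:

/*
 * Userspace sketch (not part of the patch): read and update one of the
 * congestion-control knobs that cong.c registers via debugfs_create_file().
 * The path is an assumption -- the per-device directory name depends on
 * the PCI address, and the files exist only if mlx5_debugfs_root is set
 * and the HCA reports cc_query_allowed/cc_modify_allowed.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/kernel/debug/mlx5/0000:01:00.0/cc_params/rp_time_reset";
	char buf[16] = { 0 };
	int fd = open(path, O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (read(fd, buf, sizeof(buf) - 1) > 0)	/* served by get_param() */
		printf("rp_time_reset: %s", buf);
	if (write(fd, "300", 3) < 0)		/* served by set_param() */
		perror("write");
	close(fd);
	return 0;
}

Note that set_param() rejects writes longer than its 11-byte buffer and parses with kstrtou32(..., 0, ...), so plain decimal or 0x-prefixed hex strings both work.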
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 90ad2adc752f..bc6299697dda 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 18d5e1db93ed..470995fa38d2 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -57,3 +57,23 @@ int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, MLX5_SET(query_cong_statistics_in, in, clear, reset); return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } + +int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, + void *out, int out_size) +{ + u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = { }; + + MLX5_SET(query_cong_params_in, in, opcode, + MLX5_CMD_OP_QUERY_CONG_PARAMS); + MLX5_SET(query_cong_params_in, in, cong_protocol, cong_point); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); +} + +int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev, + void *in, int in_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_cong_params_out)] = { }; + + return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out)); +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index fa09228193a6..af4c24596274 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -39,4 +39,8 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, bool reset, void *out, int out_size); +int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, + void *out, int out_size); +int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, + void *in, int in_size); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c new file mode 100644 index 000000000000..2d32b519bb61 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -0,0 +1,421 @@ +/* + * Copyright (c) 2013-2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/debugfs.h> + +#include "mlx5_ib.h" +#include "cmd.h" + +enum mlx5_ib_cong_node_type { + MLX5_IB_RROCE_ECN_RP = 1, + MLX5_IB_RROCE_ECN_NP = 2, +}; + +static const char * const mlx5_ib_dbg_cc_name[] = { + "rp_clamp_tgt_rate", + "rp_clamp_tgt_rate_ati", + "rp_time_reset", + "rp_byte_reset", + "rp_threshold", + "rp_ai_rate", + "rp_hai_rate", + "rp_min_dec_fac", + "rp_min_rate", + "rp_rate_to_set_on_first_cnp", + "rp_dce_tcp_g", + "rp_dce_tcp_rtt", + "rp_rate_reduce_monitor_period", + "rp_initial_alpha_value", + "rp_gd", + "np_cnp_dscp", + "np_cnp_prio_mode", + "np_cnp_prio", +}; + +#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1) +#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR BIT(2) +#define MLX5_IB_RP_TIME_RESET_ATTR BIT(3) +#define MLX5_IB_RP_BYTE_RESET_ATTR BIT(4) +#define MLX5_IB_RP_THRESHOLD_ATTR BIT(5) +#define MLX5_IB_RP_AI_RATE_ATTR BIT(7) +#define MLX5_IB_RP_HAI_RATE_ATTR BIT(8) +#define MLX5_IB_RP_MIN_DEC_FAC_ATTR BIT(9) +#define MLX5_IB_RP_MIN_RATE_ATTR BIT(10) +#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR BIT(11) +#define MLX5_IB_RP_DCE_TCP_G_ATTR BIT(12) +#define MLX5_IB_RP_DCE_TCP_RTT_ATTR BIT(13) +#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR BIT(14) +#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR BIT(15) +#define MLX5_IB_RP_GD_ATTR BIT(16) + +#define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3) +#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4) + +static enum mlx5_ib_cong_node_type +mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset) +{ + if (param_offset >= MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE && + param_offset <= MLX5_IB_DBG_CC_RP_GD) + return MLX5_IB_RROCE_ECN_RP; + else + return MLX5_IB_RROCE_ECN_NP; +} + +static u32 mlx5_get_cc_param_val(void *field, int offset) +{ + switch (offset) { + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate); + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate_after_time_inc); + case MLX5_IB_DBG_CC_RP_TIME_RESET: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_time_reset); + case MLX5_IB_DBG_CC_RP_BYTE_RESET: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_byte_reset); + case MLX5_IB_DBG_CC_RP_THRESHOLD: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_threshold); + case MLX5_IB_DBG_CC_RP_AI_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_ai_rate); + case MLX5_IB_DBG_CC_RP_HAI_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_hai_rate); + case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_min_dec_fac); + case MLX5_IB_DBG_CC_RP_MIN_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_min_rate); + case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rate_to_set_on_first_cnp); + case MLX5_IB_DBG_CC_RP_DCE_TCP_G: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + dce_tcp_g); + case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + dce_tcp_rtt); + case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rate_reduce_monitor_period); + case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE: + return 
MLX5_GET(cong_control_r_roce_ecn_rp, field, + initial_alpha_value); + case MLX5_IB_DBG_CC_RP_GD: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_gd); + case MLX5_IB_DBG_CC_NP_CNP_DSCP: + return MLX5_GET(cong_control_r_roce_ecn_np, field, + cnp_dscp); + case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE: + return MLX5_GET(cong_control_r_roce_ecn_np, field, + cnp_prio_mode); + case MLX5_IB_DBG_CC_NP_CNP_PRIO: + return MLX5_GET(cong_control_r_roce_ecn_np, field, + cnp_802p_prio); + default: + return 0; + } +} + +static void mlx5_ib_set_cc_param_mask_val(void *field, int offset, + u32 var, u32 *attr_mask) +{ + switch (offset) { + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE: + *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate, var); + break; + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI: + *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate_after_time_inc, var); + break; + case MLX5_IB_DBG_CC_RP_TIME_RESET: + *attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_time_reset, var); + break; + case MLX5_IB_DBG_CC_RP_BYTE_RESET: + *attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_byte_reset, var); + break; + case MLX5_IB_DBG_CC_RP_THRESHOLD: + *attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_threshold, var); + break; + case MLX5_IB_DBG_CC_RP_AI_RATE: + *attr_mask |= MLX5_IB_RP_AI_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_ai_rate, var); + break; + case MLX5_IB_DBG_CC_RP_HAI_RATE: + *attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_hai_rate, var); + break; + case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC: + *attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_min_dec_fac, var); + break; + case MLX5_IB_DBG_CC_RP_MIN_RATE: + *attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_min_rate, var); + break; + case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP: + *attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rate_to_set_on_first_cnp, var); + break; + case MLX5_IB_DBG_CC_RP_DCE_TCP_G: + *attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + dce_tcp_g, var); + break; + case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT: + *attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + dce_tcp_rtt, var); + break; + case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD: + *attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rate_reduce_monitor_period, var); + break; + case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE: + *attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + initial_alpha_value, var); + break; + case MLX5_IB_DBG_CC_RP_GD: + *attr_mask |= MLX5_IB_RP_GD_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_gd, var); + break; + case MLX5_IB_DBG_CC_NP_CNP_DSCP: + *attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR; + MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var); + break; + case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE: + *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var); + break; + case MLX5_IB_DBG_CC_NP_CNP_PRIO: + *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_np, 
field, cnp_prio_mode, 0); + MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var); + break; + } +} + +static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, int offset, u32 *var) +{ + int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out); + void *out; + void *field; + int err; + enum mlx5_ib_cong_node_type node; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + node = mlx5_ib_param_to_node(offset); + + err = mlx5_cmd_query_cong_params(dev->mdev, node, out, outlen); + if (err) + goto free; + + field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters); + *var = mlx5_get_cc_param_val(field, offset); + +free: + kvfree(out); + return err; +} + +static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, int offset, u32 var) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in); + void *in; + void *field; + enum mlx5_ib_cong_node_type node; + u32 attr_mask = 0; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_cong_params_in, in, opcode, + MLX5_CMD_OP_MODIFY_CONG_PARAMS); + + node = mlx5_ib_param_to_node(offset); + MLX5_SET(modify_cong_params_in, in, cong_protocol, node); + + field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters); + mlx5_ib_set_cc_param_mask_val(field, offset, var, &attr_mask); + + field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select); + MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp, + attr_mask); + + err = mlx5_cmd_modify_cong_params(dev->mdev, in, inlen); + kvfree(in); + return err; +} + +static ssize_t set_param(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_ib_dbg_param *param = filp->private_data; + int offset = param->offset; + char lbuf[11] = { }; + u32 var; + int ret; + + if (count > sizeof(lbuf)) + return -EINVAL; + + if (copy_from_user(lbuf, buf, count)) + return -EFAULT; + + lbuf[sizeof(lbuf) - 1] = '\0'; + + if (kstrtou32(lbuf, 0, &var)) + return -EINVAL; + + ret = mlx5_ib_set_cc_params(param->dev, offset, var); + return ret ? 
ret : count; +} + +static ssize_t get_param(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_ib_dbg_param *param = filp->private_data; + int offset = param->offset; + u32 var = 0; + int ret; + char lbuf[11]; + + if (*pos) + return 0; + + ret = mlx5_ib_get_cc_params(param->dev, offset, &var); + if (ret) + return ret; + + ret = snprintf(lbuf, sizeof(lbuf), "%d\n", var); + if (ret < 0) + return ret; + + if (copy_to_user(buf, lbuf, ret)) + return -EFAULT; + + *pos += ret; + return ret; +} + +static const struct file_operations dbg_cc_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = set_param, + .read = get_param, +}; + +void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev) +{ + if (!mlx5_debugfs_root || + !dev->dbg_cc_params || + !dev->dbg_cc_params->root) + return; + + debugfs_remove_recursive(dev->dbg_cc_params->root); + kfree(dev->dbg_cc_params); + dev->dbg_cc_params = NULL; +} + +int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev) +{ + struct mlx5_ib_dbg_cc_params *dbg_cc_params; + int i; + + if (!mlx5_debugfs_root) + goto out; + + if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed) || + !MLX5_CAP_GEN(dev->mdev, cc_modify_allowed)) + goto out; + + dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL); + if (!dbg_cc_params) + goto out; + + dev->dbg_cc_params = dbg_cc_params; + + dbg_cc_params->root = debugfs_create_dir("cc_params", + dev->mdev->priv.dbg_root); + if (!dbg_cc_params->root) + goto err; + + for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) { + dbg_cc_params->params[i].offset = i; + dbg_cc_params->params[i].dev = dev; + dbg_cc_params->params[i].dentry = + debugfs_create_file(mlx5_ib_dbg_cc_name[i], + 0600, dbg_cc_params->root, + &dbg_cc_params->params[i], + &dbg_cc_fops); + if (!dbg_cc_params->params[i].dentry) + goto err; + } +out: return 0; + +err: + mlx5_ib_warn(dev, "cong debugfs failure\n"); + mlx5_ib_cleanup_cong_debugfs(dev); + /* + * We don't want to fail driver if debugfs failed to initialize, + * so we are not forwarding error to the user. + */ + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index a384d72ea3cd..2aa53f427685 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -499,7 +499,7 @@ static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries, struct mlx5_ib_qp *qp; *npolled = 0; - /* Find uncompleted WQEs belonging to that cq and retrun mmics ones */ + /* Find uncompleted WQEs belonging to that cq and return mmics ones */ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) { sw_send_comp(qp, num_entries, wc + *npolled, npolled); if (*npolled >= num_entries) @@ -751,10 +751,8 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, void *cqc; int err; - ucmdlen = - (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < - sizeof(ucmd)) ? (sizeof(ucmd) - - sizeof(ucmd.reserved)) : sizeof(ucmd); + ucmdlen = udata->inlen < sizeof(ucmd) ? 
+ (sizeof(ucmd) - sizeof(ucmd.reserved)) : sizeof(ucmd); if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) return -EFAULT; diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c index c1b9de800fe5..649a3364f838 100644 --- a/drivers/infiniband/hw/mlx5/ib_virt.c +++ b/drivers/infiniband/hw/mlx5/ib_virt.c @@ -96,6 +96,7 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_hca_vport_context *in; + struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx; int err; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -109,6 +110,8 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, } in->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + if (!err) + vfs_ctx[vf].policy = in->policy; out: kfree(in); @@ -151,6 +154,7 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid) struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_hca_vport_context *in; + struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx; int err; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -160,6 +164,8 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid) in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID; in->node_guid = guid; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + if (!err) + vfs_ctx[vf].node_guid = guid; kfree(in); return err; } @@ -169,6 +175,7 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid) struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_hca_vport_context *in; + struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx; int err; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -178,6 +185,8 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid) in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID; in->port_guid = guid; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + if (!err) + vfs_ctx[vf].port_guid = guid; kfree(in); return err; } diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 95db929bdc34..1003b0133a49 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -78,7 +78,7 @@ static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, u16 slid; int err; - slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; @@ -204,7 +204,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num, int err; void *out_cnt; - /* Decalring support of extended counters */ + /* Declaring support of extended counters */ if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) { struct ib_class_port_info cpi = {}; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a7f2e60085c4..ab3c562d5ba7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -30,6 +30,7 @@ * SOFTWARE. 
*/ +#include <linux/debugfs.h> #include <linux/highmem.h> #include <linux/module.h> #include <linux/init.h> @@ -58,6 +59,7 @@ #include <linux/mlx5/vport.h> #include "mlx5_ib.h" #include "cmd.h" +#include <linux/mlx5/vport.h> #define DRIVER_NAME "mlx5_ib" #define DRIVER_VERSION "5.0-0" @@ -65,7 +67,6 @@ MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRIVER_VERSION); static char mlx5_version[] = DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" @@ -97,6 +98,20 @@ mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num) return mlx5_port_type_cap_to_rdma_ll(port_type_cap); } +static int get_port_state(struct ib_device *ibdev, + u8 port_num, + enum ib_port_state *state) +{ + struct ib_port_attr attr; + int ret; + + memset(&attr, 0, sizeof(attr)); + ret = mlx5_ib_query_port(ibdev, port_num, &attr); + if (!ret) + *state = attr.state; + return ret; +} + static int mlx5_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -114,6 +129,7 @@ static int mlx5_netdev_event(struct notifier_block *this, write_unlock(&ibdev->roce.netdev_lock); break; + case NETDEV_CHANGE: case NETDEV_UP: case NETDEV_DOWN: { struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev); @@ -127,10 +143,23 @@ static int mlx5_netdev_event(struct notifier_block *this, if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev)) && ibdev->ib_active) { struct ib_event ibev = { }; + enum ib_port_state port_state; + if (get_port_state(&ibdev->ib_dev, 1, &port_state)) + return NOTIFY_DONE; + + if (ibdev->roce.last_port_state == port_state) + return NOTIFY_DONE; + + ibdev->roce.last_port_state = port_state; ibev.device = &ibdev->ib_dev; - ibev.event = (event == NETDEV_UP) ? - IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + if (port_state == IB_PORT_DOWN) + ibev.event = IB_EVENT_PORT_ERR; + else if (port_state == IB_PORT_ACTIVE) + ibev.event = IB_EVENT_PORT_ACTIVE; + else + return NOTIFY_DONE; + ibev.element.port_num = 1; ib_dispatch_event(&ibev); } @@ -668,6 +697,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_UD_TSO; } + if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) && + MLX5_CAP_GEN(dev->mdev, general_notification_event)) + props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP; + + if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && + MLX5_CAP_IPOIB_ENHANCED(mdev, csum_cap)) + props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && MLX5_CAP_ETH(dev->mdev, scatter_fcs)) { /* Legacy bit to support old userspace libraries */ @@ -740,6 +777,16 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, 1 << MLX5_CAP_GEN(dev->mdev, log_max_rq); } + if (MLX5_CAP_GEN(mdev, tag_matching)) { + props->xrq_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE; + props->xrq_caps.max_num_tags = + (1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1; + props->xrq_caps.flags = IB_TM_CAP_RC; + props->xrq_caps.max_ops = + 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); + props->xrq_caps.max_sge = MLX5_TM_MAX_SGE; + } + if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) { resp.cqe_comp_caps.max_num = MLX5_CAP_GEN(dev->mdev, cqe_compression) ? 
@@ -765,8 +812,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes, uhw->outlen)) { - resp.mlx5_ib_support_multi_pkt_send_wqes = - MLX5_CAP_ETH(mdev, multi_pkt_send_wqe); + if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) + resp.mlx5_ib_support_multi_pkt_send_wqes = + MLX5_IB_ALLOW_MPW; + + if (MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)) + resp.mlx5_ib_support_multi_pkt_send_wqes |= + MLX5_IB_SUPPORT_EMPW; + resp.response_length += sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } @@ -774,6 +827,27 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (field_avail(typeof(resp), reserved, uhw->outlen)) resp.response_length += sizeof(resp.reserved); + if (field_avail(typeof(resp), sw_parsing_caps, + uhw->outlen)) { + resp.response_length += sizeof(resp.sw_parsing_caps); + if (MLX5_CAP_ETH(mdev, swp)) { + resp.sw_parsing_caps.sw_parsing_offloads |= + MLX5_IB_SW_PARSING; + + if (MLX5_CAP_ETH(mdev, swp_csum)) + resp.sw_parsing_caps.sw_parsing_offloads |= + MLX5_IB_SW_PARSING_CSUM; + + if (MLX5_CAP_ETH(mdev, swp_lso)) + resp.sw_parsing_caps.sw_parsing_offloads |= + MLX5_IB_SW_PARSING_LSO; + + if (resp.sw_parsing_caps.sw_parsing_offloads) + resp.sw_parsing_caps.supported_qpts = + BIT(IB_QPT_RAW_PACKET); + } + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); @@ -1085,6 +1159,12 @@ static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND); + /* CM layer calls ib_modify_port() regardless of the link layer. For + * Ethernet ports, qkey violation and Port capabilities are meaningless. + */ + if (!is_ib) + return 0; + if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) { change_mask = props->clr_port_cap_mask | props->set_port_cap_mask; value = ~props->clr_port_cap_mask | props->set_port_cap_mask; @@ -1138,7 +1218,7 @@ static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k, if (req->num_low_latency_bfregs > req->total_num_bfregs - 1) return -EINVAL; - mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, alloated %d, using %d sys pages\n", + mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, using %d sys pages\n", MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no", lib_uar_4k ? 
"yes" : "no", ref_bfregs, req->total_num_bfregs, *num_sys_pages); @@ -1187,6 +1267,45 @@ static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *con return 0; } +static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) +{ + int err; + + err = mlx5_core_alloc_transport_domain(dev->mdev, tdn); + if (err) + return err; + + if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || + !MLX5_CAP_GEN(dev->mdev, disable_local_lb)) + return err; + + mutex_lock(&dev->lb_mutex); + dev->user_td++; + + if (dev->user_td == 2) + err = mlx5_nic_vport_update_local_lb(dev->mdev, true); + + mutex_unlock(&dev->lb_mutex); + return err; +} + +static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn) +{ + mlx5_core_dealloc_transport_domain(dev->mdev, tdn); + + if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || + !MLX5_CAP_GEN(dev->mdev, disable_local_lb)) + return; + + mutex_lock(&dev->lb_mutex); + dev->user_td--; + + if (dev->user_td < 2) + mlx5_nic_vport_update_local_lb(dev->mdev, false); + + mutex_unlock(&dev->lb_mutex); +} + static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { @@ -1197,7 +1316,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct mlx5_bfreg_info *bfregi; int ver; int err; - size_t reqlen; size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, max_cqe_version); bool lib_uar_4k; @@ -1205,18 +1323,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (!dev->ib_active) return ERR_PTR(-EAGAIN); - if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr)) - return ERR_PTR(-EINVAL); - - reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); - if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) + if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) ver = 0; - else if (reqlen >= min_req_v2) + else if (udata->inlen >= min_req_v2) ver = 2; else return ERR_PTR(-EINVAL); - err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req))); + err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); if (err) return ERR_PTR(err); @@ -1295,8 +1409,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, mutex_init(&context->upd_xlt_page_mutex); if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { - err = mlx5_core_alloc_transport_domain(dev->mdev, - &context->tdn); + err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); if (err) goto out_page; } @@ -1362,7 +1475,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, out_td: if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn); out_page: free_page(context->upd_xlt_page); @@ -1390,7 +1503,7 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) bfregi = &context->bfregi; if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn); free_page(context->upd_xlt_page); deallocate_uars(dev, context); @@ -2028,23 +2141,34 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, * it won't fall into the multicast flow steering table and this rule * could steal other multicast packets. 
*/ -static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) +static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) { - struct ib_flow_spec_eth *eth_spec; + union ib_flow_spec *flow_spec; if (ib_attr->type != IB_FLOW_ATTR_NORMAL || - ib_attr->size < sizeof(struct ib_flow_attr) + - sizeof(struct ib_flow_spec_eth) || ib_attr->num_of_specs < 1) return false; - eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1); - if (eth_spec->type != IB_FLOW_SPEC_ETH || - eth_spec->size != sizeof(*eth_spec)) + flow_spec = (union ib_flow_spec *)(ib_attr + 1); + if (flow_spec->type == IB_FLOW_SPEC_IPV4) { + struct ib_flow_spec_ipv4 *ipv4_spec; + + ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec; + if (ipv4_is_multicast(ipv4_spec->val.dst_ip)) + return true; + return false; + } + + if (flow_spec->type == IB_FLOW_SPEC_ETH) { + struct ib_flow_spec_eth *eth_spec; + + eth_spec = (struct ib_flow_spec_eth *)flow_spec; + return is_multicast_ether_addr(eth_spec->mask.dst_mac) && + is_multicast_ether_addr(eth_spec->val.dst_mac); + } - return is_multicast_ether_addr(eth_spec->mask.dst_mac) && - is_multicast_ether_addr(eth_spec->val.dst_mac); + return false; } static bool is_valid_ethertype(struct mlx5_core_dev *mdev, @@ -2229,10 +2353,31 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, return err ? ERR_PTR(err) : prio; } -static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst) +static void set_underlay_qp(struct mlx5_ib_dev *dev, + struct mlx5_flow_spec *spec, + u32 underlay_qpn) +{ + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, + spec->match_criteria, + misc_parameters); + void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + if (underlay_qpn && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + ft_field_support.bth_dst_qp)) { + MLX5_SET(fte_match_set_misc, + misc_params_v, bth_dst_qp, underlay_qpn); + MLX5_SET(fte_match_set_misc, + misc_params_c, bth_dst_qp, 0xffffff); + } +} + +static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst, + u32 underlay_qpn) { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; @@ -2268,6 +2413,9 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, ib_flow += ((union ib_flow_spec *)ib_flow)->size; } + if (!flow_is_multicast_only(flow_attr)) + set_underlay_qp(dev, spec, underlay_qpn); + spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); if (is_drop) { flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; @@ -2307,6 +2455,14 @@ free: return err ? 
ERR_PTR(err) : handler; } +static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0); +} + static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, struct ib_flow_attr *flow_attr, @@ -2443,6 +2599,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct mlx5_ib_flow_prio *ft_prio_tx = NULL; struct mlx5_ib_flow_prio *ft_prio; int err; + int underlay_qpn; if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOMEM); @@ -2483,8 +2640,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, handler = create_dont_trap_rule(dev, ft_prio, flow_attr, dst); } else { - handler = create_flow_rule(dev, ft_prio, flow_attr, - dst); + underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ? + mqp->underlay_qpn : 0; + handler = _create_flow_rule(dev, ft_prio, flow_attr, + dst, underlay_qpn); } } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { @@ -2522,8 +2681,14 @@ unlock: static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_ib_qp *mqp = to_mqp(ibqp); int err; + if (mqp->flags & MLX5_IB_QP_UNDERLAY) { + mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n"); + return -EOPNOTSUPP; + } + err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num); if (err) mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", @@ -2685,6 +2850,26 @@ static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev) spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags); } +static void delay_drop_handler(struct work_struct *work) +{ + int err; + struct mlx5_ib_delay_drop *delay_drop = + container_of(work, struct mlx5_ib_delay_drop, + delay_drop_work); + + atomic_inc(&delay_drop->events_cnt); + + mutex_lock(&delay_drop->lock); + err = mlx5_core_set_delay_drop(delay_drop->dev->mdev, + delay_drop->timeout); + if (err) { + mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n", + delay_drop->timeout); + delay_drop->activate = false; + } + mutex_unlock(&delay_drop->lock); +} + static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param) { @@ -2737,8 +2922,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, ibev.event = IB_EVENT_CLIENT_REREGISTER; port = (u8)param; break; + case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT: + schedule_work(&ibdev->delay_drop.delay_drop_work); + goto out; default: - return; + goto out; } ibev.device = &ibdev->ib_dev; @@ -2746,7 +2934,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, if (port < 1 || port > ibdev->num_ports) { mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); - return; + goto out; } if (ibdev->ib_active) @@ -2754,6 +2942,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, if (fatal) ibdev->ib_active = false; + +out: + return; } static int set_has_smi_cap(struct mlx5_ib_dev *dev) @@ -3036,7 +3227,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) attr.attr.max_sge = 1; attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_XRC; - attr.ext.xrc.cq = devr->c0; + attr.ext.cq = devr->c0; attr.ext.xrc.xrcd = devr->x0; devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL); @@ -3051,9 
+3242,9 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->s0->srq_context = NULL; devr->s0->srq_type = IB_SRQT_XRC; devr->s0->ext.xrc.xrcd = devr->x0; - devr->s0->ext.xrc.cq = devr->c0; + devr->s0->ext.cq = devr->c0; atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); - atomic_inc(&devr->s0->ext.xrc.cq->usecnt); + atomic_inc(&devr->s0->ext.cq->usecnt); atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s0->usecnt, 0); @@ -3072,9 +3263,9 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->s1->event_handler = NULL; devr->s1->srq_context = NULL; devr->s1->srq_type = IB_SRQT_BASIC; - devr->s1->ext.xrc.cq = devr->c0; + devr->s1->ext.cq = devr->c0; atomic_inc(&devr->p0->usecnt); - atomic_set(&devr->s0->usecnt, 0); + atomic_set(&devr->s1->usecnt, 0); for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) { INIT_WORK(&devr->ports[port].pkey_change_work, @@ -3167,13 +3358,13 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void get_dev_fw_str(struct ib_device *ibdev, char *str, - size_t str_len) +static void get_dev_fw_str(struct ib_device *ibdev, char *str) { struct mlx5_ib_dev *dev = container_of(ibdev, struct mlx5_ib_dev, ib_dev); - snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev), - fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%04d", + fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev), + fw_rev_sub(dev->mdev)); } static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) @@ -3313,6 +3504,17 @@ static const struct mlx5_ib_counter cong_cnts[] = { INIT_CONG_COUNTER(np_cnp_sent), }; +static const struct mlx5_ib_counter extended_err_cnts[] = { + INIT_Q_COUNTER(resp_local_length_error), + INIT_Q_COUNTER(resp_cqe_error), + INIT_Q_COUNTER(req_cqe_error), + INIT_Q_COUNTER(req_remote_invalid_request), + INIT_Q_COUNTER(req_remote_access_errors), + INIT_Q_COUNTER(resp_remote_access_errors), + INIT_Q_COUNTER(resp_cqe_flush_error), + INIT_Q_COUNTER(req_cqe_flush_error), +}; + static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { unsigned int i; @@ -3337,6 +3539,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) num_counters += ARRAY_SIZE(retrans_q_cnts); + + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) + num_counters += ARRAY_SIZE(extended_err_cnts); + cnts->num_q_counters = num_counters; if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { @@ -3386,6 +3592,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, } } + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { + for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { + names[j] = extended_err_cnts[i].name; + offsets[j] = extended_err_cnts[i].offset; + } + } + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { names[j] = cong_cnts[i].name; @@ -3556,6 +3769,136 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, return netdev; } +static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev) +{ + if (!dev->delay_drop.dbg) + return; + debugfs_remove_recursive(dev->delay_drop.dbg->dir_debugfs); + kfree(dev->delay_drop.dbg); + dev->delay_drop.dbg = NULL; +} + +static void cancel_delay_drop(struct mlx5_ib_dev *dev) +{ + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return; + + cancel_work_sync(&dev->delay_drop.delay_drop_work); + delay_drop_debugfs_cleanup(dev); +} + +static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf, + 
size_t count, loff_t *pos) +{ + struct mlx5_ib_delay_drop *delay_drop = filp->private_data; + char lbuf[20]; + int len; + + len = snprintf(lbuf, sizeof(lbuf), "%u\n", delay_drop->timeout); + return simple_read_from_buffer(buf, count, pos, lbuf, len); +} + +static ssize_t delay_drop_timeout_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_ib_delay_drop *delay_drop = filp->private_data; + u32 timeout; + u32 var; + + if (kstrtouint_from_user(buf, count, 0, &var)) + return -EFAULT; + + timeout = min_t(u32, roundup(var, 100), MLX5_MAX_DELAY_DROP_TIMEOUT_MS * + 1000); + if (timeout != var) + mlx5_ib_dbg(delay_drop->dev, "Round delay drop timeout to %u usec\n", + timeout); + + delay_drop->timeout = timeout; + + return count; +} + +static const struct file_operations fops_delay_drop_timeout = { + .owner = THIS_MODULE, + .open = simple_open, + .write = delay_drop_timeout_write, + .read = delay_drop_timeout_read, +}; + +static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev) +{ + struct mlx5_ib_dbg_delay_drop *dbg; + + if (!mlx5_debugfs_root) + return 0; + + dbg = kzalloc(sizeof(*dbg), GFP_KERNEL); + if (!dbg) + return -ENOMEM; + + dbg->dir_debugfs = + debugfs_create_dir("delay_drop", + dev->mdev->priv.dbg_root); + if (!dbg->dir_debugfs) + return -ENOMEM; + + dbg->events_cnt_debugfs = + debugfs_create_atomic_t("num_timeout_events", 0400, + dbg->dir_debugfs, + &dev->delay_drop.events_cnt); + if (!dbg->events_cnt_debugfs) + goto out_debugfs; + + dbg->rqs_cnt_debugfs = + debugfs_create_atomic_t("num_rqs", 0400, + dbg->dir_debugfs, + &dev->delay_drop.rqs_cnt); + if (!dbg->rqs_cnt_debugfs) + goto out_debugfs; + + dbg->timeout_debugfs = + debugfs_create_file("timeout", 0600, + dbg->dir_debugfs, + &dev->delay_drop, + &fops_delay_drop_timeout); + if (!dbg->timeout_debugfs) + goto out_debugfs; + + dev->delay_drop.dbg = dbg; + + return 0; + +out_debugfs: + delay_drop_debugfs_cleanup(dev); + return -ENOMEM; +} + +static void init_delay_drop(struct mlx5_ib_dev *dev) +{ + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return; + + mutex_init(&dev->delay_drop.lock); + dev->delay_drop.dev = dev; + dev->delay_drop.activate = false; + dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; + INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); + atomic_set(&dev->delay_drop.rqs_cnt, 0); + atomic_set(&dev->delay_drop.events_cnt, 0); + + if (delay_drop_debugfs_init(dev)) + mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n"); +} + +static const struct cpumask * +mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + + return mlx5_get_vector_affinity(dev->mdev, comp_vector); +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; @@ -3686,6 +4029,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; dev->ib_dev.get_port_immutable = mlx5_port_immutable; dev->ib_dev.get_dev_fw_str = get_dev_fw_str; + dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity; if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev; @@ -3723,18 +4067,20 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } + dev->ib_dev.create_flow = mlx5_ib_create_flow; + dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; + dev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + (1ull 
<< IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) { - dev->ib_dev.create_flow = mlx5_ib_create_flow; - dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; dev->ib_dev.create_wq = mlx5_ib_create_wq; dev->ib_dev.modify_wq = mlx5_ib_modify_wq; dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | @@ -3754,6 +4100,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) err = mlx5_enable_eth(dev); if (err) goto err_free_port; + dev->roce.last_port_state = IB_PORT_DOWN; } err = create_dev_resources(&dev->devr); @@ -3770,9 +4117,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) goto err_odp; } + err = mlx5_ib_init_cong_debugfs(dev); + if (err) + goto err_cnt; + dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev); if (!dev->mdev->priv.uar) - goto err_cnt; + goto err_cong; err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false); if (err) @@ -3790,18 +4141,25 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (err) goto err_dev; + init_delay_drop(dev); + for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) { err = device_create_file(&dev->ib_dev.dev, mlx5_class_attributes[i]); if (err) - goto err_umrc; + goto err_delay_drop; } + if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && + MLX5_CAP_GEN(mdev, disable_local_lb)) + mutex_init(&dev->lb_mutex); + dev->ib_active = true; return dev; -err_umrc: +err_delay_drop: + cancel_delay_drop(dev); destroy_umrc_res(dev); err_dev: @@ -3817,6 +4175,8 @@ err_uar_page: mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); err_cnt: + mlx5_ib_cleanup_cong_debugfs(dev); +err_cong: if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) mlx5_ib_dealloc_counters(dev); @@ -3846,11 +4206,13 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) struct mlx5_ib_dev *dev = context; enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); + cancel_delay_drop(dev); mlx5_remove_netdev_notifier(dev); ib_unregister_device(&dev->ib_dev); mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); mlx5_free_bfreg(dev->mdev, &dev->bfreg); mlx5_put_uars_page(dev->mdev, mdev->priv.uar); + mlx5_ib_cleanup_cong_debugfs(dev); if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) mlx5_ib_dealloc_counters(dev); destroy_umrc_res(dev); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index bdcf25410c99..189e80cd6b2f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -107,6 +107,11 @@ enum { MLX5_CQE_VERSION_V1, }; +enum { + MLX5_TM_MAX_RNDV_MSG_SIZE = 64, + MLX5_TM_MAX_SGE = 1, +}; + struct mlx5_ib_vma_private_data { struct list_head list; struct vm_area_struct *vma; @@ -247,6 +252,10 @@ struct mlx5_ib_wq { void *qend; }; +enum mlx5_ib_wq_flags { + MLX5_IB_WQ_FLAGS_DELAY_DROP = 0x1, +}; + struct mlx5_ib_rwq { struct ib_wq ibwq; struct mlx5_core_qp core_qp; @@ -264,6 +273,7 @@ struct mlx5_ib_rwq { u32 wqe_count; u32 wqe_shift; int wq_sig; + u32 create_flags; /* Use enum mlx5_ib_wq_flags */ }; enum { @@ -378,6 +388,7 @@ struct mlx5_ib_qp { struct list_head cq_recv_list; struct list_head cq_send_list; u32 rate_limit; + u32 underlay_qpn; }; struct 
mlx5_ib_cq_buf { @@ -399,6 +410,7 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, MLX5_IB_QP_RSS = 1 << 8, MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9, + MLX5_IB_QP_UNDERLAY = 1 << 10, }; struct mlx5_umr_wr { @@ -496,7 +508,7 @@ struct mlx5_ib_mr { struct mlx5_shared_mr_info *smr_info; struct list_head list; int order; - int umred; + bool allocated_from_cache; int npages; struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; @@ -616,6 +628,63 @@ struct mlx5_roce { struct net_device *netdev; struct notifier_block nb; atomic_t next_port; + enum ib_port_state last_port_state; +}; + +struct mlx5_ib_dbg_param { + int offset; + struct mlx5_ib_dev *dev; + struct dentry *dentry; +}; + +enum mlx5_ib_dbg_cc_types { + MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE, + MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI, + MLX5_IB_DBG_CC_RP_TIME_RESET, + MLX5_IB_DBG_CC_RP_BYTE_RESET, + MLX5_IB_DBG_CC_RP_THRESHOLD, + MLX5_IB_DBG_CC_RP_AI_RATE, + MLX5_IB_DBG_CC_RP_HAI_RATE, + MLX5_IB_DBG_CC_RP_MIN_DEC_FAC, + MLX5_IB_DBG_CC_RP_MIN_RATE, + MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP, + MLX5_IB_DBG_CC_RP_DCE_TCP_G, + MLX5_IB_DBG_CC_RP_DCE_TCP_RTT, + MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD, + MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE, + MLX5_IB_DBG_CC_RP_GD, + MLX5_IB_DBG_CC_NP_CNP_DSCP, + MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE, + MLX5_IB_DBG_CC_NP_CNP_PRIO, + MLX5_IB_DBG_CC_MAX, +}; + +struct mlx5_ib_dbg_cc_params { + struct dentry *root; + struct mlx5_ib_dbg_param params[MLX5_IB_DBG_CC_MAX]; +}; + +enum { + MLX5_MAX_DELAY_DROP_TIMEOUT_MS = 100, +}; + +struct mlx5_ib_dbg_delay_drop { + struct dentry *dir_debugfs; + struct dentry *rqs_cnt_debugfs; + struct dentry *events_cnt_debugfs; + struct dentry *timeout_debugfs; +}; + +struct mlx5_ib_delay_drop { + struct mlx5_ib_dev *dev; + struct work_struct delay_drop_work; + /* serialize setting of delay drop */ + struct mutex lock; + u32 timeout; + bool activate; + atomic_t events_cnt; + atomic_t rqs_cnt; + struct mlx5_ib_dbg_delay_drop *dbg; }; struct mlx5_ib_dev { @@ -652,9 +721,15 @@ struct mlx5_ib_dev { struct list_head qp_list; /* Array with num_ports elements */ struct mlx5_ib_port *port; - struct mlx5_sq_bfreg bfreg; - struct mlx5_sq_bfreg fp_bfreg; - u8 umr_fence; + struct mlx5_sq_bfreg bfreg; + struct mlx5_sq_bfreg fp_bfreg; + struct mlx5_ib_delay_drop delay_drop; + struct mlx5_ib_dbg_cc_params *dbg_cc_params; + + /* protect the user_td */ + struct mutex lb_mutex; + u32 user_td; + u8 umr_fence; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -904,6 +979,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, int index, enum ib_gid_type *gid_type); +void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev); +int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev); + /* GSI QP helper functions */ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 2c40a2e989d2..0e2789d9bb4d 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -48,7 +48,7 @@ enum { #define MLX5_UMR_ALIGN 2048 static int clean_mr(struct mlx5_ib_mr *mr); -static int use_umr(struct mlx5_ib_dev *dev, int order); +static int mr_cache_max_order(struct mlx5_ib_dev *dev); static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) @@ -183,7 +183,7 @@ static int 
add_keys(struct mlx5_ib_dev *dev, int c, int num) break; } mr->order = ent->order; - mr->umred = 1; + mr->allocated_from_cache = 1; mr->dev = dev; MLX5_SET(mkc, mkc, free, 1); @@ -491,16 +491,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_ib_mr *mr = NULL; struct mlx5_cache_ent *ent; + int last_umr_cache_entry; int c; int i; c = order2idx(dev, order); - if (c < 0 || c > MAX_UMR_CACHE_ENTRY) { + last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev)); + if (c < 0 || c > last_umr_cache_entry) { mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); return NULL; } - for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) { + for (i = c; i <= last_umr_cache_entry; i++) { ent = &cache->ent[i]; mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); @@ -674,12 +676,12 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); queue_work(cache->wq, &ent->work); - if (i > MAX_UMR_CACHE_ENTRY) { + if (i > MR_CACHE_LAST_STD_ENTRY) { mlx5_odp_init_mr_cache_entry(ent); continue; } - if (!use_umr(dev, ent->order)) + if (ent->order > mr_cache_max_order(dev)) continue; ent->page = PAGE_SHIFT; @@ -806,21 +808,22 @@ err_free: return ERR_PTR(err); } -static int get_octo_len(u64 addr, u64 len, int page_size) +static int get_octo_len(u64 addr, u64 len, int page_shift) { + u64 page_size = 1ULL << page_shift; u64 offset; int npages; offset = addr & (page_size - 1); - npages = ALIGN(len + offset, page_size) >> ilog2(page_size); + npages = ALIGN(len + offset, page_size) >> page_shift; return (npages + 1) / 2; } -static int use_umr(struct mlx5_ib_dev *dev, int order) +static int mr_cache_max_order(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) - return order <= MAX_UMR_CACHE_ENTRY + 2; - return order <= MLX5_MAX_UMR_SHIFT; + return MR_CACHE_LAST_STD_ENTRY + 2; + return MLX5_MAX_UMR_SHIFT; } static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, @@ -896,7 +899,8 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, return err; } -static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, +static struct mlx5_ib_mr *alloc_mr_from_cache( + struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, u64 len, int npages, int page_shift, int order, int access_flags) { @@ -928,16 +932,6 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, mr->mmkey.size = len; mr->mmkey.pd = to_mpd(pd)->pdn; - err = mlx5_ib_update_xlt(mr, 0, npages, page_shift, - MLX5_IB_UPD_XLT_ENABLE); - - if (err) { - mlx5_mr_cache_free(dev, mr); - return ERR_PTR(err); - } - - mr->live = 1; - return mr; } @@ -1103,7 +1097,8 @@ free_xlt: static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, u64 virt_addr, u64 length, struct ib_umem *umem, int npages, - int page_shift, int access_flags) + int page_shift, int access_flags, + bool populate) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr; @@ -1118,15 +1113,19 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, if (!mr) return ERR_PTR(-ENOMEM); - inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + - sizeof(*pas) * ((npages + 1) / 2) * 2; + mr->ibmr.pd = pd; + mr->access_flags = access_flags; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + if (populate) + inlen += sizeof(*pas) * roundup(npages, 2); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_1; } pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, 
in, klm_pas_mtt); - if (!(access_flags & IB_ACCESS_ON_DEMAND)) + if (populate && !(access_flags & IB_ACCESS_ON_DEMAND)) mlx5_ib_populate_pas(dev, umem, page_shift, pas, pg_cap ? MLX5_IB_MTT_PRESENT : 0); @@ -1135,23 +1134,27 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, free, !populate); MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET64(mkc, mkc, start_addr, virt_addr); MLX5_SET64(mkc, mkc, len, length); MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, bsf_octword_size, 0); MLX5_SET(mkc, mkc, translations_octword_size, - get_octo_len(virt_addr, length, 1 << page_shift)); + get_octo_len(virt_addr, length, page_shift)); MLX5_SET(mkc, mkc, log_page_size, page_shift); MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(create_mkey_in, in, translations_octword_actual_size, - get_octo_len(virt_addr, length, 1 << page_shift)); + if (populate) { + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, + get_octo_len(virt_addr, length, page_shift)); + } err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) { @@ -1160,9 +1163,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, } mr->mmkey.type = MLX5_MKEY_MR; mr->desc_size = sizeof(struct mlx5_mtt); - mr->umem = umem; mr->dev = dev; - mr->live = 1; kvfree(in); mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); @@ -1202,6 +1203,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int ncont; int order; int err; + bool use_umr = true; mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); @@ -1220,27 +1222,29 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, err = mr_umem_get(pd, start, length, access_flags, &umem, &npages, &page_shift, &ncont, &order); - if (err < 0) + if (err < 0) return ERR_PTR(err); - if (use_umr(dev, order)) { - mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, - order, access_flags); + if (order <= mr_cache_max_order(dev)) { + mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, + page_shift, order, access_flags); if (PTR_ERR(mr) == -EAGAIN) { mlx5_ib_dbg(dev, "cache empty for order %d", order); mr = NULL; } - } else if (access_flags & IB_ACCESS_ON_DEMAND && - !MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { - err = -EINVAL; - pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB"); - goto error; + } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { + if (access_flags & IB_ACCESS_ON_DEMAND) { + err = -EINVAL; + pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB"); + goto error; + } + use_umr = false; } if (!mr) { mutex_lock(&dev->slow_path_mutex); mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, - page_shift, access_flags); + page_shift, access_flags, !use_umr); mutex_unlock(&dev->slow_path_mutex); } @@ -1258,8 +1262,22 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, update_odp_mr(mr); #endif - return 
&mr->ibmr; + if (use_umr) { + int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; + if (access_flags & IB_ACCESS_ON_DEMAND) + update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP; + + err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, + update_xlt_flags); + if (err) { + mlx5_ib_dereg_mr(&mr->ibmr); + return ERR_PTR(err); + } + } + + mr->live = 1; + return &mr->ibmr; error: ib_umem_release(umem); return ERR_PTR(err); @@ -1347,7 +1365,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, /* * UMR can't be used - MKey needs to be replaced. */ - if (mr->umred) { + if (mr->allocated_from_cache) { err = unreg_umr(dev, mr); if (err) mlx5_ib_warn(dev, "Failed to unregister MR\n"); @@ -1360,12 +1378,13 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, return err; mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, - page_shift, access_flags); + page_shift, access_flags, true); if (IS_ERR(mr)) return PTR_ERR(mr); - mr->umred = 0; + mr->allocated_from_cache = 0; + mr->live = 1; } else { /* * Send a UMR WQE @@ -1453,7 +1472,7 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) static int clean_mr(struct mlx5_ib_mr *mr) { struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - int umred = mr->umred; + int allocated_from_cache = mr->allocated_from_cache; int err; if (mr->sig) { @@ -1471,20 +1490,20 @@ static int clean_mr(struct mlx5_ib_mr *mr) mlx5_free_priv_descs(mr); - if (!umred) { + if (!allocated_from_cache) { + u32 key = mr->mmkey.key; + err = destroy_mkey(dev, mr); + kfree(mr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", - mr->mmkey.key, err); + key, err); return err; } } else { mlx5_mr_cache_free(dev, mr); } - if (!umred) - kfree(mr); - return 0; } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index ae0746754008..3d701c7a4c91 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -939,7 +939,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( if (qp->ibqp.qp_type != IB_QPT_RC) { av = *wqe; - if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT)) + if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV)) *wqe += sizeof(struct mlx5_av); else *wqe += sizeof(struct mlx5_base_av); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0889ff367c86..acb79d3a4f1d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -34,6 +34,7 @@ #include <rdma/ib_umem.h> #include <rdma/ib_cache.h> #include <rdma/ib_user_verbs.h> +#include <linux/mlx5/fs.h> #include "mlx5_ib.h" /* not supported currently */ @@ -453,7 +454,8 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, return -EINVAL; } - if (attr->qp_type == IB_QPT_RAW_PACKET) { + if (attr->qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) { base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift; qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6; } else { @@ -675,10 +677,14 @@ err_umem: return err; } -static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq) +static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_rwq *rwq) { struct mlx5_ib_ucontext *context; + if (rwq->create_flags & MLX5_IB_WQ_FLAGS_DELAY_DROP) + atomic_dec(&dev->delay_drop.rqs_cnt); + context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &rwq->db); if (rwq->umem) @@ -959,11 +965,16 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, goto err_free; } - qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL); - 
qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL); - qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL); - qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL); - qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL); + qp->sq.wrid = kvmalloc_array(qp->sq.wqe_cnt, + sizeof(*qp->sq.wrid), GFP_KERNEL); + qp->sq.wr_data = kvmalloc_array(qp->sq.wqe_cnt, + sizeof(*qp->sq.wr_data), GFP_KERNEL); + qp->rq.wrid = kvmalloc_array(qp->rq.wqe_cnt, + sizeof(*qp->rq.wrid), GFP_KERNEL); + qp->sq.w_list = kvmalloc_array(qp->sq.wqe_cnt, + sizeof(*qp->sq.w_list), GFP_KERNEL); + qp->sq.wqe_head = kvmalloc_array(qp->sq.wqe_cnt, + sizeof(*qp->sq.wqe_head), GFP_KERNEL); if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid || !qp->sq.w_list || !qp->sq.wqe_head) { @@ -975,11 +986,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, return 0; err_wrid: - kfree(qp->sq.wqe_head); - kfree(qp->sq.w_list); - kfree(qp->sq.wrid); - kfree(qp->sq.wr_data); - kfree(qp->rq.wrid); + kvfree(qp->sq.wqe_head); + kvfree(qp->sq.w_list); + kvfree(qp->sq.wrid); + kvfree(qp->sq.wr_data); + kvfree(qp->rq.wrid); mlx5_db_free(dev->mdev, &qp->db); err_free: @@ -992,11 +1003,11 @@ err_buf: static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { - kfree(qp->sq.wqe_head); - kfree(qp->sq.w_list); - kfree(qp->sq.wrid); - kfree(qp->sq.wr_data); - kfree(qp->rq.wrid); + kvfree(qp->sq.wqe_head); + kvfree(qp->sq.w_list); + kvfree(qp->sq.wrid); + kvfree(qp->sq.wr_data); + kvfree(qp->rq.wrid); mlx5_db_free(dev->mdev, &qp->db); mlx5_buf_free(dev->mdev, &qp->buf); } @@ -1021,12 +1032,16 @@ static int is_connected(enum ib_qp_type qp_type) } static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, struct mlx5_ib_sq *sq, u32 tdn) { u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); MLX5_SET(tisc, tisc, transport_domain, tdn); + if (qp->flags & MLX5_IB_QP_UNDERLAY) + MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn); + return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn); } @@ -1068,11 +1083,16 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); MLX5_SET(sqc, sqc, flush_in_error_en, 1); + if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe)) + MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd)); MLX5_SET(sqc, sqc, tis_lst_sz, 1); MLX5_SET(sqc, sqc, tis_num_0, sq->tisn); + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && + MLX5_CAP_ETH(dev->mdev, swp)) + MLX5_SET(sqc, sqc, allow_swp, 1); wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); @@ -1229,7 +1249,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, u32 tdn = mucontext->tdn; if (qp->sq.wqe_cnt) { - err = create_raw_packet_qp_tis(dev, sq, tdn); + err = create_raw_packet_qp_tis(dev, qp, sq, tdn); if (err) return err; @@ -1238,6 +1258,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, goto err_destroy_tis; sq->base.container_mibqp = qp; + sq->base.mqp.event = mlx5_ib_qp_event; } if (qp->rq.wqe_cnt) { @@ -1502,10 +1523,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 *in; int err; - base = init_attr->qp_type == IB_QPT_RAW_PACKET ? 
- &qp->raw_packet_qp.rq.base : - &qp->trans_qp.base; - mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); @@ -1587,10 +1604,28 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); + + if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { + if (init_attr->qp_type != IB_QPT_UD || + (MLX5_CAP_GEN(dev->mdev, port_type) != + MLX5_CAP_PORT_TYPE_IB) || + !mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) { + mlx5_ib_dbg(dev, "Source QP option isn't supported\n"); + return -EOPNOTSUPP; + } + + qp->flags |= MLX5_IB_QP_UNDERLAY; + qp->underlay_qpn = init_attr->source_qpn; + } } else { qp->wq_sig = !!wq_signature; } + base = (init_attr->qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) ? + &qp->raw_packet_qp.rq.base : + &qp->trans_qp.base; + qp->has_rq = qp_has_rq(init_attr); err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, (pd && pd->uobject) ? &ucmd : NULL); @@ -1694,10 +1729,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr)); - if (qp->sq.wqe_cnt) + if (qp->sq.wqe_cnt) { MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt)); - else + } else { MLX5_SET(qpc, qpc, no_sq, 1); + if (init_attr->srq && + init_attr->srq->srq_type == IB_SRQT_TM) + MLX5_SET(qpc, qpc, offload_type, + MLX5_QPC_OFFLOAD_TYPE_RNDV); + } /* Set default resources */ switch (init_attr->qp_type) { @@ -1741,7 +1781,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags |= MLX5_IB_QP_LSO; } - if (init_attr->qp_type == IB_QPT_RAW_PACKET) { + if (init_attr->qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) { qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); err = create_raw_packet_qp(dev, qp, in, pd); @@ -1893,7 +1934,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { struct mlx5_ib_cq *send_cq, *recv_cq; - struct mlx5_ib_qp_base *base = &qp->trans_qp.base; + struct mlx5_ib_qp_base *base; unsigned long flags; int err; @@ -1902,12 +1943,14 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) return; } - base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ? + base = (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) ? 
&qp->raw_packet_qp.rq.base : &qp->trans_qp.base; if (qp->state != IB_QPS_RESET) { - if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) { + if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET && + !(qp->flags & MLX5_IB_QP_UNDERLAY)) { err = mlx5_core_qp_modify(dev->mdev, MLX5_CMD_OP_2RST_QP, 0, NULL, &base->mqp); @@ -1946,7 +1989,8 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) mlx5_ib_unlock_cqs(send_cq, recv_cq); spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) { destroy_raw_packet_qp(dev, qp); } else { err = mlx5_core_destroy_qp(dev->mdev, &base->mqp); @@ -2702,7 +2746,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; - } else if (ibqp->qp_type == IB_QPT_UD || + } else if ((ibqp->qp_type == IB_QPT_UD && + !(qp->flags & MLX5_IB_QP_UNDERLAY)) || ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; } else if (attr_mask & IB_QP_PATH_MTU) { @@ -2799,6 +2844,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num : qp->port) - 1; + + /* Underlay port should be used - index 0 function per port */ + if (qp->flags & MLX5_IB_QP_UNDERLAY) + port_num = 0; + mibport = &dev->port[port_num]; context->qp_counter_set_usr_page |= cpu_to_be32((u32)(mibport->cnts.set_id) << 24); @@ -2824,7 +2874,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, optpar = ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) { struct mlx5_modify_raw_qp_param raw_qp_param = {}; raw_qp_param.operation = op; @@ -2913,7 +2964,13 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port); } - if (qp_type != MLX5_IB_QPT_REG_UMR && + if (qp->flags & MLX5_IB_QP_UNDERLAY) { + if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) { + mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n", + attr_mask); + goto out; + } + } else if (qp_type != MLX5_IB_QPT_REG_UMR && !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) { mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n", cur_state, new_state, ibqp->qp_type, attr_mask); @@ -4477,9 +4534,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr); + /* Not all of output fields are applicable, make sure to zero them */ + memset(qp_init_attr, 0, sizeof(*qp_init_attr)); + memset(qp_attr, 0, sizeof(*qp_attr)); + mutex_lock(&qp->mutex); - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) { err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state); if (err) goto out; @@ -4597,6 +4659,27 @@ static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type) } } +static int set_delay_drop(struct mlx5_ib_dev *dev) +{ + int err = 0; + + mutex_lock(&dev->delay_drop.lock); + if (dev->delay_drop.activate) + goto out; + + err = mlx5_core_set_delay_drop(dev->mdev, dev->delay_drop.timeout); + if (err) + goto out; + + dev->delay_drop.activate = true; +out: + 
mutex_unlock(&dev->delay_drop.lock); + + if (!err) + atomic_inc(&dev->delay_drop.rqs_cnt); + return err; +} + static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, struct ib_wq_init_attr *init_attr) { @@ -4651,9 +4734,28 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, } MLX5_SET(rqc, rqc, scatter_fcs, 1); } + if (init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) { + if (!(dev->ib_dev.attrs.raw_packet_caps & + IB_RAW_PACKET_CAP_DELAY_DROP)) { + mlx5_ib_dbg(dev, "Delay drop is not supported\n"); + err = -EOPNOTSUPP; + goto out; + } + MLX5_SET(rqc, rqc, delay_drop_en, 1); + } rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp); + if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) { + err = set_delay_drop(dev); + if (err) { + mlx5_ib_warn(dev, "Failed to enable delay drop err=%d\n", + err); + mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); + } else { + rwq->create_flags |= MLX5_IB_WQ_FLAGS_DELAY_DROP; + } + } out: kvfree(in); return err; @@ -4787,7 +4889,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, err_copy: mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); err_user_rq: - destroy_user_rq(pd, rwq); + destroy_user_rq(dev, pd, rwq); err: kfree(rwq); return ERR_PTR(err); @@ -4799,7 +4901,7 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq) struct mlx5_ib_rwq *rwq = to_mrwq(wq); mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); - destroy_user_rq(wq->pd, rwq); + destroy_user_rq(dev, wq->pd, rwq); kfree(rwq); return 0; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 43707b101f47..6d5fadad9090 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -101,7 +101,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, udata->inlen - sizeof(ucmd))) return -EINVAL; - if (in->type == IB_SRQT_XRC) { + if (in->type != IB_SRQT_BASIC) { err = get_srq_user_index(to_mucontext(pd->uobject->context), &ucmd, udata->inlen, &uidx); if (err) @@ -145,7 +145,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; in->page_offset = offset; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && - in->type == IB_SRQT_XRC) + in->type != IB_SRQT_BASIC) in->user_index = uidx; return 0; @@ -196,7 +196,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, } mlx5_fill_page_array(&srq->buf, in->pas); - srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL); + srq->wrid = kvmalloc_array(srq->msrq.max, sizeof(u64), GFP_KERNEL); if (!srq->wrid) { err = -ENOMEM; goto err_in; @@ -205,7 +205,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && - in->type == IB_SRQT_XRC) + in->type != IB_SRQT_BASIC) in->user_index = MLX5_IB_DEFAULT_UIDX; return 0; @@ -230,7 +230,7 @@ static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq) static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq) { - kfree(srq->wrid); + kvfree(srq->wrid); mlx5_buf_free(dev->mdev, &srq->buf); mlx5_db_free(dev->mdev, &srq->db); } @@ -292,14 +292,29 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, in.wqe_shift = srq->msrq.wqe_shift - 4; if (srq->wq_sig) in.flags |= 
MLX5_SRQ_FLAG_WQ_SIG;
-	if (init_attr->srq_type == IB_SRQT_XRC) {
+
+	if (init_attr->srq_type == IB_SRQT_XRC)
 		in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
-		in.cqn = to_mcq(init_attr->ext.xrc.cq)->mcq.cqn;
-	} else if (init_attr->srq_type == IB_SRQT_BASIC) {
+	else
 		in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn;
-		in.cqn = to_mcq(dev->devr.c0)->mcq.cqn;
+
+	if (init_attr->srq_type == IB_SRQT_TM) {
+		in.tm_log_list_size =
+			ilog2(init_attr->ext.tag_matching.max_num_tags) + 1;
+		if (in.tm_log_list_size >
+		    MLX5_CAP_GEN(dev->mdev, log_tag_matching_list_sz)) {
+			mlx5_ib_dbg(dev, "TM SRQ max_num_tags exceeding limit\n");
+			err = -EINVAL;
+			goto err_usr_kern_srq;
+		}
+		in.flags |= MLX5_SRQ_FLAG_RNDV;
 	}
+
+	if (ib_srq_has_cq(init_attr->srq_type))
+		in.cqn = to_mcq(init_attr->ext.cq)->mcq.cqn;
+	else
+		in.cqn = to_mcq(dev->devr.c0)->mcq.cqn;
+
 	in.pd = to_mpd(pd)->pdn;
 	in.db_record = srq->db.dma;
 	err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);
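
A few notes on the recurring patterns in the hunks above, each with a small stand-alone C sketch.

The clean_mr() hunk in mr.c is a use-after-free fix as much as a rename: destroy_mkey() can fail, and the old code printed mr->mmkey.key after the MR had been freed, so the new ordering snapshots the key before kfree(). A minimal userspace sketch of the same save-before-free shape (all names here are hypothetical stand-ins, not the driver's API):

#include <stdio.h>
#include <stdlib.h>

struct mr_obj { unsigned int key; };

static int destroy_mkey_hw(unsigned int key) { (void)key; return 0; } /* stub for the HW teardown */

static int clean_mr_sketch(struct mr_obj *mr)
{
	unsigned int key = mr->key;	/* snapshot before the object goes away */
	int err = destroy_mkey_hw(key);

	free(mr);			/* mr must not be touched past this point */
	if (err)
		fprintf(stderr, "failed to destroy mkey 0x%x (%d)\n", key, err);
	return err;
}

int main(void)
{
	struct mr_obj *mr = malloc(sizeof(*mr));

	if (!mr)
		return 1;
	mr->key = 0x1234;
	return clean_mr_sketch(mr);
}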
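The one-line odp.c change corrects both halves of a byte-order test: the flag constant (MLX5_EXTENDED_UD_AV rather than MLX5_WQE_AV_EXT) and the conversion direction. Since av->dqp_dct is stored big-endian, the idiomatic test converts the CPU-order constant once with cpu_to_be32() and masks the raw field. A userspace analogue using htonl(), with a made-up flag value:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

#define EXT_AV_FLAG 0x00000100u		/* CPU-order flag bit, illustrative only */

int main(void)
{
	uint32_t dqp_dct = htonl(EXT_AV_FLAG | 0x2a);	/* field kept in wire (big-endian) order */

	/* Convert the constant, not the field: the mask is computed once
	 * and the test behaves identically on LE and BE hosts. */
	if (dqp_dct & htonl(EXT_AV_FLAG))
		puts("extended UD AV present");
	return 0;
}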
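The allocation hunks in qp.c and srq.c replace open-coded kmalloc(n * size) with kvmalloc_array(), which checks the multiplication for overflow and may fall back to vmalloc when a large wqe_cnt makes physically contiguous memory hard to obtain; kvfree() then frees either kind. The overflow check is the essential part. In plain C it looks like the hypothetical helper below (libc's calloc() performs the same check internally):

#include <stdint.h>
#include <stdlib.h>

/* Overflow-checked array allocation in the spirit of kvmalloc_array():
 * reject n * size if the product would wrap, rather than silently
 * under-allocating. */
static void *malloc_array(size_t n, size_t size)
{
	if (size != 0 && n > SIZE_MAX / size)
		return NULL;
	return malloc(n * size);
}

int main(void)
{
	void *p = malloc_array((size_t)1 << 20, sizeof(uint64_t));

	free(p);	/* free(NULL) is a no-op, much like kvfree() */
	return 0;
}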
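mlx5_ib_query_qp() now memset()s both out-parameters before filling them, because not every code path writes every field; without the zeroing, callers could read stale stack contents from fields a given QP type leaves untouched. Reduced to its essentials (sketch types, not the verbs structures):

#include <stdio.h>
#include <string.h>

struct qp_attr_sketch { int state; int mtu; int rate; };

static void query_sketch(struct qp_attr_sketch *attr)
{
	/* Zero the out-parameter first so fields this query never writes
	 * read back as 0 instead of stale caller-stack data. */
	memset(attr, 0, sizeof(*attr));
	attr->state = 1;	/* only the applicable fields get filled */
}

int main(void)
{
	struct qp_attr_sketch attr;

	query_sketch(&attr);
	printf("state=%d mtu=%d rate=%d\n", attr.state, attr.mtu, attr.rate);
	return 0;
}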
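set_delay_drop() combines two common shapes: a mutex-guarded one-time latch (activate) so the firmware timeout is programmed only once, and a separate atomic counter (rqs_cnt) tracking how many RQs rely on the feature, which destroy_user_rq() decrements. A compact pthreads sketch of the same structure, with stand-in names for the firmware call:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct delay_drop {
	pthread_mutex_t	lock;
	bool		activate;	/* latched once under lock */
	atomic_int	rqs_cnt;	/* RQs currently using the feature */
	int		timeout;
};

static int hw_set_delay_drop(int timeout) { (void)timeout; return 0; } /* stub for the FW command */

static int set_delay_drop(struct delay_drop *dd)
{
	int err = 0;

	pthread_mutex_lock(&dd->lock);
	if (!dd->activate) {
		err = hw_set_delay_drop(dd->timeout);
		if (!err)
			dd->activate = true;	/* program the device only once */
	}
	pthread_mutex_unlock(&dd->lock);

	if (!err)
		atomic_fetch_add(&dd->rqs_cnt, 1);	/* paired with a dec on RQ destroy */
	return err;
}

int main(void)
{
	struct delay_drop dd = { .lock = PTHREAD_MUTEX_INITIALIZER, .timeout = 100 };

	return set_delay_drop(&dd);
}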
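Finally, the tag-matching SRQ path in srq.c sizes the hardware match list as ilog2(max_num_tags) + 1, i.e. one more than floor(log2), so the list always has room for the requested tag count, and rejects the request when that exceeds the device's log_tag_matching_list_sz capability. The rounding and cap check, stand-alone (the capability value below is assumed for illustration):

#include <stdio.h>

/* ilog2() as in the kernel: floor(log2(n)) for n > 0 */
static unsigned int ilog2_u32(unsigned int n)
{
	unsigned int l = 0;

	while (n >>= 1)
		l++;
	return l;
}

int main(void)
{
	unsigned int max_num_tags = 100;
	unsigned int cap_log_list_sz = 6;	/* assumed device capability */
	unsigned int log_list_size = ilog2_u32(max_num_tags) + 1;

	if (log_list_size > cap_log_list_sz)
		fprintf(stderr, "TM SRQ max_num_tags exceeding limit\n");
	else
		printf("tm_log_list_size = %u\n", log_list_size);
	return 0;
}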