diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r-- | drivers/infiniband/hw/mlx5/cmd.c | 26 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/cmd.h | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/counters.c | 283 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/devx.c | 13 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/devx.h | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/fs.c | 187 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 55 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 59 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 109 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c | 78 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/wr.c | 10 |
11 files changed, 633 insertions, 191 deletions
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index a8db8a051170..ff3742b0460a 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -206,3 +206,29 @@ out: kfree(in); return err; } + +int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid) +{ + u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {}; + int err; + + MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR); + MLX5_SET(alloc_uar_in, in, uid, uid); + err = mlx5_cmd_exec_inout(dev, alloc_uar, in, out); + if (err) + return err; + + *uarn = MLX5_GET(alloc_uar_out, out, uar); + return 0; +} + +int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {}; + + MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR); + MLX5_SET(dealloc_uar_in, in, uar, uarn); + MLX5_SET(dealloc_uar_in, in, uid, uid); + return mlx5_cmd_exec_in(dev, dealloc_uar, in); +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 66c96292ed43..ee46638db5de 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -57,4 +57,6 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid); int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid); int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port); +int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid); +int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 224ba36f2946..945758f39523 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -12,6 +12,7 @@ struct mlx5_ib_counter { const char *name; size_t offset; + u32 type; }; #define INIT_Q_COUNTER(_name) \ @@ -75,6 +76,21 @@ static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), }; +#define INIT_OP_COUNTER(_name, _type) \ + { .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type} + +static const struct mlx5_ib_counter basic_op_cnts[] = { + INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS), +}; + +static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = { + INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS), +}; + +static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = { + INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS), +}; + static int mlx5_ib_read_counters(struct ib_counters *counters, struct ib_counters_read_attr *read_attr, struct uverbs_attr_bundle *attrs) @@ -161,17 +177,34 @@ u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num) return cnts->set_id; } +static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts) +{ + struct rdma_hw_stats *stats; + u32 num_hw_counters; + int i; + + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + stats = rdma_alloc_hw_stats_struct(cnts->descs, + num_hw_counters + + cnts->num_op_counters, + RDMA_HW_STATS_DEFAULT_LIFESPAN); + if (!stats) + return NULL; + + for (i = 0; i < cnts->num_op_counters; i++) + set_bit(num_hw_counters + i, stats->is_disabled); + + return stats; +} + static struct rdma_hw_stats * mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev) { struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts = &dev->port[0].cnts; - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); + return do_alloc_stats(cnts); } static struct rdma_hw_stats * @@ -180,11 +213,7 @@ mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts; - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); + return do_alloc_stats(cnts); } static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, @@ -241,9 +270,9 @@ free: return ret; } -static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, - struct rdma_hw_stats *stats, - u32 port_num, int index) +static int do_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num, int index) { struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); @@ -295,6 +324,88 @@ done: return num_counters; } +static int do_get_op_stat(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num, int index) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts; + const struct mlx5_ib_op_fc *opfcs; + u64 packets = 0, bytes; + u32 type; + int ret; + + cnts = get_counters(dev, port_num - 1); + opfcs = cnts->opfcs; + type = *(u32 *)cnts->descs[index].priv; + if (type >= MLX5_IB_OPCOUNTER_MAX) + return -EINVAL; + + if (!opfcs[type].fc) + goto out; + + ret = mlx5_fc_query(dev->mdev, opfcs[type].fc, + &packets, &bytes); + if (ret) + return ret; + +out: + stats->value[index] = packets; + return index; +} + +static int do_get_op_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts; + int index, ret, num_hw_counters; + + cnts = get_counters(dev, port_num - 1); + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + for (index = num_hw_counters; + index < (num_hw_counters + cnts->num_op_counters); index++) { + ret = do_get_op_stat(ibdev, stats, port_num, index); + if (ret != index) + return ret; + } + + return cnts->num_op_counters; +} + +static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num, int index) +{ + int num_counters, num_hw_counters, num_op_counters; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts; + + cnts = get_counters(dev, port_num - 1); + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + num_counters = num_hw_counters + cnts->num_op_counters; + + if (index < 0 || index > num_counters) + return -EINVAL; + else if (index > 0 && index < num_hw_counters) + return do_get_hw_stats(ibdev, stats, port_num, index); + else if (index >= num_hw_counters && index < num_counters) + return do_get_op_stat(ibdev, stats, port_num, index); + + num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index); + if (num_hw_counters < 0) + return num_hw_counters; + + num_op_counters = do_get_op_stats(ibdev, stats, port_num); + if (num_op_counters < 0) + return num_op_counters; + + return num_hw_counters + num_op_counters; +} + static struct rdma_hw_stats * mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) { @@ -302,11 +413,7 @@ mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port - 1); - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); + return do_alloc_stats(cnts); } static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) @@ -371,67 +478,89 @@ static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) return mlx5_ib_qp_set_counter(qp, NULL); } - static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, - const char **names, - size_t *offsets) + struct rdma_stat_desc *descs, size_t *offsets) { int i; int j = 0; for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { - names[j] = basic_q_cnts[i].name; + descs[j].name = basic_q_cnts[i].name; offsets[j] = basic_q_cnts[i].offset; } if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { - names[j] = out_of_seq_q_cnts[i].name; + descs[j].name = out_of_seq_q_cnts[i].name; offsets[j] = out_of_seq_q_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { - names[j] = retrans_q_cnts[i].name; + descs[j].name = retrans_q_cnts[i].name; offsets[j] = retrans_q_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { - names[j] = extended_err_cnts[i].name; + descs[j].name = extended_err_cnts[i].name; offsets[j] = extended_err_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { - names[j] = roce_accl_cnts[i].name; + descs[j].name = roce_accl_cnts[i].name; offsets[j] = roce_accl_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { - names[j] = cong_cnts[i].name; + descs[j].name = cong_cnts[i].name; offsets[j] = cong_cnts[i].offset; } } if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { - names[j] = ext_ppcnt_cnts[i].name; + descs[j].name = ext_ppcnt_cnts[i].name; offsets[j] = ext_ppcnt_cnts[i].offset; } } + + for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) { + descs[j].name = basic_op_cnts[i].name; + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + descs[j].priv = &basic_op_cnts[i].type; + } + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_receive_rdma.bth_opcode)) { + for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) { + descs[j].name = rdmarx_cnp_op_cnts[i].name; + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + descs[j].priv = &rdmarx_cnp_op_cnts[i].type; + } + } + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_transmit_rdma.bth_opcode)) { + for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) { + descs[j].name = rdmatx_cnp_op_cnts[i].name; + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + descs[j].priv = &rdmatx_cnp_op_cnts[i].type; + } + } } static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, struct mlx5_ib_counters *cnts) { - u32 num_counters; + u32 num_counters, num_op_counters; num_counters = ARRAY_SIZE(basic_q_cnts); @@ -457,20 +586,34 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); num_counters += ARRAY_SIZE(ext_ppcnt_cnts); } - cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL); - if (!cnts->names) + + num_op_counters = ARRAY_SIZE(basic_op_cnts); + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_receive_rdma.bth_opcode)) + num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts); + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_transmit_rdma.bth_opcode)) + num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts); + + cnts->num_op_counters = num_op_counters; + num_counters += num_op_counters; + cnts->descs = kcalloc(num_counters, + sizeof(struct rdma_stat_desc), GFP_KERNEL); + if (!cnts->descs) return -ENOMEM; cnts->offsets = kcalloc(num_counters, sizeof(*cnts->offsets), GFP_KERNEL); if (!cnts->offsets) - goto err_names; + goto err; return 0; -err_names: - kfree(cnts->names); - cnts->names = NULL; +err: + kfree(cnts->descs); + cnts->descs = NULL; return -ENOMEM; } @@ -478,7 +621,7 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; int num_cnt_ports; - int i; + int i, j; num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; @@ -491,8 +634,20 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) dev->port[i].cnts.set_id); mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); } - kfree(dev->port[i].cnts.names); + kfree(dev->port[i].cnts.descs); kfree(dev->port[i].cnts.offsets); + + for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) { + if (!dev->port[i].cnts.opfcs[j].fc) + continue; + + if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) + mlx5_ib_fs_remove_op_fc(dev, + &dev->port[i].cnts.opfcs[j], j); + mlx5_fc_destroy(dev->mdev, + dev->port[i].cnts.opfcs[j].fc); + dev->port[i].cnts.opfcs[j].fc = NULL; + } } } @@ -514,7 +669,7 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) if (err) goto err_alloc; - mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, + mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs, dev->port[i].cnts.offsets); MLX5_SET(alloc_q_counter_in, in, uid, @@ -672,6 +827,56 @@ void mlx5_ib_counters_clear_description(struct ib_counters *counters) mutex_unlock(&mcounters->mcntrs_mutex); } +static int mlx5_ib_modify_stat(struct ib_device *device, u32 port, + unsigned int index, bool enable) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_ib_counters *cnts; + struct mlx5_ib_op_fc *opfc; + u32 num_hw_counters, type; + int ret; + + cnts = &dev->port[port - 1].cnts; + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + if (index < num_hw_counters || + index >= (num_hw_counters + cnts->num_op_counters)) + return -EINVAL; + + if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) + return -EINVAL; + + type = *(u32 *)cnts->descs[index].priv; + if (type >= MLX5_IB_OPCOUNTER_MAX) + return -EINVAL; + + opfc = &cnts->opfcs[type]; + + if (enable) { + if (opfc->fc) + return -EEXIST; + + opfc->fc = mlx5_fc_create(dev->mdev, false); + if (IS_ERR(opfc->fc)) + return PTR_ERR(opfc->fc); + + ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type); + if (ret) { + mlx5_fc_destroy(dev->mdev, opfc->fc); + opfc->fc = NULL; + } + return ret; + } + + if (!opfc->fc) + return -EINVAL; + + mlx5_ib_fs_remove_op_fc(dev, opfc, type); + mlx5_fc_destroy(dev->mdev, opfc->fc); + opfc->fc = NULL; + return 0; +} + static const struct ib_device_ops hw_stats_ops = { .alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats, .get_hw_stats = mlx5_ib_get_hw_stats, @@ -680,6 +885,8 @@ static const struct ib_device_ops hw_stats_ops = { .counter_dealloc = mlx5_ib_counter_dealloc, .counter_alloc_stats = mlx5_ib_counter_alloc_stats, .counter_update_stats = mlx5_ib_counter_update_stats, + .modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ? + mlx5_ib_modify_stat : NULL, }; static const struct ib_device_ops hw_switchdev_stats_ops = { diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index e95967aefe78..08b7f6bc56c3 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1292,21 +1292,16 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, struct mlx5_ib_dev *dev, void *in, void *out) { - struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr; - struct mlx5_core_mkey *mkey; + struct mlx5_ib_mkey *mkey = &obj->mkey; void *mkc; u8 key; - mkey = &devx_mr->mmkey; mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); key = MLX5_GET(mkc, mkc, mkey_7_0); mkey->key = mlx5_idx_to_mkey( MLX5_GET(create_mkey_out, out, mkey_index)) | key; mkey->type = MLX5_MKEY_INDIRECT_DEVX; - mkey->iova = MLX5_GET64(mkc, mkc, start_addr); - mkey->size = MLX5_GET64(mkc, mkc, len); - mkey->pd = MLX5_GET(mkc, mkc, pd); - devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); + mkey->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); init_waitqueue_head(&mkey->wait); return mlx5r_store_odp_mkey(dev, mkey); @@ -1384,13 +1379,13 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, dev = mlx5_udata_to_mdev(&attrs->driver_udata); if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY && xa_erase(&obj->ib_dev->odp_mkeys, - mlx5_base_mkey(obj->devx_mr.mmkey.key))) + mlx5_base_mkey(obj->mkey.key))) /* * The pagefault_single_data_segment() does commands against * the mmkey, we must wait for that to stop before freeing the * mkey, as another allocation could get the same mkey #. */ - mlx5r_deref_wait_odp_mkey(&obj->devx_mr.mmkey); + mlx5r_deref_wait_odp_mkey(&obj->mkey); if (obj->flags & DEVX_OBJ_FLAGS_DCT) ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct); diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h index 1f69866aed16..ee2213275fd6 100644 --- a/drivers/infiniband/hw/mlx5/devx.h +++ b/drivers/infiniband/hw/mlx5/devx.h @@ -16,7 +16,7 @@ struct devx_obj { u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; u32 flags; union { - struct mlx5_ib_devx_mr devx_mr; + struct mlx5_ib_mkey mkey; struct mlx5_core_dct core_dct; struct mlx5_core_cq core_cq; u32 flow_counter_bulk_size; diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 5fbc0a8454b9..b780185d9dc6 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -10,12 +10,14 @@ #include <rdma/uverbs_std_types.h> #include <rdma/mlx5_user_ioctl_cmds.h> #include <rdma/mlx5_user_ioctl_verbs.h> +#include <rdma/ib_hdrs.h> #include <rdma/ib_umem.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/fs_helpers.h> #include <linux/mlx5/accel.h> #include <linux/mlx5/eswitch.h> +#include <net/inet_ecn.h> #include "mlx5_ib.h" #include "counters.h" #include "devx.h" @@ -847,6 +849,191 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, return prio; } +enum { + RDMA_RX_ECN_OPCOUNTER_PRIO, + RDMA_RX_CNP_OPCOUNTER_PRIO, +}; + +enum { + RDMA_TX_CNP_OPCOUNTER_PRIO, +}; + +static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_flow_spec *spec) +{ + if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, + ft_field_support.source_vhca_port) || + !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, + ft_field_support.source_vhca_port)) + return -EOPNOTSUPP; + + MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria, + misc_parameters.source_vhca_port); + MLX5_SET(fte_match_param, &spec->match_value, + misc_parameters.source_vhca_port, port_num); + + return 0; +} + +static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_flow_spec *spec, int ipv) +{ + if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, + ft_field_support.outer_ip_version)) + return -EOPNOTSUPP; + + if (mlx5_core_mp_enabled(dev->mdev) && + set_vhca_port_spec(dev, port_num, spec)) + return -EOPNOTSUPP; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ip_ecn); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn, + INET_ECN_CE); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, + ipv); + + spec->match_criteria_enable = + get_match_criteria_enable(spec->match_criteria); + + return 0; +} + +static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_flow_spec *spec) +{ + if (mlx5_core_mp_enabled(dev->mdev) && + set_vhca_port_spec(dev, port_num, spec)) + return -EOPNOTSUPP; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.bth_opcode); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode, + IB_BTH_OPCODE_CNP); + + spec->match_criteria_enable = + get_match_criteria_enable(spec->match_criteria); + + return 0; +} + +int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type) +{ + enum mlx5_flow_namespace_type fn_type; + int priority, i, err, spec_num; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_destination dst; + struct mlx5_flow_namespace *ns; + struct mlx5_ib_flow_prio *prio; + struct mlx5_flow_spec *spec; + + spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + switch (type) { + case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS: + if (set_ecn_ce_spec(dev, port_num, &spec[0], + MLX5_FS_IPV4_VERSION) || + set_ecn_ce_spec(dev, port_num, &spec[1], + MLX5_FS_IPV6_VERSION)) { + err = -EOPNOTSUPP; + goto free; + } + spec_num = 2; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_ECN_OPCOUNTER_PRIO; + break; + + case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS: + if (!MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_receive_rdma.bth_opcode) || + set_cnp_spec(dev, port_num, &spec[0])) { + err = -EOPNOTSUPP; + goto free; + } + spec_num = 1; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_CNP_OPCOUNTER_PRIO; + break; + + case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS: + if (!MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_transmit_rdma.bth_opcode) || + set_cnp_spec(dev, port_num, &spec[0])) { + err = -EOPNOTSUPP; + goto free; + } + spec_num = 1; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS; + priority = RDMA_TX_CNP_OPCOUNTER_PRIO; + break; + + default: + err = -EOPNOTSUPP; + goto free; + } + + ns = mlx5_get_flow_namespace(dev->mdev, fn_type); + if (!ns) { + err = -EOPNOTSUPP; + goto free; + } + + prio = &dev->flow_db->opfcs[type]; + if (!prio->flow_table) { + prio = _get_prio(ns, prio, priority, + dev->num_ports * MAX_OPFC_RULES, 1, 0); + if (IS_ERR(prio)) { + err = PTR_ERR(prio); + goto free; + } + } + + dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dst.counter_id = mlx5_fc_id(opfc->fc); + + flow_act.action = + MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW; + + for (i = 0; i < spec_num; i++) { + opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i], + &flow_act, &dst, 1); + if (IS_ERR(opfc->rule[i])) { + err = PTR_ERR(opfc->rule[i]); + goto del_rules; + } + } + prio->refcount += spec_num; + kfree(spec); + + return 0; + +del_rules: + for (i -= 1; i >= 0; i--) + mlx5_del_flow_rules(opfc->rule[i]); + put_flow_table(dev, prio, false); +free: + kfree(spec); + return err; +} + +void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type) +{ + int i; + + for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) { + mlx5_del_flow_rules(opfc->rule[i]); + put_flow_table(dev, &dev->flow_db->opfcs[type], true); + } +} + static void set_underlay_qp(struct mlx5_ib_dev *dev, struct mlx5_flow_spec *spec, u32 underlay_qpn) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8664bcf6d3f5..5ec8bd2f0b2f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1643,7 +1643,8 @@ static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *conte bfregi = &context->bfregi; for (i = 0; i < bfregi->num_static_sys_pages; i++) { - err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]); + err = mlx5_cmd_uar_alloc(dev->mdev, &bfregi->sys_pages[i], + context->devx_uid); if (err) goto error; @@ -1657,7 +1658,8 @@ static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *conte error: for (--i; i >= 0; i--) - if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i])) + if (mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i], + context->devx_uid)) mlx5_ib_warn(dev, "failed to free uar %d\n", i); return err; @@ -1673,7 +1675,8 @@ static void deallocate_uars(struct mlx5_ib_dev *dev, for (i = 0; i < bfregi->num_sys_pages; i++) if (i < bfregi->num_static_sys_pages || bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) - mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); + mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i], + context->devx_uid); } int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) @@ -1891,6 +1894,13 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, if (req.num_low_latency_bfregs > req.total_num_bfregs - 1) return -EINVAL; + if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { + err = mlx5_ib_devx_create(dev, true); + if (err < 0) + goto out_ctx; + context->devx_uid = err; + } + lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR; lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR; bfregi = &context->bfregi; @@ -1903,7 +1913,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, /* updates req->total_num_bfregs */ err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi); if (err) - goto out_ctx; + goto out_devx; mutex_init(&bfregi->lock); bfregi->lib_uar_4k = lib_uar_4k; @@ -1911,7 +1921,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, GFP_KERNEL); if (!bfregi->count) { err = -ENOMEM; - goto out_ctx; + goto out_devx; } bfregi->sys_pages = kcalloc(bfregi->num_sys_pages, @@ -1927,17 +1937,10 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, goto out_sys_pages; uar_done: - if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { - err = mlx5_ib_devx_create(dev, true); - if (err < 0) - goto out_uars; - context->devx_uid = err; - } - err = mlx5_ib_alloc_transport_domain(dev, &context->tdn, context->devx_uid); if (err) - goto out_devx; + goto out_uars; INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); @@ -1972,9 +1975,6 @@ uar_done: out_mdev: mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); -out_devx: - if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) - mlx5_ib_devx_destroy(dev, context->devx_uid); out_uars: deallocate_uars(dev, context); @@ -1985,6 +1985,10 @@ out_sys_pages: out_count: kfree(bfregi->count); +out_devx: + if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) + mlx5_ib_devx_destroy(dev, context->devx_uid); + out_ctx: return err; } @@ -2021,12 +2025,12 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) bfregi = &context->bfregi; mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); - if (context->devx_uid) - mlx5_ib_devx_destroy(dev, context->devx_uid); - deallocate_uars(dev, context); kfree(bfregi->sys_pages); kfree(bfregi->count); + + if (context->devx_uid) + mlx5_ib_devx_destroy(dev, context->devx_uid); } static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, @@ -2119,6 +2123,7 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry) struct mlx5_user_mmap_entry *mentry = to_mmmap(entry); struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device); struct mlx5_var_table *var_table = &dev->var_table; + struct mlx5_ib_ucontext *context = to_mucontext(entry->ucontext); switch (mentry->mmap_flag) { case MLX5_IB_MMAP_TYPE_MEMIC: @@ -2133,7 +2138,8 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry) break; case MLX5_IB_MMAP_TYPE_UAR_WC: case MLX5_IB_MMAP_TYPE_UAR_NC: - mlx5_cmd_free_uar(dev->mdev, mentry->page_idx); + mlx5_cmd_uar_dealloc(dev->mdev, mentry->page_idx, + context->devx_uid); kfree(mentry); break; default: @@ -2211,7 +2217,8 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, bfregi->count[bfreg_dyn_idx]++; mutex_unlock(&bfregi->lock); - err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index); + err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index, + context->devx_uid); if (err) { mlx5_ib_warn(dev, "UAR alloc failed\n"); goto free_bfreg; @@ -2240,7 +2247,7 @@ err: if (!dyn_uar) return err; - mlx5_cmd_free_uar(dev->mdev, idx); + mlx5_cmd_uar_dealloc(dev->mdev, idx, context->devx_uid); free_bfreg: mlx5_ib_free_bfreg(dev, bfregi, bfreg_dyn_idx); @@ -3489,7 +3496,7 @@ alloc_uar_entry(struct mlx5_ib_ucontext *c, return ERR_PTR(-ENOMEM); dev = to_mdev(c->ibucontext.device); - err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index); + err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index, c->devx_uid); if (err) goto end; @@ -3507,7 +3514,7 @@ alloc_uar_entry(struct mlx5_ib_ucontext *c, return entry; err_insert: - mlx5_cmd_free_uar(dev->mdev, uar_index); + mlx5_cmd_uar_dealloc(dev->mdev, uar_index, c->devx_uid); end: kfree(entry); return ERR_PTR(err); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index bf20a388eabe..e636e954f6bf 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -263,6 +263,14 @@ struct mlx5_ib_pp { struct mlx5_core_dev *mdev; }; +enum mlx5_ib_optional_counter_type { + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS, + MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS, + MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS, + + MLX5_IB_OPCOUNTER_MAX, +}; + struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT]; @@ -271,6 +279,7 @@ struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio fdb; struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT]; + struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX]; struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. @@ -619,6 +628,20 @@ struct mlx5_user_mmap_entry { u32 page_idx; }; +enum mlx5_mkey_type { + MLX5_MKEY_MR = 1, + MLX5_MKEY_MW, + MLX5_MKEY_INDIRECT_DEVX, +}; + +struct mlx5_ib_mkey { + u32 key; + enum mlx5_mkey_type type; + unsigned int ndescs; + struct wait_queue_head wait; + refcount_t usecount; +}; + #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) #define MLX5_IB_DM_MEMIC_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\ @@ -637,7 +660,7 @@ struct mlx5_user_mmap_entry { struct mlx5_ib_mr { struct ib_mr ibmr; - struct mlx5_core_mkey mmkey; + struct mlx5_ib_mkey mmkey; /* User MR data */ struct mlx5_cache_ent *cache_ent; @@ -659,7 +682,6 @@ struct mlx5_ib_mr { void *descs_alloc; dma_addr_t desc_map; int max_descs; - int ndescs; int desc_size; int access_mode; @@ -713,13 +735,7 @@ static inline bool is_dmabuf_mr(struct mlx5_ib_mr *mr) struct mlx5_ib_mw { struct ib_mw ibmw; - struct mlx5_core_mkey mmkey; - int ndescs; -}; - -struct mlx5_ib_devx_mr { - struct mlx5_core_mkey mmkey; - int ndescs; + struct mlx5_ib_mkey mmkey; }; struct mlx5_ib_umr_context { @@ -797,15 +813,32 @@ struct mlx5_ib_resources { struct mlx5_ib_port_resources ports[2]; }; +#define MAX_OPFC_RULES 2 + +struct mlx5_ib_op_fc { + struct mlx5_fc *fc; + struct mlx5_flow_handle *rule[MAX_OPFC_RULES]; +}; + struct mlx5_ib_counters { - const char **names; + struct rdma_stat_desc *descs; size_t *offsets; u32 num_q_counters; u32 num_cong_counters; u32 num_ext_ppcnt_counters; + u32 num_op_counters; u16 set_id; + struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX]; }; +int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type); + +void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type); + struct mlx5_ib_multiport_info; struct mlx5_ib_multiport { @@ -1579,7 +1612,7 @@ static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev, } static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev, - struct mlx5_core_mkey *mmkey) + struct mlx5_ib_mkey *mmkey) { refcount_set(&mmkey->usecount, 1); @@ -1588,14 +1621,14 @@ static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev, } /* deref an mkey that can participate in ODP flow */ -static inline void mlx5r_deref_odp_mkey(struct mlx5_core_mkey *mmkey) +static inline void mlx5r_deref_odp_mkey(struct mlx5_ib_mkey *mmkey) { if (refcount_dec_and_test(&mmkey->usecount)) wake_up(&mmkey->wait); } /* deref an mkey that can participate in ODP flow and wait for relese */ -static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_core_mkey *mmkey) +static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_ib_mkey *mmkey) { mlx5r_deref_odp_mkey(mmkey); wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 22e2f4d79743..157d862fb864 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -88,9 +88,8 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, MLX5_SET64(mkc, mkc, start_addr, start_addr); } -static void -assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in) +static void assign_mkey_variant(struct mlx5_ib_dev *dev, + struct mlx5_ib_mkey *mkey, u32 *in) { u8 key = atomic_inc_return(&dev->mkey_var); void *mkc; @@ -100,17 +99,22 @@ assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, mkey->key = key; } -static int -mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen) +static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, + struct mlx5_ib_mkey *mkey, u32 *in, int inlen) { + int ret; + assign_mkey_variant(dev, mkey, in); - return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen); + ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen); + if (!ret) + init_waitqueue_head(&mkey->wait); + + return ret; } static int mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, - struct mlx5_core_mkey *mkey, + struct mlx5_ib_mkey *mkey, struct mlx5_async_ctx *async_ctx, u32 *in, int inlen, u32 *out, int outlen, struct mlx5_async_work *context) @@ -133,7 +137,7 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); - return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); + return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key); } static void create_mkey_callback(int status, struct mlx5_async_work *context) @@ -260,10 +264,11 @@ static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent) goto free_in; } - err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen); + err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey.key, in, inlen); if (err) goto free_mr; + init_waitqueue_head(&mr->mmkey.wait); mr->mmkey.type = MLX5_MKEY_MR; WRITE_ONCE(ent->dev->cache.last_add, jiffies); spin_lock_irq(&ent->lock); @@ -290,7 +295,7 @@ static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) ent->available_mrs--; ent->total_mrs--; spin_unlock_irq(&ent->lock); - mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey); + mlx5_core_destroy_mkey(ent->dev->mdev, mr->mmkey.key); kfree(mr); spin_lock_irq(&ent->lock); } @@ -600,29 +605,21 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, /* Return a MR already available in the cache */ static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent) { - struct mlx5_ib_dev *dev = req_ent->dev; struct mlx5_ib_mr *mr = NULL; struct mlx5_cache_ent *ent = req_ent; - /* Try larger MR pools from the cache to satisfy the allocation */ - for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) { - mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order, - ent - dev->cache.ent); - - spin_lock_irq(&ent->lock); - if (!list_empty(&ent->head)) { - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, - list); - list_del(&mr->list); - ent->available_mrs--; - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); - mlx5_clear_mr(mr); - return mr; - } + spin_lock_irq(&ent->lock); + if (!list_empty(&ent->head)) { + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->available_mrs--; queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); + mlx5_clear_mr(mr); + return mr; } + queue_adjust_cache_locked(ent); + spin_unlock_irq(&ent->lock); req_ent->miss++; return NULL; } @@ -658,7 +655,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) ent->available_mrs--; ent->total_mrs--; spin_unlock_irq(&ent->lock); - mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); + mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key); } list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { @@ -911,12 +908,13 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, } static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - u64 length, int access_flags) + u64 length, int access_flags, u64 iova) { mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; mr->ibmr.length = length; mr->ibmr.device = &dev->ib_dev; + mr->ibmr.iova = iova; mr->access_flags = access_flags; } @@ -974,11 +972,8 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, mr->ibmr.pd = pd; mr->umem = umem; - mr->mmkey.iova = iova; - mr->mmkey.size = umem->length; - mr->mmkey.pd = to_mpd(pd)->pdn; mr->page_shift = order_base_2(page_size); - set_mr_fields(dev, mr, umem->length, access_flags); + set_mr_fields(dev, mr, umem->length, access_flags, iova); return mr; } @@ -1087,8 +1082,8 @@ static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, wr->wr.opcode = MLX5_IB_WR_UMR; wr->pd = mr->ibmr.pd; wr->mkey = mr->mmkey.key; - wr->length = mr->mmkey.size; - wr->virt_addr = mr->mmkey.iova; + wr->length = mr->ibmr.length; + wr->virt_addr = mr->ibmr.iova; wr->access_flags = mr->access_flags; wr->page_shift = mr->page_shift; wr->xlt_size = sg->length; @@ -1340,7 +1335,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, } mr->mmkey.type = MLX5_MKEY_MR; mr->umem = umem; - set_mr_fields(dev, mr, umem->length, access_flags); + set_mr_fields(dev, mr, umem->length, access_flags, iova); kvfree(in); mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); @@ -1387,7 +1382,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, kfree(in); - set_mr_fields(dev, mr, length, acc); + set_mr_fields(dev, mr, length, acc, start_addr); return &mr->ibmr; @@ -1709,7 +1704,6 @@ static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, return err; mr->access_flags = access_flags; - mr->mmkey.pd = to_mpd(pd)->pdn; return 0; } @@ -1754,7 +1748,6 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, if (flags & IB_MR_REREG_PD) { mr->ibmr.pd = pd; - mr->mmkey.pd = to_mpd(pd)->pdn; upd_flags |= MLX5_IB_UPD_XLT_PD; } if (flags & IB_MR_REREG_ACCESS) { @@ -1763,8 +1756,8 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, } mr->ibmr.length = new_umem->length; - mr->mmkey.iova = iova; - mr->mmkey.size = new_umem->length; + mr->ibmr.iova = iova; + mr->ibmr.length = new_umem->length; mr->page_shift = order_base_2(page_size); mr->umem = new_umem; err = mlx5_ib_update_mr_pas(mr, upd_flags); @@ -1834,7 +1827,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, mr->umem = NULL; atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); - return create_real_mr(new_pd, umem, mr->mmkey.iova, + return create_real_mr(new_pd, umem, mr->ibmr.iova, new_access_flags); } @@ -2263,9 +2256,9 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) struct mlx5_ib_dev *dev = to_mdev(ibmw->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mw *mw = to_mmw(ibmw); + unsigned int ndescs; u32 *in = NULL; void *mkc; - int ndescs; int err; struct mlx5_ib_alloc_mw req = {}; struct { @@ -2310,7 +2303,7 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) mw->mmkey.type = MLX5_MKEY_MW; ibmw->rkey = mw->mmkey.key; - mw->ndescs = ndescs; + mw->mmkey.ndescs = ndescs; resp.response_length = min(offsetofend(typeof(resp), response_length), udata->outlen); @@ -2330,7 +2323,7 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) return 0; free_mkey: - mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); + mlx5_core_destroy_mkey(dev->mdev, mw->mmkey.key); free: kfree(in); return err; @@ -2349,7 +2342,7 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw) */ mlx5r_deref_wait_odp_mkey(&mmw->mmkey); - return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); + return mlx5_core_destroy_mkey(dev->mdev, mmw->mmkey.key); } int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, @@ -2406,7 +2399,7 @@ mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, mr->meta_length = 0; if (data_sg_nents == 1) { n++; - mr->ndescs = 1; + mr->mmkey.ndescs = 1; if (data_sg_offset) sg_offset = *data_sg_offset; mr->data_length = sg_dma_len(data_sg) - sg_offset; @@ -2459,7 +2452,7 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, if (sg_offset_p) *sg_offset_p = sg_offset; - mr->ndescs = i; + mr->mmkey.ndescs = i; mr->data_length = mr->ibmr.length; if (meta_sg_nents) { @@ -2492,11 +2485,11 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) struct mlx5_ib_mr *mr = to_mmr(ibmr); __be64 *descs; - if (unlikely(mr->ndescs == mr->max_descs)) + if (unlikely(mr->mmkey.ndescs == mr->max_descs)) return -ENOMEM; descs = mr->descs; - descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); + descs[mr->mmkey.ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); return 0; } @@ -2506,11 +2499,11 @@ static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr) struct mlx5_ib_mr *mr = to_mmr(ibmr); __be64 *descs; - if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs)) + if (unlikely(mr->mmkey.ndescs + mr->meta_ndescs == mr->max_descs)) return -ENOMEM; descs = mr->descs; - descs[mr->ndescs + mr->meta_ndescs++] = + descs[mr->mmkey.ndescs + mr->meta_ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); return 0; @@ -2526,7 +2519,7 @@ mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, struct mlx5_ib_mr *pi_mr = mr->mtt_mr; int n; - pi_mr->ndescs = 0; + pi_mr->mmkey.ndescs = 0; pi_mr->meta_ndescs = 0; pi_mr->meta_length = 0; @@ -2560,7 +2553,7 @@ mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, * metadata offset at the first metadata page */ pi_mr->pi_iova = (iova & page_mask) + - pi_mr->ndescs * ibmr->page_size + + pi_mr->mmkey.ndescs * ibmr->page_size + (pi_mr->ibmr.iova & ~page_mask); /* * In order to use one MTT MR for data and metadata, we register @@ -2591,7 +2584,7 @@ mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, struct mlx5_ib_mr *pi_mr = mr->klm_mr; int n; - pi_mr->ndescs = 0; + pi_mr->mmkey.ndescs = 0; pi_mr->meta_ndescs = 0; pi_mr->meta_length = 0; @@ -2626,7 +2619,7 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY); - mr->ndescs = 0; + mr->mmkey.ndescs = 0; mr->data_length = 0; mr->data_iova = 0; mr->meta_ndescs = 0; @@ -2682,7 +2675,7 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, struct mlx5_ib_mr *mr = to_mmr(ibmr); int n; - mr->ndescs = 0; + mr->mmkey.ndescs = 0; ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map, mr->desc_size * mr->max_descs, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index d0d98e584ebc..31596d8c5212 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -430,7 +430,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, mr->umem = &odp->umem; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; - mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; + mr->ibmr.iova = idx * MLX5_IMR_MTT_SIZE; mr->parent = imr; odp->private = mr; @@ -500,7 +500,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, } imr->ibmr.pd = &pd->ibpd; - imr->mmkey.iova = 0; + imr->ibmr.iova = 0; imr->umem = &umem_odp->umem; imr->ibmr.lkey = imr->mmkey.key; imr->ibmr.rkey = imr->mmkey.key; @@ -738,7 +738,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - if (unlikely(io_virt < mr->mmkey.iova)) + if (unlikely(io_virt < mr->ibmr.iova)) return -EFAULT; if (mr->umem->is_dmabuf) @@ -747,7 +747,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, if (!odp->is_implicit_odp) { u64 user_va; - if (check_add_overflow(io_virt - mr->mmkey.iova, + if (check_add_overflow(io_virt - mr->ibmr.iova, (u64)odp->umem.address, &user_va)) return -EFAULT; if (unlikely(user_va >= ib_umem_end(odp) || @@ -788,7 +788,7 @@ struct pf_frame { int depth; }; -static bool mkey_is_eq(struct mlx5_core_mkey *mmkey, u32 key) +static bool mkey_is_eq(struct mlx5_ib_mkey *mmkey, u32 key) { if (!mmkey) return false; @@ -797,21 +797,6 @@ static bool mkey_is_eq(struct mlx5_core_mkey *mmkey, u32 key) return mmkey->key == key; } -static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey) -{ - struct mlx5_ib_mw *mw; - struct mlx5_ib_devx_mr *devx_mr; - - if (mmkey->type == MLX5_MKEY_MW) { - mw = container_of(mmkey, struct mlx5_ib_mw, mmkey); - return mw->ndescs; - } - - devx_mr = container_of(mmkey, struct mlx5_ib_devx_mr, - mmkey); - return devx_mr->ndescs; -} - /* * Handle a single data segment in a page-fault WQE or RDMA region. * @@ -831,12 +816,11 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, { int npages = 0, ret, i, outlen, cur_outlen = 0, depth = 0; struct pf_frame *head = NULL, *frame; - struct mlx5_core_mkey *mmkey; + struct mlx5_ib_mkey *mmkey; struct mlx5_ib_mr *mr; struct mlx5_klm *pklm; u32 *out = NULL; size_t offset; - int ndescs; io_virt += *bytes_committed; bcnt -= *bytes_committed; @@ -885,8 +869,6 @@ next_mr: case MLX5_MKEY_MW: case MLX5_MKEY_INDIRECT_DEVX: - ndescs = get_indirect_num_descs(mmkey); - if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) { mlx5_ib_dbg(dev, "indirection level exceeded\n"); ret = -EFAULT; @@ -894,7 +876,7 @@ next_mr: } outlen = MLX5_ST_SZ_BYTES(query_mkey_out) + - sizeof(*pklm) * (ndescs - 2); + sizeof(*pklm) * (mmkey->ndescs - 2); if (outlen > cur_outlen) { kfree(out); @@ -909,14 +891,14 @@ next_mr: pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out, bsf0_klm0_pas_mtt0_1); - ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen); + ret = mlx5_core_query_mkey(dev->mdev, mmkey->key, out, outlen); if (ret) goto end; offset = io_virt - MLX5_GET64(query_mkey_out, out, memory_key_mkey_entry.start_addr); - for (i = 0; bcnt && i < ndescs; i++, pklm++) { + for (i = 0; bcnt && i < mmkey->ndescs; i++, pklm++) { if (offset >= be32_to_cpu(pklm->bcount)) { offset -= be32_to_cpu(pklm->bcount); continue; @@ -1703,25 +1685,31 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 lkey) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr = NULL; + struct mlx5_ib_mkey *mmkey; xa_lock(&dev->odp_mkeys); mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey)); - if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) + if (!mmkey || mmkey->key != lkey) { + mr = ERR_PTR(-ENOENT); goto end; + } + if (mmkey->type != MLX5_MKEY_MR) { + mr = ERR_PTR(-EINVAL); + goto end; + } mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); if (mr->ibmr.pd != pd) { - mr = NULL; + mr = ERR_PTR(-EPERM); goto end; } /* prefetch with write-access must be supported by the MR */ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && !mr->umem->writable) { - mr = NULL; + mr = ERR_PTR(-EPERM); goto end; } @@ -1753,7 +1741,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) destroy_prefetch_work(work); } -static bool init_prefetch_work(struct ib_pd *pd, +static int init_prefetch_work(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 pf_flags, struct prefetch_mr_work *work, struct ib_sge *sg_list, u32 num_sge) @@ -1764,17 +1752,19 @@ static bool init_prefetch_work(struct ib_pd *pd, work->pf_flags = pf_flags; for (i = 0; i < num_sge; ++i) { - work->frags[i].io_virt = sg_list[i].addr; - work->frags[i].length = sg_list[i].length; - work->frags[i].mr = - get_prefetchable_mr(pd, advice, sg_list[i].lkey); - if (!work->frags[i].mr) { + struct mlx5_ib_mr *mr; + + mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); + if (IS_ERR(mr)) { work->num_sge = i; - return false; + return PTR_ERR(mr); } + work->frags[i].io_virt = sg_list[i].addr; + work->frags[i].length = sg_list[i].length; + work->frags[i].mr = mr; } work->num_sge = num_sge; - return true; + return 0; } static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, @@ -1790,8 +1780,8 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, struct mlx5_ib_mr *mr; mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); - if (!mr) - return -ENOENT; + if (IS_ERR(mr)) + return PTR_ERR(mr); ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length, &bytes_mapped, pf_flags); if (ret < 0) { @@ -1811,6 +1801,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { u32 pf_flags = 0; struct prefetch_mr_work *work; + int rc; if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; @@ -1826,9 +1817,10 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (!work) return -ENOMEM; - if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { + rc = init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge); + if (rc) { destroy_prefetch_work(work); - return -EINVAL; + return rc; } queue_work(system_unbound_wq, &work->work); return 0; diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index 8841620af82f..51e48ca9016e 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -217,7 +217,7 @@ static __be64 sig_mkey_mask(void) static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, struct mlx5_ib_mr *mr, u8 flags, bool atomic) { - int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; + int size = (mr->mmkey.ndescs + mr->meta_ndescs) * mr->desc_size; memset(umr, 0, sizeof(*umr)); @@ -374,7 +374,7 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, struct mlx5_ib_mr *mr, u32 key, int access) { - int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1; + int ndescs = ALIGN(mr->mmkey.ndescs + mr->meta_ndescs, 8) >> 1; memset(seg, 0, sizeof(*seg)); @@ -439,7 +439,7 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, struct mlx5_ib_mr *mr, struct mlx5_ib_pd *pd) { - int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs); + int bcount = mr->desc_size * (mr->mmkey.ndescs + mr->meta_ndescs); dseg->addr = cpu_to_be64(mr->desc_map); dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64)); @@ -861,7 +861,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device); - int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; + int mr_list_size = (mr->mmkey.ndescs + mr->meta_ndescs) * mr->desc_size; bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC; u8 flags = 0; @@ -1111,7 +1111,7 @@ static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev, memset(&pa_pi_mr, 0, sizeof(struct mlx5_ib_mr)); /* No UMR, use local_dma_lkey */ pa_pi_mr.ibmr.lkey = mr->ibmr.pd->local_dma_lkey; - pa_pi_mr.ndescs = mr->ndescs; + pa_pi_mr.mmkey.ndescs = mr->mmkey.ndescs; pa_pi_mr.data_length = mr->data_length; pa_pi_mr.data_iova = mr->data_iova; if (mr->meta_ndescs) { |