aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/mlx5
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--drivers/infiniband/hw/mlx5/Kconfig1
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile1
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c9
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c69
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c1067
-rw-r--r--drivers/infiniband/hw/mlx5/flow.c13
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c52
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.h16
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c60
-rw-r--r--drivers/infiniband/hw/mlx5/main.c238
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c20
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h50
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c564
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c56
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c314
15 files changed, 2118 insertions, 412 deletions
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
index 8d651c05de62..ea248def4556 100644
--- a/drivers/infiniband/hw/mlx5/Kconfig
+++ b/drivers/infiniband/hw/mlx5/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
config MLX5_INFINIBAND
tristate "Mellanox 5th generation network adapters (ConnectX series) support"
depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 33f5adb14e4e..9924be8384d8 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index e3ec79b8f7f5..6c8645033102 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -190,12 +190,12 @@ int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
u16 uid, phys_addr_t *addr, u32 *obj_id)
{
struct mlx5_core_dev *dev = dm->dev;
- u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {};
unsigned long *block_map;
u64 icm_start_addr;
u32 log_icm_size;
+ u32 num_blocks;
u32 max_blocks;
u64 block_idx;
void *sw_icm;
@@ -224,6 +224,8 @@ int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
return -EINVAL;
}
+ num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >>
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
spin_lock(&dm->lock);
block_idx = bitmap_find_next_zero_area(block_map,
@@ -266,13 +268,16 @@ int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
u16 uid, phys_addr_t addr, u32 obj_id)
{
struct mlx5_core_dev *dev = dm->dev;
- u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
unsigned long *block_map;
+ u32 num_blocks;
u64 start_idx;
int err;
+ num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >>
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+
switch (type) {
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
start_idx =
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 2e2e65f00257..45f48cde6b9d 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -37,7 +37,7 @@
#include "mlx5_ib.h"
#include "srq.h"
-static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
+static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe)
{
struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
@@ -522,9 +522,9 @@ repoll:
case MLX5_CQE_SIG_ERR:
sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
- read_lock(&dev->mdev->priv.mkey_table.lock);
- mmkey = __mlx5_mr_lookup(dev->mdev,
- mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
+ xa_lock(&dev->mdev->priv.mkey_table);
+ mmkey = xa_load(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
mr = to_mibmr(mmkey);
get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
mr->sig->sig_err_exists = true;
@@ -537,7 +537,7 @@ repoll:
mr->sig->err_item.expected,
mr->sig->err_item.actual);
- read_unlock(&dev->mdev->priv.mkey_table.lock);
+ xa_unlock(&dev->mdev->priv.mkey_table);
goto repoll;
}
@@ -884,14 +884,15 @@ static void notify_soft_wc_handler(struct work_struct *work)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
-struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_cq *cq;
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
int uninitialized_var(index);
int uninitialized_var(inlen);
u32 *cqb = NULL;
@@ -903,18 +904,14 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
if (entries < 0 ||
(entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (check_cq_create_flags(attr->flags))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
entries = roundup_pow_of_two(entries + 1);
if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
- return ERR_PTR(-EINVAL);
-
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
cq->ibcq.cqe = entries - 1;
mutex_init(&cq->resize_mutex);
@@ -929,13 +926,13 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size,
&index, &inlen);
if (err)
- goto err_create;
+ return err;
} else {
cqe_size = cache_line_size() == 128 ? 128 : 64;
err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
&index, &inlen);
if (err)
- goto err_create;
+ return err;
INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
}
@@ -958,7 +955,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
MLX5_SET(cqc, cqc, oi, 1);
- err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen);
+ err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out));
if (err)
goto err_cqb;
@@ -980,7 +977,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
kvfree(cqb);
- return &cq->ibcq;
+ return 0;
err_cmd:
mlx5_core_destroy_cq(dev->mdev, &cq->mcq);
@@ -991,14 +988,10 @@ err_cqb:
destroy_cq_user(cq, udata);
else
destroy_cq_kernel(dev, cq);
-
-err_create:
- kfree(cq);
-
- return ERR_PTR(err);
+ return err;
}
-int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(cq->device);
struct mlx5_ib_cq *mcq = to_mcq(cq);
@@ -1008,10 +1001,6 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
destroy_cq_user(mcq, udata);
else
destroy_cq_kernel(dev, mcq);
-
- kfree(mcq);
-
- return 0;
}
static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
@@ -1137,11 +1126,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
return 0;
}
-static void un_resize_user(struct mlx5_ib_cq *cq)
-{
- ib_umem_release(cq->resize_umem);
-}
-
static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
int entries, int cqe_size)
{
@@ -1164,12 +1148,6 @@ ex:
return err;
}
-static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
-{
- free_cq_buf(dev, cq->resize_buf);
- cq->resize_buf = NULL;
-}
-
static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
@@ -1350,10 +1328,11 @@ ex_alloc:
kvfree(in);
ex_resize:
- if (udata)
- un_resize_user(cq);
- else
- un_resize_kernel(dev, cq);
+ ib_umem_release(cq->resize_umem);
+ if (!udata) {
+ free_cq_buf(dev, cq->resize_buf);
+ cq->resize_buf = NULL;
+ }
ex:
mutex_unlock(&cq->resize_mutex);
return err;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 80b42d069328..ec4370f99381 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -14,13 +14,17 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
+#include <linux/xarray.h>
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
+static void dispatch_event_fd(struct list_head *fd_list, const void *data);
+
enum devx_obj_flags {
DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0,
DEVX_OBJ_FLAGS_DCT = 1 << 1,
+ DEVX_OBJ_FLAGS_CQ = 1 << 2,
};
struct devx_async_data {
@@ -33,9 +37,61 @@ struct devx_async_data {
struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
};
+struct devx_async_event_data {
+ struct list_head list; /* headed in ev_file->event_list */
+ struct mlx5_ib_uapi_devx_async_event_hdr hdr;
+};
+
+/* first level XA value data structure */
+struct devx_event {
+ struct xarray object_ids; /* second XA level, Key = object id */
+ struct list_head unaffiliated_list;
+};
+
+/* second level XA value data structure */
+struct devx_obj_event {
+ struct rcu_head rcu;
+ struct list_head obj_sub_list;
+};
+
+struct devx_event_subscription {
+ struct list_head file_list; /* headed in ev_file->
+ * subscribed_events_list
+ */
+ struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
+ * devx_obj_event->obj_sub_list
+ */
+ struct list_head obj_list; /* headed in devx_object */
+ struct list_head event_list; /* headed in ev_file->event_list or in
+ * temp list via subscription
+ */
+
+ u8 is_cleaned:1;
+ u32 xa_key_level1;
+ u32 xa_key_level2;
+ struct rcu_head rcu;
+ u64 cookie;
+ struct devx_async_event_file *ev_file;
+ struct file *filp; /* Upon hot unplug we need a direct access to */
+ struct eventfd_ctx *eventfd;
+};
+
+struct devx_async_event_file {
+ struct ib_uobject uobj;
+ /* Head of events that are subscribed to this FD */
+ struct list_head subscribed_events_list;
+ spinlock_t lock;
+ wait_queue_head_t poll_wait;
+ struct list_head event_list;
+ struct mlx5_ib_dev *dev;
+ u8 omit_data:1;
+ u8 is_overflow_err:1;
+ u8 is_destroyed:1;
+};
+
#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
struct devx_obj {
- struct mlx5_core_dev *mdev;
+ struct mlx5_ib_dev *ib_dev;
u64 obj_id;
u32 dinlen; /* destroy inbox length */
u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
@@ -43,7 +99,9 @@ struct devx_obj {
union {
struct mlx5_ib_devx_mr devx_mr;
struct mlx5_core_dct core_dct;
+ struct mlx5_core_cq core_cq;
};
+ struct list_head event_sub; /* holds devx_event_subscription entries */
};
struct devx_umem {
@@ -149,6 +207,127 @@ bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id)
return false;
}
+static bool is_legacy_unaffiliated_event_num(u16 event_num)
+{
+ switch (event_num) {
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool is_legacy_obj_event_num(u16 event_num)
+{
+ switch (event_num) {
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_COMP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static u16 get_legacy_obj_type(u16 opcode)
+{
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_RQ:
+ return MLX5_EVENT_QUEUE_TYPE_RQ;
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_EVENT_QUEUE_TYPE_QP;
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_EVENT_QUEUE_TYPE_SQ;
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_EVENT_QUEUE_TYPE_DCT;
+ default:
+ return 0;
+ }
+}
+
+static u16 get_dec_obj_type(struct devx_obj *obj, u16 event_num)
+{
+ u16 opcode;
+
+ opcode = (obj->obj_id >> 32) & 0xffff;
+
+ if (is_legacy_obj_event_num(event_num))
+ return get_legacy_obj_type(opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ return (obj->obj_id >> 48);
+ case MLX5_CMD_OP_CREATE_RQ:
+ return MLX5_OBJ_TYPE_RQ;
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_OBJ_TYPE_QP;
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_OBJ_TYPE_SQ;
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_OBJ_TYPE_DCT;
+ case MLX5_CMD_OP_CREATE_TIR:
+ return MLX5_OBJ_TYPE_TIR;
+ case MLX5_CMD_OP_CREATE_TIS:
+ return MLX5_OBJ_TYPE_TIS;
+ case MLX5_CMD_OP_CREATE_PSV:
+ return MLX5_OBJ_TYPE_PSV;
+ case MLX5_OBJ_TYPE_MKEY:
+ return MLX5_OBJ_TYPE_MKEY;
+ case MLX5_CMD_OP_CREATE_RMP:
+ return MLX5_OBJ_TYPE_RMP;
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ return MLX5_OBJ_TYPE_XRC_SRQ;
+ case MLX5_CMD_OP_CREATE_XRQ:
+ return MLX5_OBJ_TYPE_XRQ;
+ case MLX5_CMD_OP_CREATE_RQT:
+ return MLX5_OBJ_TYPE_RQT;
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ return MLX5_OBJ_TYPE_FLOW_COUNTER;
+ case MLX5_CMD_OP_CREATE_CQ:
+ return MLX5_OBJ_TYPE_CQ;
+ default:
+ return 0;
+ }
+}
+
+static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
+{
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ return eqe->data.qp_srq.type;
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ return 0;
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ return MLX5_EVENT_QUEUE_TYPE_DCT;
+ default:
+ return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
+ }
+}
+
+static u32 get_dec_obj_id(u64 obj_id)
+{
+ return (obj_id & 0xffffffff);
+}
+
/*
* As the obj_id in the firmware is not globally unique the object type
* must be considered upon checking for a valid object id.
@@ -715,12 +894,16 @@ static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
return c->devx_uid;
}
-static bool devx_is_general_cmd(void *in)
+
+static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
- if (opcode >= MLX5_CMD_OP_GENERAL_START &&
- opcode < MLX5_CMD_OP_GENERAL_END)
+ /* Pass all cmds for vhca_tunnel as general, tracking is done in FW */
+ if ((MLX5_CAP_GEN_64(dev->mdev, vhca_tunnel_commands) &&
+ MLX5_GET(general_obj_in_cmd_hdr, in, vhca_tunnel_id)) ||
+ (opcode >= MLX5_CMD_OP_GENERAL_START &&
+ opcode < MLX5_CMD_OP_GENERAL_END))
return true;
switch (opcode) {
@@ -846,7 +1029,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
return uid;
/* Only white list of some general HCA commands are allowed for this method. */
- if (!devx_is_general_cmd(cmd_in))
+ if (!devx_is_general_cmd(cmd_in, dev))
return -EINVAL;
cmd_out = uverbs_zalloc(attrs, cmd_out_len);
@@ -1043,13 +1226,10 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
struct mlx5_ib_dev *dev,
void *in, void *out)
{
- struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr;
- unsigned long flags;
struct mlx5_core_mkey *mkey;
void *mkc;
u8 key;
- int err;
mkey = &devx_mr->mmkey;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
@@ -1062,11 +1242,8 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
mkey->pd = MLX5_GET(mkc, mkc, pd);
devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
- write_lock_irqsave(&table->lock, flags);
- err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key),
- mkey);
- write_unlock_irqrestore(&table->lock, flags);
- return err;
+ return xa_err(xa_store(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(mkey->key), mkey, GFP_KERNEL));
}
static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
@@ -1117,12 +1294,37 @@ static void devx_free_indirect_mkey(struct rcu_head *rcu)
*/
static void devx_cleanup_mkey(struct devx_obj *obj)
{
- struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table;
- unsigned long flags;
+ xa_erase(&obj->ib_dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(obj->devx_mr.mmkey.key));
+}
- write_lock_irqsave(&table->lock, flags);
- radix_tree_delete(&table->tree, mlx5_base_mkey(obj->devx_mr.mmkey.key));
- write_unlock_irqrestore(&table->lock, flags);
+static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
+ struct devx_event_subscription *sub)
+{
+ struct devx_event *event;
+ struct devx_obj_event *xa_val_level2;
+
+ if (sub->is_cleaned)
+ return;
+
+ sub->is_cleaned = 1;
+ list_del_rcu(&sub->xa_list);
+
+ if (list_empty(&sub->obj_list))
+ return;
+
+ list_del_rcu(&sub->obj_list);
+ /* check whether key level 1 for this obj_sub_list is empty */
+ event = xa_load(&dev->devx_event_table.event_xa,
+ sub->xa_key_level1);
+ WARN_ON(!event);
+
+ xa_val_level2 = xa_load(&event->object_ids, sub->xa_key_level2);
+ if (list_empty(&xa_val_level2->obj_sub_list)) {
+ xa_erase(&event->object_ids,
+ sub->xa_key_level2);
+ kfree_rcu(xa_val_level2, rcu);
+ }
}
static int devx_obj_cleanup(struct ib_uobject *uobject,
@@ -1130,24 +1332,34 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
struct uverbs_attr_bundle *attrs)
{
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ struct mlx5_devx_event_table *devx_event_table;
struct devx_obj *obj = uobject->object;
+ struct devx_event_subscription *sub_entry, *tmp;
+ struct mlx5_ib_dev *dev;
int ret;
+ dev = mlx5_udata_to_mdev(&attrs->driver_udata);
if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
devx_cleanup_mkey(obj);
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
- ret = mlx5_core_destroy_dct(obj->mdev, &obj->core_dct);
+ ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
+ else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
+ ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
else
- ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out,
- sizeof(out));
+ ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox,
+ obj->dinlen, out, sizeof(out));
if (ib_is_destroy_retryable(ret, why, uobject))
return ret;
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
- struct mlx5_ib_dev *dev =
- mlx5_udata_to_mdev(&attrs->driver_udata);
+ devx_event_table = &dev->devx_event_table;
+
+ mutex_lock(&devx_event_table->event_xa_lock);
+ list_for_each_entry_safe(sub_entry, tmp, &obj->event_sub, obj_list)
+ devx_cleanup_subscription(dev, sub_entry);
+ mutex_unlock(&devx_event_table->event_xa_lock);
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu,
devx_free_indirect_mkey);
return ret;
@@ -1157,6 +1369,29 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
return ret;
}
+static void devx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
+{
+ struct devx_obj *obj = container_of(mcq, struct devx_obj, core_cq);
+ struct mlx5_devx_event_table *table;
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+ u32 obj_id = mcq->cqn;
+
+ table = &obj->ib_dev->devx_event_table;
+ rcu_read_lock();
+ event = xa_load(&table->event_xa, MLX5_EVENT_TYPE_COMP);
+ if (!event)
+ goto out;
+
+ obj_event = xa_load(&event->object_ids, obj_id);
+ if (!obj_event)
+ goto out;
+
+ dispatch_event_fd(&obj_event->obj_sub_list, eqe);
+out:
+ rcu_read_unlock();
+}
+
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
struct uverbs_attr_bundle *attrs)
{
@@ -1179,6 +1414,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
u32 obj_id;
u16 opcode;
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
return uid;
@@ -1208,6 +1446,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
err = mlx5_core_create_dct(dev->mdev, &obj->core_dct,
cmd_in, cmd_in_len,
cmd_out, cmd_out_len);
+ } else if (opcode == MLX5_CMD_OP_CREATE_CQ) {
+ obj->flags |= DEVX_OBJ_FLAGS_CQ;
+ obj->core_cq.comp = devx_cq_comp;
+ err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
+ cmd_in, cmd_in_len, cmd_out,
+ cmd_out_len);
} else {
err = mlx5_cmd_exec(dev->mdev, cmd_in,
cmd_in_len,
@@ -1218,7 +1462,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
goto obj_free;
uobj->object = obj;
- obj->mdev = dev->mdev;
+ INIT_LIST_HEAD(&obj->event_sub);
+ obj->ib_dev = dev;
devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
&obj_id);
WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
@@ -1245,9 +1490,11 @@ err_copy:
devx_cleanup_mkey(obj);
obj_destroy:
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
- mlx5_core_destroy_dct(obj->mdev, &obj->core_dct);
+ mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
+ else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
+ mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
else
- mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out,
+ mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out,
sizeof(out));
obj_free:
kfree(obj);
@@ -1269,6 +1516,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
int err;
int uid;
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
return uid;
@@ -1311,6 +1561,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
int uid;
struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
return uid;
@@ -1375,6 +1628,38 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
return 0;
}
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE);
+ struct devx_async_event_file *ev_file;
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ u32 flags;
+ int err;
+
+ err = uverbs_get_flags32(&flags, attrs,
+ MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
+ MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA);
+
+ if (err)
+ return err;
+
+ ev_file = container_of(uobj, struct devx_async_event_file,
+ uobj);
+ spin_lock_init(&ev_file->lock);
+ INIT_LIST_HEAD(&ev_file->event_list);
+ init_waitqueue_head(&ev_file->poll_wait);
+ if (flags & MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA)
+ ev_file->omit_data = 1;
+ INIT_LIST_HEAD(&ev_file->subscribed_events_list);
+ ev_file->dev = dev;
+ get_device(&dev->ib_dev.dev);
+ return 0;
+}
+
static void devx_query_callback(int status, struct mlx5_async_work *context)
{
struct devx_async_data *async_data =
@@ -1416,6 +1701,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
struct devx_async_cmd_event_file *ev_file;
struct devx_async_data *async_data;
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
return uid;
@@ -1484,6 +1772,331 @@ sub_bytes:
return err;
}
+static void
+subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
+ u32 key_level1,
+ bool is_level2,
+ u32 key_level2)
+{
+ struct devx_event *event;
+ struct devx_obj_event *xa_val_level2;
+
+ /* Level 1 is valid for future use, no need to free */
+ if (!is_level2)
+ return;
+
+ event = xa_load(&devx_event_table->event_xa, key_level1);
+ WARN_ON(!event);
+
+ xa_val_level2 = xa_load(&event->object_ids,
+ key_level2);
+ if (list_empty(&xa_val_level2->obj_sub_list)) {
+ xa_erase(&event->object_ids,
+ key_level2);
+ kfree_rcu(xa_val_level2, rcu);
+ }
+}
+
+static int
+subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
+ u32 key_level1,
+ bool is_level2,
+ u32 key_level2)
+{
+ struct devx_obj_event *obj_event;
+ struct devx_event *event;
+ int err;
+
+ event = xa_load(&devx_event_table->event_xa, key_level1);
+ if (!event) {
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (!event)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&event->unaffiliated_list);
+ xa_init(&event->object_ids);
+
+ err = xa_insert(&devx_event_table->event_xa,
+ key_level1,
+ event,
+ GFP_KERNEL);
+ if (err) {
+ kfree(event);
+ return err;
+ }
+ }
+
+ if (!is_level2)
+ return 0;
+
+ obj_event = xa_load(&event->object_ids, key_level2);
+ if (!obj_event) {
+ obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
+ if (!obj_event)
+ /* Level1 is valid for future use, no need to free */
+ return -ENOMEM;
+
+ err = xa_insert(&event->object_ids,
+ key_level2,
+ obj_event,
+ GFP_KERNEL);
+ if (err)
+ return err;
+ INIT_LIST_HEAD(&obj_event->obj_sub_list);
+ }
+
+ return 0;
+}
+
+static bool is_valid_events_legacy(int num_events, u16 *event_type_num_list,
+ struct devx_obj *obj)
+{
+ int i;
+
+ for (i = 0; i < num_events; i++) {
+ if (obj) {
+ if (!is_legacy_obj_event_num(event_type_num_list[i]))
+ return false;
+ } else if (!is_legacy_unaffiliated_event_num(
+ event_type_num_list[i])) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#define MAX_SUPP_EVENT_NUM 255
+static bool is_valid_events(struct mlx5_core_dev *dev,
+ int num_events, u16 *event_type_num_list,
+ struct devx_obj *obj)
+{
+ __be64 *aff_events;
+ __be64 *unaff_events;
+ int mask_entry;
+ int mask_bit;
+ int i;
+
+ if (MLX5_CAP_GEN(dev, event_cap)) {
+ aff_events = MLX5_CAP_DEV_EVENT(dev,
+ user_affiliated_events);
+ unaff_events = MLX5_CAP_DEV_EVENT(dev,
+ user_unaffiliated_events);
+ } else {
+ return is_valid_events_legacy(num_events, event_type_num_list,
+ obj);
+ }
+
+ for (i = 0; i < num_events; i++) {
+ if (event_type_num_list[i] > MAX_SUPP_EVENT_NUM)
+ return false;
+
+ mask_entry = event_type_num_list[i] / 64;
+ mask_bit = event_type_num_list[i] % 64;
+
+ if (obj) {
+ /* CQ completion */
+ if (event_type_num_list[i] == 0)
+ continue;
+
+ if (!(be64_to_cpu(aff_events[mask_entry]) &
+ (1ull << mask_bit)))
+ return false;
+
+ continue;
+ }
+
+ if (!(be64_to_cpu(unaff_events[mask_entry]) &
+ (1ull << mask_bit)))
+ return false;
+ }
+
+ return true;
+}
+
+#define MAX_NUM_EVENTS 16
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
+ attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ struct ib_uobject *fd_uobj;
+ struct devx_obj *obj = NULL;
+ struct devx_async_event_file *ev_file;
+ struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
+ u16 *event_type_num_list;
+ struct devx_event_subscription *event_sub, *tmp_sub;
+ struct list_head sub_list;
+ int redirect_fd;
+ bool use_eventfd = false;
+ int num_events;
+ int num_alloc_xa_entries = 0;
+ u16 obj_type = 0;
+ u64 cookie = 0;
+ u32 obj_id = 0;
+ int err;
+ int i;
+
+ if (!c->devx_uid)
+ return -EINVAL;
+
+ if (!IS_ERR(devx_uobj)) {
+ obj = (struct devx_obj *)devx_uobj->object;
+ if (obj)
+ obj_id = get_dec_obj_id(obj->obj_id);
+ }
+
+ fd_uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
+ if (IS_ERR(fd_uobj))
+ return PTR_ERR(fd_uobj);
+
+ ev_file = container_of(fd_uobj, struct devx_async_event_file,
+ uobj);
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
+ err = uverbs_copy_from(&redirect_fd, attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
+ if (err)
+ return err;
+
+ use_eventfd = true;
+ }
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
+ if (use_eventfd)
+ return -EINVAL;
+
+ err = uverbs_copy_from(&cookie, attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
+ if (err)
+ return err;
+ }
+
+ num_events = uverbs_attr_ptr_get_array_size(
+ attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
+ sizeof(u16));
+
+ if (num_events < 0)
+ return num_events;
+
+ if (num_events > MAX_NUM_EVENTS)
+ return -EINVAL;
+
+ event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);
+
+ if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
+ return -EINVAL;
+
+ INIT_LIST_HEAD(&sub_list);
+
+ /* Protect from concurrent subscriptions to same XA entries to allow
+ * both to succeed
+ */
+ mutex_lock(&devx_event_table->event_xa_lock);
+ for (i = 0; i < num_events; i++) {
+ u32 key_level1;
+
+ if (obj)
+ obj_type = get_dec_obj_type(obj,
+ event_type_num_list[i]);
+ key_level1 = event_type_num_list[i] | obj_type << 16;
+
+ err = subscribe_event_xa_alloc(devx_event_table,
+ key_level1,
+ obj,
+ obj_id);
+ if (err)
+ goto err;
+
+ num_alloc_xa_entries++;
+ event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
+ if (!event_sub)
+ goto err;
+
+ list_add_tail(&event_sub->event_list, &sub_list);
+ if (use_eventfd) {
+ event_sub->eventfd =
+ eventfd_ctx_fdget(redirect_fd);
+
+ if (IS_ERR(event_sub)) {
+ err = PTR_ERR(event_sub->eventfd);
+ event_sub->eventfd = NULL;
+ goto err;
+ }
+ }
+
+ event_sub->cookie = cookie;
+ event_sub->ev_file = ev_file;
+ event_sub->filp = fd_uobj->object;
+ /* May be needed upon cleanup the devx object/subscription */
+ event_sub->xa_key_level1 = key_level1;
+ event_sub->xa_key_level2 = obj_id;
+ INIT_LIST_HEAD(&event_sub->obj_list);
+ }
+
+ /* Once all the allocations and the XA data insertions were done we
+ * can go ahead and add all the subscriptions to the relevant lists
+ * without concern of a failure.
+ */
+ list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+
+ list_del_init(&event_sub->event_list);
+
+ spin_lock_irq(&ev_file->lock);
+ list_add_tail_rcu(&event_sub->file_list,
+ &ev_file->subscribed_events_list);
+ spin_unlock_irq(&ev_file->lock);
+
+ event = xa_load(&devx_event_table->event_xa,
+ event_sub->xa_key_level1);
+ WARN_ON(!event);
+
+ if (!obj) {
+ list_add_tail_rcu(&event_sub->xa_list,
+ &event->unaffiliated_list);
+ continue;
+ }
+
+ obj_event = xa_load(&event->object_ids, obj_id);
+ WARN_ON(!obj_event);
+ list_add_tail_rcu(&event_sub->xa_list,
+ &obj_event->obj_sub_list);
+ list_add_tail_rcu(&event_sub->obj_list,
+ &obj->event_sub);
+ }
+
+ mutex_unlock(&devx_event_table->event_xa_lock);
+ return 0;
+
+err:
+ list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
+ list_del(&event_sub->event_list);
+
+ subscribe_event_xa_dealloc(devx_event_table,
+ event_sub->xa_key_level1,
+ obj,
+ obj_id);
+
+ if (event_sub->eventfd)
+ eventfd_ctx_put(event_sub->eventfd);
+
+ kfree(event_sub);
+ }
+
+ mutex_unlock(&devx_event_table->event_xa_lock);
+ return err;
+}
+
static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
struct uverbs_attr_bundle *attrs,
struct devx_umem *obj)
@@ -1631,6 +2244,203 @@ static int devx_umem_cleanup(struct ib_uobject *uobject,
return 0;
}
+static bool is_unaffiliated_event(struct mlx5_core_dev *dev,
+ unsigned long event_type)
+{
+ __be64 *unaff_events;
+ int mask_entry;
+ int mask_bit;
+
+ if (!MLX5_CAP_GEN(dev, event_cap))
+ return is_legacy_unaffiliated_event_num(event_type);
+
+ unaff_events = MLX5_CAP_DEV_EVENT(dev,
+ user_unaffiliated_events);
+ WARN_ON(event_type > MAX_SUPP_EVENT_NUM);
+
+ mask_entry = event_type / 64;
+ mask_bit = event_type % 64;
+
+ if (!(be64_to_cpu(unaff_events[mask_entry]) & (1ull << mask_bit)))
+ return false;
+
+ return true;
+}
+
+static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
+{
+ struct mlx5_eqe *eqe = data;
+ u32 obj_id = 0;
+
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ break;
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+ break;
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ obj_id = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+ break;
+ default:
+ obj_id = MLX5_GET(affiliated_event_header, &eqe->data, obj_id);
+ break;
+ }
+
+ return obj_id;
+}
+
+static int deliver_event(struct devx_event_subscription *event_sub,
+ const void *data)
+{
+ struct devx_async_event_file *ev_file;
+ struct devx_async_event_data *event_data;
+ unsigned long flags;
+
+ ev_file = event_sub->ev_file;
+
+ if (ev_file->omit_data) {
+ spin_lock_irqsave(&ev_file->lock, flags);
+ if (!list_empty(&event_sub->event_list)) {
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ return 0;
+ }
+
+ list_add_tail(&event_sub->event_list, &ev_file->event_list);
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ wake_up_interruptible(&ev_file->poll_wait);
+ return 0;
+ }
+
+ event_data = kzalloc(sizeof(*event_data) + sizeof(struct mlx5_eqe),
+ GFP_ATOMIC);
+ if (!event_data) {
+ spin_lock_irqsave(&ev_file->lock, flags);
+ ev_file->is_overflow_err = 1;
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ return -ENOMEM;
+ }
+
+ event_data->hdr.cookie = event_sub->cookie;
+ memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe));
+
+ spin_lock_irqsave(&ev_file->lock, flags);
+ list_add_tail(&event_data->list, &ev_file->event_list);
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ wake_up_interruptible(&ev_file->poll_wait);
+
+ return 0;
+}
+
+static void dispatch_event_fd(struct list_head *fd_list,
+ const void *data)
+{
+ struct devx_event_subscription *item;
+
+ list_for_each_entry_rcu(item, fd_list, xa_list) {
+ if (!get_file_rcu(item->filp))
+ continue;
+
+ if (item->eventfd) {
+ eventfd_signal(item->eventfd, 1);
+ fput(item->filp);
+ continue;
+ }
+
+ deliver_event(item, data);
+ fput(item->filp);
+ }
+}
+
+static int devx_event_notifier(struct notifier_block *nb,
+ unsigned long event_type, void *data)
+{
+ struct mlx5_devx_event_table *table;
+ struct mlx5_ib_dev *dev;
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+ u16 obj_type = 0;
+ bool is_unaffiliated;
+ u32 obj_id;
+
+ /* Explicit filtering to kernel events which may occur frequently */
+ if (event_type == MLX5_EVENT_TYPE_CMD ||
+ event_type == MLX5_EVENT_TYPE_PAGE_REQUEST)
+ return NOTIFY_OK;
+
+ table = container_of(nb, struct mlx5_devx_event_table, devx_nb.nb);
+ dev = container_of(table, struct mlx5_ib_dev, devx_event_table);
+ is_unaffiliated = is_unaffiliated_event(dev->mdev, event_type);
+
+ if (!is_unaffiliated)
+ obj_type = get_event_obj_type(event_type, data);
+
+ rcu_read_lock();
+ event = xa_load(&table->event_xa, event_type | (obj_type << 16));
+ if (!event) {
+ rcu_read_unlock();
+ return NOTIFY_DONE;
+ }
+
+ if (is_unaffiliated) {
+ dispatch_event_fd(&event->unaffiliated_list, data);
+ rcu_read_unlock();
+ return NOTIFY_OK;
+ }
+
+ obj_id = devx_get_obj_id_from_event(event_type, data);
+ obj_event = xa_load(&event->object_ids, obj_id);
+ if (!obj_event) {
+ rcu_read_unlock();
+ return NOTIFY_DONE;
+ }
+
+ dispatch_event_fd(&obj_event->obj_sub_list, data);
+
+ rcu_read_unlock();
+ return NOTIFY_OK;
+}
+
+void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_devx_event_table *table = &dev->devx_event_table;
+
+ xa_init(&table->event_xa);
+ mutex_init(&table->event_xa_lock);
+ MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY);
+ mlx5_eq_notifier_register(dev->mdev, &table->devx_nb);
+}
+
+void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_devx_event_table *table = &dev->devx_event_table;
+ struct devx_event_subscription *sub, *tmp;
+ struct devx_event *event;
+ void *entry;
+ unsigned long id;
+
+ mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb);
+ mutex_lock(&dev->devx_event_table.event_xa_lock);
+ xa_for_each(&table->event_xa, id, entry) {
+ event = entry;
+ list_for_each_entry_safe(sub, tmp, &event->unaffiliated_list,
+ xa_list)
+ devx_cleanup_subscription(dev, sub);
+ kfree(entry);
+ }
+ mutex_unlock(&dev->devx_event_table.event_xa_lock);
+ xa_destroy(&table->event_xa);
+}
+
static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
@@ -1729,6 +2539,149 @@ static const struct file_operations devx_async_cmd_event_fops = {
.llseek = no_llseek,
};
+static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct devx_async_event_file *ev_file = filp->private_data;
+ struct devx_event_subscription *event_sub;
+ struct devx_async_event_data *uninitialized_var(event);
+ int ret = 0;
+ size_t eventsz;
+ bool omit_data;
+ void *event_data;
+
+ omit_data = ev_file->omit_data;
+
+ spin_lock_irq(&ev_file->lock);
+
+ if (ev_file->is_overflow_err) {
+ ev_file->is_overflow_err = 0;
+ spin_unlock_irq(&ev_file->lock);
+ return -EOVERFLOW;
+ }
+
+ if (ev_file->is_destroyed) {
+ spin_unlock_irq(&ev_file->lock);
+ return -EIO;
+ }
+
+ while (list_empty(&ev_file->event_list)) {
+ spin_unlock_irq(&ev_file->lock);
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(ev_file->poll_wait,
+ (!list_empty(&ev_file->event_list) ||
+ ev_file->is_destroyed))) {
+ return -ERESTARTSYS;
+ }
+
+ spin_lock_irq(&ev_file->lock);
+ if (ev_file->is_destroyed) {
+ spin_unlock_irq(&ev_file->lock);
+ return -EIO;
+ }
+ }
+
+ if (omit_data) {
+ event_sub = list_first_entry(&ev_file->event_list,
+ struct devx_event_subscription,
+ event_list);
+ eventsz = sizeof(event_sub->cookie);
+ event_data = &event_sub->cookie;
+ } else {
+ event = list_first_entry(&ev_file->event_list,
+ struct devx_async_event_data, list);
+ eventsz = sizeof(struct mlx5_eqe) +
+ sizeof(struct mlx5_ib_uapi_devx_async_event_hdr);
+ event_data = &event->hdr;
+ }
+
+ if (eventsz > count) {
+ spin_unlock_irq(&ev_file->lock);
+ return -EINVAL;
+ }
+
+ if (omit_data)
+ list_del_init(&event_sub->event_list);
+ else
+ list_del(&event->list);
+
+ spin_unlock_irq(&ev_file->lock);
+
+ if (copy_to_user(buf, event_data, eventsz))
+ /* This points to an application issue, not a kernel concern */
+ ret = -EFAULT;
+ else
+ ret = eventsz;
+
+ if (!omit_data)
+ kfree(event);
+ return ret;
+}
+
+static __poll_t devx_async_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ struct devx_async_event_file *ev_file = filp->private_data;
+ __poll_t pollflags = 0;
+
+ poll_wait(filp, &ev_file->poll_wait, wait);
+
+ spin_lock_irq(&ev_file->lock);
+ if (ev_file->is_destroyed)
+ pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
+ else if (!list_empty(&ev_file->event_list))
+ pollflags = EPOLLIN | EPOLLRDNORM;
+ spin_unlock_irq(&ev_file->lock);
+
+ return pollflags;
+}
+
+static int devx_async_event_close(struct inode *inode, struct file *filp)
+{
+ struct devx_async_event_file *ev_file = filp->private_data;
+ struct devx_event_subscription *event_sub, *event_sub_tmp;
+ struct devx_async_event_data *entry, *tmp;
+
+ mutex_lock(&ev_file->dev->devx_event_table.event_xa_lock);
+ /* delete the subscriptions which are related to this FD */
+ list_for_each_entry_safe(event_sub, event_sub_tmp,
+ &ev_file->subscribed_events_list, file_list) {
+ devx_cleanup_subscription(ev_file->dev, event_sub);
+ if (event_sub->eventfd)
+ eventfd_ctx_put(event_sub->eventfd);
+
+ list_del_rcu(&event_sub->file_list);
+ /* subscription may not be used by the read API any more */
+ kfree_rcu(event_sub, rcu);
+ }
+
+ mutex_unlock(&ev_file->dev->devx_event_table.event_xa_lock);
+
+ /* free the pending events allocation */
+ if (!ev_file->omit_data) {
+ spin_lock_irq(&ev_file->lock);
+ list_for_each_entry_safe(entry, tmp,
+ &ev_file->event_list, list)
+ kfree(entry); /* read can't come any more */
+ spin_unlock_irq(&ev_file->lock);
+ }
+
+ uverbs_close_fd(filp);
+ put_device(&ev_file->dev->ib_dev.dev);
+ return 0;
+}
+
+static const struct file_operations devx_async_event_fops = {
+ .owner = THIS_MODULE,
+ .read = devx_async_event_read,
+ .poll = devx_async_event_poll,
+ .release = devx_async_event_close,
+ .llseek = no_llseek,
+};
+
static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
@@ -1748,6 +2701,21 @@ static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj,
return 0;
};
+static int devx_hot_unplug_async_event_file(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct devx_async_event_file *ev_file =
+ container_of(uobj, struct devx_async_event_file,
+ uobj);
+
+ spin_lock_irq(&ev_file->lock);
+ ev_file->is_destroyed = 1;
+ spin_unlock_irq(&ev_file->lock);
+
+ wake_up_interruptible(&ev_file->poll_wait);
+ return 0;
+};
+
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_UMEM_REG,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
@@ -1879,10 +2847,32 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_TYPE(u64),
UA_MANDATORY));
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT,
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE,
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u16)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
+
DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
- &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN));
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT));
DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
@@ -1913,6 +2903,24 @@ DECLARE_UVERBS_NAMED_OBJECT(
O_RDONLY),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC,
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
+ enum mlx5_ib_uapi_devx_create_event_channel_flags,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file),
+ devx_hot_unplug_async_event_file,
+ &devx_async_event_fops, "[devx_async_event]",
+ O_RDONLY),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC));
+
static bool devx_is_supported(struct ib_device *device)
{
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -1933,5 +2941,8 @@ const struct uapi_definition mlx5_ib_devx_defs[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
{},
};
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index 1fc302d41a53..b8841355fcd5 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -65,11 +65,12 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
struct uverbs_attr_bundle *attrs)
{
- struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
+ struct mlx5_flow_context flow_context = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_ib_flow_handler *flow_handler;
struct mlx5_ib_flow_matcher *fs_matcher;
struct ib_uobject **arr_flow_actions;
struct ib_uflow_resources *uflow_res;
+ struct mlx5_flow_act flow_act = {};
void *devx_obj;
int dest_id, dest_type;
void *cmd_in;
@@ -172,17 +173,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
arr_flow_actions[i]->object);
}
- ret = uverbs_copy_from(&flow_act.flow_tag, attrs,
+ ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
MLX5_IB_ATTR_CREATE_FLOW_TAG);
if (!ret) {
- if (flow_act.flow_tag >= BIT(24)) {
+ if (flow_context.flow_tag >= BIT(24)) {
ret = -EINVAL;
goto err_out;
}
- flow_act.flags |= FLOW_ACT_HAS_TAG;
+ flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
}
- flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act,
+ flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher,
+ &flow_context,
+ &flow_act,
counter_id,
cmd_in, inlen,
dest_id, dest_type);
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index cbcc40d776b9..74ce9249e75a 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -14,9 +14,10 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
int vport_index;
ibdev = mlx5_ib_get_uplink_ibdev(dev->priv.eswitch);
- vport_index = ibdev->free_port++;
+ vport_index = rep->vport_index;
ibdev->port[vport_index].rep = rep;
+ rep->rep_data[REP_IB].priv = ibdev;
write_lock(&ibdev->port[vport_index].roce.netdev_lock);
ibdev->port[vport_index].roce.netdev =
mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
@@ -28,7 +29,7 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
- int num_ports = MLX5_TOTAL_VPORTS(dev);
+ int num_ports = mlx5_eswitch_get_total_vports(dev);
const struct mlx5_ib_profile *profile;
struct mlx5_ib_dev *ibdev;
int vport_index;
@@ -50,7 +51,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
}
ibdev->is_rep = true;
- vport_index = ibdev->free_port++;
+ vport_index = rep->vport_index;
ibdev->port[vport_index].rep = rep;
ibdev->port[vport_index].roce.netdev =
mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
@@ -60,7 +61,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
if (!__mlx5_ib_add(ibdev, profile))
return -EINVAL;
- rep->rep_if[REP_IB].priv = ibdev;
+ rep->rep_data[REP_IB].priv = ibdev;
return 0;
}
@@ -68,15 +69,18 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
- struct mlx5_ib_dev *dev;
+ struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
+ struct mlx5_ib_port *port;
- if (!rep->rep_if[REP_IB].priv ||
- rep->vport != MLX5_VPORT_UPLINK)
- return;
+ port = &dev->port[rep->vport_index];
+ write_lock(&port->roce.netdev_lock);
+ port->roce.netdev = NULL;
+ write_unlock(&port->roce.netdev_lock);
+ rep->rep_data[REP_IB].priv = NULL;
+ port->rep = NULL;
- dev = mlx5_ib_rep_to_dev(rep);
- __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
- rep->rep_if[REP_IB].priv = NULL;
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}
static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
@@ -84,16 +88,17 @@ static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
return mlx5_ib_rep_to_dev(rep);
}
+static const struct mlx5_eswitch_rep_ops rep_ops = {
+ .load = mlx5_ib_vport_rep_load,
+ .unload = mlx5_ib_vport_rep_unload,
+ .get_proto_dev = mlx5_ib_vport_get_proto_dev,
+};
+
void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev)
{
struct mlx5_eswitch *esw = mdev->priv.eswitch;
- struct mlx5_eswitch_rep_if rep_if = {};
-
- rep_if.load = mlx5_ib_vport_rep_load;
- rep_if.unload = mlx5_ib_vport_rep_unload;
- rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
- mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_IB);
+ mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
}
void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev)
@@ -109,15 +114,15 @@ u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
}
struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
- int vport_index)
+ u16 vport_num)
{
- return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB);
+ return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_IB);
}
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
- int vport_index)
+ u16 vport_num)
{
- return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
+ return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH);
}
struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
@@ -125,9 +130,10 @@ struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
}
-struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+ u16 vport_num)
{
- return mlx5_eswitch_vport_rep(esw, vport);
+ return mlx5_eswitch_vport_rep(esw, vport_num);
}
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
index 1d9778da8a50..de43b423bafc 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.h
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -14,26 +14,26 @@ extern const struct mlx5_ib_profile uplink_rep_profile;
u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
- int vport_index);
+ u16 vport_num);
struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
- int vport_index);
+ u16 vport_num);
void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev);
void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev);
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq,
u16 port);
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
- int vport_index);
+ u16 vport_num);
#else /* CONFIG_MLX5_ESWITCH */
static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
{
- return SRIOV_NONE;
+ return MLX5_ESWITCH_NONE;
}
static inline
struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
- int vport_index)
+ u16 vport_num)
{
return NULL;
}
@@ -46,7 +46,7 @@ struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
static inline
struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
- int vport_index)
+ u16 vport_num)
{
return NULL;
}
@@ -63,7 +63,7 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
static inline
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
- int vport_index)
+ u16 vport_num)
{
return NULL;
}
@@ -72,6 +72,6 @@ struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
static inline
struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
{
- return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv;
+ return rep->rep_data[REP_IB].priv;
}
#endif /* __MLX5_IB_REP_H__ */
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 6c529e6f3a01..348c1df69cdc 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -200,19 +200,33 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
vl_15_dropped);
}
-static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num,
+static int process_pma_cmd(struct mlx5_ib_dev *dev, u8 port_num,
const struct ib_mad *in_mad, struct ib_mad *out_mad)
{
- int err;
+ struct mlx5_core_dev *mdev;
+ bool native_port = true;
+ u8 mdev_port_num;
void *out_cnt;
+ int err;
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
+ if (!mdev) {
+ /* Fail to get the native port, likely due to 2nd port is still
+ * unaffiliated. In such case default to 1st port and attached
+ * PF device.
+ */
+ native_port = false;
+ mdev = dev->mdev;
+ mdev_port_num = 1;
+ }
/* Declaring support of extended counters */
if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) {
struct ib_class_port_info cpi = {};
cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
memcpy((out_mad->data + 40), &cpi, sizeof(cpi));
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ goto done;
}
if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) {
@@ -221,11 +235,13 @@ static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num,
int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
out_cnt = kvzalloc(sz, GFP_KERNEL);
- if (!out_cnt)
- return IB_MAD_RESULT_FAILURE;
+ if (!out_cnt) {
+ err = IB_MAD_RESULT_FAILURE;
+ goto done;
+ }
err = mlx5_core_query_vport_counter(mdev, 0, 0,
- port_num, out_cnt, sz);
+ mdev_port_num, out_cnt, sz);
if (!err)
pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
} else {
@@ -234,20 +250,23 @@ static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num,
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
out_cnt = kvzalloc(sz, GFP_KERNEL);
- if (!out_cnt)
- return IB_MAD_RESULT_FAILURE;
+ if (!out_cnt) {
+ err = IB_MAD_RESULT_FAILURE;
+ goto done;
+ }
- err = mlx5_core_query_ib_ppcnt(mdev, port_num,
+ err = mlx5_core_query_ib_ppcnt(mdev, mdev_port_num,
out_cnt, sz);
if (!err)
pma_cnt_assign(pma_cnt, out_cnt);
- }
-
+ }
kvfree(out_cnt);
- if (err)
- return IB_MAD_RESULT_FAILURE;
-
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ err = err ? IB_MAD_RESULT_FAILURE :
+ IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+done:
+ if (native_port)
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+ return err;
}
int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -259,8 +278,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct ib_mad *in_mad = (const struct ib_mad *)in;
struct ib_mad *out_mad = (struct ib_mad *)out;
- struct mlx5_core_dev *mdev;
- u8 mdev_port_num;
int ret;
if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
@@ -269,19 +286,14 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
memset(out_mad->data, 0, sizeof(out_mad->data));
- mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
- if (!mdev)
- return IB_MAD_RESULT_FAILURE;
-
- if (MLX5_CAP_GEN(mdev, vport_counters) &&
+ if (MLX5_CAP_GEN(dev->mdev, vport_counters) &&
in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) {
- ret = process_pma_cmd(mdev, mdev_port_num, in_mad, out_mad);
+ ret = process_pma_cmd(dev, port_num, in_mad, out_mad);
} else {
ret = process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
in_mad, out_mad);
}
- mlx5_ib_put_native_port_mdev(dev, port_num);
return ret;
}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index abac70ad5c7c..c2a5780cb394 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -52,6 +52,7 @@
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
+#include <linux/mlx5/eswitch.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
@@ -888,7 +889,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, sho)) {
- props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
+ props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER;
/* At this stage no support for signature handover */
props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
IB_PROT_T10DIF_TYPE_2 |
@@ -1008,6 +1009,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len =
1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
+ props->max_pi_fast_reg_page_list_len =
+ props->max_fast_reg_page_list_len / 2;
get_atomic_caps_qp(dev, props);
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
@@ -1043,15 +1046,19 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
if (MLX5_CAP_GEN(mdev, tag_matching)) {
- props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
props->tm_caps.max_num_tags =
(1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
- props->tm_caps.flags = IB_TM_CAP_RC;
props->tm_caps.max_ops =
1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
}
+ if (MLX5_CAP_GEN(mdev, tag_matching) &&
+ MLX5_CAP_GEN(mdev, rndv_offload_rc)) {
+ props->tm_caps.flags = IB_TM_CAP_RNDV_RC;
+ props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
+ }
+
if (MLX5_CAP_GEN(dev->mdev, cq_moderation)) {
props->cq_caps.max_cq_moderation_count =
MLX5_MAX_CQ_COUNT;
@@ -2344,7 +2351,7 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
/* Allocation size must a multiple of the basic block size
* and a power of 2.
*/
- act_size = roundup(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev));
+ act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev));
act_size = roundup_pow_of_two(act_size);
dm->size = act_size;
@@ -2666,11 +2673,15 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
}
}
-static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
- u32 *match_v, const union ib_flow_spec *ib_spec,
+static int parse_flow_attr(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_spec *spec,
+ const union ib_flow_spec *ib_spec,
const struct ib_flow_attr *flow_attr,
struct mlx5_flow_act *action, u32 prev_type)
{
+ struct mlx5_flow_context *flow_context = &spec->flow_context;
+ u32 *match_c = spec->match_criteria;
+ u32 *match_v = spec->match_value;
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
@@ -2989,8 +3000,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
if (ib_spec->flow_tag.tag_id >= BIT(24))
return -EINVAL;
- action->flow_tag = ib_spec->flow_tag.tag_id;
- action->flags |= FLOW_ACT_HAS_TAG;
+ flow_context->flow_tag = ib_spec->flow_tag.tag_id;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
break;
case IB_FLOW_SPEC_ACTION_DROP:
if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
@@ -3084,7 +3095,8 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
return VALID_SPEC_NA;
return is_crypto && is_ipsec &&
- (!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ?
+ (!egress || (!is_drop &&
+ !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
VALID_SPEC_VALID : VALID_SPEC_INVALID;
}
@@ -3252,11 +3264,14 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
int max_table_size;
int num_entries;
int num_groups;
+ bool esw_encap;
u32 flags = 0;
int priority;
max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
log_max_ft_size));
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
enum mlx5_flow_namespace_type fn_type;
@@ -3269,10 +3284,10 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
if (ft_type == MLX5_IB_FT_RX) {
fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
prio = &dev->flow_db->prios[priority];
- if (!dev->is_rep &&
+ if (!dev->is_rep && !esw_encap &&
MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (!dev->is_rep &&
+ if (!dev->is_rep && !esw_encap &&
MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
reformat_l3_tunnel_to_l2))
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
@@ -3282,7 +3297,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
log_max_ft_size));
fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
prio = &dev->flow_db->egress_prios[priority];
- if (!dev->is_rep &&
+ if (!dev->is_rep && !esw_encap &&
MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
}
@@ -3464,6 +3479,37 @@ free:
return ret;
}
+static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ void *misc;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw,
+ rep->vport));
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ }
+}
+
static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
const struct ib_flow_attr *flow_attr,
@@ -3473,7 +3519,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
+ struct mlx5_flow_act flow_act = {};
struct mlx5_flow_spec *spec;
struct mlx5_flow_destination dest_arr[2] = {};
struct mlx5_flow_destination *rule_dst = dest_arr;
@@ -3504,8 +3550,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- err = parse_flow_attr(dev->mdev, spec->match_criteria,
- spec->match_value,
+ err = parse_flow_attr(dev->mdev, spec,
ib_flow, flow_attr, &flow_act,
prev_type);
if (err < 0)
@@ -3519,19 +3564,15 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
set_underlay_qp(dev, spec, underlay_qpn);
if (dev->is_rep) {
- void *misc;
+ struct mlx5_eswitch_rep *rep;
- if (!dev->port[flow_attr->port - 1].rep) {
+ rep = dev->port[flow_attr->port - 1].rep;
+ if (!rep) {
err = -EINVAL;
goto free;
}
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port,
- dev->port[flow_attr->port - 1].rep->vport);
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ mlx5_ib_set_rule_source_port(dev, spec, rep);
}
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
@@ -3572,11 +3613,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
}
- if ((flow_act.flags & FLOW_ACT_HAS_TAG) &&
+ if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) &&
(flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
- flow_act.flow_tag, flow_attr->type);
+ spec->flow_context.flow_tag, flow_attr->type);
err = -EINVAL;
goto free;
}
@@ -3892,6 +3933,7 @@ _get_flow_table(struct mlx5_ib_dev *dev,
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio = NULL;
int max_table_size = 0;
+ bool esw_encap;
u32 flags = 0;
int priority;
@@ -3900,22 +3942,30 @@ _get_flow_table(struct mlx5_ib_dev *dev,
else
priority = ib_prio_to_core_prio(fs_matcher->priority, false);
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- reformat_l3_tunnel_to_l2))
+ reformat_l3_tunnel_to_l2) &&
+ !esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
} else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
max_table_size = BIT(
MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
} else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
max_table_size = BIT(
MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) &&
+ esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
priority = FDB_BYPASS_PATH;
}
@@ -3947,6 +3997,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
struct mlx5_flow_destination *dst,
struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
struct mlx5_flow_act *flow_act,
void *cmd_in, int inlen,
int dst_num)
@@ -3969,6 +4020,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
fs_matcher->mask_len);
spec->match_criteria_enable = fs_matcher->match_criteria_enable;
+ spec->flow_context = *flow_context;
handler->rule = mlx5_add_flow_rules(ft, spec,
flow_act, dst, dst_num);
@@ -4033,6 +4085,7 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
struct mlx5_ib_flow_handler *
mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
struct mlx5_flow_act *flow_act,
u32 counter_id,
void *cmd_in, int inlen, int dest_id,
@@ -4085,7 +4138,8 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
dst_num++;
}
- handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
+ handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher,
+ flow_context, flow_act,
cmd_in, inlen, dst_num);
if (IS_ERR(handler)) {
@@ -4457,7 +4511,7 @@ static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
* lock/unlock above locks Now need to arm all involved CQs.
*/
list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
- mcq->comp(mcq);
+ mcq->comp(mcq, NULL);
}
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}
@@ -4676,7 +4730,7 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port)
int err = -ENOMEM;
struct ib_udata uhw = {.inlen = 0, .outlen = 0};
- pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
+ pprops = kzalloc(sizeof(*pprops), GFP_KERNEL);
if (!pprops)
goto out;
@@ -4690,7 +4744,6 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port)
goto out;
}
- memset(pprops, 0, sizeof(*pprops));
err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
if (err) {
mlx5_ib_warn(dev, "query_port %d failed %d\n",
@@ -4891,18 +4944,19 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
if (ret)
goto error0;
- devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL);
- if (IS_ERR(devr->c0)) {
- ret = PTR_ERR(devr->c0);
+ devr->c0 = rdma_zalloc_drv_obj(ibdev, ib_cq);
+ if (!devr->c0) {
+ ret = -ENOMEM;
goto error1;
}
- devr->c0->device = &dev->ib_dev;
- devr->c0->uobject = NULL;
- devr->c0->comp_handler = NULL;
- devr->c0->event_handler = NULL;
- devr->c0->cq_context = NULL;
+
+ devr->c0->device = &dev->ib_dev;
atomic_set(&devr->c0->usecnt, 0);
+ ret = mlx5_ib_create_cq(devr->c0, &cq_attr, NULL);
+ if (ret)
+ goto err_create_cq;
+
devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
if (IS_ERR(devr->x0)) {
ret = PTR_ERR(devr->x0);
@@ -4994,6 +5048,8 @@ error3:
mlx5_ib_dealloc_xrcd(devr->x0, NULL);
error2:
mlx5_ib_destroy_cq(devr->c0, NULL);
+err_create_cq:
+ kfree(devr->c0);
error1:
mlx5_ib_dealloc_pd(devr->p0, NULL);
error0:
@@ -5012,6 +5068,7 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr)
mlx5_ib_dealloc_xrcd(devr->x0, NULL);
mlx5_ib_dealloc_xrcd(devr->x1, NULL);
mlx5_ib_destroy_cq(devr->c0, NULL);
+ kfree(devr->c0);
mlx5_ib_dealloc_pd(devr->p0, NULL);
kfree(devr->p0);
@@ -5424,7 +5481,8 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
struct mlx5_ib_port *port,
- struct rdma_hw_stats *stats)
+ struct rdma_hw_stats *stats,
+ u16 set_id)
{
int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
void *out;
@@ -5435,9 +5493,7 @@ static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
if (!out)
return -ENOMEM;
- ret = mlx5_core_query_q_counter(mdev,
- port->cnts.set_id, 0,
- out, outlen);
+ ret = mlx5_core_query_q_counter(mdev, set_id, 0, out, outlen);
if (ret)
goto free;
@@ -5497,7 +5553,8 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
port->cnts.num_ext_ppcnt_counters;
/* q_counters are per IB device, query the master mdev */
- ret = mlx5_ib_query_q_counters(dev->mdev, port, stats);
+ ret = mlx5_ib_query_q_counters(dev->mdev, port, stats,
+ port->cnts.set_id);
if (ret)
return ret;
@@ -5533,6 +5590,68 @@ done:
return num_counters;
}
+static struct rdma_hw_stats *
+mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ struct mlx5_ib_port *port = &dev->port[counter->port - 1];
+
+ /* Q counters are in the beginning of all counters */
+ return rdma_alloc_hw_stats_struct(port->cnts.names,
+ port->cnts.num_q_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ struct mlx5_ib_port *port = &dev->port[counter->port - 1];
+
+ return mlx5_ib_query_q_counters(dev->mdev, port,
+ counter->stats, counter->id);
+}
+
+static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *qp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ u16 cnt_set_id = 0;
+ int err;
+
+ if (!counter->id) {
+ err = mlx5_cmd_alloc_q_counter(dev->mdev,
+ &cnt_set_id,
+ MLX5_SHARED_RESOURCE_UID);
+ if (err)
+ return err;
+ counter->id = cnt_set_id;
+ }
+
+ err = mlx5_ib_qp_set_counter(qp, counter);
+ if (err)
+ goto fail_set_counter;
+
+ return 0;
+
+fail_set_counter:
+ mlx5_core_dealloc_q_counter(dev->mdev, cnt_set_id);
+ counter->id = 0;
+
+ return err;
+}
+
+static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
+{
+ return mlx5_ib_qp_set_counter(qp, NULL);
+}
+
+static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+
+ return mlx5_core_dealloc_q_counter(dev->mdev, counter->id);
+}
+
static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type,
struct rdma_netdev_alloc_params *params)
@@ -6044,7 +6163,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
- dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
dev->ib_dev.phys_port_cnt = dev->num_ports;
@@ -6124,8 +6242,13 @@ static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
}
static const struct ib_device_ops mlx5_ib_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_MLX5,
+ .uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION,
+
.add_gid = mlx5_ib_add_gid,
.alloc_mr = mlx5_ib_alloc_mr,
+ .alloc_mr_integrity = mlx5_ib_alloc_mr_integrity,
.alloc_pd = mlx5_ib_alloc_pd,
.alloc_ucontext = mlx5_ib_alloc_ucontext,
.attach_mcast = mlx5_ib_mcg_attach,
@@ -6155,6 +6278,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.get_dma_mr = mlx5_ib_get_dma_mr,
.get_link_layer = mlx5_ib_port_link_layer,
.map_mr_sg = mlx5_ib_map_mr_sg,
+ .map_mr_sg_pi = mlx5_ib_map_mr_sg_pi,
.mmap = mlx5_ib_mmap,
.modify_cq = mlx5_ib_modify_cq,
.modify_device = mlx5_ib_modify_device,
@@ -6179,6 +6303,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.resize_cq = mlx5_ib_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext),
@@ -6221,7 +6346,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
struct mlx5_core_dev *mdev = dev->mdev;
int err;
- dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
dev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -6290,7 +6414,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
if (mlx5_accel_ipsec_device_caps(dev->mdev) &
MLX5_ACCEL_IPSEC_CAP_DEVICE)
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops);
- dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops);
if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
@@ -6305,6 +6428,8 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
mutex_init(&dev->lb.mutex);
+ dev->ib_dev.use_cq_dim = true;
+
return 0;
}
@@ -6452,6 +6577,11 @@ static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = {
.alloc_hw_stats = mlx5_ib_alloc_hw_stats,
.get_hw_stats = mlx5_ib_get_hw_stats,
+ .counter_bind_qp = mlx5_ib_counter_bind_qp,
+ .counter_unbind_qp = mlx5_ib_counter_unbind_qp,
+ .counter_dealloc = mlx5_ib_counter_dealloc,
+ .counter_alloc_stats = mlx5_ib_counter_alloc_stats,
+ .counter_update_stats = mlx5_ib_counter_update_stats,
};
static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
@@ -6572,15 +6702,19 @@ static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev)
int uid;
uid = mlx5_ib_devx_create(dev, false);
- if (uid > 0)
+ if (uid > 0) {
dev->devx_whitelist_uid = uid;
+ mlx5_ib_devx_init_event_table(dev);
+ }
return 0;
}
static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
{
- if (dev->devx_whitelist_uid)
+ if (dev->devx_whitelist_uid) {
+ mlx5_ib_devx_cleanup_event_table(dev);
mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
+ }
}
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
@@ -6779,7 +6913,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
printk_once(KERN_INFO "%s", mlx5_version);
if (MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) {
if (!mlx5_core_mp_enabled(mdev))
mlx5_ib_register_vport_reps(mdev);
return mdev;
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 9f90be296ee0..fe1a76d8531c 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -55,9 +55,10 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
int i = 0;
struct scatterlist *sg;
int entry;
- unsigned long page_shift = umem->page_shift;
if (umem->is_odp) {
+ unsigned int page_shift = to_ib_umem_odp(umem)->page_shift;
+
*ncont = ib_umem_page_count(umem);
*count = *ncont << (page_shift - PAGE_SHIFT);
*shift = page_shift;
@@ -67,15 +68,15 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
return;
}
- addr = addr >> page_shift;
+ addr = addr >> PAGE_SHIFT;
tmp = (unsigned long)addr;
m = find_first_bit(&tmp, BITS_PER_LONG);
if (max_page_shift)
- m = min_t(unsigned long, max_page_shift - page_shift, m);
+ m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m);
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> page_shift;
- pfn = sg_dma_address(sg) >> page_shift;
+ len = sg_dma_len(sg) >> PAGE_SHIFT;
+ pfn = sg_dma_address(sg) >> PAGE_SHIFT;
if (base + p != pfn) {
/* If either the offset or the new
* base are unaligned update m
@@ -107,7 +108,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
*ncont = 0;
}
- *shift = page_shift + m;
+ *shift = PAGE_SHIFT + m;
*count = i;
}
@@ -140,8 +141,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
int page_shift, size_t offset, size_t num_pages,
__be64 *pas, int access_flags)
{
- unsigned long umem_page_shift = umem->page_shift;
- int shift = page_shift - umem_page_shift;
+ int shift = page_shift - PAGE_SHIFT;
int mask = (1 << shift) - 1;
int i, k, idx;
u64 cur = 0;
@@ -165,7 +165,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
i = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> umem_page_shift;
+ len = sg_dma_len(sg) >> PAGE_SHIFT;
base = sg_dma_address(sg);
/* Skip elements below offset */
@@ -184,7 +184,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
for (; k < len; k++) {
if (!(i & mask)) {
- cur = base + (k << umem_page_shift);
+ cur = base + (k << PAGE_SHIFT);
cur |= access_flags;
idx = (i >> shift) - offset;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 40eb8be482e4..c482f19958b3 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -431,9 +431,6 @@ struct mlx5_ib_qp {
int create_type;
- /* Store signature errors */
- bool signature_en;
-
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
@@ -442,6 +439,10 @@ struct mlx5_ib_qp {
u32 flags_en;
/* storage for qp sub type when core qp type is IB_QPT_DRIVER */
enum ib_qp_type qp_sub_type;
+ /* A flag to indicate if there's a new counter is configured
+ * but not take effective
+ */
+ u32 counter_pending;
};
struct mlx5_ib_cq_buf {
@@ -587,6 +588,9 @@ struct mlx5_ib_mr {
void *descs;
dma_addr_t desc_map;
int ndescs;
+ int data_length;
+ int meta_ndescs;
+ int meta_length;
int max_descs;
int desc_size;
int access_mode;
@@ -605,6 +609,13 @@ struct mlx5_ib_mr {
int access_flags; /* Needed for rereg MR */
struct mlx5_ib_mr *parent;
+ /* Needed for IB_MR_TYPE_INTEGRITY */
+ struct mlx5_ib_mr *pi_mr;
+ struct mlx5_ib_mr *klm_mr;
+ struct mlx5_ib_mr *mtt_mr;
+ u64 data_iova;
+ u64 pi_iova;
+
atomic_t num_leaf_free;
wait_queue_head_t q_leaf_free;
struct mlx5_async_work cb_work;
@@ -920,6 +931,7 @@ struct mlx5_ib_lb_state {
};
struct mlx5_ib_pf_eq {
+ struct notifier_block irq_nb;
struct mlx5_ib_dev *dev;
struct mlx5_eq *core;
struct work_struct work;
@@ -928,6 +940,13 @@ struct mlx5_ib_pf_eq {
mempool_t *pool;
};
+struct mlx5_devx_event_table {
+ struct mlx5_nb devx_nb;
+ /* serialize updating the event_xa */
+ struct mutex event_xa_lock;
+ struct xarray event_xa;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
@@ -977,7 +996,7 @@ struct mlx5_ib_dev {
u16 devx_whitelist_uid;
struct mlx5_srq_table srq_table;
struct mlx5_async_ctx async_ctx;
- int free_port;
+ struct mlx5_devx_event_table devx_event_table;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1115,10 +1134,9 @@ int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
int buflen, size_t *bc);
int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index,
void *buffer, int buflen, size_t *bc);
-struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
-int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
@@ -1148,8 +1166,15 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg, struct ib_udata *udata);
+struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_sg,
+ u32 max_num_meta_sg);
int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
+int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset);
int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const struct ib_mad_hdr *in, size_t in_mad_size,
@@ -1201,7 +1226,7 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
-int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
+void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata);
struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
@@ -1311,11 +1336,14 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
+void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev);
+void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev);
const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
extern const struct uapi_definition mlx5_ib_devx_defs[];
extern const struct uapi_definition mlx5_ib_flow_defs[];
struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
struct mlx5_flow_act *flow_act, u32 counter_id,
void *cmd_in, int inlen, int dest_id, int dest_type);
bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
@@ -1327,6 +1355,8 @@ static inline int
mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
bool is_user) { return -EOPNOTSUPP; }
static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
+static inline void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) {}
+static inline void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) {}
static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
int *dest_type)
{
@@ -1442,4 +1472,6 @@ void mlx5_ib_put_xlt_emergency_page(void);
int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
struct mlx5_bfreg_info *bfregi, u32 bfregn,
bool dyn_bfreg);
+
+int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 5f09699fab98..20ece6e0b2fc 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -130,7 +130,7 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
struct mlx5_cache_ent *ent = &cache->ent[c];
u8 key;
unsigned long flags;
- struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
+ struct xarray *mkeys = &dev->mdev->priv.mkey_table;
int err;
spin_lock_irqsave(&ent->lock, flags);
@@ -158,12 +158,12 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
ent->size++;
spin_unlock_irqrestore(&ent->lock, flags);
- write_lock_irqsave(&table->lock, flags);
- err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
- &mr->mmkey);
+ xa_lock_irqsave(mkeys, flags);
+ err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey, GFP_ATOMIC));
+ xa_unlock_irqrestore(mkeys, flags);
if (err)
pr_err("Error inserting to mkey tree. 0x%x\n", -err);
- write_unlock_irqrestore(&table->lock, flags);
if (!completion_done(&ent->compl))
complete(&ent->compl);
@@ -1507,10 +1507,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
return 0;
err:
- if (mr->umem) {
- ib_umem_release(mr->umem);
- mr->umem = NULL;
- }
+ ib_umem_release(mr->umem);
+ mr->umem = NULL;
+
clean_mr(dev, mr);
return err;
}
@@ -1606,8 +1605,9 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
synchronize_srcu(&dev->mr_srcu);
/* Destroy all page mappings */
if (umem_odp->page_list)
- mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem),
- ib_umem_end(umem));
+ mlx5_ib_invalidate_range(umem_odp,
+ ib_umem_start(umem_odp),
+ ib_umem_end(umem_odp));
else
mlx5_ib_free_implicit_mr(mr);
/*
@@ -1629,28 +1629,85 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
* remove the DMA mapping.
*/
mlx5_mr_cache_free(dev, mr);
- if (umem) {
- ib_umem_release(umem);
+ ib_umem_release(umem);
+ if (umem)
atomic_sub(npages, &dev->mdev->priv.reg_pages);
- }
+
if (!mr->allocated_from_cache)
kfree(mr);
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
- dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr));
+ struct mlx5_ib_mr *mmr = to_mmr(ibmr);
+
+ if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
+ dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr);
+ dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr);
+ }
+
+ dereg_mr(to_mdev(ibmr->device), mmr);
+
return 0;
}
-struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
+ int access_mode, int page_shift)
+{
+ void *mkc;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
+ MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, log_page_size, page_shift);
+}
+
+static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int ndescs, int desc_size, int page_shift,
+ int access_mode, u32 *in, int inlen)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int err;
+
+ mr->access_mode = access_mode;
+ mr->desc_size = desc_size;
+ mr->max_descs = ndescs;
+
+ err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size);
+ if (err)
+ return err;
+
+ mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift);
+
+ err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
+ if (err)
+ goto err_free_descs;
+
+ mr->mmkey.type = MLX5_MKEY_MR;
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
+
+ return 0;
+
+err_free_descs:
+ mlx5_free_priv_descs(mr);
+ return err;
+}
+
+static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
+ u32 max_num_sg, u32 max_num_meta_sg,
+ int desc_size, int access_mode)
+{
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- int ndescs = ALIGN(max_num_sg, 4);
+ int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
+ int page_shift = 0;
struct mlx5_ib_mr *mr;
- void *mkc;
u32 *in;
int err;
@@ -1658,99 +1715,168 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
if (!mr)
return ERR_PTR(-ENOMEM);
+ mr->ibmr.pd = pd;
+ mr->ibmr.device = pd->device;
+
in = kzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_free;
}
+ if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
+ page_shift = PAGE_SHIFT;
+
+ err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift,
+ access_mode, in, inlen);
+ if (err)
+ goto err_free_in;
+
+ mr->umem = NULL;
+ kfree(in);
+
+ return mr;
+
+err_free_in:
+ kfree(in);
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int ndescs, u32 *in, int inlen)
+{
+ return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt),
+ PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in,
+ inlen);
+}
+
+static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int ndescs, u32 *in, int inlen)
+{
+ return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm),
+ 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
+}
+
+static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int max_num_sg, int max_num_meta_sg,
+ u32 *in, int inlen)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ u32 psv_index[2];
+ void *mkc;
+ int err;
+
+ mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
+ if (!mr->sig)
+ return -ENOMEM;
+
+ /* create mem & wire PSVs */
+ err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index);
+ if (err)
+ goto err_free_sig;
+
+ mr->sig->psv_memory.psv_idx = psv_index[0];
+ mr->sig->psv_wire.psv_idx = psv_index[1];
+
+ mr->sig->sig_status_checked = true;
+ mr->sig->sig_err_exists = false;
+ /* Next UMR, Arm SIGERR */
+ ++mr->sig->sigerr_count;
+ mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
+ sizeof(struct mlx5_klm),
+ MLX5_MKC_ACCESS_MODE_KLMS);
+ if (IS_ERR(mr->klm_mr)) {
+ err = PTR_ERR(mr->klm_mr);
+ goto err_destroy_psv;
+ }
+ mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
+ sizeof(struct mlx5_mtt),
+ MLX5_MKC_ACCESS_MODE_MTT);
+ if (IS_ERR(mr->mtt_mr)) {
+ err = PTR_ERR(mr->mtt_mr);
+ goto err_free_klm_mr;
+ }
+
+ /* Set bsf descriptors for mkey */
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
- MLX5_SET(mkc, mkc, free, 1);
- MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, bsf_en, 1);
+ MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
- if (mr_type == IB_MR_TYPE_MEM_REG) {
- mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
- MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
- err = mlx5_alloc_priv_descs(pd->device, mr,
- ndescs, sizeof(struct mlx5_mtt));
- if (err)
- goto err_free_in;
+ err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0,
+ MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
+ if (err)
+ goto err_free_mtt_mr;
- mr->desc_size = sizeof(struct mlx5_mtt);
- mr->max_descs = ndescs;
- } else if (mr_type == IB_MR_TYPE_SG_GAPS) {
- mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
+ return 0;
- err = mlx5_alloc_priv_descs(pd->device, mr,
- ndescs, sizeof(struct mlx5_klm));
- if (err)
- goto err_free_in;
- mr->desc_size = sizeof(struct mlx5_klm);
- mr->max_descs = ndescs;
- } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
- u32 psv_index[2];
-
- MLX5_SET(mkc, mkc, bsf_en, 1);
- MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
- mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
- if (!mr->sig) {
- err = -ENOMEM;
- goto err_free_in;
- }
+err_free_mtt_mr:
+ dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr);
+ mr->mtt_mr = NULL;
+err_free_klm_mr:
+ dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr);
+ mr->klm_mr = NULL;
+err_destroy_psv:
+ if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
+ mr->sig->psv_memory.psv_idx);
+ if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
+ mr->sig->psv_wire.psv_idx);
+err_free_sig:
+ kfree(mr->sig);
- /* create mem & wire PSVs */
- err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
- 2, psv_index);
- if (err)
- goto err_free_sig;
+ return err;
+}
+
+static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type, u32 max_num_sg,
+ u32 max_num_meta_sg)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ int ndescs = ALIGN(max_num_sg, 4);
+ struct mlx5_ib_mr *mr;
+ u32 *in;
+ int err;
- mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
- mr->sig->psv_memory.psv_idx = psv_index[0];
- mr->sig->psv_wire.psv_idx = psv_index[1];
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
- mr->sig->sig_status_checked = true;
- mr->sig->sig_err_exists = false;
- /* Next UMR, Arm SIGERR */
- ++mr->sig->sigerr_count;
- } else {
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ mr->ibmr.device = pd->device;
+ mr->umem = NULL;
+
+ switch (mr_type) {
+ case IB_MR_TYPE_MEM_REG:
+ err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen);
+ break;
+ case IB_MR_TYPE_SG_GAPS:
+ err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen);
+ break;
+ case IB_MR_TYPE_INTEGRITY:
+ err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg,
+ max_num_meta_sg, in, inlen);
+ break;
+ default:
mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
err = -EINVAL;
- goto err_free_in;
}
- MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3);
- MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7);
- MLX5_SET(mkc, mkc, umr_en, 1);
-
- mr->ibmr.device = pd->device;
- err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
if (err)
- goto err_destroy_psv;
+ goto err_free_in;
- mr->mmkey.type = MLX5_MKEY_MR;
- mr->ibmr.lkey = mr->mmkey.key;
- mr->ibmr.rkey = mr->mmkey.key;
- mr->umem = NULL;
kfree(in);
return &mr->ibmr;
-err_destroy_psv:
- if (mr->sig) {
- if (mlx5_core_destroy_psv(dev->mdev,
- mr->sig->psv_memory.psv_idx))
- mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
- mr->sig->psv_memory.psv_idx);
- if (mlx5_core_destroy_psv(dev->mdev,
- mr->sig->psv_wire.psv_idx))
- mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
- mr->sig->psv_wire.psv_idx);
- }
- mlx5_free_priv_descs(mr);
-err_free_sig:
- kfree(mr->sig);
err_free_in:
kfree(in);
err_free:
@@ -1758,6 +1884,19 @@ err_free:
return ERR_PTR(err);
}
+struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg, struct ib_udata *udata)
+{
+ return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
+}
+
+struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_sg, u32 max_num_meta_sg)
+{
+ return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
+ max_num_meta_sg);
+}
+
struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata)
{
@@ -1887,16 +2026,53 @@ done:
}
static int
+mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ unsigned int sg_offset = 0;
+ int n = 0;
+
+ mr->meta_length = 0;
+ if (data_sg_nents == 1) {
+ n++;
+ mr->ndescs = 1;
+ if (data_sg_offset)
+ sg_offset = *data_sg_offset;
+ mr->data_length = sg_dma_len(data_sg) - sg_offset;
+ mr->data_iova = sg_dma_address(data_sg) + sg_offset;
+ if (meta_sg_nents == 1) {
+ n++;
+ mr->meta_ndescs = 1;
+ if (meta_sg_offset)
+ sg_offset = *meta_sg_offset;
+ else
+ sg_offset = 0;
+ mr->meta_length = sg_dma_len(meta_sg) - sg_offset;
+ mr->pi_iova = sg_dma_address(meta_sg) + sg_offset;
+ }
+ ibmr->length = mr->data_length + mr->meta_length;
+ }
+
+ return n;
+}
+
+static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
struct scatterlist *sgl,
unsigned short sg_nents,
- unsigned int *sg_offset_p)
+ unsigned int *sg_offset_p,
+ struct scatterlist *meta_sgl,
+ unsigned short meta_sg_nents,
+ unsigned int *meta_sg_offset_p)
{
struct scatterlist *sg = sgl;
struct mlx5_klm *klms = mr->descs;
unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
u32 lkey = mr->ibmr.pd->local_dma_lkey;
- int i;
+ int i, j = 0;
mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
mr->ibmr.length = 0;
@@ -1911,12 +2087,36 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
sg_offset = 0;
}
- mr->ndescs = i;
if (sg_offset_p)
*sg_offset_p = sg_offset;
- return i;
+ mr->ndescs = i;
+ mr->data_length = mr->ibmr.length;
+
+ if (meta_sg_nents) {
+ sg = meta_sgl;
+ sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
+ for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
+ if (unlikely(i + j >= mr->max_descs))
+ break;
+ klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
+ sg_offset);
+ klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
+ sg_offset);
+ klms[i + j].key = cpu_to_be32(lkey);
+ mr->ibmr.length += sg_dma_len(sg) - sg_offset;
+
+ sg_offset = 0;
+ }
+ if (meta_sg_offset_p)
+ *meta_sg_offset_p = sg_offset;
+
+ mr->meta_ndescs = j;
+ mr->meta_length = mr->ibmr.length - mr->data_length;
+ }
+
+ return i + j;
}
static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
@@ -1933,6 +2133,181 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
return 0;
}
+static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ __be64 *descs;
+
+ if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs))
+ return -ENOMEM;
+
+ descs = mr->descs;
+ descs[mr->ndescs + mr->meta_ndescs++] =
+ cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
+
+ return 0;
+}
+
+static int
+mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
+ int n;
+
+ pi_mr->ndescs = 0;
+ pi_mr->meta_ndescs = 0;
+ pi_mr->meta_length = 0;
+
+ ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ pi_mr->ibmr.page_size = ibmr->page_size;
+ n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
+ mlx5_set_page);
+ if (n != data_sg_nents)
+ return n;
+
+ pi_mr->data_iova = pi_mr->ibmr.iova;
+ pi_mr->data_length = pi_mr->ibmr.length;
+ pi_mr->ibmr.length = pi_mr->data_length;
+ ibmr->length = pi_mr->data_length;
+
+ if (meta_sg_nents) {
+ u64 page_mask = ~((u64)ibmr->page_size - 1);
+ u64 iova = pi_mr->data_iova;
+
+ n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
+ meta_sg_offset, mlx5_set_page_pi);
+
+ pi_mr->meta_length = pi_mr->ibmr.length;
+ /*
+ * PI address for the HW is the offset of the metadata address
+ * relative to the first data page address.
+ * It equals to first data page address + size of data pages +
+ * metadata offset at the first metadata page
+ */
+ pi_mr->pi_iova = (iova & page_mask) +
+ pi_mr->ndescs * ibmr->page_size +
+ (pi_mr->ibmr.iova & ~page_mask);
+ /*
+ * In order to use one MTT MR for data and metadata, we register
+ * also the gaps between the end of the data and the start of
+ * the metadata (the sig MR will verify that the HW will access
+ * to right addresses). This mapping is safe because we use
+ * internal mkey for the registration.
+ */
+ pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
+ pi_mr->ibmr.iova = iova;
+ ibmr->length += pi_mr->meta_length;
+ }
+
+ ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ return n;
+}
+
+static int
+mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_mr *pi_mr = mr->klm_mr;
+ int n;
+
+ pi_mr->ndescs = 0;
+ pi_mr->meta_ndescs = 0;
+ pi_mr->meta_length = 0;
+
+ ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
+ meta_sg, meta_sg_nents, meta_sg_offset);
+
+ ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ /* This is zero-based memory region */
+ pi_mr->data_iova = 0;
+ pi_mr->ibmr.iova = 0;
+ pi_mr->pi_iova = pi_mr->data_length;
+ ibmr->length = pi_mr->ibmr.length;
+
+ return n;
+}
+
+int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_mr *pi_mr = NULL;
+ int n;
+
+ WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
+
+ mr->ndescs = 0;
+ mr->data_length = 0;
+ mr->data_iova = 0;
+ mr->meta_ndescs = 0;
+ mr->pi_iova = 0;
+ /*
+ * As a performance optimization, if possible, there is no need to
+ * perform UMR operation to register the data/metadata buffers.
+ * First try to map the sg lists to PA descriptors with local_dma_lkey.
+ * Fallback to UMR only in case of a failure.
+ */
+ n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg, meta_sg_nents,
+ meta_sg_offset);
+ if (n == data_sg_nents + meta_sg_nents)
+ goto out;
+ /*
+ * As a performance optimization, if possible, there is no need to map
+ * the sg lists to KLM descriptors. First try to map the sg lists to MTT
+ * descriptors and fallback to KLM only in case of a failure.
+ * It's more efficient for the HW to work with MTT descriptors
+ * (especially in high load).
+ * Use KLM (indirect access) only if it's mandatory.
+ */
+ pi_mr = mr->mtt_mr;
+ n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg, meta_sg_nents,
+ meta_sg_offset);
+ if (n == data_sg_nents + meta_sg_nents)
+ goto out;
+
+ pi_mr = mr->klm_mr;
+ n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg, meta_sg_nents,
+ meta_sg_offset);
+ if (unlikely(n != data_sg_nents + meta_sg_nents))
+ return -ENOMEM;
+
+out:
+ /* This is zero-based memory region */
+ ibmr->iova = 0;
+ mr->pi_mr = pi_mr;
+ if (pi_mr)
+ ibmr->sig_attrs->meta_length = pi_mr->meta_length;
+ else
+ ibmr->sig_attrs->meta_length = mr->meta_length;
+
+ return 0;
+}
+
int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset)
{
@@ -1946,7 +2321,8 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
DMA_TO_DEVICE);
if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
- n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
+ n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
+ NULL);
else
n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
mlx5_set_page);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 91507a2e9290..5b642d81e617 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -150,7 +150,7 @@ static struct ib_umem_odp *odp_lookup(u64 start, u64 length,
if (!rb)
goto not_found;
odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
- if (ib_umem_start(&odp->umem) > start + length)
+ if (ib_umem_start(odp) > start + length)
goto not_found;
}
not_found:
@@ -200,7 +200,7 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
static void mr_leaf_free_action(struct work_struct *work)
{
struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
- int idx = ib_umem_start(&odp->umem) >> MLX5_IMR_MTT_SHIFT;
+ int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
mr->parent = NULL;
@@ -224,7 +224,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT /
sizeof(struct mlx5_mtt)) - 1;
u64 idx = 0, blk_start_idx = 0;
- struct ib_umem *umem;
int in_block = 0;
u64 addr;
@@ -232,15 +231,14 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
pr_err("invalidation called on NULL umem or non-ODP umem\n");
return;
}
- umem = &umem_odp->umem;
mr = umem_odp->private;
if (!mr || !mr->ibmr.pd)
return;
- start = max_t(u64, ib_umem_start(umem), start);
- end = min_t(u64, ib_umem_end(umem), end);
+ start = max_t(u64, ib_umem_start(umem_odp), start);
+ end = min_t(u64, ib_umem_end(umem_odp), end);
/*
* Iteration one - zap the HW's MTTs. The notifiers_count ensures that
@@ -249,8 +247,8 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
* but they will write 0s as well, so no difference in the end result.
*/
- for (addr = start; addr < end; addr += BIT(umem->page_shift)) {
- idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
+ for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) {
+ idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
/*
* Strive to write the MTTs in chunks, but avoid overwriting
* non-existing MTTs. The huristic here can be improved to
@@ -544,13 +542,12 @@ static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end,
void *cookie)
{
struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;
- struct ib_umem *umem = &umem_odp->umem;
if (mr->parent != imr)
return 0;
- ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
- ib_umem_end(umem));
+ ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+ ib_umem_end(umem_odp));
if (umem_odp->dying)
return 0;
@@ -602,9 +599,9 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
}
next_mr:
- size = min_t(size_t, bcnt, ib_umem_end(&odp->umem) - io_virt);
+ size = min_t(size_t, bcnt, ib_umem_end(odp) - io_virt);
- page_shift = mr->umem->page_shift;
+ page_shift = odp->page_shift;
page_mask = ~(BIT(page_shift) - 1);
start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
access_mask = ODP_READ_ALLOWED_BIT;
@@ -768,7 +765,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
bcnt -= *bytes_committed;
next_mr:
- mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key));
+ mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(key));
if (!mkey_is_eq(mmkey, key)) {
mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
ret = -EFAULT;
@@ -1488,9 +1485,11 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
mlx5_eq_update_ci(eq->core, cc, 1);
}
-static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr)
+static int mlx5_ib_eq_pf_int(struct notifier_block *nb, unsigned long type,
+ void *data)
{
- struct mlx5_ib_pf_eq *eq = eq_ptr;
+ struct mlx5_ib_pf_eq *eq =
+ container_of(nb, struct mlx5_ib_pf_eq, irq_nb);
unsigned long flags;
if (spin_trylock_irqsave(&eq->lock, flags)) {
@@ -1553,20 +1552,26 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
goto err_mempool;
}
+ eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
param = (struct mlx5_eq_param) {
- .index = MLX5_EQ_PFAULT_IDX,
- .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
+ .irq_index = 0,
.nent = MLX5_IB_NUM_PF_EQE,
- .context = eq,
- .handler = mlx5_ib_eq_pf_int
};
- eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", &param);
+ param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
+ eq->core = mlx5_eq_create_generic(dev->mdev, &param);
if (IS_ERR(eq->core)) {
err = PTR_ERR(eq->core);
goto err_wq;
}
+ err = mlx5_eq_enable(dev->mdev, eq->core, &eq->irq_nb);
+ if (err) {
+ mlx5_ib_err(dev, "failed to enable odp EQ %d\n", err);
+ goto err_eq;
+ }
return 0;
+err_eq:
+ mlx5_eq_destroy_generic(dev->mdev, eq->core);
err_wq:
destroy_workqueue(eq->wq);
err_mempool:
@@ -1579,6 +1584,7 @@ mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
{
int err;
+ mlx5_eq_disable(dev->mdev, eq->core, &eq->irq_nb);
err = mlx5_eq_destroy_generic(dev->mdev, eq->core);
cancel_work_sync(&eq->work);
destroy_workqueue(eq->wq);
@@ -1677,8 +1683,8 @@ static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev,
struct mlx5_core_mkey *mmkey;
struct mlx5_ib_mr *mr;
- mmkey = __mlx5_mr_lookup(dev->mdev,
- mlx5_base_mkey(sg_list[i].lkey));
+ mmkey = xa_load(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(sg_list[i].lkey));
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
atomic_dec(&mr->num_pending_prefetch);
}
@@ -1697,8 +1703,8 @@ static bool num_pending_prefetch_inc(struct ib_pd *pd,
struct mlx5_core_mkey *mmkey;
struct mlx5_ib_mr *mr;
- mmkey = __mlx5_mr_lookup(dev->mdev,
- mlx5_base_mkey(sg_list[i].lkey));
+ mmkey = xa_load(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(sg_list[i].lkey));
if (!mmkey || mmkey->key != sg_list[i].lkey) {
ret = false;
break;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index f6623c77443a..2a97619ed603 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -34,6 +34,7 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/rdma_counter.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
#include "ib_rep.h"
@@ -442,9 +443,9 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
}
size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN &&
+ if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN &&
ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE)
- return MLX5_SIG_WQE_SIZE;
+ return MLX5_SIG_WQE_SIZE;
else
return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
}
@@ -496,9 +497,6 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
sizeof(struct mlx5_wqe_inline_seg);
attr->cap.max_inline_data = qp->max_inline_data;
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
- qp->signature_en = true;
-
wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
@@ -790,8 +788,7 @@ static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
atomic_dec(&dev->delay_drop.rqs_cnt);
mlx5_ib_db_unmap_user(context, &rwq->db);
- if (rwq->umem)
- ib_umem_release(rwq->umem);
+ ib_umem_release(rwq->umem);
}
static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
@@ -977,8 +974,7 @@ err_free:
kvfree(*in);
err_umem:
- if (ubuffer->umem)
- ib_umem_release(ubuffer->umem);
+ ib_umem_release(ubuffer->umem);
err_bfreg:
if (bfregn != MLX5_IB_INVALID_BFREG)
@@ -997,8 +993,7 @@ static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd,
ibucontext);
mlx5_ib_db_unmap_user(context, &qp->db);
- if (base->ubuffer.umem)
- ib_umem_release(base->ubuffer.umem);
+ ib_umem_release(base->ubuffer.umem);
/*
* Free only the BFREGs which are handled by the kernel.
@@ -1042,7 +1037,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
void *qpc;
int err;
- if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN |
+ if (init_attr->create_flags & ~(IB_QP_CREATE_INTEGRITY_EN |
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
IB_QP_CREATE_IPOIB_UD_LSO |
IB_QP_CREATE_NETIF_QP |
@@ -3386,6 +3381,35 @@ static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev,
return tx_port_affinity;
}
+static int __mlx5_ib_qp_set_counter(struct ib_qp *qp,
+ struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_qp_context context = {};
+ struct mlx5_ib_port *mibport = NULL;
+ struct mlx5_ib_qp_base *base;
+ u32 set_id;
+
+ if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id))
+ return 0;
+
+ if (counter) {
+ set_id = counter->id;
+ } else {
+ mibport = &dev->port[mqp->port - 1];
+ set_id = mibport->cnts.set_id;
+ }
+
+ base = &mqp->trans_qp.base;
+ context.qp_counter_set_usr_page &= cpu_to_be32(0xffffff);
+ context.qp_counter_set_usr_page |= cpu_to_be32(set_id << 24);
+ return mlx5_core_qp_modify(dev->mdev,
+ MLX5_CMD_OP_RTS2RTS_QP,
+ MLX5_QP_OPTPAR_COUNTER_SET_ID,
+ &context, &base->mqp);
+}
+
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state,
@@ -3439,6 +3463,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
struct mlx5_ib_port *mibport = NULL;
enum mlx5_qp_state mlx5_cur, mlx5_new;
enum mlx5_qp_optpar optpar;
+ u32 set_id = 0;
int mlx5_st;
int err;
u16 op;
@@ -3601,8 +3626,12 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
port_num = 0;
mibport = &dev->port[port_num];
+ if (ibqp->counter)
+ set_id = ibqp->counter->id;
+ else
+ set_id = mibport->cnts.set_id;
context->qp_counter_set_usr_page |=
- cpu_to_be32((u32)(mibport->cnts.set_id) << 24);
+ cpu_to_be32(set_id << 24);
}
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
@@ -3630,7 +3659,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
raw_qp_param.operation = op;
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- raw_qp_param.rq_q_ctr_id = mibport->cnts.set_id;
+ raw_qp_param.rq_q_ctr_id = set_id;
raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
}
@@ -3707,6 +3736,12 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->db.db[MLX5_SND_DBR] = 0;
}
+ if ((new_state == IB_QPS_RTS) && qp->counter_pending) {
+ err = __mlx5_ib_qp_set_counter(ibqp, ibqp->counter);
+ if (!err)
+ qp->counter_pending = 0;
+ }
+
out:
kfree(context);
return err;
@@ -4170,15 +4205,13 @@ static __be64 sig_mkey_mask(void)
}
static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct mlx5_ib_mr *mr, bool umr_inline)
+ struct mlx5_ib_mr *mr, u8 flags)
{
- int size = mr->ndescs * mr->desc_size;
+ int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
memset(umr, 0, sizeof(*umr));
- umr->flags = MLX5_UMR_CHECK_NOT_FREE;
- if (umr_inline)
- umr->flags |= MLX5_UMR_INLINE;
+ umr->flags = flags;
umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
umr->mkey_mask = frwr_mkey_mask();
}
@@ -4305,7 +4338,7 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
struct mlx5_ib_mr *mr,
u32 key, int access)
{
- int ndescs = ALIGN(mr->ndescs, 8) >> 1;
+ int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1;
memset(seg, 0, sizeof(*seg));
@@ -4356,7 +4389,7 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
struct mlx5_ib_mr *mr,
struct mlx5_ib_pd *pd)
{
- int bcount = mr->desc_size * mr->ndescs;
+ int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs);
dseg->addr = cpu_to_be64(mr->desc_map);
dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
@@ -4549,23 +4582,37 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
return 0;
}
-static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
- struct mlx5_ib_qp *qp, void **seg,
- int *size, void **cur_edge)
+static int set_sig_data_segment(const struct ib_send_wr *send_wr,
+ struct ib_mr *sig_mr,
+ struct ib_sig_attrs *sig_attrs,
+ struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
{
- struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
- struct ib_mr *sig_mr = wr->sig_mr;
struct mlx5_bsf *bsf;
- u32 data_len = wr->wr.sg_list->length;
- u32 data_key = wr->wr.sg_list->lkey;
- u64 data_va = wr->wr.sg_list->addr;
+ u32 data_len;
+ u32 data_key;
+ u64 data_va;
+ u32 prot_len = 0;
+ u32 prot_key = 0;
+ u64 prot_va = 0;
+ bool prot = false;
int ret;
int wqe_size;
+ struct mlx5_ib_mr *mr = to_mmr(sig_mr);
+ struct mlx5_ib_mr *pi_mr = mr->pi_mr;
+
+ data_len = pi_mr->data_length;
+ data_key = pi_mr->ibmr.lkey;
+ data_va = pi_mr->data_iova;
+ if (pi_mr->meta_ndescs) {
+ prot_len = pi_mr->meta_length;
+ prot_key = pi_mr->ibmr.lkey;
+ prot_va = pi_mr->pi_iova;
+ prot = true;
+ }
- if (!wr->prot ||
- (data_key == wr->prot->lkey &&
- data_va == wr->prot->addr &&
- data_len == wr->prot->length)) {
+ if (!prot || (data_key == prot_key && data_va == prot_va &&
+ data_len == prot_len)) {
/**
* Source domain doesn't contain signature information
* or data and protection are interleaved in memory.
@@ -4599,8 +4646,6 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
struct mlx5_stride_block_entry *data_sentry;
struct mlx5_stride_block_entry *prot_sentry;
- u32 prot_key = wr->prot->lkey;
- u64 prot_va = wr->prot->addr;
u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
int prot_size;
@@ -4650,17 +4695,15 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
}
static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
- const struct ib_sig_handover_wr *wr, u32 size,
- u32 length, u32 pdn)
+ struct ib_mr *sig_mr, int access_flags,
+ u32 size, u32 length, u32 pdn)
{
- struct ib_mr *sig_mr = wr->sig_mr;
u32 sig_key = sig_mr->rkey;
u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
memset(seg, 0, sizeof(*seg));
- seg->flags = get_umr_flags(wr->access_flags) |
- MLX5_MKC_ACCESS_MODE_KLMS;
+ seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS;
seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
MLX5_MKEY_BSF_EN | pdn);
@@ -4680,49 +4723,50 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
umr->mkey_mask = sig_mkey_mask();
}
-
-static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
- struct mlx5_ib_qp *qp, void **seg, int *size,
- void **cur_edge)
+static int set_pi_umr_wr(const struct ib_send_wr *send_wr,
+ struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
{
- const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
- struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
+ const struct ib_reg_wr *wr = reg_wr(send_wr);
+ struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr);
+ struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr;
+ struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs;
u32 pdn = get_pd(qp)->pdn;
u32 xlt_size;
int region_len, ret;
- if (unlikely(wr->wr.num_sge != 1) ||
- unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) ||
- unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
+ if (unlikely(send_wr->num_sge != 0) ||
+ unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) ||
+ unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) ||
unlikely(!sig_mr->sig->sig_status_checked))
return -EINVAL;
/* length of the protected region, data + protection */
- region_len = wr->wr.sg_list->length;
- if (wr->prot &&
- (wr->prot->lkey != wr->wr.sg_list->lkey ||
- wr->prot->addr != wr->wr.sg_list->addr ||
- wr->prot->length != wr->wr.sg_list->length))
- region_len += wr->prot->length;
+ region_len = pi_mr->ibmr.length;
/**
* KLM octoword size - if protection was provided
* then we use strided block format (3 octowords),
* else we use single KLM (1 octoword)
**/
- xlt_size = wr->prot ? 0x30 : sizeof(struct mlx5_klm);
+ if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE)
+ xlt_size = 0x30;
+ else
+ xlt_size = sizeof(struct mlx5_klm);
set_sig_umr_segment(*seg, xlt_size);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn);
+ set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len,
+ pdn);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- ret = set_sig_data_segment(wr, qp, seg, size, cur_edge);
+ ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size,
+ cur_edge);
if (ret)
return ret;
@@ -4759,12 +4803,14 @@ static int set_psv_wr(struct ib_sig_domain *domain,
static int set_reg_wr(struct mlx5_ib_qp *qp,
const struct ib_reg_wr *wr,
- void **seg, int *size, void **cur_edge)
+ void **seg, int *size, void **cur_edge,
+ bool check_not_free)
{
struct mlx5_ib_mr *mr = to_mmr(wr->mr);
struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
- size_t mr_list_size = mr->ndescs * mr->desc_size;
+ int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
+ u8 flags = 0;
if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
mlx5_ib_warn(to_mdev(qp->ibqp.device),
@@ -4772,7 +4818,12 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
return -EINVAL;
}
- set_reg_umr_seg(*seg, mr, umr_inline);
+ if (check_not_free)
+ flags |= MLX5_UMR_CHECK_NOT_FREE;
+ if (umr_inline)
+ flags |= MLX5_UMR_INLINE;
+
+ set_reg_umr_seg(*seg, mr, flags);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
@@ -4898,8 +4949,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_core_dev *mdev = dev->mdev;
+ struct ib_reg_wr reg_pi_wr;
struct mlx5_ib_qp *qp;
struct mlx5_ib_mr *mr;
+ struct mlx5_ib_mr *pi_mr;
+ struct mlx5_ib_mr pa_pi_mr;
+ struct ib_sig_attrs *sig_attrs;
struct mlx5_wqe_xrc_seg *xrc;
struct mlx5_bf *bf;
void *cur_edge;
@@ -4953,7 +5008,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
goto out;
}
- if (wr->opcode == IB_WR_REG_MR) {
+ if (wr->opcode == IB_WR_REG_MR ||
+ wr->opcode == IB_WR_REG_MR_INTEGRITY) {
fence = dev->umr_fence;
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
} else {
@@ -5003,7 +5059,7 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
qp->sq.wr_data[idx] = IB_WR_REG_MR;
ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
err = set_reg_wr(qp, reg_wr(wr), &seg, &size,
- &cur_edge);
+ &cur_edge, true);
if (err) {
*bad_wr = wr;
goto out;
@@ -5011,26 +5067,82 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
num_sge = 0;
break;
- case IB_WR_REG_SIG_MR:
- qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
- mr = to_mmr(sig_handover_wr(wr)->sig_mr);
-
+ case IB_WR_REG_MR_INTEGRITY:
+ qp->sq.wr_data[idx] = IB_WR_REG_MR_INTEGRITY;
+
+ mr = to_mmr(reg_wr(wr)->mr);
+ pi_mr = mr->pi_mr;
+
+ if (pi_mr) {
+ memset(&reg_pi_wr, 0,
+ sizeof(struct ib_reg_wr));
+
+ reg_pi_wr.mr = &pi_mr->ibmr;
+ reg_pi_wr.access = reg_wr(wr)->access;
+ reg_pi_wr.key = pi_mr->ibmr.rkey;
+
+ ctrl->imm = cpu_to_be32(reg_pi_wr.key);
+ /* UMR for data + prot registration */
+ err = set_reg_wr(qp, &reg_pi_wr, &seg,
+ &size, &cur_edge,
+ false);
+ if (err) {
+ *bad_wr = wr;
+ goto out;
+ }
+ finish_wqe(qp, ctrl, seg, size,
+ cur_edge, idx, wr->wr_id,
+ nreq, fence,
+ MLX5_OPCODE_UMR);
+
+ err = begin_wqe(qp, &seg, &ctrl, wr,
+ &idx, &size, &cur_edge,
+ nreq);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+ } else {
+ memset(&pa_pi_mr, 0,
+ sizeof(struct mlx5_ib_mr));
+ /* No UMR, use local_dma_lkey */
+ pa_pi_mr.ibmr.lkey =
+ mr->ibmr.pd->local_dma_lkey;
+
+ pa_pi_mr.ndescs = mr->ndescs;
+ pa_pi_mr.data_length = mr->data_length;
+ pa_pi_mr.data_iova = mr->data_iova;
+ if (mr->meta_ndescs) {
+ pa_pi_mr.meta_ndescs =
+ mr->meta_ndescs;
+ pa_pi_mr.meta_length =
+ mr->meta_length;
+ pa_pi_mr.pi_iova = mr->pi_iova;
+ }
+
+ pa_pi_mr.ibmr.length = mr->ibmr.length;
+ mr->pi_mr = &pa_pi_mr;
+ }
ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
- err = set_sig_umr_wr(wr, qp, &seg, &size,
- &cur_edge);
+ /* UMR for sig MR */
+ err = set_pi_umr_wr(wr, qp, &seg, &size,
+ &cur_edge);
if (err) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
-
finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
wr->wr_id, nreq, fence,
MLX5_OPCODE_UMR);
+
/*
* SET_PSV WQEs are not signaled and solicited
* on error
*/
+ sig_attrs = mr->ibmr.sig_attrs;
err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
&size, &cur_edge, nreq, false,
true);
@@ -5040,19 +5152,18 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
*bad_wr = wr;
goto out;
}
-
- err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem,
- mr->sig->psv_memory.psv_idx, &seg,
- &size);
+ err = set_psv_wr(&sig_attrs->mem,
+ mr->sig->psv_memory.psv_idx,
+ &seg, &size);
if (err) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
-
finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
- wr->wr_id, nreq, fence,
+ wr->wr_id, nreq, next_fence,
MLX5_OPCODE_SET_PSV);
+
err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
&size, &cur_edge, nreq, false,
true);
@@ -5062,20 +5173,20 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
*bad_wr = wr;
goto out;
}
-
- err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire,
- mr->sig->psv_wire.psv_idx, &seg,
- &size);
+ err = set_psv_wr(&sig_attrs->wire,
+ mr->sig->psv_wire.psv_idx,
+ &seg, &size);
if (err) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
-
finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
- wr->wr_id, nreq, fence,
+ wr->wr_id, nreq, next_fence,
MLX5_OPCODE_SET_PSV);
- qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+
+ qp->next_fence =
+ MLX5_FENCE_MODE_INITIATOR_SMALL;
num_sge = 0;
goto skip_psv;
@@ -6047,7 +6158,7 @@ err:
return ERR_PTR(err);
}
-int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
+void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(wq->device);
struct mlx5_ib_rwq *rwq = to_mrwq(wq);
@@ -6055,8 +6166,6 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
destroy_user_rq(dev, wq->pd, rwq, udata);
kfree(rwq);
-
- return 0;
}
struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
@@ -6297,7 +6406,7 @@ static void handle_drain_completion(struct ib_cq *cq,
/* Run the CQ handler - this makes sure that the drain WR will
* be processed if wasn't processed yet.
*/
- mcq->mcq.comp(&mcq->mcq);
+ mcq->mcq.comp(&mcq->mcq, NULL);
}
wait_for_completion(&sdrain->done);
@@ -6367,3 +6476,34 @@ void mlx5_ib_drain_rq(struct ib_qp *qp)
handle_drain_completion(cq, &rdrain, dev);
}
+
+/**
+ * Bind a qp to a counter. If @counter is NULL then bind the qp to
+ * the default counter
+ */
+int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter)
+{
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ int err = 0;
+
+ mutex_lock(&mqp->mutex);
+ if (mqp->state == IB_QPS_RESET) {
+ qp->counter = counter;
+ goto out;
+ }
+
+ if (mqp->state == IB_QPS_RTS) {
+ err = __mlx5_ib_qp_set_counter(qp, counter);
+ if (!err)
+ qp->counter = counter;
+
+ goto out;
+ }
+
+ mqp->counter_pending = 1;
+ qp->counter = counter;
+
+out:
+ mutex_unlock(&mqp->mutex);
+ return err;
+}