aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/mlx5
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--drivers/infiniband/hw/mlx5/Kconfig1
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c37
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h2
-rw-r--r--drivers/infiniband/hw/mlx5/cong.c15
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c15
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c443
-rw-r--r--drivers/infiniband/hw/mlx5/doorbell.c6
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c2
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c11
-rw-r--r--drivers/infiniband/hw/mlx5/main.c177
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c5
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h44
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c153
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c178
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c232
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c5
-rw-r--r--drivers/infiniband/hw/mlx5/srq.h2
-rw-r--r--drivers/infiniband/hw/mlx5/srq_cmd.c16
18 files changed, 992 insertions, 352 deletions
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
index 0440966bc6ec..8d651c05de62 100644
--- a/drivers/infiniband/hw/mlx5/Kconfig
+++ b/drivers/infiniband/hw/mlx5/Kconfig
@@ -1,7 +1,6 @@
config MLX5_INFINIBAND
tristate "Mellanox 5th generation network adapters (ConnectX series) support"
depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
- depends on INFINIBAND_USER_ACCESS || INFINIBAND_USER_ACCESS=n
---help---
This driver provides low-level InfiniBand support for
Mellanox Connect-IB PCI Express host channel adapters (HCAs).
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 356bccc715ee..6bcc63aaa50b 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -345,3 +345,40 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
counter_set_id);
return err;
}
+
+int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
+ u16 opmod, u8 port)
+{
+ int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
+ int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in);
+ int err = -ENOMEM;
+ void *data;
+ void *resp;
+ u32 *out;
+ u32 *in;
+
+ in = kzalloc(inlen, GFP_KERNEL);
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!in || !out)
+ goto out;
+
+ MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
+ MLX5_SET(mad_ifc_in, in, op_mod, opmod);
+ MLX5_SET(mad_ifc_in, in, port, port);
+
+ data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
+ memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+ if (err)
+ goto out;
+
+ resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet);
+ memcpy(outb, resp,
+ MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet));
+
+out:
+ kfree(out);
+ kfree(in);
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 1e76dc67a369..923a7b93f507 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -63,4 +63,6 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
u16 uid);
+int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
+ u16 opmod, u8 port);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
index 7e4e358a4fd8..8ba439fabf7f 100644
--- a/drivers/infiniband/hw/mlx5/cong.c
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -389,19 +389,19 @@ void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
dev->port[port_num].dbg_cc_params = NULL;
}
-int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
+void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
{
struct mlx5_ib_dbg_cc_params *dbg_cc_params;
struct mlx5_core_dev *mdev;
int i;
if (!mlx5_debugfs_root)
- goto out;
+ return;
/* Takes a 1-based port number */
mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
if (!mdev)
- goto out;
+ return;
if (!MLX5_CAP_GEN(mdev, cc_query_allowed) ||
!MLX5_CAP_GEN(mdev, cc_modify_allowed))
@@ -415,8 +415,6 @@ int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
dbg_cc_params->root = debugfs_create_dir("cc_params",
mdev->priv.dbg_root);
- if (!dbg_cc_params->root)
- goto err;
for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
dbg_cc_params->params[i].offset = i;
@@ -427,14 +425,11 @@ int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
0600, dbg_cc_params->root,
&dbg_cc_params->params[i],
&dbg_cc_fops);
- if (!dbg_cc_params->params[i].dentry)
- goto err;
}
put_mdev:
mlx5_ib_put_native_port_mdev(dev, port_num + 1);
-out:
- return 0;
+ return;
err:
mlx5_ib_warn(dev, "cong debugfs failure\n");
@@ -445,5 +440,5 @@ err:
* We don't want to fail driver if debugfs failed to initialize,
* so we are not forwarding error to the user.
*/
- return 0;
+ return;
}
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 90f1b0bae5b5..18704e503508 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -187,8 +187,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wqe_ctr = be16_to_cpu(cqe->wqe_counter);
wc->wr_id = srq->wrid[wqe_ctr];
mlx5_ib_free_srq_wqe(srq, wqe_ctr);
- if (msrq && atomic_dec_and_test(&msrq->refcount))
- complete(&msrq->free);
+ if (msrq)
+ mlx5_core_res_put(&msrq->common);
}
} else {
wq = &qp->rq;
@@ -707,15 +707,15 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
*cqe_size = ucmd.cqe_size;
- cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
- entries * ucmd.cqe_size,
- IB_ACCESS_LOCAL_WRITE, 1);
+ cq->buf.umem =
+ ib_umem_get(udata, ucmd.buf_addr, entries * ucmd.cqe_size,
+ IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(cq->buf.umem)) {
err = PTR_ERR(cq->buf.umem);
return err;
}
- err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
+ err = mlx5_ib_db_map_user(to_mucontext(context), udata, ucmd.db_addr,
&cq->db);
if (err)
goto err_umem;
@@ -1111,7 +1111,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
struct ib_umem *umem;
int err;
int npages;
- struct ib_ucontext *context = cq->buf.umem->context;
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
if (err)
@@ -1124,7 +1123,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
return -EINVAL;
- umem = ib_umem_get(context, ucmd.buf_addr,
+ umem = ib_umem_get(udata, ucmd.buf_addr,
(size_t)ucmd.cqe_size * entries,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(umem)) {
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 5a588f3cfb1b..cd43e39ced87 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -8,6 +8,7 @@
#include <rdma/uverbs_types.h>
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_std_types.h>
#include <linux/mlx5/driver.h>
@@ -17,12 +18,28 @@
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
+enum devx_obj_flags {
+ DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0,
+};
+
+struct devx_async_data {
+ struct mlx5_ib_dev *mdev;
+ struct list_head list;
+ struct ib_uobject *fd_uobj;
+ struct mlx5_async_work cb_work;
+ u16 cmd_out_len;
+ /* must be last field in this structure */
+ struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
+};
+
#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
struct devx_obj {
struct mlx5_core_dev *mdev;
u64 obj_id;
u32 dinlen; /* destroy inbox length */
u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
+ u32 flags;
+ struct mlx5_ib_devx_mr devx_mr;
};
struct devx_umem {
@@ -1011,6 +1028,92 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
}
}
+static int devx_handle_mkey_indirect(struct devx_obj *obj,
+ struct mlx5_ib_dev *dev,
+ void *in, void *out)
+{
+ struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
+ struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr;
+ unsigned long flags;
+ struct mlx5_core_mkey *mkey;
+ void *mkc;
+ u8 key;
+ int err;
+
+ mkey = &devx_mr->mmkey;
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ key = MLX5_GET(mkc, mkc, mkey_7_0);
+ mkey->key = mlx5_idx_to_mkey(
+ MLX5_GET(create_mkey_out, out, mkey_index)) | key;
+ mkey->type = MLX5_MKEY_INDIRECT_DEVX;
+ mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
+ mkey->size = MLX5_GET64(mkc, mkc, len);
+ mkey->pd = MLX5_GET(mkc, mkc, pd);
+ devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
+
+ write_lock_irqsave(&table->lock, flags);
+ err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key),
+ mkey);
+ write_unlock_irqrestore(&table->lock, flags);
+ return err;
+}
+
+static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
+ struct devx_obj *obj,
+ void *in, int in_len)
+{
+ int min_len = MLX5_BYTE_OFF(create_mkey_in, memory_key_mkey_entry) +
+ MLX5_FLD_SZ_BYTES(create_mkey_in,
+ memory_key_mkey_entry);
+ void *mkc;
+ u8 access_mode;
+
+ if (in_len < min_len)
+ return -EINVAL;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ access_mode = MLX5_GET(mkc, mkc, access_mode_1_0);
+ access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2;
+
+ if (access_mode == MLX5_MKC_ACCESS_MODE_KLMS ||
+ access_mode == MLX5_MKC_ACCESS_MODE_KSM) {
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ obj->flags |= DEVX_OBJ_FLAGS_INDIRECT_MKEY;
+ return 0;
+ }
+
+ MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
+ return 0;
+}
+
+static void devx_free_indirect_mkey(struct rcu_head *rcu)
+{
+ kfree(container_of(rcu, struct devx_obj, devx_mr.rcu));
+}
+
+/* This function to delete from the radix tree needs to be called before
+ * destroying the underlying mkey. Otherwise a race might occur in case that
+ * other thread will get the same mkey before this one will be deleted,
+ * in that case it will fail via inserting to the tree its own data.
+ *
+ * Note:
+ * An error in the destroy is not expected unless there is some other indirect
+ * mkey which points to this one. In a kernel cleanup flow it will be just
+ * destroyed in the iterative destruction call. In a user flow, in case
+ * the application didn't close in the expected order it's its own problem,
+ * the mkey won't be part of the tree, in both cases the kernel is safe.
+ */
+static void devx_cleanup_mkey(struct devx_obj *obj)
+{
+ struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table;
+ unsigned long flags;
+
+ write_lock_irqsave(&table->lock, flags);
+ radix_tree_delete(&table->tree, mlx5_base_mkey(obj->devx_mr.mmkey.key));
+ write_unlock_irqrestore(&table->lock, flags);
+}
+
static int devx_obj_cleanup(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
@@ -1018,10 +1121,21 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
struct devx_obj *obj = uobject->object;
int ret;
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
+ devx_cleanup_mkey(obj);
+
ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
if (ib_is_destroy_retryable(ret, why, uobject))
return ret;
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+ struct mlx5_ib_dev *dev = to_mdev(uobject->context->device);
+
+ call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu,
+ devx_free_indirect_mkey);
+ return ret;
+ }
+
kfree(obj);
return ret;
}
@@ -1032,6 +1146,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
int cmd_out_len = uverbs_attr_get_len(attrs,
MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT);
+ int cmd_in_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
void *cmd_out;
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
@@ -1060,10 +1176,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
return -ENOMEM;
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
- devx_set_umem_valid(cmd_in);
+ if (opcode == MLX5_CMD_OP_CREATE_MKEY) {
+ err = devx_handle_mkey_create(dev, obj, cmd_in, cmd_in_len);
+ if (err)
+ goto obj_free;
+ } else {
+ devx_set_umem_valid(cmd_in);
+ }
err = mlx5_cmd_exec(dev->mdev, cmd_in,
- uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN),
+ cmd_in_len,
cmd_out, cmd_out_len);
if (err)
goto obj_free;
@@ -1074,6 +1196,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
&obj_id);
WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+ err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
+ if (err)
+ goto obj_destroy;
+ }
+
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
if (err)
goto obj_destroy;
@@ -1082,6 +1210,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
return 0;
obj_destroy:
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
+ devx_cleanup_mkey(obj);
mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
obj_free:
kfree(obj);
@@ -1168,6 +1298,153 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
cmd_out, cmd_out_len);
}
+struct devx_async_event_queue {
+ spinlock_t lock;
+ wait_queue_head_t poll_wait;
+ struct list_head event_list;
+ atomic_t bytes_in_use;
+ u8 is_destroyed:1;
+};
+
+struct devx_async_cmd_event_file {
+ struct ib_uobject uobj;
+ struct devx_async_event_queue ev_queue;
+ struct mlx5_async_ctx async_ctx;
+};
+
+static void devx_init_event_queue(struct devx_async_event_queue *ev_queue)
+{
+ spin_lock_init(&ev_queue->lock);
+ INIT_LIST_HEAD(&ev_queue->event_list);
+ init_waitqueue_head(&ev_queue->poll_wait);
+ atomic_set(&ev_queue->bytes_in_use, 0);
+ ev_queue->is_destroyed = 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct devx_async_cmd_event_file *ev_file;
+
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE);
+ struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
+
+ ev_file = container_of(uobj, struct devx_async_cmd_event_file,
+ uobj);
+ devx_init_event_queue(&ev_file->ev_queue);
+ mlx5_cmd_init_async_ctx(mdev->mdev, &ev_file->async_ctx);
+ return 0;
+}
+
+static void devx_query_callback(int status, struct mlx5_async_work *context)
+{
+ struct devx_async_data *async_data =
+ container_of(context, struct devx_async_data, cb_work);
+ struct ib_uobject *fd_uobj = async_data->fd_uobj;
+ struct devx_async_cmd_event_file *ev_file;
+ struct devx_async_event_queue *ev_queue;
+ unsigned long flags;
+
+ ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
+ uobj);
+ ev_queue = &ev_file->ev_queue;
+
+ spin_lock_irqsave(&ev_queue->lock, flags);
+ list_add_tail(&async_data->list, &ev_queue->event_list);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
+
+ wake_up_interruptible(&ev_queue->poll_wait);
+ fput(fd_uobj->object);
+}
+
+#define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ void *cmd_in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN);
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE);
+ u16 cmd_out_len;
+ struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
+ struct ib_uobject *fd_uobj;
+ int err;
+ int uid;
+ struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
+ struct devx_async_cmd_event_file *ev_file;
+ struct devx_async_data *async_data;
+
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
+
+ if (!devx_is_obj_query_cmd(cmd_in))
+ return -EINVAL;
+
+ err = uverbs_get_const(&cmd_out_len, attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN);
+ if (err)
+ return err;
+
+ if (!devx_is_valid_obj_id(uobj, cmd_in))
+ return -EINVAL;
+
+ fd_uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD);
+ if (IS_ERR(fd_uobj))
+ return PTR_ERR(fd_uobj);
+
+ ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
+ uobj);
+
+ if (atomic_add_return(cmd_out_len, &ev_file->ev_queue.bytes_in_use) >
+ MAX_ASYNC_BYTES_IN_USE) {
+ atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
+ return -EAGAIN;
+ }
+
+ async_data = kvzalloc(struct_size(async_data, hdr.out_data,
+ cmd_out_len), GFP_KERNEL);
+ if (!async_data) {
+ err = -ENOMEM;
+ goto sub_bytes;
+ }
+
+ err = uverbs_copy_from(&async_data->hdr.wr_id, attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID);
+ if (err)
+ goto free_async;
+
+ async_data->cmd_out_len = cmd_out_len;
+ async_data->mdev = mdev;
+ async_data->fd_uobj = fd_uobj;
+
+ get_file(fd_uobj->object);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+ err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in,
+ uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN),
+ async_data->hdr.out_data,
+ async_data->cmd_out_len,
+ devx_query_callback, &async_data->cb_work);
+
+ if (err)
+ goto cb_err;
+
+ return 0;
+
+cb_err:
+ fput(fd_uobj->object);
+free_async:
+ kvfree(async_data);
+sub_bytes:
+ atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
+ return err;
+}
+
static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
struct uverbs_attr_bundle *attrs,
struct devx_umem *obj)
@@ -1195,7 +1472,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
if (err)
return err;
- obj->umem = ib_umem_get(ucontext, addr, size, access, 0);
+ obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access, 0);
if (IS_ERR(obj->umem))
return PTR_ERR(obj->umem);
@@ -1313,6 +1590,123 @@ static int devx_umem_cleanup(struct ib_uobject *uobject,
return 0;
}
+static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
+ struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
+ struct devx_async_data *event;
+ int ret = 0;
+ size_t eventsz;
+
+ spin_lock_irq(&ev_queue->lock);
+
+ while (list_empty(&ev_queue->event_list)) {
+ spin_unlock_irq(&ev_queue->lock);
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(
+ ev_queue->poll_wait,
+ (!list_empty(&ev_queue->event_list) ||
+ ev_queue->is_destroyed))) {
+ return -ERESTARTSYS;
+ }
+
+ if (list_empty(&ev_queue->event_list) &&
+ ev_queue->is_destroyed)
+ return -EIO;
+
+ spin_lock_irq(&ev_queue->lock);
+ }
+
+ event = list_entry(ev_queue->event_list.next,
+ struct devx_async_data, list);
+ eventsz = event->cmd_out_len +
+ sizeof(struct mlx5_ib_uapi_devx_async_cmd_hdr);
+
+ if (eventsz > count) {
+ spin_unlock_irq(&ev_queue->lock);
+ return -ENOSPC;
+ }
+
+ list_del(ev_queue->event_list.next);
+ spin_unlock_irq(&ev_queue->lock);
+
+ if (copy_to_user(buf, &event->hdr, eventsz))
+ ret = -EFAULT;
+ else
+ ret = eventsz;
+
+ atomic_sub(event->cmd_out_len, &ev_queue->bytes_in_use);
+ kvfree(event);
+ return ret;
+}
+
+static int devx_async_cmd_event_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uobject *uobj = filp->private_data;
+ struct devx_async_cmd_event_file *comp_ev_file = container_of(
+ uobj, struct devx_async_cmd_event_file, uobj);
+ struct devx_async_data *entry, *tmp;
+
+ spin_lock_irq(&comp_ev_file->ev_queue.lock);
+ list_for_each_entry_safe(entry, tmp,
+ &comp_ev_file->ev_queue.event_list, list)
+ kvfree(entry);
+ spin_unlock_irq(&comp_ev_file->ev_queue.lock);
+
+ uverbs_close_fd(filp);
+ return 0;
+}
+
+static __poll_t devx_async_cmd_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
+ struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
+ __poll_t pollflags = 0;
+
+ poll_wait(filp, &ev_queue->poll_wait, wait);
+
+ spin_lock_irq(&ev_queue->lock);
+ if (ev_queue->is_destroyed)
+ pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
+ else if (!list_empty(&ev_queue->event_list))
+ pollflags = EPOLLIN | EPOLLRDNORM;
+ spin_unlock_irq(&ev_queue->lock);
+
+ return pollflags;
+}
+
+const struct file_operations devx_async_cmd_event_fops = {
+ .owner = THIS_MODULE,
+ .read = devx_async_cmd_event_read,
+ .poll = devx_async_cmd_event_poll,
+ .release = devx_async_cmd_event_close,
+ .llseek = no_llseek,
+};
+
+static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct devx_async_cmd_event_file *comp_ev_file =
+ container_of(uobj, struct devx_async_cmd_event_file,
+ uobj);
+ struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
+
+ spin_lock_irq(&ev_queue->lock);
+ ev_queue->is_destroyed = 1;
+ spin_unlock_irq(&ev_queue->lock);
+
+ if (why == RDMA_REMOVE_DRIVER_REMOVE)
+ wake_up_interruptible(&ev_queue->poll_wait);
+
+ mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx);
+ return 0;
+};
+
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_UMEM_REG,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
@@ -1423,6 +1817,27 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
UA_MANDATORY));
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
+ UVERBS_IDR_ANY_OBJECT,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN,
+ u16, UA_MANDATORY),
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD,
+ MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
@@ -1433,13 +1848,30 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
- &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY));
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY));
DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC,
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file),
+ devx_hot_unplug_async_cmd_event_file,
+ &devx_async_cmd_event_fops, "[devx_async_cmd]",
+ O_RDONLY),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));
+
static bool devx_is_supported(struct ib_device *device)
{
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -1457,5 +1889,8 @@ const struct uapi_definition mlx5_ib_devx_defs[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
MLX5_IB_OBJECT_DEVX_UMEM,
UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
{},
};
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
index a0e4e6ddb71a..8f4e5f22b84c 100644
--- a/drivers/infiniband/hw/mlx5/doorbell.c
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -43,7 +43,8 @@ struct mlx5_ib_user_db_page {
int refcnt;
};
-int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
+ struct ib_udata *udata, unsigned long virt,
struct mlx5_db *db)
{
struct mlx5_ib_user_db_page *page;
@@ -63,8 +64,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
page->user_virt = (virt & PAGE_MASK);
page->refcnt = 0;
- page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
- PAGE_SIZE, 0, 0);
+ page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0);
if (IS_ERR(page->umem)) {
err = PTR_ERR(page->umem);
kfree(page);
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 46a9ddc8ca56..6d7b8bad4b61 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -70,7 +70,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
struct mlx5_ib_dev *ibdev;
- ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
+ ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
if (!ibdev)
return -ENOMEM;
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 558638468edb..6c529e6f3a01 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -36,6 +36,7 @@
#include <rdma/ib_smi.h>
#include <rdma/ib_pma.h>
#include "mlx5_ib.h"
+#include "cmd.h"
enum {
MLX5_IB_VENDOR_CLASS1 = 0x9,
@@ -51,9 +52,10 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num,
return dev->mdev->port_caps[port_num - 1].has_smi;
}
-int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
- u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const void *in_mad, void *response_mad)
+static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey,
+ int ignore_bkey, u8 port, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh, const void *in_mad,
+ void *response_mad)
{
u8 op_modifier = 0;
@@ -68,7 +70,8 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
if (ignore_bkey || !in_wc)
op_modifier |= 0x2;
- return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port);
+ return mlx5_cmd_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier,
+ port);
}
static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 94fe253d4956..76d6c2557d0c 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -476,24 +476,51 @@ out:
return err;
}
+struct mlx5_ib_vlan_info {
+ u16 vlan_id;
+ bool vlan;
+};
+
+static int get_lower_dev_vlan(struct net_device *lower_dev, void *data)
+{
+ struct mlx5_ib_vlan_info *vlan_info = data;
+
+ if (is_vlan_dev(lower_dev)) {
+ vlan_info->vlan = true;
+ vlan_info->vlan_id = vlan_dev_vlan_id(lower_dev);
+ }
+ /* We are interested only in first level vlan device, so
+ * always return 1 to stop iterating over next level devices.
+ */
+ return 1;
+}
+
static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
unsigned int index, const union ib_gid *gid,
const struct ib_gid_attr *attr)
{
enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ struct mlx5_ib_vlan_info vlan_info = { };
u8 roce_version = 0;
u8 roce_l3_type = 0;
- bool vlan = false;
u8 mac[ETH_ALEN];
- u16 vlan_id = 0;
if (gid) {
gid_type = attr->gid_type;
ether_addr_copy(mac, attr->ndev->dev_addr);
if (is_vlan_dev(attr->ndev)) {
- vlan = true;
- vlan_id = vlan_dev_vlan_id(attr->ndev);
+ vlan_info.vlan = true;
+ vlan_info.vlan_id = vlan_dev_vlan_id(attr->ndev);
+ } else {
+ /* If the netdev is upper device and if it's lower
+ * lower device is vlan device, consider vlan id of
+ * the lower vlan device for this gid entry.
+ */
+ rcu_read_lock();
+ netdev_walk_all_lower_dev_rcu(attr->ndev,
+ get_lower_dev_vlan, &vlan_info);
+ rcu_read_unlock();
}
}
@@ -514,8 +541,9 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
}
return mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
- roce_l3_type, gid->raw, mac, vlan,
- vlan_id, port_num);
+ roce_l3_type, gid->raw, mac,
+ vlan_info.vlan, vlan_info.vlan_id,
+ port_num);
}
static int mlx5_ib_add_gid(const struct ib_gid_attr *attr,
@@ -923,11 +951,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (MLX5_CAP_GEN(mdev, pg))
- props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
- props->odp_caps = dev->odp_caps;
-#endif
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ if (MLX5_CAP_GEN(mdev, pg))
+ props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
+ props->odp_caps = dev->odp_caps;
+ }
if (MLX5_CAP_GEN(mdev, cd))
props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
@@ -1763,9 +1791,9 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (err)
goto out_sys_pages;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
-#endif
+ if (ibdev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)
+ context->ibucontext.invalidate_range =
+ &mlx5_ib_invalidate_range;
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
err = mlx5_ib_devx_create(dev, true);
@@ -1897,12 +1925,10 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
struct mlx5_bfreg_info *bfregi;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/* All umem's must be destroyed before destroying the ucontext. */
mutex_lock(&ibcontext->per_mm_list_lock);
WARN_ON(!list_empty(&ibcontext->per_mm_list));
mutex_unlock(&ibcontext->per_mm_list_lock);
-#endif
bfregi = &context->bfregi;
mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
@@ -2265,7 +2291,7 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {};
u16 uid = 0;
- pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
@@ -2335,10 +2361,29 @@ static u8 get_match_criteria_enable(u32 *match_criteria)
return match_criteria_enable;
}
-static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
+static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
{
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
+ u8 entry_mask;
+ u8 entry_val;
+ int err = 0;
+
+ if (!mask)
+ goto out;
+
+ entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
+ ip_protocol);
+ entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
+ ip_protocol);
+ if (!entry_mask) {
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
+ goto out;
+ }
+ /* Don't override existing ip protocol */
+ if (mask != entry_mask || val != entry_val)
+ err = -EINVAL;
+out:
+ return err;
}
static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
@@ -2572,8 +2617,10 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
set_tos(headers_c, headers_v,
ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
- set_proto(headers_c, headers_v,
- ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto);
+ if (set_proto(headers_c, headers_v,
+ ib_spec->ipv4.mask.proto,
+ ib_spec->ipv4.val.proto))
+ return -EINVAL;
break;
case IB_FLOW_SPEC_IPV6:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
@@ -2612,9 +2659,10 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
ib_spec->ipv6.mask.traffic_class,
ib_spec->ipv6.val.traffic_class);
- set_proto(headers_c, headers_v,
- ib_spec->ipv6.mask.next_hdr,
- ib_spec->ipv6.val.next_hdr);
+ if (set_proto(headers_c, headers_v,
+ ib_spec->ipv6.mask.next_hdr,
+ ib_spec->ipv6.val.next_hdr))
+ return -EINVAL;
set_flow_label(misc_params_c, misc_params_v,
ntohl(ib_spec->ipv6.mask.flow_label),
@@ -2635,10 +2683,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
LAST_TCP_UDP_FIELD))
return -EOPNOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
- 0xff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
- IPPROTO_TCP);
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
+ return -EINVAL;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
ntohs(ib_spec->tcp_udp.mask.src_port));
@@ -2655,10 +2701,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
LAST_TCP_UDP_FIELD))
return -EOPNOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
- 0xff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
- IPPROTO_UDP);
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
+ return -EINVAL;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
ntohs(ib_spec->tcp_udp.mask.src_port));
@@ -2674,6 +2718,9 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
if (ib_spec->gre.mask.c_ks_res0_ver)
return -EOPNOTSUPP;
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
+ return -EINVAL;
+
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
@@ -3825,7 +3872,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
return ERR_PTR(-ENOMEM);
- dst = kzalloc(sizeof(*dst) * 2, GFP_KERNEL);
+ dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
if (!dst)
return ERR_PTR(-ENOMEM);
@@ -4106,7 +4153,7 @@ static ssize_t fw_pages_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
}
@@ -4116,7 +4163,7 @@ static ssize_t reg_pages_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}
@@ -4126,7 +4173,8 @@ static ssize_t hca_type_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
+
return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
}
static DEVICE_ATTR_RO(hca_type);
@@ -4135,7 +4183,8 @@ static ssize_t hw_rev_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
+
return sprintf(buf, "%x\n", dev->mdev->rev_id);
}
static DEVICE_ATTR_RO(hw_rev);
@@ -4144,7 +4193,8 @@ static ssize_t board_id_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
+
return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
dev->mdev->board_id);
}
@@ -5508,9 +5558,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port;
mlx5_notifier_register(mpi->mdev, &mpi->mdev_events);
- err = mlx5_ib_init_cong_debugfs(ibdev, port_num);
- if (err)
- goto unbind;
+ mlx5_ib_init_cong_debugfs(ibdev, port_num);
return true;
@@ -5722,11 +5770,12 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- cleanup_srcu_struct(&dev->mr_srcu);
- drain_workqueue(dev->advise_mr_wq);
- destroy_workqueue(dev->advise_mr_wq);
-#endif
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ srcu_barrier(&dev->mr_srcu);
+ cleanup_srcu_struct(&dev->mr_srcu);
+ drain_workqueue(dev->advise_mr_wq);
+ destroy_workqueue(dev->advise_mr_wq);
+ }
kfree(dev->port);
}
@@ -5779,19 +5828,20 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
spin_lock_init(&dev->memic.memic_lock);
dev->memic.dev = mdev;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- dev->advise_mr_wq = alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0);
- if (!dev->advise_mr_wq) {
- err = -ENOMEM;
- goto err_mp;
- }
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ dev->advise_mr_wq =
+ alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0);
+ if (!dev->advise_mr_wq) {
+ err = -ENOMEM;
+ goto err_mp;
+ }
- err = init_srcu_struct(&dev->mr_srcu);
- if (err) {
- destroy_workqueue(dev->advise_mr_wq);
- goto err_mp;
+ err = init_srcu_struct(&dev->mr_srcu);
+ if (err) {
+ destroy_workqueue(dev->advise_mr_wq);
+ goto err_mp;
+ }
}
-#endif
return 0;
err_mp:
@@ -6154,7 +6204,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
return mlx5_ib_odp_init_one(dev);
}
-void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_odp_cleanup_one(dev);
}
@@ -6183,8 +6233,9 @@ void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev)
{
- return mlx5_ib_init_cong_debugfs(dev,
- mlx5_core_native_port_num(dev->mdev) - 1);
+ mlx5_ib_init_cong_debugfs(dev,
+ mlx5_core_native_port_num(dev->mdev) - 1);
+ return 0;
}
static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev)
@@ -6234,7 +6285,7 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
name = "mlx5_%d";
else
name = "mlx5_bond_%d";
- return ib_register_device(&dev->ib_dev, name, NULL);
+ return ib_register_device(&dev->ib_dev, name);
}
void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
@@ -6485,7 +6536,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET)
return mlx5_ib_add_slave_port(mdev);
- dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+ dev = ib_alloc_device(mlx5_ib_dev, ib_dev);
if (!dev)
return NULL;
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 549234988bb4..9f90be296ee0 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -111,7 +111,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
*count = i;
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
{
u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;
@@ -123,7 +122,6 @@ static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
return mtt_entry;
}
-#endif
/*
* Populate the given array with bus addresses from the umem.
@@ -151,7 +149,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
int len;
struct scatterlist *sg;
int entry;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+
if (umem->is_odp) {
WARN_ON(shift != 0);
WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
@@ -164,7 +162,6 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
}
return;
}
-#endif
i = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b06d3b1efea8..f9817284c7a3 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -36,6 +36,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
#include <rdma/ib_smi.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cq.h>
@@ -587,14 +588,27 @@ struct mlx5_ib_mr {
struct mlx5_ib_mr *parent;
atomic_t num_leaf_free;
wait_queue_head_t q_leaf_free;
+ struct mlx5_async_work cb_work;
};
+static inline bool is_odp_mr(struct mlx5_ib_mr *mr)
+{
+ return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem &&
+ mr->umem->is_odp;
+}
+
struct mlx5_ib_mw {
struct ib_mw ibmw;
struct mlx5_core_mkey mmkey;
int ndescs;
};
+struct mlx5_ib_devx_mr {
+ struct mlx5_core_mkey mmkey;
+ int ndescs;
+ struct rcu_head rcu;
+};
+
struct mlx5_ib_umr_context {
struct ib_cqe cqe;
enum ib_wc_status status;
@@ -623,7 +637,6 @@ struct mlx5_cache_ent {
spinlock_t lock;
- struct dentry *dir;
char name[4];
u32 order;
u32 xlt;
@@ -635,11 +648,6 @@ struct mlx5_cache_ent {
u32 miss;
u32 limit;
- struct dentry *fsize;
- struct dentry *fcur;
- struct dentry *fmiss;
- struct dentry *flimit;
-
struct mlx5_ib_dev *dev;
struct work_struct work;
struct delayed_work dwork;
@@ -911,7 +919,6 @@ struct mlx5_ib_dev {
/* Prevents soft lock on massive reg MRs */
struct mutex slow_path_mutex;
int fill_delay;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct ib_odp_caps odp_caps;
u64 odp_max_size;
struct mlx5_ib_pf_eq odp_pf_eq;
@@ -923,7 +930,6 @@ struct mlx5_ib_dev {
struct srcu_struct mr_srcu;
u32 null_mkey;
struct workqueue_struct *advise_mr_wq;
-#endif
struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
@@ -944,6 +950,7 @@ struct mlx5_ib_dev {
struct mlx5_memic memic;
u16 devx_whitelist_uid;
struct mlx5_srq_table srq_table;
+ struct mlx5_async_ctx async_ctx;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1032,15 +1039,13 @@ to_mflow_act(struct ib_flow_action *ibact)
return container_of(ibact, struct mlx5_ib_flow_action, ib_action);
}
-int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
+ struct ib_udata *udata, unsigned long virt,
struct mlx5_db *db);
void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
-int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
- u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const void *in_mad, void *response_mad);
struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
u32 flags, struct ib_udata *udata);
int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
@@ -1070,9 +1075,12 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr);
int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
-int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
- void *buffer, u32 length,
- struct mlx5_ib_qp_base *base);
+int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
+ int buflen, size_t *bc);
+int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
+ int buflen, size_t *bc);
+int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index,
+ void *buffer, int buflen, size_t *bc);
struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
@@ -1098,6 +1106,7 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw);
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+ struct ib_udata *udata,
int access_flags);
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
@@ -1215,6 +1224,9 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
{
return -EOPNOTSUPP;
}
+static inline void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp,
+ unsigned long start,
+ unsigned long end){};
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
/* Needed for rep profile */
@@ -1254,7 +1266,7 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
const struct ib_gid_attr *attr);
void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
-int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
+void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
/* GSI QP helper functions */
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index fd6ea1f75085..705a79cd21da 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -71,10 +71,9 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- /* Wait until all page fault handlers using the mr complete. */
- synchronize_srcu(&dev->mr_srcu);
-#endif
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ /* Wait until all page fault handlers using the mr complete. */
+ synchronize_srcu(&dev->mr_srcu);
return err;
}
@@ -95,10 +94,9 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static void update_odp_mr(struct mlx5_ib_mr *mr)
{
- if (mr->umem->is_odp) {
+ if (is_odp_mr(mr)) {
/*
* This barrier prevents the compiler from moving the
* setting of umem->odp_data->private to point to our
@@ -121,11 +119,11 @@ static void update_odp_mr(struct mlx5_ib_mr *mr)
smp_wmb();
}
}
-#endif
-static void reg_mr_callback(int status, void *context)
+static void reg_mr_callback(int status, struct mlx5_async_work *context)
{
- struct mlx5_ib_mr *mr = context;
+ struct mlx5_ib_mr *mr =
+ container_of(context, struct mlx5_ib_mr, cb_work);
struct mlx5_ib_dev *dev = mr->dev;
struct mlx5_mr_cache *cache = &dev->cache;
int c = order2idx(dev, mr->order);
@@ -216,9 +214,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
ent->pending++;
spin_unlock_irq(&ent->lock);
err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
- in, inlen,
+ &dev->async_ctx, in, inlen,
mr->out, sizeof(mr->out),
- reg_mr_callback, mr);
+ reg_mr_callback, &mr->cb_work);
if (err) {
spin_lock_irq(&ent->lock);
ent->pending--;
@@ -256,9 +254,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- synchronize_srcu(&dev->mr_srcu);
-#endif
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ synchronize_srcu(&dev->mr_srcu);
list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
list_del(&mr->list);
@@ -610,52 +607,27 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
dev->cache.root = NULL;
}
-static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
+static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
+ struct dentry *dir;
int i;
if (!mlx5_debugfs_root || dev->rep)
- return 0;
+ return;
cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
- if (!cache->root)
- return -ENOMEM;
for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
ent = &cache->ent[i];
sprintf(ent->name, "%d", ent->order);
- ent->dir = debugfs_create_dir(ent->name, cache->root);
- if (!ent->dir)
- goto err;
-
- ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
- &size_fops);
- if (!ent->fsize)
- goto err;
-
- ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
- &limit_fops);
- if (!ent->flimit)
- goto err;
-
- ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
- &ent->cur);
- if (!ent->fcur)
- goto err;
-
- ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
- &ent->miss);
- if (!ent->fmiss)
- goto err;
+ dir = debugfs_create_dir(ent->name, cache->root);
+ debugfs_create_file("size", 0600, dir, ent, &size_fops);
+ debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
+ debugfs_create_u32("cur", 0400, dir, &ent->cur);
+ debugfs_create_u32("miss", 0600, dir, &ent->miss);
}
-
- return 0;
-err:
- mlx5_mr_cache_debugfs_cleanup(dev);
-
- return -ENOMEM;
}
static void delay_time_func(struct timer_list *t)
@@ -669,7 +641,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
- int err;
int i;
mutex_init(&dev->slow_path_mutex);
@@ -679,6 +650,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
return -ENOMEM;
}
+ mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
timer_setup(&dev->delay_timer, delay_time_func, 0);
for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
ent = &cache->ent[i];
@@ -713,45 +685,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
queue_work(cache->wq, &ent->work);
}
- err = mlx5_mr_cache_debugfs_init(dev);
- if (err)
- mlx5_ib_warn(dev, "cache debugfs failure\n");
-
- /*
- * We don't want to fail driver if debugfs failed to initialize,
- * so we are not forwarding error to the user.
- */
+ mlx5_mr_cache_debugfs_init(dev);
return 0;
}
-static void wait_for_async_commands(struct mlx5_ib_dev *dev)
-{
- struct mlx5_mr_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent;
- int total = 0;
- int i;
- int j;
-
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
- ent = &cache->ent[i];
- for (j = 0 ; j < 1000; j++) {
- if (!ent->pending)
- break;
- msleep(50);
- }
- }
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
- ent = &cache->ent[i];
- total += ent->pending;
- }
-
- if (total)
- mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total);
- else
- mlx5_ib_warn(dev, "done with all pending requests\n");
-}
-
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
int i;
@@ -763,12 +701,12 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
flush_workqueue(dev->cache.wq);
mlx5_mr_cache_debugfs_cleanup(dev);
+ mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
clean_keys(dev, i);
destroy_workqueue(dev->cache.wq);
- wait_for_async_commands(dev);
del_timer_sync(&dev->delay_timer);
return 0;
@@ -847,18 +785,17 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev)
return MLX5_MAX_UMR_SHIFT;
}
-static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
- int access_flags, struct ib_umem **umem,
- int *npages, int *page_shift, int *ncont,
- int *order)
+static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
+ u64 start, u64 length, int access_flags,
+ struct ib_umem **umem, int *npages, int *page_shift,
+ int *ncont, int *order)
{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct ib_umem *u;
int err;
*umem = NULL;
- u = ib_umem_get(pd->uobject->context, start, length, access_flags, 0);
+ u = ib_umem_get(udata, start, length, access_flags, 0);
err = PTR_ERR_OR_ZERO(u);
if (err) {
mlx5_ib_dbg(dev, "umem get failed (%d)\n", err);
@@ -1331,21 +1268,20 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (!start && length == U64_MAX) {
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
+ length == U64_MAX) {
if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
return ERR_PTR(-EINVAL);
- mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
+ mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
if (IS_ERR(mr))
return ERR_CAST(mr);
return &mr->ibmr;
}
-#endif
- err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
- &page_shift, &ncont, &order);
+ err = mr_umem_get(dev, udata, start, length, access_flags, &umem,
+ &npages, &page_shift, &ncont, &order);
if (err < 0)
return ERR_PTR(err);
@@ -1386,9 +1322,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->umem = umem;
set_mr_fields(dev, mr, npages, length, access_flags);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
update_odp_mr(mr);
-#endif
if (!populate_mtts) {
int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
@@ -1405,9 +1339,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- mr->live = 1;
-#endif
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ mr->live = 1;
+
return &mr->ibmr;
error:
ib_umem_release(umem);
@@ -1495,8 +1429,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
flags |= IB_MR_REREG_TRANS;
ib_umem_release(mr->umem);
mr->umem = NULL;
- err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
- &npages, &page_shift, &ncont, &order);
+ err = mr_umem_get(dev, udata, addr, len, access_flags,
+ &mr->umem, &npages, &page_shift, &ncont,
+ &order);
if (err)
goto err;
}
@@ -1522,9 +1457,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
}
mr->allocated_from_cache = 0;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- mr->live = 1;
-#endif
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ mr->live = 1;
} else {
/*
* Send a UMR WQE
@@ -1553,9 +1487,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
set_mr_fields(dev, mr, npages, len, access_flags);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
update_odp_mr(mr);
-#endif
return 0;
err:
@@ -1641,8 +1573,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
int npages = mr->npages;
struct ib_umem *umem = mr->umem;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (umem && umem->is_odp) {
+ if (is_odp_mr(mr)) {
struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem);
/* Prevent new page faults from succeeding */
@@ -1666,7 +1597,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
/* Avoid double-freeing the umem. */
umem = NULL;
}
-#endif
+
clean_mr(dev, mr);
/*
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 4ee32964e1dd..335fd0c6ea2a 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -101,9 +101,9 @@ static int check_parent(struct ib_umem_odp *odp,
return mr && mr->parent == parent && !odp->dying;
}
-struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr)
+static struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr)
{
- if (WARN_ON(!mr || !mr->umem || !mr->umem->is_odp))
+ if (WARN_ON(!mr || !is_odp_mr(mr)))
return NULL;
return to_ib_umem_odp(mr->umem)->per_mm;
@@ -315,6 +315,9 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.send))
caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND;
+ if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.srq_receive))
+ caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
+
if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.send))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND;
@@ -330,6 +333,27 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
+ if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.srq_receive))
+ caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
+
+ if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.send))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SEND;
+
+ if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.receive))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_RECV;
+
+ if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.write))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_WRITE;
+
+ if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.read))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_READ;
+
+ if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.atomic))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
+
+ if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.srq_receive))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
+
if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
MLX5_CAP_GEN(dev->mdev, null_mkey) &&
MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
@@ -439,7 +463,7 @@ next_mr:
if (nentries)
nentries++;
} else {
- odp = ib_alloc_odp_umem(odp_mr->per_mm, addr,
+ odp = ib_alloc_odp_umem(odp_mr, addr,
MLX5_IMR_MTT_SIZE);
if (IS_ERR(odp)) {
mutex_unlock(&odp_mr->umem_mutex);
@@ -492,13 +516,13 @@ next_mr:
}
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+ struct ib_udata *udata,
int access_flags)
{
- struct ib_ucontext *ctx = pd->ibpd.uobject->context;
struct mlx5_ib_mr *imr;
struct ib_umem *umem;
- umem = ib_umem_get(ctx, 0, 0, IB_ACCESS_ON_DEMAND, 0);
+ umem = ib_umem_get(udata, 0, 0, IB_ACCESS_ON_DEMAND, 0);
if (IS_ERR(umem))
return ERR_CAST(umem);
@@ -685,6 +709,21 @@ struct pf_frame {
int depth;
};
+static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey)
+{
+ struct mlx5_ib_mw *mw;
+ struct mlx5_ib_devx_mr *devx_mr;
+
+ if (mmkey->type == MLX5_MKEY_MW) {
+ mw = container_of(mmkey, struct mlx5_ib_mw, mmkey);
+ return mw->ndescs;
+ }
+
+ devx_mr = container_of(mmkey, struct mlx5_ib_devx_mr,
+ mmkey);
+ return devx_mr->ndescs;
+}
+
/*
* Handle a single data segment in a page-fault WQE or RDMA region.
*
@@ -705,11 +744,11 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key,
bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
struct pf_frame *head = NULL, *frame;
struct mlx5_core_mkey *mmkey;
- struct mlx5_ib_mw *mw;
struct mlx5_ib_mr *mr;
struct mlx5_klm *pklm;
u32 *out = NULL;
size_t offset;
+ int ndescs;
srcu_key = srcu_read_lock(&dev->mr_srcu);
@@ -739,12 +778,12 @@ next_mr:
goto srcu_unlock;
}
- if (prefetch && !mr->umem->is_odp) {
+ if (prefetch && !is_odp_mr(mr)) {
ret = -EINVAL;
goto srcu_unlock;
}
- if (!mr->umem->is_odp) {
+ if (!is_odp_mr(mr)) {
mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
key);
if (bytes_mapped)
@@ -762,7 +801,8 @@ next_mr:
break;
case MLX5_MKEY_MW:
- mw = container_of(mmkey, struct mlx5_ib_mw, mmkey);
+ case MLX5_MKEY_INDIRECT_DEVX:
+ ndescs = get_indirect_num_descs(mmkey);
if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) {
mlx5_ib_dbg(dev, "indirection level exceeded\n");
@@ -771,7 +811,7 @@ next_mr:
}
outlen = MLX5_ST_SZ_BYTES(query_mkey_out) +
- sizeof(*pklm) * (mw->ndescs - 2);
+ sizeof(*pklm) * (ndescs - 2);
if (outlen > cur_outlen) {
kfree(out);
@@ -786,14 +826,14 @@ next_mr:
pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out,
bsf0_klm0_pas_mtt0_1);
- ret = mlx5_core_query_mkey(dev->mdev, &mw->mmkey, out, outlen);
+ ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen);
if (ret)
goto srcu_unlock;
offset = io_virt - MLX5_GET64(query_mkey_out, out,
memory_key_mkey_entry.start_addr);
- for (i = 0; bcnt && i < mw->ndescs; i++, pklm++) {
+ for (i = 0; bcnt && i < ndescs; i++, pklm++) {
if (offset >= be32_to_cpu(pklm->bcount)) {
offset -= be32_to_cpu(pklm->bcount);
continue;
@@ -853,7 +893,6 @@ srcu_unlock:
/**
* Parse a series of data segments for page fault handling.
*
- * @qp the QP on which the fault occurred.
* @pfault contains page fault information.
* @wqe points at the first data segment in the WQE.
* @wqe_end points after the end of the WQE.
@@ -870,7 +909,7 @@ srcu_unlock:
*/
static int pagefault_data_segments(struct mlx5_ib_dev *dev,
struct mlx5_pagefault *pfault,
- struct mlx5_ib_qp *qp, void *wqe,
+ void *wqe,
void *wqe_end, u32 *bytes_mapped,
u32 *total_wqe_bytes, int receive_queue)
{
@@ -881,10 +920,6 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
size_t bcnt;
int inline_segment;
- /* Skip SRQ next-WQE segment. */
- if (receive_queue && qp->ibqp.srq)
- wqe += sizeof(struct mlx5_wqe_srq_next_seg);
-
if (bytes_mapped)
*bytes_mapped = 0;
if (total_wqe_bytes)
@@ -1009,6 +1044,10 @@ static int mlx5_ib_mr_initiator_pfault_handler(
MLX5_WQE_CTRL_OPCODE_MASK;
switch (qp->ibqp.qp_type) {
+ case IB_QPT_XRC_INI:
+ *wqe += sizeof(struct mlx5_wqe_xrc_seg);
+ transport_caps = dev->odp_caps.per_transport_caps.xrc_odp_caps;
+ break;
case IB_QPT_RC:
transport_caps = dev->odp_caps.per_transport_caps.rc_odp_caps;
break;
@@ -1028,7 +1067,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
return -EFAULT;
}
- if (qp->ibqp.qp_type != IB_QPT_RC) {
+ if (qp->ibqp.qp_type == IB_QPT_UD) {
av = *wqe;
if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
*wqe += sizeof(struct mlx5_av);
@@ -1053,21 +1092,34 @@ static int mlx5_ib_mr_initiator_pfault_handler(
}
/*
- * Parse responder WQE. Advances the wqe pointer to point at the
- * scatter-gather list, and set wqe_end to the end of the WQE.
+ * Parse responder WQE and set wqe_end to the end of the WQE.
*/
-static int mlx5_ib_mr_responder_pfault_handler(
- struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault,
- struct mlx5_ib_qp *qp, void **wqe, void **wqe_end, int wqe_length)
+static int mlx5_ib_mr_responder_pfault_handler_srq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_srq *srq,
+ void **wqe, void **wqe_end,
+ int wqe_length)
{
- struct mlx5_ib_wq *wq = &qp->rq;
- int wqe_size = 1 << wq->wqe_shift;
+ int wqe_size = 1 << srq->msrq.wqe_shift;
- if (qp->ibqp.srq) {
- mlx5_ib_err(dev, "ODP fault on SRQ is not supported\n");
+ if (wqe_size > wqe_length) {
+ mlx5_ib_err(dev, "Couldn't read all of the receive WQE's content\n");
return -EFAULT;
}
+ *wqe_end = *wqe + wqe_size;
+ *wqe += sizeof(struct mlx5_wqe_srq_next_seg);
+
+ return 0;
+}
+
+static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
+ void *wqe, void **wqe_end,
+ int wqe_length)
+{
+ struct mlx5_ib_wq *wq = &qp->rq;
+ int wqe_size = 1 << wq->wqe_shift;
+
if (qp->wq_sig) {
mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n");
return -EFAULT;
@@ -1091,7 +1143,7 @@ invalid_transport_or_opcode:
return -EFAULT;
}
- *wqe_end = *wqe + wqe_size;
+ *wqe_end = wqe + wqe_size;
return 0;
}
@@ -1099,22 +1151,25 @@ invalid_transport_or_opcode:
static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev,
u32 wq_num, int pf_type)
{
- enum mlx5_res_type res_type;
+ struct mlx5_core_rsc_common *common = NULL;
+ struct mlx5_core_srq *srq;
switch (pf_type) {
case MLX5_WQE_PF_TYPE_RMP:
- res_type = MLX5_RES_SRQ;
+ srq = mlx5_cmd_get_srq(dev, wq_num);
+ if (srq)
+ common = &srq->common;
break;
case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE:
case MLX5_WQE_PF_TYPE_RESP:
case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC:
- res_type = MLX5_RES_QP;
+ common = mlx5_core_res_hold(dev->mdev, wq_num, MLX5_RES_QP);
break;
default:
- return NULL;
+ break;
}
- return mlx5_core_res_hold(dev->mdev, wq_num, res_type);
+ return common;
}
static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res)
@@ -1124,6 +1179,14 @@ static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res)
return to_mibqp(mqp);
}
+static inline struct mlx5_ib_srq *res_to_srq(struct mlx5_core_rsc_common *res)
+{
+ struct mlx5_core_srq *msrq =
+ container_of(res, struct mlx5_core_srq, common);
+
+ return to_mibsrq(msrq);
+}
+
static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
struct mlx5_pagefault *pfault)
{
@@ -1134,8 +1197,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
int resume_with_error = 1;
u16 wqe_index = pfault->wqe.wqe_index;
int requestor = pfault->type & MLX5_PFAULT_REQUESTOR;
- struct mlx5_core_rsc_common *res;
- struct mlx5_ib_qp *qp;
+ struct mlx5_core_rsc_common *res = NULL;
+ struct mlx5_ib_qp *qp = NULL;
+ struct mlx5_ib_srq *srq = NULL;
+ size_t bytes_copied;
res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type);
if (!res) {
@@ -1147,6 +1212,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
case MLX5_RES_QP:
qp = res_to_qp(res);
break;
+ case MLX5_RES_SRQ:
+ case MLX5_RES_XSRQ:
+ srq = res_to_srq(res);
+ break;
default:
mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type);
goto resolve_page_fault;
@@ -1158,9 +1227,23 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
goto resolve_page_fault;
}
- ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
- PAGE_SIZE, &qp->trans_qp.base);
- if (ret < 0) {
+ if (qp) {
+ if (requestor) {
+ ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index,
+ buffer, PAGE_SIZE,
+ &bytes_copied);
+ } else {
+ ret = mlx5_ib_read_user_wqe_rq(qp, wqe_index,
+ buffer, PAGE_SIZE,
+ &bytes_copied);
+ }
+ } else {
+ ret = mlx5_ib_read_user_wqe_srq(srq, wqe_index,
+ buffer, PAGE_SIZE,
+ &bytes_copied);
+ }
+
+ if (ret) {
mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%d, wqe_index=%x, qpn=%x\n",
ret, wqe_index, pfault->token);
goto resolve_page_fault;
@@ -1168,11 +1251,18 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
wqe = buffer;
if (requestor)
- ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp, &wqe,
- &wqe_end, ret);
+ ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp,
+ &wqe, &wqe_end,
+ bytes_copied);
+ else if (qp)
+ ret = mlx5_ib_mr_responder_pfault_handler_rq(dev, qp,
+ wqe, &wqe_end,
+ bytes_copied);
else
- ret = mlx5_ib_mr_responder_pfault_handler(dev, pfault, qp, &wqe,
- &wqe_end, ret);
+ ret = mlx5_ib_mr_responder_pfault_handler_srq(dev, srq,
+ &wqe, &wqe_end,
+ bytes_copied);
+
if (ret < 0)
goto resolve_page_fault;
@@ -1181,7 +1271,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
goto resolve_page_fault;
}
- ret = pagefault_data_segments(dev, pfault, qp, wqe, wqe_end,
+ ret = pagefault_data_segments(dev, pfault, wqe, wqe_end,
&bytes_mapped, &total_wqe_bytes,
!requestor);
if (ret == -EAGAIN) {
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 7db778d96ef5..c6ccd4d36a90 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -109,75 +109,173 @@ static int is_sqp(enum ib_qp_type qp_type)
}
/**
- * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space.
+ * mlx5_ib_read_user_wqe_common() - Copy a WQE (or part of) from user WQ
+ * to kernel buffer
*
- * @qp: QP to copy from.
- * @send: copy from the send queue when non-zero, use the receive queue
- * otherwise.
- * @wqe_index: index to start copying from. For send work queues, the
- * wqe_index is in units of MLX5_SEND_WQE_BB.
- * For receive work queue, it is the number of work queue
- * element in the queue.
- * @buffer: destination buffer.
- * @length: maximum number of bytes to copy.
+ * @umem: User space memory where the WQ is
+ * @buffer: buffer to copy to
+ * @buflen: buffer length
+ * @wqe_index: index of WQE to copy from
+ * @wq_offset: offset to start of WQ
+ * @wq_wqe_cnt: number of WQEs in WQ
+ * @wq_wqe_shift: log2 of WQE size
+ * @bcnt: number of bytes to copy
+ * @bytes_copied: number of bytes to copy (return value)
*
- * Copies at least a single WQE, but may copy more data.
+ * Copies from start of WQE bcnt or less bytes.
+ * Does not gurantee to copy the entire WQE.
*
- * Return: the number of bytes copied, or an error code.
+ * Return: zero on success, or an error code.
*/
-int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
- void *buffer, u32 length,
- struct mlx5_ib_qp_base *base)
+static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem,
+ void *buffer,
+ u32 buflen,
+ int wqe_index,
+ int wq_offset,
+ int wq_wqe_cnt,
+ int wq_wqe_shift,
+ int bcnt,
+ size_t *bytes_copied)
+{
+ size_t offset = wq_offset + ((wqe_index % wq_wqe_cnt) << wq_wqe_shift);
+ size_t wq_end = wq_offset + (wq_wqe_cnt << wq_wqe_shift);
+ size_t copy_length;
+ int ret;
+
+ /* don't copy more than requested, more than buffer length or
+ * beyond WQ end
+ */
+ copy_length = min_t(u32, buflen, wq_end - offset);
+ copy_length = min_t(u32, copy_length, bcnt);
+
+ ret = ib_umem_copy_from(buffer, umem, offset, copy_length);
+ if (ret)
+ return ret;
+
+ if (!ret && bytes_copied)
+ *bytes_copied = copy_length;
+
+ return 0;
+}
+
+int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp,
+ int wqe_index,
+ void *buffer,
+ int buflen,
+ size_t *bc)
{
- struct ib_device *ibdev = qp->ibqp.device;
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
- size_t offset;
- size_t wq_end;
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct ib_umem *umem = base->ubuffer.umem;
- u32 first_copy_length;
- int wqe_length;
+ struct mlx5_ib_wq *wq = &qp->sq;
+ struct mlx5_wqe_ctrl_seg *ctrl;
+ size_t bytes_copied;
+ size_t bytes_copied2;
+ size_t wqe_length;
int ret;
+ int ds;
- if (wq->wqe_cnt == 0) {
- mlx5_ib_dbg(dev, "mlx5_ib_read_user_wqe for a QP with wqe_cnt == 0. qp_type: 0x%x\n",
- qp->ibqp.qp_type);
+ if (buflen < sizeof(*ctrl))
return -EINVAL;
- }
- offset = wq->offset + ((wqe_index % wq->wqe_cnt) << wq->wqe_shift);
- wq_end = wq->offset + (wq->wqe_cnt << wq->wqe_shift);
+ /* at first read as much as possible */
+ ret = mlx5_ib_read_user_wqe_common(umem,
+ buffer,
+ buflen,
+ wqe_index,
+ wq->offset,
+ wq->wqe_cnt,
+ wq->wqe_shift,
+ buflen,
+ &bytes_copied);
+ if (ret)
+ return ret;
- if (send && length < sizeof(struct mlx5_wqe_ctrl_seg))
+ /* we need at least control segment size to proceed */
+ if (bytes_copied < sizeof(*ctrl))
return -EINVAL;
- if (offset > umem->length ||
- (send && offset + sizeof(struct mlx5_wqe_ctrl_seg) > umem->length))
- return -EINVAL;
+ ctrl = buffer;
+ ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+ wqe_length = ds * MLX5_WQE_DS_UNITS;
+
+ /* if we copied enough then we are done */
+ if (bytes_copied >= wqe_length) {
+ *bc = bytes_copied;
+ return 0;
+ }
+
+ /* otherwise this a wrapped around wqe
+ * so read the remaining bytes starting
+ * from wqe_index 0
+ */
+ ret = mlx5_ib_read_user_wqe_common(umem,
+ buffer + bytes_copied,
+ buflen - bytes_copied,
+ 0,
+ wq->offset,
+ wq->wqe_cnt,
+ wq->wqe_shift,
+ wqe_length - bytes_copied,
+ &bytes_copied2);
- first_copy_length = min_t(u32, offset + length, wq_end) - offset;
- ret = ib_umem_copy_from(buffer, umem, offset, first_copy_length);
if (ret)
return ret;
+ *bc = bytes_copied + bytes_copied2;
+ return 0;
+}
- if (send) {
- struct mlx5_wqe_ctrl_seg *ctrl = buffer;
- int ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
-
- wqe_length = ds * MLX5_WQE_DS_UNITS;
- } else {
- wqe_length = 1 << wq->wqe_shift;
- }
+int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp,
+ int wqe_index,
+ void *buffer,
+ int buflen,
+ size_t *bc)
+{
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
+ struct ib_umem *umem = base->ubuffer.umem;
+ struct mlx5_ib_wq *wq = &qp->rq;
+ size_t bytes_copied;
+ int ret;
- if (wqe_length <= first_copy_length)
- return first_copy_length;
+ ret = mlx5_ib_read_user_wqe_common(umem,
+ buffer,
+ buflen,
+ wqe_index,
+ wq->offset,
+ wq->wqe_cnt,
+ wq->wqe_shift,
+ buflen,
+ &bytes_copied);
- ret = ib_umem_copy_from(buffer + first_copy_length, umem, wq->offset,
- wqe_length - first_copy_length);
if (ret)
return ret;
+ *bc = bytes_copied;
+ return 0;
+}
+
+int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq,
+ int wqe_index,
+ void *buffer,
+ int buflen,
+ size_t *bc)
+{
+ struct ib_umem *umem = srq->umem;
+ size_t bytes_copied;
+ int ret;
+
+ ret = mlx5_ib_read_user_wqe_common(umem,
+ buffer,
+ buflen,
+ wqe_index,
+ 0,
+ srq->msrq.max,
+ srq->msrq.wqe_shift,
+ buflen,
+ &bytes_copied);
- return wqe_length;
+ if (ret)
+ return ret;
+ *bc = bytes_copied;
+ return 0;
}
static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
@@ -645,16 +743,14 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
return bfregi->sys_pages[index_of_sys_page] + offset;
}
-static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
- struct ib_pd *pd,
+static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
unsigned long addr, size_t size,
- struct ib_umem **umem,
- int *npages, int *page_shift, int *ncont,
- u32 *offset)
+ struct ib_umem **umem, int *npages, int *page_shift,
+ int *ncont, u32 *offset)
{
int err;
- *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0);
+ *umem = ib_umem_get(udata, addr, size, 0, 0);
if (IS_ERR(*umem)) {
mlx5_ib_dbg(dev, "umem_get failed\n");
return PTR_ERR(*umem);
@@ -695,10 +791,9 @@ static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
}
static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
- struct mlx5_ib_rwq *rwq,
+ struct ib_udata *udata, struct mlx5_ib_rwq *rwq,
struct mlx5_ib_create_wq *ucmd)
{
- struct mlx5_ib_ucontext *context;
int page_shift = 0;
int npages;
u32 offset = 0;
@@ -708,9 +803,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (!ucmd->buf_addr)
return -EINVAL;
- context = to_mucontext(pd->uobject->context);
- rwq->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr,
- rwq->buf_size, 0, 0);
+ rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0, 0);
if (IS_ERR(rwq->umem)) {
mlx5_ib_dbg(dev, "umem_get failed\n");
err = PTR_ERR(rwq->umem);
@@ -735,7 +828,8 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
(unsigned long long)ucmd->buf_addr, rwq->buf_size,
npages, page_shift, ncont, offset);
- err = mlx5_ib_db_map_user(context, ucmd->db_addr, &rwq->db);
+ err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), udata,
+ ucmd->db_addr, &rwq->db);
if (err) {
mlx5_ib_dbg(dev, "map failed\n");
goto err_umem;
@@ -819,10 +913,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (ucmd.buf_addr && ubuffer->buf_size) {
ubuffer->buf_addr = ucmd.buf_addr;
- err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr,
- ubuffer->buf_size,
- &ubuffer->umem, &npages, &page_shift,
- &ncont, &offset);
+ err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr,
+ ubuffer->buf_size, &ubuffer->umem,
+ &npages, &page_shift, &ncont, &offset);
if (err)
goto err_bfreg;
} else {
@@ -856,7 +949,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
resp->bfreg_index = MLX5_IB_INVALID_BFREG;
qp->bfregn = bfregn;
- err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
+ err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &qp->db);
if (err) {
mlx5_ib_dbg(dev, "map failed\n");
goto err_free;
@@ -1119,6 +1212,7 @@ static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
}
static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+ struct ib_udata *udata,
struct mlx5_ib_sq *sq, void *qpin,
struct ib_pd *pd)
{
@@ -1135,9 +1229,9 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
int ncont = 0;
u32 offset = 0;
- err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size,
- &sq->ubuffer.umem, &npages, &page_shift,
- &ncont, &offset);
+ err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size,
+ &sq->ubuffer.umem, &npages, &page_shift, &ncont,
+ &offset);
if (err)
return err;
@@ -1374,7 +1468,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (err)
return err;
- err = create_raw_packet_qp_sq(dev, sq, in, pd);
+ err = create_raw_packet_qp_sq(dev, udata, sq, in, pd);
if (err)
goto err_destroy_tis;
@@ -5795,7 +5889,7 @@ static int prepare_user_rq(struct ib_pd *pd,
return err;
}
- err = create_user_rq(dev, pd, rwq, &ucmd);
+ err = create_user_rq(dev, pd, udata, rwq, &ucmd);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
return err;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 4e8d18009f58..22bd774e0b4e 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -79,8 +79,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
- srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
- 0, 0);
+ srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0);
if (IS_ERR(srq->umem)) {
mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
err = PTR_ERR(srq->umem);
@@ -104,7 +103,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0);
- err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context),
+ err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), udata,
ucmd.db_addr, &srq->db);
if (err) {
mlx5_ib_dbg(dev, "map doorbell failed\n");
diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h
index 75eb5839ae95..c330af35ff10 100644
--- a/drivers/infiniband/hw/mlx5/srq.h
+++ b/drivers/infiniband/hw/mlx5/srq.h
@@ -46,8 +46,6 @@ struct mlx5_core_srq {
int wqe_shift;
void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e);
- atomic_t refcount;
- struct completion free;
u16 uid;
};
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index 7aaaffbd4afa..63ac38bb3498 100644
--- a/drivers/infiniband/hw/mlx5/srq_cmd.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -87,7 +87,7 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
srq = radix_tree_lookup(&table->tree, srqn);
if (srq)
- atomic_inc(&srq->refcount);
+ atomic_inc(&srq->common.refcount);
spin_unlock(&table->lock);
@@ -594,8 +594,8 @@ int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
if (err)
return err;
- atomic_set(&srq->refcount, 1);
- init_completion(&srq->free);
+ atomic_set(&srq->common.refcount, 1);
+ init_completion(&srq->common.free);
spin_lock_irq(&table->lock);
err = radix_tree_insert(&table->tree, srq->srqn, srq);
@@ -627,9 +627,8 @@ int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
if (err)
return err;
- if (atomic_dec_and_test(&srq->refcount))
- complete(&srq->free);
- wait_for_completion(&srq->free);
+ mlx5_core_res_put(&srq->common);
+ wait_for_completion(&srq->common.free);
return 0;
}
@@ -685,7 +684,7 @@ static int srq_event_notifier(struct notifier_block *nb,
srq = radix_tree_lookup(&table->tree, srqn);
if (srq)
- atomic_inc(&srq->refcount);
+ atomic_inc(&srq->common.refcount);
spin_unlock(&table->lock);
@@ -694,8 +693,7 @@ static int srq_event_notifier(struct notifier_block *nb,
srq->event(srq, eqe->type);
- if (atomic_dec_and_test(&srq->refcount))
- complete(&srq->free);
+ mlx5_core_res_put(&srq->common);
return NOTIFY_OK;
}