Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--  drivers/infiniband/hw/mlx5/Kconfig    |   1
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.c      |  37
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.h      |   2
-rw-r--r--  drivers/infiniband/hw/mlx5/cong.c     |  15
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c       |  15
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c     | 443
-rw-r--r--  drivers/infiniband/hw/mlx5/doorbell.c |   6
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.c   |   2
-rw-r--r--  drivers/infiniband/hw/mlx5/mad.c      |  11
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c     | 177
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c      |   5
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h  |  44
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c       | 153
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c      | 178
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c       | 232
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c      |   5
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.h      |   2
-rw-r--r--  drivers/infiniband/hw/mlx5/srq_cmd.c  |  16
18 files changed, 992 insertions, 352 deletions
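
The bulk of this diff is the new DEVX asynchronous command FD in devx.c below: a file descriptor object backed by devx_async_cmd_event_fops, onto which MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY completions are queued by devx_query_callback() and drained with poll(2)/read(2). A minimal userspace consumer, sketched against rdma-core's mlx5dv DEVX API; the obj, in/inlen and outlen arguments are assumed to describe an already created DEVX object and a valid query command for it, error handling is abbreviated, and the query_async() helper itself is made up for illustration:

#include <poll.h>
#include <stdlib.h>
#include <infiniband/mlx5dv.h>

/* Hypothetical helper, not part of any library. */
static int query_async(struct ibv_context *ctx, struct mlx5dv_devx_obj *obj,
		       const void *in, size_t inlen, size_t outlen)
{
	struct mlx5dv_devx_async_cmd_hdr *resp;
	struct mlx5dv_devx_cmd_comp *comp;
	struct pollfd pfd;
	int ret;

	/* Allocates the event FD (MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC). */
	comp = mlx5dv_devx_create_cmd_comp(ctx);
	if (!comp)
		return -1;

	/* Fires the query; the completion will land on comp->fd. */
	ret = mlx5dv_devx_obj_query_async(obj, in, inlen, outlen,
					  0x1234 /* wr_id, echoed back */,
					  comp);
	if (ret)
		goto out_comp;

	/* devx_query_callback() wakes this poll once the HW answers. */
	pfd.fd = comp->fd;
	pfd.events = POLLIN;
	if (poll(&pfd, 1, -1) < 0) {
		ret = -1;
		goto out_comp;
	}

	/* The read must be sized for the header plus the full response,
	 * otherwise devx_async_cmd_event_read() returns -ENOSPC. */
	resp = malloc(sizeof(*resp) + outlen);
	if (!resp) {
		ret = -1;
		goto out_comp;
	}
	ret = mlx5dv_devx_get_async_cmd_comp(comp, resp,
					     sizeof(*resp) + outlen);
	/* On success, resp->wr_id identifies the request and
	 * resp->out_data holds the command output. */
	free(resp);
out_comp:
	mlx5dv_devx_destroy_cmd_comp(comp);
	return ret;
}

Note the 1 MiB MAX_ASYNC_BYTES_IN_USE cap in the handler below: a process that issues async queries without draining the FD starts seeing -EAGAIN once that much response space is outstanding.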
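A second recurring change in main.c, mem.c and mr.c below is mechanical: #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING guards are replaced with IS_ENABLED() checks, and the ODP fields in mlx5_ib.h are moved out of the #ifdef so they exist in every configuration. A minimal illustration of the idiom (not taken from the patch; the helper name is made up):

#include <linux/kconfig.h>
#include <linux/srcu.h>

/* The branch is parsed and type-checked in every configuration; when
 * CONFIG_INFINIBAND_ON_DEMAND_PAGING is off, the compiler sees if (0)
 * and discards it as dead code, whereas an #ifdef hides the same
 * lines from most build coverage entirely. */
static void odp_srcu_teardown(struct srcu_struct *srcu)
{
	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
		synchronize_srcu(srcu);
		cleanup_srcu_struct(srcu);
	}
}
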
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig index 0440966bc6ec..8d651c05de62 100644 --- a/drivers/infiniband/hw/mlx5/Kconfig +++ b/drivers/infiniband/hw/mlx5/Kconfig @@ -1,7 +1,6 @@ config MLX5_INFINIBAND tristate "Mellanox 5th generation network adapters (ConnectX series) support" depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE - depends on INFINIBAND_USER_ACCESS || INFINIBAND_USER_ACCESS=n ---help--- This driver provides low-level InfiniBand support for Mellanox Connect-IB PCI Express host channel adapters (HCAs). diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 356bccc715ee..6bcc63aaa50b 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -345,3 +345,40 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, counter_set_id); return err; } + +int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, + u16 opmod, u8 port) +{ + int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); + int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); + int err = -ENOMEM; + void *data; + void *resp; + u32 *out; + u32 *in; + + in = kzalloc(inlen, GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); + if (!in || !out) + goto out; + + MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); + MLX5_SET(mad_ifc_in, in, op_mod, opmod); + MLX5_SET(mad_ifc_in, in, port, port); + + data = MLX5_ADDR_OF(mad_ifc_in, in, mad); + memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); + + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + if (err) + goto out; + + resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); + memcpy(outb, resp, + MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); + +out: + kfree(out); + kfree(in); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 1e76dc67a369..923a7b93f507 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -63,4 +63,6 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid); int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid); int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, u16 uid); +int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, + u16 opmod, u8 port); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index 7e4e358a4fd8..8ba439fabf7f 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -389,19 +389,19 @@ void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num) dev->port[port_num].dbg_cc_params = NULL; } -int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num) +void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num) { struct mlx5_ib_dbg_cc_params *dbg_cc_params; struct mlx5_core_dev *mdev; int i; if (!mlx5_debugfs_root) - goto out; + return; /* Takes a 1-based port number */ mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL); if (!mdev) - goto out; + return; if (!MLX5_CAP_GEN(mdev, cc_query_allowed) || !MLX5_CAP_GEN(mdev, cc_modify_allowed)) @@ -415,8 +415,6 @@ int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num) dbg_cc_params->root = debugfs_create_dir("cc_params", mdev->priv.dbg_root); - if (!dbg_cc_params->root) - goto err; for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) { dbg_cc_params->params[i].offset = i; @@ -427,14 +425,11 @@ int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num) 
0600, dbg_cc_params->root, &dbg_cc_params->params[i], &dbg_cc_fops); - if (!dbg_cc_params->params[i].dentry) - goto err; } put_mdev: mlx5_ib_put_native_port_mdev(dev, port_num + 1); -out: - return 0; + return; err: mlx5_ib_warn(dev, "cong debugfs failure\n"); @@ -445,5 +440,5 @@ err: * We don't want to fail driver if debugfs failed to initialize, * so we are not forwarding error to the user. */ - return 0; + return; } diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 90f1b0bae5b5..18704e503508 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -187,8 +187,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, wqe_ctr = be16_to_cpu(cqe->wqe_counter); wc->wr_id = srq->wrid[wqe_ctr]; mlx5_ib_free_srq_wqe(srq, wqe_ctr); - if (msrq && atomic_dec_and_test(&msrq->refcount)) - complete(&msrq->free); + if (msrq) + mlx5_core_res_put(&msrq->common); } } else { wq = &qp->rq; @@ -707,15 +707,15 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, *cqe_size = ucmd.cqe_size; - cq->buf.umem = ib_umem_get(context, ucmd.buf_addr, - entries * ucmd.cqe_size, - IB_ACCESS_LOCAL_WRITE, 1); + cq->buf.umem = + ib_umem_get(udata, ucmd.buf_addr, entries * ucmd.cqe_size, + IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(cq->buf.umem)) { err = PTR_ERR(cq->buf.umem); return err; } - err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr, + err = mlx5_ib_db_map_user(to_mucontext(context), udata, ucmd.db_addr, &cq->db); if (err) goto err_umem; @@ -1111,7 +1111,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, struct ib_umem *umem; int err; int npages; - struct ib_ucontext *context = cq->buf.umem->context; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) @@ -1124,7 +1123,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1) return -EINVAL; - umem = ib_umem_get(context, ucmd.buf_addr, + umem = ib_umem_get(udata, ucmd.buf_addr, (size_t)ucmd.cqe_size * entries, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(umem)) { diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 5a588f3cfb1b..cd43e39ced87 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -8,6 +8,7 @@ #include <rdma/uverbs_types.h> #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/mlx5_user_ioctl_verbs.h> #include <rdma/ib_umem.h> #include <rdma/uverbs_std_types.h> #include <linux/mlx5/driver.h> @@ -17,12 +18,28 @@ #define UVERBS_MODULE_NAME mlx5_ib #include <rdma/uverbs_named_ioctl.h> +enum devx_obj_flags { + DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0, +}; + +struct devx_async_data { + struct mlx5_ib_dev *mdev; + struct list_head list; + struct ib_uobject *fd_uobj; + struct mlx5_async_work cb_work; + u16 cmd_out_len; + /* must be last field in this structure */ + struct mlx5_ib_uapi_devx_async_cmd_hdr hdr; +}; + #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) struct devx_obj { struct mlx5_core_dev *mdev; u64 obj_id; u32 dinlen; /* destroy inbox length */ u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; + u32 flags; + struct mlx5_ib_devx_mr devx_mr; }; struct devx_umem { @@ -1011,6 +1028,92 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, } } +static int devx_handle_mkey_indirect(struct devx_obj *obj, + struct mlx5_ib_dev *dev, + void *in, void *out) +{ + struct mlx5_mkey_table *table = 
&dev->mdev->priv.mkey_table; + struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr; + unsigned long flags; + struct mlx5_core_mkey *mkey; + void *mkc; + u8 key; + int err; + + mkey = &devx_mr->mmkey; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + key = MLX5_GET(mkc, mkc, mkey_7_0); + mkey->key = mlx5_idx_to_mkey( + MLX5_GET(create_mkey_out, out, mkey_index)) | key; + mkey->type = MLX5_MKEY_INDIRECT_DEVX; + mkey->iova = MLX5_GET64(mkc, mkc, start_addr); + mkey->size = MLX5_GET64(mkc, mkc, len); + mkey->pd = MLX5_GET(mkc, mkc, pd); + devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); + + write_lock_irqsave(&table->lock, flags); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), + mkey); + write_unlock_irqrestore(&table->lock, flags); + return err; +} + +static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, + struct devx_obj *obj, + void *in, int in_len) +{ + int min_len = MLX5_BYTE_OFF(create_mkey_in, memory_key_mkey_entry) + + MLX5_FLD_SZ_BYTES(create_mkey_in, + memory_key_mkey_entry); + void *mkc; + u8 access_mode; + + if (in_len < min_len) + return -EINVAL; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + access_mode = MLX5_GET(mkc, mkc, access_mode_1_0); + access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2; + + if (access_mode == MLX5_MKC_ACCESS_MODE_KLMS || + access_mode == MLX5_MKC_ACCESS_MODE_KSM) { + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + obj->flags |= DEVX_OBJ_FLAGS_INDIRECT_MKEY; + return 0; + } + + MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1); + return 0; +} + +static void devx_free_indirect_mkey(struct rcu_head *rcu) +{ + kfree(container_of(rcu, struct devx_obj, devx_mr.rcu)); +} + +/* This function to delete from the radix tree needs to be called before + * destroying the underlying mkey. Otherwise a race might occur in case that + * other thread will get the same mkey before this one will be deleted, + * in that case it will fail via inserting to the tree its own data. + * + * Note: + * An error in the destroy is not expected unless there is some other indirect + * mkey which points to this one. In a kernel cleanup flow it will be just + * destroyed in the iterative destruction call. In a user flow, in case + * the application didn't close in the expected order it's its own problem, + * the mkey won't be part of the tree, in both cases the kernel is safe. 
+ */ +static void devx_cleanup_mkey(struct devx_obj *obj) +{ + struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table; + unsigned long flags; + + write_lock_irqsave(&table->lock, flags); + radix_tree_delete(&table->tree, mlx5_base_mkey(obj->devx_mr.mmkey.key)); + write_unlock_irqrestore(&table->lock, flags); +} + static int devx_obj_cleanup(struct ib_uobject *uobject, enum rdma_remove_reason why) { @@ -1018,10 +1121,21 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, struct devx_obj *obj = uobject->object; int ret; + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) + devx_cleanup_mkey(obj); + ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + struct mlx5_ib_dev *dev = to_mdev(uobject->context->device); + + call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu, + devx_free_indirect_mkey); + return ret; + } + kfree(obj); return ret; } @@ -1032,6 +1146,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT); + int cmd_in_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); void *cmd_out; struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); @@ -1060,10 +1176,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( return -ENOMEM; MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); - devx_set_umem_valid(cmd_in); + if (opcode == MLX5_CMD_OP_CREATE_MKEY) { + err = devx_handle_mkey_create(dev, obj, cmd_in, cmd_in_len); + if (err) + goto obj_free; + } else { + devx_set_umem_valid(cmd_in); + } err = mlx5_cmd_exec(dev->mdev, cmd_in, - uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN), + cmd_in_len, cmd_out, cmd_out_len); if (err) goto obj_free; @@ -1074,6 +1196,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( &obj_id); WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); + if (err) + goto obj_destroy; + } + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); if (err) goto obj_destroy; @@ -1082,6 +1210,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( return 0; obj_destroy: + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) + devx_cleanup_mkey(obj); mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); obj_free: kfree(obj); @@ -1168,6 +1298,153 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( cmd_out, cmd_out_len); } +struct devx_async_event_queue { + spinlock_t lock; + wait_queue_head_t poll_wait; + struct list_head event_list; + atomic_t bytes_in_use; + u8 is_destroyed:1; +}; + +struct devx_async_cmd_event_file { + struct ib_uobject uobj; + struct devx_async_event_queue ev_queue; + struct mlx5_async_ctx async_ctx; +}; + +static void devx_init_event_queue(struct devx_async_event_queue *ev_queue) +{ + spin_lock_init(&ev_queue->lock); + INIT_LIST_HEAD(&ev_queue->event_list); + init_waitqueue_head(&ev_queue->poll_wait); + atomic_set(&ev_queue->bytes_in_use, 0); + ev_queue->is_destroyed = 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)( + struct uverbs_attr_bundle *attrs) +{ + struct devx_async_cmd_event_file *ev_file; + + struct ib_uobject *uobj = 
uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE); + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + + ev_file = container_of(uobj, struct devx_async_cmd_event_file, + uobj); + devx_init_event_queue(&ev_file->ev_queue); + mlx5_cmd_init_async_ctx(mdev->mdev, &ev_file->async_ctx); + return 0; +} + +static void devx_query_callback(int status, struct mlx5_async_work *context) +{ + struct devx_async_data *async_data = + container_of(context, struct devx_async_data, cb_work); + struct ib_uobject *fd_uobj = async_data->fd_uobj; + struct devx_async_cmd_event_file *ev_file; + struct devx_async_event_queue *ev_queue; + unsigned long flags; + + ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file, + uobj); + ev_queue = &ev_file->ev_queue; + + spin_lock_irqsave(&ev_queue->lock, flags); + list_add_tail(&async_data->list, &ev_queue->event_list); + spin_unlock_irqrestore(&ev_queue->lock, flags); + + wake_up_interruptible(&ev_queue->poll_wait); + fput(fd_uobj->object); +} + +#define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */ + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)( + struct uverbs_attr_bundle *attrs) +{ + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN); + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE); + u16 cmd_out_len; + struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct ib_uobject *fd_uobj; + int err; + int uid; + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + struct devx_async_cmd_event_file *ev_file; + struct devx_async_data *async_data; + + uid = devx_get_uid(c, cmd_in); + if (uid < 0) + return uid; + + if (!devx_is_obj_query_cmd(cmd_in)) + return -EINVAL; + + err = uverbs_get_const(&cmd_out_len, attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN); + if (err) + return err; + + if (!devx_is_valid_obj_id(uobj, cmd_in)) + return -EINVAL; + + fd_uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD); + if (IS_ERR(fd_uobj)) + return PTR_ERR(fd_uobj); + + ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file, + uobj); + + if (atomic_add_return(cmd_out_len, &ev_file->ev_queue.bytes_in_use) > + MAX_ASYNC_BYTES_IN_USE) { + atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use); + return -EAGAIN; + } + + async_data = kvzalloc(struct_size(async_data, hdr.out_data, + cmd_out_len), GFP_KERNEL); + if (!async_data) { + err = -ENOMEM; + goto sub_bytes; + } + + err = uverbs_copy_from(&async_data->hdr.wr_id, attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID); + if (err) + goto free_async; + + async_data->cmd_out_len = cmd_out_len; + async_data->mdev = mdev; + async_data->fd_uobj = fd_uobj; + + get_file(fd_uobj->object); + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); + err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in, + uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN), + async_data->hdr.out_data, + async_data->cmd_out_len, + devx_query_callback, &async_data->cb_work); + + if (err) + goto cb_err; + + return 0; + +cb_err: + fput(fd_uobj->object); +free_async: + kvfree(async_data); +sub_bytes: + atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use); + return err; +} + static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, struct uverbs_attr_bundle *attrs, struct devx_umem *obj) @@ -1195,7 +1472,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - obj->umem = 
ib_umem_get(ucontext, addr, size, access, 0); + obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access, 0); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); @@ -1313,6 +1590,123 @@ static int devx_umem_cleanup(struct ib_uobject *uobject, return 0; } +static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct devx_async_cmd_event_file *comp_ev_file = filp->private_data; + struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue; + struct devx_async_data *event; + int ret = 0; + size_t eventsz; + + spin_lock_irq(&ev_queue->lock); + + while (list_empty(&ev_queue->event_list)) { + spin_unlock_irq(&ev_queue->lock); + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible( + ev_queue->poll_wait, + (!list_empty(&ev_queue->event_list) || + ev_queue->is_destroyed))) { + return -ERESTARTSYS; + } + + if (list_empty(&ev_queue->event_list) && + ev_queue->is_destroyed) + return -EIO; + + spin_lock_irq(&ev_queue->lock); + } + + event = list_entry(ev_queue->event_list.next, + struct devx_async_data, list); + eventsz = event->cmd_out_len + + sizeof(struct mlx5_ib_uapi_devx_async_cmd_hdr); + + if (eventsz > count) { + spin_unlock_irq(&ev_queue->lock); + return -ENOSPC; + } + + list_del(ev_queue->event_list.next); + spin_unlock_irq(&ev_queue->lock); + + if (copy_to_user(buf, &event->hdr, eventsz)) + ret = -EFAULT; + else + ret = eventsz; + + atomic_sub(event->cmd_out_len, &ev_queue->bytes_in_use); + kvfree(event); + return ret; +} + +static int devx_async_cmd_event_close(struct inode *inode, struct file *filp) +{ + struct ib_uobject *uobj = filp->private_data; + struct devx_async_cmd_event_file *comp_ev_file = container_of( + uobj, struct devx_async_cmd_event_file, uobj); + struct devx_async_data *entry, *tmp; + + spin_lock_irq(&comp_ev_file->ev_queue.lock); + list_for_each_entry_safe(entry, tmp, + &comp_ev_file->ev_queue.event_list, list) + kvfree(entry); + spin_unlock_irq(&comp_ev_file->ev_queue.lock); + + uverbs_close_fd(filp); + return 0; +} + +static __poll_t devx_async_cmd_event_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct devx_async_cmd_event_file *comp_ev_file = filp->private_data; + struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue; + __poll_t pollflags = 0; + + poll_wait(filp, &ev_queue->poll_wait, wait); + + spin_lock_irq(&ev_queue->lock); + if (ev_queue->is_destroyed) + pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; + else if (!list_empty(&ev_queue->event_list)) + pollflags = EPOLLIN | EPOLLRDNORM; + spin_unlock_irq(&ev_queue->lock); + + return pollflags; +} + +const struct file_operations devx_async_cmd_event_fops = { + .owner = THIS_MODULE, + .read = devx_async_cmd_event_read, + .poll = devx_async_cmd_event_poll, + .release = devx_async_cmd_event_close, + .llseek = no_llseek, +}; + +static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + struct devx_async_cmd_event_file *comp_ev_file = + container_of(uobj, struct devx_async_cmd_event_file, + uobj); + struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue; + + spin_lock_irq(&ev_queue->lock); + ev_queue->is_destroyed = 1; + spin_unlock_irq(&ev_queue->lock); + + if (why == RDMA_REMOVE_DRIVER_REMOVE) + wake_up_interruptible(&ev_queue->poll_wait); + + mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx); + return 0; +}; + DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_UMEM_REG, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE, @@ 
-1423,6 +1817,27 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), UA_MANDATORY)); +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, + UVERBS_IDR_ANY_OBJECT, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN, + u16, UA_MANDATORY), + UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD, + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY)); + DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR), @@ -1433,13 +1848,30 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY), - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY)); + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)); DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC, + UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE, + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UVERBS_ACCESS_NEW, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file), + devx_hot_unplug_async_cmd_event_file, + &devx_async_cmd_event_fops, "[devx_async_cmd]", + O_RDONLY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)); + static bool devx_is_supported(struct ib_device *device) { struct mlx5_ib_dev *dev = to_mdev(device); @@ -1457,5 +1889,8 @@ const struct uapi_definition mlx5_ib_devx_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_DEVX_UMEM, UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), {}, }; diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index a0e4e6ddb71a..8f4e5f22b84c 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -43,7 +43,8 @@ struct mlx5_ib_user_db_page { int refcnt; }; -int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, +int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, + struct ib_udata *udata, unsigned long virt, struct mlx5_db *db) { struct mlx5_ib_user_db_page *page; @@ -63,8 +64,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK, - PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 46a9ddc8ca56..6d7b8bad4b61 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -70,7 +70,7 @@ 
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { struct mlx5_ib_dev *ibdev; - ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev)); + ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!ibdev) return -ENOMEM; diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 558638468edb..6c529e6f3a01 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -36,6 +36,7 @@ #include <rdma/ib_smi.h> #include <rdma/ib_pma.h> #include "mlx5_ib.h" +#include "cmd.h" enum { MLX5_IB_VENDOR_CLASS1 = 0x9, @@ -51,9 +52,10 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num, return dev->mdev->port_caps[port_num - 1].has_smi; } -int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const void *in_mad, void *response_mad) +static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, + int ignore_bkey, u8 port, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const void *in_mad, + void *response_mad) { u8 op_modifier = 0; @@ -68,7 +70,8 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, if (ignore_bkey || !in_wc) op_modifier |= 0x2; - return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port); + return mlx5_cmd_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, + port); } static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 94fe253d4956..76d6c2557d0c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -476,24 +476,51 @@ out: return err; } +struct mlx5_ib_vlan_info { + u16 vlan_id; + bool vlan; +}; + +static int get_lower_dev_vlan(struct net_device *lower_dev, void *data) +{ + struct mlx5_ib_vlan_info *vlan_info = data; + + if (is_vlan_dev(lower_dev)) { + vlan_info->vlan = true; + vlan_info->vlan_id = vlan_dev_vlan_id(lower_dev); + } + /* We are interested only in first level vlan device, so + * always return 1 to stop iterating over next level devices. + */ + return 1; +} + static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr) { enum ib_gid_type gid_type = IB_GID_TYPE_IB; + struct mlx5_ib_vlan_info vlan_info = { }; u8 roce_version = 0; u8 roce_l3_type = 0; - bool vlan = false; u8 mac[ETH_ALEN]; - u16 vlan_id = 0; if (gid) { gid_type = attr->gid_type; ether_addr_copy(mac, attr->ndev->dev_addr); if (is_vlan_dev(attr->ndev)) { - vlan = true; - vlan_id = vlan_dev_vlan_id(attr->ndev); + vlan_info.vlan = true; + vlan_info.vlan_id = vlan_dev_vlan_id(attr->ndev); + } else { + /* If the netdev is upper device and if it's lower + * lower device is vlan device, consider vlan id of + * the lower vlan device for this gid entry. 
+ */ + rcu_read_lock(); + netdev_walk_all_lower_dev_rcu(attr->ndev, + get_lower_dev_vlan, &vlan_info); + rcu_read_unlock(); } } @@ -514,8 +541,9 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, } return mlx5_core_roce_gid_set(dev->mdev, index, roce_version, - roce_l3_type, gid->raw, mac, vlan, - vlan_id, port_num); + roce_l3_type, gid->raw, mac, + vlan_info.vlan, vlan_info.vlan_id, + port_num); } static int mlx5_ib_add_gid(const struct ib_gid_attr *attr, @@ -923,11 +951,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz); props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(mdev, pg)) - props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; - props->odp_caps = dev->odp_caps; -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + if (MLX5_CAP_GEN(mdev, pg)) + props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; + props->odp_caps = dev->odp_caps; + } if (MLX5_CAP_GEN(mdev, cd)) props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; @@ -1763,9 +1791,9 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (err) goto out_sys_pages; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; -#endif + if (ibdev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING) + context->ibucontext.invalidate_range = + &mlx5_ib_invalidate_range; if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { err = mlx5_ib_devx_create(dev, true); @@ -1897,12 +1925,10 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); struct mlx5_bfreg_info *bfregi; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* All umem's must be destroyed before destroying the ucontext. 
*/ mutex_lock(&ibcontext->per_mm_list_lock); WARN_ON(!list_empty(&ibcontext->per_mm_list)); mutex_unlock(&ibcontext->per_mm_list_lock); -#endif bfregi = &context->bfregi; mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); @@ -2265,7 +2291,7 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; u16 uid = 0; - pd = kmalloc(sizeof(*pd), GFP_KERNEL); + pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) return ERR_PTR(-ENOMEM); @@ -2335,10 +2361,29 @@ static u8 get_match_criteria_enable(u32 *match_criteria) return match_criteria_enable; } -static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) +static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) { - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); + u8 entry_mask; + u8 entry_val; + int err = 0; + + if (!mask) + goto out; + + entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c, + ip_protocol); + entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v, + ip_protocol); + if (!entry_mask) { + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); + goto out; + } + /* Don't override existing ip protocol */ + if (mask != entry_mask || val != entry_val) + err = -EINVAL; +out: + return err; } static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val, @@ -2572,8 +2617,10 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, set_tos(headers_c, headers_v, ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); - set_proto(headers_c, headers_v, - ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto); + if (set_proto(headers_c, headers_v, + ib_spec->ipv4.mask.proto, + ib_spec->ipv4.val.proto)) + return -EINVAL; break; case IB_FLOW_SPEC_IPV6: if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) @@ -2612,9 +2659,10 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, ib_spec->ipv6.mask.traffic_class, ib_spec->ipv6.val.traffic_class); - set_proto(headers_c, headers_v, - ib_spec->ipv6.mask.next_hdr, - ib_spec->ipv6.val.next_hdr); + if (set_proto(headers_c, headers_v, + ib_spec->ipv6.mask.next_hdr, + ib_spec->ipv6.val.next_hdr)) + return -EINVAL; set_flow_label(misc_params_c, misc_params_v, ntohl(ib_spec->ipv6.mask.flow_label), @@ -2635,10 +2683,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, LAST_TCP_UDP_FIELD)) return -EOPNOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, - 0xff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, - IPPROTO_TCP); + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP)) + return -EINVAL; MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, ntohs(ib_spec->tcp_udp.mask.src_port)); @@ -2655,10 +2701,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, LAST_TCP_UDP_FIELD)) return -EOPNOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, - 0xff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, - IPPROTO_UDP); + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP)) + return -EINVAL; MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(ib_spec->tcp_udp.mask.src_port)); @@ -2674,6 +2718,9 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, if (ib_spec->gre.mask.c_ks_res0_ver) return -EOPNOTSUPP; + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE)) + return -EINVAL; + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 
0xff); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, @@ -3825,7 +3872,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOMEM); - dst = kzalloc(sizeof(*dst) * 2, GFP_KERNEL); + dst = kcalloc(2, sizeof(*dst), GFP_KERNEL); if (!dst) return ERR_PTR(-ENOMEM); @@ -4106,7 +4153,7 @@ static ssize_t fw_pages_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); } @@ -4116,7 +4163,7 @@ static ssize_t reg_pages_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); } @@ -4126,7 +4173,8 @@ static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); + return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -4135,7 +4183,8 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); + return sprintf(buf, "%x\n", dev->mdev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -4144,7 +4193,8 @@ static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); + return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, dev->mdev->board_id); } @@ -5508,9 +5558,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port; mlx5_notifier_register(mpi->mdev, &mpi->mdev_events); - err = mlx5_ib_init_cong_debugfs(ibdev, port_num); - if (err) - goto unbind; + mlx5_ib_init_cong_debugfs(ibdev, port_num); return true; @@ -5722,11 +5770,12 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - cleanup_srcu_struct(&dev->mr_srcu); - drain_workqueue(dev->advise_mr_wq); - destroy_workqueue(dev->advise_mr_wq); -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + srcu_barrier(&dev->mr_srcu); + cleanup_srcu_struct(&dev->mr_srcu); + drain_workqueue(dev->advise_mr_wq); + destroy_workqueue(dev->advise_mr_wq); + } kfree(dev->port); } @@ -5779,19 +5828,20 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->memic.memic_lock); dev->memic.dev = mdev; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - dev->advise_mr_wq = alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0); - if (!dev->advise_mr_wq) { - err = -ENOMEM; - goto err_mp; - } + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + dev->advise_mr_wq = + alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0); + if (!dev->advise_mr_wq) { + err = -ENOMEM; + goto err_mp; + } - err = init_srcu_struct(&dev->mr_srcu); - if (err) { - 
destroy_workqueue(dev->advise_mr_wq); - goto err_mp; + err = init_srcu_struct(&dev->mr_srcu); + if (err) { + destroy_workqueue(dev->advise_mr_wq); + goto err_mp; + } } -#endif return 0; err_mp: @@ -6154,7 +6204,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) return mlx5_ib_odp_init_one(dev); } -void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_odp_cleanup_one(dev); } @@ -6183,8 +6233,9 @@ void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev) { - return mlx5_ib_init_cong_debugfs(dev, - mlx5_core_native_port_num(dev->mdev) - 1); + mlx5_ib_init_cong_debugfs(dev, + mlx5_core_native_port_num(dev->mdev) - 1); + return 0; } static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev) @@ -6234,7 +6285,7 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) name = "mlx5_%d"; else name = "mlx5_bond_%d"; - return ib_register_device(&dev->ib_dev, name, NULL); + return ib_register_device(&dev->ib_dev, name); } void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) @@ -6485,7 +6536,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) return mlx5_ib_add_slave_port(mdev); - dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); + dev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!dev) return NULL; diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 549234988bb4..9f90be296ee0 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -111,7 +111,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, *count = i; } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING static u64 umem_dma_to_mtt(dma_addr_t umem_dma) { u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK; @@ -123,7 +122,6 @@ static u64 umem_dma_to_mtt(dma_addr_t umem_dma) return mtt_entry; } -#endif /* * Populate the given array with bus addresses from the umem. 
@@ -151,7 +149,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int len; struct scatterlist *sg; int entry; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (umem->is_odp) { WARN_ON(shift != 0); WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)); @@ -164,7 +162,6 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, } return; } -#endif i = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b06d3b1efea8..f9817284c7a3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -36,6 +36,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <rdma/ib_verbs.h> +#include <rdma/ib_umem.h> #include <rdma/ib_smi.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/cq.h> @@ -587,14 +588,27 @@ struct mlx5_ib_mr { struct mlx5_ib_mr *parent; atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; + struct mlx5_async_work cb_work; }; +static inline bool is_odp_mr(struct mlx5_ib_mr *mr) +{ + return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem && + mr->umem->is_odp; +} + struct mlx5_ib_mw { struct ib_mw ibmw; struct mlx5_core_mkey mmkey; int ndescs; }; +struct mlx5_ib_devx_mr { + struct mlx5_core_mkey mmkey; + int ndescs; + struct rcu_head rcu; +}; + struct mlx5_ib_umr_context { struct ib_cqe cqe; enum ib_wc_status status; @@ -623,7 +637,6 @@ struct mlx5_cache_ent { spinlock_t lock; - struct dentry *dir; char name[4]; u32 order; u32 xlt; @@ -635,11 +648,6 @@ struct mlx5_cache_ent { u32 miss; u32 limit; - struct dentry *fsize; - struct dentry *fcur; - struct dentry *fmiss; - struct dentry *flimit; - struct mlx5_ib_dev *dev; struct work_struct work; struct delayed_work dwork; @@ -911,7 +919,6 @@ struct mlx5_ib_dev { /* Prevents soft lock on massive reg MRs */ struct mutex slow_path_mutex; int fill_delay; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_odp_caps odp_caps; u64 odp_max_size; struct mlx5_ib_pf_eq odp_pf_eq; @@ -923,7 +930,6 @@ struct mlx5_ib_dev { struct srcu_struct mr_srcu; u32 null_mkey; struct workqueue_struct *advise_mr_wq; -#endif struct mlx5_ib_flow_db *flow_db; /* protect resources needed as part of reset flow */ spinlock_t reset_flow_resource_lock; @@ -944,6 +950,7 @@ struct mlx5_ib_dev { struct mlx5_memic memic; u16 devx_whitelist_uid; struct mlx5_srq_table srq_table; + struct mlx5_async_ctx async_ctx; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1032,15 +1039,13 @@ to_mflow_act(struct ib_flow_action *ibact) return container_of(ibact, struct mlx5_ib_flow_action, ib_action); } -int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, +int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, + struct ib_udata *udata, unsigned long virt, struct mlx5_db *db); void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db); void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); -int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const void *in_mad, void *response_mad); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, 
struct rdma_ah_attr *ah_attr); @@ -1070,9 +1075,12 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); -int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, - void *buffer, u32 length, - struct mlx5_ib_qp_base *base); +int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, + int buflen, size_t *bc); +int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, + int buflen, size_t *bc); +int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, + void *buffer, int buflen, size_t *bc); struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, @@ -1098,6 +1106,7 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags); struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, + struct ib_udata *udata, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, @@ -1215,6 +1224,9 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { return -EOPNOTSUPP; } +static inline void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, + unsigned long start, + unsigned long end){}; #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ /* Needed for rep profile */ @@ -1254,7 +1266,7 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, const struct ib_gid_attr *attr); void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); -int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); +void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); /* GSI QP helper functions */ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index fd6ea1f75085..705a79cd21da 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -71,10 +71,9 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - /* Wait until all page fault handlers using the mr complete. */ - synchronize_srcu(&dev->mr_srcu); -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + /* Wait until all page fault handlers using the mr complete. 
*/ + synchronize_srcu(&dev->mr_srcu); return err; } @@ -95,10 +94,9 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING static void update_odp_mr(struct mlx5_ib_mr *mr) { - if (mr->umem->is_odp) { + if (is_odp_mr(mr)) { /* * This barrier prevents the compiler from moving the * setting of umem->odp_data->private to point to our @@ -121,11 +119,11 @@ static void update_odp_mr(struct mlx5_ib_mr *mr) smp_wmb(); } } -#endif -static void reg_mr_callback(int status, void *context) +static void reg_mr_callback(int status, struct mlx5_async_work *context) { - struct mlx5_ib_mr *mr = context; + struct mlx5_ib_mr *mr = + container_of(context, struct mlx5_ib_mr, cb_work); struct mlx5_ib_dev *dev = mr->dev; struct mlx5_mr_cache *cache = &dev->cache; int c = order2idx(dev, mr->order); @@ -216,9 +214,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) ent->pending++; spin_unlock_irq(&ent->lock); err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, - in, inlen, + &dev->async_ctx, in, inlen, mr->out, sizeof(mr->out), - reg_mr_callback, mr); + reg_mr_callback, &mr->cb_work); if (err) { spin_lock_irq(&ent->lock); ent->pending--; @@ -256,9 +254,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - synchronize_srcu(&dev->mr_srcu); -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + synchronize_srcu(&dev->mr_srcu); list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { list_del(&mr->list); @@ -610,52 +607,27 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) dev->cache.root = NULL; } -static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) +static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; + struct dentry *dir; int i; if (!mlx5_debugfs_root || dev->rep) - return 0; + return; cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root); - if (!cache->root) - return -ENOMEM; for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { ent = &cache->ent[i]; sprintf(ent->name, "%d", ent->order); - ent->dir = debugfs_create_dir(ent->name, cache->root); - if (!ent->dir) - goto err; - - ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent, - &size_fops); - if (!ent->fsize) - goto err; - - ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent, - &limit_fops); - if (!ent->flimit) - goto err; - - ent->fcur = debugfs_create_u32("cur", 0400, ent->dir, - &ent->cur); - if (!ent->fcur) - goto err; - - ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir, - &ent->miss); - if (!ent->fmiss) - goto err; + dir = debugfs_create_dir(ent->name, cache->root); + debugfs_create_file("size", 0600, dir, ent, &size_fops); + debugfs_create_file("limit", 0600, dir, ent, &limit_fops); + debugfs_create_u32("cur", 0400, dir, &ent->cur); + debugfs_create_u32("miss", 0600, dir, &ent->miss); } - - return 0; -err: - mlx5_mr_cache_debugfs_cleanup(dev); - - return -ENOMEM; } static void delay_time_func(struct timer_list *t) @@ -669,7 +641,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; - int err; int i; mutex_init(&dev->slow_path_mutex); @@ -679,6 +650,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return -ENOMEM; } + mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); 
timer_setup(&dev->delay_timer, delay_time_func, 0); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { ent = &cache->ent[i]; @@ -713,45 +685,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) queue_work(cache->wq, &ent->work); } - err = mlx5_mr_cache_debugfs_init(dev); - if (err) - mlx5_ib_warn(dev, "cache debugfs failure\n"); - - /* - * We don't want to fail driver if debugfs failed to initialize, - * so we are not forwarding error to the user. - */ + mlx5_mr_cache_debugfs_init(dev); return 0; } -static void wait_for_async_commands(struct mlx5_ib_dev *dev) -{ - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; - int total = 0; - int i; - int j; - - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - for (j = 0 ; j < 1000; j++) { - if (!ent->pending) - break; - msleep(50); - } - } - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - total += ent->pending; - } - - if (total) - mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total); - else - mlx5_ib_warn(dev, "done with all pending requests\n"); -} - int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { int i; @@ -763,12 +701,12 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) flush_workqueue(dev->cache.wq); mlx5_mr_cache_debugfs_cleanup(dev); + mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) clean_keys(dev, i); destroy_workqueue(dev->cache.wq); - wait_for_async_commands(dev); del_timer_sync(&dev->delay_timer); return 0; @@ -847,18 +785,17 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev) return MLX5_MAX_UMR_SHIFT; } -static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, - int access_flags, struct ib_umem **umem, - int *npages, int *page_shift, int *ncont, - int *order) +static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, + u64 start, u64 length, int access_flags, + struct ib_umem **umem, int *npages, int *page_shift, + int *ncont, int *order) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); struct ib_umem *u; int err; *umem = NULL; - u = ib_umem_get(pd->uobject->context, start, length, access_flags, 0); + u = ib_umem_get(udata, start, length, access_flags, 0); err = PTR_ERR_OR_ZERO(u); if (err) { mlx5_ib_dbg(dev, "umem get failed (%d)\n", err); @@ -1331,21 +1268,20 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (!start && length == U64_MAX) { + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && + length == U64_MAX) { if (!(access_flags & IB_ACCESS_ON_DEMAND) || !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) return ERR_PTR(-EINVAL); - mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags); + mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); if (IS_ERR(mr)) return ERR_CAST(mr); return &mr->ibmr; } -#endif - err = mr_umem_get(pd, start, length, access_flags, &umem, &npages, - &page_shift, &ncont, &order); + err = mr_umem_get(dev, udata, start, length, access_flags, &umem, + &npages, &page_shift, &ncont, &order); if (err < 0) return ERR_PTR(err); @@ -1386,9 +1322,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->umem = umem; set_mr_fields(dev, mr, npages, length, access_flags); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); -#endif if (!populate_mtts) { int update_xlt_flags = 
MLX5_IB_UPD_XLT_ENABLE; @@ -1405,9 +1339,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - mr->live = 1; -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + mr->live = 1; + return &mr->ibmr; error: ib_umem_release(umem); @@ -1495,8 +1429,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, flags |= IB_MR_REREG_TRANS; ib_umem_release(mr->umem); mr->umem = NULL; - err = mr_umem_get(pd, addr, len, access_flags, &mr->umem, - &npages, &page_shift, &ncont, &order); + err = mr_umem_get(dev, udata, addr, len, access_flags, + &mr->umem, &npages, &page_shift, &ncont, + &order); if (err) goto err; } @@ -1522,9 +1457,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, } mr->allocated_from_cache = 0; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - mr->live = 1; -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + mr->live = 1; } else { /* * Send a UMR WQE @@ -1553,9 +1487,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, set_mr_fields(dev, mr, npages, len, access_flags); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); -#endif return 0; err: @@ -1641,8 +1573,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) int npages = mr->npages; struct ib_umem *umem = mr->umem; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (umem && umem->is_odp) { + if (is_odp_mr(mr)) { struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem); /* Prevent new page faults from succeeding */ @@ -1666,7 +1597,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Avoid double-freeing the umem. */ umem = NULL; } -#endif + clean_mr(dev, mr); /* diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 4ee32964e1dd..335fd0c6ea2a 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -101,9 +101,9 @@ static int check_parent(struct ib_umem_odp *odp, return mr && mr->parent == parent && !odp->dying; } -struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr) +static struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr) { - if (WARN_ON(!mr || !mr->umem || !mr->umem->is_odp)) + if (WARN_ON(!mr || !is_odp_mr(mr))) return NULL; return to_ib_umem_odp(mr->umem)->per_mm; @@ -315,6 +315,9 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.send)) caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND; + if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.srq_receive)) + caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV; + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.send)) caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND; @@ -330,6 +333,27 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic)) caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC; + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.srq_receive)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.send)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SEND; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.receive)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_RECV; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.write)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_WRITE; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.read)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_READ; 
+ + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.atomic)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_ATOMIC; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.srq_receive)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV; + if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) && MLX5_CAP_GEN(dev->mdev, null_mkey) && MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) @@ -439,7 +463,7 @@ next_mr: if (nentries) nentries++; } else { - odp = ib_alloc_odp_umem(odp_mr->per_mm, addr, + odp = ib_alloc_odp_umem(odp_mr, addr, MLX5_IMR_MTT_SIZE); if (IS_ERR(odp)) { mutex_unlock(&odp_mr->umem_mutex); @@ -492,13 +516,13 @@ next_mr: } struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, + struct ib_udata *udata, int access_flags) { - struct ib_ucontext *ctx = pd->ibpd.uobject->context; struct mlx5_ib_mr *imr; struct ib_umem *umem; - umem = ib_umem_get(ctx, 0, 0, IB_ACCESS_ON_DEMAND, 0); + umem = ib_umem_get(udata, 0, 0, IB_ACCESS_ON_DEMAND, 0); if (IS_ERR(umem)) return ERR_CAST(umem); @@ -685,6 +709,21 @@ struct pf_frame { int depth; }; +static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey) +{ + struct mlx5_ib_mw *mw; + struct mlx5_ib_devx_mr *devx_mr; + + if (mmkey->type == MLX5_MKEY_MW) { + mw = container_of(mmkey, struct mlx5_ib_mw, mmkey); + return mw->ndescs; + } + + devx_mr = container_of(mmkey, struct mlx5_ib_devx_mr, + mmkey); + return devx_mr->ndescs; +} + /* * Handle a single data segment in a page-fault WQE or RDMA region. * @@ -705,11 +744,11 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key, bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mw *mw; struct mlx5_ib_mr *mr; struct mlx5_klm *pklm; u32 *out = NULL; size_t offset; + int ndescs; srcu_key = srcu_read_lock(&dev->mr_srcu); @@ -739,12 +778,12 @@ next_mr: goto srcu_unlock; } - if (prefetch && !mr->umem->is_odp) { + if (prefetch && !is_odp_mr(mr)) { ret = -EINVAL; goto srcu_unlock; } - if (!mr->umem->is_odp) { + if (!is_odp_mr(mr)) { mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", key); if (bytes_mapped) @@ -762,7 +801,8 @@ next_mr: break; case MLX5_MKEY_MW: - mw = container_of(mmkey, struct mlx5_ib_mw, mmkey); + case MLX5_MKEY_INDIRECT_DEVX: + ndescs = get_indirect_num_descs(mmkey); if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) { mlx5_ib_dbg(dev, "indirection level exceeded\n"); @@ -771,7 +811,7 @@ next_mr: } outlen = MLX5_ST_SZ_BYTES(query_mkey_out) + - sizeof(*pklm) * (mw->ndescs - 2); + sizeof(*pklm) * (ndescs - 2); if (outlen > cur_outlen) { kfree(out); @@ -786,14 +826,14 @@ next_mr: pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out, bsf0_klm0_pas_mtt0_1); - ret = mlx5_core_query_mkey(dev->mdev, &mw->mmkey, out, outlen); + ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen); if (ret) goto srcu_unlock; offset = io_virt - MLX5_GET64(query_mkey_out, out, memory_key_mkey_entry.start_addr); - for (i = 0; bcnt && i < mw->ndescs; i++, pklm++) { + for (i = 0; bcnt && i < ndescs; i++, pklm++) { if (offset >= be32_to_cpu(pklm->bcount)) { offset -= be32_to_cpu(pklm->bcount); continue; @@ -853,7 +893,6 @@ srcu_unlock: /** * Parse a series of data segments for page fault handling. * - * @qp the QP on which the fault occurred. * @pfault contains page fault information. * @wqe points at the first data segment in the WQE. * @wqe_end points after the end of the WQE. 
@@ -870,7 +909,7 @@ srcu_unlock:
  */
 static int pagefault_data_segments(struct mlx5_ib_dev *dev,
 				   struct mlx5_pagefault *pfault,
-				   struct mlx5_ib_qp *qp, void *wqe,
+				   void *wqe,
 				   void *wqe_end, u32 *bytes_mapped,
 				   u32 *total_wqe_bytes, int receive_queue)
 {
@@ -881,10 +920,6 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
 	size_t bcnt;
 	int inline_segment;
 
-	/* Skip SRQ next-WQE segment. */
-	if (receive_queue && qp->ibqp.srq)
-		wqe += sizeof(struct mlx5_wqe_srq_next_seg);
-
 	if (bytes_mapped)
 		*bytes_mapped = 0;
 	if (total_wqe_bytes)
 		*total_wqe_bytes = 0;
@@ -1009,6 +1044,10 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 		MLX5_WQE_CTRL_OPCODE_MASK;
 
 	switch (qp->ibqp.qp_type) {
+	case IB_QPT_XRC_INI:
+		*wqe += sizeof(struct mlx5_wqe_xrc_seg);
+		transport_caps = dev->odp_caps.per_transport_caps.xrc_odp_caps;
+		break;
 	case IB_QPT_RC:
 		transport_caps = dev->odp_caps.per_transport_caps.rc_odp_caps;
 		break;
@@ -1028,7 +1067,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 		return -EFAULT;
 	}
 
-	if (qp->ibqp.qp_type != IB_QPT_RC) {
+	if (qp->ibqp.qp_type == IB_QPT_UD) {
 		av = *wqe;
 		if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
 			*wqe += sizeof(struct mlx5_av);
@@ -1053,21 +1092,34 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 }
 
 /*
- * Parse responder WQE. Advances the wqe pointer to point at the
- * scatter-gather list, and set wqe_end to the end of the WQE.
+ * Parse responder WQE and set wqe_end to the end of the WQE.
  */
-static int mlx5_ib_mr_responder_pfault_handler(
-	struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault,
-	struct mlx5_ib_qp *qp, void **wqe, void **wqe_end, int wqe_length)
+static int mlx5_ib_mr_responder_pfault_handler_srq(struct mlx5_ib_dev *dev,
+						   struct mlx5_ib_srq *srq,
+						   void **wqe, void **wqe_end,
+						   int wqe_length)
 {
-	struct mlx5_ib_wq *wq = &qp->rq;
-	int wqe_size = 1 << wq->wqe_shift;
+	int wqe_size = 1 << srq->msrq.wqe_shift;
 
-	if (qp->ibqp.srq) {
-		mlx5_ib_err(dev, "ODP fault on SRQ is not supported\n");
+	if (wqe_size > wqe_length) {
+		mlx5_ib_err(dev, "Couldn't read all of the receive WQE's content\n");
 		return -EFAULT;
 	}
 
+	*wqe_end = *wqe + wqe_size;
+	*wqe += sizeof(struct mlx5_wqe_srq_next_seg);
+
+	return 0;
+}
+
+static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev,
+						  struct mlx5_ib_qp *qp,
+						  void *wqe, void **wqe_end,
+						  int wqe_length)
+{
+	struct mlx5_ib_wq *wq = &qp->rq;
+	int wqe_size = 1 << wq->wqe_shift;
+
 	if (qp->wq_sig) {
 		mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n");
 		return -EFAULT;
@@ -1091,7 +1143,7 @@ invalid_transport_or_opcode:
 		return -EFAULT;
 	}
 
-	*wqe_end = *wqe + wqe_size;
+	*wqe_end = wqe + wqe_size;
 
 	return 0;
 }
@@ -1099,22 +1151,25 @@
 static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev,
 						       u32 wq_num, int pf_type)
 {
-	enum mlx5_res_type res_type;
+	struct mlx5_core_rsc_common *common = NULL;
+	struct mlx5_core_srq *srq;
 
 	switch (pf_type) {
 	case MLX5_WQE_PF_TYPE_RMP:
-		res_type = MLX5_RES_SRQ;
+		srq = mlx5_cmd_get_srq(dev, wq_num);
+		if (srq)
+			common = &srq->common;
 		break;
 
 	case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE:
 	case MLX5_WQE_PF_TYPE_RESP:
 	case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC:
-		res_type = MLX5_RES_QP;
+		common = mlx5_core_res_hold(dev->mdev, wq_num, MLX5_RES_QP);
 		break;
+
 	default:
-		return NULL;
+		break;
 	}
 
-	return mlx5_core_res_hold(dev->mdev, wq_num, res_type);
+	return common;
 }
 
@@ -1124,6 +1179,14 @@ static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res)
 	return to_mibqp(mqp);
 }
 
+static inline struct mlx5_ib_srq *res_to_srq(struct mlx5_core_rsc_common *res)
+{
+	struct mlx5_core_srq *msrq =
+		container_of(res, struct mlx5_core_srq, common);
+
+	return to_mibsrq(msrq);
+}
+
 static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
 					  struct mlx5_pagefault *pfault)
 {
@@ -1134,8 +1197,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
 	int resume_with_error = 1;
 	u16 wqe_index = pfault->wqe.wqe_index;
 	int requestor = pfault->type & MLX5_PFAULT_REQUESTOR;
-	struct mlx5_core_rsc_common *res;
-	struct mlx5_ib_qp *qp;
+	struct mlx5_core_rsc_common *res = NULL;
+	struct mlx5_ib_qp *qp = NULL;
+	struct mlx5_ib_srq *srq = NULL;
+	size_t bytes_copied;
 
 	res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type);
 	if (!res) {
@@ -1147,6 +1212,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
 	case MLX5_RES_QP:
 		qp = res_to_qp(res);
 		break;
+	case MLX5_RES_SRQ:
+	case MLX5_RES_XSRQ:
+		srq = res_to_srq(res);
+		break;
 	default:
 		mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type);
 		goto resolve_page_fault;
@@ -1158,9 +1227,23 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
 		goto resolve_page_fault;
 	}
 
-	ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
-				    PAGE_SIZE, &qp->trans_qp.base);
-	if (ret < 0) {
+	if (qp) {
+		if (requestor) {
+			ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index,
+						       buffer, PAGE_SIZE,
+						       &bytes_copied);
+		} else {
+			ret = mlx5_ib_read_user_wqe_rq(qp, wqe_index,
+						       buffer, PAGE_SIZE,
+						       &bytes_copied);
+		}
+	} else {
+		ret = mlx5_ib_read_user_wqe_srq(srq, wqe_index,
+						buffer, PAGE_SIZE,
+						&bytes_copied);
+	}
+
+	if (ret) {
 		mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%d, wqe_index=%x, qpn=%x\n",
 			    ret, wqe_index, pfault->token);
 		goto resolve_page_fault;
@@ -1168,11 +1251,18 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
 
 	wqe = buffer;
 	if (requestor)
-		ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp, &wqe,
-							  &wqe_end, ret);
+		ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp,
+							  &wqe, &wqe_end,
+							  bytes_copied);
+	else if (qp)
+		ret = mlx5_ib_mr_responder_pfault_handler_rq(dev, qp,
+							     wqe, &wqe_end,
+							     bytes_copied);
 	else
-		ret = mlx5_ib_mr_responder_pfault_handler(dev, pfault, qp, &wqe,
-							  &wqe_end, ret);
+		ret = mlx5_ib_mr_responder_pfault_handler_srq(dev, srq,
+							      &wqe, &wqe_end,
+							      bytes_copied);
+
 	if (ret < 0)
 		goto resolve_page_fault;
 
@@ -1181,7 +1271,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
 		goto resolve_page_fault;
 	}
 
-	ret = pagefault_data_segments(dev, pfault, qp, wqe, wqe_end,
+	ret = pagefault_data_segments(dev, pfault, wqe, wqe_end,
 				      &bytes_mapped, &total_wqe_bytes,
 				      !requestor);
 	if (ret == -EAGAIN) {
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 7db778d96ef5..c6ccd4d36a90 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -109,75 +109,173 @@ static int is_sqp(enum ib_qp_type qp_type)
 }
 
 /**
- * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space.
+ * mlx5_ib_read_user_wqe_common() - Copy a WQE (or part of one) from user WQ
+ * to kernel buffer
  *
- * @qp: QP to copy from.
- * @send: copy from the send queue when non-zero, use the receive queue
- *	  otherwise.
- * @wqe_index: index to start copying from. For send work queues, the
- *	       wqe_index is in units of MLX5_SEND_WQE_BB.
- *	       For receive work queue, it is the number of work queue
- *	       element in the queue.
- * @buffer: destination buffer.
- * @length: maximum number of bytes to copy.
+ * @umem: User space memory where the WQ is
+ * @buffer: buffer to copy to
+ * @buflen: buffer length
+ * @wqe_index: index of WQE to copy from
+ * @wq_offset: offset to start of WQ
+ * @wq_wqe_cnt: number of WQEs in WQ
+ * @wq_wqe_shift: log2 of WQE size
+ * @bcnt: number of bytes to copy
+ * @bytes_copied: number of bytes actually copied (output)
  *
- * Copies at least a single WQE, but may copy more data.
+ * Copies at most bcnt bytes from the start of the WQE.
+ * Does not guarantee that the entire WQE is copied.
  *
- * Return: the number of bytes copied, or an error code.
+ * Return: zero on success, or an error code.
  */
-int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
-			  void *buffer, u32 length,
-			  struct mlx5_ib_qp_base *base)
+static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem,
+					void *buffer,
+					u32 buflen,
+					int wqe_index,
+					int wq_offset,
+					int wq_wqe_cnt,
+					int wq_wqe_shift,
+					int bcnt,
+					size_t *bytes_copied)
+{
+	size_t offset = wq_offset + ((wqe_index % wq_wqe_cnt) << wq_wqe_shift);
+	size_t wq_end = wq_offset + (wq_wqe_cnt << wq_wqe_shift);
+	size_t copy_length;
+	int ret;
+
+	/* don't copy more than requested, more than the buffer length or
+	 * beyond the end of the WQ
+	 */
+	copy_length = min_t(u32, buflen, wq_end - offset);
+	copy_length = min_t(u32, copy_length, bcnt);
+
+	ret = ib_umem_copy_from(buffer, umem, offset, copy_length);
+	if (ret)
+		return ret;
+
+	if (bytes_copied)
+		*bytes_copied = copy_length;
+
+	return 0;
+}
+
+int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp,
+			     int wqe_index,
+			     void *buffer,
+			     int buflen,
+			     size_t *bc)
 {
-	struct ib_device *ibdev = qp->ibqp.device;
-	struct mlx5_ib_dev *dev = to_mdev(ibdev);
-	struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
-	size_t offset;
-	size_t wq_end;
+	struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
 	struct ib_umem *umem = base->ubuffer.umem;
-	u32 first_copy_length;
-	int wqe_length;
+	struct mlx5_ib_wq *wq = &qp->sq;
+	struct mlx5_wqe_ctrl_seg *ctrl;
+	size_t bytes_copied;
+	size_t bytes_copied2;
+	size_t wqe_length;
 	int ret;
+	int ds;
 
-	if (wq->wqe_cnt == 0) {
-		mlx5_ib_dbg(dev, "mlx5_ib_read_user_wqe for a QP with wqe_cnt == 0. qp_type: 0x%x\n",
-			    qp->ibqp.qp_type);
+	if (buflen < sizeof(*ctrl))
 		return -EINVAL;
-	}
 
-	offset = wq->offset + ((wqe_index % wq->wqe_cnt) << wq->wqe_shift);
-	wq_end = wq->offset + (wq->wqe_cnt << wq->wqe_shift);
+	/* first, read as much as possible */
+	ret = mlx5_ib_read_user_wqe_common(umem,
+					   buffer,
+					   buflen,
+					   wqe_index,
+					   wq->offset,
+					   wq->wqe_cnt,
+					   wq->wqe_shift,
+					   buflen,
+					   &bytes_copied);
+	if (ret)
+		return ret;
 
-	if (send && length < sizeof(struct mlx5_wqe_ctrl_seg))
+	/* we need at least the control segment size to proceed */
+	if (bytes_copied < sizeof(*ctrl))
 		return -EINVAL;
 
-	if (offset > umem->length ||
-	    (send && offset + sizeof(struct mlx5_wqe_ctrl_seg) > umem->length))
-		return -EINVAL;
+	ctrl = buffer;
+	ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+	wqe_length = ds * MLX5_WQE_DS_UNITS;
+
+	/* if we copied enough, we are done */
+	if (bytes_copied >= wqe_length) {
+		*bc = bytes_copied;
+		return 0;
+	}
+
+	/* otherwise this is a wrapped-around WQE,
+	 * so read the remaining bytes starting
+	 * from wqe_index 0
+	 */
+	ret = mlx5_ib_read_user_wqe_common(umem,
+					   buffer + bytes_copied,
+					   buflen - bytes_copied,
+					   0,
+					   wq->offset,
+					   wq->wqe_cnt,
+					   wq->wqe_shift,
+					   wqe_length - bytes_copied,
+					   &bytes_copied2);
 
-	first_copy_length = min_t(u32, offset + length, wq_end) - offset;
-	ret = ib_umem_copy_from(buffer, umem, offset, first_copy_length);
 	if (ret)
 		return ret;
+	*bc = bytes_copied + bytes_copied2;
+	return 0;
+}
 
-	if (send) {
-		struct mlx5_wqe_ctrl_seg *ctrl = buffer;
-		int ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
-
-		wqe_length = ds * MLX5_WQE_DS_UNITS;
-	} else {
-		wqe_length = 1 << wq->wqe_shift;
-	}
+int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp,
+			     int wqe_index,
+			     void *buffer,
+			     int buflen,
+			     size_t *bc)
+{
+	struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
+	struct ib_umem *umem = base->ubuffer.umem;
+	struct mlx5_ib_wq *wq = &qp->rq;
+	size_t bytes_copied;
+	int ret;
 
-	if (wqe_length <= first_copy_length)
-		return first_copy_length;
+	ret = mlx5_ib_read_user_wqe_common(umem,
+					   buffer,
+					   buflen,
+					   wqe_index,
+					   wq->offset,
+					   wq->wqe_cnt,
+					   wq->wqe_shift,
+					   buflen,
+					   &bytes_copied);
 
-	ret = ib_umem_copy_from(buffer + first_copy_length, umem, wq->offset,
-				wqe_length - first_copy_length);
 	if (ret)
 		return ret;
+	*bc = bytes_copied;
+	return 0;
+}
+
+int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq,
+			      int wqe_index,
+			      void *buffer,
+			      int buflen,
+			      size_t *bc)
+{
+	struct ib_umem *umem = srq->umem;
+	size_t bytes_copied;
+	int ret;
+
+	ret = mlx5_ib_read_user_wqe_common(umem,
+					   buffer,
+					   buflen,
+					   wqe_index,
+					   0,
+					   srq->msrq.max,
+					   srq->msrq.wqe_shift,
+					   buflen,
+					   &bytes_copied);
 
-	return wqe_length;
+	if (ret)
+		return ret;
+	*bc = bytes_copied;
+	return 0;
 }
 
 static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
@@ -645,16 +743,14 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
 	return bfregi->sys_pages[index_of_sys_page] + offset;
 }
 
-static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
-			    struct ib_pd *pd,
+static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 			    unsigned long addr, size_t size,
-			    struct ib_umem **umem,
-			    int *npages, int *page_shift, int *ncont,
-			    u32 *offset)
+			    struct ib_umem **umem, int *npages, int *page_shift,
+			    int *ncont, u32 *offset)
 {
 	int err;
 
-	*umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0);
+	*umem = ib_umem_get(udata, addr, size, 0, 0);
 	if (IS_ERR(*umem)) {
 		mlx5_ib_dbg(dev, "umem_get failed\n");
 		return PTR_ERR(*umem);
@@ -695,10 +791,9 @@ static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 }
 
 static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
-			  struct mlx5_ib_rwq *rwq,
+			  struct ib_udata *udata, struct mlx5_ib_rwq *rwq,
 			  struct mlx5_ib_create_wq *ucmd)
 {
-	struct mlx5_ib_ucontext *context;
 	int page_shift = 0;
 	int npages;
 	u32 offset = 0;
@@ -708,9 +803,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	if (!ucmd->buf_addr)
 		return -EINVAL;
 
-	context = to_mucontext(pd->uobject->context);
-	rwq->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr,
-				rwq->buf_size, 0, 0);
+	rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0, 0);
 	if (IS_ERR(rwq->umem)) {
 		mlx5_ib_dbg(dev, "umem_get failed\n");
 		err = PTR_ERR(rwq->umem);
@@ -735,7 +828,8 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		    (unsigned long long)ucmd->buf_addr, rwq->buf_size,
 		    npages, page_shift, ncont, offset);
 
-	err = mlx5_ib_db_map_user(context, ucmd->db_addr, &rwq->db);
+	err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), udata,
+				  ucmd->db_addr, &rwq->db);
 	if (err) {
 		mlx5_ib_dbg(dev, "map failed\n");
 		goto err_umem;
@@ -819,10 +913,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 
 	if (ucmd.buf_addr && ubuffer->buf_size) {
 		ubuffer->buf_addr = ucmd.buf_addr;
-		err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr,
-				       ubuffer->buf_size,
-				       &ubuffer->umem, &npages, &page_shift,
-				       &ncont, &offset);
+		err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr,
+				       ubuffer->buf_size, &ubuffer->umem,
+				       &npages, &page_shift, &ncont, &offset);
 		if (err)
 			goto err_bfreg;
 	} else {
@@ -856,7 +949,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		resp->bfreg_index = MLX5_IB_INVALID_BFREG;
 	qp->bfregn = bfregn;
 
-	err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
+	err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &qp->db);
 	if (err) {
 		mlx5_ib_dbg(dev, "map failed\n");
 		goto err_free;
@@ -1119,6 +1212,7 @@ static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
 }
 
 static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+				   struct ib_udata *udata,
 				   struct mlx5_ib_sq *sq, void *qpin,
 				   struct ib_pd *pd)
 {
@@ -1135,9 +1229,9 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
 	int ncont = 0;
 	u32 offset = 0;
 
-	err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size,
-			       &sq->ubuffer.umem, &npages, &page_shift,
-			       &ncont, &offset);
+	err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size,
+			       &sq->ubuffer.umem, &npages, &page_shift, &ncont,
+			       &offset);
 	if (err)
 		return err;
 
@@ -1374,7 +1468,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 		if (err)
 			return err;
 
-		err = create_raw_packet_qp_sq(dev, sq, in, pd);
+		err = create_raw_packet_qp_sq(dev, udata, sq, in, pd);
 		if (err)
 			goto err_destroy_tis;
 
@@ -5795,7 +5889,7 @@ static int prepare_user_rq(struct ib_pd *pd,
 		return err;
 	}
 
-	err = create_user_rq(dev, pd, rwq, &ucmd);
+	err = create_user_rq(dev, pd, udata, rwq, &ucmd);
 	if (err) {
 		mlx5_ib_dbg(dev, "err %d\n", err);
 		return err;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 4e8d18009f58..22bd774e0b4e 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -79,8 +79,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 
 	srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
 
-	srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
-				0, 0);
+	srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0);
 	if (IS_ERR(srq->umem)) {
 		mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
 		err = PTR_ERR(srq->umem);
@@ -104,7 +103,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 
 	mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0);
 
-	err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context),
+	err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), udata,
 				  ucmd.db_addr, &srq->db);
 	if (err) {
 		mlx5_ib_dbg(dev, "map doorbell failed\n");
diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h
index 75eb5839ae95..c330af35ff10 100644
--- a/drivers/infiniband/hw/mlx5/srq.h
+++ b/drivers/infiniband/hw/mlx5/srq.h
@@ -46,8 +46,6 @@ struct mlx5_core_srq {
 	int wqe_shift;
 	void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e);
 
-	atomic_t refcount;
-	struct completion free;
 	u16 uid;
 };
 
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index 7aaaffbd4afa..63ac38bb3498 100644
--- a/drivers/infiniband/hw/mlx5/srq_cmd.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -87,7 +87,7 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
 
 	srq = radix_tree_lookup(&table->tree, srqn);
 	if (srq)
-		atomic_inc(&srq->refcount);
+		atomic_inc(&srq->common.refcount);
 
 	spin_unlock(&table->lock);
 
@@ -594,8 +594,8 @@ int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
 	if (err)
 		return err;
 
-	atomic_set(&srq->refcount, 1);
-	init_completion(&srq->free);
+	atomic_set(&srq->common.refcount, 1);
+	init_completion(&srq->common.free);
 
 	spin_lock_irq(&table->lock);
 	err = radix_tree_insert(&table->tree, srq->srqn, srq);
@@ -627,9 +627,8 @@ int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
 	if (err)
 		return err;
 
-	if (atomic_dec_and_test(&srq->refcount))
-		complete(&srq->free);
-	wait_for_completion(&srq->free);
+	mlx5_core_res_put(&srq->common);
+	wait_for_completion(&srq->common.free);
 
 	return 0;
 }
@@ -685,7 +684,7 @@ static int srq_event_notifier(struct notifier_block *nb,
 
 	srq = radix_tree_lookup(&table->tree, srqn);
 	if (srq)
-		atomic_inc(&srq->refcount);
+		atomic_inc(&srq->common.refcount);
 
 	spin_unlock(&table->lock);
 
@@ -694,8 +693,7 @@ static int srq_event_notifier(struct notifier_block *nb,
 
 	srq->event(srq, eqe->type);
 
-	if (atomic_dec_and_test(&srq->refcount))
-		complete(&srq->free);
+	mlx5_core_res_put(&srq->common);
 
 	return NOTIFY_OK;
 }
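
Editor's note on get_indirect_num_descs() in the odp.c hunk above: the helper recovers the wrapping object (mlx5_ib_mw or mlx5_ib_devx_mr) from the embedded mlx5_core_mkey via container_of() and dispatches on the mkey type tag. Below is a minimal userspace sketch of that embed-and-recover idiom; the demo_* structure names and the enum are invented for illustration and are not driver API.

#include <stddef.h>

/* container_of as the kernel defines it, reduced to plain C:
 * subtract the member offset from the member pointer to get the
 * enclosing object. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

enum mkey_type { MKEY_MW, MKEY_INDIRECT_DEVX };

/* Stand-in for mlx5_core_mkey: only the type tag matters here. */
struct core_mkey {
	enum mkey_type type;
};

/* Two unrelated wrappers that both embed a core_mkey, the way
 * mlx5_ib_mw and mlx5_ib_devx_mr embed mlx5_core_mkey. */
struct demo_mw {
	int ndescs;
	struct core_mkey mmkey;
};

struct demo_devx_mr {
	struct core_mkey mmkey;
	int ndescs;
};

/* Given only the embedded mkey, recover the matching wrapper and
 * return its descriptor count, mirroring get_indirect_num_descs(). */
static int indirect_num_descs(struct core_mkey *mmkey)
{
	if (mmkey->type == MKEY_MW)
		return container_of(mmkey, struct demo_mw, mmkey)->ndescs;

	return container_of(mmkey, struct demo_devx_mr, mmkey)->ndescs;
}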
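
The new mlx5_ib_read_user_wqe_sq() in qp.c reads a send WQE in up to two legs: it copies from the WQE's slot toward the end of the work queue, learns the real WQE length from the control segment's DS count, and, if the WQE wraps, continues from slot 0. A self-contained sketch of that two-leg ring-buffer read, with invented names (struct ring, ring_read) and plain memcpy standing in for ib_umem_copy_from():

#include <stddef.h>
#include <string.h>

/* A ring of fixed-size slots, mirroring wq->wqe_cnt and
 * wq->wqe_shift in the driver. Hypothetical demo type, not driver
 * API. */
struct ring {
	char *buf;	/* backing memory */
	int wqe_cnt;	/* number of slots, power of two */
	int wqe_shift;	/* log2 of slot size */
};

/* First leg: copy up to bcnt bytes of the entry at index, stopping
 * at the end of the ring, as mlx5_ib_read_user_wqe_common() does.
 * Returns the number of bytes copied. */
static size_t ring_read_once(struct ring *r, int index, void *dst,
			     size_t bcnt)
{
	size_t offset = (size_t)(index % r->wqe_cnt) << r->wqe_shift;
	size_t end = (size_t)r->wqe_cnt << r->wqe_shift;
	size_t len = end - offset < bcnt ? end - offset : bcnt;

	memcpy(dst, r->buf + offset, len);
	return len;
}

/* Read a logical entry of wqe_length bytes that may wrap: read to
 * the end of the ring, then continue from slot 0. In the driver,
 * wqe_length is only known after the first leg, from the control
 * segment; here it is passed in to keep the sketch short. */
static size_t ring_read(struct ring *r, int index, void *dst,
			size_t wqe_length)
{
	size_t copied = ring_read_once(r, index, dst, wqe_length);

	if (copied < wqe_length)
		copied += ring_read_once(r, 0, (char *)dst + copied,
					 wqe_length - copied);
	return copied;
}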
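
Finally, the srq.h/srq_cmd.c hunks replace the SRQ's private refcount and completion with the ones embedded in mlx5_core_rsc_common, so teardown becomes mlx5_core_res_put() followed by wait_for_completion(). A rough userspace analogue of that last-put-completes pattern, using C11 atomics and a pthread condition variable; all names are illustrative:

#include <pthread.h>
#include <stdatomic.h>

/* Hypothetical stand-in for mlx5_core_rsc_common: a refcount plus a
 * one-shot "free" completion. */
struct rsc_common {
	atomic_int refcount;
	pthread_mutex_t lock;
	pthread_cond_t freed;
	int done;
};

/* Mirrors atomic_set(&refcount, 1) / init_completion() in
 * mlx5_cmd_create_srq(). */
static void res_init(struct rsc_common *res)
{
	atomic_init(&res->refcount, 1);
	pthread_mutex_init(&res->lock, NULL);
	pthread_cond_init(&res->freed, NULL);
	res->done = 0;
}

static void res_hold(struct rsc_common *res)
{
	atomic_fetch_add(&res->refcount, 1);
}

/* Mirrors mlx5_core_res_put(): whoever drops the last reference
 * completes the waiter. atomic_fetch_sub returns the old value, so
 * 1 means this call released the final reference. */
static void res_put(struct rsc_common *res)
{
	if (atomic_fetch_sub(&res->refcount, 1) == 1) {
		pthread_mutex_lock(&res->lock);
		res->done = 1;
		pthread_cond_signal(&res->freed);
		pthread_mutex_unlock(&res->lock);
	}
}

/* Mirrors wait_for_completion(&srq->common.free) in
 * mlx5_cmd_destroy_srq(): block until the last reference is gone. */
static void res_wait_freed(struct rsc_common *res)
{
	pthread_mutex_lock(&res->lock);
	while (!res->done)
		pthread_cond_wait(&res->freed, &res->lock);
	pthread_mutex_unlock(&res->lock);
}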