diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r-- | drivers/infiniband/hw/mlx5/cq.c | 6 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/devx.c | 184 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/doorbell.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/gsi.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/ib_virt.c | 28 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 287 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mem.c | 25 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 51 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 101 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c | 94 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/qp.c | 180 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/srq.c | 2 |
12 files changed, 633 insertions, 331 deletions
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index dd8d24ee8e1d..367a71bc5f4b 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -708,8 +708,8 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, *cqe_size = ucmd.cqe_size; cq->buf.umem = - ib_umem_get(udata, ucmd.buf_addr, entries * ucmd.cqe_size, - IB_ACCESS_LOCAL_WRITE); + ib_umem_get(&dev->ib_dev, ucmd.buf_addr, + entries * ucmd.cqe_size, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->buf.umem)) { err = PTR_ERR(cq->buf.umem); return err; @@ -1108,7 +1108,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1) return -EINVAL; - umem = ib_umem_get(udata, ucmd.buf_addr, + umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr, (size_t)ucmd.cqe_size * entries, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) { diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 9d0a18cf9e5e..46e1ab771f10 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -30,7 +30,7 @@ enum devx_obj_flags { struct devx_async_data { struct mlx5_ib_dev *mdev; struct list_head list; - struct ib_uobject *fd_uobj; + struct devx_async_cmd_event_file *ev_file; struct mlx5_async_work cb_work; u16 cmd_out_len; /* must be last field in this structure */ @@ -72,7 +72,6 @@ struct devx_event_subscription { struct rcu_head rcu; u64 cookie; struct devx_async_event_file *ev_file; - struct file *filp; /* Upon hot unplug we need a direct access to */ struct eventfd_ctx *eventfd; }; @@ -1674,21 +1673,20 @@ static void devx_query_callback(int status, struct mlx5_async_work *context) { struct devx_async_data *async_data = container_of(context, struct devx_async_data, cb_work); - struct ib_uobject *fd_uobj = async_data->fd_uobj; - struct devx_async_cmd_event_file *ev_file; - struct devx_async_event_queue *ev_queue; + struct devx_async_cmd_event_file *ev_file = async_data->ev_file; + struct devx_async_event_queue *ev_queue = &ev_file->ev_queue; unsigned long flags; - ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file, - uobj); - ev_queue = &ev_file->ev_queue; - + /* + * Note that if the struct devx_async_cmd_event_file uobj begins to be + * destroyed it will block at mlx5_cmd_cleanup_async_ctx() until this + * routine returns, ensuring that it always remains valid here. + */ spin_lock_irqsave(&ev_queue->lock, flags); list_add_tail(&async_data->list, &ev_queue->event_list); spin_unlock_irqrestore(&ev_queue->lock, flags); wake_up_interruptible(&ev_queue->poll_wait); - fput(fd_uobj->object); } #define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */ @@ -1757,9 +1755,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)( async_data->cmd_out_len = cmd_out_len; async_data->mdev = mdev; - async_data->fd_uobj = fd_uobj; + async_data->ev_file = ev_file; - get_file(fd_uobj->object); MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in, uverbs_attr_get_len(attrs, @@ -1769,12 +1766,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)( devx_query_callback, &async_data->cb_work); if (err) - goto cb_err; + goto free_async; return 0; -cb_err: - fput(fd_uobj->object); free_async: kvfree(async_data); sub_bytes: @@ -2032,6 +2027,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( goto err; list_add_tail(&event_sub->event_list, &sub_list); + uverbs_uobject_get(&ev_file->uobj); if (use_eventfd) { event_sub->eventfd = eventfd_ctx_fdget(redirect_fd); @@ -2045,7 +2041,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( event_sub->cookie = cookie; event_sub->ev_file = ev_file; - event_sub->filp = fd_uobj->object; /* May be needed upon cleanup the devx object/subscription */ event_sub->xa_key_level1 = key_level1; event_sub->xa_key_level2 = obj_id; @@ -2099,7 +2094,7 @@ err: if (event_sub->eventfd) eventfd_ctx_put(event_sub->eventfd); - + uverbs_uobject_put(&event_sub->ev_file->uobj); kfree(event_sub); } @@ -2134,7 +2129,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access); + obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); @@ -2324,7 +2319,8 @@ static int deliver_event(struct devx_event_subscription *event_sub, if (ev_file->omit_data) { spin_lock_irqsave(&ev_file->lock, flags); - if (!list_empty(&event_sub->event_list)) { + if (!list_empty(&event_sub->event_list) || + ev_file->is_destroyed) { spin_unlock_irqrestore(&ev_file->lock, flags); return 0; } @@ -2348,7 +2344,10 @@ static int deliver_event(struct devx_event_subscription *event_sub, memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe)); spin_lock_irqsave(&ev_file->lock, flags); - list_add_tail(&event_data->list, &ev_file->event_list); + if (!ev_file->is_destroyed) + list_add_tail(&event_data->list, &ev_file->event_list); + else + kfree(event_data); spin_unlock_irqrestore(&ev_file->lock, flags); wake_up_interruptible(&ev_file->poll_wait); @@ -2361,17 +2360,10 @@ static void dispatch_event_fd(struct list_head *fd_list, struct devx_event_subscription *item; list_for_each_entry_rcu(item, fd_list, xa_list) { - if (!get_file_rcu(item->filp)) - continue; - - if (item->eventfd) { + if (item->eventfd) eventfd_signal(item->eventfd, 1); - fput(item->filp); - continue; - } - - deliver_event(item, data); - fput(item->filp); + else + deliver_event(item, data); } } @@ -2479,11 +2471,11 @@ static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, return -ERESTARTSYS; } - if (list_empty(&ev_queue->event_list) && - ev_queue->is_destroyed) - return -EIO; - spin_lock_irq(&ev_queue->lock); + if (ev_queue->is_destroyed) { + spin_unlock_irq(&ev_queue->lock); + return -EIO; + } } event = list_entry(ev_queue->event_list.next, @@ -2509,23 +2501,6 @@ static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, return ret; } -static int devx_async_cmd_event_close(struct inode *inode, struct file *filp) -{ - struct ib_uobject *uobj = filp->private_data; - struct devx_async_cmd_event_file *comp_ev_file = container_of( - uobj, struct devx_async_cmd_event_file, uobj); - struct devx_async_data *entry, *tmp; - - spin_lock_irq(&comp_ev_file->ev_queue.lock); - list_for_each_entry_safe(entry, tmp, - &comp_ev_file->ev_queue.event_list, list) - kvfree(entry); - spin_unlock_irq(&comp_ev_file->ev_queue.lock); - - uverbs_close_fd(filp); - return 0; -} - static __poll_t devx_async_cmd_event_poll(struct file *filp, struct poll_table_struct *wait) { @@ -2549,7 +2524,7 @@ static const struct file_operations devx_async_cmd_event_fops = { .owner = THIS_MODULE, .read = devx_async_cmd_event_read, .poll = devx_async_cmd_event_poll, - .release = devx_async_cmd_event_close, + .release = uverbs_uobject_fd_release, .llseek = no_llseek, }; @@ -2574,10 +2549,6 @@ static ssize_t devx_async_event_read(struct file *filp, char __user *buf, return -EOVERFLOW; } - if (ev_file->is_destroyed) { - spin_unlock_irq(&ev_file->lock); - return -EIO; - } while (list_empty(&ev_file->event_list)) { spin_unlock_irq(&ev_file->lock); @@ -2653,81 +2624,96 @@ static __poll_t devx_async_event_poll(struct file *filp, return pollflags; } -static int devx_async_event_close(struct inode *inode, struct file *filp) +static void devx_free_subscription(struct rcu_head *rcu) { - struct devx_async_event_file *ev_file = filp->private_data; - struct devx_event_subscription *event_sub, *event_sub_tmp; - struct devx_async_event_data *entry, *tmp; - struct mlx5_ib_dev *dev = ev_file->dev; - - mutex_lock(&dev->devx_event_table.event_xa_lock); - /* delete the subscriptions which are related to this FD */ - list_for_each_entry_safe(event_sub, event_sub_tmp, - &ev_file->subscribed_events_list, file_list) { - devx_cleanup_subscription(dev, event_sub); - if (event_sub->eventfd) - eventfd_ctx_put(event_sub->eventfd); - - list_del_rcu(&event_sub->file_list); - /* subscription may not be used by the read API any more */ - kfree_rcu(event_sub, rcu); - } - - mutex_unlock(&dev->devx_event_table.event_xa_lock); - - /* free the pending events allocation */ - if (!ev_file->omit_data) { - spin_lock_irq(&ev_file->lock); - list_for_each_entry_safe(entry, tmp, - &ev_file->event_list, list) - kfree(entry); /* read can't come any more */ - spin_unlock_irq(&ev_file->lock); - } + struct devx_event_subscription *event_sub = + container_of(rcu, struct devx_event_subscription, rcu); - uverbs_close_fd(filp); - put_device(&dev->ib_dev.dev); - return 0; + if (event_sub->eventfd) + eventfd_ctx_put(event_sub->eventfd); + uverbs_uobject_put(&event_sub->ev_file->uobj); + kfree(event_sub); } static const struct file_operations devx_async_event_fops = { .owner = THIS_MODULE, .read = devx_async_event_read, .poll = devx_async_event_poll, - .release = devx_async_event_close, + .release = uverbs_uobject_fd_release, .llseek = no_llseek, }; -static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_cmd_event_file *comp_ev_file = container_of(uobj, struct devx_async_cmd_event_file, uobj); struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue; + struct devx_async_data *entry, *tmp; spin_lock_irq(&ev_queue->lock); ev_queue->is_destroyed = 1; spin_unlock_irq(&ev_queue->lock); - - if (why == RDMA_REMOVE_DRIVER_REMOVE) - wake_up_interruptible(&ev_queue->poll_wait); + wake_up_interruptible(&ev_queue->poll_wait); mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx); + + spin_lock_irq(&comp_ev_file->ev_queue.lock); + list_for_each_entry_safe(entry, tmp, + &comp_ev_file->ev_queue.event_list, list) { + list_del(&entry->list); + kvfree(entry); + } + spin_unlock_irq(&comp_ev_file->ev_queue.lock); return 0; }; -static int devx_hot_unplug_async_event_file(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_event_file *ev_file = container_of(uobj, struct devx_async_event_file, uobj); + struct devx_event_subscription *event_sub, *event_sub_tmp; + struct mlx5_ib_dev *dev = ev_file->dev; spin_lock_irq(&ev_file->lock); ev_file->is_destroyed = 1; - spin_unlock_irq(&ev_file->lock); + /* free the pending events allocation */ + if (ev_file->omit_data) { + struct devx_event_subscription *event_sub, *tmp; + + list_for_each_entry_safe(event_sub, tmp, &ev_file->event_list, + event_list) + list_del_init(&event_sub->event_list); + + } else { + struct devx_async_event_data *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &ev_file->event_list, + list) { + list_del(&entry->list); + kfree(entry); + } + } + + spin_unlock_irq(&ev_file->lock); wake_up_interruptible(&ev_file->poll_wait); + + mutex_lock(&dev->devx_event_table.event_xa_lock); + /* delete the subscriptions which are related to this FD */ + list_for_each_entry_safe(event_sub, event_sub_tmp, + &ev_file->subscribed_events_list, file_list) { + devx_cleanup_subscription(dev, event_sub); + list_del_rcu(&event_sub->file_list); + /* subscription may not be used by the read API any more */ + call_rcu(&event_sub->rcu, devx_free_subscription); + } + mutex_unlock(&dev->devx_event_table.event_xa_lock); + + put_device(&dev->ib_dev.dev); return 0; }; @@ -2913,7 +2899,7 @@ DECLARE_UVERBS_NAMED_METHOD( DECLARE_UVERBS_NAMED_OBJECT( MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file), - devx_hot_unplug_async_cmd_event_file, + devx_async_cmd_event_destroy_uobj, &devx_async_cmd_event_fops, "[devx_async_cmd]", O_RDONLY), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)); @@ -2931,7 +2917,7 @@ DECLARE_UVERBS_NAMED_METHOD( DECLARE_UVERBS_NAMED_OBJECT( MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD, UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file), - devx_hot_unplug_async_event_file, + devx_async_event_destroy_uobj, &devx_async_event_fops, "[devx_async_event]", O_RDONLY), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)); diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index 12737c509aa2..61475b571531 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -64,7 +64,8 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0); + page->umem = ib_umem_get(context->ibucontext.device, virt & PAGE_MASK, + PAGE_SIZE, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index ac4d8d1b9a07..1ae6fd95acaa 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -507,8 +507,7 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr); if (ret) { /* Undo the effect of adding the outstanding wr */ - gsi->outstanding_pi = (gsi->outstanding_pi - 1) % - gsi->cap.max_send_wr; + gsi->outstanding_pi--; goto err; } spin_unlock_irqrestore(&gsi->lock, flags); diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c index 4f0edd4832bd..b61165359954 100644 --- a/drivers/infiniband/hw/mlx5/ib_virt.c +++ b/drivers/infiniband/hw/mlx5/ib_virt.c @@ -164,8 +164,10 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid) in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID; in->node_guid = guid; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); - if (!err) + if (!err) { vfs_ctx[vf].node_guid = guid; + vfs_ctx[vf].node_guid_valid = 1; + } kfree(in); return err; } @@ -185,8 +187,10 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid) in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID; in->port_guid = guid; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); - if (!err) + if (!err) { vfs_ctx[vf].port_guid = guid; + vfs_ctx[vf].port_guid_valid = 1; + } kfree(in); return err; } @@ -208,20 +212,12 @@ int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port, { struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_hca_vport_context *rep; - int err; - - rep = kzalloc(sizeof(*rep), GFP_KERNEL); - if (!rep) - return -ENOMEM; + struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx; - err = mlx5_query_hca_vport_context(mdev, 1, 1, vf+1, rep); - if (err) - goto ex; + node_guid->guid = + vfs_ctx[vf].node_guid_valid ? vfs_ctx[vf].node_guid : 0; + port_guid->guid = + vfs_ctx[vf].port_guid_valid ? vfs_ctx[vf].port_guid : 0; - port_guid->guid = rep->port_guid; - node_guid->guid = rep->node_guid; -ex: - kfree(rep); - return err; + return 0; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 997cbfe4b90c..e4bcfa81b70a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -40,7 +40,7 @@ #include <linux/slab.h> #include <linux/bitmap.h> #if defined(CONFIG_X86) -#include <asm/pat.h> +#include <asm/memtype.h> #endif #include <linux/sched.h> #include <linux/sched/mm.h> @@ -815,6 +815,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) { + size_t uhw_outlen = (uhw) ? uhw->outlen : 0; struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; int err = -ENOMEM; @@ -828,12 +829,12 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, u64 max_tso; resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length); - if (uhw->outlen && uhw->outlen < resp_len) + if (uhw_outlen && uhw_outlen < resp_len) return -EINVAL; resp.response_length = resp_len; - if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) + if (uhw && uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) return -EINVAL; memset(props, 0, sizeof(*props)); @@ -897,7 +898,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->raw_packet_caps |= IB_RAW_PACKET_CAP_CVLAN_STRIPPING; - if (field_avail(typeof(resp), tso_caps, uhw->outlen)) { + if (field_avail(typeof(resp), tso_caps, uhw_outlen)) { max_tso = MLX5_CAP_ETH(mdev, max_lso_cap); if (max_tso) { resp.tso_caps.max_tso = 1 << max_tso; @@ -907,7 +908,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), rss_caps, uhw->outlen)) { + if (field_avail(typeof(resp), rss_caps, uhw_outlen)) { resp.rss_caps.rx_hash_function = MLX5_RX_HASH_FUNC_TOEPLITZ; resp.rss_caps.rx_hash_fields_mask = @@ -927,9 +928,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.response_length += sizeof(resp.rss_caps); } } else { - if (field_avail(typeof(resp), tso_caps, uhw->outlen)) + if (field_avail(typeof(resp), tso_caps, uhw_outlen)) resp.response_length += sizeof(resp.tso_caps); - if (field_avail(typeof(resp), rss_caps, uhw->outlen)) + if (field_avail(typeof(resp), rss_caps, uhw_outlen)) resp.response_length += sizeof(resp.rss_caps); } @@ -1014,6 +1015,23 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; props->odp_caps = dev->odp_caps; + if (!uhw) { + /* ODP for kernel QPs is not implemented for receive + * WQEs and SRQ WQEs + */ + props->odp_caps.per_transport_caps.rc_odp_caps &= + ~(IB_ODP_SUPPORT_READ | + IB_ODP_SUPPORT_SRQ_RECV); + props->odp_caps.per_transport_caps.uc_odp_caps &= + ~(IB_ODP_SUPPORT_READ | + IB_ODP_SUPPORT_SRQ_RECV); + props->odp_caps.per_transport_caps.ud_odp_caps &= + ~(IB_ODP_SUPPORT_READ | + IB_ODP_SUPPORT_SRQ_RECV); + props->odp_caps.per_transport_caps.xrc_odp_caps &= + ~(IB_ODP_SUPPORT_READ | + IB_ODP_SUPPORT_SRQ_RECV); + } } if (MLX5_CAP_GEN(mdev, cd)) @@ -1054,7 +1072,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_MAX_CQ_PERIOD; } - if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) { + if (field_avail(typeof(resp), cqe_comp_caps, uhw_outlen)) { resp.response_length += sizeof(resp.cqe_comp_caps); if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) { @@ -1072,7 +1090,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) && + if (field_avail(typeof(resp), packet_pacing_caps, uhw_outlen) && raw_support) { if (MLX5_CAP_QOS(mdev, packet_pacing) && MLX5_CAP_GEN(mdev, qos)) { @@ -1091,7 +1109,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes, - uhw->outlen)) { + uhw_outlen)) { if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) resp.mlx5_ib_support_multi_pkt_send_wqes = MLX5_IB_ALLOW_MPW; @@ -1104,7 +1122,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } - if (field_avail(typeof(resp), flags, uhw->outlen)) { + if (field_avail(typeof(resp), flags, uhw_outlen)) { resp.response_length += sizeof(resp.flags); if (MLX5_CAP_GEN(mdev, cqe_compression_128)) @@ -1120,8 +1138,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT; } - if (field_avail(typeof(resp), sw_parsing_caps, - uhw->outlen)) { + if (field_avail(typeof(resp), sw_parsing_caps, uhw_outlen)) { resp.response_length += sizeof(resp.sw_parsing_caps); if (MLX5_CAP_ETH(mdev, swp)) { resp.sw_parsing_caps.sw_parsing_offloads |= @@ -1141,7 +1158,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen) && + if (field_avail(typeof(resp), striding_rq_caps, uhw_outlen) && raw_support) { resp.response_length += sizeof(resp.striding_rq_caps); if (MLX5_CAP_GEN(mdev, striding_rq)) { @@ -1164,8 +1181,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), tunnel_offloads_caps, - uhw->outlen)) { + if (field_avail(typeof(resp), tunnel_offloads_caps, uhw_outlen)) { resp.response_length += sizeof(resp.tunnel_offloads_caps); if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan)) resp.tunnel_offloads_caps |= @@ -1186,7 +1202,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP; } - if (uhw->outlen) { + if (uhw_outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); if (err) @@ -2078,6 +2094,7 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry) { struct mlx5_user_mmap_entry *mentry = to_mmmap(entry); struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device); + struct mlx5_var_table *var_table = &dev->var_table; struct mlx5_ib_dm *mdm; switch (mentry->mmap_flag) { @@ -2087,6 +2104,12 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry) mdm->size); kfree(mdm); break; + case MLX5_IB_MMAP_TYPE_VAR: + mutex_lock(&var_table->bitmap_lock); + clear_bit(mentry->page_idx, var_table->bitmap); + mutex_unlock(&var_table->bitmap_lock); + kfree(mentry); + break; default: WARN_ON(true); } @@ -2246,7 +2269,10 @@ static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev, mentry = to_mmmap(entry); pfn = (mentry->address >> PAGE_SHIFT); - prot = pgprot_writecombine(vma->vm_page_prot); + if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR) + prot = pgprot_noncached(vma->vm_page_prot); + else + prot = pgprot_writecombine(vma->vm_page_prot); ret = rdma_user_mmap_io(ucontext, vma, pfn, entry->npages * PAGE_SIZE, prot, @@ -2255,6 +2281,15 @@ static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev, return ret; } +static u64 mlx5_entry_to_mmap_offset(struct mlx5_user_mmap_entry *entry) +{ + u64 cmd = (entry->rdma_entry.start_pgoff >> 16) & 0xFFFF; + u64 index = entry->rdma_entry.start_pgoff & 0xFFFF; + + return (((index >> 8) << 16) | (cmd << MLX5_IB_MMAP_CMD_SHIFT) | + (index & 0xFF)) << PAGE_SHIFT; +} + static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); @@ -3276,12 +3311,14 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, int num_entries, int num_groups, u32 flags) { + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_table *ft; - ft = mlx5_create_auto_grouped_flow_table(ns, priority, - num_entries, - num_groups, - 0, flags); + ft_attr.prio = priority; + ft_attr.max_fte = num_entries; + ft_attr.flags = flags; + ft_attr.autogroup.max_num_groups = num_groups; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); if (IS_ERR(ft)) return ERR_CAST(ft); @@ -4771,7 +4808,6 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; int err = -ENOMEM; - struct ib_udata uhw = {.inlen = 0, .outlen = 0}; pprops = kzalloc(sizeof(*pprops), GFP_KERNEL); if (!pprops) @@ -4781,7 +4817,7 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) if (!dprops) goto out; - err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); + err = mlx5_ib_query_device(&dev->ib_dev, dprops, NULL); if (err) { mlx5_ib_warn(dev, "query_device failed %d\n", err); goto out; @@ -5351,6 +5387,14 @@ static const struct mlx5_ib_counter extended_err_cnts[] = { INIT_Q_COUNTER(req_cqe_flush_error), }; +static const struct mlx5_ib_counter roce_accl_cnts[] = { + INIT_Q_COUNTER(roce_adp_retrans), + INIT_Q_COUNTER(roce_adp_retrans_to), + INIT_Q_COUNTER(roce_slow_restart), + INIT_Q_COUNTER(roce_slow_restart_cnps), + INIT_Q_COUNTER(roce_slow_restart_trans), +}; + #define INIT_EXT_PPCNT_COUNTER(_name) \ { .name = #_name, .offset = \ MLX5_BYTE_OFF(ppcnt_reg, \ @@ -5399,6 +5443,9 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) num_counters += ARRAY_SIZE(extended_err_cnts); + if (MLX5_CAP_GEN(dev->mdev, roce_accl)) + num_counters += ARRAY_SIZE(roce_accl_cnts); + cnts->num_q_counters = num_counters; if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { @@ -5459,6 +5506,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, } } + if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { + for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { + names[j] = roce_accl_cnts[i].name; + offsets[j] = roce_accl_cnts[i].offset; + } + } + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { names[j] = cong_cnts[i].name; @@ -6034,6 +6088,145 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } +static int var_obj_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_user_mmap_entry *obj = uobject->object; + + rdma_user_mmap_entry_remove(&obj->rdma_entry); + return 0; +} + +static struct mlx5_user_mmap_entry * +alloc_var_entry(struct mlx5_ib_ucontext *c) +{ + struct mlx5_user_mmap_entry *entry; + struct mlx5_var_table *var_table; + u32 page_idx; + int err; + + var_table = &to_mdev(c->ibucontext.device)->var_table; + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + mutex_lock(&var_table->bitmap_lock); + page_idx = find_first_zero_bit(var_table->bitmap, + var_table->num_var_hw_entries); + if (page_idx >= var_table->num_var_hw_entries) { + err = -ENOSPC; + mutex_unlock(&var_table->bitmap_lock); + goto end; + } + + set_bit(page_idx, var_table->bitmap); + mutex_unlock(&var_table->bitmap_lock); + + entry->address = var_table->hw_start_addr + + (page_idx * var_table->stride_size); + entry->page_idx = page_idx; + entry->mmap_flag = MLX5_IB_MMAP_TYPE_VAR; + + err = rdma_user_mmap_entry_insert_range( + &c->ibucontext, &entry->rdma_entry, var_table->stride_size, + MLX5_IB_MMAP_OFFSET_START << 16, + (MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1); + if (err) + goto err_insert; + + return entry; + +err_insert: + mutex_lock(&var_table->bitmap_lock); + clear_bit(page_idx, var_table->bitmap); + mutex_unlock(&var_table->bitmap_lock); +end: + kfree(entry); + return ERR_PTR(err); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_VAR_OBJ_ALLOC)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE); + struct mlx5_ib_ucontext *c; + struct mlx5_user_mmap_entry *entry; + u64 mmap_offset; + u32 length; + int err; + + c = to_mucontext(ib_uverbs_get_ucontext(attrs)); + if (IS_ERR(c)) + return PTR_ERR(c); + + entry = alloc_var_entry(c); + if (IS_ERR(entry)) + return PTR_ERR(entry); + + mmap_offset = mlx5_entry_to_mmap_offset(entry); + length = entry->rdma_entry.npages * PAGE_SIZE; + uobj->object = entry; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET, + &mmap_offset, sizeof(mmap_offset)); + if (err) + goto err; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID, + &entry->page_idx, sizeof(entry->page_idx)); + if (err) + goto err; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH, + &length, sizeof(length)); + if (err) + goto err; + + return 0; + +err: + rdma_user_mmap_entry_remove(&entry->rdma_entry); + return err; +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_VAR_OBJ_ALLOC, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE, + MLX5_IB_OBJECT_VAR, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_VAR_OBJ_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_DESTROY_HANDLE, + MLX5_IB_OBJECT_VAR, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_VAR, + UVERBS_TYPE_ALLOC_IDR(var_obj_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_ALLOC), + &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_DESTROY)); + +static bool var_is_supported(struct ib_device *device) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + + return (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q); +} + ADD_UVERBS_ATTRIBUTES_SIMPLE( mlx5_ib_dm, UVERBS_OBJECT_DM, @@ -6056,14 +6249,14 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( enum mlx5_ib_uapi_flow_action_flags)); static const struct uapi_definition mlx5_ib_defs[] = { -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) UAPI_DEF_CHAIN(mlx5_ib_devx_defs), UAPI_DEF_CHAIN(mlx5_ib_flow_defs), -#endif UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, &mlx5_ib_flow_action), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR, + UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)), {} }; @@ -6335,6 +6528,35 @@ static const struct ib_device_ops mlx5_ib_dev_dm_ops = { .reg_dm_mr = mlx5_ib_reg_dm_mr, }; +static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_var_table *var_table = &dev->var_table; + u8 log_doorbell_bar_size; + u8 log_doorbell_stride; + u64 bar_size; + + log_doorbell_bar_size = MLX5_CAP_DEV_VDPA_EMULATION(mdev, + log_doorbell_bar_size); + log_doorbell_stride = MLX5_CAP_DEV_VDPA_EMULATION(mdev, + log_doorbell_stride); + var_table->hw_start_addr = dev->mdev->bar_addr + + MLX5_CAP64_DEV_VDPA_EMULATION(mdev, + doorbell_bar_offset); + bar_size = (1ULL << log_doorbell_bar_size) * 4096; + var_table->stride_size = 1ULL << log_doorbell_stride; + var_table->num_var_hw_entries = div64_u64(bar_size, var_table->stride_size); + mutex_init(&var_table->bitmap_lock); + var_table->bitmap = bitmap_zalloc(var_table->num_var_hw_entries, + GFP_KERNEL); + return (var_table->bitmap) ? 0 : -ENOMEM; +} + +static void mlx5_ib_stage_caps_cleanup(struct mlx5_ib_dev *dev) +{ + bitmap_free(dev->var_table.bitmap); +} + static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; @@ -6422,6 +6644,13 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) mutex_init(&dev->lb.mutex); + if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { + err = mlx5_ib_init_var_table(dev); + if (err) + return err; + } + dev->ib_dev.use_cq_dim = true; return 0; @@ -6725,6 +6954,8 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) { + dev->ib_active = false; + /* Number of stages to cleanup */ while (stage) { stage--; @@ -6770,7 +7001,7 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_flow_db_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CAPS, mlx5_ib_stage_caps_init, - NULL), + mlx5_ib_stage_caps_cleanup), STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, mlx5_ib_stage_non_default_cb, NULL), @@ -6827,7 +7058,7 @@ const struct mlx5_ib_profile raw_eth_profile = { mlx5_ib_stage_flow_db_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CAPS, mlx5_ib_stage_caps_init, - NULL), + mlx5_ib_stage_caps_cleanup), STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, mlx5_ib_stage_raw_eth_non_default_cb, NULL), diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 048f4e974a61..b90a3649e7d1 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -101,18 +101,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, *count = i; } -static u64 umem_dma_to_mtt(dma_addr_t umem_dma) -{ - u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK; - - if (umem_dma & ODP_READ_ALLOWED_BIT) - mtt_entry |= MLX5_IB_MTT_READ; - if (umem_dma & ODP_WRITE_ALLOWED_BIT) - mtt_entry |= MLX5_IB_MTT_WRITE; - - return mtt_entry; -} - /* * Populate the given array with bus addresses from the umem. * @@ -139,19 +127,6 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, struct scatterlist *sg; int entry; - if (umem->is_odp) { - WARN_ON(shift != 0); - WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)); - - for (i = 0; i < num_pages; ++i) { - dma_addr_t pa = - to_ib_umem_odp(umem)->dma_list[offset + i]; - - pas[i] = cpu_to_be64(umem_dma_to_mtt(pa)); - } - return; - } - i = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { len = sg_dma_len(sg) >> PAGE_SHIFT; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b06f32ff5748..bb78142bca5e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -72,6 +72,11 @@ #define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size) enum { + MLX5_IB_MMAP_OFFSET_START = 9, + MLX5_IB_MMAP_OFFSET_END = 255, +}; + +enum { MLX5_IB_MMAP_CMD_SHIFT = 8, MLX5_IB_MMAP_CMD_MASK = 0xff, }; @@ -120,6 +125,7 @@ enum { enum mlx5_ib_mmap_type { MLX5_IB_MMAP_TYPE_MEMIC = 1, + MLX5_IB_MMAP_TYPE_VAR = 2, }; #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) \ @@ -563,6 +569,7 @@ struct mlx5_user_mmap_entry { struct rdma_user_mmap_entry rdma_entry; u8 mmap_flag; u64 address; + u32 page_idx; }; struct mlx5_ib_dm { @@ -629,6 +636,7 @@ struct mlx5_ib_mr { /* For ODP and implicit */ atomic_t num_deferred_work; + wait_queue_head_t q_deferred_work; struct xarray implicit_children; union { struct rcu_head rcu; @@ -959,6 +967,15 @@ struct mlx5_devx_event_table { struct xarray event_xa; }; +struct mlx5_var_table { + /* serialize updating the bitmap */ + struct mutex bitmap_lock; + unsigned long *bitmap; + u64 hw_start_addr; + u32 stride_size; + u64 num_var_hw_entries; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -1013,6 +1030,7 @@ struct mlx5_ib_dev { struct mlx5_srq_table srq_table; struct mlx5_async_ctx async_ctx; struct mlx5_devx_event_table devx_event_table; + struct mlx5_var_table var_table; struct xarray sig_mrs; }; @@ -1153,12 +1171,12 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); -int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, - int buflen, size_t *bc); -int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, - int buflen, size_t *bc); -int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, - void *buffer, int buflen, size_t *bc); +int mlx5_ib_read_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, + size_t buflen, size_t *bc); +int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, + size_t buflen, size_t *bc); +int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer, + size_t buflen, size_t *bc); int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); @@ -1276,8 +1294,8 @@ void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent); -void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, - size_t nentries, struct mlx5_ib_mr *mr, int flags); +void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, + struct mlx5_ib_mr *mr, int flags); int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, @@ -1293,9 +1311,8 @@ static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {} static inline int mlx5_ib_odp_init(void) { return 0; } static inline void mlx5_ib_odp_cleanup(void) {} static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {} -static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, - size_t nentries, struct mlx5_ib_mr *mr, - int flags) {} +static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, + struct mlx5_ib_mr *mr, int flags) {} static inline int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, @@ -1363,14 +1380,14 @@ int mlx5_ib_fill_res_entry(struct sk_buff *msg, int mlx5_ib_fill_stat_entry(struct sk_buff *msg, struct rdma_restrack_entry *res); +extern const struct uapi_definition mlx5_ib_devx_defs[]; +extern const struct uapi_definition mlx5_ib_flow_defs[]; + #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev); void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev); -const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); -extern const struct uapi_definition mlx5_ib_devx_defs[]; -extern const struct uapi_definition mlx5_ib_flow_defs[]; struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_flow_context *flow_context, @@ -1378,7 +1395,6 @@ struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( void *cmd_in, int inlen, int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id); -int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); #else static inline int @@ -1507,7 +1523,7 @@ int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num); static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, - bool do_modify_atomic) + bool do_modify_atomic, int access_flags) { if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) return false; @@ -1517,6 +1533,9 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) return false; + if (access_flags & IB_ACCESS_RELAXED_ORDERING) + return false; + return true; } diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index ea8bfc3e2d8d..6fa0a83c19de 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -147,7 +147,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) break; } mr->order = ent->order; - mr->allocated_from_cache = 1; + mr->allocated_from_cache = true; mr->dev = dev; MLX5_SET(mkc, mkc, free, 1); @@ -661,12 +661,21 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, struct ib_pd *pd) { + struct mlx5_ib_dev *dev = to_mdev(pd->device); + MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, mkc, lr, 1); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) + MLX5_SET(mkc, mkc, relaxed_ordering_write, + !!(acc & IB_ACCESS_RELAXED_ORDERING)); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) + MLX5_SET(mkc, mkc, relaxed_ordering_read, + !!(acc & IB_ACCESS_RELAXED_ORDERING)); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET64(mkc, mkc, start_addr, start_addr); @@ -737,10 +746,9 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev) return MLX5_MAX_UMR_SHIFT; } -static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, - u64 start, u64 length, int access_flags, - struct ib_umem **umem, int *npages, int *page_shift, - int *ncont, int *order) +static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length, + int access_flags, struct ib_umem **umem, int *npages, + int *page_shift, int *ncont, int *order) { struct ib_umem *u; @@ -749,7 +757,7 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, if (access_flags & IB_ACCESS_ON_DEMAND) { struct ib_umem_odp *odp; - odp = ib_umem_odp_get(udata, start, length, access_flags, + odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, &mlx5_mn_ops); if (IS_ERR(odp)) { mlx5_ib_dbg(dev, "umem get failed (%ld)\n", @@ -765,7 +773,7 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, if (order) *order = ilog2(roundup_pow_of_two(*ncont)); } else { - u = ib_umem_get(udata, start, length, access_flags); + u = ib_umem_get(&dev->ib_dev, start, length, access_flags); if (IS_ERR(u)) { mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); return PTR_ERR(u); @@ -868,36 +876,6 @@ static struct mlx5_ib_mr *alloc_mr_from_cache( return mr; } -static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages, - void *xlt, int page_shift, size_t size, - int flags) -{ - struct mlx5_ib_dev *dev = mr->dev; - struct ib_umem *umem = mr->umem; - - if (flags & MLX5_IB_UPD_XLT_INDIRECT) { - if (!umr_can_use_indirect_mkey(dev)) - return -EPERM; - mlx5_odp_populate_klm(xlt, idx, npages, mr, flags); - return npages; - } - - npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx); - - if (!(flags & MLX5_IB_UPD_XLT_ZAP)) { - __mlx5_ib_populate_pas(dev, umem, page_shift, - idx, npages, xlt, - MLX5_IB_MTT_PRESENT); - /* Clear padding after the pages - * brought from the umem. - */ - memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0, - size - npages * sizeof(struct mlx5_mtt)); - } - - return npages; -} - #define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \ MLX5_UMR_MTT_ALIGNMENT) #define MLX5_SPARE_UMR_CHUNK 0x10000 @@ -921,6 +899,7 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, size_t pages_mapped = 0; size_t pages_to_map = 0; size_t pages_iter = 0; + size_t size_to_map = 0; gfp_t gfp; bool use_emergency_page = false; @@ -967,6 +946,15 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, goto free_xlt; } + if (mr->umem->is_odp) { + if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + size_t max_pages = ib_umem_odp_num_pages(odp) - idx; + + pages_to_map = min_t(size_t, pages_to_map, max_pages); + } + } + sg.addr = dma; sg.lkey = dev->umrc.pd->local_dma_lkey; @@ -989,14 +977,22 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, pages_mapped < pages_to_map && !err; pages_mapped += pages_iter, idx += pages_iter) { npages = min_t(int, pages_iter, pages_to_map - pages_mapped); + size_to_map = npages * desc_size; dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE); - npages = populate_xlt(mr, idx, npages, xlt, - page_shift, size, flags); - + if (mr->umem->is_odp) { + mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); + } else { + __mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx, + npages, xlt, + MLX5_IB_MTT_PRESENT); + /* Clear padding after the pages + * brought from the umem. + */ + memset(xlt + size_to_map, 0, size - size_to_map); + } dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); - sg.length = ALIGN(npages * desc_size, - MLX5_UMR_MTT_ALIGNMENT); + sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); if (pages_mapped + pages_iter >= pages_to_map) { if (flags & MLX5_IB_UPD_XLT_ENABLE) @@ -1075,6 +1071,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, free, !populate); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) + MLX5_SET(mkc, mkc, relaxed_ordering_write, + !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) + MLX5_SET(mkc, mkc, relaxed_ordering_read, + !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); @@ -1247,6 +1249,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && length == U64_MAX) { + if (virt_addr != start) + return ERR_PTR(-EINVAL); if (!(access_flags & IB_ACCESS_ON_DEMAND) || !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) return ERR_PTR(-EINVAL); @@ -1257,13 +1261,13 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return &mr->ibmr; } - err = mr_umem_get(dev, udata, start, length, access_flags, &umem, + err = mr_umem_get(dev, start, length, access_flags, &umem, &npages, &page_shift, &ncont, &order); if (err < 0) return ERR_PTR(err); - use_umr = mlx5_ib_can_use_umr(dev, true); + use_umr = mlx5_ib_can_use_umr(dev, true, access_flags); if (order <= mr_cache_max_order(dev) && use_umr) { mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, @@ -1424,14 +1428,13 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, flags |= IB_MR_REREG_TRANS; ib_umem_release(mr->umem); mr->umem = NULL; - err = mr_umem_get(dev, udata, addr, len, access_flags, - &mr->umem, &npages, &page_shift, &ncont, - &order); + err = mr_umem_get(dev, addr, len, access_flags, &mr->umem, + &npages, &page_shift, &ncont, &order); if (err) goto err; } - if (!mlx5_ib_can_use_umr(dev, true) || + if (!mlx5_ib_can_use_umr(dev, true, access_flags) || (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) { /* * UMR can't be used - MKey needs to be replaced. @@ -1452,7 +1455,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - mr->allocated_from_cache = 0; + mr->allocated_from_cache = false; } else { /* * Send a UMR WQE diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index f924250f80c2..bf50cd91f472 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -93,8 +93,8 @@ struct mlx5_pagefault { static u64 mlx5_imr_ksm_entries; -void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, - struct mlx5_ib_mr *imr, int flags) +static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, + struct mlx5_ib_mr *imr, int flags) { struct mlx5_klm *end = pklm + nentries; @@ -144,6 +144,44 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, } } +static u64 umem_dma_to_mtt(dma_addr_t umem_dma) +{ + u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK; + + if (umem_dma & ODP_READ_ALLOWED_BIT) + mtt_entry |= MLX5_IB_MTT_READ; + if (umem_dma & ODP_WRITE_ALLOWED_BIT) + mtt_entry |= MLX5_IB_MTT_WRITE; + + return mtt_entry; +} + +static void populate_mtt(__be64 *pas, size_t idx, size_t nentries, + struct mlx5_ib_mr *mr, int flags) +{ + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + dma_addr_t pa; + size_t i; + + if (flags & MLX5_IB_UPD_XLT_ZAP) + return; + + for (i = 0; i < nentries; i++) { + pa = odp->dma_list[idx + i]; + pas[i] = cpu_to_be64(umem_dma_to_mtt(pa)); + } +} + +void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, + struct mlx5_ib_mr *mr, int flags) +{ + if (flags & MLX5_IB_UPD_XLT_INDIRECT) { + populate_klm(xlt, idx, nentries, mr, flags); + } else { + populate_mtt(xlt, idx, nentries, mr, flags); + } +} + static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); @@ -197,7 +235,8 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) mr->parent = NULL; mlx5_mr_cache_free(mr->dev, mr); ib_umem_odp_release(odp); - atomic_dec(&imr->num_deferred_work); + if (atomic_dec_and_test(&imr->num_deferred_work)) + wake_up(&imr->q_deferred_work); } static void free_implicit_child_mr_work(struct work_struct *work) @@ -342,7 +381,7 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) memset(caps, 0, sizeof(*caps)); if (!MLX5_CAP_GEN(dev->mdev, pg) || - !mlx5_ib_can_use_umr(dev, true)) + !mlx5_ib_can_use_umr(dev, true, 0)) return; caps->general_caps = IB_ODP_SUPPORT; @@ -497,7 +536,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct mlx5_ib_mr *imr; int err; - umem_odp = ib_umem_odp_alloc_implicit(udata, access_flags); + umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags); if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); @@ -516,6 +555,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; atomic_set(&imr->num_deferred_work, 0); + init_waitqueue_head(&imr->q_deferred_work); xa_init(&imr->implicit_children); err = mlx5_ib_update_xlt(imr, 0, @@ -573,10 +613,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) * under xa_lock while the child is in the xarray. Thus at this point * it is only decreasing, and all work holding it is now on the wq. */ - if (atomic_read(&imr->num_deferred_work)) { - flush_workqueue(system_unbound_wq); - WARN_ON(atomic_read(&imr->num_deferred_work)); - } + wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work)); /* * Fence the imr before we destroy the children. This allows us to @@ -607,10 +644,7 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) /* Wait for all running page-fault handlers to finish. */ synchronize_srcu(&mr->dev->odp_srcu); - if (atomic_read(&mr->num_deferred_work)) { - flush_workqueue(system_unbound_wq); - WARN_ON(atomic_read(&mr->num_deferred_work)); - } + wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); dma_fence_odp_mr(mr); } @@ -624,11 +658,10 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; unsigned long current_seq; u64 access_mask; - u64 start_idx, page_mask; + u64 start_idx; page_shift = odp->page_shift; - page_mask = ~(BIT(page_shift) - 1); - start_idx = (user_va - (mr->mmkey.iova & page_mask)) >> page_shift; + start_idx = (user_va - ib_umem_start(odp)) >> page_shift; access_mask = ODP_READ_ALLOWED_BIT; if (odp->umem.writable && !downgrade) @@ -767,11 +800,19 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + if (unlikely(io_virt < mr->mmkey.iova)) + return -EFAULT; + if (!odp->is_implicit_odp) { - if (unlikely(io_virt < ib_umem_start(odp) || - ib_umem_end(odp) - io_virt < bcnt)) + u64 user_va; + + if (check_add_overflow(io_virt - mr->mmkey.iova, + (u64)odp->umem.address, &user_va)) + return -EFAULT; + if (unlikely(user_va >= ib_umem_end(odp) || + ib_umem_end(odp) - user_va < bcnt)) return -EFAULT; - return pagefault_real_mr(mr, odp, io_virt, bcnt, bytes_mapped, + return pagefault_real_mr(mr, odp, user_va, bcnt, bytes_mapped, flags); } return pagefault_implicit_mr(mr, odp, io_virt, bcnt, bytes_mapped, @@ -1237,15 +1278,15 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, wqe = wqe_start; qp = (res->res == MLX5_RES_QP) ? res_to_qp(res) : NULL; if (qp && sq) { - ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE, - &bytes_copied); + ret = mlx5_ib_read_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE, + &bytes_copied); if (ret) goto read_user; ret = mlx5_ib_mr_initiator_pfault_handler( dev, pfault, qp, &wqe, &wqe_end, bytes_copied); } else if (qp && !sq) { - ret = mlx5_ib_read_user_wqe_rq(qp, wqe_index, wqe, PAGE_SIZE, - &bytes_copied); + ret = mlx5_ib_read_wqe_rq(qp, wqe_index, wqe, PAGE_SIZE, + &bytes_copied); if (ret) goto read_user; ret = mlx5_ib_mr_responder_pfault_handler_rq( @@ -1253,8 +1294,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, } else if (!qp) { struct mlx5_ib_srq *srq = res_to_srq(res); - ret = mlx5_ib_read_user_wqe_srq(srq, wqe_index, wqe, PAGE_SIZE, - &bytes_copied); + ret = mlx5_ib_read_wqe_srq(srq, wqe_index, wqe, PAGE_SIZE, + &bytes_copied); if (ret) goto read_user; ret = mlx5_ib_mr_responder_pfault_handler_srq( @@ -1675,7 +1716,8 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work) u32 i; for (i = 0; i < work->num_sge; ++i) - atomic_dec(&work->frags[i].mr->num_deferred_work); + if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work)) + wake_up(&work->frags[i].mr->q_deferred_work); kvfree(work); } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7e51870e9e01..957f3a52589b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -129,14 +129,10 @@ static int is_sqp(enum ib_qp_type qp_type) * * Return: zero on success, or an error code. */ -static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem, - void *buffer, - u32 buflen, - int wqe_index, - int wq_offset, - int wq_wqe_cnt, - int wq_wqe_shift, - int bcnt, +static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem, void *buffer, + size_t buflen, int wqe_index, + int wq_offset, int wq_wqe_cnt, + int wq_wqe_shift, int bcnt, size_t *bytes_copied) { size_t offset = wq_offset + ((wqe_index % wq_wqe_cnt) << wq_wqe_shift); @@ -160,11 +156,43 @@ static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem, return 0; } -int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, - int wqe_index, - void *buffer, - int buflen, - size_t *bc) +static int mlx5_ib_read_kernel_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, + void *buffer, size_t buflen, size_t *bc) +{ + struct mlx5_wqe_ctrl_seg *ctrl; + size_t bytes_copied = 0; + size_t wqe_length; + void *p; + int ds; + + wqe_index = wqe_index & qp->sq.fbc.sz_m1; + + /* read the control segment first */ + p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, wqe_index); + ctrl = p; + ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK; + wqe_length = ds * MLX5_WQE_DS_UNITS; + + /* read rest of WQE if it spreads over more than one stride */ + while (bytes_copied < wqe_length) { + size_t copy_length = + min_t(size_t, buflen - bytes_copied, MLX5_SEND_WQE_BB); + + if (!copy_length) + break; + + memcpy(buffer + bytes_copied, p, copy_length); + bytes_copied += copy_length; + + wqe_index = (wqe_index + 1) & qp->sq.fbc.sz_m1; + p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, wqe_index); + } + *bc = bytes_copied; + return 0; +} + +static int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, + void *buffer, size_t buflen, size_t *bc) { struct mlx5_ib_qp_base *base = &qp->trans_qp.base; struct ib_umem *umem = base->ubuffer.umem; @@ -176,18 +204,10 @@ int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int ret; int ds; - if (buflen < sizeof(*ctrl)) - return -EINVAL; - /* at first read as much as possible */ - ret = mlx5_ib_read_user_wqe_common(umem, - buffer, - buflen, - wqe_index, - wq->offset, - wq->wqe_cnt, - wq->wqe_shift, - buflen, + ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index, + wq->offset, wq->wqe_cnt, + wq->wqe_shift, buflen, &bytes_copied); if (ret) return ret; @@ -210,13 +230,9 @@ int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, * so read the remaining bytes starting * from wqe_index 0 */ - ret = mlx5_ib_read_user_wqe_common(umem, - buffer + bytes_copied, - buflen - bytes_copied, - 0, - wq->offset, - wq->wqe_cnt, - wq->wqe_shift, + ret = mlx5_ib_read_user_wqe_common(umem, buffer + bytes_copied, + buflen - bytes_copied, 0, wq->offset, + wq->wqe_cnt, wq->wqe_shift, wqe_length - bytes_copied, &bytes_copied2); @@ -226,11 +242,24 @@ int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, return 0; } -int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, - int wqe_index, - void *buffer, - int buflen, - size_t *bc) +int mlx5_ib_read_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, + size_t buflen, size_t *bc) +{ + struct mlx5_ib_qp_base *base = &qp->trans_qp.base; + struct ib_umem *umem = base->ubuffer.umem; + + if (buflen < sizeof(struct mlx5_wqe_ctrl_seg)) + return -EINVAL; + + if (!umem) + return mlx5_ib_read_kernel_wqe_sq(qp, wqe_index, buffer, + buflen, bc); + + return mlx5_ib_read_user_wqe_sq(qp, wqe_index, buffer, buflen, bc); +} + +static int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, + void *buffer, size_t buflen, size_t *bc) { struct mlx5_ib_qp_base *base = &qp->trans_qp.base; struct ib_umem *umem = base->ubuffer.umem; @@ -238,14 +267,9 @@ int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, size_t bytes_copied; int ret; - ret = mlx5_ib_read_user_wqe_common(umem, - buffer, - buflen, - wqe_index, - wq->offset, - wq->wqe_cnt, - wq->wqe_shift, - buflen, + ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index, + wq->offset, wq->wqe_cnt, + wq->wqe_shift, buflen, &bytes_copied); if (ret) @@ -254,25 +278,33 @@ int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, return 0; } -int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, - int wqe_index, - void *buffer, - int buflen, - size_t *bc) +int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, + size_t buflen, size_t *bc) +{ + struct mlx5_ib_qp_base *base = &qp->trans_qp.base; + struct ib_umem *umem = base->ubuffer.umem; + struct mlx5_ib_wq *wq = &qp->rq; + size_t wqe_size = 1 << wq->wqe_shift; + + if (buflen < wqe_size) + return -EINVAL; + + if (!umem) + return -EOPNOTSUPP; + + return mlx5_ib_read_user_wqe_rq(qp, wqe_index, buffer, buflen, bc); +} + +static int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, + void *buffer, size_t buflen, size_t *bc) { struct ib_umem *umem = srq->umem; size_t bytes_copied; int ret; - ret = mlx5_ib_read_user_wqe_common(umem, - buffer, - buflen, - wqe_index, - 0, - srq->msrq.max, - srq->msrq.wqe_shift, - buflen, - &bytes_copied); + ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index, 0, + srq->msrq.max, srq->msrq.wqe_shift, + buflen, &bytes_copied); if (ret) return ret; @@ -280,6 +312,21 @@ int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, return 0; } +int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer, + size_t buflen, size_t *bc) +{ + struct ib_umem *umem = srq->umem; + size_t wqe_size = 1 << srq->msrq.wqe_shift; + + if (buflen < wqe_size) + return -EINVAL; + + if (!umem) + return -EOPNOTSUPP; + + return mlx5_ib_read_user_wqe_srq(srq, wqe_index, buffer, buflen, bc); +} + static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) { struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; @@ -749,7 +796,7 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, { int err; - *umem = ib_umem_get(udata, addr, size, 0); + *umem = ib_umem_get(&dev->ib_dev, addr, size, 0); if (IS_ERR(*umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); return PTR_ERR(*umem); @@ -806,7 +853,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (!ucmd->buf_addr) return -EINVAL; - rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0); + rwq->umem = ib_umem_get(&dev->ib_dev, ucmd->buf_addr, rwq->buf_size, 0); if (IS_ERR(rwq->umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); err = PTR_ERR(rwq->umem); @@ -1871,7 +1918,7 @@ static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev, { enum ib_qp_type qpt = init_attr->qp_type; int scqe_sz; - bool allow_scat_cqe = 0; + bool allow_scat_cqe = false; if (qpt == IB_QPT_UC || qpt == IB_QPT_UD) return; @@ -3394,9 +3441,6 @@ static int __mlx5_ib_qp_set_counter(struct ib_qp *qp, struct mlx5_ib_qp_base *base; u32 set_id; - if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id)) - return 0; - if (counter) set_id = counter->id; else @@ -4823,7 +4867,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC; u8 flags = 0; - if (!mlx5_ib_can_use_umr(dev, atomic)) { + if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) { mlx5_ib_warn(to_mdev(qp->ibqp.device), "Fast update of %s for MR is disabled\n", (MLX5_CAP_GEN(dev->mdev, @@ -6529,6 +6573,7 @@ void mlx5_ib_drain_rq(struct ib_qp *qp) */ int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter) { + struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); int err = 0; @@ -6538,6 +6583,11 @@ int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter) goto out; } + if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id)) { + err = -EOPNOTSUPP; + goto out; + } + if (mqp->state == IB_QPS_RTS) { err = __mlx5_ib_qp_set_counter(qp, counter); if (!err) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 62939df3c692..b1a8a9175040 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -80,7 +80,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); - srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0); + srq->umem = ib_umem_get(pd->device, ucmd.buf_addr, buf_size, 0); if (IS_ERR(srq->umem)) { mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size); err = PTR_ERR(srq->umem); |