diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r-- | drivers/infiniband/hw/mlx5/counters.c | 7 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/cq.c | 77 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/devx.c | 90 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/fs.c | 6 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/ib_rep.c | 112 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/ib_rep.h | 45 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 254 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mem.c | 192 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 106 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 960 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c | 56 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/qp.c | 197 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/restrack.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/srq.c | 34 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/srq.h | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/srq_cmd.c | 80 |
16 files changed, 1187 insertions, 1032 deletions
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 70c8fd67ee2f..084652e2b15a 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -138,13 +138,6 @@ static int mlx5_ib_create_counters(struct ib_counters *counters, } -static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev) -{ - return MLX5_ESWITCH_MANAGER(mdev) && - mlx5_ib_eswitch_mode(mdev->priv.eswitch) == - MLX5_ESWITCH_OFFLOADS; -} - static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, u8 port_num) { diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index fb62f1d04afa..eb92cefffd77 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -707,10 +707,10 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd = {}; + unsigned long page_size; + unsigned int page_offset_quantized; size_t ucmdlen; - int page_shift; __be64 *pas; - int npages; int ncont; void *cqc; int err; @@ -742,14 +742,24 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, return err; } + page_size = mlx5_umem_find_best_cq_quantized_pgoff( + cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } + err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &cq->db); if (err) goto err_umem; - mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift, - &ncont, NULL); - mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", - ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); + ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size); + mlx5_ib_dbg( + dev, + "addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n", + ucmd.buf_addr, entries * ucmd.cqe_size, + ib_umem_num_pages(cq->buf.umem), page_size, ncont); *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont; @@ -760,11 +770,12 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, } pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); - mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0); + mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0); cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); MLX5_SET(cqc, cqc, log_page_size, - page_shift - MLX5_ADAPTER_PAGE_SHIFT); + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) { *index = ucmd.uar_page_index; @@ -1128,13 +1139,12 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) } static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, - int entries, struct ib_udata *udata, int *npas, - int *page_shift, int *cqe_size) + int entries, struct ib_udata *udata, + int *cqe_size) { struct mlx5_ib_resize_cq ucmd; struct ib_umem *umem; int err; - int npages; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) @@ -1155,9 +1165,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, return err; } - mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift, - npas, NULL); - cq->resize_umem = umem; *cqe_size = ucmd.cqe_size; @@ -1250,7 +1257,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) int err; int npas; __be64 *pas; - int page_shift; + unsigned int page_offset_quantized = 0; + unsigned int page_shift; int inlen; int cqe_size; unsigned long flags; @@ -1277,22 +1285,34 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) mutex_lock(&cq->resize_mutex); if (udata) { - err = resize_user(dev, cq, entries, udata, &npas, &page_shift, - &cqe_size); + unsigned long page_size; + + err = resize_user(dev, cq, entries, udata, &cqe_size); + if (err) + goto ex; + + page_size = mlx5_umem_find_best_cq_quantized_pgoff( + cq->resize_umem, cqc, log_page_size, + MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, + &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto ex_resize; + } + npas = ib_umem_num_dma_blocks(cq->resize_umem, page_size); + page_shift = order_base_2(page_size); } else { + struct mlx5_frag_buf *frag_buf; + cqe_size = 64; err = resize_kernel(dev, cq, entries, cqe_size); - if (!err) { - struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf; - - npas = frag_buf->npages; - page_shift = frag_buf->page_shift; - } + if (err) + goto ex; + frag_buf = &cq->resize_buf->frag_buf; + npas = frag_buf->npages; + page_shift = frag_buf->page_shift; } - if (err) - goto ex; - inlen = MLX5_ST_SZ_BYTES(modify_cq_in) + MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas; @@ -1304,8 +1324,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas); if (udata) - mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, - pas, 0); + mlx5_ib_populate_pas(cq->resize_umem, 1UL << page_shift, pas, + 0); else mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas); @@ -1319,6 +1339,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) MLX5_SET(cqc, cqc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size, cq->private_flags & diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 9e3d8b826498..819c142857d6 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -93,9 +93,6 @@ struct devx_async_event_file { struct devx_umem { struct mlx5_core_dev *mdev; struct ib_umem *umem; - u32 page_offset; - int page_shift; - int ncont; u32 dinlen; u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; }; @@ -1311,7 +1308,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, else ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; devx_event_table = &dev->devx_event_table; @@ -2057,9 +2054,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, u64 addr; size_t size; u32 access; - int npages; int err; - u32 page_mask; if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN)) @@ -2073,57 +2068,62 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - err = ib_check_mr_access(access); + err = ib_check_mr_access(&dev->ib_dev, access); if (err) return err; obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); - - mlx5_ib_cont_pages(obj->umem, obj->umem->address, - MLX5_MKEY_PAGE_SHIFT_MASK, &npages, - &obj->page_shift, &obj->ncont, NULL); - - if (!npages) { - ib_umem_release(obj->umem); - return -EINVAL; - } - - page_mask = (1 << obj->page_shift) - 1; - obj->page_offset = obj->umem->address & page_mask; - return 0; } -static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs, +static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, + struct uverbs_attr_bundle *attrs, struct devx_umem *obj, struct devx_umem_reg_cmd *cmd) { + unsigned int page_size; + __be64 *mtt; + void *umem; + + /* + * We don't know what the user intends to use this umem for, but the HW + * restrictions must be met. MR, doorbell records, QP, WQ and CQ all + * have different requirements. Since we have no idea how to sort this + * out, only support PAGE_SIZE with the expectation that userspace will + * provide the necessary alignments inside the known PAGE_SIZE and that + * FW will check everything. + */ + page_size = ib_umem_find_best_pgoff( + obj->umem, PAGE_SIZE, + __mlx5_page_offset_to_bitmask(__mlx5_bit_sz(umem, page_offset), + 0)); + if (!page_size) + return -EINVAL; + cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) + - (MLX5_ST_SZ_BYTES(mtt) * obj->ncont); + (MLX5_ST_SZ_BYTES(mtt) * + ib_umem_num_dma_blocks(obj->umem, page_size)); cmd->in = uverbs_zalloc(attrs, cmd->inlen); - return PTR_ERR_OR_ZERO(cmd->in); -} - -static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, - struct devx_umem *obj, - struct devx_umem_reg_cmd *cmd) -{ - void *umem; - __be64 *mtt; + if (IS_ERR(cmd->in)) + return PTR_ERR(cmd->in); umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM); - MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); - MLX5_SET(umem, umem, log_page_size, obj->page_shift - - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(umem, umem, page_offset, obj->page_offset); - mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt, + MLX5_SET64(umem, umem, num_of_mtt, + ib_umem_num_dma_blocks(obj->umem, page_size)); + MLX5_SET(umem, umem, log_page_size, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(umem, umem, page_offset, + ib_umem_dma_offset(obj->umem, page_size)); + + mlx5_ib_populate_pas(obj->umem, page_size, mtt, (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) | - MLX5_IB_MTT_READ); + MLX5_IB_MTT_READ); + return 0; } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( @@ -2150,12 +2150,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( if (err) goto err_obj_free; - err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd); + err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd); if (err) goto err_umem_release; - devx_umem_reg_cmd_build(dev, obj, &cmd); - MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid); err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, sizeof(cmd.out)); @@ -2187,7 +2185,7 @@ static int devx_umem_cleanup(struct ib_uobject *uobject, int err; err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (ib_is_destroy_retryable(err, why, uobject)) + if (err) return err; ib_umem_release(obj->umem); @@ -2600,8 +2598,8 @@ static const struct file_operations devx_async_event_fops = { .llseek = no_llseek, }; -static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_cmd_event_file *comp_ev_file = container_of(uobj, struct devx_async_cmd_event_file, @@ -2623,11 +2621,10 @@ static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, kvfree(entry); } spin_unlock_irq(&comp_ev_file->ev_queue.lock); - return 0; }; -static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void devx_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_event_file *ev_file = container_of(uobj, struct devx_async_event_file, @@ -2671,7 +2668,6 @@ static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, mutex_unlock(&dev->devx_event_table.event_xa_lock); put_device(&dev->ib_dev.dev); - return 0; }; DECLARE_UVERBS_NAMED_METHOD( diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 492cfe063bca..25da0b05b4e2 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -2035,11 +2035,9 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct mlx5_ib_flow_matcher *obj = uobject->object; - int ret; - ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&obj->usecnt)) + return -EBUSY; kfree(obj); return 0; diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 5c3d052ac30b..9164cc069ad4 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -13,7 +13,7 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) struct mlx5_ib_dev *ibdev; int vport_index; - ibdev = mlx5_ib_get_uplink_ibdev(dev->priv.eswitch); + ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB); vport_index = rep->vport_index; ibdev->port[vport_index].rep = rep; @@ -33,6 +33,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) const struct mlx5_ib_profile *profile; struct mlx5_ib_dev *ibdev; int vport_index; + int ret; if (rep->vport == MLX5_VPORT_UPLINK) profile = &raw_eth_profile; @@ -46,8 +47,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port), GFP_KERNEL); if (!ibdev->port) { - ib_dealloc_device(&ibdev->ib_dev); - return -ENOMEM; + ret = -ENOMEM; + goto fail_port; } ibdev->is_rep = true; @@ -58,12 +59,24 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) ibdev->mdev = dev; ibdev->num_ports = num_ports; - if (!__mlx5_ib_add(ibdev, profile)) - return -EINVAL; + ret = __mlx5_ib_add(ibdev, profile); + if (ret) + goto fail_add; rep->rep_data[REP_IB].priv = ibdev; return 0; + +fail_add: + kfree(ibdev->port); +fail_port: + ib_dealloc_device(&ibdev->ib_dev); + return ret; +} + +static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) +{ + return rep->rep_data[REP_IB].priv; } static void @@ -83,59 +96,18 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); } -static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) -{ - return mlx5_ib_rep_to_dev(rep); -} - static const struct mlx5_eswitch_rep_ops rep_ops = { .load = mlx5_ib_vport_rep_load, .unload = mlx5_ib_vport_rep_unload, - .get_proto_dev = mlx5_ib_vport_get_proto_dev, + .get_proto_dev = mlx5_ib_rep_to_dev, }; -void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) -{ - struct mlx5_eswitch *esw = mdev->priv.eswitch; - - mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB); -} - -void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) -{ - struct mlx5_eswitch *esw = mdev->priv.eswitch; - - mlx5_eswitch_unregister_vport_reps(esw, REP_IB); -} - -u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) -{ - return mlx5_eswitch_mode(esw); -} - -struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, - u16 vport_num) -{ - return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_IB); -} - struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, u16 vport_num) { return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH); } -struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw) -{ - return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB); -} - -struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, - u16 vport_num) -{ - return mlx5_eswitch_vport_rep(esw, vport_num); -} - struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, u16 port) @@ -154,3 +126,49 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, return mlx5_eswitch_add_send_to_vport_rule(esw, rep->vport, sq->base.mqp.qpn); } + +static int mlx5r_rep_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = idev->mdev; + struct mlx5_eswitch *esw; + + esw = mdev->priv.eswitch; + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB); + return 0; +} + +static void mlx5r_rep_remove(struct auxiliary_device *adev) +{ + struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = idev->mdev; + struct mlx5_eswitch *esw; + + esw = mdev->priv.eswitch; + mlx5_eswitch_unregister_vport_reps(esw, REP_IB); +} + +static const struct auxiliary_device_id mlx5r_rep_id_table[] = { + { .name = MLX5_ADEV_NAME ".rdma-rep", }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table); + +static struct auxiliary_driver mlx5r_rep_driver = { + .name = "rep", + .probe = mlx5r_rep_probe, + .remove = mlx5r_rep_remove, + .id_table = mlx5r_rep_id_table, +}; + +int mlx5r_rep_init(void) +{ + return auxiliary_driver_register(&mlx5r_rep_driver); +} + +void mlx5r_rep_cleanup(void) +{ + auxiliary_driver_unregister(&mlx5r_rep_driver); +} diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 5b30d3fa8f8d..ce1dcb105dbd 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -12,47 +12,16 @@ extern const struct mlx5_ib_profile raw_eth_profile; #ifdef CONFIG_MLX5_ESWITCH -u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw); -struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, - u16 vport_num); -struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw); -struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, - u16 vport_num); -void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev); -void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev); +int mlx5r_rep_init(void); +void mlx5r_rep_cleanup(void); struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, u16 port); struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, u16 vport_num); #else /* CONFIG_MLX5_ESWITCH */ -static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) -{ - return MLX5_ESWITCH_NONE; -} - -static inline -struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, - u16 vport_num) -{ - return NULL; -} - -static inline -struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw) -{ - return NULL; -} - -static inline -struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, - u16 vport_num) -{ - return NULL; -} - -static inline void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) {} -static inline void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) {} +static inline int mlx5r_rep_init(void) { return 0; } +static inline void mlx5r_rep_cleanup(void) {} static inline struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, @@ -68,10 +37,4 @@ struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, return NULL; } #endif - -static inline -struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) -{ - return rep->rep_data[REP_IB].priv; -} #endif /* __MLX5_IB_REP_H__ */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 246e3cbe0b2c..3bae9ba0ead8 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -75,12 +75,6 @@ static LIST_HEAD(mlx5_ib_dev_list); */ static DEFINE_MUTEX(mlx5_ib_multiport_mutex); -/* We can't use an array for xlt_emergency_page because dma_map_single - * doesn't work on kernel modules memory - */ -static unsigned long xlt_emergency_page; -static struct mutex xlt_emergency_page_mutex; - struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi) { struct mlx5_ib_dev *dev; @@ -425,10 +419,22 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed, *active_width = IB_WIDTH_2X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_100GAUI_1_100GBASE_CR_KR): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_NDR; + break; case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4): *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_200GAUI_2_200GBASE_CR2_KR2): + *active_width = IB_WIDTH_2X; + *active_speed = IB_SPEED_NDR; + break; + case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4): + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_NDR; + break; default: return -EINVAL; } @@ -2628,7 +2634,7 @@ static ssize_t fw_pages_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); + return sysfs_emit(buf, "%d\n", dev->mdev->priv.fw_pages); } static DEVICE_ATTR_RO(fw_pages); @@ -2638,7 +2644,7 @@ static ssize_t reg_pages_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); + return sysfs_emit(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); } static DEVICE_ATTR_RO(reg_pages); @@ -2648,7 +2654,7 @@ static ssize_t hca_type_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); + return sysfs_emit(buf, "MT%d\n", dev->mdev->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -2658,7 +2664,7 @@ static ssize_t hw_rev_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%x\n", dev->mdev->rev_id); + return sysfs_emit(buf, "%x\n", dev->mdev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -2668,8 +2674,8 @@ static ssize_t board_id_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, - dev->mdev->board_id); + return sysfs_emit(buf, "%.*s\n", MLX5_BOARD_ID_LEN, + dev->mdev->board_id); } static DEVICE_ATTR_RO(board_id); @@ -4024,6 +4030,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .create_cq = mlx5_ib_create_cq, .create_qp = mlx5_ib_create_qp, .create_srq = mlx5_ib_create_srq, + .create_user_ah = mlx5_ib_create_ah, .dealloc_pd = mlx5_ib_dealloc_pd, .dealloc_ucontext = mlx5_ib_dealloc_ucontext, .del_gid = mlx5_ib_del_gid, @@ -4141,42 +4148,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; int err; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); - dev->ib_dev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); - if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) ib_set_device_ops(&dev->ib_dev, @@ -4187,19 +4158,11 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); - if (MLX5_CAP_GEN(mdev, imaicl)) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + if (MLX5_CAP_GEN(mdev, imaicl)) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops); - } - if (MLX5_CAP_GEN(mdev, xrc)) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | - (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + if (MLX5_CAP_GEN(mdev, xrc)) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); - } if (MLX5_CAP_DEV_MEM(mdev, memic) || MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & @@ -4278,12 +4241,6 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); port_num = mlx5_core_native_port_num(dev->mdev) - 1; @@ -4593,8 +4550,8 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, ib_dealloc_device(&dev->ib_dev); } -void *__mlx5_ib_add(struct mlx5_ib_dev *dev, - const struct mlx5_ib_profile *profile) +int __mlx5_ib_add(struct mlx5_ib_dev *dev, + const struct mlx5_ib_profile *profile) { int err; int i; @@ -4610,13 +4567,16 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, } dev->ib_active = true; - - return dev; + return 0; err_out: - __mlx5_ib_remove(dev, profile, i); - - return NULL; + /* Clean up stages which were initialized */ + while (i) { + i--; + if (profile->stage[i].cleanup) + profile->stage[i].cleanup(dev); + } + return -ENOMEM; } static const struct mlx5_ib_profile pf_profile = { @@ -4739,8 +4699,11 @@ const struct mlx5_ib_profile raw_eth_profile = { NULL), }; -static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) +static int mlx5r_mp_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) { + struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = idev->mdev; struct mlx5_ib_multiport_info *mpi; struct mlx5_ib_dev *dev; bool bound = false; @@ -4748,15 +4711,14 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) mpi = kzalloc(sizeof(*mpi), GFP_KERNEL); if (!mpi) - return NULL; + return -ENOMEM; mpi->mdev = mdev; - err = mlx5_query_nic_vport_system_image_guid(mdev, &mpi->sys_image_guid); if (err) { kfree(mpi); - return NULL; + return err; } mutex_lock(&mlx5_ib_multiport_mutex); @@ -4777,40 +4739,46 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) } mutex_unlock(&mlx5_ib_multiport_mutex); - return mpi; + dev_set_drvdata(&adev->dev, mpi); + return 0; } -static void *mlx5_ib_add(struct mlx5_core_dev *mdev) +static void mlx5r_mp_remove(struct auxiliary_device *adev) { + struct mlx5_ib_multiport_info *mpi; + + mpi = dev_get_drvdata(&adev->dev); + mutex_lock(&mlx5_ib_multiport_mutex); + if (mpi->ibdev) + mlx5_ib_unbind_slave_port(mpi->ibdev, mpi); + list_del(&mpi->list); + mutex_unlock(&mlx5_ib_multiport_mutex); + kfree(mpi); +} + +static int mlx5r_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = idev->mdev; const struct mlx5_ib_profile *profile; + int port_type_cap, num_ports, ret; enum rdma_link_layer ll; struct mlx5_ib_dev *dev; - int port_type_cap; - int num_ports; - - if (MLX5_ESWITCH_MANAGER(mdev) && - mlx5_ib_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) { - if (!mlx5_core_mp_enabled(mdev)) - mlx5_ib_register_vport_reps(mdev); - return mdev; - } port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); - if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) - return mlx5_ib_add_slave_port(mdev); - num_ports = max(MLX5_CAP_GEN(mdev, num_ports), MLX5_CAP_GEN(mdev, num_vhca_ports)); dev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!dev) - return NULL; + return -ENOMEM; dev->port = kcalloc(num_ports, sizeof(*dev->port), GFP_KERNEL); if (!dev->port) { ib_dealloc_device(&dev->ib_dev); - return NULL; + return -ENOMEM; } dev->mdev = mdev; @@ -4821,80 +4789,96 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) else profile = &pf_profile; - return __mlx5_ib_add(dev, profile); + ret = __mlx5_ib_add(dev, profile); + if (ret) { + kfree(dev->port); + ib_dealloc_device(&dev->ib_dev); + return ret; + } + + dev_set_drvdata(&adev->dev, dev); + return 0; } -static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) +static void mlx5r_remove(struct auxiliary_device *adev) { - struct mlx5_ib_multiport_info *mpi; struct mlx5_ib_dev *dev; - if (MLX5_ESWITCH_MANAGER(mdev) && context == mdev) { - mlx5_ib_unregister_vport_reps(mdev); - return; - } - - if (mlx5_core_is_mp_slave(mdev)) { - mpi = context; - mutex_lock(&mlx5_ib_multiport_mutex); - if (mpi->ibdev) - mlx5_ib_unbind_slave_port(mpi->ibdev, mpi); - list_del(&mpi->list); - mutex_unlock(&mlx5_ib_multiport_mutex); - kfree(mpi); - return; - } - - dev = context; + dev = dev_get_drvdata(&adev->dev); __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); } -static struct mlx5_interface mlx5_ib_interface = { - .add = mlx5_ib_add, - .remove = mlx5_ib_remove, - .protocol = MLX5_INTERFACE_PROTOCOL_IB, +static const struct auxiliary_device_id mlx5r_mp_id_table[] = { + { .name = MLX5_ADEV_NAME ".multiport", }, + {}, }; -unsigned long mlx5_ib_get_xlt_emergency_page(void) -{ - mutex_lock(&xlt_emergency_page_mutex); - return xlt_emergency_page; -} +static const struct auxiliary_device_id mlx5r_id_table[] = { + { .name = MLX5_ADEV_NAME ".rdma", }, + {}, +}; -void mlx5_ib_put_xlt_emergency_page(void) -{ - mutex_unlock(&xlt_emergency_page_mutex); -} +MODULE_DEVICE_TABLE(auxiliary, mlx5r_mp_id_table); +MODULE_DEVICE_TABLE(auxiliary, mlx5r_id_table); + +static struct auxiliary_driver mlx5r_mp_driver = { + .name = "multiport", + .probe = mlx5r_mp_probe, + .remove = mlx5r_mp_remove, + .id_table = mlx5r_mp_id_table, +}; + +static struct auxiliary_driver mlx5r_driver = { + .name = "rdma", + .probe = mlx5r_probe, + .remove = mlx5r_remove, + .id_table = mlx5r_id_table, +}; static int __init mlx5_ib_init(void) { - int err; + int ret; - xlt_emergency_page = __get_free_page(GFP_KERNEL); + xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL); if (!xlt_emergency_page) return -ENOMEM; - mutex_init(&xlt_emergency_page_mutex); - mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0); if (!mlx5_ib_event_wq) { - free_page(xlt_emergency_page); + free_page((unsigned long)xlt_emergency_page); return -ENOMEM; } mlx5_ib_odp_init(); + ret = mlx5r_rep_init(); + if (ret) + goto rep_err; + ret = auxiliary_driver_register(&mlx5r_mp_driver); + if (ret) + goto mp_err; + ret = auxiliary_driver_register(&mlx5r_driver); + if (ret) + goto drv_err; + return 0; - err = mlx5_register_interface(&mlx5_ib_interface); - - return err; +drv_err: + auxiliary_driver_unregister(&mlx5r_mp_driver); +mp_err: + mlx5r_rep_cleanup(); +rep_err: + destroy_workqueue(mlx5_ib_event_wq); + free_page((unsigned long)xlt_emergency_page); + return ret; } static void __exit mlx5_ib_cleanup(void) { - mlx5_unregister_interface(&mlx5_ib_interface); + auxiliary_driver_unregister(&mlx5r_driver); + auxiliary_driver_unregister(&mlx5r_mp_driver); + mlx5r_rep_cleanup(); + destroy_workqueue(mlx5_ib_event_wq); - mutex_destroy(&xlt_emergency_page_mutex); - free_page(xlt_emergency_page); + free_page((unsigned long)xlt_emergency_page); } module_init(mlx5_ib_init); diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 13de3d2edd34..844545064c9e 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -36,161 +36,65 @@ #include "mlx5_ib.h" #include <linux/jiffies.h> -/* @umem: umem object to scan - * @addr: ib virtual address requested by the user - * @max_page_shift: high limit for page_shift - 0 means no limit - * @count: number of PAGE_SIZE pages covered by umem - * @shift: page shift for the compound pages found in the region - * @ncont: number of compund pages - * @order: log2 of the number of compound pages +/* + * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be + * filled in the pas array. */ -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, - int *count, int *shift, - int *ncont, int *order) +void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, + u64 access_flags) { - unsigned long tmp; - unsigned long m; - u64 base = ~0, p = 0; - u64 len, pfn; - int i = 0; - struct scatterlist *sg; - int entry; - - addr = addr >> PAGE_SHIFT; - tmp = (unsigned long)addr; - m = find_first_bit(&tmp, BITS_PER_LONG); - if (max_page_shift) - m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); - - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - pfn = sg_dma_address(sg) >> PAGE_SHIFT; - if (base + p != pfn) { - /* If either the offset or the new - * base are unaligned update m - */ - tmp = (unsigned long)(pfn | p); - if (!IS_ALIGNED(tmp, 1 << m)) - m = find_first_bit(&tmp, BITS_PER_LONG); - - base = pfn; - p = 0; - } - - p += len; - i += len; - } - - if (i) { - m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); - - if (order) - *order = ilog2(roundup_pow_of_two(i) >> m); - - *ncont = DIV_ROUND_UP(i, (1 << m)); - } else { - m = 0; + struct ib_block_iter biter; - if (order) - *order = 0; - - *ncont = 0; + rdma_umem_for_each_dma_block (umem, &biter, page_size) { + *pas = cpu_to_be64(rdma_block_iter_dma_address(&biter) | + access_flags); + pas++; } - *shift = PAGE_SHIFT + m; - *count = i; } /* - * Populate the given array with bus addresses from the umem. - * - * dev - mlx5_ib device - * umem - umem to use to fill the pages - * page_shift - determines the page size used in the resulting array - * offset - offset into the umem to start from, - * only implemented for ODP umems - * num_pages - total number of pages to fill - * pas - bus addresses array to fill - * access_flags - access flags to set on all present pages. - use enum mlx5_ib_mtt_access_flags for this. + * Compute the page shift and page_offset for mailboxes that use a quantized + * page_offset. The granulatity of the page offset scales according to page + * size. */ -void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, size_t offset, size_t num_pages, - __be64 *pas, int access_flags) +unsigned long __mlx5_umem_find_best_quantized_pgoff( + struct ib_umem *umem, unsigned long pgsz_bitmap, + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, + unsigned int *page_offset_quantized) { - int shift = page_shift - PAGE_SHIFT; - int mask = (1 << shift) - 1; - int i, k, idx; - u64 cur = 0; - u64 base; - int len; - struct scatterlist *sg; - int entry; - - i = 0; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - base = sg_dma_address(sg); - - /* Skip elements below offset */ - if (i + len < offset << shift) { - i += len; - continue; - } - - /* Skip pages below offset */ - if (i < offset << shift) { - k = (offset << shift) - i; - i = offset << shift; - } else { - k = 0; - } - - for (; k < len; k++) { - if (!(i & mask)) { - cur = base + (k << PAGE_SHIFT); - cur |= access_flags; - idx = (i >> shift) - offset; - - pas[idx] = cpu_to_be64(cur); - mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", - i >> shift, be64_to_cpu(pas[idx])); - } - i++; - - /* Stop after num_pages reached */ - if (i >> shift >= offset + num_pages) - return; - } + const u64 page_offset_mask = (1UL << page_offset_bits) - 1; + unsigned long page_size; + u64 page_offset; + + page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, pgoff_bitmask); + if (!page_size) + return 0; + + /* + * page size is the largest possible page size. + * + * Reduce the page_size, and thus the page_offset and quanta, until the + * page_offset fits into the mailbox field. Once page_size < scale this + * loop is guaranteed to terminate. + */ + page_offset = ib_umem_dma_offset(umem, page_size); + while (page_offset & ~(u64)(page_offset_mask * (page_size / scale))) { + page_size /= 2; + page_offset = ib_umem_dma_offset(umem, page_size); } -} -void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, __be64 *pas, int access_flags) -{ - return __mlx5_ib_populate_pas(dev, umem, page_shift, 0, - ib_umem_num_dma_blocks(umem, PAGE_SIZE), - pas, access_flags); -} -int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) -{ - u64 page_size; - u64 page_mask; - u64 off_size; - u64 off_mask; - u64 buf_off; - - page_size = (u64)1 << page_shift; - page_mask = page_size - 1; - buf_off = addr & page_mask; - off_size = page_size >> 6; - off_mask = off_size - 1; - - if (buf_off & off_mask) - return -EINVAL; - - *offset = buf_off >> ilog2(off_size); - return 0; + /* + * The address is not aligned, or otherwise cannot be represented by the + * page_offset. + */ + if (!(pgsz_bitmap & page_size)) + return 0; + + *page_offset_quantized = + (unsigned long)page_offset / (page_size / scale); + if (WARN_ON(*page_offset_quantized > page_offset_mask)) + return 0; + return page_size; } #define WR_ID_BF 0xBF diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b1f2b34e5955..b0fdc1b08e06 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -40,7 +40,73 @@ #define MLX5_IB_DEFAULT_UIDX 0xffffff #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) -#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size) +static __always_inline unsigned long +__mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits, + unsigned int pgsz_shift) +{ + unsigned int largest_pg_shift = + min_t(unsigned long, (1ULL << log_pgsz_bits) - 1 + pgsz_shift, + BITS_PER_LONG - 1); + + /* + * Despite a command allowing it, the device does not support lower than + * 4k page size. + */ + pgsz_shift = max_t(unsigned int, MLX5_ADAPTER_PAGE_SHIFT, pgsz_shift); + return GENMASK(largest_pg_shift, pgsz_shift); +} + +/* + * For mkc users, instead of a page_offset the command has a start_iova which + * specifies both the page_offset and the on-the-wire IOVA + */ +#define mlx5_umem_find_best_pgsz(umem, typ, log_pgsz_fld, pgsz_shift, iova) \ + ib_umem_find_best_pgsz(umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), \ + pgsz_shift), \ + iova) + +static __always_inline unsigned long +__mlx5_page_offset_to_bitmask(unsigned int page_offset_bits, + unsigned int offset_shift) +{ + unsigned int largest_offset_shift = + min_t(unsigned long, page_offset_bits - 1 + offset_shift, + BITS_PER_LONG - 1); + + return GENMASK(largest_offset_shift, offset_shift); +} + +/* + * QP/CQ/WQ/etc type commands take a page offset that satisifies: + * page_offset_quantized * (page_size/scale) = page_offset + * Which restricts allowed page sizes to ones that satisify the above. + */ +unsigned long __mlx5_umem_find_best_quantized_pgoff( + struct ib_umem *umem, unsigned long pgsz_bitmap, + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, + unsigned int *page_offset_quantized); +#define mlx5_umem_find_best_quantized_pgoff(umem, typ, log_pgsz_fld, \ + pgsz_shift, page_offset_fld, \ + scale, page_offset_quantized) \ + __mlx5_umem_find_best_quantized_pgoff( \ + umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \ + __mlx5_bit_sz(typ, page_offset_fld), \ + GENMASK(31, order_base_2(scale)), scale, \ + page_offset_quantized) + +#define mlx5_umem_find_best_cq_quantized_pgoff(umem, typ, log_pgsz_fld, \ + pgsz_shift, page_offset_fld, \ + scale, page_offset_quantized) \ + __mlx5_umem_find_best_quantized_pgoff( \ + umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \ + __mlx5_bit_sz(typ, page_offset_fld), 0, scale, \ + page_offset_quantized) enum { MLX5_IB_MMAP_OFFSET_START = 9, @@ -597,14 +663,12 @@ struct mlx5_ib_mr { int max_descs; int desc_size; int access_mode; + unsigned int page_shift; struct mlx5_core_mkey mmkey; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; - unsigned int order; struct mlx5_cache_ent *cache_ent; - int npages; - struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; void *descs_alloc; @@ -1042,6 +1106,11 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) return container_of(ibdev, struct mlx5_ib_dev, ib_dev); } +static inline struct mlx5_ib_dev *mr_to_mdev(struct mlx5_ib_mr *mr) +{ + return to_mdev(mr->ibmr.device); +} + static inline struct mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata) { struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( @@ -1189,9 +1258,9 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); -int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, - u64 length, u64 virt_addr, int access_flags, - struct ib_pd *pd, struct ib_udata *udata); +struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 virt_addr, int access_flags, + struct ib_pd *pd, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); @@ -1210,7 +1279,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); -int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, struct ib_smp *out_mad); @@ -1230,15 +1298,8 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, - int *count, int *shift, - int *ncont, int *order); -void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, size_t offset, size_t num_pages, - __be64 *pas, int access_flags); -void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, __be64 *pas, int access_flags); +void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, + u64 access_flags); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); @@ -1283,7 +1344,7 @@ void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge); -int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable); +int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1305,7 +1366,7 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { return -EOPNOTSUPP; } -static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable) +static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) { return -EOPNOTSUPP; } @@ -1317,8 +1378,8 @@ extern const struct mmu_interval_notifier_ops mlx5_mn_ops; void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage); -void *__mlx5_ib_add(struct mlx5_ib_dev *dev, - const struct mlx5_ib_profile *profile); +int __mlx5_ib_add(struct mlx5_ib_dev *dev, + const struct mlx5_ib_profile *profile); int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *info); @@ -1456,8 +1517,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev, return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages; } -unsigned long mlx5_ib_get_xlt_emergency_page(void); -void mlx5_ib_put_xlt_emergency_page(void); +extern void *xlt_emergency_page; int bfregn_to_uar_index(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, u32 bfregn, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b261797b258f..24f8d59a42ea 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -41,6 +41,13 @@ #include <rdma/ib_verbs.h> #include "mlx5_ib.h" +/* + * We can't use an array for xlt_emergency_page because dma_map_single doesn't + * work on kernel modules memory + */ +void *xlt_emergency_page; +static DEFINE_MUTEX(xlt_emergency_page_mutex); + enum { MAX_PENDING_REG_MR = 8, }; @@ -49,6 +56,9 @@ enum { static void create_mkey_callback(int status, struct mlx5_async_work *context); +static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, + u64 iova, int access_flags, + unsigned int page_size, bool populate); static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, struct ib_pd *pd) @@ -123,19 +133,12 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start, - u64 length) -{ - return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= - length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); -} - static void create_mkey_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = container_of(context, struct mlx5_ib_mr, cb_work); - struct mlx5_ib_dev *dev = mr->dev; struct mlx5_cache_ent *ent = mr->cache_ent; + struct mlx5_ib_dev *dev = ent->dev; unsigned long flags; if (status) { @@ -172,9 +175,7 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return NULL; - mr->order = ent->order; mr->cache_ent = ent; - mr->dev = ent->dev; set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); MLX5_SET(mkc, mkc, free, 1); @@ -642,6 +643,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) if (mlx5_mr_cache_invalidate(mr)) { detach_mr_from_cache(mr); destroy_mkey(dev, mr); + kfree(mr); return; } @@ -867,57 +869,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev) return MLX5_MAX_UMR_SHIFT; } -static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length, - int access_flags, struct ib_umem **umem, int *npages, - int *page_shift, int *ncont, int *order) -{ - struct ib_umem *u; - - *umem = NULL; - - if (access_flags & IB_ACCESS_ON_DEMAND) { - struct ib_umem_odp *odp; - - odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, - &mlx5_mn_ops); - if (IS_ERR(odp)) { - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", - PTR_ERR(odp)); - return PTR_ERR(odp); - } - - u = &odp->umem; - - *page_shift = odp->page_shift; - *ncont = ib_umem_odp_num_pages(odp); - *npages = *ncont << (*page_shift - PAGE_SHIFT); - if (order) - *order = ilog2(roundup_pow_of_two(*ncont)); - } else { - u = ib_umem_get(&dev->ib_dev, start, length, access_flags); - if (IS_ERR(u)) { - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); - return PTR_ERR(u); - } - - mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, - page_shift, ncont, order); - } - - if (!*npages) { - mlx5_ib_warn(dev, "avoid zero region\n"); - ib_umem_release(u); - return -EINVAL; - } - - *umem = u; - - mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", - *npages, *ncont, *order, *page_shift); - - return 0; -} - static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) { struct mlx5_ib_umr_context *context = @@ -974,25 +925,49 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, return &cache->ent[order]; } -static struct mlx5_ib_mr * -alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, - u64 len, int npages, int page_shift, unsigned int order, - int access_flags) +static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, + u64 length, int access_flags) +{ + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; + mr->ibmr.length = length; + mr->ibmr.device = &dev->ib_dev; + mr->access_flags = access_flags; +} + +static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, + struct ib_umem *umem, u64 iova, + int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order); + struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; + unsigned int page_size; - if (!ent) - return ERR_PTR(-E2BIG); - - /* Matches access in alloc_cache_mr() */ - if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) - return ERR_PTR(-EOPNOTSUPP); + page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova); + if (WARN_ON(!page_size)) + return ERR_PTR(-EINVAL); + ent = mr_cache_ent_from_order( + dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size))); + /* + * Matches access in alloc_cache_mr(). If the MR can't come from the + * cache then synchronously create an uncached one. + */ + if (!ent || ent->limit == 0 || + !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) { + mutex_lock(&dev->slow_path_mutex); + mr = reg_create(pd, umem, iova, access_flags, page_size, false); + mutex_unlock(&dev->slow_path_mutex); + return mr; + } mr = get_cache_mr(ent); if (!mr) { mr = create_cache_mr(ent); + /* + * The above already tried to do the same stuff as reg_create(), + * no reason to try it again. + */ if (IS_ERR(mr)) return mr; } @@ -1001,9 +976,12 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, mr->umem = umem; mr->access_flags = access_flags; mr->desc_size = sizeof(struct mlx5_mtt); - mr->mmkey.iova = virt_addr; - mr->mmkey.size = len; + mr->mmkey.iova = iova; + mr->mmkey.size = umem->length; mr->mmkey.pd = to_mpd(pd)->pdn; + mr->page_shift = order_base_2(page_size); + mr->umem = umem; + set_mr_fields(dev, mr, umem->length, access_flags); return mr; } @@ -1012,14 +990,144 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, MLX5_UMR_MTT_ALIGNMENT) #define MLX5_SPARE_UMR_CHUNK 0x10000 +/* + * Allocate a temporary buffer to hold the per-page information to transfer to + * HW. For efficiency this should be as large as it can be, but buffer + * allocation failure is not allowed, so try smaller sizes. + */ +static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) +{ + const size_t xlt_chunk_align = + MLX5_UMR_MTT_ALIGNMENT / sizeof(ent_size); + size_t size; + void *res = NULL; + + static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); + + /* + * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the + * allocation can't trigger any kind of reclaim. + */ + might_sleep(); + + gfp_mask |= __GFP_ZERO; + + /* + * If the system already has a suitable high order page then just use + * that, but don't try hard to create one. This max is about 1M, so a + * free x86 huge page will satisfy it. + */ + size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), + MLX5_MAX_UMR_CHUNK); + *nents = size / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + + if (size > MLX5_SPARE_UMR_CHUNK) { + size = MLX5_SPARE_UMR_CHUNK; + *nents = get_order(size) / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + } + + *nents = PAGE_SIZE / ent_size; + res = (void *)__get_free_page(gfp_mask); + if (res) + return res; + + mutex_lock(&xlt_emergency_page_mutex); + memset(xlt_emergency_page, 0, PAGE_SIZE); + return xlt_emergency_page; +} + +static void mlx5_ib_free_xlt(void *xlt, size_t length) +{ + if (xlt == xlt_emergency_page) { + mutex_unlock(&xlt_emergency_page_mutex); + return; + } + + free_pages((unsigned long)xlt, get_order(length)); +} + +/* + * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for + * submission. + */ +static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, + struct mlx5_umr_wr *wr, struct ib_sge *sg, + size_t nents, size_t ent_size, + unsigned int flags) +{ + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; + dma_addr_t dma; + void *xlt; + + xlt = mlx5_ib_alloc_xlt(&nents, ent_size, + flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : + GFP_KERNEL); + sg->length = nents * ent_size; + dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, dma)) { + mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); + mlx5_ib_free_xlt(xlt, sg->length); + return NULL; + } + sg->addr = dma; + sg->lkey = dev->umrc.pd->local_dma_lkey; + + memset(wr, 0, sizeof(*wr)); + wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; + if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) + wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; + wr->wr.sg_list = sg; + wr->wr.num_sge = 1; + wr->wr.opcode = MLX5_IB_WR_UMR; + wr->pd = mr->ibmr.pd; + wr->mkey = mr->mmkey.key; + wr->length = mr->mmkey.size; + wr->virt_addr = mr->mmkey.iova; + wr->access_flags = mr->access_flags; + wr->page_shift = mr->page_shift; + wr->xlt_size = sg->length; + return xlt; +} + +static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, + struct ib_sge *sg) +{ + struct device *ddev = &dev->mdev->pdev->dev; + + dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); + mlx5_ib_free_xlt(xlt, sg->length); +} + +static unsigned int xlt_wr_final_send_flags(unsigned int flags) +{ + unsigned int res = 0; + + if (flags & MLX5_IB_UPD_XLT_ENABLE) + res |= MLX5_IB_SEND_UMR_ENABLE_MR | + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | + MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS) + res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; + if (flags & MLX5_IB_UPD_XLT_ADDR) + res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + return res; +} + int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags) { - struct mlx5_ib_dev *dev = mr->dev; - struct device *ddev = dev->ib_dev.dev.parent; - int size; + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; void *xlt; - dma_addr_t dma; struct mlx5_umr_wr wr; struct ib_sge sg; int err = 0; @@ -1030,15 +1138,17 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, const int page_mask = page_align - 1; size_t pages_mapped = 0; size_t pages_to_map = 0; - size_t pages_iter = 0; + size_t pages_iter; size_t size_to_map = 0; - gfp_t gfp; - bool use_emergency_page = false; + size_t orig_sg_length; if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && !umr_can_use_indirect_mkey(dev)) return -EPERM; + if (WARN_ON(!mr->umem->is_odp)) + return -EINVAL; + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, * so we need to align the offset and length accordingly */ @@ -1046,63 +1156,21 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, npages += idx & page_mask; idx &= ~page_mask; } - - gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL; - gfp |= __GFP_ZERO | __GFP_NOWARN; - pages_to_map = ALIGN(npages, page_align); - size = desc_size * pages_to_map; - size = min_t(int, size, MLX5_MAX_UMR_CHUNK); - - xlt = (void *)__get_free_pages(gfp, get_order(size)); - if (!xlt && size > MLX5_SPARE_UMR_CHUNK) { - mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation od %d bytes\n", - size, get_order(size), MLX5_SPARE_UMR_CHUNK); - - size = MLX5_SPARE_UMR_CHUNK; - xlt = (void *)__get_free_pages(gfp, get_order(size)); - } - if (!xlt) { - mlx5_ib_warn(dev, "Using XLT emergency buffer\n"); - xlt = (void *)mlx5_ib_get_xlt_emergency_page(); - size = PAGE_SIZE; - memset(xlt, 0, size); - use_emergency_page = true; - } - pages_iter = size / desc_size; - dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE); - if (dma_mapping_error(ddev, dma)) { - mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); - err = -ENOMEM; - goto free_xlt; - } + xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags); + if (!xlt) + return -ENOMEM; + pages_iter = sg.length / desc_size; + orig_sg_length = sg.length; - if (mr->umem->is_odp) { - if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { - struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - size_t max_pages = ib_umem_odp_num_pages(odp) - idx; + if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + size_t max_pages = ib_umem_odp_num_pages(odp) - idx; - pages_to_map = min_t(size_t, pages_to_map, max_pages); - } + pages_to_map = min_t(size_t, pages_to_map, max_pages); } - sg.addr = dma; - sg.lkey = dev->umrc.pd->local_dma_lkey; - - memset(&wr, 0, sizeof(wr)); - wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; - if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) - wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; - wr.wr.sg_list = &sg; - wr.wr.num_sge = 1; - wr.wr.opcode = MLX5_IB_WR_UMR; - - wr.pd = mr->ibmr.pd; - wr.mkey = mr->mmkey.key; - wr.length = mr->mmkey.size; - wr.virt_addr = mr->mmkey.iova; - wr.access_flags = mr->access_flags; wr.page_shift = page_shift; for (pages_mapped = 0; @@ -1110,50 +1178,87 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, pages_mapped += pages_iter, idx += pages_iter) { npages = min_t(int, pages_iter, pages_to_map - pages_mapped); size_to_map = npages * desc_size; - dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE); - if (mr->umem->is_odp) { - mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); - } else { - __mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx, - npages, xlt, - MLX5_IB_MTT_PRESENT); - /* Clear padding after the pages - * brought from the umem. - */ - memset(xlt + size_to_map, 0, size - size_to_map); - } - dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); - if (pages_mapped + pages_iter >= pages_to_map) { - if (flags & MLX5_IB_UPD_XLT_ENABLE) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_ENABLE_MR | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - if (flags & MLX5_IB_UPD_XLT_PD || - flags & MLX5_IB_UPD_XLT_ACCESS) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - if (flags & MLX5_IB_UPD_XLT_ADDR) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - } + if (pages_mapped + pages_iter >= pages_to_map) + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); wr.offset = idx * desc_size; wr.xlt_size = sg.length; err = mlx5_ib_post_send_wait(dev, &wr); } - dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); + sg.length = orig_sg_length; + mlx5_ib_unmap_free_xlt(dev, xlt, &sg); + return err; +} + +/* + * Send the DMA list to the HW for a normal MR using UMR. + */ +static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) +{ + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; + struct ib_block_iter biter; + struct mlx5_mtt *cur_mtt; + struct mlx5_umr_wr wr; + size_t orig_sg_length; + struct mlx5_mtt *mtt; + size_t final_size; + struct ib_sge sg; + int err = 0; + + if (WARN_ON(mr->umem->is_odp)) + return -EINVAL; + + mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, + ib_umem_num_dma_blocks(mr->umem, + 1 << mr->page_shift), + sizeof(*mtt), flags); + if (!mtt) + return -ENOMEM; + orig_sg_length = sg.length; + + cur_mtt = mtt; + rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap, + BIT(mr->page_shift)) { + if (cur_mtt == (void *)mtt + sg.length) { + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + err = mlx5_ib_post_send_wait(dev, &wr); + if (err) + goto err; + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + wr.offset += sg.length; + cur_mtt = mtt; + } + + cur_mtt->ptag = + cpu_to_be64(rdma_block_iter_dma_address(&biter) | + MLX5_IB_MTT_PRESENT); + cur_mtt++; + } -free_xlt: - if (use_emergency_page) - mlx5_ib_put_xlt_emergency_page(); - else - free_pages((unsigned long)xlt, get_order(size)); + final_size = (void *)cur_mtt - (void *)mtt; + sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); + memset(cur_mtt, 0, sg.length - final_size); + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); + wr.xlt_size = sg.length; + dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); + err = mlx5_ib_post_send_wait(dev, &wr); + +err: + sg.length = orig_sg_length; + mlx5_ib_unmap_free_xlt(dev, mtt, &sg); return err; } @@ -1161,11 +1266,9 @@ free_xlt: * If ibmr is NULL it will be allocated by reg_create. * Else, the given ibmr will be used. */ -static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, - u64 virt_addr, u64 length, - struct ib_umem *umem, int npages, - int page_shift, int access_flags, - bool populate) +static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, + u64 iova, int access_flags, + unsigned int page_size, bool populate) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr; @@ -1176,16 +1279,20 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, int err; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); - mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); + if (!page_size) + return ERR_PTR(-EINVAL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); mr->ibmr.pd = pd; mr->access_flags = access_flags; + mr->page_shift = order_base_2(page_size); inlen = MLX5_ST_SZ_BYTES(create_mkey_in); if (populate) - inlen += sizeof(*pas) * roundup(npages, 2); + inlen += sizeof(*pas) * + roundup(ib_umem_num_dma_blocks(umem, page_size), 2); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; @@ -1197,7 +1304,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, err = -EINVAL; goto err_2; } - mlx5_ib_populate_pas(dev, umem, page_shift, pas, + mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas, pg_cap ? MLX5_IB_MTT_PRESENT : 0); } @@ -1206,20 +1313,20 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr, + set_mkc_access_pd_addr_fields(mkc, access_flags, iova, populate ? pd : dev->umrc.pd); MLX5_SET(mkc, mkc, free, !populate); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET64(mkc, mkc, len, length); + MLX5_SET64(mkc, mkc, len, umem->length); MLX5_SET(mkc, mkc, bsf_octword_size, 0); MLX5_SET(mkc, mkc, translations_octword_size, - get_octo_len(virt_addr, length, page_shift)); - MLX5_SET(mkc, mkc, log_page_size, page_shift); + get_octo_len(iova, umem->length, mr->page_shift)); + MLX5_SET(mkc, mkc, log_page_size, mr->page_shift); if (populate) { MLX5_SET(create_mkey_in, in, translations_octword_actual_size, - get_octo_len(virt_addr, length, page_shift)); + get_octo_len(iova, umem->length, mr->page_shift)); } err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); @@ -1229,7 +1336,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, } mr->mmkey.type = MLX5_MKEY_MR; mr->desc_size = sizeof(struct mlx5_mtt); - mr->dev = dev; + mr->umem = umem; + set_mr_fields(dev, mr, umem->length, access_flags); kvfree(in); mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); @@ -1238,25 +1346,11 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, err_2: kvfree(in); - err_1: - if (!ibmr) - kfree(mr); - + kfree(mr); return ERR_PTR(err); } -static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - int npages, u64 length, int access_flags) -{ - mr->npages = npages; - atomic_add(npages, &dev->mdev->priv.reg_pages); - mr->ibmr.lkey = mr->mmkey.key; - mr->ibmr.rkey = mr->mmkey.key; - mr->ibmr.length = length; - mr->access_flags = access_flags; -} - static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, u64 length, int acc, int mode) { @@ -1290,8 +1384,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, kfree(in); - mr->umem = NULL; - set_mr_fields(dev, mr, 0, length, acc); + set_mr_fields(dev, mr, length, acc); return &mr->ibmr; @@ -1352,116 +1445,128 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, attr->access_flags, mode); } -struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt_addr, int access_flags, - struct ib_udata *udata) +static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, + u64 iova, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr = NULL; bool xlt_with_umr; - struct ib_umem *umem; - int page_shift; - int npages; - int ncont; - int order; int err; - if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) - return ERR_PTR(-EOPNOTSUPP); - - mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", - start, virt_addr, length, access_flags); - - xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length); - /* ODP requires xlt update via umr to work. */ - if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND)) - return ERR_PTR(-EINVAL); - - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && - length == U64_MAX) { - if (virt_addr != start) - return ERR_PTR(-EINVAL); - if (!(access_flags & IB_ACCESS_ON_DEMAND) || - !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) - return ERR_PTR(-EINVAL); - - mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); - if (IS_ERR(mr)) - return ERR_CAST(mr); - return &mr->ibmr; - } - - err = mr_umem_get(dev, start, length, access_flags, &umem, - &npages, &page_shift, &ncont, &order); - - if (err < 0) - return ERR_PTR(err); - + xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length); if (xlt_with_umr) { - mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, - page_shift, order, access_flags); - if (IS_ERR(mr)) - mr = NULL; - } + mr = alloc_cacheable_mr(pd, umem, iova, access_flags); + } else { + unsigned int page_size = mlx5_umem_find_best_pgsz( + umem, mkc, log_page_size, 0, iova); - if (!mr) { mutex_lock(&dev->slow_path_mutex); - mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, - page_shift, access_flags, !xlt_with_umr); + mr = reg_create(pd, umem, iova, access_flags, page_size, true); mutex_unlock(&dev->slow_path_mutex); } - if (IS_ERR(mr)) { - err = PTR_ERR(mr); - goto error; + ib_umem_release(umem); + return ERR_CAST(mr); } mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); - mr->umem = umem; - set_mr_fields(dev, mr, npages, length, access_flags); + atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); - if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) { + if (xlt_with_umr) { /* * If the MR was created with reg_create then it will be * configured properly but left disabled. It is safe to go ahead * and configure it again via UMR while enabling it. */ - int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; - - err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, - update_xlt_flags); + err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); if (err) { dereg_mr(dev, mr); return ERR_PTR(err); } } + return &mr->ibmr; +} - if (is_odp_mr(mr)) { - to_ib_umem_odp(mr->umem)->private = mr; - init_waitqueue_head(&mr->q_deferred_work); - atomic_set(&mr->num_deferred_work, 0); - err = xa_err(xa_store(&dev->odp_mkeys, - mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, - GFP_KERNEL)); - if (err) { - dereg_mr(dev, mr); - return ERR_PTR(err); - } +static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, + u64 iova, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem_odp *odp; + struct mlx5_ib_mr *mr; + int err; - err = mlx5_ib_init_odp_mr(mr, xlt_with_umr); - if (err) { - dereg_mr(dev, mr); - return ERR_PTR(err); - } + if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + return ERR_PTR(-EOPNOTSUPP); + + if (!start && length == U64_MAX) { + if (iova != 0) + return ERR_PTR(-EINVAL); + if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) + return ERR_PTR(-EINVAL); + + mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); + if (IS_ERR(mr)) + return ERR_CAST(mr); + return &mr->ibmr; + } + + /* ODP requires xlt update via umr to work. */ + if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + return ERR_PTR(-EINVAL); + + odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, + &mlx5_mn_ops); + if (IS_ERR(odp)) + return ERR_CAST(odp); + + mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags); + if (IS_ERR(mr)) { + ib_umem_release(&odp->umem); + return ERR_CAST(mr); } + odp->private = mr; + init_waitqueue_head(&mr->q_deferred_work); + atomic_set(&mr->num_deferred_work, 0); + err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_KERNEL)); + if (err) + goto err_dereg_mr; + + err = mlx5_ib_init_odp_mr(mr); + if (err) + goto err_dereg_mr; return &mr->ibmr; -error: - ib_umem_release(umem); + +err_dereg_mr: + dereg_mr(dev, mr); return ERR_PTR(err); } +struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 iova, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem *umem; + + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) + return ERR_PTR(-EOPNOTSUPP); + + mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, iova, length, access_flags); + + if (access_flags & IB_ACCESS_ON_DEMAND) + return create_user_odp_mr(pd, start, length, iova, access_flags, + udata); + umem = ib_umem_get(&dev->ib_dev, start, length, access_flags); + if (IS_ERR(umem)) + return ERR_CAST(umem); + return create_real_mr(pd, umem, iova, access_flags); +} + /** * mlx5_mr_cache_invalidate - Fence all DMA on the MR * @mr: The MR to fence @@ -1474,151 +1579,224 @@ int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr) { struct mlx5_umr_wr umrwr = {}; - if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) return 0; umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.pd = mr->dev->umrc.pd; + umrwr.pd = mr_to_mdev(mr)->umrc.pd; umrwr.mkey = mr->mmkey.key; umrwr.ignore_free_state = 1; - return mlx5_ib_post_send_wait(mr->dev, &umrwr); + return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr); } -static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, - int access_flags, int flags) -{ - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_umr_wr umrwr = {}; +/* + * True if the change in access flags can be done via UMR, only some access + * flags can be updated. + */ +static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev, + unsigned int current_access_flags, + unsigned int target_access_flags) +{ + unsigned int diffs = current_access_flags ^ target_access_flags; + + if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING)) + return false; + return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags, + target_access_flags); +} + +static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, + int access_flags) +{ + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + struct mlx5_umr_wr umrwr = { + .wr = { + .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS, + .opcode = MLX5_IB_WR_UMR, + }, + .mkey = mr->mmkey.key, + .pd = pd, + .access_flags = access_flags, + }; int err; - umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; - - umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.mkey = mr->mmkey.key; - - if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) { - umrwr.pd = pd; - umrwr.access_flags = access_flags; - umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - } - err = mlx5_ib_post_send_wait(dev, &umrwr); + if (err) + return err; - return err; + mr->access_flags = access_flags; + mr->mmkey.pd = to_mpd(pd)->pdn; + return 0; } -int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, - u64 length, u64 virt_addr, int new_access_flags, - struct ib_pd *new_pd, struct ib_udata *udata) +static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, + struct ib_umem *new_umem, + int new_access_flags, u64 iova, + unsigned long *page_size) { - struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); - struct mlx5_ib_mr *mr = to_mmr(ib_mr); - struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd; - int access_flags = flags & IB_MR_REREG_ACCESS ? - new_access_flags : - mr->access_flags; - int page_shift = 0; - int upd_flags = 0; - int npages = 0; - int ncont = 0; - int order = 0; - u64 addr, len; - int err; + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", - start, virt_addr, length, access_flags); + /* We only track the allocated sizes of MRs from the cache */ + if (!mr->cache_ent) + return false; + if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length)) + return false; - atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); + *page_size = + mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova); + if (WARN_ON(!*page_size)) + return false; + return (1ULL << mr->cache_ent->order) >= + ib_umem_num_dma_blocks(new_umem, *page_size); +} - if (!mr->umem) - return -EINVAL; +static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, + int access_flags, int flags, struct ib_umem *new_umem, + u64 iova, unsigned long page_size) +{ + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE; + struct ib_umem *old_umem = mr->umem; + int err; - if (is_odp_mr(mr)) - return -EOPNOTSUPP; + /* + * To keep everything simple the MR is revoked before we start to mess + * with it. This ensure the change is atomic relative to any use of the + * MR. + */ + err = mlx5_mr_cache_invalidate(mr); + if (err) + return err; - if (flags & IB_MR_REREG_TRANS) { - addr = virt_addr; - len = length; - } else { - addr = mr->umem->address; - len = mr->umem->length; + if (flags & IB_MR_REREG_PD) { + mr->ibmr.pd = pd; + mr->mmkey.pd = to_mpd(pd)->pdn; + upd_flags |= MLX5_IB_UPD_XLT_PD; + } + if (flags & IB_MR_REREG_ACCESS) { + mr->access_flags = access_flags; + upd_flags |= MLX5_IB_UPD_XLT_ACCESS; } - if (flags != IB_MR_REREG_PD) { + mr->ibmr.length = new_umem->length; + mr->mmkey.iova = iova; + mr->mmkey.size = new_umem->length; + mr->page_shift = order_base_2(page_size); + mr->umem = new_umem; + err = mlx5_ib_update_mr_pas(mr, upd_flags); + if (err) { /* - * Replace umem. This needs to be done whether or not UMR is - * used. + * The MR is revoked at this point so there is no issue to free + * new_umem. */ - flags |= IB_MR_REREG_TRANS; - ib_umem_release(mr->umem); - mr->umem = NULL; - err = mr_umem_get(dev, addr, len, access_flags, &mr->umem, - &npages, &page_shift, &ncont, &order); - if (err) - goto err; + mr->umem = old_umem; + return err; } - if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags, - access_flags) || - !mlx5_ib_can_load_pas_with_umr(dev, len) || - (flags & IB_MR_REREG_TRANS && - !mlx5_ib_pas_fits_in_mr(mr, addr, len))) { - /* - * UMR can't be used - MKey needs to be replaced. - */ - if (mr->cache_ent) - detach_mr_from_cache(mr); - err = destroy_mkey(dev, mr); - if (err) - goto err; + atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages); + ib_umem_release(old_umem); + atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages); + return 0; +} - mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, - page_shift, access_flags, true); +struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 iova, int new_access_flags, + struct ib_pd *new_pd, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); + struct mlx5_ib_mr *mr = to_mmr(ib_mr); + int err; - if (IS_ERR(mr)) { - err = PTR_ERR(mr); - mr = to_mmr(ib_mr); - goto err; - } - } else { - /* - * Send a UMR WQE - */ - mr->ibmr.pd = pd; - mr->access_flags = access_flags; - mr->mmkey.iova = addr; - mr->mmkey.size = len; - mr->mmkey.pd = to_mpd(pd)->pdn; + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) + return ERR_PTR(-EOPNOTSUPP); - if (flags & IB_MR_REREG_TRANS) { - upd_flags = MLX5_IB_UPD_XLT_ADDR; - if (flags & IB_MR_REREG_PD) - upd_flags |= MLX5_IB_UPD_XLT_PD; - if (flags & IB_MR_REREG_ACCESS) - upd_flags |= MLX5_IB_UPD_XLT_ACCESS; - err = mlx5_ib_update_xlt(mr, 0, npages, page_shift, - upd_flags); - } else { - err = rereg_umr(pd, mr, access_flags, flags); + mlx5_ib_dbg( + dev, + "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, iova, length, new_access_flags); + + if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) + return ERR_PTR(-EOPNOTSUPP); + + if (!(flags & IB_MR_REREG_ACCESS)) + new_access_flags = mr->access_flags; + if (!(flags & IB_MR_REREG_PD)) + new_pd = ib_mr->pd; + + if (!(flags & IB_MR_REREG_TRANS)) { + struct ib_umem *umem; + + /* Fast path for PD/access change */ + if (can_use_umr_rereg_access(dev, mr->access_flags, + new_access_flags)) { + err = umr_rereg_pd_access(mr, new_pd, new_access_flags); + if (err) + return ERR_PTR(err); + return NULL; } + /* DM or ODP MR's don't have a umem so we can't re-use it */ + if (!mr->umem || is_odp_mr(mr)) + goto recreate; + /* + * Only one active MR can refer to a umem at one time, revoke + * the old MR before assigning the umem to the new one. + */ + err = mlx5_mr_cache_invalidate(mr); if (err) - goto err; - } - - set_mr_fields(dev, mr, npages, len, access_flags); + return ERR_PTR(err); + umem = mr->umem; + mr->umem = NULL; + atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); - return 0; + return create_real_mr(new_pd, umem, mr->mmkey.iova, + new_access_flags); + } -err: - ib_umem_release(mr->umem); - mr->umem = NULL; + /* + * DM doesn't have a PAS list so we can't re-use it, odp does but the + * logic around releasing the umem is different + */ + if (!mr->umem || is_odp_mr(mr)) + goto recreate; + + if (!(new_access_flags & IB_ACCESS_ON_DEMAND) && + can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) { + struct ib_umem *new_umem; + unsigned long page_size; + + new_umem = ib_umem_get(&dev->ib_dev, start, length, + new_access_flags); + if (IS_ERR(new_umem)) + return ERR_CAST(new_umem); + + /* Fast path for PAS change */ + if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova, + &page_size)) { + err = umr_rereg_pas(mr, new_pd, new_access_flags, flags, + new_umem, iova, page_size); + if (err) { + ib_umem_release(new_umem); + return ERR_PTR(err); + } + return NULL; + } + return create_real_mr(new_pd, new_umem, iova, new_access_flags); + } - clean_mr(dev, mr); - return err; + /* + * Everything else has no state we can preserve, just create a new MR + * from scratch + */ +recreate: + return mlx5_ib_reg_user_mr(new_pd, start, length, iova, + new_access_flags, udata); } static int @@ -1627,6 +1805,8 @@ mlx5_alloc_priv_descs(struct ib_device *device, int ndescs, int desc_size) { + struct mlx5_ib_dev *dev = to_mdev(device); + struct device *ddev = &dev->mdev->pdev->dev; int size = ndescs * desc_size; int add_size; int ret; @@ -1639,9 +1819,8 @@ mlx5_alloc_priv_descs(struct ib_device *device, mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); - mr->desc_map = dma_map_single(device->dev.parent, mr->descs, - size, DMA_TO_DEVICE); - if (dma_mapping_error(device->dev.parent, mr->desc_map)) { + mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, mr->desc_map)) { ret = -ENOMEM; goto err; } @@ -1659,9 +1838,10 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) if (mr->descs) { struct ib_device *device = mr->ibmr.device; int size = mr->max_descs * mr->desc_size; + struct mlx5_ib_dev *dev = to_mdev(device); - dma_unmap_single(device->dev.parent, mr->desc_map, - size, DMA_TO_DEVICE); + dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, + DMA_TO_DEVICE); kfree(mr->descs_alloc); mr->descs = NULL; } @@ -1691,7 +1871,6 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int npages = mr->npages; struct ib_umem *umem = mr->umem; /* Stop all DMA */ @@ -1700,14 +1879,17 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) else clean_mr(dev, mr); + if (umem) { + if (!is_odp_mr(mr)) + atomic_sub(ib_umem_num_pages(umem), + &dev->mdev->priv.reg_pages); + ib_umem_release(umem); + } + if (mr->cache_ent) mlx5_mr_cache_free(dev, mr); else kfree(mr); - - ib_umem_release(umem); - atomic_sub(npages, &dev->mdev->priv.reg_pages); - } int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 5c853ec1b0d8..aa2413b50adc 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -102,7 +102,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, if (flags & MLX5_IB_UPD_XLT_ZAP) { for (; pklm != end; pklm++, idx++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - pklm->key = cpu_to_be32(imr->dev->null_mkey); + pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey); pklm->va = 0; } return; @@ -129,7 +129,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * locking around the xarray. */ lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); - lockdep_assert_held(&imr->dev->odp_srcu); + lockdep_assert_held(&mr_to_mdev(imr)->odp_srcu); for (; pklm != end; pklm++, idx++) { struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); @@ -139,7 +139,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, pklm->key = cpu_to_be32(mtt->ibmr.lkey); pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE); } else { - pklm->key = cpu_to_be32(imr->dev->null_mkey); + pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey); pklm->va = 0; } } @@ -199,7 +199,7 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) mutex_unlock(&odp->umem_mutex); if (!mr->cache_ent) { - mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey); + mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey); WARN_ON(mr->descs); } } @@ -222,19 +222,19 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) WARN_ON(atomic_read(&mr->num_deferred_work)); if (need_imr_xlt) { - srcu_key = srcu_read_lock(&mr->dev->odp_srcu); + srcu_key = srcu_read_lock(&mr_to_mdev(mr)->odp_srcu); mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(mr->parent, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); - srcu_read_unlock(&mr->dev->odp_srcu, srcu_key); + srcu_read_unlock(&mr_to_mdev(mr)->odp_srcu, srcu_key); } dma_fence_odp_mr(mr); mr->parent = NULL; - mlx5_mr_cache_free(mr->dev, mr); + mlx5_mr_cache_free(mr_to_mdev(mr), mr); ib_umem_odp_release(odp); if (atomic_dec_and_test(&imr->num_deferred_work)) wake_up(&imr->q_deferred_work); @@ -274,7 +274,7 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) goto out_unlock; atomic_inc(&imr->num_deferred_work); - call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu, + call_srcu(&mr_to_mdev(mr)->odp_srcu, &mr->odp_destroy.rcu, free_implicit_child_mr_rcu); out_unlock: @@ -476,12 +476,13 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(odp)) return ERR_CAST(odp); - ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY, - imr->access_flags); + ret = mr = mlx5_mr_cache_alloc( + mr_to_mdev(imr), MLX5_IMR_MTT_CACHE_ENTRY, imr->access_flags); if (IS_ERR(mr)) goto out_umem; mr->ibmr.pd = imr->ibmr.pd; + mr->ibmr.device = &mr_to_mdev(imr)->ib_dev; mr->umem = &odp->umem; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; @@ -517,11 +518,11 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_mr; } - mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr); + mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr); return mr; out_mr: - mlx5_mr_cache_free(imr->dev, mr); + mlx5_mr_cache_free(mr_to_mdev(imr), mr); out_umem: ib_umem_odp_release(odp); return ret; @@ -536,6 +537,10 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct mlx5_ib_mr *imr; int err; + if (!mlx5_ib_can_load_pas_with_umr(dev, + MLX5_IMR_MTT_ENTRIES * PAGE_SIZE)) + return ERR_PTR(-EOPNOTSUPP); + umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags); if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); @@ -551,6 +556,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->umem = &umem_odp->umem; imr->ibmr.lkey = imr->mmkey.key; imr->ibmr.rkey = imr->mmkey.key; + imr->ibmr.device = &dev->ib_dev; imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; atomic_set(&imr->num_deferred_work, 0); @@ -584,7 +590,7 @@ out_umem: void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); - struct mlx5_ib_dev *dev = imr->dev; + struct mlx5_ib_dev *dev = mr_to_mdev(imr); struct list_head destroy_list; struct mlx5_ib_mr *mtt; struct mlx5_ib_mr *tmp; @@ -654,10 +660,10 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) { /* Prevent new page faults and prefetch requests from succeeding */ - xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&mr->dev->odp_srcu); + synchronize_srcu(&mr_to_mdev(mr)->odp_srcu); wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); @@ -701,7 +707,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, if (ret < 0) { if (ret != -EAGAIN) - mlx5_ib_err(mr->dev, + mlx5_ib_err(mr_to_mdev(mr), "Failed to update mkey page tables\n"); goto out; } @@ -791,7 +797,7 @@ out: MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); if (err) { - mlx5_ib_err(imr->dev, "Failed to update PAS\n"); + mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n"); return err; } return ret; @@ -811,7 +817,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - lockdep_assert_held(&mr->dev->odp_srcu); + lockdep_assert_held(&mr_to_mdev(mr)->odp_srcu); if (unlikely(io_virt < mr->mmkey.iova)) return -EFAULT; @@ -831,17 +837,13 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, flags); } -int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable) +int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) { - u32 flags = MLX5_PF_FLAGS_SNAPSHOT; int ret; - if (enable) - flags |= MLX5_PF_FLAGS_ENABLE; - - ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), - mr->umem->address, mr->umem->length, NULL, - flags); + ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), mr->umem->address, + mr->umem->length, NULL, + MLX5_PF_FLAGS_SNAPSHOT | MLX5_PF_FLAGS_ENABLE); return ret >= 0 ? 0 : ret; } @@ -1783,7 +1785,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) /* We rely on IB/core that work is executed if we have num_sge != 0 only. */ WARN_ON(!work->num_sge); - dev = work->frags[0].mr->dev; + dev = mr_to_mdev(work->frags[0].mr); /* SRCU should be held when calling to mlx5_odp_populate_xlt() */ srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < work->num_sge; ++i) { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 600e056798c0..0cb7cc642d87 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -778,39 +778,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, return bfregi->sys_pages[index_of_sys_page] + offset; } -static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, - unsigned long addr, size_t size, - struct ib_umem **umem, int *npages, int *page_shift, - int *ncont, u32 *offset) -{ - int err; - - *umem = ib_umem_get(&dev->ib_dev, addr, size, 0); - if (IS_ERR(*umem)) { - mlx5_ib_dbg(dev, "umem_get failed\n"); - return PTR_ERR(*umem); - } - - mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL); - - err = mlx5_ib_get_buf_offset(addr, *page_shift, offset); - if (err) { - mlx5_ib_warn(dev, "bad offset\n"); - goto err_umem; - } - - mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n", - addr, size, *npages, *page_shift, *ncont, *offset); - - return 0; - -err_umem: - ib_umem_release(*umem); - *umem = NULL; - - return err; -} - static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_rwq *rwq, struct ib_udata *udata) { @@ -833,10 +800,8 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, { struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - int page_shift = 0; - int npages; + unsigned long page_size = 0; u32 offset = 0; - int ncont = 0; int err; if (!ucmd->buf_addr) @@ -849,23 +814,26 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } - mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift, - &ncont, NULL); - err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, - &rwq->rq_page_offset); - if (err) { + page_size = mlx5_umem_find_best_quantized_pgoff( + rwq->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &rwq->rq_page_offset); + if (!page_size) { mlx5_ib_warn(dev, "bad offset\n"); + err = -EINVAL; goto err_umem; } - rwq->rq_num_pas = ncont; - rwq->page_shift = page_shift; - rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, page_size); + rwq->page_shift = order_base_2(page_size); + rwq->log_page_size = rwq->page_shift - MLX5_ADAPTER_PAGE_SHIFT; rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE); - mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n", - (unsigned long long)ucmd->buf_addr, rwq->buf_size, - npages, page_shift, ncont, offset); + mlx5_ib_dbg( + dev, + "addr 0x%llx, size %zd, npages %zu, page_size %ld, ncont %d, offset %d\n", + (unsigned long long)ucmd->buf_addr, rwq->buf_size, + ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas, + offset); err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db); if (err) { @@ -896,10 +864,9 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, { struct mlx5_ib_ucontext *context; struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer; - int page_shift = 0; + unsigned int page_offset_quantized = 0; + unsigned long page_size = 0; int uar_index = 0; - int npages; - u32 offset = 0; int bfregn; int ncont = 0; __be64 *pas; @@ -950,11 +917,21 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (ucmd->buf_addr && ubuffer->buf_size) { ubuffer->buf_addr = ucmd->buf_addr; - err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, - ubuffer->buf_size, &ubuffer->umem, - &npages, &page_shift, &ncont, &offset); - if (err) + ubuffer->umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, + ubuffer->buf_size, 0); + if (IS_ERR(ubuffer->umem)) { + err = PTR_ERR(ubuffer->umem); goto err_bfreg; + } + page_size = mlx5_umem_find_best_quantized_pgoff( + ubuffer->umem, qpc, log_page_size, + MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, + &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } + ncont = ib_umem_num_dma_blocks(ubuffer->umem, page_size); } else { ubuffer->umem = NULL; } @@ -969,15 +946,14 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, uid = (attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0; MLX5_SET(create_qp_in, *in, uid, uid); - pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); - if (ubuffer->umem) - mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); - qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); - - MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(qpc, qpc, page_offset, offset); - + pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); + if (ubuffer->umem) { + mlx5_ib_populate_pas(ubuffer->umem, page_size, pas, 0); + MLX5_SET(qpc, qpc, log_page_size, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, page_offset, page_offset_quantized); + } MLX5_SET(qpc, qpc, uar_page, uar_index); if (bfregn != MLX5_IB_INVALID_BFREG) resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn); @@ -1209,18 +1185,24 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, void *wq; int inlen; int err; - int page_shift = 0; - int npages; - int ncont = 0; - u32 offset = 0; - - err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size, - &sq->ubuffer.umem, &npages, &page_shift, &ncont, - &offset); - if (err) - return err; + unsigned int page_offset_quantized; + unsigned long page_size; + + sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, + ubuffer->buf_size, 0); + if (IS_ERR(sq->ubuffer.umem)) + return PTR_ERR(sq->ubuffer.umem); + page_size = mlx5_umem_find_best_quantized_pgoff( + ubuffer->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } - inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont; + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * + ib_umem_num_dma_blocks(sq->ubuffer.umem, page_size); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; @@ -1248,11 +1230,12 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size)); - MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(wq, wq, page_offset, offset); + MLX5_SET(wq, wq, log_wq_pg_sz, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(wq, wq, page_offset, page_offset_quantized); pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0); + mlx5_ib_populate_pas(sq->ubuffer.umem, page_size, pas, 0); err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp); @@ -1278,40 +1261,31 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, ib_umem_release(sq->ubuffer.umem); } -static size_t get_rq_pas_size(void *qpc) -{ - u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12; - u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride); - u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size); - u32 page_offset = MLX5_GET(qpc, qpc, page_offset); - u32 po_quanta = 1 << (log_page_size - 6); - u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride); - u32 page_size = 1 << log_page_size; - u32 rq_sz_po = rq_sz + (page_offset * po_quanta); - u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size; - - return rq_num_pas * sizeof(u64); -} - static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, void *qpin, - size_t qpinlen, struct ib_pd *pd) + struct ib_pd *pd) { struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; - __be64 *qp_pas; void *in; void *rqc; void *wq; void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc); - size_t rq_pas_size = get_rq_pas_size(qpc); + struct ib_umem *umem = rq->base.ubuffer.umem; + unsigned int page_offset_quantized; + unsigned long page_size = 0; size_t inlen; int err; - if (qpinlen < rq_pas_size + MLX5_BYTE_OFF(create_qp_in, pas)) + page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz, + MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, + &page_offset_quantized); + if (!page_size) return -EINVAL; - inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size; + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + + sizeof(u64) * ib_umem_num_dma_blocks(umem, page_size); in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1333,16 +1307,16 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); if (rq->flags & MLX5_IB_RQ_PCI_WRITE_END_PADDING) MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); - MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset)); + MLX5_SET(wq, wq, page_offset, page_offset_quantized); MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd)); MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4); - MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size)); + MLX5_SET(wq, wq, log_wq_pg_sz, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size)); pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas); - memcpy(pas, qp_pas, rq_pas_size); + mlx5_ib_populate_pas(umem, page_size, pas, 0); err = mlx5_core_create_rq_tracked(dev, in, inlen, &rq->base.mqp); @@ -1463,7 +1437,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; - err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd); + err = create_raw_packet_qp_rq(dev, rq, in, pd); if (err) goto err_destroy_sq; @@ -2436,7 +2410,7 @@ static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd, } qp->state = IB_QPS_RESET; - + rdma_restrack_no_track(&qp->ibqp.res); return 0; } @@ -2460,6 +2434,7 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, case IB_QPT_GSI: if (dev->profile == &raw_eth_profile) goto out; + fallthrough; case IB_QPT_RAW_PACKET: case IB_QPT_UD: case MLX5_IB_QPT_REG_UMR: @@ -2712,11 +2687,12 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1, true, qp); - if (create_flags) + if (create_flags) { mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n", create_flags); - - return (create_flags) ? -EINVAL : 0; + return -EOPNOTSUPP; + } + return 0; } static int process_udata_size(struct mlx5_ib_dev *dev, @@ -3102,7 +3078,7 @@ static int ib_to_mlx5_rate_map(u8 rate) return 5; default: return rate + MLX5_STAT_RATE_OFFSET; - }; + } return 0; } @@ -4247,6 +4223,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int err = -EINVAL; int port; + if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) + return -EOPNOTSUPP; + if (ibqp->rwq_ind_tbl) return -ENOSYS; @@ -4576,7 +4555,9 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path); - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC_INI || + qp->ibqp.qp_type == IB_QPT_XRC_TGT) { to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path); to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, alt_path); qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index); @@ -4882,7 +4863,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, MLX5_SET(rqc, rqc, delay_drop_en, 1); } rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); + mlx5_ib_populate_pas(rwq->umem, 1UL << rwq->page_shift, rq_pas0, 0); err = mlx5_core_create_rq_tracked(dev, in, inlen, &rwq->core_qp); if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) { err = set_delay_drop(dev); diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 887270dd3ce2..4ac429e72004 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -116,7 +116,7 @@ static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr) { struct mlx5_ib_mr *mr = to_mmr(ibmr); - return fill_res_raw(msg, mr->dev, MLX5_SGMT_TYPE_PRM_QUERY_MKEY, + return fill_res_raw(msg, mr_to_mdev(mr), MLX5_SGMT_TYPE_PRM_QUERY_MKEY, mlx5_mkey_to_idx(mr->mmkey.key)); } diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index e2f720eec1e1..fab6736e4d6a 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -51,10 +51,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, udata, struct mlx5_ib_ucontext, ibucontext); size_t ucmdlen; int err; - int npages; - int page_shift; - int ncont; - u32 offset; u32 uidx = MLX5_IB_DEFAULT_UIDX; ucmdlen = min(udata->inlen, sizeof(ucmd)); @@ -86,32 +82,14 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, err = PTR_ERR(srq->umem); return err; } - - mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages, - &page_shift, &ncont, NULL); - err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, - &offset); - if (err) { - mlx5_ib_warn(dev, "bad offset\n"); - goto err_umem; - } - - in->pas = kvcalloc(ncont, sizeof(*in->pas), GFP_KERNEL); - if (!in->pas) { - err = -ENOMEM; - goto err_umem; - } - - mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0); + in->umem = srq->umem; err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db); if (err) { mlx5_ib_dbg(dev, "map doorbell failed\n"); - goto err_in; + goto err_umem; } - in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - in->page_offset = offset; in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && in->type != IB_SRQT_BASIC) @@ -119,9 +97,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, return 0; -err_in: - kvfree(in->pas); - err_umem: ib_umem_release(srq->umem); @@ -226,6 +201,11 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, struct mlx5_srq_attr in = {}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC && + init_attr->srq_type != IB_SRQT_TM) + return -EOPNOTSUPP; + /* Sanity check SRQ size before proceeding */ if (init_attr->attr.max_wr >= max_srq_wqes) { mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index 2c3627b2509d..a7e3dc5564ac 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -28,6 +28,7 @@ struct mlx5_srq_attr { u32 user_index; u64 db_record; __be64 *pas; + struct ib_umem *umem; u32 tm_log_list_size; u32 tm_next_tag; u32 tm_hw_phase_cnt; diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index db889ec3fd48..8b3385396599 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -92,6 +92,25 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) return srq; } +static int __set_srq_page_size(struct mlx5_srq_attr *in, + unsigned long page_size) +{ + if (!page_size) + return -EINVAL; + in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT; + + if (WARN_ON(get_pas_size(in) != + ib_umem_num_dma_blocks(in->umem, page_size) * sizeof(u64))) + return -EINVAL; + return 0; +} + +#define set_srq_page_size(in, typ, log_pgsz_fld) \ + __set_srq_page_size(in, mlx5_umem_find_best_quantized_pgoff( \ + (in)->umem, typ, log_pgsz_fld, \ + MLX5_ADAPTER_PAGE_SHIFT, page_offset, \ + 64, &(in)->page_offset)) + static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { @@ -103,6 +122,12 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, srqc, log_page_size); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -114,7 +139,13 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); set_srqc(srqc, in); - memcpy(pas, in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_SRQ); @@ -194,6 +225,12 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, xrc_srqc, log_page_size); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -207,7 +244,13 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, set_srqc(xrc_srqc, in); MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); - memcpy(pas, in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_xrc_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); @@ -289,11 +332,18 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, void *create_in = NULL; void *rmpc; void *wq; + void *pas; int pas_size; int outlen; int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, wq, log_wq_pg_sz); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; outlen = MLX5_ST_SZ_BYTES(create_rmp_out); @@ -309,8 +359,16 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); MLX5_SET(create_rmp_in, create_in, uid, in->uid); + pas = MLX5_ADDR_OF(rmpc, rmpc, wq.pas); + set_wq(wq, in); - memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP); err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen); @@ -421,10 +479,17 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, void *create_in; void *xrqc; void *wq; + void *pas; int pas_size; int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, wq, log_wq_pg_sz); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -433,9 +498,16 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + pas = MLX5_ADDR_OF(xrqc, xrqc, wq.pas); set_wq(wq, in); - memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); if (in->type == IB_SRQT_TM) { MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); |