Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--  drivers/infiniband/hw/mlx5/counters.c  |   7
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c        |  77
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c      |  90
-rw-r--r--  drivers/infiniband/hw/mlx5/fs.c        |   6
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.c    | 112
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.h    |  45
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c      | 254
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c       | 192
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h   | 106
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c        | 960
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c       |  56
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c        | 197
-rw-r--r--  drivers/infiniband/hw/mlx5/restrack.c  |   2
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c       |  34
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.h       |   1
-rw-r--r--  drivers/infiniband/hw/mlx5/srq_cmd.c   |  80
16 files changed, 1187 insertions(+), 1032 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 70c8fd67ee2f..084652e2b15a 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -138,13 +138,6 @@ static int mlx5_ib_create_counters(struct ib_counters *counters,
}
-static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
-{
- return MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
- MLX5_ESWITCH_OFFLOADS;
-}
-
static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
u8 port_num)
{
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index fb62f1d04afa..eb92cefffd77 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -707,10 +707,10 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
int *cqe_size, int *index, int *inlen)
{
struct mlx5_ib_create_cq ucmd = {};
+ unsigned long page_size;
+ unsigned int page_offset_quantized;
size_t ucmdlen;
- int page_shift;
__be64 *pas;
- int npages;
int ncont;
void *cqc;
int err;
@@ -742,14 +742,24 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
return err;
}
+ page_size = mlx5_umem_find_best_cq_quantized_pgoff(
+ cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64, &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto err_umem;
+ }
+
err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &cq->db);
if (err)
goto err_umem;
- mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift,
- &ncont, NULL);
- mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
- ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
+ ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size);
+ mlx5_ib_dbg(
+ dev,
+ "addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n",
+ ucmd.buf_addr, entries * ucmd.cqe_size,
+ ib_umem_num_pages(cq->buf.umem), page_size, ncont);
*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
@@ -760,11 +770,12 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
}
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
- mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0);
+ mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0);
cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
MLX5_SET(cqc, cqc, log_page_size,
- page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(cqc, cqc, page_offset, page_offset_quantized);
if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
*index = ucmd.uar_page_index;
@@ -1128,13 +1139,12 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
}
static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
- int entries, struct ib_udata *udata, int *npas,
- int *page_shift, int *cqe_size)
+ int entries, struct ib_udata *udata,
+ int *cqe_size)
{
struct mlx5_ib_resize_cq ucmd;
struct ib_umem *umem;
int err;
- int npages;
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
if (err)
@@ -1155,9 +1165,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
return err;
}
- mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift,
- npas, NULL);
-
cq->resize_umem = umem;
*cqe_size = ucmd.cqe_size;
@@ -1250,7 +1257,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
int err;
int npas;
__be64 *pas;
- int page_shift;
+ unsigned int page_offset_quantized = 0;
+ unsigned int page_shift;
int inlen;
int cqe_size;
unsigned long flags;
@@ -1277,22 +1285,34 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
mutex_lock(&cq->resize_mutex);
if (udata) {
- err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
- &cqe_size);
+ unsigned long page_size;
+
+ err = resize_user(dev, cq, entries, udata, &cqe_size);
+ if (err)
+ goto ex;
+
+ page_size = mlx5_umem_find_best_cq_quantized_pgoff(
+ cq->resize_umem, cqc, log_page_size,
+ MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64,
+ &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto ex_resize;
+ }
+ npas = ib_umem_num_dma_blocks(cq->resize_umem, page_size);
+ page_shift = order_base_2(page_size);
} else {
+ struct mlx5_frag_buf *frag_buf;
+
cqe_size = 64;
err = resize_kernel(dev, cq, entries, cqe_size);
- if (!err) {
- struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf;
-
- npas = frag_buf->npages;
- page_shift = frag_buf->page_shift;
- }
+ if (err)
+ goto ex;
+ frag_buf = &cq->resize_buf->frag_buf;
+ npas = frag_buf->npages;
+ page_shift = frag_buf->page_shift;
}
- if (err)
- goto ex;
-
inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;
@@ -1304,8 +1324,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas);
if (udata)
- mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
- pas, 0);
+ mlx5_ib_populate_pas(cq->resize_umem, 1UL << page_shift, pas,
+ 0);
else
mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas);
@@ -1319,6 +1339,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
MLX5_SET(cqc, cqc, log_page_size,
page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(cqc, cqc, page_offset, page_offset_quantized);
MLX5_SET(cqc, cqc, cqe_sz,
cqe_sz_to_mlx_sz(cqe_size,
cq->private_flags &
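The CQC programming above reduces to simple arithmetic once a DMA block size has been picked: the driver writes order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT into log_page_size, i.e. the block size expressed as a power of two above the device's 4 KiB base page. The stand-alone sketch below replays that arithmetic with a purely illustrative 64 KiB block size; it is not part of the patch.

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 64 * 1024;	/* illustrative DMA block size */
	unsigned int adapter_page_shift = 12;	/* MLX5_ADAPTER_PAGE_SHIFT */
	unsigned int order = 0;

	/* order_base_2(page_size): smallest n with (1 << n) >= page_size */
	while ((1UL << order) < page_size)
		order++;

	/* what ends up in cqc.log_page_size: here 16 - 12 = 4 */
	printf("log_page_size = %u\n", order - adapter_page_shift);
	return 0;
}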
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 9e3d8b826498..819c142857d6 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -93,9 +93,6 @@ struct devx_async_event_file {
struct devx_umem {
struct mlx5_core_dev *mdev;
struct ib_umem *umem;
- u32 page_offset;
- int page_shift;
- int ncont;
u32 dinlen;
u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
};
@@ -1311,7 +1308,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
else
ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox,
obj->dinlen, out, sizeof(out));
- if (ib_is_destroy_retryable(ret, why, uobject))
+ if (ret)
return ret;
devx_event_table = &dev->devx_event_table;
@@ -2057,9 +2054,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
u64 addr;
size_t size;
u32 access;
- int npages;
int err;
- u32 page_mask;
if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
@@ -2073,57 +2068,62 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
if (err)
return err;
- err = ib_check_mr_access(access);
+ err = ib_check_mr_access(&dev->ib_dev, access);
if (err)
return err;
obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access);
if (IS_ERR(obj->umem))
return PTR_ERR(obj->umem);
-
- mlx5_ib_cont_pages(obj->umem, obj->umem->address,
- MLX5_MKEY_PAGE_SHIFT_MASK, &npages,
- &obj->page_shift, &obj->ncont, NULL);
-
- if (!npages) {
- ib_umem_release(obj->umem);
- return -EINVAL;
- }
-
- page_mask = (1 << obj->page_shift) - 1;
- obj->page_offset = obj->umem->address & page_mask;
-
return 0;
}
-static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs,
+static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev,
+ struct uverbs_attr_bundle *attrs,
struct devx_umem *obj,
struct devx_umem_reg_cmd *cmd)
{
+ unsigned int page_size;
+ __be64 *mtt;
+ void *umem;
+
+ /*
+ * We don't know what the user intends to use this umem for, but the HW
+ * restrictions must be met. MR, doorbell records, QP, WQ and CQ all
+ * have different requirements. Since we have no idea how to sort this
+ * out, only support PAGE_SIZE with the expectation that userspace will
+ * provide the necessary alignments inside the known PAGE_SIZE and that
+ * FW will check everything.
+ */
+ page_size = ib_umem_find_best_pgoff(
+ obj->umem, PAGE_SIZE,
+ __mlx5_page_offset_to_bitmask(__mlx5_bit_sz(umem, page_offset),
+ 0));
+ if (!page_size)
+ return -EINVAL;
+
cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
- (MLX5_ST_SZ_BYTES(mtt) * obj->ncont);
+ (MLX5_ST_SZ_BYTES(mtt) *
+ ib_umem_num_dma_blocks(obj->umem, page_size));
cmd->in = uverbs_zalloc(attrs, cmd->inlen);
- return PTR_ERR_OR_ZERO(cmd->in);
-}
-
-static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
- struct devx_umem *obj,
- struct devx_umem_reg_cmd *cmd)
-{
- void *umem;
- __be64 *mtt;
+ if (IS_ERR(cmd->in))
+ return PTR_ERR(cmd->in);
umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM);
- MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
- MLX5_SET(umem, umem, log_page_size, obj->page_shift -
- MLX5_ADAPTER_PAGE_SHIFT);
- MLX5_SET(umem, umem, page_offset, obj->page_offset);
- mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt,
+ MLX5_SET64(umem, umem, num_of_mtt,
+ ib_umem_num_dma_blocks(obj->umem, page_size));
+ MLX5_SET(umem, umem, log_page_size,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(umem, umem, page_offset,
+ ib_umem_dma_offset(obj->umem, page_size));
+
+ mlx5_ib_populate_pas(obj->umem, page_size, mtt,
(obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
- MLX5_IB_MTT_READ);
+ MLX5_IB_MTT_READ);
+ return 0;
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
@@ -2150,12 +2150,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
if (err)
goto err_obj_free;
- err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd);
+ err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd);
if (err)
goto err_umem_release;
- devx_umem_reg_cmd_build(dev, obj, &cmd);
-
MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid);
err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
sizeof(cmd.out));
@@ -2187,7 +2185,7 @@ static int devx_umem_cleanup(struct ib_uobject *uobject,
int err;
err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
- if (ib_is_destroy_retryable(err, why, uobject))
+ if (err)
return err;
ib_umem_release(obj->umem);
@@ -2600,8 +2598,8 @@ static const struct file_operations devx_async_event_fops = {
.llseek = no_llseek,
};
-static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
- enum rdma_remove_reason why)
+static void devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
{
struct devx_async_cmd_event_file *comp_ev_file =
container_of(uobj, struct devx_async_cmd_event_file,
@@ -2623,11 +2621,10 @@ static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
kvfree(entry);
}
spin_unlock_irq(&comp_ev_file->ev_queue.lock);
- return 0;
};
-static int devx_async_event_destroy_uobj(struct ib_uobject *uobj,
- enum rdma_remove_reason why)
+static void devx_async_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
{
struct devx_async_event_file *ev_file =
container_of(uobj, struct devx_async_event_file,
@@ -2671,7 +2668,6 @@ static int devx_async_event_destroy_uobj(struct ib_uobject *uobj,
mutex_unlock(&dev->devx_event_table.event_xa_lock);
put_device(&dev->ib_dev.dev);
- return 0;
};
DECLARE_UVERBS_NAMED_METHOD(
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 492cfe063bca..25da0b05b4e2 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -2035,11 +2035,9 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject,
struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_flow_matcher *obj = uobject->object;
- int ret;
- ret = ib_destroy_usecnt(&obj->usecnt, why, uobject);
- if (ret)
- return ret;
+ if (atomic_read(&obj->usecnt))
+ return -EBUSY;
kfree(obj);
return 0;
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 5c3d052ac30b..9164cc069ad4 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -13,7 +13,7 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
struct mlx5_ib_dev *ibdev;
int vport_index;
- ibdev = mlx5_ib_get_uplink_ibdev(dev->priv.eswitch);
+ ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
vport_index = rep->vport_index;
ibdev->port[vport_index].rep = rep;
@@ -33,6 +33,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
const struct mlx5_ib_profile *profile;
struct mlx5_ib_dev *ibdev;
int vport_index;
+ int ret;
if (rep->vport == MLX5_VPORT_UPLINK)
profile = &raw_eth_profile;
@@ -46,8 +47,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
GFP_KERNEL);
if (!ibdev->port) {
- ib_dealloc_device(&ibdev->ib_dev);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail_port;
}
ibdev->is_rep = true;
@@ -58,12 +59,24 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
ibdev->mdev = dev;
ibdev->num_ports = num_ports;
- if (!__mlx5_ib_add(ibdev, profile))
- return -EINVAL;
+ ret = __mlx5_ib_add(ibdev, profile);
+ if (ret)
+ goto fail_add;
rep->rep_data[REP_IB].priv = ibdev;
return 0;
+
+fail_add:
+ kfree(ibdev->port);
+fail_port:
+ ib_dealloc_device(&ibdev->ib_dev);
+ return ret;
+}
+
+static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
+{
+ return rep->rep_data[REP_IB].priv;
}
static void
@@ -83,59 +96,18 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}
-static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
-{
- return mlx5_ib_rep_to_dev(rep);
-}
-
static const struct mlx5_eswitch_rep_ops rep_ops = {
.load = mlx5_ib_vport_rep_load,
.unload = mlx5_ib_vport_rep_unload,
- .get_proto_dev = mlx5_ib_vport_get_proto_dev,
+ .get_proto_dev = mlx5_ib_rep_to_dev,
};
-void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev)
-{
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
-
- mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
-}
-
-void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev)
-{
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
-
- mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
-}
-
-u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
-{
- return mlx5_eswitch_mode(esw);
-}
-
-struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
- u16 vport_num)
-{
- return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_IB);
-}
-
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
u16 vport_num)
{
return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH);
}
-struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
-{
- return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
-}
-
-struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
- u16 vport_num)
-{
- return mlx5_eswitch_vport_rep(esw, vport_num);
-}
-
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq,
u16 port)
@@ -154,3 +126,49 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
return mlx5_eswitch_add_send_to_vport_rule(esw, rep->vport,
sq->base.mqp.qpn);
}
+
+static int mlx5r_rep_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = idev->mdev;
+ struct mlx5_eswitch *esw;
+
+ esw = mdev->priv.eswitch;
+ mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+ return 0;
+}
+
+static void mlx5r_rep_remove(struct auxiliary_device *adev)
+{
+ struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = idev->mdev;
+ struct mlx5_eswitch *esw;
+
+ esw = mdev->priv.eswitch;
+ mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+}
+
+static const struct auxiliary_device_id mlx5r_rep_id_table[] = {
+ { .name = MLX5_ADEV_NAME ".rdma-rep", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table);
+
+static struct auxiliary_driver mlx5r_rep_driver = {
+ .name = "rep",
+ .probe = mlx5r_rep_probe,
+ .remove = mlx5r_rep_remove,
+ .id_table = mlx5r_rep_id_table,
+};
+
+int mlx5r_rep_init(void)
+{
+ return auxiliary_driver_register(&mlx5r_rep_driver);
+}
+
+void mlx5r_rep_cleanup(void)
+{
+ auxiliary_driver_unregister(&mlx5r_rep_driver);
+}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
index 5b30d3fa8f8d..ce1dcb105dbd 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.h
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -12,47 +12,16 @@
extern const struct mlx5_ib_profile raw_eth_profile;
#ifdef CONFIG_MLX5_ESWITCH
-u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
-struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
- u16 vport_num);
-struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
-struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
- u16 vport_num);
-void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev);
-void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev);
+int mlx5r_rep_init(void);
+void mlx5r_rep_cleanup(void);
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq,
u16 port);
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
u16 vport_num);
#else /* CONFIG_MLX5_ESWITCH */
-static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
-{
- return MLX5_ESWITCH_NONE;
-}
-
-static inline
-struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
- u16 vport_num)
-{
- return NULL;
-}
-
-static inline
-struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
-{
- return NULL;
-}
-
-static inline
-struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
- u16 vport_num)
-{
- return NULL;
-}
-
-static inline void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) {}
-static inline void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) {}
+static inline int mlx5r_rep_init(void) { return 0; }
+static inline void mlx5r_rep_cleanup(void) {}
static inline
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq,
@@ -68,10 +37,4 @@ struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
return NULL;
}
#endif
-
-static inline
-struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
-{
- return rep->rep_data[REP_IB].priv;
-}
#endif /* __MLX5_IB_REP_H__ */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 246e3cbe0b2c..3bae9ba0ead8 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -75,12 +75,6 @@ static LIST_HEAD(mlx5_ib_dev_list);
*/
static DEFINE_MUTEX(mlx5_ib_multiport_mutex);
-/* We can't use an array for xlt_emergency_page because dma_map_single
- * doesn't work on kernel modules memory
- */
-static unsigned long xlt_emergency_page;
-static struct mutex xlt_emergency_page_mutex;
-
struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi)
{
struct mlx5_ib_dev *dev;
@@ -425,10 +419,22 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
*active_width = IB_WIDTH_2X;
*active_speed = IB_SPEED_HDR;
break;
+ case MLX5E_PROT_MASK(MLX5E_100GAUI_1_100GBASE_CR_KR):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_NDR;
+ break;
case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4):
*active_width = IB_WIDTH_4X;
*active_speed = IB_SPEED_HDR;
break;
+ case MLX5E_PROT_MASK(MLX5E_200GAUI_2_200GBASE_CR2_KR2):
+ *active_width = IB_WIDTH_2X;
+ *active_speed = IB_SPEED_NDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_NDR;
+ break;
default:
return -EINVAL;
}
@@ -2628,7 +2634,7 @@ static ssize_t fw_pages_show(struct device *device,
struct mlx5_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
+ return sysfs_emit(buf, "%d\n", dev->mdev->priv.fw_pages);
}
static DEVICE_ATTR_RO(fw_pages);
@@ -2638,7 +2644,7 @@ static ssize_t reg_pages_show(struct device *device,
struct mlx5_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
+ return sysfs_emit(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}
static DEVICE_ATTR_RO(reg_pages);
@@ -2648,7 +2654,7 @@ static ssize_t hca_type_show(struct device *device,
struct mlx5_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
+ return sysfs_emit(buf, "MT%d\n", dev->mdev->pdev->device);
}
static DEVICE_ATTR_RO(hca_type);
@@ -2658,7 +2664,7 @@ static ssize_t hw_rev_show(struct device *device,
struct mlx5_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "%x\n", dev->mdev->rev_id);
+ return sysfs_emit(buf, "%x\n", dev->mdev->rev_id);
}
static DEVICE_ATTR_RO(hw_rev);
@@ -2668,8 +2674,8 @@ static ssize_t board_id_show(struct device *device,
struct mlx5_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
- dev->mdev->board_id);
+ return sysfs_emit(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
+ dev->mdev->board_id);
}
static DEVICE_ATTR_RO(board_id);
@@ -4024,6 +4030,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.create_cq = mlx5_ib_create_cq,
.create_qp = mlx5_ib_create_qp,
.create_srq = mlx5_ib_create_srq,
+ .create_user_ah = mlx5_ib_create_ah,
.dealloc_pd = mlx5_ib_dealloc_pd,
.dealloc_ucontext = mlx5_ib_dealloc_ucontext,
.del_gid = mlx5_ib_del_gid,
@@ -4141,42 +4148,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
struct mlx5_core_dev *mdev = dev->mdev;
int err;
- dev->ib_dev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_REREG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
- (1ull << IB_USER_VERBS_CMD_OPEN_QP);
- dev->ib_dev.uverbs_ex_cmd_mask =
- (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
-
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
IS_ENABLED(CONFIG_MLX5_CORE_IPOIB))
ib_set_device_ops(&dev->ib_dev,
@@ -4187,19 +4158,11 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));
- if (MLX5_CAP_GEN(mdev, imaicl)) {
- dev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
+ if (MLX5_CAP_GEN(mdev, imaicl))
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops);
- }
- if (MLX5_CAP_GEN(mdev, xrc)) {
- dev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
- (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+ if (MLX5_CAP_GEN(mdev, xrc))
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops);
- }
if (MLX5_CAP_DEV_MEM(mdev, memic) ||
MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
@@ -4278,12 +4241,6 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
@@ -4593,8 +4550,8 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
ib_dealloc_device(&dev->ib_dev);
}
-void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
- const struct mlx5_ib_profile *profile)
+int __mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile)
{
int err;
int i;
@@ -4610,13 +4567,16 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
}
dev->ib_active = true;
-
- return dev;
+ return 0;
err_out:
- __mlx5_ib_remove(dev, profile, i);
-
- return NULL;
+ /* Clean up stages which were initialized */
+ while (i) {
+ i--;
+ if (profile->stage[i].cleanup)
+ profile->stage[i].cleanup(dev);
+ }
+ return -ENOMEM;
}
static const struct mlx5_ib_profile pf_profile = {
@@ -4739,8 +4699,11 @@ const struct mlx5_ib_profile raw_eth_profile = {
NULL),
};
-static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
+static int mlx5r_mp_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
{
+ struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = idev->mdev;
struct mlx5_ib_multiport_info *mpi;
struct mlx5_ib_dev *dev;
bool bound = false;
@@ -4748,15 +4711,14 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
mpi = kzalloc(sizeof(*mpi), GFP_KERNEL);
if (!mpi)
- return NULL;
+ return -ENOMEM;
mpi->mdev = mdev;
-
err = mlx5_query_nic_vport_system_image_guid(mdev,
&mpi->sys_image_guid);
if (err) {
kfree(mpi);
- return NULL;
+ return err;
}
mutex_lock(&mlx5_ib_multiport_mutex);
@@ -4777,40 +4739,46 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
}
mutex_unlock(&mlx5_ib_multiport_mutex);
- return mpi;
+ dev_set_drvdata(&adev->dev, mpi);
+ return 0;
}
-static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
+static void mlx5r_mp_remove(struct auxiliary_device *adev)
{
+ struct mlx5_ib_multiport_info *mpi;
+
+ mpi = dev_get_drvdata(&adev->dev);
+ mutex_lock(&mlx5_ib_multiport_mutex);
+ if (mpi->ibdev)
+ mlx5_ib_unbind_slave_port(mpi->ibdev, mpi);
+ list_del(&mpi->list);
+ mutex_unlock(&mlx5_ib_multiport_mutex);
+ kfree(mpi);
+}
+
+static int mlx5r_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = idev->mdev;
const struct mlx5_ib_profile *profile;
+ int port_type_cap, num_ports, ret;
enum rdma_link_layer ll;
struct mlx5_ib_dev *dev;
- int port_type_cap;
- int num_ports;
-
- if (MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_ib_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) {
- if (!mlx5_core_mp_enabled(mdev))
- mlx5_ib_register_vport_reps(mdev);
- return mdev;
- }
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
- if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET)
- return mlx5_ib_add_slave_port(mdev);
-
num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
MLX5_CAP_GEN(mdev, num_vhca_ports));
dev = ib_alloc_device(mlx5_ib_dev, ib_dev);
if (!dev)
- return NULL;
+ return -ENOMEM;
dev->port = kcalloc(num_ports, sizeof(*dev->port),
GFP_KERNEL);
if (!dev->port) {
ib_dealloc_device(&dev->ib_dev);
- return NULL;
+ return -ENOMEM;
}
dev->mdev = mdev;
@@ -4821,80 +4789,96 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
else
profile = &pf_profile;
- return __mlx5_ib_add(dev, profile);
+ ret = __mlx5_ib_add(dev, profile);
+ if (ret) {
+ kfree(dev->port);
+ ib_dealloc_device(&dev->ib_dev);
+ return ret;
+ }
+
+ dev_set_drvdata(&adev->dev, dev);
+ return 0;
}
-static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
+static void mlx5r_remove(struct auxiliary_device *adev)
{
- struct mlx5_ib_multiport_info *mpi;
struct mlx5_ib_dev *dev;
- if (MLX5_ESWITCH_MANAGER(mdev) && context == mdev) {
- mlx5_ib_unregister_vport_reps(mdev);
- return;
- }
-
- if (mlx5_core_is_mp_slave(mdev)) {
- mpi = context;
- mutex_lock(&mlx5_ib_multiport_mutex);
- if (mpi->ibdev)
- mlx5_ib_unbind_slave_port(mpi->ibdev, mpi);
- list_del(&mpi->list);
- mutex_unlock(&mlx5_ib_multiport_mutex);
- kfree(mpi);
- return;
- }
-
- dev = context;
+ dev = dev_get_drvdata(&adev->dev);
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}
-static struct mlx5_interface mlx5_ib_interface = {
- .add = mlx5_ib_add,
- .remove = mlx5_ib_remove,
- .protocol = MLX5_INTERFACE_PROTOCOL_IB,
+static const struct auxiliary_device_id mlx5r_mp_id_table[] = {
+ { .name = MLX5_ADEV_NAME ".multiport", },
+ {},
};
-unsigned long mlx5_ib_get_xlt_emergency_page(void)
-{
- mutex_lock(&xlt_emergency_page_mutex);
- return xlt_emergency_page;
-}
+static const struct auxiliary_device_id mlx5r_id_table[] = {
+ { .name = MLX5_ADEV_NAME ".rdma", },
+ {},
+};
-void mlx5_ib_put_xlt_emergency_page(void)
-{
- mutex_unlock(&xlt_emergency_page_mutex);
-}
+MODULE_DEVICE_TABLE(auxiliary, mlx5r_mp_id_table);
+MODULE_DEVICE_TABLE(auxiliary, mlx5r_id_table);
+
+static struct auxiliary_driver mlx5r_mp_driver = {
+ .name = "multiport",
+ .probe = mlx5r_mp_probe,
+ .remove = mlx5r_mp_remove,
+ .id_table = mlx5r_mp_id_table,
+};
+
+static struct auxiliary_driver mlx5r_driver = {
+ .name = "rdma",
+ .probe = mlx5r_probe,
+ .remove = mlx5r_remove,
+ .id_table = mlx5r_id_table,
+};
static int __init mlx5_ib_init(void)
{
- int err;
+ int ret;
- xlt_emergency_page = __get_free_page(GFP_KERNEL);
+ xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL);
if (!xlt_emergency_page)
return -ENOMEM;
- mutex_init(&xlt_emergency_page_mutex);
-
mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0);
if (!mlx5_ib_event_wq) {
- free_page(xlt_emergency_page);
+ free_page((unsigned long)xlt_emergency_page);
return -ENOMEM;
}
mlx5_ib_odp_init();
+ ret = mlx5r_rep_init();
+ if (ret)
+ goto rep_err;
+ ret = auxiliary_driver_register(&mlx5r_mp_driver);
+ if (ret)
+ goto mp_err;
+ ret = auxiliary_driver_register(&mlx5r_driver);
+ if (ret)
+ goto drv_err;
+ return 0;
- err = mlx5_register_interface(&mlx5_ib_interface);
-
- return err;
+drv_err:
+ auxiliary_driver_unregister(&mlx5r_mp_driver);
+mp_err:
+ mlx5r_rep_cleanup();
+rep_err:
+ destroy_workqueue(mlx5_ib_event_wq);
+ free_page((unsigned long)xlt_emergency_page);
+ return ret;
}
static void __exit mlx5_ib_cleanup(void)
{
- mlx5_unregister_interface(&mlx5_ib_interface);
+ auxiliary_driver_unregister(&mlx5r_driver);
+ auxiliary_driver_unregister(&mlx5r_mp_driver);
+ mlx5r_rep_cleanup();
+
destroy_workqueue(mlx5_ib_event_wq);
- mutex_destroy(&xlt_emergency_page_mutex);
- free_page(xlt_emergency_page);
+ free_page((unsigned long)xlt_emergency_page);
}
module_init(mlx5_ib_init);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 13de3d2edd34..844545064c9e 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -36,161 +36,65 @@
#include "mlx5_ib.h"
#include <linux/jiffies.h>
-/* @umem: umem object to scan
- * @addr: ib virtual address requested by the user
- * @max_page_shift: high limit for page_shift - 0 means no limit
- * @count: number of PAGE_SIZE pages covered by umem
- * @shift: page shift for the compound pages found in the region
- * @ncont: number of compund pages
- * @order: log2 of the number of compound pages
+/*
+ * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be
+ * filled in the pas array.
*/
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
- unsigned long max_page_shift,
- int *count, int *shift,
- int *ncont, int *order)
+void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
+ u64 access_flags)
{
- unsigned long tmp;
- unsigned long m;
- u64 base = ~0, p = 0;
- u64 len, pfn;
- int i = 0;
- struct scatterlist *sg;
- int entry;
-
- addr = addr >> PAGE_SHIFT;
- tmp = (unsigned long)addr;
- m = find_first_bit(&tmp, BITS_PER_LONG);
- if (max_page_shift)
- m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m);
-
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> PAGE_SHIFT;
- pfn = sg_dma_address(sg) >> PAGE_SHIFT;
- if (base + p != pfn) {
- /* If either the offset or the new
- * base are unaligned update m
- */
- tmp = (unsigned long)(pfn | p);
- if (!IS_ALIGNED(tmp, 1 << m))
- m = find_first_bit(&tmp, BITS_PER_LONG);
-
- base = pfn;
- p = 0;
- }
-
- p += len;
- i += len;
- }
-
- if (i) {
- m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
-
- if (order)
- *order = ilog2(roundup_pow_of_two(i) >> m);
-
- *ncont = DIV_ROUND_UP(i, (1 << m));
- } else {
- m = 0;
+ struct ib_block_iter biter;
- if (order)
- *order = 0;
-
- *ncont = 0;
+ rdma_umem_for_each_dma_block (umem, &biter, page_size) {
+ *pas = cpu_to_be64(rdma_block_iter_dma_address(&biter) |
+ access_flags);
+ pas++;
}
- *shift = PAGE_SHIFT + m;
- *count = i;
}
/*
- * Populate the given array with bus addresses from the umem.
- *
- * dev - mlx5_ib device
- * umem - umem to use to fill the pages
- * page_shift - determines the page size used in the resulting array
- * offset - offset into the umem to start from,
- * only implemented for ODP umems
- * num_pages - total number of pages to fill
- * pas - bus addresses array to fill
- * access_flags - access flags to set on all present pages.
- use enum mlx5_ib_mtt_access_flags for this.
+ * Compute the page shift and page_offset for mailboxes that use a quantized
+ * page_offset. The granularity of the page offset scales according to page
+ * size.
*/
-void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, size_t offset, size_t num_pages,
- __be64 *pas, int access_flags)
+unsigned long __mlx5_umem_find_best_quantized_pgoff(
+ struct ib_umem *umem, unsigned long pgsz_bitmap,
+ unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale,
+ unsigned int *page_offset_quantized)
{
- int shift = page_shift - PAGE_SHIFT;
- int mask = (1 << shift) - 1;
- int i, k, idx;
- u64 cur = 0;
- u64 base;
- int len;
- struct scatterlist *sg;
- int entry;
-
- i = 0;
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> PAGE_SHIFT;
- base = sg_dma_address(sg);
-
- /* Skip elements below offset */
- if (i + len < offset << shift) {
- i += len;
- continue;
- }
-
- /* Skip pages below offset */
- if (i < offset << shift) {
- k = (offset << shift) - i;
- i = offset << shift;
- } else {
- k = 0;
- }
-
- for (; k < len; k++) {
- if (!(i & mask)) {
- cur = base + (k << PAGE_SHIFT);
- cur |= access_flags;
- idx = (i >> shift) - offset;
-
- pas[idx] = cpu_to_be64(cur);
- mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
- i >> shift, be64_to_cpu(pas[idx]));
- }
- i++;
-
- /* Stop after num_pages reached */
- if (i >> shift >= offset + num_pages)
- return;
- }
+ const u64 page_offset_mask = (1UL << page_offset_bits) - 1;
+ unsigned long page_size;
+ u64 page_offset;
+
+ page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, pgoff_bitmask);
+ if (!page_size)
+ return 0;
+
+ /*
+ * page size is the largest possible page size.
+ *
+ * Reduce the page_size, and thus the page_offset and quanta, until the
+ * page_offset fits into the mailbox field. Once page_size < scale this
+ * loop is guaranteed to terminate.
+ */
+ page_offset = ib_umem_dma_offset(umem, page_size);
+ while (page_offset & ~(u64)(page_offset_mask * (page_size / scale))) {
+ page_size /= 2;
+ page_offset = ib_umem_dma_offset(umem, page_size);
}
-}
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, __be64 *pas, int access_flags)
-{
- return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
- ib_umem_num_dma_blocks(umem, PAGE_SIZE),
- pas, access_flags);
-}
-int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
-{
- u64 page_size;
- u64 page_mask;
- u64 off_size;
- u64 off_mask;
- u64 buf_off;
-
- page_size = (u64)1 << page_shift;
- page_mask = page_size - 1;
- buf_off = addr & page_mask;
- off_size = page_size >> 6;
- off_mask = off_size - 1;
-
- if (buf_off & off_mask)
- return -EINVAL;
-
- *offset = buf_off >> ilog2(off_size);
- return 0;
+ /*
+ * The address is not aligned, or otherwise cannot be represented by the
+ * page_offset.
+ */
+ if (!(pgsz_bitmap & page_size))
+ return 0;
+
+ *page_offset_quantized =
+ (unsigned long)page_offset / (page_size / scale);
+ if (WARN_ON(*page_offset_quantized > page_offset_mask))
+ return 0;
+ return page_size;
}
#define WR_ID_BF 0xBF
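To see how the halving loop in __mlx5_umem_find_best_quantized_pgoff() behaves, the self-contained sketch below replays its arithmetic in plain C. The 2 MiB starting page size, the 0x203000 iova, and the 6-bit/scale-64 field are made-up inputs chosen only for illustration; page_offset is modeled simply as iova & (page_size - 1), and the kernel's final re-check of the reduced page size against the umem's real pgsz_bitmap is omitted.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t iova = 0x203000;		/* illustrative umem iova */
	uint64_t page_size = 2UL << 20;		/* illustrative starting page size */
	unsigned int page_offset_bits = 6;	/* illustrative mailbox field width */
	unsigned int scale = 64;		/* quantum is page_size / scale */
	uint64_t mask = (1ULL << page_offset_bits) - 1;
	uint64_t page_offset = iova & (page_size - 1);

	/*
	 * Halve page_size until page_offset is a multiple of page_size/scale
	 * that fits into the mailbox field, mirroring the loop above.
	 */
	while (page_offset & ~(mask * (page_size / scale))) {
		page_size /= 2;
		page_offset = iova & (page_size - 1);
	}

	/* Prints: page_size=262144 page_offset_quantized=3 */
	printf("page_size=%llu page_offset_quantized=%llu\n",
	       (unsigned long long)page_size,
	       (unsigned long long)(page_offset / (page_size / scale)));
	return 0;
}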
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b1f2b34e5955..b0fdc1b08e06 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -40,7 +40,73 @@
#define MLX5_IB_DEFAULT_UIDX 0xffffff
#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
-#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size)
+static __always_inline unsigned long
+__mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits,
+ unsigned int pgsz_shift)
+{
+ unsigned int largest_pg_shift =
+ min_t(unsigned long, (1ULL << log_pgsz_bits) - 1 + pgsz_shift,
+ BITS_PER_LONG - 1);
+
+ /*
+ * Despite a command allowing it, the device does not support lower than
+ * 4k page size.
+ */
+ pgsz_shift = max_t(unsigned int, MLX5_ADAPTER_PAGE_SHIFT, pgsz_shift);
+ return GENMASK(largest_pg_shift, pgsz_shift);
+}
+
+/*
+ * For mkc users, instead of a page_offset the command has a start_iova which
+ * specifies both the page_offset and the on-the-wire IOVA
+ */
+#define mlx5_umem_find_best_pgsz(umem, typ, log_pgsz_fld, pgsz_shift, iova) \
+ ib_umem_find_best_pgsz(umem, \
+ __mlx5_log_page_size_to_bitmap( \
+ __mlx5_bit_sz(typ, log_pgsz_fld), \
+ pgsz_shift), \
+ iova)
+
+static __always_inline unsigned long
+__mlx5_page_offset_to_bitmask(unsigned int page_offset_bits,
+ unsigned int offset_shift)
+{
+ unsigned int largest_offset_shift =
+ min_t(unsigned long, page_offset_bits - 1 + offset_shift,
+ BITS_PER_LONG - 1);
+
+ return GENMASK(largest_offset_shift, offset_shift);
+}
+
+/*
+ * QP/CQ/WQ/etc type commands take a page offset that satisfies:
+ * page_offset_quantized * (page_size/scale) = page_offset
+ * Which restricts allowed page sizes to ones that satisfy the above.
+ */
+unsigned long __mlx5_umem_find_best_quantized_pgoff(
+ struct ib_umem *umem, unsigned long pgsz_bitmap,
+ unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale,
+ unsigned int *page_offset_quantized);
+#define mlx5_umem_find_best_quantized_pgoff(umem, typ, log_pgsz_fld, \
+ pgsz_shift, page_offset_fld, \
+ scale, page_offset_quantized) \
+ __mlx5_umem_find_best_quantized_pgoff( \
+ umem, \
+ __mlx5_log_page_size_to_bitmap( \
+ __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \
+ __mlx5_bit_sz(typ, page_offset_fld), \
+ GENMASK(31, order_base_2(scale)), scale, \
+ page_offset_quantized)
+
+#define mlx5_umem_find_best_cq_quantized_pgoff(umem, typ, log_pgsz_fld, \
+ pgsz_shift, page_offset_fld, \
+ scale, page_offset_quantized) \
+ __mlx5_umem_find_best_quantized_pgoff( \
+ umem, \
+ __mlx5_log_page_size_to_bitmap( \
+ __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \
+ __mlx5_bit_sz(typ, page_offset_fld), 0, scale, \
+ page_offset_quantized)
enum {
MLX5_IB_MMAP_OFFSET_START = 9,
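The helpers added above just build bit masks of acceptable page sizes and offsets for ib_umem_find_best_pgsz()/ib_umem_find_best_pgoff(). A userspace stand-in for __mlx5_log_page_size_to_bitmap() is sketched below; the 6-bit field width is illustrative rather than a claim about any particular mailbox layout, and GENMASK_ULL is a local re-implementation of the kernel macro.

#include <stdio.h>
#include <stdint.h>

#define BITS_PER_LONG 64
/* Local stand-in for the kernel's GENMASK_ULL(h, l): bits h..l set. */
#define GENMASK_ULL(h, l) \
	((~0ULL << (l)) & (~0ULL >> (BITS_PER_LONG - 1 - (h))))

/*
 * Turn the width of a log_page_size mailbox field into a bitmap of byte
 * page sizes, clamped so nothing below 4 KiB (MLX5_ADAPTER_PAGE_SHIFT)
 * is offered, mirroring __mlx5_log_page_size_to_bitmap() above.
 */
static uint64_t log_page_size_to_bitmap(unsigned int log_pgsz_bits,
					unsigned int pgsz_shift)
{
	unsigned int largest = (1U << log_pgsz_bits) - 1 + pgsz_shift;

	if (largest > BITS_PER_LONG - 1)
		largest = BITS_PER_LONG - 1;
	if (pgsz_shift < 12)
		pgsz_shift = 12;
	return GENMASK_ULL(largest, pgsz_shift);
}

int main(void)
{
	/*
	 * With an illustrative 6-bit field and no extra shift, every
	 * power-of-two page size from 4 KiB up is a candidate:
	 * prints 0xfffffffffffff000.
	 */
	printf("pgsz bitmap = 0x%llx\n",
	       (unsigned long long)log_page_size_to_bitmap(6, 0));
	return 0;
}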
@@ -597,14 +663,12 @@ struct mlx5_ib_mr {
int max_descs;
int desc_size;
int access_mode;
+ unsigned int page_shift;
struct mlx5_core_mkey mmkey;
struct ib_umem *umem;
struct mlx5_shared_mr_info *smr_info;
struct list_head list;
- unsigned int order;
struct mlx5_cache_ent *cache_ent;
- int npages;
- struct mlx5_ib_dev *dev;
u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
struct mlx5_core_sig_ctx *sig;
void *descs_alloc;
@@ -1042,6 +1106,11 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)
return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
}
+static inline struct mlx5_ib_dev *mr_to_mdev(struct mlx5_ib_mr *mr)
+{
+ return to_mdev(mr->ibmr.device);
+}
+
static inline struct mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata)
{
struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
@@ -1189,9 +1258,9 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
int access_flags);
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr);
-int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
- u64 length, u64 virt_addr, int access_flags,
- struct ib_pd *pd, struct ib_udata *udata);
+struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
+ u64 length, u64 virt_addr, int access_flags,
+ struct ib_pd *pd, struct ib_udata *udata);
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
@@ -1210,7 +1279,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
size_t *out_mad_size, u16 *out_mad_pkey_index);
int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
-int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
struct ib_smp *out_mad);
@@ -1230,15 +1298,8 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props);
int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props);
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
- unsigned long max_page_shift,
- int *count, int *shift,
- int *ncont, int *order);
-void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, size_t offset, size_t num_pages,
- __be64 *pas, int access_flags);
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, __be64 *pas, int access_flags);
+void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
+ u64 access_flags);
void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
@@ -1283,7 +1344,7 @@ void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags, struct ib_sge *sg_list, u32 num_sge);
-int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable);
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
@@ -1305,7 +1366,7 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
{
return -EOPNOTSUPP;
}
-static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
+static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
{
return -EOPNOTSUPP;
}
@@ -1317,8 +1378,8 @@ extern const struct mmu_interval_notifier_ops mlx5_mn_ops;
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage);
-void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
- const struct mlx5_ib_profile *profile);
+int __mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile);
int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
u8 port, struct ifla_vf_info *info);
@@ -1456,8 +1517,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev,
return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages;
}
-unsigned long mlx5_ib_get_xlt_emergency_page(void);
-void mlx5_ib_put_xlt_emergency_page(void);
+extern void *xlt_emergency_page;
int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
struct mlx5_bfreg_info *bfregi, u32 bfregn,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index b261797b258f..24f8d59a42ea 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -41,6 +41,13 @@
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"
+/*
+ * We can't use an array for xlt_emergency_page because dma_map_single doesn't
+ * work on kernel modules memory
+ */
+void *xlt_emergency_page;
+static DEFINE_MUTEX(xlt_emergency_page_mutex);
+
enum {
MAX_PENDING_REG_MR = 8,
};
@@ -49,6 +56,9 @@ enum {
static void
create_mkey_callback(int status, struct mlx5_async_work *context);
+static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags,
+ unsigned int page_size, bool populate);
static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
struct ib_pd *pd)
@@ -123,19 +133,12 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
-static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start,
- u64 length)
-{
- return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
- length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
-}
-
static void create_mkey_callback(int status, struct mlx5_async_work *context)
{
struct mlx5_ib_mr *mr =
container_of(context, struct mlx5_ib_mr, cb_work);
- struct mlx5_ib_dev *dev = mr->dev;
struct mlx5_cache_ent *ent = mr->cache_ent;
+ struct mlx5_ib_dev *dev = ent->dev;
unsigned long flags;
if (status) {
@@ -172,9 +175,7 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return NULL;
- mr->order = ent->order;
mr->cache_ent = ent;
- mr->dev = ent->dev;
set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
MLX5_SET(mkc, mkc, free, 1);
@@ -642,6 +643,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
if (mlx5_mr_cache_invalidate(mr)) {
detach_mr_from_cache(mr);
destroy_mkey(dev, mr);
+ kfree(mr);
return;
}
@@ -867,57 +869,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev)
return MLX5_MAX_UMR_SHIFT;
}
-static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length,
- int access_flags, struct ib_umem **umem, int *npages,
- int *page_shift, int *ncont, int *order)
-{
- struct ib_umem *u;
-
- *umem = NULL;
-
- if (access_flags & IB_ACCESS_ON_DEMAND) {
- struct ib_umem_odp *odp;
-
- odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
- &mlx5_mn_ops);
- if (IS_ERR(odp)) {
- mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
- PTR_ERR(odp));
- return PTR_ERR(odp);
- }
-
- u = &odp->umem;
-
- *page_shift = odp->page_shift;
- *ncont = ib_umem_odp_num_pages(odp);
- *npages = *ncont << (*page_shift - PAGE_SHIFT);
- if (order)
- *order = ilog2(roundup_pow_of_two(*ncont));
- } else {
- u = ib_umem_get(&dev->ib_dev, start, length, access_flags);
- if (IS_ERR(u)) {
- mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
- return PTR_ERR(u);
- }
-
- mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
- page_shift, ncont, order);
- }
-
- if (!*npages) {
- mlx5_ib_warn(dev, "avoid zero region\n");
- ib_umem_release(u);
- return -EINVAL;
- }
-
- *umem = u;
-
- mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
- *npages, *ncont, *order, *page_shift);
-
- return 0;
-}
-
static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct mlx5_ib_umr_context *context =
@@ -974,25 +925,49 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
return &cache->ent[order];
}
-static struct mlx5_ib_mr *
-alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
- u64 len, int npages, int page_shift, unsigned int order,
- int access_flags)
+static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+ u64 length, int access_flags)
+{
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
+ mr->ibmr.length = length;
+ mr->ibmr.device = &dev->ib_dev;
+ mr->access_flags = access_flags;
+}
+
+static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
+ struct ib_umem *umem, u64 iova,
+ int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order);
+ struct mlx5_cache_ent *ent;
struct mlx5_ib_mr *mr;
+ unsigned int page_size;
- if (!ent)
- return ERR_PTR(-E2BIG);
-
- /* Matches access in alloc_cache_mr() */
- if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
- return ERR_PTR(-EOPNOTSUPP);
+ page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
+ if (WARN_ON(!page_size))
+ return ERR_PTR(-EINVAL);
+ ent = mr_cache_ent_from_order(
+ dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
+ /*
+ * Matches access in alloc_cache_mr(). If the MR can't come from the
+ * cache then synchronously create an uncached one.
+ */
+ if (!ent || ent->limit == 0 ||
+ !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) {
+ mutex_lock(&dev->slow_path_mutex);
+ mr = reg_create(pd, umem, iova, access_flags, page_size, false);
+ mutex_unlock(&dev->slow_path_mutex);
+ return mr;
+ }
mr = get_cache_mr(ent);
if (!mr) {
mr = create_cache_mr(ent);
+ /*
+ * The above already tried to do the same stuff as reg_create(),
+ * no reason to try it again.
+ */
if (IS_ERR(mr))
return mr;
}
@@ -1001,9 +976,12 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
mr->umem = umem;
mr->access_flags = access_flags;
mr->desc_size = sizeof(struct mlx5_mtt);
- mr->mmkey.iova = virt_addr;
- mr->mmkey.size = len;
+ mr->mmkey.iova = iova;
+ mr->mmkey.size = umem->length;
mr->mmkey.pd = to_mpd(pd)->pdn;
+ mr->page_shift = order_base_2(page_size);
+ mr->umem = umem;
+ set_mr_fields(dev, mr, umem->length, access_flags);
return mr;
}
@@ -1012,14 +990,144 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
MLX5_UMR_MTT_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000
+/*
+ * Allocate a temporary buffer to hold the per-page information to transfer to
+ * HW. For efficiency this should be as large as it can be, but buffer
+ * allocation failure is not allowed, so try smaller sizes.
+ */
+static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
+{
+ const size_t xlt_chunk_align =
+ MLX5_UMR_MTT_ALIGNMENT / sizeof(ent_size);
+ size_t size;
+ void *res = NULL;
+
+ static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
+
+ /*
+	 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context, just that the
+ * allocation can't trigger any kind of reclaim.
+ */
+ might_sleep();
+
+ gfp_mask |= __GFP_ZERO;
+
+ /*
+ * If the system already has a suitable high order page then just use
+ * that, but don't try hard to create one. This max is about 1M, so a
+ * free x86 huge page will satisfy it.
+ */
+ size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
+ MLX5_MAX_UMR_CHUNK);
+ *nents = size / ent_size;
+ res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+ get_order(size));
+ if (res)
+ return res;
+
+ if (size > MLX5_SPARE_UMR_CHUNK) {
+ size = MLX5_SPARE_UMR_CHUNK;
+ *nents = get_order(size) / ent_size;
+ res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+ get_order(size));
+ if (res)
+ return res;
+ }
+
+ *nents = PAGE_SIZE / ent_size;
+ res = (void *)__get_free_page(gfp_mask);
+ if (res)
+ return res;
+
+ mutex_lock(&xlt_emergency_page_mutex);
+ memset(xlt_emergency_page, 0, PAGE_SIZE);
+ return xlt_emergency_page;
+}
+
+static void mlx5_ib_free_xlt(void *xlt, size_t length)
+{
+ if (xlt == xlt_emergency_page) {
+ mutex_unlock(&xlt_emergency_page_mutex);
+ return;
+ }
+
+ free_pages((unsigned long)xlt, get_order(length));
+}
+
+/*
+ * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for
+ * submission.
+ */
+static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
+ struct mlx5_umr_wr *wr, struct ib_sge *sg,
+ size_t nents, size_t ent_size,
+ unsigned int flags)
+{
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct device *ddev = &dev->mdev->pdev->dev;
+ dma_addr_t dma;
+ void *xlt;
+
+ xlt = mlx5_ib_alloc_xlt(&nents, ent_size,
+ flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
+ GFP_KERNEL);
+ sg->length = nents * ent_size;
+ dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, dma)) {
+ mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
+ mlx5_ib_free_xlt(xlt, sg->length);
+ return NULL;
+ }
+ sg->addr = dma;
+ sg->lkey = dev->umrc.pd->local_dma_lkey;
+
+ memset(wr, 0, sizeof(*wr));
+ wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
+ if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
+ wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
+ wr->wr.sg_list = sg;
+ wr->wr.num_sge = 1;
+ wr->wr.opcode = MLX5_IB_WR_UMR;
+ wr->pd = mr->ibmr.pd;
+ wr->mkey = mr->mmkey.key;
+ wr->length = mr->mmkey.size;
+ wr->virt_addr = mr->mmkey.iova;
+ wr->access_flags = mr->access_flags;
+ wr->page_shift = mr->page_shift;
+ wr->xlt_size = sg->length;
+ return xlt;
+}
+
+static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
+ struct ib_sge *sg)
+{
+ struct device *ddev = &dev->mdev->pdev->dev;
+
+ dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
+ mlx5_ib_free_xlt(xlt, sg->length);
+}
+
+static unsigned int xlt_wr_final_send_flags(unsigned int flags)
+{
+ unsigned int res = 0;
+
+ if (flags & MLX5_IB_UPD_XLT_ENABLE)
+ res |= MLX5_IB_SEND_UMR_ENABLE_MR |
+ MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
+ MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+ if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS)
+ res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
+ if (flags & MLX5_IB_UPD_XLT_ADDR)
+ res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+ return res;
+}
+
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags)
{
- struct mlx5_ib_dev *dev = mr->dev;
- struct device *ddev = dev->ib_dev.dev.parent;
- int size;
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct device *ddev = &dev->mdev->pdev->dev;
void *xlt;
- dma_addr_t dma;
struct mlx5_umr_wr wr;
struct ib_sge sg;
int err = 0;
@@ -1030,15 +1138,17 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
const int page_mask = page_align - 1;
size_t pages_mapped = 0;
size_t pages_to_map = 0;
- size_t pages_iter = 0;
+ size_t pages_iter;
size_t size_to_map = 0;
- gfp_t gfp;
- bool use_emergency_page = false;
+ size_t orig_sg_length;
if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
!umr_can_use_indirect_mkey(dev))
return -EPERM;
+ if (WARN_ON(!mr->umem->is_odp))
+ return -EINVAL;
+
/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
* so we need to align the offset and length accordingly
*/
@@ -1046,63 +1156,21 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
npages += idx & page_mask;
idx &= ~page_mask;
}
-
- gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
- gfp |= __GFP_ZERO | __GFP_NOWARN;
-
pages_to_map = ALIGN(npages, page_align);
- size = desc_size * pages_to_map;
- size = min_t(int, size, MLX5_MAX_UMR_CHUNK);
-
- xlt = (void *)__get_free_pages(gfp, get_order(size));
- if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
- mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation od %d bytes\n",
- size, get_order(size), MLX5_SPARE_UMR_CHUNK);
-
- size = MLX5_SPARE_UMR_CHUNK;
- xlt = (void *)__get_free_pages(gfp, get_order(size));
- }
- if (!xlt) {
- mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
- xlt = (void *)mlx5_ib_get_xlt_emergency_page();
- size = PAGE_SIZE;
- memset(xlt, 0, size);
- use_emergency_page = true;
- }
- pages_iter = size / desc_size;
- dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
- if (dma_mapping_error(ddev, dma)) {
- mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
- err = -ENOMEM;
- goto free_xlt;
- }
+ xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags);
+ if (!xlt)
+ return -ENOMEM;
+ pages_iter = sg.length / desc_size;
+ orig_sg_length = sg.length;
- if (mr->umem->is_odp) {
- if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
- struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
- size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
+ if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
- pages_to_map = min_t(size_t, pages_to_map, max_pages);
- }
+ pages_to_map = min_t(size_t, pages_to_map, max_pages);
}
- sg.addr = dma;
- sg.lkey = dev->umrc.pd->local_dma_lkey;
-
- memset(&wr, 0, sizeof(wr));
- wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
- if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
- wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
- wr.wr.sg_list = &sg;
- wr.wr.num_sge = 1;
- wr.wr.opcode = MLX5_IB_WR_UMR;
-
- wr.pd = mr->ibmr.pd;
- wr.mkey = mr->mmkey.key;
- wr.length = mr->mmkey.size;
- wr.virt_addr = mr->mmkey.iova;
- wr.access_flags = mr->access_flags;
wr.page_shift = page_shift;
for (pages_mapped = 0;
@@ -1110,50 +1178,87 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
pages_mapped += pages_iter, idx += pages_iter) {
npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
size_to_map = npages * desc_size;
- dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
- if (mr->umem->is_odp) {
- mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
- } else {
- __mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx,
- npages, xlt,
- MLX5_IB_MTT_PRESENT);
- /* Clear padding after the pages
- * brought from the umem.
- */
- memset(xlt + size_to_map, 0, size - size_to_map);
- }
- dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
+ dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+ mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
+ dma_sync_single_for_device(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
- if (pages_mapped + pages_iter >= pages_to_map) {
- if (flags & MLX5_IB_UPD_XLT_ENABLE)
- wr.wr.send_flags |=
- MLX5_IB_SEND_UMR_ENABLE_MR |
- MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
- MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
- if (flags & MLX5_IB_UPD_XLT_PD ||
- flags & MLX5_IB_UPD_XLT_ACCESS)
- wr.wr.send_flags |=
- MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
- if (flags & MLX5_IB_UPD_XLT_ADDR)
- wr.wr.send_flags |=
- MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
- }
+ if (pages_mapped + pages_iter >= pages_to_map)
+ wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
wr.offset = idx * desc_size;
wr.xlt_size = sg.length;
err = mlx5_ib_post_send_wait(dev, &wr);
}
- dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+ sg.length = orig_sg_length;
+ mlx5_ib_unmap_free_xlt(dev, xlt, &sg);
+ return err;
+}
+
+/*
+ * Send the DMA list to the HW for a normal MR using UMR.
+ */
+static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+{
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct device *ddev = &dev->mdev->pdev->dev;
+ struct ib_block_iter biter;
+ struct mlx5_mtt *cur_mtt;
+ struct mlx5_umr_wr wr;
+ size_t orig_sg_length;
+ struct mlx5_mtt *mtt;
+ size_t final_size;
+ struct ib_sge sg;
+ int err = 0;
+
+ if (WARN_ON(mr->umem->is_odp))
+ return -EINVAL;
+
+ mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg,
+ ib_umem_num_dma_blocks(mr->umem,
+ 1 << mr->page_shift),
+ sizeof(*mtt), flags);
+ if (!mtt)
+ return -ENOMEM;
+ orig_sg_length = sg.length;
+
+ cur_mtt = mtt;
+ rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap,
+ BIT(mr->page_shift)) {
+ if (cur_mtt == (void *)mtt + sg.length) {
+ dma_sync_single_for_device(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+ err = mlx5_ib_post_send_wait(dev, &wr);
+ if (err)
+ goto err;
+ dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+ wr.offset += sg.length;
+ cur_mtt = mtt;
+ }
+
+ cur_mtt->ptag =
+ cpu_to_be64(rdma_block_iter_dma_address(&biter) |
+ MLX5_IB_MTT_PRESENT);
+ cur_mtt++;
+ }
-free_xlt:
- if (use_emergency_page)
- mlx5_ib_put_xlt_emergency_page();
- else
- free_pages((unsigned long)xlt, get_order(size));
+ final_size = (void *)cur_mtt - (void *)mtt;
+ sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
+ memset(cur_mtt, 0, sg.length - final_size);
+ wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
+ wr.xlt_size = sg.length;
+ dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
+ err = mlx5_ib_post_send_wait(dev, &wr);
+
+err:
+ sg.length = orig_sg_length;
+ mlx5_ib_unmap_free_xlt(dev, mtt, &sg);
return err;
}
@@ -1161,11 +1266,9 @@ free_xlt:
* If ibmr is NULL it will be allocated by reg_create.
* Else, the given ibmr will be used.
*/
-static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
- u64 virt_addr, u64 length,
- struct ib_umem *umem, int npages,
- int page_shift, int access_flags,
- bool populate)
+static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags,
+ unsigned int page_size, bool populate)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr;
@@ -1176,16 +1279,20 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
int err;
bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
- mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!page_size)
+ return ERR_PTR(-EINVAL);
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->ibmr.pd = pd;
mr->access_flags = access_flags;
+ mr->page_shift = order_base_2(page_size);
inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
if (populate)
- inlen += sizeof(*pas) * roundup(npages, 2);
+ inlen += sizeof(*pas) *
+ roundup(ib_umem_num_dma_blocks(umem, page_size), 2);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
@@ -1197,7 +1304,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
err = -EINVAL;
goto err_2;
}
- mlx5_ib_populate_pas(dev, umem, page_shift, pas,
+ mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas,
pg_cap ? MLX5_IB_MTT_PRESENT : 0);
}
@@ -1206,20 +1313,20 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
- set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr,
+ set_mkc_access_pd_addr_fields(mkc, access_flags, iova,
populate ? pd : dev->umrc.pd);
MLX5_SET(mkc, mkc, free, !populate);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
MLX5_SET(mkc, mkc, umr_en, 1);
- MLX5_SET64(mkc, mkc, len, length);
+ MLX5_SET64(mkc, mkc, len, umem->length);
MLX5_SET(mkc, mkc, bsf_octword_size, 0);
MLX5_SET(mkc, mkc, translations_octword_size,
- get_octo_len(virt_addr, length, page_shift));
- MLX5_SET(mkc, mkc, log_page_size, page_shift);
+ get_octo_len(iova, umem->length, mr->page_shift));
+ MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
if (populate) {
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
- get_octo_len(virt_addr, length, page_shift));
+ get_octo_len(iova, umem->length, mr->page_shift));
}
err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
@@ -1229,7 +1336,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
}
mr->mmkey.type = MLX5_MKEY_MR;
mr->desc_size = sizeof(struct mlx5_mtt);
- mr->dev = dev;
+ mr->umem = umem;
+ set_mr_fields(dev, mr, umem->length, access_flags);
kvfree(in);
mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
@@ -1238,25 +1346,11 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
err_2:
kvfree(in);
-
err_1:
- if (!ibmr)
- kfree(mr);
-
+ kfree(mr);
return ERR_PTR(err);
}
-static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
- int npages, u64 length, int access_flags)
-{
- mr->npages = npages;
- atomic_add(npages, &dev->mdev->priv.reg_pages);
- mr->ibmr.lkey = mr->mmkey.key;
- mr->ibmr.rkey = mr->mmkey.key;
- mr->ibmr.length = length;
- mr->access_flags = access_flags;
-}
-
static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
u64 length, int acc, int mode)
{
@@ -1290,8 +1384,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
kfree(in);
- mr->umem = NULL;
- set_mr_fields(dev, mr, 0, length, acc);
+ set_mr_fields(dev, mr, length, acc);
return &mr->ibmr;
@@ -1352,116 +1445,128 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
attr->access_flags, mode);
}
-struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt_addr, int access_flags,
- struct ib_udata *udata)
+static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
bool xlt_with_umr;
- struct ib_umem *umem;
- int page_shift;
- int npages;
- int ncont;
- int order;
int err;
- if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
- return ERR_PTR(-EOPNOTSUPP);
-
- mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
- start, virt_addr, length, access_flags);
-
- xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length);
- /* ODP requires xlt update via umr to work. */
- if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND))
- return ERR_PTR(-EINVAL);
-
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
- length == U64_MAX) {
- if (virt_addr != start)
- return ERR_PTR(-EINVAL);
- if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
- !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
- return ERR_PTR(-EINVAL);
-
- mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
- if (IS_ERR(mr))
- return ERR_CAST(mr);
- return &mr->ibmr;
- }
-
- err = mr_umem_get(dev, start, length, access_flags, &umem,
- &npages, &page_shift, &ncont, &order);
-
- if (err < 0)
- return ERR_PTR(err);
-
+ xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length);
if (xlt_with_umr) {
- mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
- page_shift, order, access_flags);
- if (IS_ERR(mr))
- mr = NULL;
- }
+ mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
+ } else {
+ unsigned int page_size = mlx5_umem_find_best_pgsz(
+ umem, mkc, log_page_size, 0, iova);
- if (!mr) {
mutex_lock(&dev->slow_path_mutex);
- mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
- page_shift, access_flags, !xlt_with_umr);
+ mr = reg_create(pd, umem, iova, access_flags, page_size, true);
mutex_unlock(&dev->slow_path_mutex);
}
-
if (IS_ERR(mr)) {
- err = PTR_ERR(mr);
- goto error;
+ ib_umem_release(umem);
+ return ERR_CAST(mr);
}
mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
- mr->umem = umem;
- set_mr_fields(dev, mr, npages, length, access_flags);
+ atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
- if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) {
+ if (xlt_with_umr) {
/*
* If the MR was created with reg_create then it will be
* configured properly but left disabled. It is safe to go ahead
* and configure it again via UMR while enabling it.
*/
- int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
-
- err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
- update_xlt_flags);
+ err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
if (err) {
dereg_mr(dev, mr);
return ERR_PTR(err);
}
}
+ return &mr->ibmr;
+}
- if (is_odp_mr(mr)) {
- to_ib_umem_odp(mr->umem)->private = mr;
- init_waitqueue_head(&mr->q_deferred_work);
- atomic_set(&mr->num_deferred_work, 0);
- err = xa_err(xa_store(&dev->odp_mkeys,
- mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
- GFP_KERNEL));
- if (err) {
- dereg_mr(dev, mr);
- return ERR_PTR(err);
- }
+static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem_odp *odp;
+ struct mlx5_ib_mr *mr;
+ int err;
- err = mlx5_ib_init_odp_mr(mr, xlt_with_umr);
- if (err) {
- dereg_mr(dev, mr);
- return ERR_PTR(err);
- }
+ if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!start && length == U64_MAX) {
+ if (iova != 0)
+ return ERR_PTR(-EINVAL);
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return ERR_PTR(-EINVAL);
+
+ mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
+ if (IS_ERR(mr))
+ return ERR_CAST(mr);
+ return &mr->ibmr;
+ }
+
+ /* ODP requires xlt update via umr to work. */
+ if (!mlx5_ib_can_load_pas_with_umr(dev, length))
+ return ERR_PTR(-EINVAL);
+
+ odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
+ &mlx5_mn_ops);
+ if (IS_ERR(odp))
+ return ERR_CAST(odp);
+
+ mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags);
+ if (IS_ERR(mr)) {
+ ib_umem_release(&odp->umem);
+ return ERR_CAST(mr);
}
+ odp->private = mr;
+ init_waitqueue_head(&mr->q_deferred_work);
+ atomic_set(&mr->num_deferred_work, 0);
+ err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey, GFP_KERNEL));
+ if (err)
+ goto err_dereg_mr;
+
+ err = mlx5_ib_init_odp_mr(mr);
+ if (err)
+ goto err_dereg_mr;
return &mr->ibmr;
-error:
- ib_umem_release(umem);
+
+err_dereg_mr:
+ dereg_mr(dev, mr);
return ERR_PTR(err);
}
+struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem *umem;
+
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
+ start, iova, length, access_flags);
+
+ if (access_flags & IB_ACCESS_ON_DEMAND)
+ return create_user_odp_mr(pd, start, length, iova, access_flags,
+ udata);
+ umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
+ if (IS_ERR(umem))
+ return ERR_CAST(umem);
+ return create_real_mr(pd, umem, iova, access_flags);
+}
+
/**
* mlx5_mr_cache_invalidate - Fence all DMA on the MR
* @mr: The MR to fence
@@ -1474,151 +1579,224 @@ int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr)
{
struct mlx5_umr_wr umrwr = {};
- if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
return 0;
umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
umrwr.wr.opcode = MLX5_IB_WR_UMR;
- umrwr.pd = mr->dev->umrc.pd;
+ umrwr.pd = mr_to_mdev(mr)->umrc.pd;
umrwr.mkey = mr->mmkey.key;
umrwr.ignore_free_state = 1;
- return mlx5_ib_post_send_wait(mr->dev, &umrwr);
+ return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr);
}
-static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
- int access_flags, int flags)
-{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_umr_wr umrwr = {};
+/*
+ * True if the change in access flags can be done via UMR; only some access
+ * flags can be updated.
+ */
+static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
+ unsigned int current_access_flags,
+ unsigned int target_access_flags)
+{
+ unsigned int diffs = current_access_flags ^ target_access_flags;
+
+ if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING))
+ return false;
+ return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags,
+ target_access_flags);
+}
+
+static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
+ int access_flags)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ struct mlx5_umr_wr umrwr = {
+ .wr = {
+ .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
+ MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS,
+ .opcode = MLX5_IB_WR_UMR,
+ },
+ .mkey = mr->mmkey.key,
+ .pd = pd,
+ .access_flags = access_flags,
+ };
int err;
- umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
-
- umrwr.wr.opcode = MLX5_IB_WR_UMR;
- umrwr.mkey = mr->mmkey.key;
-
- if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
- umrwr.pd = pd;
- umrwr.access_flags = access_flags;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
- }
-
err = mlx5_ib_post_send_wait(dev, &umrwr);
+ if (err)
+ return err;
- return err;
+ mr->access_flags = access_flags;
+ mr->mmkey.pd = to_mpd(pd)->pdn;
+ return 0;
}
-int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
- u64 length, u64 virt_addr, int new_access_flags,
- struct ib_pd *new_pd, struct ib_udata *udata)
+static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
+ struct ib_umem *new_umem,
+ int new_access_flags, u64 iova,
+ unsigned long *page_size)
{
- struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
- struct mlx5_ib_mr *mr = to_mmr(ib_mr);
- struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
- int access_flags = flags & IB_MR_REREG_ACCESS ?
- new_access_flags :
- mr->access_flags;
- int page_shift = 0;
- int upd_flags = 0;
- int npages = 0;
- int ncont = 0;
- int order = 0;
- u64 addr, len;
- int err;
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
- mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
- start, virt_addr, length, access_flags);
+ /* We only track the allocated sizes of MRs from the cache */
+ if (!mr->cache_ent)
+ return false;
+ if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length))
+ return false;
- atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
+ *page_size =
+ mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
+ if (WARN_ON(!*page_size))
+ return false;
+ return (1ULL << mr->cache_ent->order) >=
+ ib_umem_num_dma_blocks(new_umem, *page_size);
+}
- if (!mr->umem)
- return -EINVAL;
+static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
+ int access_flags, int flags, struct ib_umem *new_umem,
+ u64 iova, unsigned long page_size)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE;
+ struct ib_umem *old_umem = mr->umem;
+ int err;
- if (is_odp_mr(mr))
- return -EOPNOTSUPP;
+ /*
+ * To keep everything simple, the MR is revoked before we start to mess
+ * with it. This ensures the change is atomic relative to any use of the
+ * MR.
+ */
+ err = mlx5_mr_cache_invalidate(mr);
+ if (err)
+ return err;
- if (flags & IB_MR_REREG_TRANS) {
- addr = virt_addr;
- len = length;
- } else {
- addr = mr->umem->address;
- len = mr->umem->length;
+ if (flags & IB_MR_REREG_PD) {
+ mr->ibmr.pd = pd;
+ mr->mmkey.pd = to_mpd(pd)->pdn;
+ upd_flags |= MLX5_IB_UPD_XLT_PD;
+ }
+ if (flags & IB_MR_REREG_ACCESS) {
+ mr->access_flags = access_flags;
+ upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
}
- if (flags != IB_MR_REREG_PD) {
+ mr->ibmr.length = new_umem->length;
+ mr->mmkey.iova = iova;
+ mr->mmkey.size = new_umem->length;
+ mr->page_shift = order_base_2(page_size);
+ mr->umem = new_umem;
+ err = mlx5_ib_update_mr_pas(mr, upd_flags);
+ if (err) {
/*
- * Replace umem. This needs to be done whether or not UMR is
- * used.
+ * The MR is revoked at this point, so there is no issue with freeing
+ * new_umem.
*/
- flags |= IB_MR_REREG_TRANS;
- ib_umem_release(mr->umem);
- mr->umem = NULL;
- err = mr_umem_get(dev, addr, len, access_flags, &mr->umem,
- &npages, &page_shift, &ncont, &order);
- if (err)
- goto err;
+ mr->umem = old_umem;
+ return err;
}
- if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags,
- access_flags) ||
- !mlx5_ib_can_load_pas_with_umr(dev, len) ||
- (flags & IB_MR_REREG_TRANS &&
- !mlx5_ib_pas_fits_in_mr(mr, addr, len))) {
- /*
- * UMR can't be used - MKey needs to be replaced.
- */
- if (mr->cache_ent)
- detach_mr_from_cache(mr);
- err = destroy_mkey(dev, mr);
- if (err)
- goto err;
+ atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages);
+ ib_umem_release(old_umem);
+ atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages);
+ return 0;
+}
- mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
- page_shift, access_flags, true);
+struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
+ u64 length, u64 iova, int new_access_flags,
+ struct ib_pd *new_pd,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
+ struct mlx5_ib_mr *mr = to_mmr(ib_mr);
+ int err;
- if (IS_ERR(mr)) {
- err = PTR_ERR(mr);
- mr = to_mmr(ib_mr);
- goto err;
- }
- } else {
- /*
- * Send a UMR WQE
- */
- mr->ibmr.pd = pd;
- mr->access_flags = access_flags;
- mr->mmkey.iova = addr;
- mr->mmkey.size = len;
- mr->mmkey.pd = to_mpd(pd)->pdn;
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
+ return ERR_PTR(-EOPNOTSUPP);
- if (flags & IB_MR_REREG_TRANS) {
- upd_flags = MLX5_IB_UPD_XLT_ADDR;
- if (flags & IB_MR_REREG_PD)
- upd_flags |= MLX5_IB_UPD_XLT_PD;
- if (flags & IB_MR_REREG_ACCESS)
- upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
- err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
- upd_flags);
- } else {
- err = rereg_umr(pd, mr, access_flags, flags);
+ mlx5_ib_dbg(
+ dev,
+ "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
+ start, iova, length, new_access_flags);
+
+ if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!(flags & IB_MR_REREG_ACCESS))
+ new_access_flags = mr->access_flags;
+ if (!(flags & IB_MR_REREG_PD))
+ new_pd = ib_mr->pd;
+
+ if (!(flags & IB_MR_REREG_TRANS)) {
+ struct ib_umem *umem;
+
+ /* Fast path for PD/access change */
+ if (can_use_umr_rereg_access(dev, mr->access_flags,
+ new_access_flags)) {
+ err = umr_rereg_pd_access(mr, new_pd, new_access_flags);
+ if (err)
+ return ERR_PTR(err);
+ return NULL;
}
+ /* DM or ODP MRs don't have a umem, so we can't re-use it */
+ if (!mr->umem || is_odp_mr(mr))
+ goto recreate;
+ /*
+ * Only one active MR can refer to a umem at one time; revoke
+ * the old MR before assigning the umem to the new one.
+ */
+ err = mlx5_mr_cache_invalidate(mr);
if (err)
- goto err;
- }
-
- set_mr_fields(dev, mr, npages, len, access_flags);
+ return ERR_PTR(err);
+ umem = mr->umem;
+ mr->umem = NULL;
+ atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
- return 0;
+ return create_real_mr(new_pd, umem, mr->mmkey.iova,
+ new_access_flags);
+ }
-err:
- ib_umem_release(mr->umem);
- mr->umem = NULL;
+ /*
+ * DM doesn't have a PAS list so we can't re-use it; ODP does, but the
+ * logic around releasing the umem is different.
+ */
+ if (!mr->umem || is_odp_mr(mr))
+ goto recreate;
+
+ if (!(new_access_flags & IB_ACCESS_ON_DEMAND) &&
+ can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) {
+ struct ib_umem *new_umem;
+ unsigned long page_size;
+
+ new_umem = ib_umem_get(&dev->ib_dev, start, length,
+ new_access_flags);
+ if (IS_ERR(new_umem))
+ return ERR_CAST(new_umem);
+
+ /* Fast path for PAS change */
+ if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova,
+ &page_size)) {
+ err = umr_rereg_pas(mr, new_pd, new_access_flags, flags,
+ new_umem, iova, page_size);
+ if (err) {
+ ib_umem_release(new_umem);
+ return ERR_PTR(err);
+ }
+ return NULL;
+ }
+ return create_real_mr(new_pd, new_umem, iova, new_access_flags);
+ }
- clean_mr(dev, mr);
- return err;
+ /*
+ * Everything else has no state we can preserve; just create a new MR
+ * from scratch.
+ */
+recreate:
+ return mlx5_ib_reg_user_mr(new_pd, start, length, iova,
+ new_access_flags, udata);
}
static int
@@ -1627,6 +1805,8 @@ mlx5_alloc_priv_descs(struct ib_device *device,
int ndescs,
int desc_size)
{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct device *ddev = &dev->mdev->pdev->dev;
int size = ndescs * desc_size;
int add_size;
int ret;
@@ -1639,9 +1819,8 @@ mlx5_alloc_priv_descs(struct ib_device *device,
mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
- mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
- size, DMA_TO_DEVICE);
- if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
+ mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, mr->desc_map)) {
ret = -ENOMEM;
goto err;
}
@@ -1659,9 +1838,10 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
if (mr->descs) {
struct ib_device *device = mr->ibmr.device;
int size = mr->max_descs * mr->desc_size;
+ struct mlx5_ib_dev *dev = to_mdev(device);
- dma_unmap_single(device->dev.parent, mr->desc_map,
- size, DMA_TO_DEVICE);
+ dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
+ DMA_TO_DEVICE);
kfree(mr->descs_alloc);
mr->descs = NULL;
}
@@ -1691,7 +1871,6 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
- int npages = mr->npages;
struct ib_umem *umem = mr->umem;
/* Stop all DMA */
@@ -1700,14 +1879,17 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
else
clean_mr(dev, mr);
+ if (umem) {
+ if (!is_odp_mr(mr))
+ atomic_sub(ib_umem_num_pages(umem),
+ &dev->mdev->priv.reg_pages);
+ ib_umem_release(umem);
+ }
+
if (mr->cache_ent)
mlx5_mr_cache_free(dev, mr);
else
kfree(mr);
-
- ib_umem_release(umem);
- atomic_sub(npages, &dev->mdev->priv.reg_pages);
-
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
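
Aside, not part of the patch: the rereg fast path added above (can_use_umr_rereg_access() feeding umr_rereg_pd_access()/umr_rereg_pas()) hinges on XORing the current and requested access flags and taking the UMR path only when the differing bits fall inside a small updatable set; the real code additionally consults mlx5_ib_can_reconfig_with_umr() for relaxed ordering. A minimal user-space sketch of that check, using placeholder flag values rather than the real IB_ACCESS_* definitions:

#include <stdbool.h>
#include <stdio.h>

/* Placeholder bit values, standing in for the IB_ACCESS_* flags. */
#define ACC_LOCAL_WRITE   (1u << 0)
#define ACC_REMOTE_WRITE  (1u << 1)
#define ACC_REMOTE_READ   (1u << 2)
#define ACC_REMOTE_ATOMIC (1u << 3)  /* treated as not UMR-updatable here */

/* Mirror of the XOR test: only changes inside the updatable set may use UMR. */
static bool can_rereg_with_umr(unsigned int cur, unsigned int target)
{
	unsigned int diffs = cur ^ target;  /* bits that would change */

	return !(diffs & ~(ACC_LOCAL_WRITE | ACC_REMOTE_WRITE | ACC_REMOTE_READ));
}

int main(void)
{
	/* Adding REMOTE_READ: the differing bit is updatable, UMR is enough. */
	printf("%d\n", can_rereg_with_umr(ACC_LOCAL_WRITE,
					  ACC_LOCAL_WRITE | ACC_REMOTE_READ));
	/* Toggling REMOTE_ATOMIC: outside the set, a new mkey is required. */
	printf("%d\n", can_rereg_with_umr(ACC_LOCAL_WRITE,
					  ACC_LOCAL_WRITE | ACC_REMOTE_ATOMIC));
	return 0;
}
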
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 5c853ec1b0d8..aa2413b50adc 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -102,7 +102,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
if (flags & MLX5_IB_UPD_XLT_ZAP) {
for (; pklm != end; pklm++, idx++) {
pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
- pklm->key = cpu_to_be32(imr->dev->null_mkey);
+ pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey);
pklm->va = 0;
}
return;
@@ -129,7 +129,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
* locking around the xarray.
*/
lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex);
- lockdep_assert_held(&imr->dev->odp_srcu);
+ lockdep_assert_held(&mr_to_mdev(imr)->odp_srcu);
for (; pklm != end; pklm++, idx++) {
struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);
@@ -139,7 +139,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
pklm->key = cpu_to_be32(mtt->ibmr.lkey);
pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE);
} else {
- pklm->key = cpu_to_be32(imr->dev->null_mkey);
+ pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey);
pklm->va = 0;
}
}
@@ -199,7 +199,7 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr)
mutex_unlock(&odp->umem_mutex);
if (!mr->cache_ent) {
- mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey);
+ mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey);
WARN_ON(mr->descs);
}
}
@@ -222,19 +222,19 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt)
WARN_ON(atomic_read(&mr->num_deferred_work));
if (need_imr_xlt) {
- srcu_key = srcu_read_lock(&mr->dev->odp_srcu);
+ srcu_key = srcu_read_lock(&mr_to_mdev(mr)->odp_srcu);
mutex_lock(&odp_imr->umem_mutex);
mlx5_ib_update_xlt(mr->parent, idx, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp_imr->umem_mutex);
- srcu_read_unlock(&mr->dev->odp_srcu, srcu_key);
+ srcu_read_unlock(&mr_to_mdev(mr)->odp_srcu, srcu_key);
}
dma_fence_odp_mr(mr);
mr->parent = NULL;
- mlx5_mr_cache_free(mr->dev, mr);
+ mlx5_mr_cache_free(mr_to_mdev(mr), mr);
ib_umem_odp_release(odp);
if (atomic_dec_and_test(&imr->num_deferred_work))
wake_up(&imr->q_deferred_work);
@@ -274,7 +274,7 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
goto out_unlock;
atomic_inc(&imr->num_deferred_work);
- call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu,
+ call_srcu(&mr_to_mdev(mr)->odp_srcu, &mr->odp_destroy.rcu,
free_implicit_child_mr_rcu);
out_unlock:
@@ -476,12 +476,13 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
if (IS_ERR(odp))
return ERR_CAST(odp);
- ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY,
- imr->access_flags);
+ ret = mr = mlx5_mr_cache_alloc(
+ mr_to_mdev(imr), MLX5_IMR_MTT_CACHE_ENTRY, imr->access_flags);
if (IS_ERR(mr))
goto out_umem;
mr->ibmr.pd = imr->ibmr.pd;
+ mr->ibmr.device = &mr_to_mdev(imr)->ib_dev;
mr->umem = &odp->umem;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
@@ -517,11 +518,11 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
goto out_mr;
}
- mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr);
+ mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr);
return mr;
out_mr:
- mlx5_mr_cache_free(imr->dev, mr);
+ mlx5_mr_cache_free(mr_to_mdev(imr), mr);
out_umem:
ib_umem_odp_release(odp);
return ret;
@@ -536,6 +537,10 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
struct mlx5_ib_mr *imr;
int err;
+ if (!mlx5_ib_can_load_pas_with_umr(dev,
+ MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
+ return ERR_PTR(-EOPNOTSUPP);
+
umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
if (IS_ERR(umem_odp))
return ERR_CAST(umem_odp);
@@ -551,6 +556,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
imr->umem = &umem_odp->umem;
imr->ibmr.lkey = imr->mmkey.key;
imr->ibmr.rkey = imr->mmkey.key;
+ imr->ibmr.device = &dev->ib_dev;
imr->umem = &umem_odp->umem;
imr->is_odp_implicit = true;
atomic_set(&imr->num_deferred_work, 0);
@@ -584,7 +590,7 @@ out_umem:
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
{
struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
- struct mlx5_ib_dev *dev = imr->dev;
+ struct mlx5_ib_dev *dev = mr_to_mdev(imr);
struct list_head destroy_list;
struct mlx5_ib_mr *mtt;
struct mlx5_ib_mr *tmp;
@@ -654,10 +660,10 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
{
/* Prevent new page faults and prefetch requests from succeeding */
- xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
/* Wait for all running page-fault handlers to finish. */
- synchronize_srcu(&mr->dev->odp_srcu);
+ synchronize_srcu(&mr_to_mdev(mr)->odp_srcu);
wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work));
@@ -701,7 +707,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
if (ret < 0) {
if (ret != -EAGAIN)
- mlx5_ib_err(mr->dev,
+ mlx5_ib_err(mr_to_mdev(mr),
"Failed to update mkey page tables\n");
goto out;
}
@@ -791,7 +797,7 @@ out:
MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp_imr->umem_mutex);
if (err) {
- mlx5_ib_err(imr->dev, "Failed to update PAS\n");
+ mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n");
return err;
}
return ret;
@@ -811,7 +817,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
{
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
- lockdep_assert_held(&mr->dev->odp_srcu);
+ lockdep_assert_held(&mr_to_mdev(mr)->odp_srcu);
if (unlikely(io_virt < mr->mmkey.iova))
return -EFAULT;
@@ -831,17 +837,13 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
flags);
}
-int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
{
- u32 flags = MLX5_PF_FLAGS_SNAPSHOT;
int ret;
- if (enable)
- flags |= MLX5_PF_FLAGS_ENABLE;
-
- ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem),
- mr->umem->address, mr->umem->length, NULL,
- flags);
+ ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), mr->umem->address,
+ mr->umem->length, NULL,
+ MLX5_PF_FLAGS_SNAPSHOT | MLX5_PF_FLAGS_ENABLE);
return ret >= 0 ? 0 : ret;
}
@@ -1783,7 +1785,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
/* We rely on IB/core that work is executed if we have num_sge != 0 only. */
WARN_ON(!work->num_sge);
- dev = work->frags[0].mr->dev;
+ dev = mr_to_mdev(work->frags[0].mr);
/* SRCU should be held when calling to mlx5_odp_populate_xlt() */
srcu_key = srcu_read_lock(&dev->odp_srcu);
for (i = 0; i < work->num_sge; ++i) {
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 600e056798c0..0cb7cc642d87 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -778,39 +778,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
return bfregi->sys_pages[index_of_sys_page] + offset;
}
-static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
- unsigned long addr, size_t size,
- struct ib_umem **umem, int *npages, int *page_shift,
- int *ncont, u32 *offset)
-{
- int err;
-
- *umem = ib_umem_get(&dev->ib_dev, addr, size, 0);
- if (IS_ERR(*umem)) {
- mlx5_ib_dbg(dev, "umem_get failed\n");
- return PTR_ERR(*umem);
- }
-
- mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL);
-
- err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
- if (err) {
- mlx5_ib_warn(dev, "bad offset\n");
- goto err_umem;
- }
-
- mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n",
- addr, size, *npages, *page_shift, *ncont, *offset);
-
- return 0;
-
-err_umem:
- ib_umem_release(*umem);
- *umem = NULL;
-
- return err;
-}
-
static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_rwq *rwq, struct ib_udata *udata)
{
@@ -833,10 +800,8 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
{
struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct mlx5_ib_ucontext, ibucontext);
- int page_shift = 0;
- int npages;
+ unsigned long page_size = 0;
u32 offset = 0;
- int ncont = 0;
int err;
if (!ucmd->buf_addr)
@@ -849,23 +814,26 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return err;
}
- mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift,
- &ncont, NULL);
- err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift,
- &rwq->rq_page_offset);
- if (err) {
+ page_size = mlx5_umem_find_best_quantized_pgoff(
+ rwq->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64, &rwq->rq_page_offset);
+ if (!page_size) {
mlx5_ib_warn(dev, "bad offset\n");
+ err = -EINVAL;
goto err_umem;
}
- rwq->rq_num_pas = ncont;
- rwq->page_shift = page_shift;
- rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, page_size);
+ rwq->page_shift = order_base_2(page_size);
+ rwq->log_page_size = rwq->page_shift - MLX5_ADAPTER_PAGE_SHIFT;
rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE);
- mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n",
- (unsigned long long)ucmd->buf_addr, rwq->buf_size,
- npages, page_shift, ncont, offset);
+ mlx5_ib_dbg(
+ dev,
+ "addr 0x%llx, size %zd, npages %zu, page_size %ld, ncont %d, offset %d\n",
+ (unsigned long long)ucmd->buf_addr, rwq->buf_size,
+ ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas,
+ offset);
err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db);
if (err) {
@@ -896,10 +864,9 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
{
struct mlx5_ib_ucontext *context;
struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
- int page_shift = 0;
+ unsigned int page_offset_quantized = 0;
+ unsigned long page_size = 0;
int uar_index = 0;
- int npages;
- u32 offset = 0;
int bfregn;
int ncont = 0;
__be64 *pas;
@@ -950,11 +917,21 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (ucmd->buf_addr && ubuffer->buf_size) {
ubuffer->buf_addr = ucmd->buf_addr;
- err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr,
- ubuffer->buf_size, &ubuffer->umem,
- &npages, &page_shift, &ncont, &offset);
- if (err)
+ ubuffer->umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
+ ubuffer->buf_size, 0);
+ if (IS_ERR(ubuffer->umem)) {
+ err = PTR_ERR(ubuffer->umem);
goto err_bfreg;
+ }
+ page_size = mlx5_umem_find_best_quantized_pgoff(
+ ubuffer->umem, qpc, log_page_size,
+ MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64,
+ &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto err_umem;
+ }
+ ncont = ib_umem_num_dma_blocks(ubuffer->umem, page_size);
} else {
ubuffer->umem = NULL;
}
@@ -969,15 +946,14 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
uid = (attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0;
MLX5_SET(create_qp_in, *in, uid, uid);
- pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
- if (ubuffer->umem)
- mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
-
qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
-
- MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- MLX5_SET(qpc, qpc, page_offset, offset);
-
+ pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
+ if (ubuffer->umem) {
+ mlx5_ib_populate_pas(ubuffer->umem, page_size, pas, 0);
+ MLX5_SET(qpc, qpc, log_page_size,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(qpc, qpc, page_offset, page_offset_quantized);
+ }
MLX5_SET(qpc, qpc, uar_page, uar_index);
if (bfregn != MLX5_IB_INVALID_BFREG)
resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn);
@@ -1209,18 +1185,24 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
void *wq;
int inlen;
int err;
- int page_shift = 0;
- int npages;
- int ncont = 0;
- u32 offset = 0;
-
- err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size,
- &sq->ubuffer.umem, &npages, &page_shift, &ncont,
- &offset);
- if (err)
- return err;
+ unsigned int page_offset_quantized;
+ unsigned long page_size;
+
+ sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
+ ubuffer->buf_size, 0);
+ if (IS_ERR(sq->ubuffer.umem))
+ return PTR_ERR(sq->ubuffer.umem);
+ page_size = mlx5_umem_find_best_quantized_pgoff(
+ ubuffer->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64, &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto err_umem;
+ }
- inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+ sizeof(u64) *
+ ib_umem_num_dma_blocks(sq->ubuffer.umem, page_size);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
@@ -1248,11 +1230,12 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
- MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- MLX5_SET(wq, wq, page_offset, offset);
+ MLX5_SET(wq, wq, log_wq_pg_sz,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(wq, wq, page_offset, page_offset_quantized);
pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
- mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0);
+ mlx5_ib_populate_pas(sq->ubuffer.umem, page_size, pas, 0);
err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp);
@@ -1278,40 +1261,31 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
ib_umem_release(sq->ubuffer.umem);
}
-static size_t get_rq_pas_size(void *qpc)
-{
- u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
- u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
- u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size);
- u32 page_offset = MLX5_GET(qpc, qpc, page_offset);
- u32 po_quanta = 1 << (log_page_size - 6);
- u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride);
- u32 page_size = 1 << log_page_size;
- u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
- u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size;
-
- return rq_num_pas * sizeof(u64);
-}
-
static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq, void *qpin,
- size_t qpinlen, struct ib_pd *pd)
+ struct ib_pd *pd)
{
struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
__be64 *pas;
- __be64 *qp_pas;
void *in;
void *rqc;
void *wq;
void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
- size_t rq_pas_size = get_rq_pas_size(qpc);
+ struct ib_umem *umem = rq->base.ubuffer.umem;
+ unsigned int page_offset_quantized;
+ unsigned long page_size = 0;
size_t inlen;
int err;
- if (qpinlen < rq_pas_size + MLX5_BYTE_OFF(create_qp_in, pas))
+ page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz,
+ MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64,
+ &page_offset_quantized);
+ if (!page_size)
return -EINVAL;
- inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
+ sizeof(u64) * ib_umem_num_dma_blocks(umem, page_size);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -1333,16 +1307,16 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
if (rq->flags & MLX5_IB_RQ_PCI_WRITE_END_PADDING)
MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
- MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset));
+ MLX5_SET(wq, wq, page_offset, page_offset_quantized);
MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
- MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size));
+ MLX5_SET(wq, wq, log_wq_pg_sz,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
- qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas);
- memcpy(pas, qp_pas, rq_pas_size);
+ mlx5_ib_populate_pas(umem, page_size, pas, 0);
err = mlx5_core_create_rq_tracked(dev, in, inlen, &rq->base.mqp);
@@ -1463,7 +1437,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING)
rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
- err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd);
+ err = create_raw_packet_qp_rq(dev, rq, in, pd);
if (err)
goto err_destroy_sq;
@@ -2436,7 +2410,7 @@ static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd,
}
qp->state = IB_QPS_RESET;
-
+ rdma_restrack_no_track(&qp->ibqp.res);
return 0;
}
@@ -2460,6 +2434,7 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
case IB_QPT_GSI:
if (dev->profile == &raw_eth_profile)
goto out;
+ fallthrough;
case IB_QPT_RAW_PACKET:
case IB_QPT_UD:
case MLX5_IB_QPT_REG_UMR:
@@ -2712,11 +2687,12 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1,
true, qp);
- if (create_flags)
+ if (create_flags) {
mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n",
create_flags);
-
- return (create_flags) ? -EINVAL : 0;
+ return -EOPNOTSUPP;
+ }
+ return 0;
}
static int process_udata_size(struct mlx5_ib_dev *dev,
@@ -3102,7 +3078,7 @@ static int ib_to_mlx5_rate_map(u8 rate)
return 5;
default:
return rate + MLX5_STAT_RATE_OFFSET;
- };
+ }
return 0;
}
@@ -4247,6 +4223,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int err = -EINVAL;
int port;
+ if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
+ return -EOPNOTSUPP;
+
if (ibqp->rwq_ind_tbl)
return -ENOSYS;
@@ -4576,7 +4555,9 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path);
- if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC ||
+ qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ qp->ibqp.qp_type == IB_QPT_XRC_TGT) {
to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path);
to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, alt_path);
qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index);
@@ -4882,7 +4863,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
MLX5_SET(rqc, rqc, delay_drop_en, 1);
}
rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
- mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
+ mlx5_ib_populate_pas(rwq->umem, 1UL << rwq->page_shift, rq_pas0, 0);
err = mlx5_core_create_rq_tracked(dev, in, inlen, &rwq->core_qp);
if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
err = set_delay_drop(dev);
diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c
index 887270dd3ce2..4ac429e72004 100644
--- a/drivers/infiniband/hw/mlx5/restrack.c
+++ b/drivers/infiniband/hw/mlx5/restrack.c
@@ -116,7 +116,7 @@ static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr)
{
struct mlx5_ib_mr *mr = to_mmr(ibmr);
- return fill_res_raw(msg, mr->dev, MLX5_SGMT_TYPE_PRM_QUERY_MKEY,
+ return fill_res_raw(msg, mr_to_mdev(mr), MLX5_SGMT_TYPE_PRM_QUERY_MKEY,
mlx5_mkey_to_idx(mr->mmkey.key));
}
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index e2f720eec1e1..fab6736e4d6a 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -51,10 +51,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
udata, struct mlx5_ib_ucontext, ibucontext);
size_t ucmdlen;
int err;
- int npages;
- int page_shift;
- int ncont;
- u32 offset;
u32 uidx = MLX5_IB_DEFAULT_UIDX;
ucmdlen = min(udata->inlen, sizeof(ucmd));
@@ -86,32 +82,14 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
err = PTR_ERR(srq->umem);
return err;
}
-
- mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages,
- &page_shift, &ncont, NULL);
- err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift,
- &offset);
- if (err) {
- mlx5_ib_warn(dev, "bad offset\n");
- goto err_umem;
- }
-
- in->pas = kvcalloc(ncont, sizeof(*in->pas), GFP_KERNEL);
- if (!in->pas) {
- err = -ENOMEM;
- goto err_umem;
- }
-
- mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0);
+ in->umem = srq->umem;
err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db);
if (err) {
mlx5_ib_dbg(dev, "map doorbell failed\n");
- goto err_in;
+ goto err_umem;
}
- in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
- in->page_offset = offset;
in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
in->type != IB_SRQT_BASIC)
@@ -119,9 +97,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
return 0;
-err_in:
- kvfree(in->pas);
-
err_umem:
ib_umem_release(srq->umem);
@@ -226,6 +201,11 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq,
struct mlx5_srq_attr in = {};
__u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ if (init_attr->srq_type != IB_SRQT_BASIC &&
+ init_attr->srq_type != IB_SRQT_XRC &&
+ init_attr->srq_type != IB_SRQT_TM)
+ return -EOPNOTSUPP;
+
/* Sanity check SRQ size before proceeding */
if (init_attr->attr.max_wr >= max_srq_wqes) {
mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h
index 2c3627b2509d..a7e3dc5564ac 100644
--- a/drivers/infiniband/hw/mlx5/srq.h
+++ b/drivers/infiniband/hw/mlx5/srq.h
@@ -28,6 +28,7 @@ struct mlx5_srq_attr {
u32 user_index;
u64 db_record;
__be64 *pas;
+ struct ib_umem *umem;
u32 tm_log_list_size;
u32 tm_next_tag;
u32 tm_hw_phase_cnt;
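
Aside, not part of the patch: every converted path in this series programs the hardware page size as order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT and sizes the PAS array from ib_umem_num_dma_blocks(). A small stand-alone sketch of the same arithmetic, assuming a 4 KiB adapter page shift (the value the deleted get_rq_pas_size() spelled as "+ 12") and using local helpers in place of the kernel ones:

#include <stdio.h>

#define ADAPTER_PAGE_SHIFT 12	/* 4 KiB device pages; the old code used "+ 12" */

/* Local stand-in for order_base_2() on a power-of-two page size. */
static unsigned int log2_pow2(unsigned long v)
{
	unsigned int r = 0;

	while ((1UL << r) < v)
		r++;
	return r;
}

int main(void)
{
	unsigned long umem_len = 1UL << 20;   /* 1 MiB of user memory */
	unsigned long page_size = 1UL << 16;  /* best DMA block size found: 64 KiB */
	unsigned long npas = (umem_len + page_size - 1) / page_size;

	/* log_page_size field written into the srqc/wq/qpc context. */
	printf("log_page_size = %u\n", log2_pow2(page_size) - ADAPTER_PAGE_SHIFT);
	/* Number of PAS (physical address) entries the command needs. */
	printf("pas entries   = %lu\n", npas);
	return 0;
}
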
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index db889ec3fd48..8b3385396599 100644
--- a/drivers/infiniband/hw/mlx5/srq_cmd.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -92,6 +92,25 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
return srq;
}
+static int __set_srq_page_size(struct mlx5_srq_attr *in,
+ unsigned long page_size)
+{
+ if (!page_size)
+ return -EINVAL;
+ in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT;
+
+ if (WARN_ON(get_pas_size(in) !=
+ ib_umem_num_dma_blocks(in->umem, page_size) * sizeof(u64)))
+ return -EINVAL;
+ return 0;
+}
+
+#define set_srq_page_size(in, typ, log_pgsz_fld) \
+ __set_srq_page_size(in, mlx5_umem_find_best_quantized_pgoff( \
+ (in)->umem, typ, log_pgsz_fld, \
+ MLX5_ADAPTER_PAGE_SHIFT, page_offset, \
+ 64, &(in)->page_offset))
+
static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in)
{
@@ -103,6 +122,12 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
int inlen;
int err;
+ if (in->umem) {
+ err = set_srq_page_size(in, srqc, log_page_size);
+ if (err)
+ return err;
+ }
+
pas_size = get_pas_size(in);
inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
create_in = kvzalloc(inlen, GFP_KERNEL);
@@ -114,7 +139,13 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
set_srqc(srqc, in);
- memcpy(pas, in->pas, pas_size);
+ if (in->umem)
+ mlx5_ib_populate_pas(
+ in->umem,
+ 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+ pas, 0);
+ else
+ memcpy(pas, in->pas, pas_size);
MLX5_SET(create_srq_in, create_in, opcode,
MLX5_CMD_OP_CREATE_SRQ);
@@ -194,6 +225,12 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev,
int inlen;
int err;
+ if (in->umem) {
+ err = set_srq_page_size(in, xrc_srqc, log_page_size);
+ if (err)
+ return err;
+ }
+
pas_size = get_pas_size(in);
inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
create_in = kvzalloc(inlen, GFP_KERNEL);
@@ -207,7 +244,13 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev,
set_srqc(xrc_srqc, in);
MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
- memcpy(pas, in->pas, pas_size);
+ if (in->umem)
+ mlx5_ib_populate_pas(
+ in->umem,
+ 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+ pas, 0);
+ else
+ memcpy(pas, in->pas, pas_size);
MLX5_SET(create_xrc_srq_in, create_in, opcode,
MLX5_CMD_OP_CREATE_XRC_SRQ);
@@ -289,11 +332,18 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
void *create_in = NULL;
void *rmpc;
void *wq;
+ void *pas;
int pas_size;
int outlen;
int inlen;
int err;
+ if (in->umem) {
+ err = set_srq_page_size(in, wq, log_wq_pg_sz);
+ if (err)
+ return err;
+ }
+
pas_size = get_pas_size(in);
inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
outlen = MLX5_ST_SZ_BYTES(create_rmp_out);
@@ -309,8 +359,16 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
MLX5_SET(create_rmp_in, create_in, uid, in->uid);
+ pas = MLX5_ADDR_OF(rmpc, rmpc, wq.pas);
+
set_wq(wq, in);
- memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
+ if (in->umem)
+ mlx5_ib_populate_pas(
+ in->umem,
+ 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+ pas, 0);
+ else
+ memcpy(pas, in->pas, pas_size);
MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP);
err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen);
@@ -421,10 +479,17 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
void *create_in;
void *xrqc;
void *wq;
+ void *pas;
int pas_size;
int inlen;
int err;
+ if (in->umem) {
+ err = set_srq_page_size(in, wq, log_wq_pg_sz);
+ if (err)
+ return err;
+ }
+
pas_size = get_pas_size(in);
inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size;
create_in = kvzalloc(inlen, GFP_KERNEL);
@@ -433,9 +498,16 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context);
wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
+ pas = MLX5_ADDR_OF(xrqc, xrqc, wq.pas);
set_wq(wq, in);
- memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size);
+ if (in->umem)
+ mlx5_ib_populate_pas(
+ in->umem,
+ 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+ pas, 0);
+ else
+ memcpy(pas, in->pas, pas_size);
if (in->type == IB_SRQT_TM) {
MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING);