aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/mlx5/mr.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx5/mr.c')
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c960
1 files changed, 571 insertions, 389 deletions
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index b261797b258f..24f8d59a42ea 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -41,6 +41,13 @@
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"
+/*
+ * We can't use an array for xlt_emergency_page because dma_map_single doesn't
+ * work on kernel modules memory
+ */
+void *xlt_emergency_page;
+static DEFINE_MUTEX(xlt_emergency_page_mutex);
+
enum {
MAX_PENDING_REG_MR = 8,
};
@@ -49,6 +56,9 @@ enum {
static void
create_mkey_callback(int status, struct mlx5_async_work *context);
+static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags,
+ unsigned int page_size, bool populate);
static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
struct ib_pd *pd)
@@ -123,19 +133,12 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
-static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start,
- u64 length)
-{
- return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
- length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
-}
-
static void create_mkey_callback(int status, struct mlx5_async_work *context)
{
struct mlx5_ib_mr *mr =
container_of(context, struct mlx5_ib_mr, cb_work);
- struct mlx5_ib_dev *dev = mr->dev;
struct mlx5_cache_ent *ent = mr->cache_ent;
+ struct mlx5_ib_dev *dev = ent->dev;
unsigned long flags;
if (status) {
@@ -172,9 +175,7 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return NULL;
- mr->order = ent->order;
mr->cache_ent = ent;
- mr->dev = ent->dev;
set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
MLX5_SET(mkc, mkc, free, 1);
@@ -642,6 +643,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
if (mlx5_mr_cache_invalidate(mr)) {
detach_mr_from_cache(mr);
destroy_mkey(dev, mr);
+ kfree(mr);
return;
}
@@ -867,57 +869,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev)
return MLX5_MAX_UMR_SHIFT;
}
-static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length,
- int access_flags, struct ib_umem **umem, int *npages,
- int *page_shift, int *ncont, int *order)
-{
- struct ib_umem *u;
-
- *umem = NULL;
-
- if (access_flags & IB_ACCESS_ON_DEMAND) {
- struct ib_umem_odp *odp;
-
- odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
- &mlx5_mn_ops);
- if (IS_ERR(odp)) {
- mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
- PTR_ERR(odp));
- return PTR_ERR(odp);
- }
-
- u = &odp->umem;
-
- *page_shift = odp->page_shift;
- *ncont = ib_umem_odp_num_pages(odp);
- *npages = *ncont << (*page_shift - PAGE_SHIFT);
- if (order)
- *order = ilog2(roundup_pow_of_two(*ncont));
- } else {
- u = ib_umem_get(&dev->ib_dev, start, length, access_flags);
- if (IS_ERR(u)) {
- mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
- return PTR_ERR(u);
- }
-
- mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
- page_shift, ncont, order);
- }
-
- if (!*npages) {
- mlx5_ib_warn(dev, "avoid zero region\n");
- ib_umem_release(u);
- return -EINVAL;
- }
-
- *umem = u;
-
- mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
- *npages, *ncont, *order, *page_shift);
-
- return 0;
-}
-
static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct mlx5_ib_umr_context *context =
@@ -974,25 +925,49 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
return &cache->ent[order];
}
-static struct mlx5_ib_mr *
-alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
- u64 len, int npages, int page_shift, unsigned int order,
- int access_flags)
+static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+ u64 length, int access_flags)
+{
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
+ mr->ibmr.length = length;
+ mr->ibmr.device = &dev->ib_dev;
+ mr->access_flags = access_flags;
+}
+
+static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
+ struct ib_umem *umem, u64 iova,
+ int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order);
+ struct mlx5_cache_ent *ent;
struct mlx5_ib_mr *mr;
+ unsigned int page_size;
- if (!ent)
- return ERR_PTR(-E2BIG);
-
- /* Matches access in alloc_cache_mr() */
- if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
- return ERR_PTR(-EOPNOTSUPP);
+ page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
+ if (WARN_ON(!page_size))
+ return ERR_PTR(-EINVAL);
+ ent = mr_cache_ent_from_order(
+ dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
+ /*
+ * Matches access in alloc_cache_mr(). If the MR can't come from the
+ * cache then synchronously create an uncached one.
+ */
+ if (!ent || ent->limit == 0 ||
+ !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) {
+ mutex_lock(&dev->slow_path_mutex);
+ mr = reg_create(pd, umem, iova, access_flags, page_size, false);
+ mutex_unlock(&dev->slow_path_mutex);
+ return mr;
+ }
mr = get_cache_mr(ent);
if (!mr) {
mr = create_cache_mr(ent);
+ /*
+ * The above already tried to do the same stuff as reg_create(),
+ * no reason to try it again.
+ */
if (IS_ERR(mr))
return mr;
}
@@ -1001,9 +976,12 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
mr->umem = umem;
mr->access_flags = access_flags;
mr->desc_size = sizeof(struct mlx5_mtt);
- mr->mmkey.iova = virt_addr;
- mr->mmkey.size = len;
+ mr->mmkey.iova = iova;
+ mr->mmkey.size = umem->length;
mr->mmkey.pd = to_mpd(pd)->pdn;
+ mr->page_shift = order_base_2(page_size);
+ mr->umem = umem;
+ set_mr_fields(dev, mr, umem->length, access_flags);
return mr;
}
@@ -1012,14 +990,144 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
MLX5_UMR_MTT_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000
+/*
+ * Allocate a temporary buffer to hold the per-page information to transfer to
+ * HW. For efficiency this should be as large as it can be, but buffer
+ * allocation failure is not allowed, so try smaller sizes.
+ */
+static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
+{
+ const size_t xlt_chunk_align =
+ MLX5_UMR_MTT_ALIGNMENT / sizeof(ent_size);
+ size_t size;
+ void *res = NULL;
+
+ static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
+
+ /*
+ * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
+ * allocation can't trigger any kind of reclaim.
+ */
+ might_sleep();
+
+ gfp_mask |= __GFP_ZERO;
+
+ /*
+ * If the system already has a suitable high order page then just use
+ * that, but don't try hard to create one. This max is about 1M, so a
+ * free x86 huge page will satisfy it.
+ */
+ size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
+ MLX5_MAX_UMR_CHUNK);
+ *nents = size / ent_size;
+ res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+ get_order(size));
+ if (res)
+ return res;
+
+ if (size > MLX5_SPARE_UMR_CHUNK) {
+ size = MLX5_SPARE_UMR_CHUNK;
+ *nents = get_order(size) / ent_size;
+ res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+ get_order(size));
+ if (res)
+ return res;
+ }
+
+ *nents = PAGE_SIZE / ent_size;
+ res = (void *)__get_free_page(gfp_mask);
+ if (res)
+ return res;
+
+ mutex_lock(&xlt_emergency_page_mutex);
+ memset(xlt_emergency_page, 0, PAGE_SIZE);
+ return xlt_emergency_page;
+}
+
+static void mlx5_ib_free_xlt(void *xlt, size_t length)
+{
+ if (xlt == xlt_emergency_page) {
+ mutex_unlock(&xlt_emergency_page_mutex);
+ return;
+ }
+
+ free_pages((unsigned long)xlt, get_order(length));
+}
+
+/*
+ * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for
+ * submission.
+ */
+static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
+ struct mlx5_umr_wr *wr, struct ib_sge *sg,
+ size_t nents, size_t ent_size,
+ unsigned int flags)
+{
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct device *ddev = &dev->mdev->pdev->dev;
+ dma_addr_t dma;
+ void *xlt;
+
+ xlt = mlx5_ib_alloc_xlt(&nents, ent_size,
+ flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
+ GFP_KERNEL);
+ sg->length = nents * ent_size;
+ dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, dma)) {
+ mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
+ mlx5_ib_free_xlt(xlt, sg->length);
+ return NULL;
+ }
+ sg->addr = dma;
+ sg->lkey = dev->umrc.pd->local_dma_lkey;
+
+ memset(wr, 0, sizeof(*wr));
+ wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
+ if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
+ wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
+ wr->wr.sg_list = sg;
+ wr->wr.num_sge = 1;
+ wr->wr.opcode = MLX5_IB_WR_UMR;
+ wr->pd = mr->ibmr.pd;
+ wr->mkey = mr->mmkey.key;
+ wr->length = mr->mmkey.size;
+ wr->virt_addr = mr->mmkey.iova;
+ wr->access_flags = mr->access_flags;
+ wr->page_shift = mr->page_shift;
+ wr->xlt_size = sg->length;
+ return xlt;
+}
+
+static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
+ struct ib_sge *sg)
+{
+ struct device *ddev = &dev->mdev->pdev->dev;
+
+ dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
+ mlx5_ib_free_xlt(xlt, sg->length);
+}
+
+static unsigned int xlt_wr_final_send_flags(unsigned int flags)
+{
+ unsigned int res = 0;
+
+ if (flags & MLX5_IB_UPD_XLT_ENABLE)
+ res |= MLX5_IB_SEND_UMR_ENABLE_MR |
+ MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
+ MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+ if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS)
+ res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
+ if (flags & MLX5_IB_UPD_XLT_ADDR)
+ res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+ return res;
+}
+
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags)
{
- struct mlx5_ib_dev *dev = mr->dev;
- struct device *ddev = dev->ib_dev.dev.parent;
- int size;
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct device *ddev = &dev->mdev->pdev->dev;
void *xlt;
- dma_addr_t dma;
struct mlx5_umr_wr wr;
struct ib_sge sg;
int err = 0;
@@ -1030,15 +1138,17 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
const int page_mask = page_align - 1;
size_t pages_mapped = 0;
size_t pages_to_map = 0;
- size_t pages_iter = 0;
+ size_t pages_iter;
size_t size_to_map = 0;
- gfp_t gfp;
- bool use_emergency_page = false;
+ size_t orig_sg_length;
if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
!umr_can_use_indirect_mkey(dev))
return -EPERM;
+ if (WARN_ON(!mr->umem->is_odp))
+ return -EINVAL;
+
/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
* so we need to align the offset and length accordingly
*/
@@ -1046,63 +1156,21 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
npages += idx & page_mask;
idx &= ~page_mask;
}
-
- gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
- gfp |= __GFP_ZERO | __GFP_NOWARN;
-
pages_to_map = ALIGN(npages, page_align);
- size = desc_size * pages_to_map;
- size = min_t(int, size, MLX5_MAX_UMR_CHUNK);
-
- xlt = (void *)__get_free_pages(gfp, get_order(size));
- if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
- mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation od %d bytes\n",
- size, get_order(size), MLX5_SPARE_UMR_CHUNK);
-
- size = MLX5_SPARE_UMR_CHUNK;
- xlt = (void *)__get_free_pages(gfp, get_order(size));
- }
- if (!xlt) {
- mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
- xlt = (void *)mlx5_ib_get_xlt_emergency_page();
- size = PAGE_SIZE;
- memset(xlt, 0, size);
- use_emergency_page = true;
- }
- pages_iter = size / desc_size;
- dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
- if (dma_mapping_error(ddev, dma)) {
- mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
- err = -ENOMEM;
- goto free_xlt;
- }
+ xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags);
+ if (!xlt)
+ return -ENOMEM;
+ pages_iter = sg.length / desc_size;
+ orig_sg_length = sg.length;
- if (mr->umem->is_odp) {
- if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
- struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
- size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
+ if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
- pages_to_map = min_t(size_t, pages_to_map, max_pages);
- }
+ pages_to_map = min_t(size_t, pages_to_map, max_pages);
}
- sg.addr = dma;
- sg.lkey = dev->umrc.pd->local_dma_lkey;
-
- memset(&wr, 0, sizeof(wr));
- wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
- if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
- wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
- wr.wr.sg_list = &sg;
- wr.wr.num_sge = 1;
- wr.wr.opcode = MLX5_IB_WR_UMR;
-
- wr.pd = mr->ibmr.pd;
- wr.mkey = mr->mmkey.key;
- wr.length = mr->mmkey.size;
- wr.virt_addr = mr->mmkey.iova;
- wr.access_flags = mr->access_flags;
wr.page_shift = page_shift;
for (pages_mapped = 0;
@@ -1110,50 +1178,87 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
pages_mapped += pages_iter, idx += pages_iter) {
npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
size_to_map = npages * desc_size;
- dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
- if (mr->umem->is_odp) {
- mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
- } else {
- __mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx,
- npages, xlt,
- MLX5_IB_MTT_PRESENT);
- /* Clear padding after the pages
- * brought from the umem.
- */
- memset(xlt + size_to_map, 0, size - size_to_map);
- }
- dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
+ dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+ mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
+ dma_sync_single_for_device(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
- if (pages_mapped + pages_iter >= pages_to_map) {
- if (flags & MLX5_IB_UPD_XLT_ENABLE)
- wr.wr.send_flags |=
- MLX5_IB_SEND_UMR_ENABLE_MR |
- MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
- MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
- if (flags & MLX5_IB_UPD_XLT_PD ||
- flags & MLX5_IB_UPD_XLT_ACCESS)
- wr.wr.send_flags |=
- MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
- if (flags & MLX5_IB_UPD_XLT_ADDR)
- wr.wr.send_flags |=
- MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
- }
+ if (pages_mapped + pages_iter >= pages_to_map)
+ wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
wr.offset = idx * desc_size;
wr.xlt_size = sg.length;
err = mlx5_ib_post_send_wait(dev, &wr);
}
- dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+ sg.length = orig_sg_length;
+ mlx5_ib_unmap_free_xlt(dev, xlt, &sg);
+ return err;
+}
+
+/*
+ * Send the DMA list to the HW for a normal MR using UMR.
+ */
+static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+{
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct device *ddev = &dev->mdev->pdev->dev;
+ struct ib_block_iter biter;
+ struct mlx5_mtt *cur_mtt;
+ struct mlx5_umr_wr wr;
+ size_t orig_sg_length;
+ struct mlx5_mtt *mtt;
+ size_t final_size;
+ struct ib_sge sg;
+ int err = 0;
+
+ if (WARN_ON(mr->umem->is_odp))
+ return -EINVAL;
+
+ mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg,
+ ib_umem_num_dma_blocks(mr->umem,
+ 1 << mr->page_shift),
+ sizeof(*mtt), flags);
+ if (!mtt)
+ return -ENOMEM;
+ orig_sg_length = sg.length;
+
+ cur_mtt = mtt;
+ rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap,
+ BIT(mr->page_shift)) {
+ if (cur_mtt == (void *)mtt + sg.length) {
+ dma_sync_single_for_device(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+ err = mlx5_ib_post_send_wait(dev, &wr);
+ if (err)
+ goto err;
+ dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+ wr.offset += sg.length;
+ cur_mtt = mtt;
+ }
+
+ cur_mtt->ptag =
+ cpu_to_be64(rdma_block_iter_dma_address(&biter) |
+ MLX5_IB_MTT_PRESENT);
+ cur_mtt++;
+ }
-free_xlt:
- if (use_emergency_page)
- mlx5_ib_put_xlt_emergency_page();
- else
- free_pages((unsigned long)xlt, get_order(size));
+ final_size = (void *)cur_mtt - (void *)mtt;
+ sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
+ memset(cur_mtt, 0, sg.length - final_size);
+ wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
+ wr.xlt_size = sg.length;
+ dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
+ err = mlx5_ib_post_send_wait(dev, &wr);
+
+err:
+ sg.length = orig_sg_length;
+ mlx5_ib_unmap_free_xlt(dev, mtt, &sg);
return err;
}
@@ -1161,11 +1266,9 @@ free_xlt:
* If ibmr is NULL it will be allocated by reg_create.
* Else, the given ibmr will be used.
*/
-static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
- u64 virt_addr, u64 length,
- struct ib_umem *umem, int npages,
- int page_shift, int access_flags,
- bool populate)
+static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags,
+ unsigned int page_size, bool populate)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr;
@@ -1176,16 +1279,20 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
int err;
bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
- mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!page_size)
+ return ERR_PTR(-EINVAL);
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->ibmr.pd = pd;
mr->access_flags = access_flags;
+ mr->page_shift = order_base_2(page_size);
inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
if (populate)
- inlen += sizeof(*pas) * roundup(npages, 2);
+ inlen += sizeof(*pas) *
+ roundup(ib_umem_num_dma_blocks(umem, page_size), 2);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
@@ -1197,7 +1304,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
err = -EINVAL;
goto err_2;
}
- mlx5_ib_populate_pas(dev, umem, page_shift, pas,
+ mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas,
pg_cap ? MLX5_IB_MTT_PRESENT : 0);
}
@@ -1206,20 +1313,20 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
- set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr,
+ set_mkc_access_pd_addr_fields(mkc, access_flags, iova,
populate ? pd : dev->umrc.pd);
MLX5_SET(mkc, mkc, free, !populate);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
MLX5_SET(mkc, mkc, umr_en, 1);
- MLX5_SET64(mkc, mkc, len, length);
+ MLX5_SET64(mkc, mkc, len, umem->length);
MLX5_SET(mkc, mkc, bsf_octword_size, 0);
MLX5_SET(mkc, mkc, translations_octword_size,
- get_octo_len(virt_addr, length, page_shift));
- MLX5_SET(mkc, mkc, log_page_size, page_shift);
+ get_octo_len(iova, umem->length, mr->page_shift));
+ MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
if (populate) {
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
- get_octo_len(virt_addr, length, page_shift));
+ get_octo_len(iova, umem->length, mr->page_shift));
}
err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
@@ -1229,7 +1336,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
}
mr->mmkey.type = MLX5_MKEY_MR;
mr->desc_size = sizeof(struct mlx5_mtt);
- mr->dev = dev;
+ mr->umem = umem;
+ set_mr_fields(dev, mr, umem->length, access_flags);
kvfree(in);
mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
@@ -1238,25 +1346,11 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
err_2:
kvfree(in);
-
err_1:
- if (!ibmr)
- kfree(mr);
-
+ kfree(mr);
return ERR_PTR(err);
}
-static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
- int npages, u64 length, int access_flags)
-{
- mr->npages = npages;
- atomic_add(npages, &dev->mdev->priv.reg_pages);
- mr->ibmr.lkey = mr->mmkey.key;
- mr->ibmr.rkey = mr->mmkey.key;
- mr->ibmr.length = length;
- mr->access_flags = access_flags;
-}
-
static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
u64 length, int acc, int mode)
{
@@ -1290,8 +1384,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
kfree(in);
- mr->umem = NULL;
- set_mr_fields(dev, mr, 0, length, acc);
+ set_mr_fields(dev, mr, length, acc);
return &mr->ibmr;
@@ -1352,116 +1445,128 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
attr->access_flags, mode);
}
-struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt_addr, int access_flags,
- struct ib_udata *udata)
+static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
bool xlt_with_umr;
- struct ib_umem *umem;
- int page_shift;
- int npages;
- int ncont;
- int order;
int err;
- if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
- return ERR_PTR(-EOPNOTSUPP);
-
- mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
- start, virt_addr, length, access_flags);
-
- xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length);
- /* ODP requires xlt update via umr to work. */
- if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND))
- return ERR_PTR(-EINVAL);
-
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
- length == U64_MAX) {
- if (virt_addr != start)
- return ERR_PTR(-EINVAL);
- if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
- !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
- return ERR_PTR(-EINVAL);
-
- mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
- if (IS_ERR(mr))
- return ERR_CAST(mr);
- return &mr->ibmr;
- }
-
- err = mr_umem_get(dev, start, length, access_flags, &umem,
- &npages, &page_shift, &ncont, &order);
-
- if (err < 0)
- return ERR_PTR(err);
-
+ xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length);
if (xlt_with_umr) {
- mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
- page_shift, order, access_flags);
- if (IS_ERR(mr))
- mr = NULL;
- }
+ mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
+ } else {
+ unsigned int page_size = mlx5_umem_find_best_pgsz(
+ umem, mkc, log_page_size, 0, iova);
- if (!mr) {
mutex_lock(&dev->slow_path_mutex);
- mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
- page_shift, access_flags, !xlt_with_umr);
+ mr = reg_create(pd, umem, iova, access_flags, page_size, true);
mutex_unlock(&dev->slow_path_mutex);
}
-
if (IS_ERR(mr)) {
- err = PTR_ERR(mr);
- goto error;
+ ib_umem_release(umem);
+ return ERR_CAST(mr);
}
mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
- mr->umem = umem;
- set_mr_fields(dev, mr, npages, length, access_flags);
+ atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
- if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) {
+ if (xlt_with_umr) {
/*
* If the MR was created with reg_create then it will be
* configured properly but left disabled. It is safe to go ahead
* and configure it again via UMR while enabling it.
*/
- int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
-
- err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
- update_xlt_flags);
+ err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
if (err) {
dereg_mr(dev, mr);
return ERR_PTR(err);
}
}
+ return &mr->ibmr;
+}
- if (is_odp_mr(mr)) {
- to_ib_umem_odp(mr->umem)->private = mr;
- init_waitqueue_head(&mr->q_deferred_work);
- atomic_set(&mr->num_deferred_work, 0);
- err = xa_err(xa_store(&dev->odp_mkeys,
- mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
- GFP_KERNEL));
- if (err) {
- dereg_mr(dev, mr);
- return ERR_PTR(err);
- }
+static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem_odp *odp;
+ struct mlx5_ib_mr *mr;
+ int err;
- err = mlx5_ib_init_odp_mr(mr, xlt_with_umr);
- if (err) {
- dereg_mr(dev, mr);
- return ERR_PTR(err);
- }
+ if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!start && length == U64_MAX) {
+ if (iova != 0)
+ return ERR_PTR(-EINVAL);
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return ERR_PTR(-EINVAL);
+
+ mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
+ if (IS_ERR(mr))
+ return ERR_CAST(mr);
+ return &mr->ibmr;
+ }
+
+ /* ODP requires xlt update via umr to work. */
+ if (!mlx5_ib_can_load_pas_with_umr(dev, length))
+ return ERR_PTR(-EINVAL);
+
+ odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
+ &mlx5_mn_ops);
+ if (IS_ERR(odp))
+ return ERR_CAST(odp);
+
+ mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags);
+ if (IS_ERR(mr)) {
+ ib_umem_release(&odp->umem);
+ return ERR_CAST(mr);
}
+ odp->private = mr;
+ init_waitqueue_head(&mr->q_deferred_work);
+ atomic_set(&mr->num_deferred_work, 0);
+ err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey, GFP_KERNEL));
+ if (err)
+ goto err_dereg_mr;
+
+ err = mlx5_ib_init_odp_mr(mr);
+ if (err)
+ goto err_dereg_mr;
return &mr->ibmr;
-error:
- ib_umem_release(umem);
+
+err_dereg_mr:
+ dereg_mr(dev, mr);
return ERR_PTR(err);
}
+struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem *umem;
+
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
+ start, iova, length, access_flags);
+
+ if (access_flags & IB_ACCESS_ON_DEMAND)
+ return create_user_odp_mr(pd, start, length, iova, access_flags,
+ udata);
+ umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
+ if (IS_ERR(umem))
+ return ERR_CAST(umem);
+ return create_real_mr(pd, umem, iova, access_flags);
+}
+
/**
* mlx5_mr_cache_invalidate - Fence all DMA on the MR
* @mr: The MR to fence
@@ -1474,151 +1579,224 @@ int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr)
{
struct mlx5_umr_wr umrwr = {};
- if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
return 0;
umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
umrwr.wr.opcode = MLX5_IB_WR_UMR;
- umrwr.pd = mr->dev->umrc.pd;
+ umrwr.pd = mr_to_mdev(mr)->umrc.pd;
umrwr.mkey = mr->mmkey.key;
umrwr.ignore_free_state = 1;
- return mlx5_ib_post_send_wait(mr->dev, &umrwr);
+ return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr);
}
-static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
- int access_flags, int flags)
-{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_umr_wr umrwr = {};
+/*
+ * True if the change in access flags can be done via UMR, only some access
+ * flags can be updated.
+ */
+static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
+ unsigned int current_access_flags,
+ unsigned int target_access_flags)
+{
+ unsigned int diffs = current_access_flags ^ target_access_flags;
+
+ if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING))
+ return false;
+ return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags,
+ target_access_flags);
+}
+
+static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
+ int access_flags)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ struct mlx5_umr_wr umrwr = {
+ .wr = {
+ .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
+ MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS,
+ .opcode = MLX5_IB_WR_UMR,
+ },
+ .mkey = mr->mmkey.key,
+ .pd = pd,
+ .access_flags = access_flags,
+ };
int err;
- umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
-
- umrwr.wr.opcode = MLX5_IB_WR_UMR;
- umrwr.mkey = mr->mmkey.key;
-
- if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
- umrwr.pd = pd;
- umrwr.access_flags = access_flags;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
- }
-
err = mlx5_ib_post_send_wait(dev, &umrwr);
+ if (err)
+ return err;
- return err;
+ mr->access_flags = access_flags;
+ mr->mmkey.pd = to_mpd(pd)->pdn;
+ return 0;
}
-int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
- u64 length, u64 virt_addr, int new_access_flags,
- struct ib_pd *new_pd, struct ib_udata *udata)
+static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
+ struct ib_umem *new_umem,
+ int new_access_flags, u64 iova,
+ unsigned long *page_size)
{
- struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
- struct mlx5_ib_mr *mr = to_mmr(ib_mr);
- struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
- int access_flags = flags & IB_MR_REREG_ACCESS ?
- new_access_flags :
- mr->access_flags;
- int page_shift = 0;
- int upd_flags = 0;
- int npages = 0;
- int ncont = 0;
- int order = 0;
- u64 addr, len;
- int err;
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
- mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
- start, virt_addr, length, access_flags);
+ /* We only track the allocated sizes of MRs from the cache */
+ if (!mr->cache_ent)
+ return false;
+ if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length))
+ return false;
- atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
+ *page_size =
+ mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
+ if (WARN_ON(!*page_size))
+ return false;
+ return (1ULL << mr->cache_ent->order) >=
+ ib_umem_num_dma_blocks(new_umem, *page_size);
+}
- if (!mr->umem)
- return -EINVAL;
+static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
+ int access_flags, int flags, struct ib_umem *new_umem,
+ u64 iova, unsigned long page_size)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE;
+ struct ib_umem *old_umem = mr->umem;
+ int err;
- if (is_odp_mr(mr))
- return -EOPNOTSUPP;
+ /*
+ * To keep everything simple the MR is revoked before we start to mess
+ * with it. This ensure the change is atomic relative to any use of the
+ * MR.
+ */
+ err = mlx5_mr_cache_invalidate(mr);
+ if (err)
+ return err;
- if (flags & IB_MR_REREG_TRANS) {
- addr = virt_addr;
- len = length;
- } else {
- addr = mr->umem->address;
- len = mr->umem->length;
+ if (flags & IB_MR_REREG_PD) {
+ mr->ibmr.pd = pd;
+ mr->mmkey.pd = to_mpd(pd)->pdn;
+ upd_flags |= MLX5_IB_UPD_XLT_PD;
+ }
+ if (flags & IB_MR_REREG_ACCESS) {
+ mr->access_flags = access_flags;
+ upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
}
- if (flags != IB_MR_REREG_PD) {
+ mr->ibmr.length = new_umem->length;
+ mr->mmkey.iova = iova;
+ mr->mmkey.size = new_umem->length;
+ mr->page_shift = order_base_2(page_size);
+ mr->umem = new_umem;
+ err = mlx5_ib_update_mr_pas(mr, upd_flags);
+ if (err) {
/*
- * Replace umem. This needs to be done whether or not UMR is
- * used.
+ * The MR is revoked at this point so there is no issue to free
+ * new_umem.
*/
- flags |= IB_MR_REREG_TRANS;
- ib_umem_release(mr->umem);
- mr->umem = NULL;
- err = mr_umem_get(dev, addr, len, access_flags, &mr->umem,
- &npages, &page_shift, &ncont, &order);
- if (err)
- goto err;
+ mr->umem = old_umem;
+ return err;
}
- if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags,
- access_flags) ||
- !mlx5_ib_can_load_pas_with_umr(dev, len) ||
- (flags & IB_MR_REREG_TRANS &&
- !mlx5_ib_pas_fits_in_mr(mr, addr, len))) {
- /*
- * UMR can't be used - MKey needs to be replaced.
- */
- if (mr->cache_ent)
- detach_mr_from_cache(mr);
- err = destroy_mkey(dev, mr);
- if (err)
- goto err;
+ atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages);
+ ib_umem_release(old_umem);
+ atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages);
+ return 0;
+}
- mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
- page_shift, access_flags, true);
+struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
+ u64 length, u64 iova, int new_access_flags,
+ struct ib_pd *new_pd,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
+ struct mlx5_ib_mr *mr = to_mmr(ib_mr);
+ int err;
- if (IS_ERR(mr)) {
- err = PTR_ERR(mr);
- mr = to_mmr(ib_mr);
- goto err;
- }
- } else {
- /*
- * Send a UMR WQE
- */
- mr->ibmr.pd = pd;
- mr->access_flags = access_flags;
- mr->mmkey.iova = addr;
- mr->mmkey.size = len;
- mr->mmkey.pd = to_mpd(pd)->pdn;
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
+ return ERR_PTR(-EOPNOTSUPP);
- if (flags & IB_MR_REREG_TRANS) {
- upd_flags = MLX5_IB_UPD_XLT_ADDR;
- if (flags & IB_MR_REREG_PD)
- upd_flags |= MLX5_IB_UPD_XLT_PD;
- if (flags & IB_MR_REREG_ACCESS)
- upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
- err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
- upd_flags);
- } else {
- err = rereg_umr(pd, mr, access_flags, flags);
+ mlx5_ib_dbg(
+ dev,
+ "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
+ start, iova, length, new_access_flags);
+
+ if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!(flags & IB_MR_REREG_ACCESS))
+ new_access_flags = mr->access_flags;
+ if (!(flags & IB_MR_REREG_PD))
+ new_pd = ib_mr->pd;
+
+ if (!(flags & IB_MR_REREG_TRANS)) {
+ struct ib_umem *umem;
+
+ /* Fast path for PD/access change */
+ if (can_use_umr_rereg_access(dev, mr->access_flags,
+ new_access_flags)) {
+ err = umr_rereg_pd_access(mr, new_pd, new_access_flags);
+ if (err)
+ return ERR_PTR(err);
+ return NULL;
}
+ /* DM or ODP MR's don't have a umem so we can't re-use it */
+ if (!mr->umem || is_odp_mr(mr))
+ goto recreate;
+ /*
+ * Only one active MR can refer to a umem at one time, revoke
+ * the old MR before assigning the umem to the new one.
+ */
+ err = mlx5_mr_cache_invalidate(mr);
if (err)
- goto err;
- }
-
- set_mr_fields(dev, mr, npages, len, access_flags);
+ return ERR_PTR(err);
+ umem = mr->umem;
+ mr->umem = NULL;
+ atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
- return 0;
+ return create_real_mr(new_pd, umem, mr->mmkey.iova,
+ new_access_flags);
+ }
-err:
- ib_umem_release(mr->umem);
- mr->umem = NULL;
+ /*
+ * DM doesn't have a PAS list so we can't re-use it, odp does but the
+ * logic around releasing the umem is different
+ */
+ if (!mr->umem || is_odp_mr(mr))
+ goto recreate;
+
+ if (!(new_access_flags & IB_ACCESS_ON_DEMAND) &&
+ can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) {
+ struct ib_umem *new_umem;
+ unsigned long page_size;
+
+ new_umem = ib_umem_get(&dev->ib_dev, start, length,
+ new_access_flags);
+ if (IS_ERR(new_umem))
+ return ERR_CAST(new_umem);
+
+ /* Fast path for PAS change */
+ if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova,
+ &page_size)) {
+ err = umr_rereg_pas(mr, new_pd, new_access_flags, flags,
+ new_umem, iova, page_size);
+ if (err) {
+ ib_umem_release(new_umem);
+ return ERR_PTR(err);
+ }
+ return NULL;
+ }
+ return create_real_mr(new_pd, new_umem, iova, new_access_flags);
+ }
- clean_mr(dev, mr);
- return err;
+ /*
+ * Everything else has no state we can preserve, just create a new MR
+ * from scratch
+ */
+recreate:
+ return mlx5_ib_reg_user_mr(new_pd, start, length, iova,
+ new_access_flags, udata);
}
static int
@@ -1627,6 +1805,8 @@ mlx5_alloc_priv_descs(struct ib_device *device,
int ndescs,
int desc_size)
{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct device *ddev = &dev->mdev->pdev->dev;
int size = ndescs * desc_size;
int add_size;
int ret;
@@ -1639,9 +1819,8 @@ mlx5_alloc_priv_descs(struct ib_device *device,
mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
- mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
- size, DMA_TO_DEVICE);
- if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
+ mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, mr->desc_map)) {
ret = -ENOMEM;
goto err;
}
@@ -1659,9 +1838,10 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
if (mr->descs) {
struct ib_device *device = mr->ibmr.device;
int size = mr->max_descs * mr->desc_size;
+ struct mlx5_ib_dev *dev = to_mdev(device);
- dma_unmap_single(device->dev.parent, mr->desc_map,
- size, DMA_TO_DEVICE);
+ dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
+ DMA_TO_DEVICE);
kfree(mr->descs_alloc);
mr->descs = NULL;
}
@@ -1691,7 +1871,6 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
- int npages = mr->npages;
struct ib_umem *umem = mr->umem;
/* Stop all DMA */
@@ -1700,14 +1879,17 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
else
clean_mr(dev, mr);
+ if (umem) {
+ if (!is_odp_mr(mr))
+ atomic_sub(ib_umem_num_pages(umem),
+ &dev->mdev->priv.reg_pages);
+ ib_umem_release(umem);
+ }
+
if (mr->cache_ent)
mlx5_mr_cache_free(dev, mr);
else
kfree(mr);
-
- ib_umem_release(umem);
- atomic_sub(npages, &dev->mdev->priv.reg_pages);
-
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)