aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@mellanox.com>2020-01-21 09:55:04 -0400
committerJason Gunthorpe <jgg@mellanox.com>2020-01-21 09:55:04 -0400
commite8b3a426fb4a9e2856a69b6e19de044c7416c316 (patch)
treee902f402349a14df8a733e8d4f417122673f6c24 /drivers/infiniband/core
parentIB/mlx4: Fix memory leak in add_gid error flow (diff)
parentnet/rds: Use prefetch for On-Demand-Paging MR (diff)
downloadlinux-dev-e8b3a426fb4a9e2856a69b6e19de044c7416c316.tar.xz
linux-dev-e8b3a426fb4a9e2856a69b6e19de044c7416c316.zip
Merge tag 'rds-odp-for-5.5' into rdma.git for-next
From https://git.kernel.org/pub/scm/linux/kernel/git/leon/linux-rdma Leon Romanovsky says: ==================== Use ODP MRs for kernel ULPs The following series extends MR creation routines to allow creation of user MRs through kernel ULPs as a proxy. The immediate use case is to allow RDS to work over FS-DAX, which requires ODP (on-demand-paging) MRs to be created and such MRs were not possible to create prior this series. The first part of this patchset extends RDMA to have special verb ib_reg_user_mr(). The common use case that uses this function is a userspace application that allocates memory for HCA access but the responsibility to register the memory at the HCA is on an kernel ULP. This ULP acts as an agent for the userspace application. The second part provides advise MR functionality for ULPs. This is integral part of ODP flows and used to trigger pagefaults in advance to prepare memory before running working set. The third part is actual user of those in-kernel APIs. ==================== * tag 'rds-odp-for-5.5': net/rds: Use prefetch for On-Demand-Paging MR net/rds: Handle ODP mr registration/unregistration net/rds: Detect need of On-Demand-Paging memory registration RDMA/mlx5: Fix handling of IOVA != user_va in ODP paths IB/mlx5: Mask out unsupported ODP capabilities for kernel QPs RDMA/mlx5: Don't fake udata for kernel path IB/mlx5: Add ODP WQE handlers for kernel QPs IB/core: Add interface to advise_mr for kernel users IB/core: Introduce ib_reg_user_mr IB: Allow calls to ib_umem_get from kernel ULPs Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/umem.c27
-rw-r--r--drivers/infiniband/core/umem_odp.c29
-rw-r--r--drivers/infiniband/core/verbs.c41
3 files changed, 57 insertions, 40 deletions
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 7a3b99597ead..146f98fbf22b 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -181,15 +181,14 @@ EXPORT_SYMBOL(ib_umem_find_best_pgsz);
/**
* ib_umem_get - Pin and DMA map userspace memory.
*
- * @udata: userspace context to pin memory for
+ * @device: IB device to connect UMEM
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
*/
-struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
+struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
size_t size, int access)
{
- struct ib_ucontext *context;
struct ib_umem *umem;
struct page **page_list;
unsigned long lock_limit;
@@ -201,14 +200,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
struct scatterlist *sg;
unsigned int gup_flags = FOLL_WRITE;
- if (!udata)
- return ERR_PTR(-EIO);
-
- context = container_of(udata, struct uverbs_attr_bundle, driver_udata)
- ->context;
- if (!context)
- return ERR_PTR(-EIO);
-
/*
* If the combination of the addr and size requested for this memory
* region causes an integer overflow, return error.
@@ -226,7 +217,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
- umem->ibdev = context->device;
+ umem->ibdev = device;
umem->length = size;
umem->address = addr;
umem->writable = ib_access_writable(access);
@@ -281,7 +272,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
npages -= ret;
sg = ib_umem_add_sg_table(sg, page_list, ret,
- dma_get_max_seg_size(context->device->dma_device),
+ dma_get_max_seg_size(device->dma_device),
&umem->sg_nents);
up_read(&mm->mmap_sem);
@@ -289,10 +280,10 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
sg_mark_end(sg);
- umem->nmap = ib_dma_map_sg(context->device,
- umem->sg_head.sgl,
- umem->sg_nents,
- DMA_BIDIRECTIONAL);
+ umem->nmap = ib_dma_map_sg(device,
+ umem->sg_head.sgl,
+ umem->sg_nents,
+ DMA_BIDIRECTIONAL);
if (!umem->nmap) {
ret = -ENOMEM;
@@ -303,7 +294,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
goto out;
umem_release:
- __ib_umem_release(context->device, umem, 0);
+ __ib_umem_release(device, umem, 0);
vma:
atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index b9baf7d0a5cb..cb3b17a7b9b0 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -110,15 +110,12 @@ out_page_list:
* They exist only to hold the per_mm reference to help the driver create
* children umems.
*
- * @udata: udata from the syscall being used to create the umem
+ * @device: IB device to create UMEM
* @access: ib_reg_mr access flags
*/
-struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
+struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device,
int access)
{
- struct ib_ucontext *context =
- container_of(udata, struct uverbs_attr_bundle, driver_udata)
- ->context;
struct ib_umem *umem;
struct ib_umem_odp *umem_odp;
int ret;
@@ -126,14 +123,11 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
if (access & IB_ACCESS_HUGETLB)
return ERR_PTR(-EINVAL);
- if (!context)
- return ERR_PTR(-EIO);
-
umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL);
if (!umem_odp)
return ERR_PTR(-ENOMEM);
umem = &umem_odp->umem;
- umem->ibdev = context->device;
+ umem->ibdev = device;
umem->writable = ib_access_writable(access);
umem->owning_mm = current->mm;
umem_odp->is_implicit_odp = 1;
@@ -201,7 +195,7 @@ EXPORT_SYMBOL(ib_umem_odp_alloc_child);
/**
* ib_umem_odp_get - Create a umem_odp for a userspace va
*
- * @udata: userspace context to pin memory for
+ * @device: IB device struct to get UMEM
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
@@ -210,23 +204,14 @@ EXPORT_SYMBOL(ib_umem_odp_alloc_child);
* pinning, instead, stores the mm for future page fault handling in
* conjunction with MMU notifiers.
*/
-struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
- size_t size, int access,
+struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device,
+ unsigned long addr, size_t size, int access,
const struct mmu_interval_notifier_ops *ops)
{
struct ib_umem_odp *umem_odp;
- struct ib_ucontext *context;
struct mm_struct *mm;
int ret;
- if (!udata)
- return ERR_PTR(-EIO);
-
- context = container_of(udata, struct uverbs_attr_bundle, driver_udata)
- ->context;
- if (!context)
- return ERR_PTR(-EIO);
-
if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND)))
return ERR_PTR(-EINVAL);
@@ -234,7 +219,7 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
if (!umem_odp)
return ERR_PTR(-ENOMEM);
- umem_odp->umem.ibdev = context->device;
+ umem_odp->umem.ibdev = device;
umem_odp->umem.length = size;
umem_odp->umem.address = addr;
umem_odp->umem.writable = ib_access_writable(access);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 78b27aff2846..3ebae3b65c28 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1993,6 +1993,47 @@ EXPORT_SYMBOL(ib_resize_cq);
/* Memory regions */
+struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags)
+{
+ struct ib_mr *mr;
+
+ if (access_flags & IB_ACCESS_ON_DEMAND) {
+ if (!(pd->device->attrs.device_cap_flags &
+ IB_DEVICE_ON_DEMAND_PAGING)) {
+ pr_debug("ODP support not available\n");
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ mr = pd->device->ops.reg_user_mr(pd, start, length, virt_addr,
+ access_flags, NULL);
+
+ if (IS_ERR(mr))
+ return mr;
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->dm = NULL;
+ atomic_inc(&pd->usecnt);
+ mr->res.type = RDMA_RESTRACK_MR;
+ rdma_restrack_kadd(&mr->res);
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_reg_user_mr);
+
+int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
+ u32 flags, struct ib_sge *sg_list, u32 num_sge)
+{
+ if (!pd->device->ops.advise_mr)
+ return -EOPNOTSUPP;
+
+ return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge,
+ NULL);
+}
+EXPORT_SYMBOL(ib_advise_mr);
+
int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
{
struct ib_pd *pd = mr->pd;