aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-01-21 10:22:51 +0100
committerDavid S. Miller <davem@davemloft.net>2020-01-21 10:22:51 +0100
commitad063075d406dd04d2d9bc1005161a5a08fb38c5 (patch)
treececa9e3059278203dd2d13d3ad96b13f867ab979 /drivers/infiniband/core
parentMerge branch 'mlxsw-SPAN-egress-mirroring-buffer-size' (diff)
parentnet/rds: Use prefetch for On-Demand-Paging MR (diff)
downloadlinux-dev-ad063075d406dd04d2d9bc1005161a5a08fb38c5.tar.xz
linux-dev-ad063075d406dd04d2d9bc1005161a5a08fb38c5.zip
Merge tag 'rds-odp-for-5.5' of https://git.kernel.org/pub/scm/linux/kernel/git/leon/linux-rdma
Leon Romanovsky says: ==================== Use ODP MRs for kernel ULPs The following series extends MR creation routines to allow creation of user MRs through kernel ULPs as a proxy. The immediate use case is to allow RDS to work over FS-DAX, which requires ODP (on-demand-paging) MRs to be created and such MRs were not possible to create prior this series. The first part of this patchset extends RDMA to have special verb ib_reg_user_mr(). The common use case that uses this function is a userspace application that allocates memory for HCA access but the responsibility to register the memory at the HCA is on an kernel ULP. This ULP acts as an agent for the userspace application. The second part provides advise MR functionality for ULPs. This is integral part of ODP flows and used to trigger pagefaults in advance to prepare memory before running working set. The third part is actual user of those in-kernel APIs. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/umem.c27
-rw-r--r--drivers/infiniband/core/umem_odp.c29
-rw-r--r--drivers/infiniband/core/verbs.c41
3 files changed, 57 insertions, 40 deletions
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 7a3b99597ead..146f98fbf22b 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -181,15 +181,14 @@ EXPORT_SYMBOL(ib_umem_find_best_pgsz);
/**
* ib_umem_get - Pin and DMA map userspace memory.
*
- * @udata: userspace context to pin memory for
+ * @device: IB device to connect UMEM
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
*/
-struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
+struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
size_t size, int access)
{
- struct ib_ucontext *context;
struct ib_umem *umem;
struct page **page_list;
unsigned long lock_limit;
@@ -201,14 +200,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
struct scatterlist *sg;
unsigned int gup_flags = FOLL_WRITE;
- if (!udata)
- return ERR_PTR(-EIO);
-
- context = container_of(udata, struct uverbs_attr_bundle, driver_udata)
- ->context;
- if (!context)
- return ERR_PTR(-EIO);
-
/*
* If the combination of the addr and size requested for this memory
* region causes an integer overflow, return error.
@@ -226,7 +217,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
- umem->ibdev = context->device;
+ umem->ibdev = device;
umem->length = size;
umem->address = addr;
umem->writable = ib_access_writable(access);
@@ -281,7 +272,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
npages -= ret;
sg = ib_umem_add_sg_table(sg, page_list, ret,
- dma_get_max_seg_size(context->device->dma_device),
+ dma_get_max_seg_size(device->dma_device),
&umem->sg_nents);
up_read(&mm->mmap_sem);
@@ -289,10 +280,10 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
sg_mark_end(sg);
- umem->nmap = ib_dma_map_sg(context->device,
- umem->sg_head.sgl,
- umem->sg_nents,
- DMA_BIDIRECTIONAL);
+ umem->nmap = ib_dma_map_sg(device,
+ umem->sg_head.sgl,
+ umem->sg_nents,
+ DMA_BIDIRECTIONAL);
if (!umem->nmap) {
ret = -ENOMEM;
@@ -303,7 +294,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
goto out;
umem_release:
- __ib_umem_release(context->device, umem, 0);
+ __ib_umem_release(device, umem, 0);
vma:
atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index e42d44e501fd..dac3fd2ebc26 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -110,15 +110,12 @@ out_page_list:
* They exist only to hold the per_mm reference to help the driver create
* children umems.
*
- * @udata: udata from the syscall being used to create the umem
+ * @device: IB device to create UMEM
* @access: ib_reg_mr access flags
*/
-struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
+struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device,
int access)
{
- struct ib_ucontext *context =
- container_of(udata, struct uverbs_attr_bundle, driver_udata)
- ->context;
struct ib_umem *umem;
struct ib_umem_odp *umem_odp;
int ret;
@@ -126,14 +123,11 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
if (access & IB_ACCESS_HUGETLB)
return ERR_PTR(-EINVAL);
- if (!context)
- return ERR_PTR(-EIO);
-
umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL);
if (!umem_odp)
return ERR_PTR(-ENOMEM);
umem = &umem_odp->umem;
- umem->ibdev = context->device;
+ umem->ibdev = device;
umem->writable = ib_access_writable(access);
umem->owning_mm = current->mm;
umem_odp->is_implicit_odp = 1;
@@ -201,7 +195,7 @@ EXPORT_SYMBOL(ib_umem_odp_alloc_child);
/**
* ib_umem_odp_get - Create a umem_odp for a userspace va
*
- * @udata: userspace context to pin memory for
+ * @device: IB device struct to get UMEM
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
@@ -210,23 +204,14 @@ EXPORT_SYMBOL(ib_umem_odp_alloc_child);
* pinning, instead, stores the mm for future page fault handling in
* conjunction with MMU notifiers.
*/
-struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
- size_t size, int access,
+struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device,
+ unsigned long addr, size_t size, int access,
const struct mmu_interval_notifier_ops *ops)
{
struct ib_umem_odp *umem_odp;
- struct ib_ucontext *context;
struct mm_struct *mm;
int ret;
- if (!udata)
- return ERR_PTR(-EIO);
-
- context = container_of(udata, struct uverbs_attr_bundle, driver_udata)
- ->context;
- if (!context)
- return ERR_PTR(-EIO);
-
if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND)))
return ERR_PTR(-EINVAL);
@@ -234,7 +219,7 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
if (!umem_odp)
return ERR_PTR(-ENOMEM);
- umem_odp->umem.ibdev = context->device;
+ umem_odp->umem.ibdev = device;
umem_odp->umem.length = size;
umem_odp->umem.address = addr;
umem_odp->umem.writable = ib_access_writable(access);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index dd765e176cdd..d33bdc9b01cd 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1990,6 +1990,47 @@ EXPORT_SYMBOL(ib_resize_cq);
/* Memory regions */
+struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags)
+{
+ struct ib_mr *mr;
+
+ if (access_flags & IB_ACCESS_ON_DEMAND) {
+ if (!(pd->device->attrs.device_cap_flags &
+ IB_DEVICE_ON_DEMAND_PAGING)) {
+ pr_debug("ODP support not available\n");
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ mr = pd->device->ops.reg_user_mr(pd, start, length, virt_addr,
+ access_flags, NULL);
+
+ if (IS_ERR(mr))
+ return mr;
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->dm = NULL;
+ atomic_inc(&pd->usecnt);
+ mr->res.type = RDMA_RESTRACK_MR;
+ rdma_restrack_kadd(&mr->res);
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_reg_user_mr);
+
+int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
+ u32 flags, struct ib_sge *sg_list, u32 num_sge)
+{
+ if (!pd->device->ops.advise_mr)
+ return -EOPNOTSUPP;
+
+ return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge,
+ NULL);
+}
+EXPORT_SYMBOL(ib_advise_mr);
+
int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
{
struct ib_pd *pd = mr->pd;