author		Jason Gunthorpe <jgg@mellanox.com>	2019-04-24 16:20:34 -0300
committer	Jason Gunthorpe <jgg@mellanox.com>	2019-04-24 16:20:34 -0300
commit		449a224c10a48d047c799c5c5d3b22d6aec98c60 (patch)
tree		7ecff2cce22ad3875b70a772eae55a443752cfce	/drivers/infiniband/core/uverbs_main.c
parent		IB/hfi1: Remove reference to RHF.VCRCErr (diff)
parent		RDMA: Remove rdma_user_mmap_page (diff)
Merge branch 'rdma_mmap' into rdma.git for-next
Jason Gunthorpe says:

====================
Upon review it turns out there are some long standing problems in BAR
mapping area:

 * BAR pages intended for read-only can be switched to writable via
   mprotect.
 * Missing use of rdma_user_mmap_io for the mlx5 clock BAR page.
 * Disassociate causes SIGBUS when touching the pages.
 * CPU pages are being mapped through to the process via remap_pfn_range
   instead of the more appropriate vm_insert_page, causing weird behaviors
   during disassociation.

This series adds the missing VM_* flag manipulation, adds faulting a zero
page for disassociation and revises the CPU page mappings to use
vm_insert_page.
====================

For dependencies this branch is based on for-rc from
git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git

* branch 'rdma_mmap':
  RDMA: Remove rdma_user_mmap_page
  RDMA/mlx5: Use get_zeroed_page() for clock_info
  RDMA/ucontext: Fix regression with disassociate
  RDMA/mlx5: Use rdma_user_map_io for mapping BAR pages
  RDMA/mlx5: Do not allow the user to write to the clock page

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
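As context for the diff below: after this series, drivers hand BAR memory to userspace only through rdma_user_mmap_io(), which now insists on a VM_SHARED mapping and registers the VMA so it can be zapped and later faulted to a dummy page on disassociate. The following is a minimal sketch of a driver mmap() handler using that helper; the mydrv_* names and the bar_paddr field are hypothetical and are not part of this patch.

#include <linux/mm.h>
#include <rdma/ib_verbs.h>

/* Hypothetical driver ucontext: one device BAR page exposed to userspace. */
struct mydrv_ucontext {
	struct ib_ucontext ibucontext;
	phys_addr_t bar_paddr;	/* physical address of the BAR page */
};

/* Hypothetical driver mmap() handler: map one BAR page to userspace. */
static int mydrv_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
{
	struct mydrv_ucontext *ctx =
		container_of(ucontext, struct mydrv_ucontext, ibucontext);

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;

	/*
	 * rdma_user_mmap_io() rejects mappings without VM_SHARED and tracks
	 * the VMA so uverbs_user_mmap_disassociate() can zap it later.
	 */
	return rdma_user_mmap_io(ucontext, vma, ctx->bar_paddr >> PAGE_SHIFT,
				 PAGE_SIZE, pgprot_noncached(vma->vm_page_prot));
}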
Diffstat (limited to 'drivers/infiniband/core/uverbs_main.c')
-rw-r--r--	drivers/infiniband/core/uverbs_main.c	112
1 file changed, 59 insertions(+), 53 deletions(-)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index fef4519d1241..d01a2f861119 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref *ref)
 		kref_put(&file->async_file->ref,
 			 ib_uverbs_release_async_event_file);
 	put_device(&file->device->dev);
+
+	if (file->disassociate_page)
+		__free_pages(file->disassociate_page, 0);
 	kfree(file);
 }
 
@@ -877,45 +880,78 @@ static void rdma_umap_close(struct vm_area_struct *vma)
 	kfree(priv);
 }
 
+/*
+ * Once the zap_vma_ptes has been called touches to the VMA will come here and
+ * we return a dummy writable zero page for all the pfns.
+ */
+static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
+{
+	struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
+	struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
+	vm_fault_t ret = 0;
+
+	if (!priv)
+		return VM_FAULT_SIGBUS;
+
+	/* Read only pages can just use the system zero page. */
+	if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
+		vmf->page = ZERO_PAGE(vmf->address);
+		get_page(vmf->page);
+		return 0;
+	}
+
+	mutex_lock(&ufile->umap_lock);
+	if (!ufile->disassociate_page)
+		ufile->disassociate_page =
+			alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);
+
+	if (ufile->disassociate_page) {
+		/*
+		 * This VMA is forced to always be shared so this doesn't have
+		 * to worry about COW.
+		 */
+		vmf->page = ufile->disassociate_page;
+		get_page(vmf->page);
+	} else {
+		ret = VM_FAULT_SIGBUS;
+	}
+	mutex_unlock(&ufile->umap_lock);
+
+	return ret;
+}
+
 static const struct vm_operations_struct rdma_umap_ops = {
 	.open = rdma_umap_open,
 	.close = rdma_umap_close,
+	.fault = rdma_umap_fault,
 };
 
-static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
-						 struct vm_area_struct *vma,
-						 unsigned long size)
+/*
+ * Map IO memory into a process. This is to be called by drivers as part of
+ * their mmap() functions if they wish to send something like PCI-E BAR memory
+ * to userspace.
+ */
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+		      unsigned long pfn, unsigned long size, pgprot_t prot)
 {
 	struct ib_uverbs_file *ufile = ucontext->ufile;
 	struct rdma_umap_priv *priv;
 
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
 	if (vma->vm_end - vma->vm_start != size)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
 	if (WARN_ON(!vma->vm_file ||
 		    vma->vm_file->private_data != ufile))
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	lockdep_assert_held(&ufile->device->disassociate_srcu);
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
-		return ERR_PTR(-ENOMEM);
-	return priv;
-}
-
-/*
- * Map IO memory into a process. This is to be called by drivers as part of
- * their mmap() functions if they wish to send something like PCI-E BAR memory
- * to userspace.
- */
-int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
-		      unsigned long pfn, unsigned long size, pgprot_t prot)
-{
-	struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);
-
-	if (IS_ERR(priv))
-		return PTR_ERR(priv);
+		return -ENOMEM;
 
 	vma->vm_page_prot = prot;
 	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
@@ -928,35 +964,6 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
 }
 EXPORT_SYMBOL(rdma_user_mmap_io);
 
-/*
- * The page case is here for a slightly different reason, the driver expects
- * to be able to free the page it is sharing to user space when it destroys
- * its ucontext, which means we need to zap the user space references.
- *
- * We could handle this differently by providing an API to allocate a shared
- * page and then only freeing the shared page when the last ufile is
- * destroyed.
- */
-int rdma_user_mmap_page(struct ib_ucontext *ucontext,
-			struct vm_area_struct *vma, struct page *page,
-			unsigned long size)
-{
-	struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);
-
-	if (IS_ERR(priv))
-		return PTR_ERR(priv);
-
-	if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size,
-			    vma->vm_page_prot)) {
-		kfree(priv);
-		return -EAGAIN;
-	}
-
-	rdma_umap_priv_init(priv, vma);
-	return 0;
-}
-EXPORT_SYMBOL(rdma_user_mmap_page);
-
 void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 {
 	struct rdma_umap_priv *priv, *next_priv;
@@ -992,7 +999,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 		 * at a time to get the lock ordering right. Typically there
 		 * will only be one mm, so no big deal.
 		 */
-		down_write(&mm->mmap_sem);
+		down_read(&mm->mmap_sem);
 		mutex_lock(&ufile->umap_lock);
 		list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
 					  list) {
@@ -1004,10 +1011,9 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 
 			zap_vma_ptes(vma, vma->vm_start,
 				     vma->vm_end - vma->vm_start);
-			vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
 		}
 		mutex_unlock(&ufile->umap_lock);
-		up_write(&mm->mmap_sem);
+		up_read(&mm->mmap_sem);
 		mmput(mm);
 	}
 }
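With rdma_user_mmap_page() removed above, a driver that shares an ordinary kernel page with userspace (the mlx5 clock_info page in this series) allocates it zeroed and inserts it with vm_insert_page(), which the cover letter calls the more appropriate interface for CPU pages since it refcounts the page and avoids the odd behaviors remap_pfn_range() showed during disassociation. Below is a minimal sketch of that pattern; the mydrv_* names are hypothetical and the mlx5-specific code is not shown in this file.

#include <linux/gfp.h>
#include <linux/mm.h>

/* Hypothetical driver state: one zeroed kernel page shared with userspace. */
struct mydrv_dev {
	void *info_page;
};

static int mydrv_alloc_info_page(struct mydrv_dev *dev)
{
	dev->info_page = (void *)get_zeroed_page(GFP_KERNEL);
	return dev->info_page ? 0 : -ENOMEM;
}

static int mydrv_mmap_info_page(struct mydrv_dev *dev,
				struct vm_area_struct *vma)
{
	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;

	/*
	 * vm_insert_page() takes a reference on the page, so the userspace
	 * mapping stays valid even after the driver drops its own reference.
	 */
	return vm_insert_page(vma, vma->vm_start,
			      virt_to_page(dev->info_page));
}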