diff options
author | Jason Gunthorpe <jgg@mellanox.com> | 2019-04-24 16:20:34 -0300 |
---|---|---|
committer | Jason Gunthorpe <jgg@mellanox.com> | 2019-04-24 16:20:34 -0300 |
commit | 449a224c10a48d047c799c5c5d3b22d6aec98c60 (patch) | |
tree | 7ecff2cce22ad3875b70a772eae55a443752cfce /drivers/infiniband/core/uverbs_main.c | |
parent | IB/hfi1: Remove reference to RHF.VCRCErr (diff) | |
parent | RDMA: Remove rdma_user_mmap_page (diff) | |
download | linux-dev-449a224c10a48d047c799c5c5d3b22d6aec98c60.tar.xz linux-dev-449a224c10a48d047c799c5c5d3b22d6aec98c60.zip |
Merge branch 'rdma_mmap' into rdma.git for-next
Jason Gunthorpe says:
====================
Upon review it turns out there are some long standing problems in BAR
mapping area:
* BAR pages intended for read-only can be switched to writable via mprotect.
* Missing use of rdma_user_mmap_io for the mlx5 clock BAR page.
* Disassociate causes SIGBUS when touching the pages.
* CPU pages are being mapped through to the process via remap_pfn_range
instead of the more appropriate vm_insert_page, causing weird behaviors
during disassociation.
This series adds the missing VM_* flag manipulation, adds faulting a zero
page for disassociation and revises the CPU page mappings to use
vm_insert_page.
====================
For dependencies this branch is based on for-rc from
git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git
* branch 'rdma_mmap':
RDMA: Remove rdma_user_mmap_page
RDMA/mlx5: Use get_zeroed_page() for clock_info
RDMA/ucontext: Fix regression with disassociate
RDMA/mlx5: Use rdma_user_map_io for mapping BAR pages
RDMA/mlx5: Do not allow the user to write to the clock page
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'drivers/infiniband/core/uverbs_main.c')
-rw-r--r-- | drivers/infiniband/core/uverbs_main.c | 112 |
1 files changed, 59 insertions, 53 deletions
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index fef4519d1241..d01a2f861119 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref *ref) kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file); put_device(&file->device->dev); + + if (file->disassociate_page) + __free_pages(file->disassociate_page, 0); kfree(file); } @@ -877,45 +880,78 @@ static void rdma_umap_close(struct vm_area_struct *vma) kfree(priv); } +/* + * Once the zap_vma_ptes has been called touches to the VMA will come here and + * we return a dummy writable zero page for all the pfns. + */ +static vm_fault_t rdma_umap_fault(struct vm_fault *vmf) +{ + struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data; + struct rdma_umap_priv *priv = vmf->vma->vm_private_data; + vm_fault_t ret = 0; + + if (!priv) + return VM_FAULT_SIGBUS; + + /* Read only pages can just use the system zero page. */ + if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) { + vmf->page = ZERO_PAGE(vmf->vm_start); + get_page(vmf->page); + return 0; + } + + mutex_lock(&ufile->umap_lock); + if (!ufile->disassociate_page) + ufile->disassociate_page = + alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0); + + if (ufile->disassociate_page) { + /* + * This VMA is forced to always be shared so this doesn't have + * to worry about COW. + */ + vmf->page = ufile->disassociate_page; + get_page(vmf->page); + } else { + ret = VM_FAULT_SIGBUS; + } + mutex_unlock(&ufile->umap_lock); + + return ret; +} + static const struct vm_operations_struct rdma_umap_ops = { .open = rdma_umap_open, .close = rdma_umap_close, + .fault = rdma_umap_fault, }; -static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, - unsigned long size) +/* + * Map IO memory into a process. This is to be called by drivers as part of + * their mmap() functions if they wish to send something like PCI-E BAR memory + * to userspace. + */ +int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, + unsigned long pfn, unsigned long size, pgprot_t prot) { struct ib_uverbs_file *ufile = ucontext->ufile; struct rdma_umap_priv *priv; + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + if (vma->vm_end - vma->vm_start != size) - return ERR_PTR(-EINVAL); + return -EINVAL; /* Driver is using this wrong, must be called by ib_uverbs_mmap */ if (WARN_ON(!vma->vm_file || vma->vm_file->private_data != ufile)) - return ERR_PTR(-EINVAL); + return -EINVAL; lockdep_assert_held(&ufile->device->disassociate_srcu); priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) - return ERR_PTR(-ENOMEM); - return priv; -} - -/* - * Map IO memory into a process. This is to be called by drivers as part of - * their mmap() functions if they wish to send something like PCI-E BAR memory - * to userspace. - */ -int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, pgprot_t prot) -{ - struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size); - - if (IS_ERR(priv)) - return PTR_ERR(priv); + return -ENOMEM; vma->vm_page_prot = prot; if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) { @@ -928,35 +964,6 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, } EXPORT_SYMBOL(rdma_user_mmap_io); -/* - * The page case is here for a slightly different reason, the driver expects - * to be able to free the page it is sharing to user space when it destroys - * its ucontext, which means we need to zap the user space references. - * - * We could handle this differently by providing an API to allocate a shared - * page and then only freeing the shared page when the last ufile is - * destroyed. - */ -int rdma_user_mmap_page(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, struct page *page, - unsigned long size) -{ - struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size); - - if (IS_ERR(priv)) - return PTR_ERR(priv); - - if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size, - vma->vm_page_prot)) { - kfree(priv); - return -EAGAIN; - } - - rdma_umap_priv_init(priv, vma); - return 0; -} -EXPORT_SYMBOL(rdma_user_mmap_page); - void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) { struct rdma_umap_priv *priv, *next_priv; @@ -992,7 +999,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) * at a time to get the lock ordering right. Typically there * will only be one mm, so no big deal. */ - down_write(&mm->mmap_sem); + down_read(&mm->mmap_sem); mutex_lock(&ufile->umap_lock); list_for_each_entry_safe (priv, next_priv, &ufile->umaps, list) { @@ -1004,10 +1011,9 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); - vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); } mutex_unlock(&ufile->umap_lock); - up_write(&mm->mmap_sem); + up_read(&mm->mmap_sem); mmput(mm); } } |