author		Yishai Hadas <yishaih@nvidia.com>	2020-09-30 19:38:26 +0300
committer	Jason Gunthorpe <jgg@nvidia.com>	2020-10-01 16:44:05 -0300
commit		8bfafde08693cc2e740134c3e32b40254f5d82ae (patch)
tree		a7f58420a3fcb448ce2794c60141e9c0d5565a6e /drivers/infiniband/core/umem_odp.c
parent		IB/core: Improve ODP to use hmm_range_fault() (diff)
IB/core: Enable ODP sync without faulting
Enable ODP sync without faulting. This improves performance by reducing the number of page faults in the system.

The gain from this option is that the device page table can be aligned with the pages already present in the CPU page table without causing page faults. As a result, the data-path overhead of the hardware raising a fault, which ends up calling the driver to bring in the pages, is dropped.

Link: https://lore.kernel.org/r/20200930163828.1336747-3-leon@kernel.org
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
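To illustrate the new calling convention, here is a minimal driver-side sketch; it is not part of this patch. The helper my_drv_sync_odp() and the stub my_drv_update_device_ptes() are hypothetical; only ib_umem_odp_map_dma_and_lock(), the ODP_*_ALLOWED_BIT flags and the umem_mutex locking convention come from the kernel sources. With fault=false the call only snapshots pages already present in the CPU page table, so no page faults are triggered.

#include <linux/mutex.h>
#include <rdma/ib_umem_odp.h>

/* Hypothetical stand-in for a driver-specific device page-table update. */
static void my_drv_update_device_ptes(struct ib_umem_odp *umem_odp,
				      u64 user_va, u64 bcnt)
{
}

/* Hypothetical driver helper: align the device page table with the CPU one. */
static int my_drv_sync_odp(struct ib_umem_odp *umem_odp, u64 user_va,
			   u64 bcnt, bool fault)
{
	u64 access_mask = ODP_READ_ALLOWED_BIT;
	int npages;

	if (umem_odp->umem.writable)
		access_mask |= ODP_WRITE_ALLOWED_BIT;

	/*
	 * With fault == true missing pages are faulted in via
	 * hmm_range_fault(); with fault == false only the pages already
	 * present in the CPU page table are collected.  On success the
	 * call returns with umem_odp->umem_mutex held.
	 */
	npages = ib_umem_odp_map_dma_and_lock(umem_odp, user_va, bcnt,
					      access_mask, fault);
	if (npages < 0)
		return npages;

	my_drv_update_device_ptes(umem_odp, user_va, bcnt);

	mutex_unlock(&umem_odp->umem_mutex);
	return npages;
}

A page-fault handler would typically pass fault=true, while a sync path that must not trigger faults passes fault=false.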
Diffstat (limited to 'drivers/infiniband/core/umem_odp.c')
-rw-r--r--	drivers/infiniband/core/umem_odp.c	35
1 file changed, 25 insertions(+), 10 deletions(-)
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index ad3b2ec688e8..323f6cf00682 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -347,9 +347,10 @@ static int ib_umem_odp_map_dma_single_page(
  *        the return value.
  * @access_mask: bit mask of the requested access permissions for the given
  *               range.
+ * @fault: is faulting required for the given range
  */
 int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
-				 u64 bcnt, u64 access_mask)
+				 u64 bcnt, u64 access_mask, bool fault)
 			__acquires(&umem_odp->umem_mutex)
 {
 	struct task_struct *owning_process = NULL;
@@ -385,10 +386,12 @@ int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
 	range.end = ALIGN(user_virt + bcnt, 1UL << page_shift);
 	pfn_start_idx = (range.start - ib_umem_start(umem_odp)) >> PAGE_SHIFT;
 	num_pfns = (range.end - range.start) >> PAGE_SHIFT;
-	range.default_flags = HMM_PFN_REQ_FAULT;
+	if (fault) {
+		range.default_flags = HMM_PFN_REQ_FAULT;
 
-	if (access_mask & ODP_WRITE_ALLOWED_BIT)
-		range.default_flags |= HMM_PFN_REQ_WRITE;
+		if (access_mask & ODP_WRITE_ALLOWED_BIT)
+			range.default_flags |= HMM_PFN_REQ_WRITE;
+	}
 
 	range.hmm_pfns = &(umem_odp->pfn_list[pfn_start_idx]);
 	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
@@ -417,12 +420,24 @@ retry:
 
 	for (pfn_index = 0; pfn_index < num_pfns;
 		pfn_index += 1 << (page_shift - PAGE_SHIFT), dma_index++) {
-		/*
-		 * Since we asked for hmm_range_fault() to populate pages,
-		 * it shouldn't return an error entry on success.
-		 */
-		WARN_ON(range.hmm_pfns[pfn_index] & HMM_PFN_ERROR);
-		WARN_ON(!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID));
+
+		if (fault) {
+			/*
+			 * Since we asked for hmm_range_fault() to populate
+			 * pages it shouldn't return an error entry on success.
+			 */
+			WARN_ON(range.hmm_pfns[pfn_index] & HMM_PFN_ERROR);
+			WARN_ON(!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID));
+		} else {
+			if (!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID)) {
+				WARN_ON(umem_odp->dma_list[dma_index]);
+				continue;
+			}
+			access_mask = ODP_READ_ALLOWED_BIT;
+			if (range.hmm_pfns[pfn_index] & HMM_PFN_WRITE)
+				access_mask |= ODP_WRITE_ALLOWED_BIT;
+		}
+
 		hmm_order = hmm_pfn_to_map_order(range.hmm_pfns[pfn_index]);
 		/* If a hugepage was detected and ODP wasn't set for, the umem
 		 * page_shift will be used, the opposite case is an error.