1 files changed, 69 insertions, 11 deletions
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 4a9d666f1e91..57d8c37a002b 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -66,6 +66,7 @@ struct vfio_domain {
 	struct list_head	next;
 	struct list_head	group_list;
 	int			prot;		/* IOMMU_CACHE */
+	bool			fgsp;		/* Fine-grained super pages */
 };
 
 struct vfio_dma {
@@ -264,6 +265,7 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
 	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 	bool lock_cap = capable(CAP_IPC_LOCK);
 	long ret, i;
+	bool rsvd;
 
 	if (!current->mm)
 		return -ENODEV;
@@ -272,10 +274,9 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
 	if (ret)
 		return ret;
 
-	if (is_invalid_reserved_pfn(*pfn_base))
-		return 1;
+	rsvd = is_invalid_reserved_pfn(*pfn_base);
 
-	if (!lock_cap && current->mm->locked_vm + 1 > limit) {
+	if (!rsvd && !lock_cap && current->mm->locked_vm + 1 > limit) {
 		put_pfn(*pfn_base, prot);
 		pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
 			limit << PAGE_SHIFT);
@@ -283,7 +284,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
 	}
 
 	if (unlikely(disable_hugepages)) {
-		vfio_lock_acct(1);
+		if (!rsvd)
+			vfio_lock_acct(1);
 		return 1;
 	}
 
@@ -295,12 +297,14 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
 		if (ret)
 			break;
 
-		if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) {
+		if (pfn != *pfn_base + i ||
+		    rsvd != is_invalid_reserved_pfn(pfn)) {
 			put_pfn(pfn, prot);
 			break;
 		}
 
-		if (!lock_cap && current->mm->locked_vm + i + 1 > limit) {
+		if (!rsvd && !lock_cap &&
+		    current->mm->locked_vm + i + 1 > limit) {
 			put_pfn(pfn, prot);
 			pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
 				__func__, limit << PAGE_SHIFT);
@@ -308,7 +312,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
 		}
 	}
 
-	vfio_lock_acct(i);
+	if (!rsvd)
+		vfio_lock_acct(i);
 
 	return i;
 }
@@ -346,12 +351,14 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
 	domain = d = list_first_entry(&iommu->domain_list,
 				      struct vfio_domain, next);
 
-	list_for_each_entry_continue(d, &iommu->domain_list, next)
+	list_for_each_entry_continue(d, &iommu->domain_list, next) {
 		iommu_unmap(d->domain, dma->iova, dma->size);
+		cond_resched();
+	}
 
 	while (iova < end) {
-		size_t unmapped;
-		phys_addr_t phys;
+		size_t unmapped, len;
+		phys_addr_t phys, next;
 
 		phys = iommu_iova_to_phys(domain->domain, iova);
 		if (WARN_ON(!phys)) {
@@ -359,7 +366,19 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
 			continue;
 		}
 
-		unmapped = iommu_unmap(domain->domain, iova, PAGE_SIZE);
+		/*
+		 * To optimize for fewer iommu_unmap() calls, each of which
+		 * may require hardware cache flushing, try to find the
+		 * largest contiguous physical memory chunk to unmap.
+		 */
+		for (len = PAGE_SIZE;
+		     !domain->fgsp && iova + len < end; len += PAGE_SIZE) {
+			next = iommu_iova_to_phys(domain->domain, iova + len);
+			if (next != phys + len)
+				break;
+		}
+
+		unmapped = iommu_unmap(domain->domain, iova, len);
 		if (WARN_ON(!unmapped))
 			break;
 
@@ -367,6 +386,8 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
 					     unmapped >> PAGE_SHIFT,
 					     dma->prot, false);
 		iova += unmapped;
+
+		cond_resched();
 	}
 
 	vfio_lock_acct(-unlocked);
@@ -511,6 +532,8 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
 			    map_try_harder(d, iova, pfn, npage, prot))
 				goto unwind;
 		}
+
+		cond_resched();
 	}
 
 	return 0;
@@ -665,6 +688,39 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 	return 0;
 }
 
+/*
+ * We change our unmap behavior slightly depending on whether the IOMMU
+ * supports fine-grained superpages.  IOMMUs like AMD-Vi will use a superpage
+ * for practically any contiguous power-of-two mapping we give it.  This means
+ * we don't need to look for contiguous chunks ourselves to make unmapping
+ * more efficient.  On IOMMUs with coarse-grained super pages, like Intel VT-d
+ * with discrete 2M/1G/512G/1T superpages, identifying contiguous chunks
+ * significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when
+ * hugetlbfs is in use.
+ */
+static void vfio_test_domain_fgsp(struct vfio_domain *domain)
+{
+	struct page *pages;
+	int ret, order = get_order(PAGE_SIZE * 2);
+
+	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!pages)
+		return;		/* fgsp stays as it was */
+	/* Probe: map two pages at IOVA 0, then try to unmap only one. */
+	ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
+			IOMMU_READ | IOMMU_WRITE | domain->prot);
+	if (!ret) {
+		size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
+
+		if (unmapped == PAGE_SIZE)	/* exact: unmap the rest */
+			iommu_unmap(domain->domain, PAGE_SIZE, PAGE_SIZE);
+		else	/* any other size (even 0) sets fgsp */
+			domain->fgsp = true;
+	}
+	/* Runs on map failure too, so the pages are always freed. */
+	__free_pages(pages, order);
+}
+
 static int vfio_iommu_type1_attach_group(void *iommu_data,
 					 struct iommu_group *iommu_group)
 {
@@ -758,6 +814,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 		}
 	}
 
+	vfio_test_domain_fgsp(domain);
+
 	/* replay mappings on new domains */
 	ret = vfio_iommu_replay(iommu, domain);
 	if (ret)