1 files changed, 78 insertions, 45 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b53d273eb7a1..9599f7559b3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -75,50 +75,77 @@ static unsigned amdgpu_vm_directory_size(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_vm_get_bos - add the vm BOs to a validation list
+ * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
  *
  * @vm: vm providing the BOs
- * @head: head of validation list
+ * @validated: head of validation list
+ * @entry: entry to add
  *
  * Add the page directory to the list of BOs to
- * validate for command submission (cayman+).
+ * validate for command submission.
  */
-struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
-					  struct amdgpu_vm *vm,
-					  struct list_head *head)
+void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
+			 struct list_head *validated,
+			 struct amdgpu_bo_list_entry *entry)
 {
-	struct amdgpu_bo_list_entry *list;
-	unsigned i, idx;
+	entry->robj = vm->page_directory;
+	entry->prefered_domains = AMDGPU_GEM_DOMAIN_VRAM;
+	entry->allowed_domains = AMDGPU_GEM_DOMAIN_VRAM;
+	entry->priority = 0;
+	entry->tv.bo = &vm->page_directory->tbo;
+	entry->tv.shared = true;
+	list_add(&entry->tv.head, validated);
+}
 
-	list = drm_malloc_ab(vm->max_pde_used + 2,
-			     sizeof(struct amdgpu_bo_list_entry));
-	if (!list) {
-		return NULL;
-	}
+/**
+ * amdgpu_vm_get_bos - add the vm BOs to a duplicates list
+ *
+ * @vm: vm providing the BOs
+ * @duplicates: head of duplicates list
+ *
+ * Add the page directory to the BO duplicates list
+ * for command submission.
+ */
+void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates)
+{
+	unsigned i;
 
 	/* add the vm page table to the list */
-	list[0].robj = vm->page_directory;
-	list[0].prefered_domains = AMDGPU_GEM_DOMAIN_VRAM;
-	list[0].allowed_domains = AMDGPU_GEM_DOMAIN_VRAM;
-	list[0].priority = 0;
-	list[0].tv.bo = &vm->page_directory->tbo;
-	list[0].tv.shared = true;
-	list_add(&list[0].tv.head, head);
-
-	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
-		if (!vm->page_tables[i].bo)
+	for (i = 0; i <= vm->max_pde_used; ++i) {
+		struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;
+
+		if (!entry->robj)
 			continue;
 
-		list[idx].robj = vm->page_tables[i].bo;
-		list[idx].prefered_domains = AMDGPU_GEM_DOMAIN_VRAM;
-		list[idx].allowed_domains = AMDGPU_GEM_DOMAIN_VRAM;
-		list[idx].priority = 0;
-		list[idx].tv.bo = &list[idx].robj->tbo;
-		list[idx].tv.shared = true;
-		list_add(&list[idx++].tv.head, head);
+		list_add(&entry->tv.head, duplicates);
 	}
 
-	return list;
+}
+
+/**
+ * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail
+ *
+ * @adev: amdgpu device instance
+ * @vm: vm providing the BOs
+ *
+ * Move the PT BOs to the tail of the LRU.
+ */
+void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
+				  struct amdgpu_vm *vm)
+{
+	struct ttm_bo_global *glob = adev->mman.bdev.glob;
+	unsigned i;
+
+	spin_lock(&glob->lru_lock);
+	for (i = 0; i <= vm->max_pde_used; ++i) {
+		struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;
+
+		if (!entry->robj)
+			continue;
+
+		ttm_bo_move_to_lru_tail(&entry->robj->tbo);
+	}
+	spin_unlock(&glob->lru_lock);
 }
 
 /**
@@ -461,7 +488,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 
 	/* walk over the address space and update the page directory */
 	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
-		struct amdgpu_bo *bo = vm->page_tables[pt_idx].bo;
+		struct amdgpu_bo *bo = vm->page_tables[pt_idx].entry.robj;
 		uint64_t pde, pt;
 
 		if (bo == NULL)
@@ -638,7 +665,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; ) {
 		uint64_t pt_idx = addr >> amdgpu_vm_block_size;
-		struct amdgpu_bo *pt = vm->page_tables[pt_idx].bo;
+		struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
 		unsigned nptes;
 		uint64_t pte;
 		int r;
@@ -1010,13 +1037,13 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 		return -EINVAL;
 
 	/* make sure object fit at this offset */
-	eaddr = saddr + size;
+	eaddr = saddr + size - 1;
 	if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo)))
 		return -EINVAL;
 
 	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
-	if (last_pfn > adev->vm_manager.max_pfn) {
-		dev_err(adev->dev, "va above limit (0x%08X > 0x%08X)\n",
+	if (last_pfn >= adev->vm_manager.max_pfn) {
+		dev_err(adev->dev, "va above limit (0x%08X >= 0x%08X)\n",
 			last_pfn, adev->vm_manager.max_pfn);
 		return -EINVAL;
 	}
@@ -1025,7 +1052,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
 	spin_lock(&vm->it_lock);
-	it = interval_tree_iter_first(&vm->va, saddr, eaddr - 1);
+	it = interval_tree_iter_first(&vm->va, saddr, eaddr);
 	spin_unlock(&vm->it_lock);
 	if (it) {
 		struct amdgpu_bo_va_mapping *tmp;
@@ -1046,7 +1073,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 
 	INIT_LIST_HEAD(&mapping->list);
 	mapping->it.start = saddr;
-	mapping->it.last = eaddr - 1;
+	mapping->it.last = eaddr;
 	mapping->offset = offset;
 	mapping->flags = flags;
 
@@ -1070,9 +1097,11 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	/* walk over the address space and allocate the page tables */
 	for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
 		struct reservation_object *resv = vm->page_directory->tbo.resv;
+		struct amdgpu_bo_list_entry *entry;
 		struct amdgpu_bo *pt;
 
-		if (vm->page_tables[pt_idx].bo)
+		entry = &vm->page_tables[pt_idx].entry;
+		if (entry->robj)
 			continue;
 
 		r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
@@ -1094,8 +1123,13 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 			goto error_free;
 		}
 
+		entry->robj = pt;
+		entry->prefered_domains = AMDGPU_GEM_DOMAIN_VRAM;
+		entry->allowed_domains = AMDGPU_GEM_DOMAIN_VRAM;
+		entry->priority = 0;
+		entry->tv.bo = &entry->robj->tbo;
+		entry->tv.shared = true;
 		vm->page_tables[pt_idx].addr = 0;
-		vm->page_tables[pt_idx].bo = pt;
 	}
 
 	return 0;
@@ -1248,7 +1282,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 		AMDGPU_VM_PTE_COUNT * 8);
-	unsigned pd_size, pd_entries, pts_size;
+	unsigned pd_size, pd_entries;
 	int i, r;
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -1266,8 +1300,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	pd_entries = amdgpu_vm_num_pdes(adev);
 
 	/* allocate page table array */
-	pts_size = pd_entries * sizeof(struct amdgpu_vm_pt);
-	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+	vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
 	if (vm->page_tables == NULL) {
 		DRM_ERROR("Cannot allocate memory for page table array\n");
 		return -ENOMEM;
@@ -1326,8 +1359,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	}
 
 	for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
-		amdgpu_bo_unref(&vm->page_tables[i].bo);
-	kfree(vm->page_tables);
+		amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
+	drm_free_large(vm->page_tables);
 
 	amdgpu_bo_unref(&vm->page_directory);
 	fence_put(vm->page_directory_fence);