aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c365
1 files changed, 200 insertions, 165 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0c52d1f9fe0f..e51b48ac48eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -29,20 +29,26 @@
* Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
* Dave Airlie
*/
+
+#include <linux/dma-mapping.h>
+#include <linux/iommu.h>
+#include <linux/hmm.h>
+#include <linux/pagemap.h>
+#include <linux/sched/task.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/swiotlb.h>
+
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_module.h>
#include <drm/ttm/ttm_page_alloc.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_debugfs.h>
#include <drm/amdgpu_drm.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/swiotlb.h>
-#include <linux/swap.h>
-#include <linux/pagemap.h>
-#include <linux/debugfs.h>
-#include <linux/iommu.h>
+
#include "amdgpu.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
@@ -385,6 +391,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
src->mem);
src_node_size = (src_mm->size << PAGE_SHIFT);
+ src_page_offset = 0;
} else {
src_node_start += cur_size;
src_page_offset = src_node_start & (PAGE_SIZE - 1);
@@ -394,6 +401,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
dst->mem);
dst_node_size = (dst_mm->size << PAGE_SHIFT);
+ dst_page_offset = 0;
} else {
dst_node_start += cur_size;
dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
@@ -481,6 +489,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
if (unlikely(r)) {
+ pr_err("Failed to find GTT space for blit from VRAM\n");
return r;
}
@@ -539,6 +548,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
if (unlikely(r)) {
+ pr_err("Failed to find GTT space for blit to VRAM\n");
return r;
}
@@ -559,6 +569,30 @@ out_cleanup:
}
/**
+ * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_mem_visible(struct amdgpu_device *adev,
+ struct ttm_mem_reg *mem)
+{
+ struct drm_mm_node *nodes = mem->mm_node;
+
+ if (mem->mem_type == TTM_PL_SYSTEM ||
+ mem->mem_type == TTM_PL_TT)
+ return true;
+ if (mem->mem_type != TTM_PL_VRAM)
+ return false;
+
+ /* ttm_mem_reg_ioremap only supports contiguous memory */
+ if (nodes->size != mem->num_pages)
+ return false;
+
+ return ((nodes->start + nodes->size) << PAGE_SHIFT)
+ <= adev->gmc.visible_vram_size;
+}
+
+/**
* amdgpu_bo_move - Move a buffer object to a new memory location
*
* Called by ttm_bo_handle_move_mem()
@@ -602,8 +636,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
return 0;
}
- if (!adev->mman.buffer_funcs_enabled)
+ if (!adev->mman.buffer_funcs_enabled) {
+ r = -ENODEV;
goto memcpy;
+ }
if (old_mem->mem_type == TTM_PL_VRAM &&
new_mem->mem_type == TTM_PL_SYSTEM) {
@@ -618,10 +654,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
if (r) {
memcpy:
- r = ttm_bo_move_memcpy(bo, ctx, new_mem);
- if (r) {
+ /* Check that all memory is CPU accessible */
+ if (!amdgpu_mem_visible(adev, old_mem) ||
+ !amdgpu_mem_visible(adev, new_mem)) {
+ pr_err("Move buffer fallback to memcpy unavailable\n");
return r;
}
+
+ r = ttm_bo_move_memcpy(bo, ctx, new_mem);
+ if (r)
+ return r;
}
if (bo->type == ttm_bo_type_device &&
@@ -703,143 +745,183 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
/*
* TTM backend functions.
*/
-struct amdgpu_ttm_gup_task_list {
- struct list_head list;
- struct task_struct *task;
-};
-
struct amdgpu_ttm_tt {
struct ttm_dma_tt ttm;
u64 offset;
uint64_t userptr;
struct task_struct *usertask;
uint32_t userflags;
- spinlock_t guptasklock;
- struct list_head guptasks;
- atomic_t mmu_invalidations;
- uint32_t last_set_pages;
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+ struct hmm_range *range;
+#endif
};
/**
- * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
- * pointer to memory
+ * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
+ * memory and start HMM tracking CPU page table update
*
- * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
- * This provides a wrapper around the get_user_pages() call to provide
- * device accessible pages that back user memory.
+ * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
+ * once afterwards to stop HMM tracking
*/
-int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+
+#define MAX_RETRY_HMM_RANGE_FAULT 16
+
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
{
+ struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL;
+ struct ttm_tt *ttm = bo->tbo.ttm;
struct amdgpu_ttm_tt *gtt = (void *)ttm;
struct mm_struct *mm = gtt->usertask->mm;
- unsigned int flags = 0;
- unsigned pinned = 0;
- int r;
+ unsigned long start = gtt->userptr;
+ struct vm_area_struct *vma;
+ struct hmm_range *range;
+ unsigned long i;
+ uint64_t *pfns;
+ int retry = 0;
+ int r = 0;
if (!mm) /* Happens during process shutdown */
return -ESRCH;
- if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
- flags |= FOLL_WRITE;
+ if (unlikely(!mirror)) {
+ DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n");
+ r = -EFAULT;
+ goto out;
+ }
- down_read(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+ if (unlikely(!vma || start < vma->vm_start)) {
+ r = -EFAULT;
+ goto out;
+ }
+ if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+ vma->vm_file)) {
+ r = -EPERM;
+ goto out;
+ }
- if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
- /*
- * check that we only use anonymous memory to prevent problems
- * with writeback
- */
- unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
- struct vm_area_struct *vma;
+ range = kzalloc(sizeof(*range), GFP_KERNEL);
+ if (unlikely(!range)) {
+ r = -ENOMEM;
+ goto out;
+ }
- vma = find_vma(mm, gtt->userptr);
- if (!vma || vma->vm_file || vma->vm_end < end) {
- up_read(&mm->mmap_sem);
- return -EPERM;
- }
+ pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
+ if (unlikely(!pfns)) {
+ r = -ENOMEM;
+ goto out_free_ranges;
}
- /* loop enough times using contiguous pages of memory */
- do {
- unsigned num_pages = ttm->num_pages - pinned;
- uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
- struct page **p = pages + pinned;
- struct amdgpu_ttm_gup_task_list guptask;
+ amdgpu_hmm_init_range(range);
+ range->default_flags = range->flags[HMM_PFN_VALID];
+ range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ?
+ 0 : range->flags[HMM_PFN_WRITE];
+ range->pfn_flags_mask = 0;
+ range->pfns = pfns;
+ hmm_range_register(range, mirror, start,
+ start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT);
- guptask.task = current;
- spin_lock(&gtt->guptasklock);
- list_add(&guptask.list, &gtt->guptasks);
- spin_unlock(&gtt->guptasklock);
+retry:
+ /*
+ * Just wait for range to be valid, safe to ignore return value as we
+ * will use the return value of hmm_range_fault() below under the
+ * mmap_sem to ascertain the validity of the range.
+ */
+ hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
+
+ down_read(&mm->mmap_sem);
+
+ r = hmm_range_fault(range, true);
+ if (unlikely(r < 0)) {
+ if (likely(r == -EAGAIN)) {
+ /*
+ * return -EAGAIN, mmap_sem is dropped
+ */
+ if (retry++ < MAX_RETRY_HMM_RANGE_FAULT)
+ goto retry;
+ else
+ pr_err("Retry hmm fault too many times\n");
+ }
- if (mm == current->mm)
- r = get_user_pages(userptr, num_pages, flags, p, NULL);
- else
- r = get_user_pages_remote(gtt->usertask,
- mm, userptr, num_pages,
- flags, p, NULL, NULL);
+ goto out_up_read;
+ }
- spin_lock(&gtt->guptasklock);
- list_del(&guptask.list);
- spin_unlock(&gtt->guptasklock);
+ up_read(&mm->mmap_sem);
- if (r < 0)
- goto release_pages;
+ for (i = 0; i < ttm->num_pages; i++) {
+ pages[i] = hmm_device_entry_to_page(range, pfns[i]);
+ if (unlikely(!pages[i])) {
+ pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
+ i, pfns[i]);
+ r = -ENOMEM;
- pinned += r;
+ goto out_free_pfns;
+ }
+ }
- } while (pinned < ttm->num_pages);
+ gtt->range = range;
- up_read(&mm->mmap_sem);
return 0;
-release_pages:
- release_pages(pages, pinned);
- up_read(&mm->mmap_sem);
+out_up_read:
+ if (likely(r != -EAGAIN))
+ up_read(&mm->mmap_sem);
+out_free_pfns:
+ hmm_range_unregister(range);
+ kvfree(pfns);
+out_free_ranges:
+ kfree(range);
+out:
return r;
}
/**
- * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
+ * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
+ * Check if the pages backing this ttm range have been invalidated
*
- * Called by amdgpu_cs_list_validate(). This creates the page list
- * that backs user memory and will ultimately be mapped into the device
- * address space.
+ * Returns: true if pages are still valid
*/
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- unsigned i;
+ bool r = false;
- gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
- for (i = 0; i < ttm->num_pages; ++i) {
- if (ttm->pages[i])
- put_page(ttm->pages[i]);
+ if (!gtt || !gtt->userptr)
+ return false;
- ttm->pages[i] = pages ? pages[i] : NULL;
+ DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n",
+ gtt->userptr, ttm->num_pages);
+
+ WARN_ONCE(!gtt->range || !gtt->range->pfns,
+ "No user pages to check\n");
+
+ if (gtt->range) {
+ r = hmm_range_valid(gtt->range);
+ hmm_range_unregister(gtt->range);
+
+ kvfree(gtt->range->pfns);
+ kfree(gtt->range);
+ gtt->range = NULL;
}
+
+ return r;
}
+#endif
/**
- * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty
+ * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
*
- * Called while unpinning userptr pages
+ * Called by amdgpu_cs_list_validate(). This creates the page list
+ * that backs user memory and will ultimately be mapped into the device
+ * address space.
*/
-void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- unsigned i;
-
- for (i = 0; i < ttm->num_pages; ++i) {
- struct page *page = ttm->pages[i];
-
- if (!page)
- continue;
+ unsigned long i;
- if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
- set_page_dirty(page);
-
- mark_page_accessed(page);
- }
+ for (i = 0; i < ttm->num_pages; ++i)
+ ttm->pages[i] = pages ? pages[i] : NULL;
}
/**
@@ -901,10 +983,14 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
/* unmap the pages mapped to the device */
dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
- /* mark the pages as dirty */
- amdgpu_ttm_tt_mark_user_pages(ttm);
-
sg_free_table(ttm->sg);
+
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+ if (gtt->range &&
+ ttm->pages[0] == hmm_device_entry_to_page(gtt->range,
+ gtt->range->pfns[0]))
+ WARN_ONCE(1, "Missing get_user_page_done\n");
+#endif
}
int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
@@ -925,8 +1011,8 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
goto gart_bind_fail;
/* Patch mtype of the second part BO */
- flags &= ~AMDGPU_PTE_MTYPE_MASK;
- flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC);
+ flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
+ flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
r = amdgpu_gart_bind(adev,
gtt->offset + (page_idx << PAGE_SHIFT),
@@ -1254,11 +1340,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
gtt->usertask = current->group_leader;
get_task_struct(gtt->usertask);
- spin_lock_init(&gtt->guptasklock);
- INIT_LIST_HEAD(&gtt->guptasks);
- atomic_set(&gtt->mmu_invalidations, 0);
- gtt->last_set_pages = 0;
-
return 0;
}
@@ -1287,7 +1368,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- struct amdgpu_ttm_gup_task_list *entry;
unsigned long size;
if (gtt == NULL || !gtt->userptr)
@@ -1300,48 +1380,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
if (gtt->userptr > end || gtt->userptr + size <= start)
return false;
- /* Search the lists of tasks that hold this mapping and see
- * if current is one of them. If it is return false.
- */
- spin_lock(&gtt->guptasklock);
- list_for_each_entry(entry, &gtt->guptasks, list) {
- if (entry->task == current) {
- spin_unlock(&gtt->guptasklock);
- return false;
- }
- }
- spin_unlock(&gtt->guptasklock);
-
- atomic_inc(&gtt->mmu_invalidations);
-
return true;
}
/**
- * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
+ * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
*/
-bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
- int *last_invalidated)
-{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- int prev_invalidated = *last_invalidated;
-
- *last_invalidated = atomic_read(&gtt->mmu_invalidations);
- return prev_invalidated != *last_invalidated;
-}
-
-/**
- * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
- * been invalidated since the last time they've been set?
- */
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
if (gtt == NULL || !gtt->userptr)
return false;
- return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
+ return true;
}
/**
@@ -1753,44 +1805,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* Initialize various on-chip memory pools */
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
- adev->gds.mem.total_size);
+ adev->gds.gds_size);
if (r) {
DRM_ERROR("Failed initializing GDS heap.\n");
return r;
}
- r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
- 4, AMDGPU_GEM_DOMAIN_GDS,
- &adev->gds.gds_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
- adev->gds.gws.total_size);
+ adev->gds.gws_size);
if (r) {
DRM_ERROR("Failed initializing gws heap.\n");
return r;
}
- r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
- 1, AMDGPU_GEM_DOMAIN_GWS,
- &adev->gds.gws_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
- adev->gds.oa.total_size);
+ adev->gds.oa_size);
if (r) {
DRM_ERROR("Failed initializing oa heap.\n");
return r;
}
- r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
- 1, AMDGPU_GEM_DOMAIN_OA,
- &adev->gds.oa_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
/* Register debugfs entries for amdgpu_ttm */
r = amdgpu_ttm_debugfs_init(adev);
if (r) {
@@ -2061,9 +2095,9 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
mm_node = bo->tbo.mem.mm_node;
num_loops = 0;
while (num_pages) {
- uint32_t byte_count = mm_node->size << PAGE_SHIFT;
+ uint64_t byte_count = mm_node->size << PAGE_SHIFT;
- num_loops += DIV_ROUND_UP(byte_count, max_bytes);
+ num_loops += DIV_ROUND_UP_ULL(byte_count, max_bytes);
num_pages -= mm_node->size;
++mm_node;
}
@@ -2089,12 +2123,13 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
mm_node = bo->tbo.mem.mm_node;
while (num_pages) {
- uint32_t byte_count = mm_node->size << PAGE_SHIFT;
+ uint64_t byte_count = mm_node->size << PAGE_SHIFT;
uint64_t dst_addr;
dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
while (byte_count) {
- uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
+ uint32_t cur_size_in_bytes = min_t(uint64_t, byte_count,
+ max_bytes);
amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
dst_addr, cur_size_in_bytes);