diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 375 |
1 files changed, 233 insertions, 142 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index dff41d0a85fe..2616e2eafdeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -35,10 +35,12 @@ #include <linux/hmm.h> #include <linux/pagemap.h> #include <linux/sched/task.h> +#include <linux/sched/mm.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/swap.h> #include <linux/swiotlb.h> +#include <linux/dma-buf.h> #include <drm/ttm/ttm_bo_api.h> #include <drm/ttm/ttm_bo_driver.h> @@ -54,6 +56,7 @@ #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" #include "amdgpu_sdma.h" +#include "amdgpu_ras.h" #include "bif/bif_4_1_d.h" static int amdgpu_map_buffer(struct ttm_buffer_object *bo, @@ -484,15 +487,12 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) { - struct amdgpu_device *adev; struct ttm_mem_reg *old_mem = &bo->mem; struct ttm_mem_reg tmp_mem; struct ttm_place placements; struct ttm_placement placement; int r; - adev = amdgpu_ttm_adev(bo->bdev); - /* create space/pages for new_mem in GTT space */ tmp_mem = *new_mem; tmp_mem.mm_node = NULL; @@ -543,15 +543,12 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) { - struct amdgpu_device *adev; struct ttm_mem_reg *old_mem = &bo->mem; struct ttm_mem_reg tmp_mem; struct ttm_placement placement; struct ttm_place placements; int r; - adev = amdgpu_ttm_adev(bo->bdev); - /* make space in GTT for old_mem buffer */ tmp_mem = *new_mem; tmp_mem.mm_node = NULL; @@ -763,6 +760,7 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, */ struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; + struct drm_gem_object *gobj; u64 offset; uint64_t userptr; struct task_struct *usertask; @@ -772,6 +770,20 @@ struct amdgpu_ttm_tt { #endif }; +#ifdef CONFIG_DRM_AMDGPU_USERPTR +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { + (1 << 0), /* HMM_PFN_VALID */ + (1 << 1), /* HMM_PFN_WRITE */ + 0 /* HMM_PFN_DEVICE_PRIVATE */ +}; + +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ + 0, /* HMM_PFN_NONE */ + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ +}; + /** * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user * memory and start HMM tracking CPU page table update @@ -779,85 +791,89 @@ struct amdgpu_ttm_tt { * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only * once afterwards to stop HMM tracking */ -#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - -#define MAX_RETRY_HMM_RANGE_FAULT 16 - int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) { - struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL; struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = (void *)ttm; - struct mm_struct *mm = gtt->usertask->mm; unsigned long start = gtt->userptr; struct vm_area_struct *vma; struct hmm_range *range; + unsigned long timeout; + struct mm_struct *mm; unsigned long i; - uint64_t *pfns; int r = 0; - if (!mm) /* Happens during process shutdown */ - return -ESRCH; - - if (unlikely(!mirror)) { - DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n"); - r = -EFAULT; - goto out; + mm = bo->notifier.mm; + if (unlikely(!mm)) { + DRM_DEBUG_DRIVER("BO is not registered?\n"); + return -EFAULT; } - vma = find_vma(mm, start); - if (unlikely(!vma || start < vma->vm_start)) { - r = -EFAULT; - goto out; - } - if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && - vma->vm_file)) { - r = -EPERM; - goto out; - } + /* Another get_user_pages is running at the same time?? */ + if (WARN_ON(gtt->range)) + return -EFAULT; + + if (!mmget_not_zero(mm)) /* Happens during process shutdown */ + return -ESRCH; range = kzalloc(sizeof(*range), GFP_KERNEL); if (unlikely(!range)) { r = -ENOMEM; goto out; } + range->notifier = &bo->notifier; + range->flags = hmm_range_flags; + range->values = hmm_range_values; + range->pfn_shift = PAGE_SHIFT; + range->start = bo->notifier.interval_tree.start; + range->end = bo->notifier.interval_tree.last + 1; + range->default_flags = hmm_range_flags[HMM_PFN_VALID]; + if (!amdgpu_ttm_tt_is_readonly(ttm)) + range->default_flags |= range->flags[HMM_PFN_WRITE]; - pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL); - if (unlikely(!pfns)) { + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns), + GFP_KERNEL); + if (unlikely(!range->pfns)) { r = -ENOMEM; goto out_free_ranges; } - amdgpu_hmm_init_range(range); - range->default_flags = range->flags[HMM_PFN_VALID]; - range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ? - 0 : range->flags[HMM_PFN_WRITE]; - range->pfn_flags_mask = 0; - range->pfns = pfns; - range->start = start; - range->end = start + ttm->num_pages * PAGE_SIZE; - - hmm_range_register(range, mirror); + down_read(&mm->mmap_sem); + vma = find_vma(mm, start); + if (unlikely(!vma || start < vma->vm_start)) { + r = -EFAULT; + goto out_unlock; + } + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && + vma->vm_file)) { + r = -EPERM; + goto out_unlock; + } + up_read(&mm->mmap_sem); + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - /* - * Just wait for range to be valid, safe to ignore return value as we - * will use the return value of hmm_range_fault() below under the - * mmap_sem to ascertain the validity of the range. - */ - hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT); +retry: + range->notifier_seq = mmu_interval_read_begin(&bo->notifier); down_read(&mm->mmap_sem); r = hmm_range_fault(range, 0); up_read(&mm->mmap_sem); - - if (unlikely(r < 0)) + if (unlikely(r <= 0)) { + /* + * FIXME: This timeout should encompass the retry from + * mmu_interval_read_retry() as well. + */ + if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout)) + goto retry; goto out_free_pfns; + } for (i = 0; i < ttm->num_pages; i++) { - pages[i] = hmm_device_entry_to_page(range, pfns[i]); + /* FIXME: The pages cannot be touched outside the notifier_lock */ + pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); if (unlikely(!pages[i])) { pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", - i, pfns[i]); + i, range->pfns[i]); r = -ENOMEM; goto out_free_pfns; @@ -865,15 +881,18 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) } gtt->range = range; + mmput(mm); return 0; +out_unlock: + up_read(&mm->mmap_sem); out_free_pfns: - hmm_range_unregister(range); - kvfree(pfns); + kvfree(range->pfns); out_free_ranges: kfree(range); out: + mmput(mm); return r; } @@ -898,15 +917,18 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) "No user pages to check\n"); if (gtt->range) { - r = hmm_range_valid(gtt->range); - hmm_range_unregister(gtt->range); - + /* + * FIXME: Must always hold notifier_lock for this, and must + * not ignore the return code. + */ + r = mmu_interval_read_retry(gtt->range->notifier, + gtt->range->notifier_seq); kvfree(gtt->range->pfns); kfree(gtt->range); gtt->range = NULL; } - return r; + return !r; } #endif @@ -987,10 +1009,18 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) sg_free_table(ttm->sg); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - if (gtt->range && - ttm->pages[0] == hmm_device_entry_to_page(gtt->range, - gtt->range->pfns[0])) - WARN_ONCE(1, "Missing get_user_page_done\n"); + if (gtt->range) { + unsigned long i; + + for (i = 0; i < ttm->num_pages; i++) { + if (ttm->pages[i] != + hmm_device_entry_to_page(gtt->range, + gtt->range->pfns[i])) + break; + } + + WARN((i == ttm->num_pages), "Missing get_user_page_done\n"); + } #endif } @@ -1217,16 +1247,14 @@ static struct ttm_backend_func amdgpu_backend_func = { static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { - struct amdgpu_device *adev; struct amdgpu_ttm_tt *gtt; - adev = amdgpu_ttm_adev(bo->bdev); - gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL); if (gtt == NULL) { return NULL; } gtt->ttm.ttm.func = &amdgpu_backend_func; + gtt->gobj = &bo->base; /* allocate space for the uninitialized page entries */ if (ttm_sg_tt_init(>t->ttm, bo, page_flags)) { @@ -1247,7 +1275,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */ if (gtt && gtt->userptr) { @@ -1260,7 +1287,19 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, return 0; } - if (slave && ttm->sg) { + if (ttm->page_flags & TTM_PAGE_FLAG_SG) { + if (!ttm->sg) { + struct dma_buf_attachment *attach; + struct sg_table *sgt; + + attach = gtt->gobj->import_attach; + sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + ttm->sg = sgt; + } + drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); @@ -1287,9 +1326,8 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, */ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) { - struct amdgpu_device *adev; struct amdgpu_ttm_tt *gtt = (void *)ttm; - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + struct amdgpu_device *adev; if (gtt && gtt->userptr) { amdgpu_ttm_tt_set_user_pages(ttm, NULL); @@ -1298,7 +1336,16 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) return; } - if (slave) + if (ttm->sg && gtt->gobj->import_attach) { + struct dma_buf_attachment *attach; + + attach = gtt->gobj->import_attach; + dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL); + ttm->sg = NULL; + return; + } + + if (ttm->page_flags & TTM_PAGE_FLAG_SG) return; adev = amdgpu_ttm_adev(ttm->bdev); @@ -1634,81 +1681,105 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) */ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) { - struct ttm_operation_ctx ctx = { false, false }; - struct amdgpu_bo_param bp; - int r = 0; - int i; - u64 vram_size = adev->gmc.visible_vram_size; - u64 offset = adev->fw_vram_usage.start_offset; - u64 size = adev->fw_vram_usage.size; - struct amdgpu_bo *bo; - - memset(&bp, 0, sizeof(bp)); - bp.size = adev->fw_vram_usage.size; - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - bp.type = ttm_bo_type_kernel; - bp.resv = NULL; + uint64_t vram_size = adev->gmc.visible_vram_size; + adev->fw_vram_usage.va = NULL; adev->fw_vram_usage.reserved_bo = NULL; - if (adev->fw_vram_usage.size > 0 && - adev->fw_vram_usage.size <= vram_size) { + if (adev->fw_vram_usage.size == 0 || + adev->fw_vram_usage.size > vram_size) + return 0; - r = amdgpu_bo_create(adev, &bp, - &adev->fw_vram_usage.reserved_bo); - if (r) - goto error_create; + return amdgpu_bo_create_kernel_at(adev, + adev->fw_vram_usage.start_offset, + adev->fw_vram_usage.size, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->fw_vram_usage.reserved_bo, + &adev->fw_vram_usage.va); +} - r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false); - if (r) - goto error_reserve; +/* + * Memoy training reservation functions + */ - /* remove the original mem node and create a new one at the - * request position - */ - bo = adev->fw_vram_usage.reserved_bo; - offset = ALIGN(offset, PAGE_SIZE); - for (i = 0; i < bo->placement.num_placement; ++i) { - bo->placements[i].fpfn = offset >> PAGE_SHIFT; - bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; - } +/** + * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram + * + * @adev: amdgpu_device pointer + * + * free memory training reserved vram if it has been reserved. + */ +static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) +{ + struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; - ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem); - r = ttm_bo_mem_space(&bo->tbo, &bo->placement, - &bo->tbo.mem, &ctx); - if (r) - goto error_pin; + ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT; + amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL); + ctx->c2p_bo = NULL; - r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo, - AMDGPU_GEM_DOMAIN_VRAM, - adev->fw_vram_usage.start_offset, - (adev->fw_vram_usage.start_offset + - adev->fw_vram_usage.size)); - if (r) - goto error_pin; - r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, - &adev->fw_vram_usage.va); - if (r) - goto error_kmap; + amdgpu_bo_free_kernel(&ctx->p2c_bo, NULL, NULL); + ctx->p2c_bo = NULL; + + return 0; +} - amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); +/** + * amdgpu_ttm_training_reserve_vram_init - create bo vram reservation from memory training + * + * @adev: amdgpu_device pointer + * + * create bo vram reservation from memory training. + */ +static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev) +{ + int ret; + struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; + + memset(ctx, 0, sizeof(*ctx)); + if (!adev->fw_vram_usage.mem_train_support) { + DRM_DEBUG("memory training does not support!\n"); + return 0; } - return r; -error_kmap: - amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo); -error_pin: - amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); -error_reserve: - amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo); -error_create: - adev->fw_vram_usage.va = NULL; - adev->fw_vram_usage.reserved_bo = NULL; - return r; + ctx->c2p_train_data_offset = adev->fw_vram_usage.mem_train_fb_loc; + ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); + ctx->train_data_size = GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES; + + DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", + ctx->train_data_size, + ctx->p2c_train_data_offset, + ctx->c2p_train_data_offset); + + ret = amdgpu_bo_create_kernel_at(adev, + ctx->p2c_train_data_offset, + ctx->train_data_size, + AMDGPU_GEM_DOMAIN_VRAM, + &ctx->p2c_bo, + NULL); + if (ret) { + DRM_ERROR("alloc p2c_bo failed(%d)!\n", ret); + goto Err_out; + } + + ret = amdgpu_bo_create_kernel_at(adev, + ctx->c2p_train_data_offset, + ctx->train_data_size, + AMDGPU_GEM_DOMAIN_VRAM, + &ctx->c2p_bo, + NULL); + if (ret) { + DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret); + goto Err_out; + } + + ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; + return 0; + +Err_out: + amdgpu_ttm_training_reserve_vram_fini(adev); + return ret; } + /** * amdgpu_ttm_init - Init the memory management (ttm) as well as various * gtt/vram related fields. @@ -1731,6 +1802,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) r = ttm_bo_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->ddev->anon_inode->i_mapping, + adev->ddev->vma_offset_manager, dma_addressing_limited(adev->dev)); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); @@ -1771,6 +1843,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } + /* + *The reserved vram for memory training must be pinned to the specified + *place on the VRAM, so reserve it early. + */ + r = amdgpu_ttm_training_reserve_vram_init(adev); + if (r) + return r; + /* allocate memory as required for VGA * This is used for VGA emulation and pre-OS scanout buffers to * avoid display artifacts while transitioning between pre-OS @@ -1781,6 +1861,20 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) NULL, &stolen_vga_buf); if (r) return r; + + /* + * reserve one TMR (64K) memory at the top of VRAM which holds + * IP Discovery data and is protected by PSP. + */ + r = amdgpu_bo_create_kernel_at(adev, + adev->gmc.real_vram_size - DISCOVERY_TMR_SIZE, + DISCOVERY_TMR_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->discovery_memory, + NULL); + if (r) + return r; + DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); @@ -1856,7 +1950,11 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) return; amdgpu_ttm_debugfs_fini(adev); + amdgpu_ttm_training_reserve_vram_fini(adev); + /* return the IP Discovery TMR memory back to VRAM */ + amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); + if (adev->mman.aper_base_kaddr) iounmap(adev->mman.aper_base_kaddr); adev->mman.aper_base_kaddr = NULL; @@ -1952,10 +2050,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE; - num_dw = adev->mman.buffer_funcs->copy_num_dw; - while (num_dw & 0x7) - num_dw++; - + num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); num_bytes = num_pages * 8; r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job); @@ -2015,11 +2110,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, max_bytes = adev->mman.buffer_funcs->copy_max_bytes; num_loops = DIV_ROUND_UP(byte_count, max_bytes); - num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; - - /* for IB padding */ - while (num_dw & 0x7) - num_dw++; + num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job); if (r) |