From a6ff969fe9cbf369e3cd0ac54261fec1122682ec Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 4 Apr 2024 16:25:40 +0200 Subject: drm/amdgpu: fix visible VRAM handling during faults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we removed the hacky start code check we actually didn't took into account that *all* VRAM pages needs to be CPU accessible. Clean up the code and unify the handling into a single helper which checks if the whole resource is CPU accessible. The only place where a partial check would make sense is during eviction, but that is neglitible. Signed-off-by: Christian König Fixes: aed01a68047b ("drm/amdgpu: Remove TTM resource->start visible VRAM condition v2") Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher CC: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index be679c42b0b8..fa03d9e4874c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -250,28 +250,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) return drm_vma_node_offset_addr(&bo->tbo.base.vma_node); } -/** - * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM - */ -static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_res_cursor cursor; - - if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM) - return false; - - amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); - while (cursor.remaining) { - if (cursor.start < adev->gmc.visible_vram_size) - return true; - - amdgpu_res_next(&cursor, cursor.size); - } - - return false; -} - /** * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced */ -- cgit v1.2.3-59-g8ed1b From d3a9331a6591e9df64791e076f6591f440af51c3 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 21 Mar 2024 11:32:02 +0100 Subject: drm/amdgpu: once more fix the call oder in amdgpu_ttm_move() v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts drm/amdgpu: fix ftrace event amdgpu_bo_move always move on same heap. The basic problem here is that after the move the old location is simply not available any more. Some fixes were suggested, but essentially we should call the move notification before actually moving things because only this way we have the correct order for DMA-buf and VM move notifications as well. Also rework the statistic handling so that we don't update the eviction counter before the move. v2: add missing NULL check Signed-off-by: Christian König Fixes: 94aeb4117343 ("drm/amdgpu: fix ftrace event amdgpu_bo_move always move on same heap") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3171 Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher CC: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 14 +++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 4 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 48 ++++++++++++++++-------------- 3 files changed, 38 insertions(+), 28 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index ce733e3cb35d..f6d503432a9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1243,14 +1243,18 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, * amdgpu_bo_move_notify - notification about a memory move * @bo: pointer to a buffer object * @evict: if this move is evicting the buffer from the graphics address space + * @new_mem: new resource for backing the BO * * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs * bookkeeping. * TTM driver callback which is called when ttm moves a buffer. */ -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, + bool evict, + struct ttm_resource *new_mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + struct ttm_resource *old_mem = bo->resource; struct amdgpu_bo *abo; if (!amdgpu_bo_is_amdgpu_bo(bo)) @@ -1262,12 +1266,12 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) amdgpu_bo_kunmap(abo); if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach && - bo->resource->mem_type != TTM_PL_SYSTEM) + old_mem && old_mem->mem_type != TTM_PL_SYSTEM) dma_buf_move_notify(abo->tbo.base.dma_buf); - /* remember the eviction */ - if (evict) - atomic64_inc(&adev->num_evictions); + /* move_notify is called before move happens */ + trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1, + old_mem ? old_mem->mem_type : -1); } void amdgpu_bo_get_memory(struct amdgpu_bo *bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index fa03d9e4874c..bc42ccbde659 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -328,7 +328,9 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, size_t buffer_size, uint32_t *metadata_size, uint64_t *flags); -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict); +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, + bool evict, + struct ttm_resource *new_mem); void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1d71729e3f6b..4ffee5545265 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -481,14 +481,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL)) { + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); - goto out; + return 0; } if (old_mem->mem_type == TTM_PL_SYSTEM && (new_mem->mem_type == TTM_PL_TT || new_mem->mem_type == AMDGPU_PL_PREEMPT)) { + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); - goto out; + return 0; } if ((old_mem->mem_type == TTM_PL_TT || old_mem->mem_type == AMDGPU_PL_PREEMPT) && @@ -498,9 +500,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm); + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_resource_free(bo, &bo->resource); ttm_bo_assign_mem(bo, new_mem); - goto out; + return 0; } if (old_mem->mem_type == AMDGPU_PL_GDS || @@ -512,8 +515,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, new_mem->mem_type == AMDGPU_PL_OA || new_mem->mem_type == AMDGPU_PL_DOORBELL) { /* Nothing to save here */ + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); - goto out; + return 0; } if (bo->type == ttm_bo_type_device && @@ -525,22 +529,23 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; } - if (adev->mman.buffer_funcs_enabled) { - if (((old_mem->mem_type == TTM_PL_SYSTEM && - new_mem->mem_type == TTM_PL_VRAM) || - (old_mem->mem_type == TTM_PL_VRAM && - new_mem->mem_type == TTM_PL_SYSTEM))) { - hop->fpfn = 0; - hop->lpfn = 0; - hop->mem_type = TTM_PL_TT; - hop->flags = TTM_PL_FLAG_TEMPORARY; - return -EMULTIHOP; - } + if (adev->mman.buffer_funcs_enabled && + ((old_mem->mem_type == TTM_PL_SYSTEM && + new_mem->mem_type == TTM_PL_VRAM) || + (old_mem->mem_type == TTM_PL_VRAM && + new_mem->mem_type == TTM_PL_SYSTEM))) { + hop->fpfn = 0; + hop->lpfn = 0; + hop->mem_type = TTM_PL_TT; + hop->flags = TTM_PL_FLAG_TEMPORARY; + return -EMULTIHOP; + } + amdgpu_bo_move_notify(bo, evict, new_mem); + if (adev->mman.buffer_funcs_enabled) r = amdgpu_move_blit(bo, evict, new_mem, old_mem); - } else { + else r = -ENODEV; - } if (r) { /* Check that all memory is CPU accessible */ @@ -555,11 +560,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; } - trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); -out: - /* update statistics */ + /* update statistics after the move */ + if (evict) + atomic64_inc(&adev->num_evictions); atomic64_add(bo->base.size, &adev->num_bytes_moved); - amdgpu_bo_move_notify(bo, evict); return 0; } @@ -1559,7 +1563,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, static void amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) { - amdgpu_bo_move_notify(bo, false); + amdgpu_bo_move_notify(bo, false, NULL); } static struct ttm_device_funcs amdgpu_bo_driver = { -- cgit v1.2.3-59-g8ed1b