diff options
Diffstat (limited to 'sys/dev/pci/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r-- | sys/dev/pci/drm/amd/amdgpu/amdgpu_cs.c | 791 |
1 files changed, 396 insertions, 395 deletions
diff --git a/sys/dev/pci/drm/amd/amdgpu/amdgpu_cs.c b/sys/dev/pci/drm/amd/amdgpu/amdgpu_cs.c index d3fd9e861ee..7e6802d93ff 100644 --- a/sys/dev/pci/drm/amd/amdgpu/amdgpu_cs.c +++ b/sys/dev/pci/drm/amd/amdgpu/amdgpu_cs.c @@ -24,20 +24,26 @@ * Authors: * Jerome Glisse <glisse@freedesktop.org> */ + +#include <linux/file.h> #include <linux/pagemap.h> #include <linux/sync_file.h> -#include <drm/drmP.h> +#include <linux/dma-buf.h> + #include <drm/amdgpu_drm.h> #include <drm/drm_syncobj.h> #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_gmc.h" +#include "amdgpu_gem.h" +#include "amdgpu_ras.h" static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, struct drm_amdgpu_cs_chunk_fence *data, uint32_t *offset) { struct drm_gem_object *gobj; + struct amdgpu_bo *bo; unsigned long size; int r; @@ -45,21 +51,21 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, if (gobj == NULL) return -EINVAL; - p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); + bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); p->uf_entry.priority = 0; - p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; - p->uf_entry.tv.shared = true; - p->uf_entry.user_pages = NULL; + p->uf_entry.tv.bo = &bo->tbo; + /* One for TTM and one for the CS job */ + p->uf_entry.tv.num_shared = 2; drm_gem_object_put_unlocked(gobj); - size = amdgpu_bo_size(p->uf_entry.robj); + size = amdgpu_bo_size(bo); if (size != PAGE_SIZE || (data->offset + 8) > size) { r = -EINVAL; goto error_unref; } - if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) { + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { r = -EINVAL; goto error_unref; } @@ -69,7 +75,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, return 0; error_unref: - amdgpu_bo_unref(&p->uf_entry.robj); + amdgpu_bo_unref(&bo); return r; } @@ -211,6 +217,9 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs case AMDGPU_CHUNK_ID_DEPENDENCIES: case AMDGPU_CHUNK_ID_SYNCOBJ_IN: case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: + case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: break; default: @@ -228,7 +237,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs goto free_all_kdata; } - if (p->uf_entry.robj) + if (p->uf_entry.tv.bo) p->job->uf_addr = uf_offset; kfree(chunk_array); @@ -395,7 +404,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, - .resv = bo->tbo.resv, + .resv = bo->tbo.base.resv, .flags = 0 }; uint32_t domain; @@ -407,7 +416,9 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, /* Don't move this buffer if we have depleted our allowance * to move it. Don't move anything if the threshold is zero. */ - if (p->bytes_moved < p->bytes_moved_threshold) { + if (p->bytes_moved < p->bytes_moved_threshold && + (!bo->tbo.base.dma_buf || + list_empty(&bo->tbo.base.dma_buf->attachments))) { if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { /* And don't move a CPU_ACCESS_REQUIRED BO to limited @@ -442,75 +453,12 @@ retry: return r; } -/* Last resort, try to evict something from the current working set */ -static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, - struct amdgpu_bo *validated) -{ - uint32_t domain = validated->allowed_domains; - struct ttm_operation_ctx ctx = { true, false }; - int r; - - if (!p->evictable) - return false; - - for (;&p->evictable->tv.head != &p->validated; - p->evictable = list_prev_entry(p->evictable, tv.head)) { - - struct amdgpu_bo_list_entry *candidate = p->evictable; - struct amdgpu_bo *bo = candidate->robj; - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - bool update_bytes_moved_vis; - uint32_t other; - - /* If we reached our current BO we can forget it */ - if (candidate->robj == validated) - break; - - /* We can't move pinned BOs here */ - if (bo->pin_count) - continue; - - other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); - - /* Check if this BO is in one of the domains we need space for */ - if (!(other & domain)) - continue; - - /* Check if we can move this BO somewhere else */ - other = bo->allowed_domains & ~domain; - if (!other) - continue; - - /* Good we can try to move this BO somewhere else */ - update_bytes_moved_vis = - !amdgpu_gmc_vram_full_visible(&adev->gmc) && - amdgpu_bo_in_cpu_visible_vram(bo); - amdgpu_bo_placement_from_domain(bo, other); - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - p->bytes_moved += ctx.bytes_moved; - if (update_bytes_moved_vis) - p->bytes_moved_vis += ctx.bytes_moved; - - if (unlikely(r)) - break; - - p->evictable = list_prev_entry(p->evictable, tv.head); - list_move(&candidate->tv.head, &p->validated); - - return true; - } - - return false; -} - static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo) { struct amdgpu_cs_parser *p = param; int r; - do { - r = amdgpu_cs_bo_validate(p, bo); - } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo)); + r = amdgpu_cs_bo_validate(p, bo); if (r) return r; @@ -528,40 +476,33 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, int r; list_for_each_entry(lobj, validated, tv.head) { - struct amdgpu_bo *bo = lobj->robj; - bool binding_userptr = false; + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo); struct mm_struct *usermm; #ifdef notyet usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm); if (usermm && usermm != current->mm) return -EPERM; +#endif - /* Check if we have user pages and nobody bound the BO already */ - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && - lobj->user_pages) { + if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) && + lobj->user_invalidated && lobj->user_pages) { amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) return r; + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, lobj->user_pages); - binding_userptr = true; } -#endif - - if (p->evictable == lobj) - p->evictable = NULL; r = amdgpu_cs_validate(p, bo); if (r) return r; - if (binding_userptr) { - kvfree(lobj->user_pages); - lobj->user_pages = NULL; - } + kvfree(lobj->user_pages); + lobj->user_pages = NULL; } return 0; } @@ -576,7 +517,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *gds; struct amdgpu_bo *gws; struct amdgpu_bo *oa; - unsigned tries = 10; int r; INIT_LIST_HEAD(&p->validated); @@ -598,100 +538,63 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return r; } + /* One for TTM and one for the CS job */ + amdgpu_bo_list_for_each_entry(e, p->bo_list) + e->tv.num_shared = 2; + amdgpu_bo_list_get_list(p->bo_list, &p->validated); - if (p->bo_list->first_userptr != p->bo_list->num_entries) - p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); - if (p->uf_entry.robj && !p->uf_entry.robj->parent) + if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) list_add(&p->uf_entry.tv.head, &p->validated); - while (1) { - struct list_head need_pages; - - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, - &duplicates); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); - goto error_free_pages; - } - - INIT_LIST_HEAD(&need_pages); - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = e->robj; - - if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, - &e->user_invalidated) && e->user_pages) { - - /* We acquired a page array, but somebody - * invalidated it. Free it and try again - */ -#ifdef notyet - release_pages(e->user_pages, - bo->tbo.ttm->num_pages); -#endif - kvfree(e->user_pages); - e->user_pages = NULL; - } - - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && - !e->user_pages) { - list_del(&e->tv.head); - list_add(&e->tv.head, &need_pages); - - amdgpu_bo_unreserve(e->robj); - } + /* Get userptr backing pages. If pages are updated after registered + * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do + * amdgpu_ttm_backend_bind() to flush and invalidate new pages + */ + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + bool userpage_invalidated = false; + int i; + + e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct vm_page *), + GFP_KERNEL | __GFP_ZERO); + if (!e->user_pages) { + DRM_ERROR("calloc failure\n"); + return -ENOMEM; } - if (list_empty(&need_pages)) - break; - - /* Unreserve everything again. */ - ttm_eu_backoff_reservation(&p->ticket, &p->validated); - - /* We tried too many times, just abort */ - if (!--tries) { - r = -EDEADLK; - DRM_ERROR("deadlock in %s\n", __func__); - goto error_free_pages; + r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages); + if (r) { + kvfree(e->user_pages); + e->user_pages = NULL; + return r; } - /* Fill the page arrays for all userptrs. */ - list_for_each_entry(e, &need_pages, tv.head) { - struct ttm_tt *ttm = e->robj->tbo.ttm; - - e->user_pages = kvmalloc_array(ttm->num_pages, - sizeof(struct vm_page*), - GFP_KERNEL | __GFP_ZERO); - if (!e->user_pages) { - r = -ENOMEM; - DRM_ERROR("calloc failure in %s\n", __func__); - goto error_free_pages; - } - - r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); - if (r) { - DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); - kvfree(e->user_pages); - e->user_pages = NULL; - goto error_free_pages; + for (i = 0; i < bo->tbo.ttm->num_pages; i++) { + if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { + userpage_invalidated = true; + break; } } + e->user_invalidated = userpage_invalidated; + } - /* And try again. */ - list_splice(&need_pages, &p->validated); + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, + &duplicates); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); + goto out; } amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, &p->bytes_moved_vis_threshold); p->bytes_moved = 0; p->bytes_moved_vis = 0; - p->evictable = list_last_entry(&p->validated, - struct amdgpu_bo_list_entry, - tv.head); r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm, amdgpu_cs_validate, p); @@ -701,16 +604,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } r = amdgpu_cs_list_validate(p, &duplicates); - if (r) { - DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n"); + if (r) goto error_validate; - } r = amdgpu_cs_list_validate(p, &p->validated); - if (r) { - DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n"); + if (r) goto error_validate; - } amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, p->bytes_moved_vis); @@ -719,24 +618,30 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, gws = p->bo_list->gws_obj; oa = p->bo_list->oa_obj; - amdgpu_bo_list_for_each_entry(e, p->bo_list) - e->bo_va = amdgpu_vm_bo_find(vm, e->robj); + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + + /* Make sure we use the exclusive slot for shared BOs */ + if (bo->prime_shared_count) + e->tv.num_shared = 0; + e->bo_va = amdgpu_vm_bo_find(vm, bo); + } if (gds) { - p->job->gds_base = amdgpu_bo_gpu_offset(gds); - p->job->gds_size = amdgpu_bo_size(gds); + p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; + p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT; } if (gws) { - p->job->gws_base = amdgpu_bo_gpu_offset(gws); - p->job->gws_size = amdgpu_bo_size(gws); + p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT; + p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT; } if (oa) { - p->job->oa_base = amdgpu_bo_gpu_offset(oa); - p->job->oa_size = amdgpu_bo_size(oa); + p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT; + p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT; } - if (!r && p->uf_entry.robj) { - struct amdgpu_bo *uf = p->uf_entry.robj; + if (!r && p->uf_entry.tv.bo) { + struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo); r = amdgpu_ttm_alloc_gart(&uf->tbo); p->job->uf_addr += amdgpu_bo_gpu_offset(uf); @@ -745,33 +650,25 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, error_validate: if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); - -error_free_pages: - - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - if (!e->user_pages) - continue; - -#ifdef notyet - release_pages(e->user_pages, - e->robj->tbo.ttm->num_pages); -#endif - kvfree(e->user_pages); - } - +out: return r; } static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) { + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_bo_list_entry *e; int r; list_for_each_entry(e, &p->validated, tv.head) { - struct reservation_object *resv = e->robj->tbo.resv; - r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp, - amdgpu_bo_explicit_sync(e->robj)); - + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + struct dma_resv *resv = bo->tbo.base.resv; + enum amdgpu_sync_mode sync_mode; + + sync_mode = amdgpu_bo_explicit_sync(bo) ? + AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER; + r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode, + &fpriv->vm); if (r) return r; } @@ -795,9 +692,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); - for (i = 0; i < parser->num_post_dep_syncobjs; i++) - drm_syncobj_put(parser->post_dep_syncobjs[i]); - kfree(parser->post_dep_syncobjs); + for (i = 0; i < parser->num_post_deps; i++) { + drm_syncobj_put(parser->post_deps[i].syncobj); + kfree(parser->post_deps[i].chain); + } + kfree(parser->post_deps); dma_fence_put(parser->fence); @@ -813,11 +712,16 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, kfree(parser->chunks); if (parser->job) amdgpu_job_free(parser->job); - amdgpu_bo_unref(&parser->uf_entry.robj); + if (parser->uf_entry.tv.bo) { + struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo); + + amdgpu_bo_unref(&uf); + } } -static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) +static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) { + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_device *adev = p->adev; struct amdgpu_vm *vm = &fpriv->vm; @@ -826,92 +730,8 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) struct amdgpu_bo *bo; int r; - r = amdgpu_vm_clear_freed(adev, vm, NULL); - if (r) - return r; - - r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); - if (r) - return r; - - r = amdgpu_sync_fence(adev, &p->job->sync, - fpriv->prt_va->last_pt_update, false); - if (r) - return r; - - if (amdgpu_sriov_vf(adev)) { - struct dma_fence *f; - - bo_va = fpriv->csa_va; - BUG_ON(!bo_va); - r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) - return r; - - f = bo_va->last_pt_update; - r = amdgpu_sync_fence(adev, &p->job->sync, f, false); - if (r) - return r; - } - - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - struct dma_fence *f; - - /* ignore duplicates */ - bo = e->robj; - if (!bo) - continue; - - bo_va = e->bo_va; - if (bo_va == NULL) - continue; - - r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) - return r; - - f = bo_va->last_pt_update; - r = amdgpu_sync_fence(adev, &p->job->sync, f, false); - if (r) - return r; - } - - r = amdgpu_vm_handle_moved(adev, vm); - if (r) - return r; - - r = amdgpu_vm_update_directories(adev, vm); - if (r) - return r; - - r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false); - if (r) - return r; - - if (amdgpu_vm_debug) { - /* Invalidate all BOs to test for userspace bugs */ - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - /* ignore duplicates */ - if (!e->robj) - continue; - - amdgpu_vm_bo_invalidate(adev, e->robj, false); - } - } - - return r; -} - -static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, - struct amdgpu_cs_parser *p) -{ - struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_vm *vm = &fpriv->vm; - struct amdgpu_ring *ring = p->ring; - int r; - /* Only for UVD/VCE VM emulation */ - if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) { + if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) { unsigned i, j; for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { @@ -930,7 +750,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) continue; - va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK; + va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK; r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m); if (r) { DRM_ERROR("IB va_start is invalid\n"); @@ -952,7 +772,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, offset = m->start * AMDGPU_GPU_PAGE_SIZE; kptr += va_start - offset; - if (p->ring->funcs->parse_cs) { + if (ring->funcs->parse_cs) { memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); amdgpu_bo_kunmap(aobj); @@ -971,18 +791,80 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, } } - if (p->job->vm) { - p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo); + if (!p->job->vm) + return amdgpu_cs_sync_rings(p); + + + r = amdgpu_vm_clear_freed(adev, vm, NULL); + if (r) + return r; + + r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); + if (r) + return r; + + r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update); + if (r) + return r; + + if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { + bo_va = fpriv->csa_va; + BUG_ON(!bo_va); + r = amdgpu_vm_bo_update(adev, bo_va, false); + if (r) + return r; + + r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); + if (r) + return r; + } + + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + /* ignore duplicates */ + bo = ttm_to_amdgpu_bo(e->tv.bo); + if (!bo) + continue; + + bo_va = e->bo_va; + if (bo_va == NULL) + continue; - r = amdgpu_bo_vm_update_pte(p); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; - r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv); + r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); if (r) return r; } + r = amdgpu_vm_handle_moved(adev, vm); + if (r) + return r; + + r = amdgpu_vm_update_pdes(adev, vm, false); + if (r) + return r; + + r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update); + if (r) + return r; + + p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo); + + if (amdgpu_vm_debug) { + /* Invalidate all BOs to test for userspace bugs */ + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + + /* ignore duplicates */ + if (!bo) + continue; + + amdgpu_vm_bo_invalidate(adev, bo, false); + } + } + return amdgpu_cs_sync_rings(p); } @@ -991,14 +873,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, { struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; - int i, j; int r, ce_preempt = 0, de_preempt = 0; + struct amdgpu_ring *ring; + int i, j; for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) { struct amdgpu_cs_chunk *chunk; struct amdgpu_ib *ib; struct drm_amdgpu_cs_chunk_ib *chunk_ib; - struct amdgpu_ring *ring; + struct drm_sched_entity *entity; chunk = &parser->chunks[i]; ib = &parser->job->ibs[j]; @@ -1007,7 +890,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) continue; - if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) { + if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && + (amdgpu_mcbp || amdgpu_sriov_vf(adev))) { if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) ce_preempt++; @@ -1020,8 +904,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, return -EINVAL; } - r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type, - chunk_ib->ip_instance, chunk_ib->ring, &ring); + r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type, + chunk_ib->ip_instance, chunk_ib->ring, + &entity); if (r) return r; @@ -1029,14 +914,19 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT; - if (parser->ring && parser->ring != ring) + if (parser->entity && parser->entity != entity) + return -EINVAL; + + /* Return if there is no run queue associated with this entity. + * Possibly because of disabled HW IP*/ + if (entity->rq == NULL) return -EINVAL; - parser->ring = ring; + parser->entity = entity; - r = amdgpu_ib_get(adev, vm, - ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0, - ib); + ring = to_amdgpu_ring(entity->rq->sched); + r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ? + chunk_ib->ib_bytes : 0, ib); if (r) { DRM_ERROR("Failed to get ib !\n"); return r; @@ -1049,13 +939,12 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, j++; } - /* UVD & VCE fw doesn't support user fences */ - if (parser->job->uf_addr && ( - parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD || - parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE)) + /* MM engine doesn't support user fences */ + ring = to_amdgpu_ring(parser->entity->rq->sched); + if (parser->job->uf_addr && ring->funcs->no_user_fence) return -EINVAL; - return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx); + return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity); } static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, @@ -1071,51 +960,62 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, sizeof(struct drm_amdgpu_cs_chunk_dep); for (i = 0; i < num_deps; ++i) { - struct amdgpu_ring *ring; struct amdgpu_ctx *ctx; + struct drm_sched_entity *entity; struct dma_fence *fence; ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id); if (ctx == NULL) return -EINVAL; - r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr, - deps[i].ip_type, - deps[i].ip_instance, - deps[i].ring, &ring); + r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type, + deps[i].ip_instance, + deps[i].ring, &entity); if (r) { amdgpu_ctx_put(ctx); return r; } - fence = amdgpu_ctx_get_fence(ctx, ring, - deps[i].handle); - if (IS_ERR(fence)) { - r = PTR_ERR(fence); - amdgpu_ctx_put(ctx); - return r; - } else if (fence) { - r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, - true); - dma_fence_put(fence); - amdgpu_ctx_put(ctx); - if (r) - return r; + fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); + amdgpu_ctx_put(ctx); + + if (IS_ERR(fence)) + return PTR_ERR(fence); + else if (!fence) + continue; + + if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { + struct drm_sched_fence *s_fence; + struct dma_fence *old = fence; + + s_fence = to_drm_sched_fence(fence); + fence = dma_fence_get(&s_fence->scheduled); + dma_fence_put(old); } + + r = amdgpu_sync_fence(&p->job->sync, fence, true); + dma_fence_put(fence); + if (r) + return r; } return 0; } static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, - uint32_t handle) + uint32_t handle, u64 point, + u64 flags) { - int r; struct dma_fence *fence; - r = drm_syncobj_find_fence(p->filp, handle, &fence); - if (r) + int r; + + r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); + if (r) { + DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n", + handle, point, r); return r; + } - r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); + r = amdgpu_sync_fence(&p->job->sync, fence, true); dma_fence_put(fence); return r; @@ -1124,46 +1024,123 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { + struct drm_amdgpu_cs_chunk_sem *deps; unsigned num_deps; int i, r; - struct drm_amdgpu_cs_chunk_sem *deps; deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; num_deps = chunk->length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_sem); + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle, + 0, 0); + if (r) + return r; + } + + return 0; +} + + +static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; + unsigned num_deps; + int i, r; + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); for (i = 0; i < num_deps; ++i) { - r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle); + r = amdgpu_syncobj_lookup_and_add_to_sync(p, + syncobj_deps[i].handle, + syncobj_deps[i].point, + syncobj_deps[i].flags); if (r) return r; } + return 0; } static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { + struct drm_amdgpu_cs_chunk_sem *deps; unsigned num_deps; int i; - struct drm_amdgpu_cs_chunk_sem *deps; + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; num_deps = chunk->length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_sem); - p->post_dep_syncobjs = kmalloc_array(num_deps, - sizeof(struct drm_syncobj *), - GFP_KERNEL); - p->num_post_dep_syncobjs = 0; + if (p->post_deps) + return -EINVAL; + + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), + GFP_KERNEL); + p->num_post_deps = 0; - if (!p->post_dep_syncobjs) + if (!p->post_deps) return -ENOMEM; + for (i = 0; i < num_deps; ++i) { - p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle); - if (!p->post_dep_syncobjs[i]) + p->post_deps[i].syncobj = + drm_syncobj_find(p->filp, deps[i].handle); + if (!p->post_deps[i].syncobj) return -EINVAL; - p->num_post_dep_syncobjs++; + p->post_deps[i].chain = NULL; + p->post_deps[i].point = 0; + p->num_post_deps++; } + + return 0; +} + + +static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; + unsigned num_deps; + int i; + + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); + + if (p->post_deps) + return -EINVAL; + + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), + GFP_KERNEL); + p->num_post_deps = 0; + + if (!p->post_deps) + return -ENOMEM; + + for (i = 0; i < num_deps; ++i) { + struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; + + dep->chain = NULL; + if (syncobj_deps[i].point) { + dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL); + if (!dep->chain) + return -ENOMEM; + } + + dep->syncobj = drm_syncobj_find(p->filp, + syncobj_deps[i].handle); + if (!dep->syncobj) { + kfree(dep->chain); + return -EINVAL; + } + dep->point = syncobj_deps[i].point; + p->num_post_deps++; + } + return 0; } @@ -1177,18 +1154,33 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, chunk = &p->chunks[i]; - if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) { + switch (chunk->chunk_id) { + case AMDGPU_CHUNK_ID_DEPENDENCIES: + case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: r = amdgpu_cs_process_fence_dep(p, chunk); if (r) return r; - } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) { + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_IN: r = amdgpu_cs_process_syncobj_in_dep(p, chunk); if (r) return r; - } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) { + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: r = amdgpu_cs_process_syncobj_out_dep(p, chunk); if (r) return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: + r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: + r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk); + if (r) + return r; + break; } } @@ -1199,53 +1191,59 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) { int i; - for (i = 0; i < p->num_post_dep_syncobjs; ++i) - drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence); + for (i = 0; i < p->num_post_deps; ++i) { + if (p->post_deps[i].chain && p->post_deps[i].point) { + drm_syncobj_add_point(p->post_deps[i].syncobj, + p->post_deps[i].chain, + p->fence, p->post_deps[i].point); + p->post_deps[i].chain = NULL; + } else { + drm_syncobj_replace_fence(p->post_deps[i].syncobj, + p->fence); + } + } } static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_ring *ring = p->ring; - struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity; + struct drm_sched_entity *entity = p->entity; enum drm_sched_priority priority; struct amdgpu_bo_list_entry *e; struct amdgpu_job *job; uint64_t seq; - int r; job = p->job; p->job = NULL; - r = drm_sched_job_init(&job->base, entity, p->filp); + r = drm_sched_job_init(&job->base, entity, &fpriv->vm); if (r) goto error_unlock; - /* No memory allocation is allowed while holding the mn lock */ - amdgpu_mn_lock(p->mn); + /* No memory allocation is allowed while holding the notifier lock. + * The lock is held until amdgpu_cs_submit is finished and fence is + * added to BOs. + */ + mutex_lock(&p->adev->notifier_lock); + + /* If userptr are invalidated after amdgpu_cs_parser_bos(), return + * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. + */ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = e->robj; + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { - r = -ERESTARTSYS; - goto error_abort; - } + r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); } - - job->owner = p->filp; - p->fence = dma_fence_get(&job->base.s_fence->finished); - - r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq); if (r) { - dma_fence_put(p->fence); - dma_fence_put(&job->base.s_fence->finished); - amdgpu_job_free(job); - amdgpu_mn_unlock(p->mn); - return r; + r = -EAGAIN; + goto error_abort; } + p->fence = dma_fence_get(&job->base.s_fence->finished); + + amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq); amdgpu_cs_post_dependencies(p); if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && @@ -1264,18 +1262,16 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, priority = job->base.s_priority; drm_sched_entity_push_job(&job->base, entity); - ring = to_amdgpu_ring(entity->rq->sched); - amdgpu_ring_priority_get(ring, priority); + amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); - amdgpu_mn_unlock(p->mn); + mutex_unlock(&p->adev->notifier_lock); return 0; error_abort: - dma_fence_put(&job->base.s_fence->finished); - job->base.s_fence = NULL; - amdgpu_mn_unlock(p->mn); + drm_sched_job_cleanup(&job->base); + mutex_unlock(&p->adev->notifier_lock); error_unlock: amdgpu_job_free(job); @@ -1290,6 +1286,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) bool reserved_buffers = false; int i, r; + if (amdgpu_ras_intr_triggered()) + return -EHWPOISON; + if (!adev->accel_working) return -EBUSY; @@ -1298,7 +1297,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_parser_init(&parser, data); if (r) { - DRM_ERROR("Failed to initialize parser !\n"); + DRM_ERROR("Failed to initialize parser %d!\n", r); goto out; } @@ -1306,27 +1305,27 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) goto out; + r = amdgpu_cs_dependencies(adev, &parser); + if (r) { + DRM_ERROR("Failed in the dependencies handling %d!\n", r); + goto out; + } + r = amdgpu_cs_parser_bos(&parser, data); if (r) { if (r == -ENOMEM) DRM_ERROR("Not enough memory for command submission!\n"); - else if (r != -ERESTARTSYS) + else if (r != -ERESTARTSYS && r != -EAGAIN) DRM_ERROR("Failed to process the buffer list %d!\n", r); goto out; } reserved_buffers = true; - r = amdgpu_cs_dependencies(adev, &parser); - if (r) { - DRM_ERROR("Failed in the dependencies handling %d!\n", r); - goto out; - } - for (i = 0; i < parser.job->num_ibs; i++) trace_amdgpu_cs(&parser, i); - r = amdgpu_cs_ib_vm_chunk(adev, &parser); + r = amdgpu_cs_vm_handling(&parser); if (r) goto out; @@ -1334,6 +1333,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); + return r; } @@ -1350,9 +1350,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { union drm_amdgpu_wait_cs *wait = data; - struct amdgpu_device *adev = dev->dev_private; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); - struct amdgpu_ring *ring = NULL; + struct drm_sched_entity *entity; struct amdgpu_ctx *ctx; struct dma_fence *fence; long r; @@ -1361,15 +1360,14 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, if (ctx == NULL) return -EINVAL; - r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, - wait->in.ip_type, wait->in.ip_instance, - wait->in.ring, &ring); + r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance, + wait->in.ring, &entity); if (r) { amdgpu_ctx_put(ctx); return r; } - fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); + fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle); if (IS_ERR(fence)) r = PTR_ERR(fence); else if (fence) { @@ -1401,7 +1399,7 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev, struct drm_file *filp, struct drm_amdgpu_fence *user) { - struct amdgpu_ring *ring; + struct drm_sched_entity *entity; struct amdgpu_ctx *ctx; struct dma_fence *fence; int r; @@ -1410,14 +1408,14 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev, if (ctx == NULL) return ERR_PTR(-EINVAL); - r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type, - user->ip_instance, user->ring, &ring); + r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance, + user->ring, &entity); if (r) { amdgpu_ctx_put(ctx); return ERR_PTR(r); } - fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no); + fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no); amdgpu_ctx_put(ctx); return fence; @@ -1437,6 +1435,9 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, if (IS_ERR(fence)) return PTR_ERR(fence); + if (!fence) + fence = dma_fence_get_stub(); + switch (info->in.what) { case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ: r = drm_syncobj_create(&syncobj, 0, fence); @@ -1661,7 +1662,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, *map = mapping; /* Double check that the BO is reserved by this CS */ - if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket) + if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket) return -EINVAL; if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { |