diff options
Diffstat (limited to 'sys/dev/pci/drm/amd/amdgpu/amdgpu_ring.c')
-rw-r--r-- | sys/dev/pci/drm/amd/amdgpu/amdgpu_ring.c | 253 |
1 files changed, 67 insertions, 186 deletions
diff --git a/sys/dev/pci/drm/amd/amdgpu/amdgpu_ring.c b/sys/dev/pci/drm/amd/amdgpu/amdgpu_ring.c index ee18ba3ce97..bea77a5dc95 100644 --- a/sys/dev/pci/drm/amd/amdgpu/amdgpu_ring.c +++ b/sys/dev/pci/drm/amd/amdgpu/amdgpu_ring.c @@ -28,8 +28,9 @@ */ #include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/uaccess.h> #include <linux/debugfs.h> -#include <drm/drmP.h> + #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "atom.h" @@ -47,9 +48,6 @@ * wptr. The GPU then starts fetching commands and executes * them until the pointers are equal again. */ -static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, - struct amdgpu_ring *ring); -static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring); /** * amdgpu_ring_alloc - allocate space on the ring buffer @@ -135,9 +133,6 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) if (ring->funcs->end_use) ring->funcs->end_use(ring); - - if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) - amdgpu_ring_lru_touch(ring->adev, ring); } /** @@ -156,76 +151,6 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) } /** - * amdgpu_ring_priority_put - restore a ring's priority - * - * @ring: amdgpu_ring structure holding the information - * @priority: target priority - * - * Release a request for executing at @priority - */ -void amdgpu_ring_priority_put(struct amdgpu_ring *ring, - enum drm_sched_priority priority) -{ - int i; - - if (!ring->funcs->set_priority) - return; - - if (atomic_dec_return(&ring->num_jobs[priority]) > 0) - return; - - /* no need to restore if the job is already at the lowest priority */ - if (priority == DRM_SCHED_PRIORITY_NORMAL) - return; - - mutex_lock(&ring->priority_mutex); - /* something higher prio is executing, no need to decay */ - if (ring->priority > priority) - goto out_unlock; - - /* decay priority to the next level with a job available */ - for (i = priority; i >= DRM_SCHED_PRIORITY_MIN; i--) { - if (i == DRM_SCHED_PRIORITY_NORMAL - || atomic_read(&ring->num_jobs[i])) { - ring->priority = i; - ring->funcs->set_priority(ring, i); - break; - } - } - -out_unlock: - mutex_unlock(&ring->priority_mutex); -} - -/** - * amdgpu_ring_priority_get - change the ring's priority - * - * @ring: amdgpu_ring structure holding the information - * @priority: target priority - * - * Request a ring's priority to be raised to @priority (refcounted). - */ -void amdgpu_ring_priority_get(struct amdgpu_ring *ring, - enum drm_sched_priority priority) -{ - if (!ring->funcs->set_priority) - return; - - if (atomic_inc_return(&ring->num_jobs[priority]) <= 0) - return; - - mutex_lock(&ring->priority_mutex); - if (priority <= ring->priority) - goto out_unlock; - - ring->priority = priority; - ring->funcs->set_priority(ring, priority); - -out_unlock: - mutex_unlock(&ring->priority_mutex); -} - -/** * amdgpu_ring_init - init driver ring struct. * * @adev: amdgpu_device pointer @@ -251,6 +176,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) sched_hw_submission = max(sched_hw_submission, 256); + else if (ring == &adev->sdma.instance[0].page) + sched_hw_submission = 256; if (ring->adev == NULL) { if (adev->num_rings >= AMDGPU_MAX_RINGS) @@ -282,6 +209,16 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, return r; } + r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs); + if (r) { + dev_err(adev->dev, + "(%d) ring trail_fence_offs wb alloc failed\n", r); + return r; + } + ring->trail_fence_gpu_addr = + adev->wb.gpu_addr + (ring->trail_fence_offs * 4); + ring->trail_fence_cpu_addr = &adev->wb.wb[ring->trail_fence_offs]; + r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); if (r) { dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); @@ -320,16 +257,10 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->max_dw = max_dw; ring->priority = DRM_SCHED_PRIORITY_NORMAL; rw_init(&ring->priority_mutex, "ringpri"); - INIT_LIST_HEAD(&ring->lru_list); - amdgpu_ring_lru_touch(adev, ring); for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i) atomic_set(&ring->num_jobs[i], 0); - if (amdgpu_debugfs_ring_init(adev, ring)) { - DRM_ERROR("Failed to register debugfs file for rings !\n"); - } - return 0; } @@ -343,12 +274,13 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ void amdgpu_ring_fini(struct amdgpu_ring *ring) { - ring->ready = false; /* Not to finish a ring which is not initialized */ if (!(ring->adev) || !(ring->adev->rings[ring->idx])) return; + ring->sched.ready = false; + amdgpu_device_wb_free(ring->adev, ring->rptr_offs); amdgpu_device_wb_free(ring->adev, ring->wptr_offs); @@ -359,8 +291,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) &ring->gpu_addr, (void **)&ring->ring); - amdgpu_debugfs_ring_fini(ring); - dma_fence_put(ring->vmid_wait); ring->vmid_wait = NULL; ring->me = 0; @@ -368,99 +298,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) ring->adev->rings[ring->idx] = NULL; } -static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - /* list_move_tail handles the case where ring isn't part of the list */ - list_move_tail(&ring->lru_list, &adev->ring_lru_list); -} - -static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring, - int *blacklist, int num_blacklist) -{ - int i; - - for (i = 0; i < num_blacklist; i++) { - if (ring->idx == blacklist[i]) - return true; - } - - return false; -} - -/** - * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block - * - * @adev: amdgpu_device pointer - * @type: amdgpu_ring_type enum - * @blacklist: blacklisted ring ids array - * @num_blacklist: number of entries in @blacklist - * @lru_pipe_order: find a ring from the least recently used pipe - * @ring: output ring - * - * Retrieve the amdgpu_ring structure for the least recently used ring of - * a specific IP block (all asics). - * Returns 0 on success, error on failure. - */ -int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, - int *blacklist, int num_blacklist, - bool lru_pipe_order, struct amdgpu_ring **ring) -{ - struct amdgpu_ring *entry; - - /* List is sorted in LRU order, find first entry corresponding - * to the desired HW IP */ - *ring = NULL; - spin_lock(&adev->ring_lru_list_lock); - list_for_each_entry(entry, &adev->ring_lru_list, lru_list) { - if (entry->funcs->type != type) - continue; - - if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist)) - continue; - - if (!*ring) { - *ring = entry; - - /* We are done for ring LRU */ - if (!lru_pipe_order) - break; - } - - /* Move all rings on the same pipe to the end of the list */ - if (entry->pipe == (*ring)->pipe) - amdgpu_ring_lru_touch_locked(adev, entry); - } - - /* Move the ring we found to the end of the list */ - if (*ring) - amdgpu_ring_lru_touch_locked(adev, *ring); - - spin_unlock(&adev->ring_lru_list_lock); - - if (!*ring) { - DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type); - return -EINVAL; - } - - return 0; -} - -/** - * amdgpu_ring_lru_touch - mark a ring as recently being used - * - * @adev: amdgpu_device pointer - * @ring: ring to touch - * - * Move @ring to the tail of the lru list - */ -void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring) -{ - spin_lock(&adev->ring_lru_list_lock); - amdgpu_ring_lru_touch_locked(adev, ring); - spin_unlock(&adev->ring_lru_list_lock); -} - /** * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper * @@ -481,6 +318,31 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); } +/** + * amdgpu_ring_soft_recovery - try to soft recover a ring lockup + * + * @ring: ring to try the recovery on + * @vmid: VMID we try to get going again + * @fence: timedout fence + * + * Tries to get a ring proceeding again when it is stuck. + */ +bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, + struct dma_fence *fence) +{ + ktime_t deadline = ktime_add_us(ktime_get(), 10000); + + if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence) + return false; + + atomic_inc(&ring->adev->gpu_reset_counter); + while (!dma_fence_is_signaled(fence) && + ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0) + ring->funcs->soft_recovery(ring, vmid); + + return dma_fence_is_signaled(fence); +} + /* * Debugfs info */ @@ -545,15 +407,15 @@ static const struct file_operations amdgpu_debugfs_ring_fops = { #endif -static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, - struct amdgpu_ring *ring) +int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, + struct amdgpu_ring *ring) { #if defined(CONFIG_DEBUG_FS) struct drm_minor *minor = adev->ddev->primary; struct dentry *ent, *root = minor->debugfs_root; char name[32]; - snprintf(name, sizeof(name), "amdgpu_ring_%s", ring->name); + sprintf(name, "amdgpu_ring_%s", ring->name); ent = debugfs_create_file(name, S_IFREG | S_IRUGO, root, @@ -567,9 +429,28 @@ static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, return 0; } -static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring) +/** + * amdgpu_ring_test_helper - tests ring and set sched readiness status + * + * @ring: ring to try the recovery on + * + * Tests ring and set sched readiness status + * + * Returns 0 on success, error on failure. + */ +int amdgpu_ring_test_helper(struct amdgpu_ring *ring) { -#if defined(CONFIG_DEBUG_FS) - debugfs_remove(ring->ent); -#endif + struct amdgpu_device *adev = ring->adev; + int r; + + r = amdgpu_ring_test_ring(ring); + if (r) + DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n", + ring->name, r); + else + DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n", + ring->name); + + ring->sched.ready = !r; + return r; } |