aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd/kfd_process.c
diff options
context:
space:
mode:
authorPhilip Yang <Philip.Yang@amd.com>2022-07-07 16:54:19 -0400
committerAlex Deucher <alexander.deucher@amd.com>2022-07-25 09:31:05 -0400
commit74097f9fd2f5ebdae04fcba59da345386415cbf3 (patch)
treed4dd5f0a45c63c6cf462179a1231105d32b41127 /drivers/gpu/drm/amd/amdkfd/kfd_process.c
parentdrm/amdkfd: Correct mmu_notifier_get failure handling (diff)
downloadlinux-dev-74097f9fd2f5ebdae04fcba59da345386415cbf3.tar.xz
linux-dev-74097f9fd2f5ebdae04fcba59da345386415cbf3.zip
drm/amdkfd: Process notifier release callback don't take mutex
Move process queues cleanup to deferred work kfd_process_wq_release, to avoid potential deadlock circular locking warning: WARNING: possible circular locking dependency detected the existing dependency chain (in reverse order) is: -> #2 ((work_completion)(&svms->deferred_list_work)){+.+.}-{0:0}: __flush_work+0x343/0x4a0 svm_range_list_lock_and_flush_work+0x39/0xc0 svm_range_set_attr+0xe8/0x1080 [amdgpu] kfd_ioctl+0x19b/0x600 [amdgpu] __x64_sys_ioctl+0x81/0xb0 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #1 (&info->lock#2){+.+.}-{3:3}: __mutex_lock+0xa4/0x940 amdgpu_amdkfd_gpuvm_acquire_process_vm+0x2e3/0x590 kfd_process_device_init_vm+0x61/0x200 [amdgpu] kfd_ioctl_acquire_vm+0x83/0xb0 [amdgpu] kfd_ioctl+0x19b/0x600 [amdgpu] __x64_sys_ioctl+0x81/0xb0 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #0 (&process->mutex){+.+.}-{3:3}: __lock_acquire+0x1365/0x23d0 lock_acquire+0xc9/0x2e0 __mutex_lock+0xa4/0x940 kfd_process_notifier_release+0x96/0xe0 [amdgpu] __mmu_notifier_release+0x94/0x210 exit_mmap+0x35/0x1f0 mmput+0x63/0x120 svm_range_deferred_list_work+0x177/0x2c0 [amdgpu] process_one_work+0x2a4/0x600 worker_thread+0x39/0x3e0 kthread+0x16d/0x1a0 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock((work_completion)(&svms->deferred_list_work)); lock(&info->lock#2); lock((work_completion)(&svms->deferred_list_work)); lock(&process->mutex); Signed-off-by: Philip Yang <Philip.Yang@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c21
1 files changed, 9 insertions, 12 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d8591721270b..6c83a519b3a1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1115,6 +1115,15 @@ static void kfd_process_wq_release(struct work_struct *work)
struct kfd_process *p = container_of(work, struct kfd_process,
release_work);
+ kfd_process_dequeue_from_all_devices(p);
+ pqm_uninit(&p->pqm);
+
+ /* Signal the eviction fence after user mode queues are
+ * destroyed. This allows any BOs to be freed without
+ * triggering pointless evictions or waiting for fences.
+ */
+ dma_fence_signal(p->ef);
+
kfd_process_remove_sysfs(p);
kfd_iommu_unbind_process(p);
@@ -1179,20 +1188,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
- mutex_lock(&p->mutex);
-
- kfd_process_dequeue_from_all_devices(p);
- pqm_uninit(&p->pqm);
-
/* Indicate to other users that MM is no longer valid */
p->mm = NULL;
- /* Signal the eviction fence after user mode queues are
- * destroyed. This allows any BOs to be freed without
- * triggering pointless evictions or waiting for fences.
- */
- dma_fence_signal(p->ef);
-
- mutex_unlock(&p->mutex);
mmu_notifier_put(&p->mmu_notifier);
}