aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c21
1 files changed, 8 insertions, 13 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 5fd67e1cc2a0..f5ce9a9f4cf5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -238,19 +238,15 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
- int locked;
/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
* otherwise the mailbox msg will be ruined/reseted by
* the VF FLR.
- *
- * we can unlock the lock_reset to allow "amdgpu_job_timedout"
- * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
- * which means host side had finished this VF's FLR.
*/
- locked = mutex_trylock(&adev->lock_reset);
- if (locked)
- adev->in_gpu_reset = true;
+ if (!down_read_trylock(&adev->reset_sem))
+ return;
+
+ atomic_set(&adev->in_gpu_reset, 1);
do {
if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
@@ -261,14 +257,13 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
} while (timeout > 1);
flr_done:
- if (locked) {
- adev->in_gpu_reset = false;
- mutex_unlock(&adev->lock_reset);
- }
+ atomic_set(&adev->in_gpu_reset, 0);
+ up_read(&adev->reset_sem);
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
- && adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)
+ && (!amdgpu_device_has_job_running(adev) ||
+ adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT))
amdgpu_device_gpu_recover(adev, NULL);
}