aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2020-08-12 17:48:26 +0200
committerAlex Deucher <alexander.deucher@amd.com>2020-08-14 16:22:40 -0400
commitf1403342ebdfcff3c3cf57ae476f19d3078f2767 (patch)
treed94e6a6c652ebc0688fdb0c57587712c75970472 /drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
parentdrm/amd/powerplay: enable Sienna Cichlid mgpu fan boost feature (diff)
downloadlinux-f1403342ebdfcff3c3cf57ae476f19d3078f2767.tar.xz
linux-f1403342ebdfcff3c3cf57ae476f19d3078f2767.zip
drm/amdgpu: revert "fix system hang issue during GPU reset"
The whole approach wasn't thought through till the end. We already had a reset lock like this in the past and it caused the same problems like this one. Completely revert the patch for now and add individual trylock protection to the hardware access functions as necessary. This reverts commit df9c8d1aa278c435c30a69b8f2418b4a52fcb929. Signed-off-by: Christian König <christian.koenig@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c13
1 files changed, 10 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index fe31cbeccfe9..5fd67e1cc2a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -238,16 +238,20 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
+ int locked;
/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
* otherwise the mailbox msg will be ruined/reseted by
* the VF FLR.
*
- * we can unlock the reset_sem to allow "amdgpu_job_timedout"
+ * we can unlock the lock_reset to allow "amdgpu_job_timedout"
* to run gpu_recover() after FLR_NOTIFICATION_CMPL received
* which means host side had finished this VF's FLR.
*/
- down_read(&adev->reset_sem);
+ locked = mutex_trylock(&adev->lock_reset);
+ if (locked)
+ adev->in_gpu_reset = true;
+
do {
if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
goto flr_done;
@@ -257,7 +261,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
} while (timeout > 1);
flr_done:
- up_read(&adev->reset_sem);
+ if (locked) {
+ adev->in_gpu_reset = false;
+ mutex_unlock(&adev->lock_reset);
+ }
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)