aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
authorAndrey Grodzovsky <andrey.grodzovsky@amd.com>2022-05-17 14:25:20 -0400
committerAlex Deucher <alexander.deucher@amd.com>2022-06-10 15:26:07 -0400
commitb5fd0cf3ea377a7332721df8a8c8e7715f93c8d4 (patch)
tree0b192dd2416c6bd79028efe1886e36bb1aff98b9 /drivers/gpu/drm/amd/amdgpu
parentdrm/amdgpu: Add work_struct for GPU reset from debugfs (diff)
downloadlinux-dev-b5fd0cf3ea377a7332721df8a8c8e7715f93c8d4.tar.xz
linux-dev-b5fd0cf3ea377a7332721df8a8c8e7715f93c8d4.zip
drm/amdgpu: Add work_struct for GPU reset from kfd.
We need to have a work_struct to cancel this reset if another one is already in progress. Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c31
3 files changed, 15 insertions, 32 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 1f8161cd507f..a23abc0e86e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -33,6 +33,7 @@
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
+#include "amdgpu_reset.h"
/* Total memory size in system memory and all GPU VRAM. Used to
* estimate worst case amount of memory to reserve for page tables
@@ -122,6 +123,15 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
}
}
+
+static void amdgpu_amdkfd_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ kfd.reset_work);
+
+ amdgpu_device_gpu_recover_imp(adev, NULL);
+}
+
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
int i;
@@ -180,6 +190,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
adev_to_drm(adev), &gpu_resources);
+
+ INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
}
}
@@ -247,7 +259,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
{
if (amdgpu_device_should_recover_gpu(adev))
- amdgpu_device_gpu_recover(adev, NULL);
+ amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->kfd.reset_work);
}
int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index bbe7d81bb0df..ffb2b7d9b9a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -97,6 +97,7 @@ struct amdgpu_kfd_dev {
struct kfd_dev *dev;
uint64_t vram_used;
bool init_complete;
+ struct work_struct reset_work;
};
enum kgd_engine_type {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6abec88cb5a8..2d490941e727 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5323,37 +5323,6 @@ skip_sched_resume:
return r;
}
-struct amdgpu_recover_work_struct {
- struct work_struct base;
- struct amdgpu_device *adev;
- struct amdgpu_job *job;
- int ret;
-};
-
-static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work)
-{
- struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base);
-
- amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
-}
-/*
- * Serialize gpu recover into reset domain single threaded wq
- */
-int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
- struct amdgpu_job *job)
-{
- struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};
-
- INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
-
- if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base))
- return -EAGAIN;
-
- flush_work(&work.base);
-
- return atomic_read(&adev->reset_domain->reset_res);
-}
-
/**
* amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
*