aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOded Gabbay <oded.gabbay@gmail.com>2014-12-24 13:30:52 +0200
committerOded Gabbay <oded.gabbay@gmail.com>2015-05-19 13:02:28 +0300
commit81663016dbfd53e29d1b5c5ddbc9b12ae1d66474 (patch)
tree337823c18adb9dac0f812b4e76eb2ed9747c4c71
parentdrm/amdkfd: Add bad opcode exception handling (diff)
downloadlinux-dev-81663016dbfd53e29d1b5c5ddbc9b12ae1d66474.tar.xz
linux-dev-81663016dbfd53e29d1b5c5ddbc9b12ae1d66474.zip
drm/amdkfd: Add module parameter of send_sigterm
This patch adds a new kernel module parameter to amdkfd, called send_sigterm. This parameter specifies whether amdkfd should send the SIGTERM signal to an HSA process, when the following conditions occur: 1. The GPU triggers an exception regarding a kernel that was issued by this process. 2. The HSA process isn't waiting on an event that handles this exception. The default behavior is not to send a SIGTERM and suffice with a dmesg error print. Reviewed-by: Ben Goz <ben.goz@amd.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c12
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_module.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h6
3 files changed, 20 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 5c3a81e667a8..3cb37d220f60 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -872,10 +872,16 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
/* Send SIGTERM no event of type "type" has been found*/
if (send_signal) {
- dev_warn(kfd_device,
- "Sending SIGTERM to HSA Process with PID %d ",
+ if (send_sigterm) {
+ dev_warn(kfd_device,
+ "Sending SIGTERM to HSA Process with PID %d ",
+ p->lead_thread->pid);
+ send_sig(SIGTERM, p->lead_thread, 0);
+ } else {
+ dev_err(kfd_device,
+ "HSA Process (PID %d) got unhandled exception",
p->lead_thread->pid);
- send_sig(SIGTERM, p->lead_thread, 0);
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 4e0a68f13a77..e4fc96ec4073 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -54,6 +54,11 @@ module_param(max_num_of_queues_per_device, int, 0444);
MODULE_PARM_DESC(max_num_of_queues_per_device,
"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
+int send_sigterm;
+module_param(send_sigterm, int, 0444);
+MODULE_PARM_DESC(send_sigterm,
+ "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
+
bool kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f)
{
/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 9383494b429e..b6f838f56589 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -74,6 +74,12 @@ extern int max_num_of_queues_per_device;
/* Kernel module parameter to specify the scheduling policy */
extern int sched_policy;
+/*
+ * Kernel module parameter to specify whether to send sigterm to HSA process on
+ * unhandled exception
+ */
+extern int send_sigterm;
+
/**
* enum kfd_sched_policy
*