aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
diff options
context:
space:
mode:
authorFelix Kuehling <Felix.Kuehling@amd.com>2022-04-07 18:53:56 -0400
committerAlex Deucher <alexander.deucher@amd.com>2022-04-25 17:05:48 -0400
commitc3eb12dff0f9c6aa7f2916edaaec5545af5f379f (patch)
tree376bc95f81f7f8101425ce59142ac96ba0533b2e /drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
parentdrm/amdgpu: Remove useless kfree (diff)
downloadlinux-dev-c3eb12dff0f9c6aa7f2916edaaec5545af5f379f.tar.xz
linux-dev-c3eb12dff0f9c6aa7f2916edaaec5545af5f379f.zip
drm/amdkfd: Ignore bogus signals from MEC efficiently
MEC firmware sometimes sends signal interrupts without a valid context ID on end of pipe events that don't intend to signal any HSA signals. This triggers the slow path in kfd_signal_event_interrupt that scans the entire event page for signaled events. Detect these signals in the top half interrupt handler to stop processing them as early as possible. Because we now always treat event ID 0 as invalid, reserve that ID during process initialization.

v2: Update firmware version checks to support more GPUs.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c33
1 files changed, 33 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 03c29bdd89a1..f27fe022ef6f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -141,6 +141,25 @@ static void event_interrupt_poison_consumption(struct kfd_dev *dev,
}
}
+static bool context_id_expected(struct kfd_dev *dev)
+{
+ switch (KFD_GC_VERSION(dev)) {
+ case IP_VERSION(9, 0, 1):
+ return dev->mec_fw_version >= 0x817a;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ return dev->mec_fw_version >= 0x17a;
+ default:
+ /* Other GFXv9 and later GPUs always send valid context IDs
+ * on legitimate events. Any GC version not listed above that
+ * compares below IP_VERSION(9, 4, 1) yields false here, so a
+ * zero context_id is not treated as bogus on those parts.
+ */
+ return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 1);
+ }
+}
+
static bool event_interrupt_isr_v9(struct kfd_dev *dev,
const uint32_t *ih_ring_entry,
uint32_t *patched_ihre,
@@ -206,6 +225,20 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
return false;
+ /* Work around CP firmware sending bogus signals with 0 context_id.
+ * Those can be safely ignored on hardware and firmware versions that
+ * include a valid context_id on legitimate signals. This avoids the
+ * slow path in kfd_signal_event_interrupt that scans all event slots
+ * for signaled events.
+ */
+ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) {
+ uint32_t context_id =
+ SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+
+ if (context_id == 0 && context_id_expected(dev))
+ return false;
+ }
+
/* Interrupt types we care about: various signals and faults.
* They will be forwarded to a work queue (see below).
*/