aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOak Zeng <Oak.Zeng@amd.com>2020-06-22 19:27:45 -0500
committerAlex Deucher <alexander.deucher@amd.com>2021-03-23 22:59:22 -0400
commit6d909c5da0166d62ffc656990fe1fff9ae66b37f (patch)
tree5e3d8aae01e5e68b9fdf5d7de6beb00ed38c69fc
parentdrm/amdgpu: allow use psp to load firmware (v2) (diff)
downloadlinux-dev-6d909c5da0166d62ffc656990fe1fff9ae66b37f.tar.xz
linux-dev-6d909c5da0166d62ffc656990fe1fff9ae66b37f.zip
drm/amdkfd: Add kernel parameter to stop queue eviction on vm fault
This is to keep wavefront context for debug purpose Signed-off-by: Oak Zeng <Oak.Zeng@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h5
4 files changed, 18 insertions, 4 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 7294a8062a93..5179d5f032ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -751,6 +751,13 @@ bool no_system_mem_limit;
module_param(no_system_mem_limit, bool, 0644);
MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)");
+/**
+ * DOC: no_queue_eviction_on_vm_fault (int)
+ * If set, process queues will not be evicted on gpuvm fault. This is to keep the wavefront context for debugging (0 = queue eviction, 1 = no queue eviction). The default is 0 (queue eviction).
+ */
+int amdgpu_no_queue_eviction_on_vm_fault = 0;
+MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)");
+module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
#endif
/**
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index fe14e473f026..f6233019f042 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -80,8 +80,9 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE ||
- ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
- ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT;
+ ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
+ ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
+ !amdgpu_no_queue_eviction_on_vm_fault);
}
static void cik_event_interrupt_wq(struct kfd_dev *dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 74a460be077b..1c20458f3962 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -98,9 +98,10 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
source_id == SOC15_INTSRC_SDMA_TRAP ||
source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
- client_id == SOC15_IH_CLIENTID_VMC ||
+ ((client_id == SOC15_IH_CLIENTID_VMC ||
client_id == SOC15_IH_CLIENTID_VMC1 ||
- client_id == SOC15_IH_CLIENTID_UTCL2;
+ client_id == SOC15_IH_CLIENTID_UTCL2) &&
+ !amdgpu_no_queue_eviction_on_vm_fault);
}
static void event_interrupt_wq_v9(struct kfd_dev *dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0b5974847980..d8c8b5ff449a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -169,6 +169,11 @@ extern bool hws_gws_support;
/* Queue preemption timeout in ms */
extern int queue_preemption_timeout_ms;
+/*
+ * Don't evict process queues on vm fault
+ */
+extern int amdgpu_no_queue_eviction_on_vm_fault;
+
/* Enable eviction debug messages */
extern bool debug_evictions;