From 6d909c5da0166d62ffc656990fe1fff9ae66b37f Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 22 Jun 2020 19:27:45 -0500 Subject: drm/amdkfd: Add kernel parameter to stop queue eviction on vm fault This is to keep the wavefront context for debugging purposes. Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 +++++++ drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 5 +++-- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 5 +++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +++++ 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 7294a8062a93..5179d5f032ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -751,6 +751,13 @@ bool no_system_mem_limit; module_param(no_system_mem_limit, bool, 0644); MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)"); +/** + * DOC: no_queue_eviction_on_vm_fault (int) + * If set, process queues will not be evicted on gpuvm fault. This is to keep the wavefront context for debugging (0 = queue eviction, 1 = no queue eviction). The default is 0 (queue eviction). 
+ */ +int amdgpu_no_queue_eviction_on_vm_fault = 0; +MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)"); +module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444); #endif /** diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index fe14e473f026..f6233019f042 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -80,8 +80,9 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, ihre->source_id == CIK_INTSRC_SDMA_TRAP || ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE || - ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || - ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT; + ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || + ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) && + !amdgpu_no_queue_eviction_on_vm_fault); } static void cik_event_interrupt_wq(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 74a460be077b..1c20458f3962 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -98,9 +98,10 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev, source_id == SOC15_INTSRC_SDMA_TRAP || source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || source_id == SOC15_INTSRC_CP_BAD_OPCODE || - client_id == SOC15_IH_CLIENTID_VMC || + ((client_id == SOC15_IH_CLIENTID_VMC || client_id == SOC15_IH_CLIENTID_VMC1 || - client_id == SOC15_IH_CLIENTID_UTCL2; + client_id == SOC15_IH_CLIENTID_UTCL2) && + !amdgpu_no_queue_eviction_on_vm_fault); } static void event_interrupt_wq_v9(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 0b5974847980..d8c8b5ff449a 100644 --- 
a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -169,6 +169,11 @@ extern bool hws_gws_support; /* Queue preemption timeout in ms */ extern int queue_preemption_timeout_ms; +/* + * Don't evict process queues on vm fault + */ +extern int amdgpu_no_queue_eviction_on_vm_fault; + /* Enable eviction debug messages */ extern bool debug_evictions; -- cgit v1.2.3-59-g8ed1b