author:    Felix Kuehling <Felix.Kuehling@amd.com>  2017-11-14 16:41:19 -0500
committer: Oded Gabbay <oded.gabbay@gmail.com>      2017-11-14 16:41:19 -0500
commit:    373d7080896a3cb3b28ae3a2abdafb7bb87552b1 (patch)
tree:      29bb627b5b9357baa95e661ba35f47ba98d978d0 /drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
parent:    drm/amdkfd: Add trap handler for CWSR (diff)
download:  linux-dev-373d7080896a3cb3b28ae3a2abdafb7bb87552b1.tar.xz, linux-dev-373d7080896a3cb3b28ae3a2abdafb7bb87552b1.zip
drm/amdkfd: Add CWSR support
This hardware feature allows the GPU to preempt shader execution in the middle of a compute wave, save the state, and restore it later to resume execution.

Memory for saving the state is allocated per queue in user mode, and its address and size are passed to the create_queue ioctl. The size depends on the number of waves that can be in flight simultaneously on a given ASIC.

Signed-off-by: Shaoyun.liu <shaoyun.liu@amd.com>
Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
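For illustration, here is a minimal user-space sketch of how a CWSR save area might be handed to the create_queue ioctl described above. It assumes the kfd_ioctl_create_queue_args layout from this patch series (separate ctx_save_restore_size and ctl_stack_size fields). The buffer sizes are placeholders, since the real values are ASIC-dependent and normally computed by the user-mode runtime, and all surrounding setup (GPU memory mapping, doorbells, full error handling) is omitted.

/*
 * Illustrative sketch only -- not part of the patch. The sizes below
 * are placeholders; real values depend on how many waves the ASIC can
 * have in flight and are normally computed by the user-mode runtime.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kfd_ioctl.h>

static int create_queue_with_cwsr(int kfd_fd, uint32_t gpu_id,
				  void *ring, uint64_t ring_size)
{
	const uint32_t cwsr_size = 2 * 1024 * 1024;	/* placeholder */
	const uint32_t ctl_stack_size = 32 * 1024;	/* placeholder */

	/* Per-queue save area, allocated in user mode as described above. */
	void *cwsr_area = mmap(NULL, cwsr_size, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (cwsr_area == MAP_FAILED)
		return -1;

	struct kfd_ioctl_create_queue_args args = {
		.ring_base_address	  = (uint64_t)(uintptr_t)ring,
		.ring_size		  = ring_size,
		.gpu_id			  = gpu_id,
		.queue_type		  = KFD_IOC_QUEUE_TYPE_COMPUTE_AQL,
		.queue_percentage	  = 100,
		.queue_priority		  = 7,
		/* CWSR fields: user-mode buffer address and sizes. */
		.ctx_save_restore_address = (uint64_t)(uintptr_t)cwsr_area,
		.ctx_save_restore_size	  = cwsr_size,
		.ctl_stack_size		  = ctl_stack_size,
	};

	if (ioctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args) < 0) {
		perror("AMDKFD_IOC_CREATE_QUEUE");
		return -1;
	}
	return (int)args.queue_id;	/* filled in by KFD */
}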
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  27
1 file changed, 27 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 2ba7cea7b99b..00e1f1a9728b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -89,6 +89,28 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
 	if (q->format == KFD_QUEUE_FORMAT_AQL)
 		m->cp_hqd_iq_rptr = 1;
 
+	if (q->tba_addr) {
+		m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8);
+		m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8);
+		m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8);
+		m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8);
+		m->compute_pgm_rsrc2 |=
+			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
+	}
+
+	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
+		m->cp_hqd_persistent_state |=
+			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+		m->cp_hqd_ctx_save_base_addr_lo =
+			lower_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_base_addr_hi =
+			upper_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+	}
+
 	*mqd = m;
 	if (gart_addr)
 		*gart_addr = addr;
@@ -167,6 +189,11 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 			2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
 	}
 
+	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
+		m->cp_hqd_ctx_save_control =
+			atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
+			mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;
+
 	q->is_active = (q->queue_size > 0 &&
 			q->queue_address != 0 &&
 			q->queue_percent > 0);
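A side note on the init_mqd() hunk above: the trap-handler base and trap-memory addresses (TBA/TMA) are effectively 256-byte aligned, so the registers hold the address shifted right by 8 bits, split across lo/hi 32-bit fields. Below is a standalone demo of that packing, with simplified stand-ins for the kernel's lower_32_bits/upper_32_bits helpers; it is an illustration, not part of the patch.

/*
 * Standalone illustration of the TBA packing used in init_mqd():
 * the shifted address is split into compute_tba_lo/compute_tba_hi.
 * The macros below are simplified stand-ins for the kernel helpers.
 */
#include <stdint.h>
#include <stdio.h>

#define lower_32_bits(n) ((uint32_t)(n))
#define upper_32_bits(n) ((uint32_t)((n) >> 32))

int main(void)
{
	uint64_t tba_addr = 0x0000123456789a00ULL;	/* 256-byte aligned */
	uint64_t shifted = tba_addr >> 8;		/* registers hold bits [47:8] */

	uint32_t compute_tba_lo = lower_32_bits(shifted);
	uint32_t compute_tba_hi = upper_32_bits(shifted);

	/* Reconstruct the original address to verify the round trip. */
	uint64_t back = (((uint64_t)compute_tba_hi << 32) | compute_tba_lo) << 8;

	printf("lo=0x%08x hi=0x%08x roundtrip=0x%llx\n",
	       compute_tba_lo, compute_tba_hi, (unsigned long long)back);
	return 0;
}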