author    | Mukul Joshi <mukul.joshi@amd.com>        | 2022-04-26 13:00:11 -0400
committer | Alex Deucher <alexander.deucher@amd.com> | 2022-05-04 10:43:54 -0400
commit    | cc009e613de6560eb499f8bc92c80a737752cb30 (patch)
tree      | fbc536b7d33081b3cb60081587c8d32d82e4c647 /drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
parent    | drm/amdkfd: add helper to generate cache info from gfx config (diff)
download  | linux-dev-cc009e613de6560eb499f8bc92c80a737752cb30.tar.xz
          | linux-dev-cc009e613de6560eb499f8bc92c80a737752cb30.zip
drm/amdkfd: Add KFD support for soc21 v3
Add initial support for soc21 in KFD compute
driver (Mukul)
- Add new definition for soc21 device.
- Add new file for amdgpu-kfd interface for GFX11 family.
- Add new file for queue management, interrupt handling,
mqd management for GFX11 family in KFD driver.
- Related changes/updates for soc21 device in
KFD driver.
- Repurpose last 2 entries of SDMA MQD for driver use.
v2: Add an optional argument to the update queue operation (Mukul)
v3: Switch to ip version check, replace kgd_dev with
amdgpu_device (Hawking)
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Oak Zeng <Oak.Zeng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
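
The v3 note above ("switch to ip version check") refers to the dispatch visible at the bottom of the diff: ASIC-specific DQM ops are now selected by comparing the GC IP version rather than matching per-chip family enums, so soc21 only needs one new `>= IP_VERSION(11, 0, 0)` branch. A minimal standalone sketch of that pattern follows; the IP_VERSION packing mirrors the kernel's macro but is reproduced here as an assumption, and pick_dqm_ops() is a hypothetical stand-in for the real selection in device_queue_manager_init():

    #include <stdio.h>

    /* Assumed to mirror the kernel's IP_VERSION macro: pack major/minor/
     * revision so that versions compare correctly as plain integers. */
    #define IP_VERSION(maj, min, rev) (((maj) << 16) | ((min) << 8) | (rev))

    /* Hypothetical stand-in for the asic_ops selection logic. */
    static const char *pick_dqm_ops(unsigned int gc_version)
    {
            if (gc_version >= IP_VERSION(11, 0, 0))
                    return "device_queue_manager_init_v11";       /* soc21 */
            else if (gc_version >= IP_VERSION(10, 1, 1))
                    return "device_queue_manager_init_v10_navi10";
            else if (gc_version >= IP_VERSION(9, 0, 1))
                    return "device_queue_manager_init_v9";
            return "unsupported";
    }

    int main(void)
    {
            printf("%s\n", pick_dqm_ops(IP_VERSION(11, 0, 0))); /* -> v11 */
            printf("%s\n", pick_dqm_ops(IP_VERSION(10, 3, 0))); /* -> v10 */
            return 0;
    }

Because the ranges are ordered newest to oldest, every future GC 11.x revision falls into the v11 branch without further per-chip churn.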
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 299
1 file changed, 259 insertions, 40 deletions
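
In the diff below, both initialize_nocpsch() and initialize_cpsch() clear device-reserved SDMA queues out of the allocation bitmap before any queue is handed out. A standalone sketch of that masking, with assumed values (eight SDMA queues and two reserved slots are illustrative; the real mask comes from device_info.reserved_sdma_queues_bitmap):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* Assumed queue count for illustration. */
            unsigned int num_sdma_queues = 8;

            /* Bit i set => SDMA queue i is free for allocation. */
            uint64_t sdma_bitmap = ~0ULL >> (64 - num_sdma_queues);

            /* Hypothetical reserved mask: queues 6 and 7 kept for
             * firmware/driver use. */
            uint64_t reserved = (1ULL << 6) | (1ULL << 7);

            sdma_bitmap &= ~reserved;
            printf("sdma_bitmap: %llx\n", (unsigned long long)sdma_bitmap);
            /* Prints 3f: queues 0-5 remain allocatable. */
            return 0;
    }

Masking up front means the normal first-set-bit allocation path never has to special-case reserved queues.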
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 198672264492..e9c9a3a67ab0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -35,6 +35,7 @@
 #include "cik_regs.h"
 #include "kfd_kernel_queue.h"
 #include "amdgpu_amdkfd.h"
+#include "mes_api_def.h"
 
 /* Size of the per-pipe EOP queue */
 #define CIK_HPD_EOP_BYTES_LOG2 11
@@ -118,6 +119,11 @@ unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
 		dqm->dev->device_info.num_sdma_queues_per_engine;
 }
 
+static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm)
+{
+	return dqm->dev->device_info.reserved_sdma_queues_bitmap;
+}
+
 void program_sh_mem_settings(struct device_queue_manager *dqm,
 			     struct qcm_process_device *qpd)
 {
@@ -129,6 +135,151 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
 			qpd->sh_mem_bases);
 }
 
+static void kfd_hws_hang(struct device_queue_manager *dqm)
+{
+	/*
+	 * Issue a GPU reset if HWS is unresponsive
+	 */
+	dqm->is_hws_hang = true;
+
+	/* It's possible we're detecting a HWS hang in the
+	 * middle of a GPU reset. No need to schedule another
+	 * reset in this case.
+	 */
+	if (!dqm->is_resetting)
+		schedule_work(&dqm->hw_exception_work);
+}
+
+static int convert_to_mes_queue_type(int queue_type)
+{
+	int mes_queue_type;
+
+	switch (queue_type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+		mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
+		break;
+	case KFD_QUEUE_TYPE_SDMA:
+		mes_queue_type = MES_QUEUE_TYPE_SDMA;
+		break;
+	default:
+		WARN(1, "Invalid queue type %d", queue_type);
+		mes_queue_type = -EINVAL;
+		break;
+	}
+
+	return mes_queue_type;
+}
+
+static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
+			 struct qcm_process_device *qpd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+	struct mes_add_queue_input queue_input;
+	int r;
+
+	if (dqm->is_hws_hang)
+		return -EIO;
+
+	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
+	queue_input.process_id = qpd->pqm->process->pasid;
+	queue_input.page_table_base_addr = qpd->page_table_base;
+	queue_input.process_va_start = 0;
+	queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
+	/* MES unit for quantum is 100ns */
+	queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms */
+	queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
+	queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
+	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
+	queue_input.inprocess_gang_priority = q->properties.priority;
+	queue_input.gang_global_priority_level =
+					AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+	queue_input.doorbell_offset = q->properties.doorbell_off;
+	queue_input.mqd_addr = q->gart_mqd_addr;
+	queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
+	queue_input.paging = false;
+	queue_input.tba_addr = qpd->tba_addr;
+	queue_input.tma_addr = qpd->tma_addr;
+
+	queue_input.queue_type = convert_to_mes_queue_type(q->properties.type);
+	if (queue_input.queue_type < 0) {
+		pr_err("Queue type not supported with MES, queue:%d\n",
+			q->properties.type);
+		return -EINVAL;
+	}
+
+	if (q->gws) {
+		queue_input.gws_base = 0;
+		queue_input.gws_size = qpd->num_gws;
+	}
+
+	amdgpu_mes_lock(&adev->mes);
+	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
+	amdgpu_mes_unlock(&adev->mes);
+	if (r) {
+		pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
+			q->properties.doorbell_off);
+		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
+		kfd_hws_hang(dqm);
+	}
+
+	return r;
+}
+
+static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
+			struct qcm_process_device *qpd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+	int r;
+	struct mes_remove_queue_input queue_input;
+
+	if (dqm->is_hws_hang)
+		return -EIO;
+
+	memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
+	queue_input.doorbell_offset = q->properties.doorbell_off;
+	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
+
+	amdgpu_mes_lock(&adev->mes);
+	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
+	amdgpu_mes_unlock(&adev->mes);
+
+	if (r) {
+		pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
+			q->properties.doorbell_off);
+		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
+		kfd_hws_hang(dqm);
+	}
+
+	return r;
+}
+
+static int remove_all_queues_mes(struct device_queue_manager *dqm)
+{
+	struct device_process_node *cur;
+	struct qcm_process_device *qpd;
+	struct queue *q;
+	int retval = 0;
+
+	list_for_each_entry(cur, &dqm->queues, list) {
+		qpd = cur->qpd;
+		list_for_each_entry(q, &qpd->queues_list, list) {
+			if (q->properties.is_active) {
+				retval = remove_queue_mes(dqm, q, qpd);
+				if (retval) {
+					pr_err("%s: Failed to remove queue %d for dev %d",
+						__func__,
+						q->properties.queue_id,
+						dqm->dev->id);
+					return retval;
+				}
+			}
+		}
+	}
+
+	return retval;
+}
+
 static void increment_queue_count(struct device_queue_manager *dqm,
 				  struct qcm_process_device *qpd,
 				  struct queue *q)
@@ -659,6 +810,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
 	struct mqd_manager *mqd_mgr;
 	struct kfd_process_device *pdd;
 	bool prev_active = false;
+	bool add_queue = false;
 
 	dqm_lock(dqm);
 	pdd = kfd_get_process_device_data(q->device, q->process);
@@ -674,8 +826,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
 
 	/* Make sure the queue is unmapped before updating the MQD */
 	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
-		retval = unmap_queues_cpsch(dqm,
-				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
+		if (!dqm->dev->shared_resources.enable_mes)
+			retval = unmap_queues_cpsch(dqm,
+					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
+		else if (prev_active)
+			retval = remove_queue_mes(dqm, q, &pdd->qpd);
+
 		if (retval) {
 			pr_err("unmap queue failed\n");
 			goto out_unlock;
@@ -727,9 +883,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
 			q->properties.is_gws = false;
 	}
 
-	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
-		retval = map_queues_cpsch(dqm);
-	else if (q->properties.is_active &&
+	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
+		if (!dqm->dev->shared_resources.enable_mes)
+			retval = map_queues_cpsch(dqm);
+		else if (add_queue)
+			retval = add_queue_mes(dqm, q, &pdd->qpd);
+	} else if (q->properties.is_active &&
 		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
 		  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
 		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
@@ -822,12 +981,22 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
 		q->properties.is_active = false;
 		decrement_queue_count(dqm, qpd, q);
+
+		if (dqm->dev->shared_resources.enable_mes) {
+			retval = remove_queue_mes(dqm, q, qpd);
+			if (retval) {
+				pr_err("Failed to evict queue %d\n",
+					q->properties.queue_id);
+				goto out;
+			}
+		}
 	}
 	pdd->last_evict_timestamp = get_jiffies_64();
-	retval = execute_queues_cpsch(dqm,
-				      qpd->is_debug ?
-				      KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
-				      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+	if (!dqm->dev->shared_resources.enable_mes)
+		retval = execute_queues_cpsch(dqm,
+					      qpd->is_debug ?
+					      KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
+					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
 
 out:
 	dqm_unlock(dqm);
@@ -951,9 +1120,19 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
 		q->properties.is_active = true;
 		increment_queue_count(dqm, &pdd->qpd, q);
+
+		if (dqm->dev->shared_resources.enable_mes) {
+			retval = add_queue_mes(dqm, q, qpd);
+			if (retval) {
+				pr_err("Failed to restore queue %d\n",
+					q->properties.queue_id);
+				goto out;
+			}
+		}
 	}
-	retval = execute_queues_cpsch(dqm,
-				      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+	if (!dqm->dev->shared_resources.enable_mes)
+		retval = execute_queues_cpsch(dqm,
+					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
 	qpd->evicted = 0;
 	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
 	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
@@ -1081,6 +1260,9 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
 	memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
 
 	dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
+	dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
+	pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
+
 	dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
 
 	return 0;
@@ -1277,6 +1459,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
 	else
 		dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);
 
+	dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
+	pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
+
 	num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
 	if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap))
 		dqm->xgmi_sdma_bitmap = ULLONG_MAX;
@@ -1295,14 +1480,16 @@ static int start_cpsch(struct device_queue_manager *dqm)
 	retval = 0;
 
 	dqm_lock(dqm);
-	retval = pm_init(&dqm->packet_mgr, dqm);
-	if (retval)
-		goto fail_packet_manager_init;
-
-	retval = set_sched_resources(dqm);
-	if (retval)
-		goto fail_set_sched_resources;
+	if (!dqm->dev->shared_resources.enable_mes) {
+		retval = pm_init(&dqm->packet_mgr, dqm);
+		if (retval)
+			goto fail_packet_manager_init;
+
+		retval = set_sched_resources(dqm);
+		if (retval)
+			goto fail_set_sched_resources;
+	}
 
 	pr_debug("Allocating fence memory\n");
 
 	/* allocate fence memory on the gart */
@@ -1321,13 +1508,15 @@ static int start_cpsch(struct device_queue_manager *dqm)
 	dqm->is_hws_hang = false;
 	dqm->is_resetting = false;
 	dqm->sched_running = true;
-	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+	if (!dqm->dev->shared_resources.enable_mes)
+		execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
 	dqm_unlock(dqm);
 
 	return 0;
 fail_allocate_vidmem:
 fail_set_sched_resources:
-	pm_uninit(&dqm->packet_mgr, false);
+	if (!dqm->dev->shared_resources.enable_mes)
+		pm_uninit(&dqm->packet_mgr, false);
 fail_packet_manager_init:
 	dqm_unlock(dqm);
 	return retval;
@@ -1343,15 +1532,22 @@ static int stop_cpsch(struct device_queue_manager *dqm)
 		return 0;
 	}
 
-	if (!dqm->is_hws_hang)
-		unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
+	if (!dqm->is_hws_hang) {
+		if (!dqm->dev->shared_resources.enable_mes)
+			unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
+		else
+			remove_all_queues_mes(dqm);
+	}
+
 	hanging = dqm->is_hws_hang || dqm->is_resetting;
 	dqm->sched_running = false;
 
-	pm_release_ib(&dqm->packet_mgr);
+	if (!dqm->dev->shared_resources.enable_mes)
+		pm_release_ib(&dqm->packet_mgr);
 
 	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
-	pm_uninit(&dqm->packet_mgr, hanging);
+	if (!dqm->dev->shared_resources.enable_mes)
+		pm_uninit(&dqm->packet_mgr, hanging);
 	dqm_unlock(dqm);
 
 	return 0;
@@ -1469,8 +1665,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	if (q->properties.is_active) {
 		increment_queue_count(dqm, qpd, q);
 
-		execute_queues_cpsch(dqm,
-				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+		if (!dqm->dev->shared_resources.enable_mes) {
+			retval = execute_queues_cpsch(dqm,
+					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+		} else {
+			retval = add_queue_mes(dqm, q, qpd);
+			if (retval)
+				goto cleanup_queue;
+		}
 	}
 
 	/*
@@ -1485,6 +1687,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	dqm_unlock(dqm);
 	return retval;
 
+cleanup_queue:
+	qpd->queue_count--;
+	list_del(&q->list);
+	if (q->properties.is_active)
+		decrement_queue_count(dqm, qpd, q);
+	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+	dqm_unlock(dqm);
 out_deallocate_doorbell:
 	deallocate_doorbell(qpd, q);
 out_deallocate_sdma_queue:
@@ -1572,13 +1781,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 			queue_preemption_timeout_ms);
 	if (retval) {
 		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
-		dqm->is_hws_hang = true;
-		/* It's possible we're detecting a HWS hang in the
-		 * middle of a GPU reset. No need to schedule another
-		 * reset in this case.
-		 */
-		if (!dqm->is_resetting)
-			schedule_work(&dqm->hw_exception_work);
+		kfd_hws_hang(dqm);
 		return retval;
 	}
 
@@ -1683,11 +1886,15 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 	list_del(&q->list);
 	qpd->queue_count--;
 	if (q->properties.is_active) {
-		decrement_queue_count(dqm, qpd, q);
-		retval = execute_queues_cpsch(dqm,
-				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
-		if (retval == -ETIME)
-			qpd->reset_wavefronts = true;
+		if (!dqm->dev->shared_resources.enable_mes) {
+			decrement_queue_count(dqm, qpd, q);
+			retval = execute_queues_cpsch(dqm,
+				     KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+			if (retval == -ETIME)
+				qpd->reset_wavefronts = true;
+		} else {
+			retval = remove_queue_mes(dqm, q, qpd);
+		}
 	}
 
 	/*
@@ -1941,9 +2148,17 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
 			deallocate_sdma_queue(dqm, q);
 
-		if (q->properties.is_active)
+		if (q->properties.is_active) {
 			decrement_queue_count(dqm, qpd, q);
 
+			if (dqm->dev->shared_resources.enable_mes) {
+				retval = remove_queue_mes(dqm, q, qpd);
+				if (retval)
+					pr_err("Failed to remove queue %d\n",
+						q->properties.queue_id);
+			}
+		}
+
 		dqm->total_queue_count--;
 	}
 
@@ -1958,7 +2173,9 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 		}
 	}
 
-	retval = execute_queues_cpsch(dqm, filter, 0);
+	if (!dqm->dev->shared_resources.enable_mes)
+		retval = execute_queues_cpsch(dqm, filter, 0);
+
 	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
 		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
 		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
@@ -2133,7 +2350,9 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		break;
 
 	default:
-		if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
+		if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
+			device_queue_manager_init_v11(&dqm->asic_ops);
+		else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
			device_queue_manager_init_v10_navi10(&dqm->asic_ops);
 		else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
 			device_queue_manager_init_v9(&dqm->asic_ops);
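
One portability note on the convert_to_mes_queue_type() helper added above: it reports unsupported types as -EINVAL, and a negative sentinel like that only survives in a signed lvalue. The sketch below (stand-in enums, not the real KFD/MES definitions) checks the signed result before it lands in an unsigned packet field; the patch instead assigns to queue_input.queue_type first and then tests it with `< 0`, which is reliable only while that struct field is declared signed:

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in queue-type enums for illustration only. */
    enum kfd_qtype { KFD_QT_COMPUTE, KFD_QT_SDMA, KFD_QT_OTHER };
    enum mes_qtype { MES_QT_COMPUTE, MES_QT_SDMA };

    static int to_mes_qtype(enum kfd_qtype t)
    {
            switch (t) {
            case KFD_QT_COMPUTE: return MES_QT_COMPUTE;
            case KFD_QT_SDMA:    return MES_QT_SDMA;
            default:             return -EINVAL; /* unsupported with MES */
            }
    }

    int main(void)
    {
            uint32_t dest; /* unsigned, as packet fields often are */
            int r = to_mes_qtype(KFD_QT_OTHER);

            /* Test the signed result before the unsigned assignment;
             * `dest < 0` after the assignment would always be false. */
            if (r < 0) {
                    fprintf(stderr, "unsupported queue type\n");
                    return 1;
            }
            dest = (uint32_t)r;
            printf("mes type %u\n", dest);
            return 0;
    }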