aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
diff options
context:
space:
mode:
authorMukul Joshi <mukul.joshi@amd.com>2022-04-26 13:00:11 -0400
committerAlex Deucher <alexander.deucher@amd.com>2022-05-04 10:43:54 -0400
commitcc009e613de6560eb499f8bc92c80a737752cb30 (patch)
treefbc536b7d33081b3cb60081587c8d32d82e4c647 /drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
parentdrm/amdkfd: add helper to generate cache info from gfx config (diff)
downloadlinux-dev-cc009e613de6560eb499f8bc92c80a737752cb30.tar.xz
linux-dev-cc009e613de6560eb499f8bc92c80a737752cb30.zip
drm/amdkfd: Add KFD support for soc21 v3
Add initial support for soc21 in KFD compute driver (Mukul) - Add new definition for soc21 device. - Add new file for amdgpu-kfd interface for GFX11 family. - Add new file for queue management, interrupt handling, mqd management for GFX11 family in KFD driver. - Related changes/updates for soc21 device in KFD driver. - Repurpose last 2 entries of SDMA MQD for driver use. v2: Add an optional argument into update queue operation (Mukul) v3: Switch to ip version check, replace kgd_dev with amdgpu_device (Hawking) Signed-off-by: Mukul Joshi <mukul.joshi@amd.com> Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com> Reviewed-by: Oak Zeng <Oak.Zeng@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c299
1 files changed, 259 insertions, 40 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 198672264492..e9c9a3a67ab0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -35,6 +35,7 @@
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
+#include "mes_api_def.h"
/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
@@ -118,6 +119,11 @@ unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
dqm->dev->device_info.num_sdma_queues_per_engine;
}
+static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm)
+{
+ return dqm->dev->device_info.reserved_sdma_queues_bitmap;
+}
+
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
@@ -129,6 +135,151 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
qpd->sh_mem_bases);
}
+static void kfd_hws_hang(struct device_queue_manager *dqm)
+{
+ /*
+ * Issue a GPU reset if HWS is unresponsive
+ */
+ dqm->is_hws_hang = true;
+
+ /* It's possible we're detecting a HWS hang in the
+ * middle of a GPU reset. No need to schedule another
+ * reset in this case.
+ */
+ if (!dqm->is_resetting)
+ schedule_work(&dqm->hw_exception_work);
+}
+
+static int convert_to_mes_queue_type(int queue_type)
+{
+ int mes_queue_type;
+
+ switch (queue_type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ mes_queue_type = MES_QUEUE_TYPE_SDMA;
+ break;
+ default:
+ WARN(1, "Invalid queue type %d", queue_type);
+ mes_queue_type = -EINVAL;
+ break;
+ }
+
+ return mes_queue_type;
+}
+
+static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+ struct mes_add_queue_input queue_input;
+ int r;
+
+ if (dqm->is_hws_hang)
+ return -EIO;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
+ queue_input.process_id = qpd->pqm->process->pasid;
+ queue_input.page_table_base_addr = qpd->page_table_base;
+ queue_input.process_va_start = 0;
+ queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
+ /* MES unit for quantum is 100ns */
+ queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms. */
+ queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
+ queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
+ queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
+ queue_input.inprocess_gang_priority = q->properties.priority;
+ queue_input.gang_global_priority_level =
+ AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+ queue_input.doorbell_offset = q->properties.doorbell_off;
+ queue_input.mqd_addr = q->gart_mqd_addr;
+ queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
+ queue_input.paging = false;
+ queue_input.tba_addr = qpd->tba_addr;
+ queue_input.tma_addr = qpd->tma_addr;
+
+ queue_input.queue_type = convert_to_mes_queue_type(q->properties.type);
+ if (queue_input.queue_type < 0) {
+ pr_err("Queue type not supported with MES, queue:%d\n",
+ q->properties.type);
+ return -EINVAL;
+ }
+
+ if (q->gws) {
+ queue_input.gws_base = 0;
+ queue_input.gws_size = qpd->num_gws;
+ }
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r) {
+ pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
+ q->properties.doorbell_off);
+ pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
+ kfd_hws_hang(dqm);
+}
+
+ return r;
+}
+
+static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+ int r;
+ struct mes_remove_queue_input queue_input;
+
+ if (dqm->is_hws_hang)
+ return -EIO;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
+ queue_input.doorbell_offset = q->properties.doorbell_off;
+ queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+
+ if (r) {
+ pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
+ q->properties.doorbell_off);
+ pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
+ kfd_hws_hang(dqm);
+ }
+
+ return r;
+}
+
+static int remove_all_queues_mes(struct device_queue_manager *dqm)
+{
+ struct device_process_node *cur;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+ int retval = 0;
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->properties.is_active) {
+ retval = remove_queue_mes(dqm, q, qpd);
+ if (retval) {
+ pr_err("%s: Failed to remove queue %d for dev %d",
+ __func__,
+ q->properties.queue_id,
+ dqm->dev->id);
+ return retval;
+ }
+ }
+ }
+ }
+
+ return retval;
+}
+
static void increment_queue_count(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
@@ -659,6 +810,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
struct mqd_manager *mqd_mgr;
struct kfd_process_device *pdd;
bool prev_active = false;
+ bool add_queue = false;
dqm_lock(dqm);
pdd = kfd_get_process_device_data(q->device, q->process);
@@ -674,8 +826,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
/* Make sure the queue is unmapped before updating the MQD */
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
- retval = unmap_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
+ if (!dqm->dev->shared_resources.enable_mes)
+ retval = unmap_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
+ else if (prev_active)
+ retval = remove_queue_mes(dqm, q, &pdd->qpd);
+
if (retval) {
pr_err("unmap queue failed\n");
goto out_unlock;
@@ -727,9 +883,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
q->properties.is_gws = false;
}
- if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
- retval = map_queues_cpsch(dqm);
- else if (q->properties.is_active &&
+ if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
+ if (!dqm->dev->shared_resources.enable_mes)
+ retval = map_queues_cpsch(dqm);
+ else if (add_queue)
+ retval = add_queue_mes(dqm, q, &pdd->qpd);
+ } else if (q->properties.is_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
@@ -822,12 +981,22 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
q->properties.is_active = false;
decrement_queue_count(dqm, qpd, q);
+
+ if (dqm->dev->shared_resources.enable_mes) {
+ retval = remove_queue_mes(dqm, q, qpd);
+ if (retval) {
+ pr_err("Failed to evict queue %d\n",
+ q->properties.queue_id);
+ goto out;
+ }
+ }
}
pdd->last_evict_timestamp = get_jiffies_64();
- retval = execute_queues_cpsch(dqm,
- qpd->is_debug ?
- KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (!dqm->dev->shared_resources.enable_mes)
+ retval = execute_queues_cpsch(dqm,
+ qpd->is_debug ?
+ KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
out:
dqm_unlock(dqm);
@@ -951,9 +1120,19 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
q->properties.is_active = true;
increment_queue_count(dqm, &pdd->qpd, q);
+
+ if (dqm->dev->shared_resources.enable_mes) {
+ retval = add_queue_mes(dqm, q, qpd);
+ if (retval) {
+ pr_err("Failed to restore queue %d\n",
+ q->properties.queue_id);
+ goto out;
+ }
+ }
}
- retval = execute_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (!dqm->dev->shared_resources.enable_mes)
+ retval = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
qpd->evicted = 0;
eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
atomic64_add(eviction_duration, &pdd->evict_duration_counter);
@@ -1081,6 +1260,9 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
+ dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
+ pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
+
dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
return 0;
@@ -1277,6 +1459,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
else
dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);
+ dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
+ pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
+
num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap))
dqm->xgmi_sdma_bitmap = ULLONG_MAX;
@@ -1295,14 +1480,16 @@ static int start_cpsch(struct device_queue_manager *dqm)
retval = 0;
dqm_lock(dqm);
- retval = pm_init(&dqm->packet_mgr, dqm);
- if (retval)
- goto fail_packet_manager_init;
- retval = set_sched_resources(dqm);
- if (retval)
- goto fail_set_sched_resources;
+ if (!dqm->dev->shared_resources.enable_mes) {
+ retval = pm_init(&dqm->packet_mgr, dqm);
+ if (retval)
+ goto fail_packet_manager_init;
+ retval = set_sched_resources(dqm);
+ if (retval)
+ goto fail_set_sched_resources;
+ }
pr_debug("Allocating fence memory\n");
/* allocate fence memory on the gart */
@@ -1321,13 +1508,15 @@ static int start_cpsch(struct device_queue_manager *dqm)
dqm->is_hws_hang = false;
dqm->is_resetting = false;
dqm->sched_running = true;
- execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (!dqm->dev->shared_resources.enable_mes)
+ execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
dqm_unlock(dqm);
return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
- pm_uninit(&dqm->packet_mgr, false);
+ if (!dqm->dev->shared_resources.enable_mes)
+ pm_uninit(&dqm->packet_mgr, false);
fail_packet_manager_init:
dqm_unlock(dqm);
return retval;
@@ -1343,15 +1532,22 @@ static int stop_cpsch(struct device_queue_manager *dqm)
return 0;
}
- if (!dqm->is_hws_hang)
- unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
+ if (!dqm->is_hws_hang) {
+ if (!dqm->dev->shared_resources.enable_mes)
+ unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
+ else
+ remove_all_queues_mes(dqm);
+ }
+
hanging = dqm->is_hws_hang || dqm->is_resetting;
dqm->sched_running = false;
- pm_release_ib(&dqm->packet_mgr);
+ if (!dqm->dev->shared_resources.enable_mes)
+ pm_release_ib(&dqm->packet_mgr);
kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
- pm_uninit(&dqm->packet_mgr, hanging);
+ if (!dqm->dev->shared_resources.enable_mes)
+ pm_uninit(&dqm->packet_mgr, hanging);
dqm_unlock(dqm);
return 0;
@@ -1469,8 +1665,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (q->properties.is_active) {
increment_queue_count(dqm, qpd, q);
- execute_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (!dqm->dev->shared_resources.enable_mes) {
+ retval = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ } else {
+ retval = add_queue_mes(dqm, q, qpd);
+ if (retval)
+ goto cleanup_queue;
+ }
}
/*
@@ -1485,6 +1687,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
dqm_unlock(dqm);
return retval;
+cleanup_queue:
+ qpd->queue_count--;
+ list_del(&q->list);
+ if (q->properties.is_active)
+ decrement_queue_count(dqm, qpd, q);
+ mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+ dqm_unlock(dqm);
out_deallocate_doorbell:
deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
@@ -1572,13 +1781,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
queue_preemption_timeout_ms);
if (retval) {
pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
- dqm->is_hws_hang = true;
- /* It's possible we're detecting a HWS hang in the
- * middle of a GPU reset. No need to schedule another
- * reset in this case.
- */
- if (!dqm->is_resetting)
- schedule_work(&dqm->hw_exception_work);
+ kfd_hws_hang(dqm);
return retval;
}
@@ -1683,11 +1886,15 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
list_del(&q->list);
qpd->queue_count--;
if (q->properties.is_active) {
- decrement_queue_count(dqm, qpd, q);
- retval = execute_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
- if (retval == -ETIME)
- qpd->reset_wavefronts = true;
+ if (!dqm->dev->shared_resources.enable_mes) {
+ decrement_queue_count(dqm, qpd, q);
+ retval = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (retval == -ETIME)
+ qpd->reset_wavefronts = true;
+ } else {
+ retval = remove_queue_mes(dqm, q, qpd);
+ }
}
/*
@@ -1941,9 +2148,17 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
deallocate_sdma_queue(dqm, q);
- if (q->properties.is_active)
+ if (q->properties.is_active) {
decrement_queue_count(dqm, qpd, q);
+ if (dqm->dev->shared_resources.enable_mes) {
+ retval = remove_queue_mes(dqm, q, qpd);
+ if (retval)
+ pr_err("Failed to remove queue %d\n",
+ q->properties.queue_id);
+ }
+ }
+
dqm->total_queue_count--;
}
@@ -1958,7 +2173,9 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
}
}
- retval = execute_queues_cpsch(dqm, filter, 0);
+ if (!dqm->dev->shared_resources.enable_mes)
+ retval = execute_queues_cpsch(dqm, filter, 0);
+
if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
@@ -2133,7 +2350,9 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
break;
default:
- if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
+ if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
+ device_queue_manager_init_v11(&dqm->asic_ops);
+ else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
device_queue_manager_init_v10_navi10(&dqm->asic_ops);
else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
device_queue_manager_init_v9(&dqm->asic_ops);