aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorMukul Joshi <mukul.joshi@amd.com>2022-05-31 16:25:16 -0400
committerAlex Deucher <alexander.deucher@amd.com>2023-06-09 09:44:13 -0400
commit643e40d4c06f8c887af1789c7bf8d279e9c8e4cf (patch)
treed6694027e0b64e1e38e3e4e409bd647e0a9d9893
parentdrm/amdkfd: add gpu compute cores io links for gfx9.4.3 (diff)
downloadwireguard-linux-643e40d4c06f8c887af1789c7bf8d279e9c8e4cf.tar.xz
wireguard-linux-643e40d4c06f8c887af1789c7bf8d279e9c8e4cf.zip
drm/amdkfd: Fix SDMA in CPX mode
When creating a user-mode SDMA queue, CP FW expects the driver to set the virtual SDMA engine id in the MAP_QUEUES packet instead of the physical SDMA engine id. Each partition node's virtual SDMA numbering should start from 0. However, when allocating a doorbell for the queue, KFD needs to allocate it from the doorbell space corresponding to the physical SDMA engine id; otherwise the hardware will not see the doorbell press. Signed-off-by: Mukul Joshi <mukul.joshi@amd.com> Reviewed-by: Amber Lin <Amber.Lin@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c19
1 files changed, 15 insertions, 4 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 69419a53a14e..2b5c4b2dd242 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -363,7 +363,16 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
*/
uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx;
- uint32_t valid_id = idx_offset[q->properties.sdma_engine_id]
+
+ /*
+ * q->properties.sdma_engine_id corresponds to the virtual
+ * sdma engine number. However, for doorbell allocation,
+ * we need the physical sdma engine id in order to get the
+ * correct doorbell offset.
+ */
+ uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id *
+ get_num_all_sdma_engines(qpd->dqm) +
+ q->properties.sdma_engine_id]
+ (q->properties.sdma_queue_id & 1)
* KFD_QUEUE_DOORBELL_MIRROR_OFFSET
+ (q->properties.sdma_queue_id >> 1);
@@ -1388,7 +1397,6 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
}
q->properties.sdma_engine_id =
- dqm->dev->node_id * get_num_all_sdma_engines(dqm) +
q->sdma_id % kfd_get_num_sdma_engines(dqm->dev);
q->properties.sdma_queue_id = q->sdma_id /
kfd_get_num_sdma_engines(dqm->dev);
@@ -1418,7 +1426,6 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
* PCIe-optimized ones
*/
q->properties.sdma_engine_id =
- dqm->dev->node_id * get_num_all_sdma_engines(dqm) +
kfd_get_num_sdma_engines(dqm->dev) +
q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
q->properties.sdma_queue_id = q->sdma_id /
@@ -2486,6 +2493,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
int pipe, queue;
int r = 0, xcc;
uint32_t inst;
+ uint32_t sdma_engine_start;
if (!dqm->sched_running) {
seq_puts(m, " Device is stopped\n");
@@ -2530,7 +2538,10 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
}
}
- for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
+ sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
+ for (pipe = sdma_engine_start;
+ pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm));
+ pipe++) {
for (queue = 0;
queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
queue++) {