aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd/kfd_process.c
diff options
context:
space:
mode:
authorAlex Sierra <alex.sierra@amd.com>2020-05-28 18:03:15 -0500
committerAlex Deucher <alexander.deucher@amd.com>2021-04-20 21:47:41 -0400
commit063e33c5469cf5f46407701f1121a978a3af2ce8 (patch)
tree2d3e9ecb56885b3beb81bbb8092337e13e9f8744 /drivers/gpu/drm/amd/amdkfd/kfd_process.c
parentdrm/amdgpu: Enable retry faults unconditionally on Aldebaran (diff)
downloadlinux-dev-063e33c5469cf5f46407701f1121a978a3af2ce8.tar.xz
linux-dev-063e33c5469cf5f46407701f1121a978a3af2ce8.zip
drm/amdkfd: add xnack enabled flag to kfd_process
XNACK mode controls the SQ RETRY_DISABLE setting that determines, whether recoverable page faults can be supported on GFXv9 hardware. Only on Aldebaran we can support different processes running with different XNACK modes. On older chips all processes must use the same RETRY_DISABLE setting. However, processes not relying on recoverable page faults can work with RETRY enabled. This means XNACK off is always available as a fallback so we can use the same mode on all GPUs in a process. Signed-off-by: Alex Sierra <alex.sierra@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c53
1 files changed, 53 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 3c72e9dc6422..b8db509e2bbd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1193,6 +1193,56 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
}
}
+bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
+{
+ int i;
+
+ /* On most GFXv9 GPUs, the retry mode in the SQ must match the
+ * boot time retry setting. Mixing processes with different
+ * XNACK/retry settings can hang the GPU.
+ *
+ * Different GPUs can have different noretry settings depending
+ * on HW bugs or limitations. We need to find at least one
+ * XNACK mode for this process that's compatible with all GPUs.
+ * Fortunately GPUs with retry enabled (noretry=0) can run code
+ * built for XNACK-off. On GFXv9 it may perform slower.
+ *
+ * Therefore applications built for XNACK-off can always be
+ * supported and will be our fallback if any GPU does not
+ * support retry.
+ */
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_dev *dev = p->pdds[i]->dev;
+
+ /* Only consider GFXv9 and higher GPUs. Older GPUs don't
+ * support the SVM APIs and don't need to be considered
+ * for the XNACK mode selection.
+ */
+ if (dev->device_info->asic_family < CHIP_VEGA10)
+ continue;
+ /* Aldebaran can always support XNACK because it can support
+ * per-process XNACK mode selection. But let the dev->noretry
+ * setting still influence the default XNACK mode.
+ */
+ if (supported &&
+ dev->device_info->asic_family == CHIP_ALDEBARAN)
+ continue;
+
+ /* GFXv10 and later GPUs do not support shader preemption
+ * during page faults. This can lead to poor QoS for queue
+ * management and memory-manager-related preemptions or
+ * even deadlocks.
+ */
+ if (dev->device_info->asic_family >= CHIP_NAVI10)
+ return false;
+
+ if (dev->noretry)
+ return false;
+ }
+
+ return true;
+}
+
/*
* On return the kfd_process is fully operational and will be freed when the
* mm is released
@@ -1232,6 +1282,9 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (err != 0)
goto err_init_apertures;
+ /* Check XNACK support after PDDs are created in kfd_init_apertures */
+ process->xnack_enabled = kfd_process_xnack_mode(process, false);
+
err = svm_range_list_init(process);
if (err)
goto err_init_svm_range_list;