drm/amdgpu: rework how isolation is enforced v2

Limiting the number of available VMIDs to enforce isolation causes some issues with gang submit and applying certain HW workarounds which require multiple VMIDs to work correctly. So instead start to track all submissions to the relevant engines in a per partition data structure and use the dma_fences of the submissions to enforce isolation similar to what a VMID limit does. v2: use ~0l for jobs without isolation to distinct it from kernel submissions which uses NULL for the owner. Add some warning when we are OOM. Signed-off-by: Christian König <christian.koenig@amd.com> Acked-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
author: Christian König <christian.koenig@amd.com> 2025-01-15 13:44:26 +0100
committer: Alex Deucher <alexander.deucher@amd.com> 2025-03-21 12:16:34 -0400
commit: bd22e44ad415ac22e3a4f9a983d2a085f6cb4427 (patch)
tree: fb16198f66751ca2ad807bf57f4b31f010585121 /drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
parent: drm/amdgpu: overwrite signaled fence in amdgpu_sync (diff)
download: wireguard-linux-bd22e44ad415ac22e3a4f9a983d2a085f6cb4427.tar.xz
wireguard-linux-bd22e44ad415ac22e3a4f9a983d2a085f6cb4427.zip
1 files changed, 15 insertions, 28 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 56d27cea052e..92ab821afc06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -287,40 +287,27 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 	    (*id)->flushed_updates < updates ||
 	    !(*id)->last_flush ||
 	    ((*id)->last_flush->context != fence_context &&
-	     !dma_fence_is_signaled((*id)->last_flush))) {
+	     !dma_fence_is_signaled((*id)->last_flush)))
+		needs_flush = true;
+
+	if ((*id)->owner != vm->immediate.fence_context ||
+	    (!adev->vm_manager.concurrent_flush && needs_flush)) {
 		struct dma_fence *tmp;
 
-		/* Wait for the gang to be assembled before using a
-		 * reserved VMID or otherwise the gang could deadlock.
+		/* Don't use per engine and per process VMID at the
+		 * same time
 		 */
-		tmp = amdgpu_device_get_gang(adev);
-		if (!dma_fence_is_signaled(tmp) && tmp != job->gang_submit) {
+		if (adev->vm_manager.concurrent_flush)
+			ring = NULL;
+
+		/* to prevent one context starved by another context */
+		(*id)->pd_gpu_addr = 0;
+		tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
+		if (tmp) {
 			*id = NULL;
-			*fence = tmp;
+			*fence = dma_fence_get(tmp);
 			return 0;
 		}
-		dma_fence_put(tmp);
-
-		/* Make sure the id is owned by the gang before proceeding */
-		if (!job->gang_submit ||
-		    (*id)->owner != vm->immediate.fence_context) {
-
-			/* Don't use per engine and per process VMID at the
-			 * same time
-			 */
-			if (adev->vm_manager.concurrent_flush)
-				ring = NULL;
-
-			/* to prevent one context starved by another context */
-			(*id)->pd_gpu_addr = 0;
-			tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
-			if (tmp) {
-				*id = NULL;
-				*fence = dma_fence_get(tmp);
-				return 0;
-			}
-		}
-		needs_flush = true;
 	}
 
 	/* Good we can use this VMID. Remember this submission as
author	Christian König <christian.koenig@amd.com>	2025-01-15 13:44:26 +0100
committer	Alex Deucher <alexander.deucher@amd.com>	2025-03-21 12:16:34 -0400
commit	bd22e44ad415ac22e3a4f9a983d2a085f6cb4427 (patch)
tree	fb16198f66751ca2ad807bf57f4b31f010585121 /drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
parent	drm/amdgpu: overwrite signaled fence in amdgpu_sync (diff)
download	wireguard-linux-bd22e44ad415ac22e3a4f9a983d2a085f6cb4427.tar.xz wireguard-linux-bd22e44ad415ac22e3a4f9a983d2a085f6cb4427.zip