1 files changed, 124 insertions, 127 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 3a5b247be738..d42f437149c9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -23,13 +23,15 @@
 
 #include "pxp/intel_pxp.h"
 
+#include "i915_cmd_parser.h"
 #include "i915_drv.h"
+#include "i915_file_private.h"
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
+#include "i915_gem_evict.h"
 #include "i915_gem_ioctls.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
-#include "i915_vma_snapshot.h"
 
 struct eb_vma {
 	struct i915_vma *vma;
@@ -441,7 +443,7 @@ eb_pin_vma(struct i915_execbuffer *eb,
 	else
 		pin_flags = entry->offset & PIN_OFFSET_MASK;
 
-	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
+	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED | PIN_VALIDATE;
 	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
 		pin_flags |= PIN_GLOBAL;
 
@@ -459,17 +461,15 @@ eb_pin_vma(struct i915_execbuffer *eb,
 					     entry->pad_to_size,
 					     entry->alignment,
 					     eb_pin_flags(entry, ev->flags) |
-					     PIN_USER | PIN_NOEVICT);
+					     PIN_USER | PIN_NOEVICT | PIN_VALIDATE);
 		if (unlikely(err))
 			return err;
 	}
 
 	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
 		err = i915_vma_pin_fence(vma);
-		if (unlikely(err)) {
-			i915_vma_unpin(vma);
+		if (unlikely(err))
 			return err;
-		}
 
 		if (vma->fence)
 			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -485,13 +485,9 @@ eb_pin_vma(struct i915_execbuffer *eb,
 static inline void
 eb_unreserve_vma(struct eb_vma *ev)
 {
-	if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
-		return;
-
 	if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
 		__i915_vma_unpin_fence(ev->vma);
 
-	__i915_vma_unpin(ev->vma);
 	ev->flags &= ~__EXEC_OBJECT_RESERVED;
 }
 
@@ -673,10 +669,8 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
 
 	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
 		err = i915_vma_pin_fence(vma);
-		if (unlikely(err)) {
-			i915_vma_unpin(vma);
+		if (unlikely(err))
 			return err;
-		}
 
 		if (vma->fence)
 			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -688,85 +682,95 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
 	return 0;
 }
 
-static int eb_reserve(struct i915_execbuffer *eb)
+static bool eb_unbind(struct i915_execbuffer *eb, bool force)
 {
 	const unsigned int count = eb->buffer_count;
-	unsigned int pin_flags = PIN_USER | PIN_NONBLOCK;
+	unsigned int i;
 	struct list_head last;
+	bool unpinned = false;
+
+	/* Resort *all* the objects into priority order */
+	INIT_LIST_HEAD(&eb->unbound);
+	INIT_LIST_HEAD(&last);
+
+	for (i = 0; i < count; i++) {
+		struct eb_vma *ev = &eb->vma[i];
+		unsigned int flags = ev->flags;
+
+		if (!force && flags & EXEC_OBJECT_PINNED &&
+		    flags & __EXEC_OBJECT_HAS_PIN)
+			continue;
+
+		unpinned = true;
+		eb_unreserve_vma(ev);
+
+		if (flags & EXEC_OBJECT_PINNED)
+			/* Pinned must have their slot */
+			list_add(&ev->bind_link, &eb->unbound);
+		else if (flags & __EXEC_OBJECT_NEEDS_MAP)
+			/* Maps require the lowest 256MiB (aperture) */
+			list_add_tail(&ev->bind_link, &eb->unbound);
+		else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+			/* Prioritise 4GiB region for restricted bo */
+			list_add(&ev->bind_link, &last);
+		else
+			list_add_tail(&ev->bind_link, &last);
+	}
+
+	list_splice_tail(&last, &eb->unbound);
+	return unpinned;
+}
+
+static int eb_reserve(struct i915_execbuffer *eb)
+{
 	struct eb_vma *ev;
-	unsigned int i, pass;
+	unsigned int pass;
 	int err = 0;
+	bool unpinned;
 
 	/*
 	 * Attempt to pin all of the buffers into the GTT.
-	 * This is done in 3 phases:
+	 * This is done in 2 phases:
 	 *
-	 * 1a. Unbind all objects that do not match the GTT constraints for
-	 *     the execbuffer (fenceable, mappable, alignment etc).
-	 * 1b. Increment pin count for already bound objects.
-	 * 2.  Bind new objects.
-	 * 3.  Decrement pin count.
+	 * 1. Unbind all objects that do not match the GTT constraints for
+	 *    the execbuffer (fenceable, mappable, alignment etc).
+	 * 2. Bind new objects.
 	 *
 	 * This avoid unnecessary unbinding of later objects in order to make
 	 * room for the earlier objects *unless* we need to defragment.
+	 *
+	 * Defragmenting is skipped if all objects are pinned at a fixed location.
 	 */
-	pass = 0;
-	do {
-		list_for_each_entry(ev, &eb->unbound, bind_link) {
-			err = eb_reserve_vma(eb, ev, pin_flags);
-			if (err)
-				break;
-		}
-		if (err != -ENOSPC)
-			return err;
+	for (pass = 0; pass <= 2; pass++) {
+		int pin_flags = PIN_USER | PIN_VALIDATE;
 
-		/* Resort *all* the objects into priority order */
-		INIT_LIST_HEAD(&eb->unbound);
-		INIT_LIST_HEAD(&last);
-		for (i = 0; i < count; i++) {
-			unsigned int flags;
+		if (pass == 0)
+			pin_flags |= PIN_NONBLOCK;
 
-			ev = &eb->vma[i];
-			flags = ev->flags;
-			if (flags & EXEC_OBJECT_PINNED &&
-			    flags & __EXEC_OBJECT_HAS_PIN)
-				continue;
-
-			eb_unreserve_vma(ev);
+		if (pass >= 1)
+			unpinned = eb_unbind(eb, pass == 2);
 
-			if (flags & EXEC_OBJECT_PINNED)
-				/* Pinned must have their slot */
-				list_add(&ev->bind_link, &eb->unbound);
-			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
-				/* Map require the lowest 256MiB (aperture) */
-				list_add_tail(&ev->bind_link, &eb->unbound);
-			else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
-				/* Prioritise 4GiB region for restricted bo */
-				list_add(&ev->bind_link, &last);
-			else
-				list_add_tail(&ev->bind_link, &last);
-		}
-		list_splice_tail(&last, &eb->unbound);
-
-		switch (pass++) {
-		case 0:
-			break;
-
-		case 1:
-			/* Too fragmented, unbind everything and retry */
-			mutex_lock(&eb->context->vm->mutex);
-			err = i915_gem_evict_vm(eb->context->vm);
-			mutex_unlock(&eb->context->vm->mutex);
+		if (pass == 2) {
+			err = mutex_lock_interruptible(&eb->context->vm->mutex);
+			if (!err) {
+				err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
+				mutex_unlock(&eb->context->vm->mutex);
+			}
 			if (err)
 				return err;
-			break;
+		}
 
-		default:
-			return -ENOSPC;
+		list_for_each_entry(ev, &eb->unbound, bind_link) {
+			err = eb_reserve_vma(eb, ev, pin_flags);
+			if (err)
+				break;
 		}
 
-		pin_flags = PIN_USER;
-	} while (1);
+		if (err != -ENOSPC)
+			break;
+	}
+
+	return err;
 }
 
 static int eb_select_context(struct i915_execbuffer *eb)
@@ -1095,7 +1099,7 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
 {
 	struct drm_i915_private *i915 =
 		container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
-	return &i915->ggtt;
+	return to_gt(i915)->ggtt;
 }
 
 static void reloc_cache_unmap(struct reloc_cache *cache)
@@ -1214,10 +1218,11 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 	return vaddr;
 }
 
-static void *reloc_iomap(struct drm_i915_gem_object *obj,
+static void *reloc_iomap(struct i915_vma *batch,
 			 struct i915_execbuffer *eb,
 			 unsigned long page)
 {
+	struct drm_i915_gem_object *obj = batch->obj;
 	struct reloc_cache *cache = &eb->reloc_cache;
 	struct i915_ggtt *ggtt = cache_to_ggtt(cache);
 	unsigned long offset;
@@ -1227,7 +1232,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
 		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
 	} else {
-		struct i915_vma *vma;
+		struct i915_vma *vma = ERR_PTR(-ENODEV);
 		int err;
 
 		if (i915_gem_object_is_tiled(obj))
@@ -1240,10 +1245,23 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		if (err)
 			return ERR_PTR(err);
 
-		vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
-						  PIN_MAPPABLE |
-						  PIN_NONBLOCK /* NOWARN */ |
-						  PIN_NOEVICT);
+		/*
+		 * i915_gem_object_ggtt_pin_ww may attempt to remove the batch
+		 * VMA from the object list because we no longer pin.
+		 *
+		 * Only call it when the batch is not already inside the ggtt. A
+		 * ggtt batch that is map-and-fenceable is pinned and reused directly.
+		 */
+		if (!i915_is_ggtt(batch->vm)) {
+			vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
+							  PIN_MAPPABLE |
+							  PIN_NONBLOCK /* NOWARN */ |
+							  PIN_NOEVICT);
+		} else if (i915_vma_is_map_and_fenceable(batch)) {
+			__i915_vma_pin(batch);
+			vma = batch;
+		}
+
 		if (vma == ERR_PTR(-EDEADLK))
 			return vma;
 
@@ -1281,7 +1299,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 	return vaddr;
 }
 
-static void *reloc_vaddr(struct drm_i915_gem_object *obj,
+static void *reloc_vaddr(struct i915_vma *vma,
 			 struct i915_execbuffer *eb,
 			 unsigned long page)
 {
@@ -1293,9 +1311,9 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj,
 	} else {
 		vaddr = NULL;
 		if ((cache->vaddr & KMAP) == 0)
-			vaddr = reloc_iomap(obj, eb, page);
+			vaddr = reloc_iomap(vma, eb, page);
 		if (!vaddr)
-			vaddr = reloc_kmap(obj, cache, page);
+			vaddr = reloc_kmap(vma->obj, cache, page);
 	}
 
 	return vaddr;
@@ -1336,7 +1354,7 @@ relocate_entry(struct i915_vma *vma,
 	void *vaddr;
 
 repeat:
-	vaddr = reloc_vaddr(vma->obj, eb,
+	vaddr = reloc_vaddr(vma, eb,
 			    offset >> PAGE_SHIFT);
 	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
@@ -1411,7 +1429,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 			mutex_lock(&vma->vm->mutex);
 			err = i915_vma_bind(target->vma,
 					    target->vma->obj->cache_level,
-					    PIN_GLOBAL, NULL);
+					    PIN_GLOBAL, NULL, NULL);
 			mutex_unlock(&vma->vm->mutex);
 			reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
 			if (err)
@@ -1941,7 +1959,6 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
 {
 	const unsigned int count = eb->buffer_count;
 	unsigned int i = count, j;
-	struct i915_vma_snapshot *vsnap;
 
 	while (i--) {
 		struct eb_vma *ev = &eb->vma[i];
@@ -1951,11 +1968,6 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
 		if (!(flags & EXEC_OBJECT_CAPTURE))
 			continue;
 
-		vsnap = i915_vma_snapshot_alloc(GFP_KERNEL);
-		if (!vsnap)
-			continue;
-
-		i915_vma_snapshot_init(vsnap, vma, "user");
 		for_each_batch_create_order(eb, j) {
 			struct i915_capture_list *capture;
 
@@ -1964,10 +1976,9 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
 				continue;
 
 			capture->next = eb->capture_lists[j];
-			capture->vma_snapshot = i915_vma_snapshot_get(vsnap);
+			capture->vma_res = i915_vma_resource_get(vma->resource);
 			eb->capture_lists[j] = capture;
 		}
-		i915_vma_snapshot_put(vsnap);
 	}
 }
 
@@ -2198,7 +2209,7 @@ shadow_batch_pin(struct i915_execbuffer *eb,
 	if (IS_ERR(vma))
 		return vma;
 
-	err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
+	err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags | PIN_VALIDATE);
 	if (err)
 		return ERR_PTR(err);
 
@@ -2212,7 +2223,7 @@ static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i9
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
 	if (eb->batch_flags & I915_DISPATCH_SECURE)
-		return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0);
+		return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, PIN_VALIDATE);
 
 	return NULL;
 }
@@ -2263,13 +2274,12 @@ static int eb_parse(struct i915_execbuffer *eb)
 
 	err = i915_gem_object_lock(pool->obj, &eb->ww);
 	if (err)
-		goto err;
+		return err;
 
 	shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
-	if (IS_ERR(shadow)) {
-		err = PTR_ERR(shadow);
-		goto err;
-	}
+	if (IS_ERR(shadow))
+		return PTR_ERR(shadow);
+
 	intel_gt_buffer_pool_mark_used(pool);
 	i915_gem_object_set_readonly(shadow->obj);
 	shadow->private = pool;
@@ -2281,25 +2291,21 @@ static int eb_parse(struct i915_execbuffer *eb)
 		shadow = shadow_batch_pin(eb, pool->obj,
 					  &eb->gt->ggtt->vm,
 					  PIN_GLOBAL);
-		if (IS_ERR(shadow)) {
-			err = PTR_ERR(shadow);
-			shadow = trampoline;
-			goto err_shadow;
-		}
+		if (IS_ERR(shadow))
+			return PTR_ERR(shadow);
+
 		shadow->private = pool;
 
 		eb->batch_flags |= I915_DISPATCH_SECURE;
 	}
 
 	batch = eb_dispatch_secure(eb, shadow);
-	if (IS_ERR(batch)) {
-		err = PTR_ERR(batch);
-		goto err_trampoline;
-	}
+	if (IS_ERR(batch))
+		return PTR_ERR(batch);
 
 	err = dma_resv_reserve_shared(shadow->obj->base.resv, 1);
 	if (err)
-		goto err_trampoline;
+		return err;
 
 	err = intel_engine_cmd_parser(eb->context->engine,
 				      eb->batches[0]->vma,
@@ -2307,7 +2313,7 @@ static int eb_parse(struct i915_execbuffer *eb)
 				      eb->batch_len[0],
 				      shadow, trampoline);
 	if (err)
-		goto err_unpin_batch;
+		return err;
 
 	eb->batches[0] = &eb->vma[eb->buffer_count++];
 	eb->batches[0]->vma = i915_vma_get(shadow);
@@ -2326,17 +2332,6 @@ secure_batch:
 		eb->batches[0]->vma = i915_vma_get(batch);
 	}
 	return 0;
-
-err_unpin_batch:
-	if (batch)
-		i915_vma_unpin(batch);
-err_trampoline:
-	if (trampoline)
-		i915_vma_unpin(trampoline);
-err_shadow:
-	i915_vma_unpin(shadow);
-err:
-	return err;
 }
 
 static int eb_request_submit(struct i915_execbuffer *eb,
@@ -2505,9 +2500,14 @@ static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce,
 				      timeout) < 0) {
 			i915_request_put(rq);
 
-			tl = intel_context_timeline_lock(ce);
+			/*
+			 * Error path, cannot use intel_context_timeline_lock as
+			 * that is user interruptible and this cleanup step
+			 * must be done.
+			 */
+			mutex_lock(&ce->timeline->mutex);
 			intel_context_exit(ce);
-			intel_context_timeline_unlock(tl);
+			mutex_unlock(&ce->timeline->mutex);
 
 			if (nonblock)
 				return -EWOULDBLOCK;
@@ -3270,9 +3270,8 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
 		 * _onstack interface.
 		 */
 		if (eb->batches[i]->vma)
-			i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot,
-						       eb->batches[i]->vma,
-						       "batch");
+			eb->requests[i]->batch_res =
+				i915_vma_resource_get(eb->batches[i]->vma->resource);
 		if (eb->batch_pool) {
 			GEM_BUG_ON(intel_context_is_parallel(eb->context));
 			intel_gt_buffer_pool_mark_active(eb->batch_pool,
@@ -3457,8 +3456,6 @@ err_request:
 
 err_vma:
 	eb_release_vmas(&eb, true);
-	if (eb.trampoline)
-		i915_vma_unpin(eb.trampoline);
 	WARN_ON(err == -EDEADLK);
 	i915_gem_ww_ctx_fini(&eb.ww);