Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context.c          | 40
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_types.h     |  4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm.c            |  3
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c              | 61
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring_submission.c  | 31
5 files changed, 86 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index ef7bc41ffffa..5b7ff3ccfa8e 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -123,6 +123,10 @@ static int __context_pin_state(struct i915_vma *vma)
 	if (err)
 		return err;
 
+	err = i915_active_acquire(&vma->active);
+	if (err)
+		goto err_unpin;
+
 	/*
 	 * And mark it as a globally pinned object to let the shrinker know
 	 * it cannot reclaim the object until we release it.
@@ -131,14 +135,44 @@ static int __context_pin_state(struct i915_vma *vma)
 	vma->obj->mm.dirty = true;
 
 	return 0;
+
+err_unpin:
+	i915_vma_unpin(vma);
+	return err;
 }
 
 static void __context_unpin_state(struct i915_vma *vma)
 {
 	i915_vma_make_shrinkable(vma);
+	i915_active_release(&vma->active);
 	__i915_vma_unpin(vma);
 }
 
+static int __ring_active(struct intel_ring *ring)
+{
+	int err;
+
+	err = i915_active_acquire(&ring->vma->active);
+	if (err)
+		return err;
+
+	err = intel_ring_pin(ring);
+	if (err)
+		goto err_active;
+
+	return 0;
+
+err_active:
+	i915_active_release(&ring->vma->active);
+	return err;
+}
+
+static void __ring_retire(struct intel_ring *ring)
+{
+	intel_ring_unpin(ring);
+	i915_active_release(&ring->vma->active);
+}
+
 __i915_active_call
 static void __intel_context_retire(struct i915_active *active)
 {
@@ -151,7 +185,7 @@ static void __intel_context_retire(struct i915_active *active)
 		__context_unpin_state(ce->state);
 
 	intel_timeline_unpin(ce->timeline);
-	intel_ring_unpin(ce->ring);
+	__ring_retire(ce->ring);
 
 	intel_context_put(ce);
 }
@@ -163,7 +197,7 @@ static int __intel_context_active(struct i915_active *active)
 
 	intel_context_get(ce);
 
-	err = intel_ring_pin(ce->ring);
+	err = __ring_active(ce->ring);
 	if (err)
 		goto err_put;
 
@@ -183,7 +217,7 @@ static int __intel_context_active(struct i915_active *active)
 err_timeline:
 	intel_timeline_unpin(ce->timeline);
 err_ring:
-	intel_ring_unpin(ce->ring);
+	__ring_retire(ce->ring);
 err_put:
 	intel_context_put(ce);
 	return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 17f1f1441efc..2b446474e010 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -274,8 +274,8 @@ struct intel_engine_cs {
 	u8 class;
 	u8 instance;
 
-	u8 uabi_class;
-	u8 uabi_instance;
+	u16 uabi_class;
+	u16 uabi_instance;
 
 	u32 uabi_capabilities;
 	u32 context_size;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index a459a42ad5c2..7e64b7d7d330 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -94,8 +94,9 @@ static int __gt_park(struct intel_wakeref *wf)
 		intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
 	}
 
+	/* Defer dropping the display power well for 100ms, it's slow! */
 	GEM_BUG_ON(!wakeref);
-	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
+	intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref);
 
 	i915_globals_park();
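Taken on its own, the intel_context.c change above is the kernel's standard acquire/unwind idiom: each resource acquired is released in reverse order, both on the error path and at retirement. A minimal self-contained sketch of that idiom follows; the res_* and object_* names are hypothetical stand-ins, not the i915_active/intel_ring API.

/*
 * Sketch of the acquire/unwind idiom used by __ring_active() and
 * __ring_retire() above. Everything here is a hypothetical stand-in;
 * only the ordering rule matters: take A then B, and release B then A
 * on failure or at retirement.
 */
struct res {
	int refcount;
};

static int res_acquire(struct res *r)
{
	r->refcount++;
	return 0;	/* always succeeds in this sketch */
}

static void res_release(struct res *r)
{
	r->refcount--;
}

static int object_activate(struct res *tracker, struct res *backing)
{
	int err;

	err = res_acquire(tracker);	/* A: keep the tracker alive */
	if (err)
		return err;

	err = res_acquire(backing);	/* B: pin the backing store */
	if (err)
		goto err_tracker;

	return 0;

err_tracker:
	res_release(tracker);		/* unwind only what we took */
	return err;
}

static void object_retire(struct res *tracker, struct res *backing)
{
	res_release(backing);		/* B first... */
	res_release(tracker);		/* ...then A */
}

__ring_active() and __ring_retire() are exactly this shape, with i915_active_acquire() playing the tracker and intel_ring_pin() the backing store.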
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9fdefbdc3546..d925a1035c9d 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -845,12 +845,6 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
 	}
 }
 
-static void unwind_wa_tail(struct i915_request *rq)
-{
-	rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
-	assert_ring_tail_valid(rq->ring, rq->tail);
-}
-
 static struct i915_request *
 __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
@@ -863,12 +857,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	list_for_each_entry_safe_reverse(rq, rn,
 					 &engine->active.requests,
 					 sched.link) {
-
 		if (i915_request_completed(rq))
 			continue; /* XXX */
 
 		__i915_request_unsubmit(rq);
-		unwind_wa_tail(rq);
 
 		/*
 		 * Push the request back into the queue for later resubmission.
@@ -1161,13 +1153,29 @@ execlists_schedule_out(struct i915_request *rq)
 	i915_request_put(rq);
 }
 
-static u64 execlists_update_context(const struct i915_request *rq)
+static u64 execlists_update_context(struct i915_request *rq)
 {
 	struct intel_context *ce = rq->hw_context;
-	u64 desc;
+	u64 desc = ce->lrc_desc;
+	u32 tail;
 
-	ce->lrc_reg_state[CTX_RING_TAIL] =
-		intel_ring_set_tail(rq->ring, rq->tail);
+	/*
+	 * WaIdleLiteRestore:bdw,skl
+	 *
+	 * We should never submit the context with the same RING_TAIL twice
+	 * just in case we submit an empty ring, which confuses the HW.
+	 *
+	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
+	 * the normal request to be able to always advance the RING_TAIL on
+	 * subsequent resubmissions (for lite restore). Should that fail us,
+	 * and we try and submit the same tail again, force the context
+	 * reload.
+	 */
+	tail = intel_ring_set_tail(rq->ring, rq->tail);
+	if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
+		desc |= CTX_DESC_FORCE_RESTORE;
+	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
+	rq->tail = rq->wa_tail;
 
 	/*
 	 * Make sure the context image is complete before we submit it to HW.
@@ -1186,13 +1194,11 @@ static u64 execlists_update_context(const struct i915_request *rq)
 	 */
 	mb();
 
-	desc = ce->lrc_desc;
-	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
-
 	/* Wa_1607138340:tgl */
 	if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0))
 		desc |= CTX_DESC_FORCE_RESTORE;
 
+	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
 	return desc;
 }
 
@@ -1703,16 +1709,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				return;
 			}
-
-			/*
-			 * WaIdleLiteRestore:bdw,skl
-			 * Apply the wa NOOPs to prevent
-			 * ring:HEAD == rq:TAIL as we resubmit the
-			 * request. See gen8_emit_fini_breadcrumb() for
-			 * where we prepare the padding after the
-			 * end of the request.
-			 */
-			last->tail = last->wa_tail;
 		}
 	}
 
@@ -2668,6 +2664,14 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
+	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
+	batch = gen8_emit_pipe_control(batch,
+				       PIPE_CONTROL_FLUSH_L3 |
+				       PIPE_CONTROL_STORE_DATA_INDEX |
+				       PIPE_CONTROL_CS_STALL |
+				       PIPE_CONTROL_QW_WRITE,
+				       LRC_PPHWSP_SCRATCH_ADDR);
+
 	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
 
 	/* WaMediaPoolStateCmdInWABB:bxt,glk */
@@ -4120,17 +4124,18 @@ static void virtual_context_destroy(struct kref *kref)
 	for (n = 0; n < ve->num_siblings; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
 		struct rb_node *node = &ve->nodes[sibling->id].rb;
+		unsigned long flags;
 
 		if (RB_EMPTY_NODE(node))
 			continue;
 
-		spin_lock_irq(&sibling->active.lock);
+		spin_lock_irqsave(&sibling->active.lock, flags);
 
 		/* Detachment is lazily performed in the execlists tasklet */
 		if (!RB_EMPTY_NODE(node))
 			rb_erase_cached(node, &sibling->execlists.virtual);
 
-		spin_unlock_irq(&sibling->active.lock);
+		spin_unlock_irqrestore(&sibling->active.lock, flags);
 	}
 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
 
@@ -4419,9 +4424,11 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
 	ve->base.gt = siblings[0]->gt;
 	ve->base.uncore = siblings[0]->uncore;
 	ve->base.id = -1;
+
 	ve->base.class = OTHER_CLASS;
 	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
 	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
 
 	/*
 	 * The decision on whether to submit a request using semaphores
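With unwind_wa_tail() removed, the WaIdleLiteRestore handling moves to submission time: execlists_update_context() now compares the incoming RING_TAIL against the value written on the previous submission and, on a match, sets CTX_DESC_FORCE_RESTORE so the hardware reloads the context instead of seeing an apparently empty ring. A stripped-down sketch of that guard, with simplified types and a hypothetical FORCE_RESTORE bit standing in for the real descriptor layout:

/*
 * Sketch of the duplicate-tail guard in execlists_update_context()
 * above. Types and the FORCE_RESTORE bit are hypothetical; the real
 * code operates on ce->lrc_desc and ce->lrc_reg_state[CTX_RING_TAIL].
 */
#define FORCE_RESTORE	(1ull << 2)	/* hypothetical descriptor bit */

struct ctx_state {
	unsigned long long desc;	/* descriptor submitted to HW */
	unsigned int last_tail;		/* RING_TAIL written last time */
};

static unsigned long long update_context(struct ctx_state *ce,
					 unsigned int tail)
{
	unsigned long long desc = ce->desc;

	/*
	 * Submitting the same RING_TAIL twice can make the HW treat the
	 * ring as empty, so force a full context reload instead.
	 */
	if (ce->last_tail == tail)
		desc |= FORCE_RESTORE;
	ce->last_tail = tail;

	/* Force-restore is one-shot: clear it for the next submission. */
	ce->desc &= ~FORCE_RESTORE;

	return desc;
}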
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index a47d5a7c32c9..93026217c121 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1413,14 +1413,6 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 	int len;
 	u32 *cs;
 
-	flags |= MI_MM_SPACE_GTT;
-	if (IS_HASWELL(i915))
-		/* These flags are for resource streamer on HSW+ */
-		flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
-	else
-		/* We need to save the extended state for powersaving modes */
-		flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
-
 	len = 4;
 	if (IS_GEN(i915, 7))
 		len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
@@ -1589,22 +1581,21 @@ static int switch_context(struct i915_request *rq)
 	}
 
 	if (ce->state) {
-		u32 hw_flags;
+		u32 flags;
 
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
-		/*
-		 * The kernel context(s) is treated as pure scratch and is not
-		 * expected to retain any state (as we sacrifice it during
-		 * suspend and on resume it may be corrupted). This is ok,
-		 * as nothing actually executes using the kernel context; it
-		 * is purely used for flushing user contexts.
-		 */
-		hw_flags = 0;
-		if (i915_gem_context_is_kernel(rq->gem_context))
-			hw_flags = MI_RESTORE_INHIBIT;
+		/* For resource streamer on HSW+ and power context elsewhere */
+		BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
+		BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);
+
+		flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT;
+		if (!i915_gem_context_is_kernel(rq->gem_context))
+			flags |= MI_RESTORE_EXT_STATE_EN;
+		else
+			flags |= MI_RESTORE_INHIBIT;
 
-		ret = mi_set_context(rq, hw_flags);
+		ret = mi_set_context(rq, flags);
 		if (ret)
 			return ret;
 	}
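The switch_context() rework folds the Haswell resource-streamer flags and the pre-HSW extended-state flags into a single path, which is only safe because the corresponding defines occupy the same bits; the two BUILD_BUG_ON()s turn that assumption into a compile-time check. The same pattern in standalone C11, with hypothetical flag values (the kernel's MI_* defines are the real source of truth):

#include <assert.h>

/*
 * Hypothetical flag values; in the kernel these are the MI_* and
 * HSW_MI_RS_* defines, and BUILD_BUG_ON() plays the role of
 * static_assert().
 */
#define SAVE_EXT_EN	(1u << 3)	/* pre-HSW: save extended state */
#define HSW_RS_SAVE_EN	(1u << 3)	/* HSW+: resource streamer save */
#define RESTORE_EXT_EN	(1u << 2)
#define RESTORE_INHIBIT	(1u << 18)
#define MM_SPACE_GTT	(1u << 8)

/* If the aliased defines ever diverge, compilation fails here. */
static_assert(SAVE_EXT_EN == HSW_RS_SAVE_EN,
	      "save flags must share a bit for the unified path");

static unsigned int context_switch_flags(int is_kernel_ctx)
{
	unsigned int flags = SAVE_EXT_EN | MM_SPACE_GTT;

	/*
	 * The kernel context is pure scratch; inhibit the restore rather
	 * than reload state that is never preserved.
	 */
	if (is_kernel_ctx)
		flags |= RESTORE_INHIBIT;
	else
		flags |= RESTORE_EXT_EN;

	return flags;
}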