64 files changed, 5009 insertions, 1032 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
new file mode 100644
index 000000000000..de595b66a746
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gen7_renderclear.h"
+#include "i915_drv.h"
+#include "intel_gpu_commands.h"
+
+#define MAX_URB_ENTRIES 64
+#define STATE_SIZE (4 * 1024)
+#define GT3_INLINE_DATA_DELAYS 0x1E00
+#define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS))
+
+struct cb_kernel {
+	const void *data;
+	u32 size;
+};
+
+#define CB_KERNEL(name) { .data = (name), .size = sizeof(name) }
+
+#include "ivb_clear_kernel.c"
+static const struct cb_kernel cb_kernel_ivb = CB_KERNEL(ivb_clear_kernel);
+
+#include "hsw_clear_kernel.c"
+static const struct cb_kernel cb_kernel_hsw = CB_KERNEL(hsw_clear_kernel);
+
+struct batch_chunk {
+	struct i915_vma *vma;
+	u32 offset;
+	u32 *start;
+	u32 *end;
+	u32 max_items;
+};
+
+struct batch_vals {
+	u32 max_primitives;
+	u32 max_urb_entries;
+	u32 cmd_size;
+	u32 state_size;
+	u32 state_start;
+	u32 batch_size;
+	u32 surface_height;
+	u32 surface_width;
+	u32 scratch_size;
+	u32 max_size;
+};
+
+static void
+batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv)
+{
+	if (IS_HASWELL(i915)) {
+		bv->max_primitives = 280;
+		bv->max_urb_entries = MAX_URB_ENTRIES;
+		bv->surface_height = 16 * 16;
+		bv->surface_width = 32 * 2 * 16;
+	} else {
+		bv->max_primitives = 128;
+		bv->max_urb_entries = MAX_URB_ENTRIES / 2;
+		bv->surface_height = 16 * 8;
+		bv->surface_width = 32 * 16;
+	}
+	bv->cmd_size = bv->max_primitives * 4096;
+	bv->state_size = STATE_SIZE;
+	bv->state_start = bv->cmd_size;
+	bv->batch_size = bv->cmd_size + bv->state_size;
+	bv->scratch_size = bv->surface_height * bv->surface_width;
+	bv->max_size = bv->batch_size + bv->scratch_size;
+}
+
+static void batch_init(struct batch_chunk *bc,
+		       struct i915_vma *vma,
+		       u32 *start, u32 offset, u32 max_bytes)
+{
+	bc->vma = vma;
+	bc->offset = offset;
+	bc->start = start + bc->offset / sizeof(*bc->start);
+	bc->end = bc->start;
+	bc->max_items = max_bytes / sizeof(*bc->start);
+}
+
+static u32 batch_offset(const struct batch_chunk *bc, u32 *cs)
+{
+	return (cs - bc->start) * sizeof(*bc->start) + bc->offset;
+}
+
+static u32 batch_addr(const struct batch_chunk *bc)
+{
+	return bc->vma->node.start;
+}
+
+static void batch_add(struct batch_chunk *bc, const u32 d)
+{
+	GEM_BUG_ON((bc->end - bc->start) >= bc->max_items);
+	*bc->end++ = d;
+}
+
+static u32 *batch_alloc_items(struct batch_chunk *bc, u32 align, u32 items)
+{
+	u32 *map;
+
+	if (align) {
+		u32 *end = PTR_ALIGN(bc->end, align);
+
+		memset32(bc->end, 0, end - bc->end);
+		bc->end = end;
+	}
+
+	map = bc->end;
+	bc->end += items;
+
+	return map;
+}
+
+static u32 *batch_alloc_bytes(struct batch_chunk *bc, u32 align, u32 bytes)
+{
+	GEM_BUG_ON(!IS_ALIGNED(bytes, sizeof(*bc->start)));
+	return batch_alloc_items(bc, align, bytes / sizeof(*bc->start));
+}
+
+static u32
+gen7_fill_surface_state(struct batch_chunk *state,
+			const u32 dst_offset,
+			const struct batch_vals *bv)
+{
+	u32 surface_h = bv->surface_height;
+	u32 surface_w = bv->surface_width;
+	u32 *cs = batch_alloc_items(state, 32, 8);
+	u32 offset = batch_offset(state, cs);
+
+#define SURFACE_2D 1
+#define SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define RENDER_CACHE_READ_WRITE 1
+
+	*cs++ = SURFACE_2D << 29 |
+		(SURFACEFORMAT_B8G8R8A8_UNORM << 18) |
+		(RENDER_CACHE_READ_WRITE << 8);
+
+	*cs++ = batch_addr(state) + dst_offset;
+
+	*cs++ = ((surface_h / 4 - 1) << 16) | (surface_w / 4 - 1);
+	*cs++ = surface_w;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+#define SHADER_CHANNELS(r, g, b, a) \
+	(((r) << 25) | ((g) << 22) | ((b) << 19) | ((a) << 16))
+	*cs++ = SHADER_CHANNELS(4, 5, 6, 7);
+	batch_advance(state, cs);
+
+	return offset;
+}
+
+static u32
+gen7_fill_binding_table(struct batch_chunk *state,
+			const struct batch_vals *bv)
+{
+	u32 surface_start = gen7_fill_surface_state(state, bv->batch_size, bv);
+	u32 *cs = batch_alloc_items(state, 32, 8);
+	u32 offset = batch_offset(state, cs);
+
+	*cs++ = surface_start - state->offset;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	batch_advance(state, cs);
+
+	return offset;
+}
+
+static u32
+gen7_fill_kernel_data(struct batch_chunk *state,
+		      const u32 *data,
+		      const u32 size)
+{
+	return batch_offset(state,
+			    memcpy(batch_alloc_bytes(state, 64, size),
+				   data, size));
+}
+
+static u32
+gen7_fill_interface_descriptor(struct batch_chunk *state,
+			       const struct batch_vals *bv,
+			       const struct cb_kernel *kernel,
+			       unsigned int count)
+{
+	u32 kernel_offset =
+		gen7_fill_kernel_data(state, kernel->data, kernel->size);
+	u32 binding_table = gen7_fill_binding_table(state, bv);
+	u32 *cs = batch_alloc_items(state, 32, 8 * count);
+	u32 offset = batch_offset(state, cs);
+
+	*cs++ = kernel_offset;
+	*cs++ = (1 << 7) | (1 << 13);
+	*cs++ = 0;
+	*cs++ = (binding_table - state->offset) | 1;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	/* 1 - 63dummy idds */
+	memset32(cs, 0x00, (count - 1) * 8);
+	batch_advance(state, cs + (count - 1) * 8);
+
+	return offset;
+}
+
+static void
+gen7_emit_state_base_address(struct batch_chunk *batch,
+			     u32 surface_state_base)
+{
+	u32 *cs = batch_alloc_items(batch, 0, 12);
+
+	*cs++ = STATE_BASE_ADDRESS | (12 - 2);
+	/* general */
+	*cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
+	/* surface */
+	*cs++ = batch_addr(batch) | surface_state_base | BASE_ADDRESS_MODIFY;
+	/* dynamic */
+	*cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
+	/* indirect */
+	*cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
+	/* instruction */
+	*cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
+
+	/* general/dynamic/indirect/instruction access Bound */
+	*cs++ = 0;
+	*cs++ = BASE_ADDRESS_MODIFY;
+	*cs++ = 0;
+	*cs++ = BASE_ADDRESS_MODIFY;
+	*cs++ = 0;
+	*cs++ = 0;
+	batch_advance(batch, cs);
+}
+
+static void
+gen7_emit_vfe_state(struct batch_chunk *batch,
+		    const struct batch_vals *bv,
+		    u32 urb_size, u32 curbe_size,
+		    u32 mode)
+{
+	u32 urb_entries = bv->max_urb_entries;
+	u32 threads = bv->max_primitives - 1;
+	u32 *cs = batch_alloc_items(batch, 32, 8);
+
+	*cs++ = MEDIA_VFE_STATE | (8 - 2);
+
+	/* scratch buffer */
+	*cs++ = 0;
+
+	/* number of threads & urb entries for GPGPU vs Media Mode */
+	*cs++ = threads << 16 | urb_entries << 8 | mode << 2;
+
+	*cs++ = 0;
+
+	/* urb entry size & curbe size in 256 bits unit */
+	*cs++ = urb_size << 16 | curbe_size;
+
+	/* scoreboard */
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	batch_advance(batch, cs);
+}
+
+static void
+gen7_emit_interface_descriptor_load(struct batch_chunk *batch,
+				    const u32 interface_descriptor,
+				    unsigned int count)
+{
+	u32 *cs = batch_alloc_items(batch, 8, 4);
+
+	*cs++ = MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2);
+	*cs++ = 0;
+	*cs++ = count * 8 * sizeof(*cs);
+
+	/*
+	 * interface descriptor address - it is relative to the dynamics base
+	 * address
+	 */
+	*cs++ = interface_descriptor;
+	batch_advance(batch, cs);
+}
+
+static void
+gen7_emit_media_object(struct batch_chunk *batch,
+		       unsigned int media_object_index)
+{
+	unsigned int x_offset = (media_object_index % 16) * 64;
+	unsigned int y_offset = (media_object_index / 16) * 16;
+	unsigned int inline_data_size;
+	unsigned int media_batch_size;
+	unsigned int i;
+	u32 *cs;
+
+	inline_data_size = 112 * 8;
+	media_batch_size = inline_data_size + 6;
+
+	cs = batch_alloc_items(batch, 8, media_batch_size);
+
+	*cs++ = MEDIA_OBJECT | (media_batch_size - 2);
+
+	/* interface descriptor offset */
+	*cs++ = 0;
+
+	/* without indirect data */
+	*cs++ = 0;
+	*cs++ = 0;
+
+	/* scoreboard */
+	*cs++ = 0;
+	*cs++ = 0;
+
+	/* inline */
+	*cs++ = (y_offset << 16) | (x_offset);
+	*cs++ = 0;
+	*cs++ = GT3_INLINE_DATA_DELAYS;
+	for (i = 3; i < inline_data_size; i++)
+		*cs++ = 0;
+
+	batch_advance(batch, cs);
+}
+
+static void gen7_emit_pipeline_flush(struct batch_chunk *batch)
+{
+	u32 *cs = batch_alloc_items(batch, 0, 5);
+
+	*cs++ = GFX_OP_PIPE_CONTROL(5);
+	*cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+		PIPE_CONTROL_GLOBAL_GTT_IVB;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	batch_advance(batch, cs);
+}
+
+static void emit_batch(struct i915_vma * const vma,
+		       u32 *start,
+		       const struct batch_vals *bv)
+{
+	struct drm_i915_private *i915 = vma->vm->i915;
+	unsigned int desc_count = 64;
+	const u32 urb_size = 112;
+	struct batch_chunk cmds, state;
+	u32 interface_descriptor;
+	unsigned int i;
+
+	batch_init(&cmds, vma, start, 0, bv->cmd_size);
+	batch_init(&state, vma, start, bv->state_start, bv->state_size);
+
+	interface_descriptor =
+		gen7_fill_interface_descriptor(&state, bv,
+					       IS_HASWELL(i915) ?
+					       &cb_kernel_hsw :
+					       &cb_kernel_ivb,
+					       desc_count);
+	gen7_emit_pipeline_flush(&cmds);
+	batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
+	batch_add(&cmds, MI_NOOP);
+	gen7_emit_state_base_address(&cmds, interface_descriptor);
+	gen7_emit_pipeline_flush(&cmds);
+
+	gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0);
+
+	gen7_emit_interface_descriptor_load(&cmds,
+					    interface_descriptor,
+					    desc_count);
+
+	for (i = 0; i < bv->max_primitives; i++)
+		gen7_emit_media_object(&cmds, i);
+
+	batch_add(&cmds, MI_BATCH_BUFFER_END);
+}
+
+int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine,
+			    struct i915_vma * const vma)
+{
+	struct batch_vals bv;
+	u32 *batch;
+
+	batch_get_defaults(engine->i915, &bv);
+	if (!vma)
+		return bv.max_size;
+
+	GEM_BUG_ON(vma->obj->base.size < bv.max_size);
+
+	batch = i915_gem_object_pin_map(vma->obj, I915_MAP_WC);
+	if (IS_ERR(batch))
+		return PTR_ERR(batch);
+
+	emit_batch(vma, memset(batch, 0, bv.max_size), &bv);
+
+	i915_gem_object_flush_map(vma->obj);
+	i915_gem_object_unpin_map(vma->obj);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.h b/drivers/gpu/drm/i915/gt/gen7_renderclear.h
new file mode 100644
index 000000000000..bb100748e2c6
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __GEN7_RENDERCLEAR_H__
+#define __GEN7_RENDERCLEAR_H__
+
+struct intel_engine_cs;
+struct i915_vma;
+
+int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine,
+			    struct i915_vma * const vma);
+
+#endif /* __GEN7_RENDERCLEAR_H__ */
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4d1de2d97d5c..94e746af8926 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -8,6 +8,7 @@
 #include "gen8_ppgtt.h"
 #include "i915_scatterlist.h"
 #include "i915_trace.h"
+#include "i915_pvinfo.h"
 #include "i915_vgpu.h"
 #include "intel_gt.h"
 #include "intel_gtt.h"
@@ -25,6 +26,30 @@ static u64 gen8_pde_encode(const dma_addr_t addr,
 	return pde;
 }
 
+static u64 gen8_pte_encode(dma_addr_t addr,
+			   enum i915_cache_level level,
+			   u32 flags)
+{
+	gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
+
+	if (unlikely(flags & PTE_READ_ONLY))
+		pte &= ~_PAGE_RW;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		pte |= PPAT_UNCACHED;
+		break;
+	case I915_CACHE_WT:
+		pte |= PPAT_DISPLAY_ELLC;
+		break;
+	default:
+		pte |= PPAT_CACHED;
+		break;
+	}
+
+	return pte;
+}
+
 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
 	struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -706,6 +731,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
 	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
 	ppgtt->vm.clear_range = gen8_ppgtt_clear;
 
+	ppgtt->vm.pte_encode = gen8_pte_encode;
+
 	if (intel_vgpu_active(gt->i915))
 		gen8_ppgtt_notify_vgt(ppgtt, true);
 
diff --git a/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c b/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c
new file mode 100644
index 000000000000..b47f9d4a0848
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Generated by: IGT Gpu Tools on Fri 21 Feb 2020 05:30:13 AM UTC
+ */
+
+static const u32 hsw_clear_kernel[] = {
+	0x00000001, 0x26020128, 0x00000024, 0x00000000,
+	0x00000040, 0x20280c21, 0x00000028, 0x00000001,
+	0x01000010, 0x20000c20, 0x0000002c, 0x00000000,
+	0x00010220, 0x34001c00, 0x00001400, 0x00000160,
+	0x00600001, 0x20600061, 0x00000000, 0x00000000,
+	0x00000008, 0x20601c85, 0x00000e00, 0x0000000c,
+	0x00000005, 0x20601ca5, 0x00000060, 0x00000001,
+	0x00000008, 0x20641c85, 0x00000e00, 0x0000000d,
+	0x00000005, 0x20641ca5, 0x00000064, 0x00000003,
+	0x00000041, 0x207424a5, 0x00000064, 0x00000034,
+	0x00000040, 0x206014a5, 0x00000060, 0x00000074,
+	0x00000008, 0x20681c85, 0x00000e00, 0x00000008,
+	0x00000005, 0x20681ca5, 0x00000068, 0x0000000f,
+	0x00000041, 0x20701ca5, 0x00000060, 0x00000010,
+	0x00000040, 0x206814a5, 0x00000068, 0x00000070,
+	0x00600001, 0x20a00061, 0x00000000, 0x00000000,
+	0x00000005, 0x206c1c85, 0x00000e00, 0x00000007,
+	0x00000041, 0x206c1ca5, 0x0000006c, 0x00000004,
+	0x00600001, 0x20800021, 0x008d0000, 0x00000000,
+	0x00000001, 0x20800021, 0x0000006c, 0x00000000,
+	0x00000001, 0x20840021, 0x00000068, 0x00000000,
+	0x00000001, 0x20880061, 0x00000000, 0x00000003,
+	0x00000005, 0x208c0d21, 0x00000086, 0xffffffff,
+	0x05600032, 0x20a00fa1, 0x008d0080, 0x02190001,
+	0x00000040, 0x20a01ca5, 0x000000a0, 0x00000001,
+	0x05600032, 0x20a00fa1, 0x008d0080, 0x040a8001,
+	0x02000040, 0x20281c21, 0x00000028, 0xffffffff,
+	0x00010220, 0x34001c00, 0x00001400, 0xffffffe0,
+	0x00000001, 0x26020128, 0x00000024, 0x00000000,
+	0x00000001, 0x220010e4, 0x00000000, 0x00000000,
+	0x00000001, 0x220831ec, 0x00000000, 0x007f007f,
+	0x00600001, 0x20400021, 0x008d0000, 0x00000000,
+	0x00600001, 0x2fe00021, 0x008d0000, 0x00000000,
+	0x00200001, 0x20400121, 0x00450020, 0x00000000,
+	0x00000001, 0x20480061, 0x00000000, 0x000f000f,
+	0x00000005, 0x204c0d21, 0x00000046, 0xffffffef,
+	0x00800001, 0x20600061, 0x00000000, 0x00000000,
+	0x00800001, 0x20800061, 0x00000000, 0x00000000,
+	0x00800001, 0x20a00061, 0x00000000, 0x00000000,
+	0x00800001, 0x20c00061, 0x00000000, 0x00000000,
+	0x00800001, 0x20e00061, 0x00000000, 0x00000000,
+	0x00800001, 0x21000061, 0x00000000, 0x00000000,
+	0x00800001, 0x21200061, 0x00000000, 0x00000000,
+	0x00800001, 0x21400061, 0x00000000, 0x00000000,
+	0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000,
+	0x00000040, 0x20402d21, 0x00000020, 0x00100010,
+	0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000,
+	0x02000040, 0x22083d8c, 0x00000208, 0xffffffff,
+	0x00800001, 0xa0000109, 0x00000602, 0x00000000,
+	0x00000040, 0x22001c84, 0x00000200, 0x00000020,
+	0x00010220, 0x34001c00, 0x00001400, 0xffffffc0,
+	0x07600032, 0x20000fa0, 0x008d0fe0, 0x82000010,
+};
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 57e8a051ddc2..aea992e46c42 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -51,6 +51,11 @@ int intel_context_alloc_state(struct intel_context *ce)
 		return -EINTR;
 
 	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+		if (intel_context_is_banned(ce)) {
+			err = -EIO;
+			goto unlock;
+		}
+
 		err = ce->ops->alloc(ce);
 		if (unlikely(err))
 			goto unlock;
@@ -92,6 +97,8 @@ int __intel_context_do_pin(struct intel_context *ce)
 {
 	int err;
 
+	GEM_BUG_ON(intel_context_is_closed(ce));
+
 	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
 		err = intel_context_alloc_state(ce);
 		if (err)
@@ -116,7 +123,8 @@ int __intel_context_do_pin(struct intel_context *ce)
 		if (unlikely(err))
 			goto err_active;
 
-		CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n",
+		CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
+			 i915_ggtt_offset(ce->ring->vma),
 			 ce->ring->head, ce->ring->tail);
 
 		smp_mb__before_atomic(); /* flush pin before it is visible */
@@ -219,7 +227,9 @@ static void __intel_context_retire(struct i915_active *active)
 {
 	struct intel_context *ce = container_of(active, typeof(*ce), active);
 
-	CE_TRACE(ce, "retire\n");
+	CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
+		 intel_context_get_total_runtime_ns(ce),
+		 intel_context_get_avg_runtime_ns(ce));
 
 	set_bit(CONTEXT_VALID_BIT, &ce->flags);
 	if (ce->state)
@@ -280,6 +290,8 @@ intel_context_init(struct intel_context *ce,
 	ce->sseu = engine->sseu;
 	ce->ring = __intel_context_ring_size(SZ_4K);
 
+	ewma_runtime_init(&ce->runtime.avg);
+
 	ce->vm = i915_vm_get(engine->gt->vm);
 
 	INIT_LIST_HEAD(&ce->signal_link);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 30bd248827d8..07be021882cc 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 
 #include "i915_active.h"
+#include "i915_drv.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
 #include "intel_ring_types.h"
@@ -35,6 +36,9 @@ int intel_context_alloc_state(struct intel_context *ce);
 
 void intel_context_free(struct intel_context *ce);
 
+int intel_context_reconfigure_sseu(struct intel_context *ce,
+				   const struct intel_sseu sseu);
+
 /**
  * intel_context_lock_pinned - Stablises the 'pinned' status of the HW context
  * @ce - the context
@@ -169,6 +173,11 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce)
 	return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
 }
 
+static inline bool intel_context_is_closed(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_CLOSED_BIT, &ce->flags);
+}
+
 static inline bool intel_context_use_semaphores(const struct intel_context *ce)
 {
 	return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
@@ -224,4 +233,20 @@ intel_context_clear_nopreempt(struct intel_context *ce)
 	clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
 }
 
+static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
+{
+	const u32 period =
+		RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
+
+	return READ_ONCE(ce->runtime.total) * period;
+}
+
+static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
+{
+	const u32 period =
+		RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
+
+	return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period);
+}
+
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.c b/drivers/gpu/drm/i915/gt/intel_context_param.c
new file mode 100644
index 000000000000..65dcd090245d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_context_param.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_active.h"
+#include "intel_context.h"
+#include "intel_context_param.h"
+#include "intel_ring.h"
+
+int intel_context_set_ring_size(struct intel_context *ce, long sz)
+{
+	int err;
+
+	if (intel_context_lock_pinned(ce))
+		return -EINTR;
+
+	err = i915_active_wait(&ce->active);
+	if (err < 0)
+		goto unlock;
+
+	if (intel_context_is_pinned(ce)) {
+		err = -EBUSY; /* In active use, come back later! */
+		goto unlock;
+	}
+
+	if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+		struct intel_ring *ring;
+
+		/* Replace the existing ringbuffer */
+		ring = intel_engine_create_ring(ce->engine, sz);
+		if (IS_ERR(ring)) {
+			err = PTR_ERR(ring);
+			goto unlock;
+		}
+
+		intel_ring_put(ce->ring);
+		ce->ring = ring;
+
+		/* Context image will be updated on next pin */
+	} else {
+		ce->ring = __intel_context_ring_size(sz);
+	}
+
+unlock:
+	intel_context_unlock_pinned(ce);
+	return err;
+}
+
+long intel_context_get_ring_size(struct intel_context *ce)
+{
+	long sz = (unsigned long)READ_ONCE(ce->ring);
+
+	if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+		if (intel_context_lock_pinned(ce))
+			return -EINTR;
+
+		sz = ce->ring->size;
+		intel_context_unlock_pinned(ce);
+	}
+
+	return sz;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.h b/drivers/gpu/drm/i915/gt/intel_context_param.h
new file mode 100644
index 000000000000..f053d8633fe2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_context_param.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_CONTEXT_PARAM_H
+#define INTEL_CONTEXT_PARAM_H
+
+struct intel_context;
+
+int intel_context_set_ring_size(struct intel_context *ce, long sz);
+long intel_context_get_ring_size(struct intel_context *ce);
+
+#endif /* INTEL_CONTEXT_PARAM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
new file mode 100644
index 000000000000..57a30956c922
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_vma.h"
+#include "intel_context.h"
+#include "intel_engine_pm.h"
+#include "intel_gpu_commands.h"
+#include "intel_lrc.h"
+#include "intel_lrc_reg.h"
+#include "intel_ring.h"
+#include "intel_sseu.h"
+
+static int gen8_emit_rpcs_config(struct i915_request *rq,
+				 const struct intel_context *ce,
+				 const struct intel_sseu sseu)
+{
+	u64 offset;
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	offset = i915_ggtt_offset(ce->state) +
+		 LRC_STATE_PN * PAGE_SIZE +
+		 CTX_R_PWR_CLK_STATE * 4;
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+	*cs++ = intel_sseu_make_rpcs(rq->i915, &sseu);
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int
+gen8_modify_rpcs(struct intel_context *ce, const struct intel_sseu sseu)
+{
+	struct i915_request *rq;
+	int ret;
+
+	lockdep_assert_held(&ce->pin_mutex);
+
+	/*
+	 * If the context is not idle, we have to submit an ordered request to
+	 * modify its context image via the kernel context (writing to our own
+	 * image, or into the registers directory, does not stick). Pristine
+	 * and idle contexts will be configured on pinning.
+	 */
+	if (!intel_context_pin_if_active(ce))
+		return 0;
+
+	rq = intel_engine_create_kernel_request(ce->engine);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		goto out_unpin;
+	}
+
+	/* Serialise with the remote context */
+	ret = intel_context_prepare_remote_request(ce, rq);
+	if (ret == 0)
+		ret = gen8_emit_rpcs_config(rq, ce, sseu);
+
+	i915_request_add(rq);
+out_unpin:
+	intel_context_unpin(ce);
+	return ret;
+}
+
+int
+intel_context_reconfigure_sseu(struct intel_context *ce,
+			       const struct intel_sseu sseu)
+{
+	int ret;
+
+	GEM_BUG_ON(INTEL_GEN(ce->engine->i915) < 8);
+
+	ret = intel_context_lock_pinned(ce);
+	if (ret)
+		return ret;
+
+	/* Nothing to do if unmodified. */
+	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
+		goto unlock;
+
+	ret = gen8_modify_rpcs(ce, sseu);
+	if (!ret)
+		ce->sseu = sseu;
+
+unlock:
+	intel_context_unlock_pinned(ce);
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index ca1420fb8b53..07cb83a0d017 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -7,6 +7,7 @@
 #ifndef __INTEL_CONTEXT_TYPES__
 #define __INTEL_CONTEXT_TYPES__
 
+#include <linux/average.h>
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
@@ -19,6 +20,8 @@
 
 #define CONTEXT_REDZONE POISON_INUSE
 
+DECLARE_EWMA(runtime, 3, 8);
+
 struct i915_gem_context;
 struct i915_vma;
 struct intel_context;
@@ -42,8 +45,8 @@ struct intel_context {
 
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *inflight;
-#define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
-#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2)
+#define intel_context_inflight(ce) ptr_mask_bits(READ_ONCE((ce)->inflight), 2)
+#define intel_context_inflight_count(ce) ptr_unmask_bits(READ_ONCE((ce)->inflight), 2)
 
 	struct i915_address_space *vm;
 	struct i915_gem_context __rcu *gem_context;
@@ -59,15 +62,25 @@ struct intel_context {
 #define CONTEXT_BARRIER_BIT		0
 #define CONTEXT_ALLOC_BIT		1
 #define CONTEXT_VALID_BIT		2
-#define CONTEXT_USE_SEMAPHORES		3
-#define CONTEXT_BANNED			4
-#define CONTEXT_FORCE_SINGLE_SUBMISSION	5
-#define CONTEXT_NOPREEMPT		6
+#define CONTEXT_CLOSED_BIT		3
+#define CONTEXT_USE_SEMAPHORES		4
+#define CONTEXT_BANNED			5
+#define CONTEXT_FORCE_SINGLE_SUBMISSION	6
+#define CONTEXT_NOPREEMPT		7
 
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
 	u32 tag; /* cookie passed to HW to track this context on submission */
 
+	/* Time on GPU as tracked by the hw. */
+	struct {
+		struct ewma_runtime avg;
+		u64 total;
+		u32 last;
+		I915_SELFTEST_DECLARE(u32 num_underflow);
+		I915_SELFTEST_DECLARE(u32 max_underflow);
+	} runtime;
+
 	unsigned int active_count; /* protected by timeline->mutex */
 
 	atomic_t pin_count;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 5df003061e44..b469de0dd9b6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -107,7 +107,20 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
 static inline struct i915_request *
 execlists_active(const struct intel_engine_execlists *execlists)
 {
-	return *READ_ONCE(execlists->active);
+	struct i915_request * const *cur, * const *old, *active;
+
+	cur = READ_ONCE(execlists->active);
+	smp_rmb(); /* pairs with overwrite protection in process_csb() */
+	do {
+		old = cur;
+
+		active = READ_ONCE(*cur);
+		cur = READ_ONCE(execlists->active);
+
+		smp_rmb(); /* and complete the seqlock retry */
+	} while (unlikely(cur != old));
+
+	return active;
 }
 
 static inline void
@@ -192,6 +205,8 @@ void intel_engines_free(struct intel_gt *gt);
 int intel_engine_init_common(struct intel_engine_cs *engine);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
+int intel_engine_resume(struct intel_engine_cs *engine);
+
 int intel_ring_submission_setup(struct intel_engine_cs *engine);
 
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
@@ -303,26 +318,6 @@ intel_engine_find_active_request(struct intel_engine_cs *engine);
 
 u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
 
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-
-static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
-{
-	if (!execlists->preempt_hang.inject_hang)
-		return false;
-
-	complete(&execlists->preempt_hang.completion);
-	return true;
-}
-
-#else
-
-static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
-{
-	return false;
-}
-
-#endif
-
 void intel_engine_init_active(struct intel_engine_cs *engine,
 			      unsigned int subclass);
 #define ENGINE_PHYSICAL	0
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 06ff7695fa29..3aa8a652c16d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -35,6 +35,7 @@
 #include "intel_engine_user.h"
 #include "intel_gt.h"
 #include "intel_gt_requests.h"
+#include "intel_gt_pm.h"
 #include "intel_lrc.h"
 #include "intel_reset.h"
 #include "intel_ring.h"
@@ -199,10 +200,10 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
 			 * out in the wash.
 			 */
 			cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1;
-			DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n",
-					 INTEL_GEN(gt->i915),
-					 cxt_size * 64,
-					 cxt_size - 1);
+			drm_dbg(&gt->i915->drm,
+				"gen%d CXT_SIZE = %d bytes [0x%08x]\n",
+				INTEL_GEN(gt->i915), cxt_size * 64,
+				cxt_size - 1);
 			return round_up(cxt_size * 64, PAGE_SIZE);
 		case 3:
 		case 2:
@@ -274,6 +275,7 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
 static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 {
 	const struct engine_info *info = &intel_engines[id];
+	struct drm_i915_private *i915 = gt->i915;
 	struct intel_engine_cs *engine;
 
 	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
@@ -300,11 +302,11 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	engine->id = id;
 	engine->legacy_idx = INVALID_ENGINE;
 	engine->mask = BIT(id);
-	engine->i915 = gt->i915;
+	engine->i915 = i915;
 	engine->gt = gt;
 	engine->uncore = gt->uncore;
 	engine->hw_id = engine->guc_id = info->hw_id;
-	engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases);
+	engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases);
 
 	engine->class = info->class;
 	engine->instance = info->instance;
@@ -312,6 +314,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 
 	engine->props.heartbeat_interval_ms =
 		CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
+	engine->props.max_busywait_duration_ns =
+		CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT;
 	engine->props.preempt_timeout_ms =
 		CONFIG_DRM_I915_PREEMPT_TIMEOUT;
 	engine->props.stop_timeout_ms =
@@ -319,11 +323,15 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	engine->props.timeslice_duration_ms =
 		CONFIG_DRM_I915_TIMESLICE_DURATION;
 
+	/* Override to uninterruptible for OpenCL workloads. */
+	if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS)
+		engine->props.preempt_timeout_ms = 0;
+
 	engine->context_size = intel_engine_context_size(gt, engine->class);
 	if (WARN_ON(engine->context_size > BIT(20)))
 		engine->context_size = 0;
 	if (engine->context_size)
-		DRIVER_CAPS(gt->i915)->has_logical_contexts = true;
+		DRIVER_CAPS(i915)->has_logical_contexts = true;
 
 	/* Nothing to do here, execute in order of dependencies */
 	engine->schedule = NULL;
@@ -339,7 +347,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	gt->engine_class[info->class][info->instance] = engine;
 	gt->engine[id] = engine;
 
-	gt->i915->engine[id] = engine;
+	i915->engine[id] = engine;
 
 	return 0;
 }
@@ -392,8 +400,24 @@ void intel_engines_release(struct intel_gt *gt)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
+	/*
+	 * Before we release the resources held by engine, we must be certain
+	 * that the HW is no longer accessing them -- having the GPU scribble
+	 * to or read from a page being used for something else causes no end
+	 * of fun.
+	 *
+	 * The GPU should be reset by this point, but assume the worst just
+	 * in case we aborted before completely initialising the engines.
+	 */
+	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
+	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
+		__intel_gt_reset(gt, ALL_ENGINES);
+
 	/* Decouple the backend; but keep the layout for late GPU resets */
 	for_each_engine(engine, gt, id) {
+		intel_wakeref_wait_for_idle(&engine->wakeref);
+		GEM_BUG_ON(intel_engine_pm_is_awake(engine));
+
 		if (!engine->release)
 			continue;
 
@@ -432,9 +456,9 @@ int intel_engines_init_mmio(struct intel_gt *gt)
 	unsigned int i;
 	int err;
 
-	WARN_ON(engine_mask == 0);
-	WARN_ON(engine_mask &
-		GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
+	drm_WARN_ON(&i915->drm, engine_mask == 0);
+	drm_WARN_ON(&i915->drm, engine_mask &
+		    GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
 
 	if (i915_inject_probe_failure(i915))
 		return -ENODEV;
@@ -455,7 +479,7 @@ int intel_engines_init_mmio(struct intel_gt *gt)
 	 * are added to the driver by a warning and disabling the forgotten
 	 * engines.
 	 */
-	if (WARN_ON(mask != engine_mask))
+	if (drm_WARN_ON(&i915->drm, mask != engine_mask))
 		device_info->engine_mask = mask;
 
 	RUNTIME_INFO(i915)->num_engines = hweight32(mask);
@@ -510,7 +534,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
 {
 	unsigned int flags;
 
-	flags = PIN_GLOBAL;
 	if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
 		/*
 		 * On g33, we cannot place HWS above 256MiB, so
@@ -523,11 +546,11 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
 		 * above the mappable region (even though we never
 		 * actually map it).
 		 */
-		flags |= PIN_MAPPABLE;
+		flags = PIN_MAPPABLE;
 	else
-		flags |= PIN_HIGH;
+		flags = PIN_HIGH;
 
-	return i915_vma_pin(vma, 0, 0, flags);
+	return i915_ggtt_pin(vma, 0, flags);
 }
 
 static int init_status_page(struct intel_engine_cs *engine)
@@ -546,7 +569,8 @@ static int init_status_page(struct intel_engine_cs *engine)
 	 */
 	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
-		DRM_ERROR("Failed to allocate status page\n");
+		drm_err(&engine->i915->drm,
+			"Failed to allocate status page\n");
 		return PTR_ERR(obj);
 	}
 
@@ -614,15 +638,15 @@ static int engine_setup_common(struct intel_engine_cs *engine)
 
 struct measure_breadcrumb {
 	struct i915_request rq;
-	struct intel_timeline timeline;
 	struct intel_ring ring;
 	u32 cs[1024];
 };
 
-static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
+static int measure_breadcrumb_dw(struct intel_context *ce)
 {
+	struct intel_engine_cs *engine = ce->engine;
 	struct measure_breadcrumb *frame;
-	int dw = -ENOMEM;
+	int dw;
 
 	GEM_BUG_ON(!engine->gt->scratch);
 
@@ -630,39 +654,27 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
 	if (!frame)
 		return -ENOMEM;
 
-	if (intel_timeline_init(&frame->timeline,
-				engine->gt,
-				engine->status_page.vma))
-		goto out_frame;
-
-	mutex_lock(&frame->timeline.mutex);
+	frame->rq.i915 = engine->i915;
+	frame->rq.engine = engine;
+	frame->rq.context = ce;
+	rcu_assign_pointer(frame->rq.timeline, ce->timeline);
 
 	frame->ring.vaddr = frame->cs;
 	frame->ring.size = sizeof(frame->cs);
 	frame->ring.effective_size = frame->ring.size;
 	intel_ring_update_space(&frame->ring);
-
-	frame->rq.i915 = engine->i915;
-	frame->rq.engine = engine;
 	frame->rq.ring = &frame->ring;
-	rcu_assign_pointer(frame->rq.timeline, &frame->timeline);
-
-	dw = intel_timeline_pin(&frame->timeline);
-	if (dw < 0)
-		goto out_timeline;
 
+	mutex_lock(&ce->timeline->mutex);
 	spin_lock_irq(&engine->active.lock);
+
 	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
+
 	spin_unlock_irq(&engine->active.lock);
+	mutex_unlock(&ce->timeline->mutex);
 
 	GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */
 
-	intel_timeline_unpin(&frame->timeline);
-
-out_timeline:
-	mutex_unlock(&frame->timeline.mutex);
-	intel_timeline_fini(&frame->timeline);
-out_frame:
 	kfree(frame);
 	return dw;
 }
@@ -737,12 +749,6 @@ static int engine_init_common(struct intel_engine_cs *engine)
 
 	engine->set_default_submission(engine);
 
-	ret = measure_breadcrumb_dw(engine);
-	if (ret < 0)
-		return ret;
-
-	engine->emit_fini_breadcrumb_dw = ret;
-
 	/*
 	 * We may need to do things with the shrinker which
 	 * require us to immediately switch back to the default
@@ -755,9 +761,18 @@ static int engine_init_common(struct intel_engine_cs *engine)
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
+	ret = measure_breadcrumb_dw(ce);
+	if (ret < 0)
+		goto err_context;
+
+	engine->emit_fini_breadcrumb_dw = ret;
 	engine->kernel_context = ce;
 
 	return 0;
+
+err_context:
+	intel_context_put(ce);
+	return ret;
 }
 
 int intel_engines_init(struct intel_gt *gt)
@@ -824,6 +839,20 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	intel_wa_list_free(&engine->whitelist);
 }
 
+/**
+ * intel_engine_resume - re-initializes the HW state of the engine
+ * @engine: Engine to resume.
+ *
+ * Returns zero on success or an error code on failure.
+ */
+int intel_engine_resume(struct intel_engine_cs *engine)
+{
+	intel_engine_apply_workarounds(engine);
+	intel_engine_apply_whitelist(engine);
+
+	return engine->resume(engine);
+}
+
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
@@ -982,6 +1011,12 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
 
 		instdone->slice_common =
 			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
+		if (INTEL_GEN(i915) >= 12) {
+			instdone->slice_common_extra[0] =
+				intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA);
+			instdone->slice_common_extra[1] =
+				intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2);
+		}
 		for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
 			instdone->sampler[slice][subslice] =
 				read_subslice_reg(engine, slice, subslice,
@@ -1276,8 +1311,14 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 	}
 
 	if (INTEL_GEN(dev_priv) >= 6) {
-		drm_printf(m, "\tRING_IMR: %08x\n",
+		drm_printf(m, "\tRING_IMR:   0x%08x\n",
 			   ENGINE_READ(engine, RING_IMR));
+		drm_printf(m, "\tRING_ESR:   0x%08x\n",
+			   ENGINE_READ(engine, RING_ESR));
+		drm_printf(m, "\tRING_EMR:   0x%08x\n",
+			   ENGINE_READ(engine, RING_EMR));
+		drm_printf(m, "\tRING_EIR:   0x%08x\n",
+			   ENGINE_READ(engine, RING_EIR));
 	}
 
 	addr = intel_engine_get_active_head(engine);
@@ -1342,25 +1383,27 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 		execlists_active_lock_bh(execlists);
 		rcu_read_lock();
 		for (port = execlists->active; (rq = *port); port++) {
-			char hdr[80];
+			char hdr[160];
 			int len;
 
-			len = snprintf(hdr, sizeof(hdr),
-				       "\t\tActive[%d]: ",
-				       (int)(port - execlists->active));
+			len = scnprintf(hdr, sizeof(hdr),
+					"\t\tActive[%d]: ",
+					(int)(port - execlists->active));
 			if (!i915_request_signaled(rq)) {
 				struct intel_timeline *tl = get_timeline(rq);
 
-				len += snprintf(hdr + len, sizeof(hdr) - len,
-						"ring:{start:%08x, hwsp:%08x, seqno:%08x}, ",
-						i915_ggtt_offset(rq->ring->vma),
-						tl ? tl->hwsp_offset : 0,
-						hwsp_seqno(rq));
+				len += scnprintf(hdr + len, sizeof(hdr) - len,
+						 "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
+						 i915_ggtt_offset(rq->ring->vma),
+						 tl ? tl->hwsp_offset : 0,
+						 hwsp_seqno(rq),
+						 DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
+								       1000 * 1000));
 
 				if (tl)
 					intel_timeline_put(tl);
 			}
-			snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
+			scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
 			print_request(m, rq, hdr);
 		}
 		for (port = execlists->pending; (rq = *port); port++) {
@@ -1657,6 +1700,23 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
 	 * we only care about the snapshot of this moment.
 	 */
 	lockdep_assert_held(&engine->active.lock);
+
+	rcu_read_lock();
+	request = execlists_active(&engine->execlists);
+	if (request) {
+		struct intel_timeline *tl = request->context->timeline;
+
+		list_for_each_entry_from_reverse(request, &tl->requests, link) {
+			if (i915_request_completed(request))
+				break;
+
+			active = request;
+		}
+	}
+	rcu_read_unlock();
+	if (active)
+		return active;
+
 	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (i915_request_completed(request))
 			continue;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 6c6fd185457c..dd825718e4e5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -180,7 +180,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
 	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
 	struct intel_context *ce = engine->kernel_context;
 	struct i915_request *rq;
-	int err = 0;
+	int err;
 
 	if (!intel_engine_has_preemption(engine))
 		return -ENODEV;
@@ -188,8 +188,10 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
 	if (!intel_engine_pm_get_if_awake(engine))
 		return 0;
 
-	if (mutex_lock_interruptible(&ce->timeline->mutex))
+	if (mutex_lock_interruptible(&ce->timeline->mutex)) {
+		err = -EINTR;
 		goto out_rpm;
+	}
 
 	intel_context_enter(ce);
 	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
@@ -204,6 +206,8 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
 
 	__i915_request_commit(rq);
 	__i915_request_queue(rq, &attr);
+	GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
+	err = 0;
 
 out_unlock:
 	mutex_unlock(&ce->timeline->mutex);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index ea90ab3e396e..b6cf284e3a2d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -112,7 +112,7 @@ __queue_and_release_pm(struct i915_request *rq,
 {
 	struct intel_gt_timelines *timelines = &engine->gt->timelines;
 
-	ENGINE_TRACE(engine, "\n");
+	ENGINE_TRACE(engine, "parking\n");
 
 	/*
 	 * We have to serialise all potential retirement paths with our
@@ -249,7 +249,7 @@ static int __engine_park(struct intel_wakeref *wf)
 	if (!switch_to_kernel_context(engine))
 		return -EBUSY;
 
-	ENGINE_TRACE(engine, "\n");
+	ENGINE_TRACE(engine, "parked\n");
 
 	call_idle_barriers(engine); /* cleanup after wedging */
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 92be41a6903c..80cdde712842 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -75,6 +75,7 @@ struct intel_instdone {
 	u32 instdone;
 	/* The following exist only in the RCS engine */
 	u32 slice_common;
+	u32 slice_common_extra[2];
 	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
 	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
 };
@@ -126,7 +127,6 @@ DECLARE_EWMA(_engine_latency, 6, 4)
 struct st_preempt_hang {
 	struct completion completion;
 	unsigned int count;
-	bool inject_hang;
 };
 
 /**
@@ -157,6 +157,16 @@ struct intel_engine_execlists {
 	struct i915_priolist default_priolist;
 
 	/**
+	 * @error_interrupt: CS Master EIR
+	 *
+	 * The CS generates an interrupt when it detects an error. We capture
+	 * the first error interrupt, record the EIR and schedule the tasklet.
+	 * In the tasklet, we process the pending CS events to ensure we have
+	 * the guilty request, and then reset the engine.
+	 */
+	u32 error_interrupt;
+
+	/**
 	 * @no_priolist: priority lists disabled
 	 */
 	bool no_priolist;
@@ -537,6 +547,7 @@ struct intel_engine_cs {
 
 	struct {
 		unsigned long heartbeat_interval_ms;
+		unsigned long max_busywait_duration_ns;
 		unsigned long preempt_timeout_ms;
 		unsigned long stop_timeout_ms;
 		unsigned long timeslice_duration_ms;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 9e7f12bef828..848decee9066 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -278,7 +278,8 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
 			}
 		}
 
-		if (WARN(errors, "Invalid UABI engine mapping found"))
+		if (drm_WARN(&i915->drm, errors,
+			     "Invalid UABI engine mapping found"))
 			i915->uabi_engines = RB_ROOT;
 	}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 531d501be01f..aed498a0d032 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -8,6 +8,8 @@
 #include <asm/set_memory.h>
 #include <asm/smp.h>
 
+#include <drm/i915_drm.h>
+
 #include "intel_gt.h"
 #include "i915_drv.h"
 #include "i915_scatterlist.h"
@@ -104,27 +106,17 @@ static bool needs_idle_maps(struct drm_i915_private *i915)
 	return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active();
 }
 
-static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
+void i915_ggtt_suspend(struct i915_ggtt *ggtt)
 {
-	struct drm_i915_private *i915 = ggtt->vm.i915;
-
-	/*
-	 * Don't bother messing with faults pre GEN6 as we have little
-	 * documentation supporting that it's a good idea.
-	 */
-	if (INTEL_GEN(i915) < 6)
-		return;
+	struct i915_vma *vma;
 
-	intel_gt_check_and_clear_faults(ggtt->vm.gt);
+	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
+		i915_vma_wait_for_bind(vma);
 
 	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
-
 	ggtt->invalidate(ggtt);
-}
 
-void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915)
-{
-	ggtt_suspend_mappings(&i915->ggtt);
+	intel_gt_check_and_clear_faults(ggtt->vm.gt);
 }
 
 void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
@@ -167,6 +159,13 @@ static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
 	intel_gtt_chipset_flush();
 }
 
+static u64 gen8_ggtt_pte_encode(dma_addr_t addr,
+				enum i915_cache_level level,
+				u32 flags)
+{
+	return addr | _PAGE_PRESENT;
+}
+
 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 {
 	writeq(pte, addr);
@@ -182,7 +181,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 	gen8_pte_t __iomem *pte =
 		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
 
-	gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
+	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, 0));
 
 	ggtt->invalidate(ggtt);
 }
@@ -195,7 +194,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	struct sgt_iter sgt_iter;
 	gen8_pte_t __iomem *gtt_entries;
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
+	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0);
 	dma_addr_t addr;
 
 	/*
@@ -350,31 +349,6 @@ static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
 	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
 }
 
-struct clear_range {
-	struct i915_address_space *vm;
-	u64 start;
-	u64 length;
-};
-
-static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
-{
-	struct clear_range *arg = _arg;
-
-	gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
-	bxt_vtd_ggtt_wa(arg->vm);
-
-	return 0;
-}
-
-static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
-					  u64 start,
-					  u64 length)
-{
-	struct clear_range arg = { vm, start, length };
-
-	stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
-}
-
 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 				  u64 start, u64 length)
 {
@@ -462,7 +436,7 @@ static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
 	u64 size;
 	int ret;
 
-	if (!USES_GUC(ggtt->vm.i915))
+	if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
 		return 0;
 
 	GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
@@ -472,7 +446,8 @@ static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
 				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
 				   PIN_NOEVICT);
 	if (ret)
-		DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");
+		drm_dbg(&ggtt->vm.i915->drm,
+			"Failed to reserve top of GGTT for GuC\n");
 
 	return ret;
 }
@@ -544,8 +519,9 @@ static int init_ggtt(struct i915_ggtt *ggtt)
 
 	/* Clear any non-preallocated blocks */
 	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
-		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
-			      hole_start, hole_end);
+		drm_dbg_kms(&ggtt->vm.i915->drm,
+			    "clearing unused GTT space: [%lx, %lx]\n",
+			    hole_start, hole_end);
 		ggtt->vm.clear_range(&ggtt->vm, hole_start,
 				     hole_end - hole_start);
 	}
@@ -879,8 +855,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
 		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
 		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
-		if (ggtt->vm.clear_range != nop_clear_range)
-			ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
+		ggtt->vm.bind_async_flags =
+			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 	}
 
 	ggtt->invalidate = gen8_ggtt_invalidate;
@@ -890,7 +866,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
 	ggtt->vm.vma_ops.clear_pages = clear_pages;
 
-	ggtt->vm.pte_encode = gen8_pte_encode;
+	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
 
 	setup_private_pat(ggtt->vm.gt->uncore);
 
@@ -1180,7 +1156,7 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
 	ggtt->invalidate(ggtt);
 }
 
-static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
+void i915_ggtt_resume(struct i915_ggtt *ggtt)
 {
 	struct i915_vma *vma;
 	bool flush = false;
@@ -1188,8 +1164,6 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
 
 	intel_gt_check_and_clear_faults(ggtt->vm.gt);
 
-	mutex_lock(&ggtt->vm.mutex);
-
 	/* First fill our portion of the GTT with scratch pages */
 	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
 
@@ -1216,19 +1190,10 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
 	atomic_set(&ggtt->vm.open, open);
 	ggtt->invalidate(ggtt);
 
-	mutex_unlock(&ggtt->vm.mutex);
-
 	if (flush)
 		wbinvd_on_all_cpus();
-}
-
-void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915)
-{
-	struct i915_ggtt *ggtt = &i915->ggtt;
-
-	ggtt_restore_mappings(ggtt);
 
-	if (INTEL_GEN(i915) >= 8)
+	if (INTEL_GEN(ggtt->vm.i915) >= 8)
 		setup_private_pat(ggtt->vm.gt->uncore);
 }
 
@@ -1267,6 +1232,7 @@ intel_rotate_pages(struct intel_rotation_info *rot_info,
 		   struct drm_i915_gem_object *obj)
 {
 	unsigned int size = intel_rotation_info_size(rot_info);
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct sg_table *st;
 	struct scatterlist *sg;
 	int ret = -ENOMEM;
@@ -1296,8 +1262,9 @@ err_sg_alloc:
 	kfree(st);
 err_st_alloc:
 
-	DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
-			 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
+	drm_dbg(&i915->drm, "Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+		obj->base.size, rot_info->plane[0].width,
+		rot_info->plane[0].height, size);
 
 	return ERR_PTR(ret);
 }
@@ -1349,6 +1316,7 @@ intel_remap_pages(struct intel_remapped_info *rem_info,
 		  struct drm_i915_gem_object *obj)
 {
 	unsigned int size = intel_remapped_info_size(rem_info);
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct sg_table *st;
 	struct scatterlist *sg;
 	int ret = -ENOMEM;
@@ -1380,8 +1348,9 @@ err_sg_alloc:
 	kfree(st);
 err_st_alloc:
 
-	DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
-			 obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
+	drm_dbg(&i915->drm, "Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+		obj->base.size, rem_info->plane[0].width,
+		rem_info->plane[0].height, size);
 
 	return ERR_PTR(ret);
 }
@@ -1479,8 +1448,9 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
 	if (IS_ERR(vma->pages)) {
 		ret = PTR_ERR(vma->pages);
 		vma->pages = NULL;
-		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
-			  vma->ggtt_view.type, ret);
+		drm_err(&vma->vm->i915->drm,
+			"Failed to get pages for VMA view type %u (%d)!\n",
+			vma->ggtt_view.type, ret);
 	}
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 51b8718513bc..f04214a54f75 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -292,10 +292,21 @@
 #define MI_STORE_URB_MEM        MI_INSTR(0x2D, 0)
 #define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0)
 
-#define PIPELINE_SELECT                ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16))
-#define GFX_OP_3DSTATE_VF_STATISTICS   ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16))
-#define MEDIA_VFE_STATE                ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16))
+#define STATE_BASE_ADDRESS \
+	((0x3 << 29) | (0x0 << 27) | (0x1 << 24) | (0x1 << 16))
+#define BASE_ADDRESS_MODIFY		REG_BIT(0)
+#define PIPELINE_SELECT \
+	((0x3 << 29) | (0x1 << 27) | (0x1 << 24) | (0x4 << 16))
+#define PIPELINE_SELECT_MEDIA	       REG_BIT(0)
+#define GFX_OP_3DSTATE_VF_STATISTICS \
+	((0x3 << 29) | (0x1 << 27) | (0x0 << 24) | (0xB << 16))
+#define MEDIA_VFE_STATE \
+	((0x3 << 29) | (0x2 << 27) | (0x0 << 24) | (0x0 << 16))
 #define  MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18)
+#define MEDIA_INTERFACE_DESCRIPTOR_LOAD \
+	((0x3 << 29) | (0x2 << 27) | (0x0 << 24) | (0x2 << 16))
+#define MEDIA_OBJECT \
+	((0x3 << 29) | (0x2 << 27) | (0x1 << 24) | (0x0 << 16))
 #define GPGPU_OBJECT                   ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16))
 #define GPGPU_WALKER                   ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16))
 #define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index da2b6e2ae692..d09f7596cb98 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -198,16 +198,16 @@ static void gen6_check_faults(struct intel_gt *gt)
 	for_each_engine(engine, gt, id) {
 		fault = GEN6_RING_FAULT_REG_READ(engine);
 		if (fault & RING_FAULT_VALID) {
-			DRM_DEBUG_DRIVER("Unexpected fault\n"
-					 "\tAddr: 0x%08lx\n"
-					 "\tAddress space: %s\n"
-					 "\tSource ID: %d\n"
-					 "\tType: %d\n",
-					 fault & PAGE_MASK,
-					 fault & RING_FAULT_GTTSEL_MASK ?
-					 "GGTT" : "PPGTT",
-					 RING_FAULT_SRCID(fault),
-					 RING_FAULT_FAULT_TYPE(fault));
+			drm_dbg(&engine->i915->drm, "Unexpected fault\n"
+				"\tAddr: 0x%08lx\n"
+				"\tAddress space: %s\n"
+				"\tSource ID: %d\n"
+				"\tType: %d\n",
+				fault & PAGE_MASK,
+				fault & RING_FAULT_GTTSEL_MASK ?
+				"GGTT" : "PPGTT",
+				RING_FAULT_SRCID(fault),
+				RING_FAULT_FAULT_TYPE(fault));
 		}
 	}
 }
@@ -239,18 +239,17 @@ static void gen8_check_faults(struct intel_gt *gt)
 		fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
 			     ((u64)fault_data0 << 12);
 
-		DRM_DEBUG_DRIVER("Unexpected fault\n"
-				 "\tAddr: 0x%08x_%08x\n"
-				 "\tAddress space: %s\n"
-				 "\tEngine ID: %d\n"
-				 "\tSource ID: %d\n"
-				 "\tType: %d\n",
-				 upper_32_bits(fault_addr),
-				 lower_32_bits(fault_addr),
-				 fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
-				 GEN8_RING_FAULT_ENGINE_ID(fault),
-				 RING_FAULT_SRCID(fault),
-				 RING_FAULT_FAULT_TYPE(fault));
+		drm_dbg(&uncore->i915->drm, "Unexpected fault\n"
+			"\tAddr: 0x%08x_%08x\n"
+			"\tAddress space: %s\n"
+			"\tEngine ID: %d\n"
+			"\tSource ID: %d\n"
+			"\tType: %d\n",
+			upper_32_bits(fault_addr), lower_32_bits(fault_addr),
+			fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
+			GEN8_RING_FAULT_ENGINE_ID(fault),
+			RING_FAULT_SRCID(fault),
+			RING_FAULT_FAULT_TYPE(fault));
 	}
 }
 
@@ -345,7 +344,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
 		goto err_unref;
 	}
 
-	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
 	if (ret)
 		goto err_unref;
 
@@ -455,6 +454,11 @@ err_rq:
 		if (!rq)
 			continue;
 
+		if (rq->fence.error) {
+			err = -EIO;
+			goto out;
+		}
+
 		GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
 		state = rq->context->state;
 		if (!state)
@@ -538,6 +542,10 @@ static int __engines_verify_workarounds(struct intel_gt *gt)
 			err = -EIO;
 	}
 
+	/* Flush and restore the kernel context for safety */
+	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME)
+		err = -EIO;
+
 	return err;
 }
 
@@ -584,7 +592,9 @@ int intel_gt_init(struct intel_gt *gt)
 	if (err)
 		goto err_engines;
 
-	intel_uc_init(&gt->uc);
+	err = intel_uc_init(&gt->uc);
+	if (err)
+		goto err_engines;
 
 	err = intel_gt_resume(gt);
 	if (err)
@@ -634,6 +644,13 @@ void intel_gt_driver_remove(struct intel_gt *gt)
 void intel_gt_driver_unregister(struct intel_gt *gt)
 {
 	intel_rps_driver_unregister(&gt->rps);
+
+	/*
+	 * Upon unregistering the device to prevent any new users, cancel
+	 * all in-flight requests so that we can quickly unbind the active
+	 * resources.
+	 */
+	intel_gt_set_wedged(gt);
 }
 
 void intel_gt_driver_release(struct intel_gt *gt)
@@ -650,6 +667,9 @@ void intel_gt_driver_release(struct intel_gt *gt)
 
 void intel_gt_driver_late_release(struct intel_gt *gt)
 {
+	/* We need to wait for inflight RCU frees to release their grip */
+	rcu_barrier();
+
 	intel_uc_driver_late_release(&gt->uc);
 	intel_gt_fini_requests(gt);
 	intel_gt_fini_reset(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 1dac441cb8f4..4fac043750aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -14,7 +14,7 @@ struct drm_i915_private;
 
 #define GT_TRACE(gt, fmt, ...) do {					\
 	const struct intel_gt *gt__ __maybe_unused = (gt);		\
-	GEM_TRACE("%s  " fmt, dev_name(gt__->i915->drm.dev),		\
+	GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev),		\
 		  ##__VA_ARGS__);					\
 } while (0)
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index f796bdf1ed30..f0e7fd95165a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -24,6 +24,21 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 {
 	bool tasklet = false;
 
+	if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
+		u32 eir;
+
+		eir = ENGINE_READ(engine, RING_EIR);
+		ENGINE_TRACE(engine, "CS error: %x\n", eir);
+
+		/* Disable the error interrupt until after the reset */
+		if (likely(eir)) {
+			ENGINE_WRITE(engine, RING_EMR, ~0u);
+			ENGINE_WRITE(engine, RING_EIR, eir);
+			WRITE_ONCE(engine->execlists.error_interrupt, eir);
+			tasklet = true;
+		}
+	}
+
 	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
 		tasklet = true;
 
@@ -210,7 +225,10 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
 
 void gen11_gt_irq_postinstall(struct intel_gt *gt)
 {
-	const u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT;
+	const u32 irqs =
+		GT_CS_MASTER_ERROR_INTERRUPT |
+		GT_RENDER_USER_INTERRUPT |
+		GT_CONTEXT_SWITCH_INTERRUPT;
 	struct intel_uncore *uncore = gt->uncore;
 	const u32 dmask = irqs << 16 | irqs;
 	const u32 smask = irqs << 16;
@@ -279,66 +297,56 @@ void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
 
 	if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
 		      GT_BSD_CS_ERROR_INTERRUPT |
-		      GT_RENDER_CS_MASTER_ERROR_INTERRUPT))
+		      GT_CS_MASTER_ERROR_INTERRUPT))
 		DRM_DEBUG("Command parser error, gt_iir 0x%08x\n", gt_iir);
 
 	if (gt_iir & GT_PARITY_ERROR(gt->i915))
 		gen7_parity_error_irq_handler(gt, gt_iir);
 }
 
-void gen8_gt_irq_ack(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4])
+void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
 {
 	void __iomem * const regs = gt->uncore->regs;
+	u32 iir;
 
 	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
-		gt_iir[0] = raw_reg_read(regs, GEN8_GT_IIR(0));
-		if (likely(gt_iir[0]))
-			raw_reg_write(regs, GEN8_GT_IIR(0), gt_iir[0]);
-	}
-
-	if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) {
-		gt_iir[1] = raw_reg_read(regs, GEN8_GT_IIR(1));
-		if (likely(gt_iir[1]))
-			raw_reg_write(regs, GEN8_GT_IIR(1), gt_iir[1]);
-	}
-
-	if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
-		gt_iir[2] = raw_reg_read(regs, GEN8_GT_IIR(2));
-		if (likely(gt_iir[2]))
-			raw_reg_write(regs, GEN8_GT_IIR(2), gt_iir[2]);
-	}
-
-	if (master_ctl & GEN8_GT_VECS_IRQ) {
-		gt_iir[3] = raw_reg_read(regs, GEN8_GT_IIR(3));
-		if (likely(gt_iir[3]))
-			raw_reg_write(regs, GEN8_GT_IIR(3), gt_iir[3]);
-	}
-}
-
-void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4])
-{
-	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
-		cs_irq_handler(gt->engine_class[RENDER_CLASS][0],
-			       gt_iir[0] >> GEN8_RCS_IRQ_SHIFT);
-		cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0],
-			       gt_iir[0] >> GEN8_BCS_IRQ_SHIFT);
+		iir = raw_reg_read(regs, GEN8_GT_IIR(0));
+		if (likely(iir)) {
+			cs_irq_handler(gt->engine_class[RENDER_CLASS][0],
+				       iir >> GEN8_RCS_IRQ_SHIFT);
+			cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0],
+				       iir >> GEN8_BCS_IRQ_SHIFT);
+			raw_reg_write(regs, GEN8_GT_IIR(0), iir);
+		}
 	}
 
 	if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) {
-		cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0],
-			       gt_iir[1] >> GEN8_VCS0_IRQ_SHIFT);
-		cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1],
-			       gt_iir[1] >> GEN8_VCS1_IRQ_SHIFT);
+		iir = raw_reg_read(regs, GEN8_GT_IIR(1));
+		if (likely(iir)) {
+			cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0],
+				       iir >> GEN8_VCS0_IRQ_SHIFT);
+			cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1],
+				       iir >> GEN8_VCS1_IRQ_SHIFT);
+			raw_reg_write(regs, GEN8_GT_IIR(1), iir);
+		}
 	}
 
 	if (master_ctl & GEN8_GT_VECS_IRQ) {
-		cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
-			       gt_iir[3] >> GEN8_VECS_IRQ_SHIFT);
+		iir = raw_reg_read(regs, GEN8_GT_IIR(3));
+		if (likely(iir)) {
+			cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
+				       iir >> GEN8_VECS_IRQ_SHIFT);
+			raw_reg_write(regs, GEN8_GT_IIR(3), iir);
+		}
 	}
 
 	if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
-		gen6_rps_irq_handler(&gt->rps, gt_iir[2]);
-		guc_irq_handler(&gt->uc.guc, gt_iir[2] >> 16);
+		iir = raw_reg_read(regs, GEN8_GT_IIR(2));
+		if (likely(iir)) {
+			gen6_rps_irq_handler(&gt->rps, iir);
+			guc_irq_handler(&gt->uc.guc, iir >> 16);
+			raw_reg_write(regs, GEN8_GT_IIR(2), iir);
+		}
 	}
 }
 
@@ -354,25 +362,18 @@ void gen8_gt_irq_reset(struct intel_gt *gt)
 
 void gen8_gt_irq_postinstall(struct intel_gt *gt)
 {
-	struct intel_uncore *uncore = gt->uncore;
-
 	/* These are interrupts we'll toggle with the ring mask register */
-	u32 gt_interrupts[] = {
-		(GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
-		 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
-		 GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT |
-		 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT),
-
-		(GT_RENDER_USER_INTERRUPT << GEN8_VCS0_IRQ_SHIFT |
-		 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS0_IRQ_SHIFT |
-		 GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT |
-		 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT),
-
+	const u32 irqs =
+		GT_CS_MASTER_ERROR_INTERRUPT |
+		GT_RENDER_USER_INTERRUPT |
+		GT_CONTEXT_SWITCH_INTERRUPT;
+	const u32 gt_interrupts[] = {
+		irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
+		irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
 		0,
-
-		(GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT |
-		 GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT)
+		irqs << GEN8_VECS_IRQ_SHIFT,
 	};
+	struct intel_uncore *uncore = gt->uncore;
 
 	gt->pm_ier = 0x0;
 	gt->pm_imr = ~gt->pm_ier;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
index 8f37593712c9..886c5cf408a2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
@@ -36,9 +36,8 @@ void gen5_gt_enable_irq(struct intel_gt *gt, u32 mask);
 
 void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir);
 
-void gen8_gt_irq_ack(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]);
+void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl);
 void gen8_gt_irq_reset(struct intel_gt *gt);
-void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]);
 void gen8_gt_irq_postinstall(struct intel_gt *gt);
 
 #endif /* INTEL_GT_IRQ_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index d1c2f034296a..8b653c0f5e5f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -216,7 +216,7 @@ int intel_gt_resume(struct intel_gt *gt)
 		intel_engine_pm_get(engine);
 
 		engine->serial++; /* kernel context lost */
-		err = engine->resume(engine);
+		err = intel_engine_resume(engine);
 
 		intel_engine_pm_put(engine);
 		if (err) {
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 16acdc5d6734..2a72cce63fd9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -171,7 +171,9 @@ void __i915_vm_close(struct i915_address_space *vm)
 {
 	struct i915_vma *vma, *vn;
 
-	mutex_lock(&vm->mutex);
+	if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
+		return;
+
 	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
@@ -186,6 +188,7 @@ void __i915_vm_close(struct i915_address_space *vm)
 		i915_gem_object_put(obj);
 	}
 	GEM_BUG_ON(!list_empty(&vm->bound_list));
+
 	mutex_unlock(&vm->mutex);
 }
 
@@ -299,6 +302,25 @@ fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
 	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
 }
 
+static void poison_scratch_page(struct page *page, unsigned long size)
+{
+	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+		return;
+
+	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+
+	do {
+		void *vaddr;
+
+		vaddr = kmap(page);
+		memset(vaddr, POISON_FREE, PAGE_SIZE);
+		kunmap(page);
+
+		page = pfn_to_page(page_to_pfn(page) + 1);
+		size -= PAGE_SIZE;
+	} while (size);
+}
+
 int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
 {
 	unsigned long size;
@@ -331,6 +353,17 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
 		if (unlikely(!page))
 			goto skip;
 
+		/*
+		 * Use a non-zero scratch page for debugging.
+		 *
+		 * We want a value that should be reasonably obvious
+		 * to spot in the error state, while also causing a GPU hang
+		 * if executed. We prefer using a clear page in production, so
+		 * should it ever be accidentally used, the effect should be
+		 * fairly benign.
+		 */
+		poison_scratch_page(page, size);
+
 		addr = dma_map_page_attrs(vm->dma,
 					  page, 0, size,
 					  PCI_DMA_BIDIRECTIONAL,
@@ -448,36 +481,12 @@ void gtt_write_workarounds(struct intel_gt *gt)
 		intel_uncore_write(uncore,
 				   HSW_GTT_CACHE_EN,
 				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
-		WARN_ON_ONCE(can_use_gtt_cache &&
-			     intel_uncore_read(uncore,
-					       HSW_GTT_CACHE_EN) == 0);
+		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
+				 intel_uncore_read(uncore,
+						   HSW_GTT_CACHE_EN) == 0);
 	}
 }
 
-u64 gen8_pte_encode(dma_addr_t addr,
-		    enum i915_cache_level level,
-		    u32 flags)
-{
-	gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
-
-	if (unlikely(flags & PTE_READ_ONLY))
-		pte &= ~_PAGE_RW;
-
-	switch (level) {
-	case I915_CACHE_NONE:
-		pte |= PPAT_UNCACHED;
-		break;
-	case I915_CACHE_WT:
-		pte |= PPAT_DISPLAY_ELLC;
-		break;
-	default:
-		pte |= PPAT_CACHED;
-		break;
-	}
-
-	return pte;
-}
-
 static void tgl_setup_private_ppat(struct intel_uncore *uncore)
 {
 	/* TGL doesn't support LLC or AGE settings */
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 7da7681c20b1..b3116fe8d180 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -429,8 +429,7 @@ static inline void
 i915_vm_close(struct i915_address_space *vm)
 {
 	GEM_BUG_ON(!atomic_read(&vm->open));
-	if (atomic_dec_and_test(&vm->open))
-		__i915_vm_close(vm);
+	__i915_vm_close(vm);
 
 	i915_vm_put(vm);
 }
@@ -512,12 +511,8 @@ int i915_ppgtt_init_hw(struct intel_gt *gt);
 
 struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
 
-void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915);
-void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915);
-
-u64 gen8_pte_encode(dma_addr_t addr,
-		    enum i915_cache_level level,
-		    u32 flags);
+void i915_ggtt_suspend(struct i915_ggtt *gtt);
+void i915_ggtt_resume(struct i915_ggtt *ggtt);
 
 int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
 void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c
index ceb785b75c25..e3f637b3650e 100644
--- a/drivers/gpu/drm/i915/gt/intel_llc.c
+++ b/drivers/gpu/drm/i915/gt/intel_llc.c
@@ -50,6 +50,9 @@ static bool get_ia_constants(struct intel_llc *llc,
 	struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
 	struct intel_rps *rps = &llc_to_gt(llc)->rps;
 
+	if (!HAS_LLC(i915) || IS_DGFX(i915))
+		return false;
+
 	if (rps->max_freq <= rps->min_freq)
 		return false;
 
@@ -147,8 +150,7 @@ static void gen6_update_ring_freq(struct intel_llc *llc)
 
 void intel_llc_enable(struct intel_llc *llc)
 {
-	if (HAS_LLC(llc_to_gt(llc)->i915))
-		gen6_update_ring_freq(llc);
+	gen6_update_ring_freq(llc);
 }
 
 void intel_llc_disable(struct intel_llc *llc)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 31455eceeb0c..683014e7bc51 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -176,8 +176,6 @@
 
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
-#define WA_TAIL_DWORDS 2
-#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 
 struct virtual_engine {
 	struct intel_engine_cs base;
@@ -247,7 +245,7 @@ static void mark_eio(struct i915_request *rq)
 
 	GEM_BUG_ON(i915_request_signaled(rq));
 
-	dma_fence_set_error(&rq->fence, -EIO);
+	i915_request_set_error_once(rq, -EIO);
 	i915_request_mark_complete(rq);
 }
 
@@ -295,7 +293,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 
 static inline int rq_prio(const struct i915_request *rq)
 {
-	return rq->sched.attr.priority;
+	return READ_ONCE(rq->sched.attr.priority);
 }
 
 static int effective_prio(const struct i915_request *rq)
@@ -1006,7 +1004,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 				i915_request_cancel_breadcrumb(rq);
 				spin_unlock(&rq->lock);
 			}
-			rq->engine = owner;
+			WRITE_ONCE(rq->engine, owner);
 			owner->submit_request(rq);
 			active = NULL;
 		}
@@ -1197,6 +1195,48 @@ static void reset_active(struct i915_request *rq,
 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 }
 
+static u32 intel_context_get_runtime(const struct intel_context *ce)
+{
+	/*
+	 * We can use either ppHWSP[16] which is recorded before the context
+	 * switch (and so excludes the cost of context switches) or use the
+	 * value from the context image itself, which is saved/restored earlier
+	 * and so includes the cost of the save.
+	 */
+	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
+
+static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
+{
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+	ce->runtime.num_underflow += dt < 0;
+	ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
+#endif
+}
+
+static void intel_context_update_runtime(struct intel_context *ce)
+{
+	u32 old;
+	s32 dt;
+
+	if (intel_context_is_barrier(ce))
+		return;
+
+	old = ce->runtime.last;
+	ce->runtime.last = intel_context_get_runtime(ce);
+	dt = ce->runtime.last - old;
+
+	if (unlikely(dt <= 0)) {
+		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
+			 old, ce->runtime.last, dt);
+		st_update_runtime_underflow(ce, dt);
+		return;
+	}
+
+	ewma_runtime_add(&ce->runtime.avg, dt);
+	ce->runtime.total += dt;
+}
+
 static inline struct intel_engine_cs *
 __execlists_schedule_in(struct i915_request *rq)
 {
@@ -1211,12 +1251,12 @@ __execlists_schedule_in(struct i915_request *rq)
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 		execlists_check_context(ce, engine);
 
+	ce->lrc_desc &= ~GENMASK_ULL(47, 37);
 	if (ce->tag) {
 		/* Use a fixed tag for OA and friends */
 		ce->lrc_desc |= (u64)ce->tag << 32;
 	} else {
 		/* We don't need a strict matching tag, just different values */
-		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
 		ce->lrc_desc |=
 			(u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
 			GEN11_SW_CTX_ID_SHIFT;
@@ -1276,10 +1316,11 @@ __execlists_schedule_out(struct i915_request *rq,
 	 * If we have just completed this context, the engine may now be
 	 * idle and we want to re-enter powersaving.
 	 */
-	if (list_is_last(&rq->link, &ce->timeline->requests) &&
+	if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
 	    i915_request_completed(rq))
 		intel_engine_add_retire(engine, ce->timeline);
 
+	intel_context_update_runtime(ce);
 	intel_engine_context_out(engine);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	intel_gt_pm_put_async(engine->gt);
@@ -1395,15 +1436,26 @@ trace_ports(const struct intel_engine_execlists *execlists,
 		     ports[1] ? ports[1]->fence.seqno : 0);
 }
 
+static inline bool
+reset_in_progress(const struct intel_engine_execlists *execlists)
+{
+	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
+}
+
 static __maybe_unused bool
 assert_pending_valid(const struct intel_engine_execlists *execlists,
 		     const char *msg)
 {
 	struct i915_request * const *port, *rq;
 	struct intel_context *ce = NULL;
+	bool sentinel = false;
 
 	trace_ports(execlists, msg, execlists->pending);
 
+	/* We may be messing around with the lists during reset, lalala */
+	if (reset_in_progress(execlists))
+		return true;
+
 	if (!execlists->pending[0]) {
 		GEM_TRACE_ERR("Nothing pending for promotion!\n");
 		return false;
@@ -1430,6 +1482,26 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
 		}
 		ce = rq->context;
 
+		/*
+		 * Sentinels are supposed to be lonely so they flush the
+		 * current exection off the HW. Check that they are the
+		 * only request in the pending submission.
+		 */
+		if (sentinel) {
+			GEM_TRACE_ERR("context:%llx after sentinel in pending[%zd]\n",
+				      ce->timeline->fence_context,
+				      port - execlists->pending);
+			return false;
+		}
+
+		sentinel = i915_request_has_sentinel(rq);
+		if (sentinel && port != execlists->pending) {
+			GEM_TRACE_ERR("sentinel context:%llx not in prime position[%zd]\n",
+				      ce->timeline->fence_context,
+				      port - execlists->pending);
+			return false;
+		}
+
 		/* Hold tightly onto the lock to prevent concurrent retires! */
 		if (!spin_trylock_irqsave(&rq->lock, flags))
 			continue;
@@ -1525,6 +1597,11 @@ static bool can_merge_ctx(const struct intel_context *prev,
 	return true;
 }
 
+static unsigned long i915_request_flags(const struct i915_request *rq)
+{
+	return READ_ONCE(rq->fence.flags);
+}
+
 static bool can_merge_rq(const struct i915_request *prev,
 			 const struct i915_request *next)
 {
@@ -1542,7 +1619,7 @@ static bool can_merge_rq(const struct i915_request *prev,
 	if (i915_request_completed(next))
 		return true;
 
-	if (unlikely((prev->fence.flags ^ next->fence.flags) &
+	if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
 		     (BIT(I915_FENCE_FLAG_NOPREEMPT) |
 		      BIT(I915_FENCE_FLAG_SENTINEL))))
 		return false;
@@ -1550,6 +1627,7 @@ static bool can_merge_rq(const struct i915_request *prev,
 	if (!can_merge_ctx(prev->context, next->context))
 		return false;
 
+	GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
 	return true;
 }
 
@@ -1585,7 +1663,7 @@ static bool virtual_matches(const struct virtual_engine *ve,
 }
 
 static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
-				     struct intel_engine_cs *engine)
+				     struct i915_request *rq)
 {
 	struct intel_engine_cs *old = ve->siblings[0];
 
@@ -1593,9 +1671,19 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
 
 	spin_lock(&old->breadcrumbs.irq_lock);
 	if (!list_empty(&ve->context.signal_link)) {
-		list_move_tail(&ve->context.signal_link,
-			       &engine->breadcrumbs.signalers);
-		intel_engine_signal_breadcrumbs(engine);
+		list_del_init(&ve->context.signal_link);
+
+		/*
+		 * We cannot acquire the new engine->breadcrumbs.irq_lock
+		 * (as we are holding a breadcrumbs.irq_lock already),
+		 * so attach this request to the signaler on submission.
+		 * The queued irq_work will occur when we finally drop
+		 * the engine->active.lock after dequeue.
+		 */
+		set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags);
+
+		/* Also transfer the pending irq_work for the old breadcrumb. */
+		intel_engine_signal_breadcrumbs(rq->engine);
 	}
 	spin_unlock(&old->breadcrumbs.irq_lock);
 }
@@ -1605,6 +1693,11 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
 				     &(rq__)->sched.waiters_list, \
 				     wait_link)
 
+#define for_each_signaler(p__, rq__) \
+	list_for_each_entry_rcu(p__, \
+				&(rq__)->sched.signalers_list, \
+				signal_link)
+
 static void defer_request(struct i915_request *rq, struct list_head * const pl)
 {
 	LIST_HEAD(list);
@@ -1693,12 +1786,13 @@ timeslice(const struct intel_engine_cs *engine)
 static unsigned long
 active_timeslice(const struct intel_engine_cs *engine)
 {
-	const struct i915_request *rq = *engine->execlists.active;
+	const struct intel_engine_execlists *execlists = &engine->execlists;
+	const struct i915_request *rq = *execlists->active;
 
 	if (!rq || i915_request_completed(rq))
 		return 0;
 
-	if (engine->execlists.switch_priority_hint < effective_prio(rq))
+	if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
 		return 0;
 
 	return timeslice(engine);
@@ -1715,8 +1809,11 @@ static void set_timeslice(struct intel_engine_cs *engine)
 static void start_timeslice(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
+	int prio = queue_prio(execlists);
 
-	execlists->switch_priority_hint = execlists->queue_priority_hint;
+	WRITE_ONCE(execlists->switch_priority_hint, prio);
+	if (prio == INT_MIN)
+		return;
 
 	if (timer_pending(&execlists->timer))
 		return;
@@ -1938,13 +2035,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				     "",
 				     yesno(engine != ve->siblings[0]));
 
-			ve->request = NULL;
-			ve->base.execlists.queue_priority_hint = INT_MIN;
+			WRITE_ONCE(ve->request, NULL);
+			WRITE_ONCE(ve->base.execlists.queue_priority_hint,
+				   INT_MIN);
 			rb_erase_cached(rb, &execlists->virtual);
 			RB_CLEAR_NODE(rb);
 
 			GEM_BUG_ON(!(rq->execution_mask & engine->mask));
-			rq->engine = engine;
+			WRITE_ONCE(rq->engine, engine);
 
 			if (engine != ve->siblings[0]) {
 				u32 *regs = ve->context.lrc_reg_state;
@@ -1957,7 +2055,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 									engine);
 
 				if (!list_empty(&ve->context.signals))
-					virtual_xfer_breadcrumbs(ve, engine);
+					virtual_xfer_breadcrumbs(ve, rq);
 
 				/*
 				 * Move the bound engine to the top of the list
@@ -2064,6 +2162,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				GEM_BUG_ON(last &&
 					   !can_merge_ctx(last->context,
 							  rq->context));
+				GEM_BUG_ON(last &&
+					   i915_seqno_passed(last->fence.seqno,
+							     rq->fence.seqno));
 
 				submit = true;
 				last = rq;
@@ -2134,6 +2235,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
 		execlists_schedule_out(*port);
 	clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
 
+	smp_wmb(); /* complete the seqlock for execlists_active() */
 	WRITE_ONCE(execlists->active, execlists->inflight);
 }
 
@@ -2144,12 +2246,6 @@ invalidate_csb_entries(const u32 *first, const u32 *last)
 	clflush((void *)last);
 }
 
-static inline bool
-reset_in_progress(const struct intel_engine_execlists *execlists)
-{
-	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
-}
-
 /*
  * Starting with Gen12, the status has a new format:
  *
@@ -2240,7 +2336,6 @@ static void process_csb(struct intel_engine_cs *engine)
 	 */
 	head = execlists->csb_head;
 	tail = READ_ONCE(*execlists->csb_write);
-	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
 	if (unlikely(head == tail))
 		return;
 
@@ -2254,6 +2349,7 @@ static void process_csb(struct intel_engine_cs *engine)
 	 */
 	rmb();
 
+	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
 	do {
 		bool promote;
 
@@ -2288,11 +2384,13 @@ static void process_csb(struct intel_engine_cs *engine)
 		if (promote) {
 			struct i915_request * const *old = execlists->active;
 
+			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
+
+			ring_set_paused(engine, 0);
+
 			/* Point active to the new ELSP; prevent overwriting */
 			WRITE_ONCE(execlists->active, execlists->pending);
-
-			if (!inject_preempt_hang(execlists))
-				ring_set_paused(engine, 0);
+			smp_wmb(); /* notify execlists_active() */
 
 			/* cancel old inflight, prepare for switch */
 			trace_ports(execlists, "preempted", old);
@@ -2300,12 +2398,12 @@ static void process_csb(struct intel_engine_cs *engine)
 				execlists_schedule_out(*old++);
 
 			/* switch pending to inflight */
-			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
-			WRITE_ONCE(execlists->active,
-				   memcpy(execlists->inflight,
-					  execlists->pending,
-					  execlists_num_ports(execlists) *
-					  sizeof(*execlists->pending)));
+			memcpy(execlists->inflight,
+			       execlists->pending,
+			       execlists_num_ports(execlists) *
+			       sizeof(*execlists->pending));
+			smp_wmb(); /* complete the seqlock */
+			WRITE_ONCE(execlists->active, execlists->inflight);
 
 			WRITE_ONCE(execlists->pending[0], NULL);
 		} else {
@@ -2320,8 +2418,37 @@ static void process_csb(struct intel_engine_cs *engine)
 			 * coherent (visible from the CPU) before the
 			 * user interrupt and CSB is processed.
 			 */
-			GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
-				   !reset_in_progress(execlists));
+			if (GEM_SHOW_DEBUG() &&
+			    !i915_request_completed(*execlists->active) &&
+			    !reset_in_progress(execlists)) {
+				struct i915_request *rq __maybe_unused =
+					*execlists->active;
+				const u32 *regs __maybe_unused =
+					rq->context->lrc_reg_state;
+
+				ENGINE_TRACE(engine,
+					     "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
+					     ENGINE_READ(engine, RING_START),
+					     ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
+					     ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
+					     ENGINE_READ(engine, RING_CTL),
+					     ENGINE_READ(engine, RING_MI_MODE));
+				ENGINE_TRACE(engine,
+					     "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
+					     i915_ggtt_offset(rq->ring->vma),
+					     rq->head, rq->tail,
+					     rq->fence.context,
+					     lower_32_bits(rq->fence.seqno),
+					     hwsp_seqno(rq));
+				ENGINE_TRACE(engine,
+					     "ctx:{start:%08x, head:%04x, tail:%04x}, ",
+					     regs[CTX_RING_START],
+					     regs[CTX_RING_HEAD],
+					     regs[CTX_RING_TAIL]);
+
+				GEM_BUG_ON("context completed before request");
+			}
+
 			execlists_schedule_out(*execlists->active++);
 
 			GEM_BUG_ON(execlists->active - execlists->inflight >
@@ -2349,7 +2476,7 @@ static void process_csb(struct intel_engine_cs *engine)
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
 	lockdep_assert_held(&engine->active.lock);
-	if (!engine->execlists.pending[0]) {
+	if (!READ_ONCE(engine->execlists.pending[0])) {
 		rcu_read_lock(); /* protect peeking at execlists->active */
 		execlists_dequeue(engine);
 		rcu_read_unlock();
@@ -2366,12 +2493,12 @@ static void __execlists_hold(struct i915_request *rq)
 		if (i915_request_is_active(rq))
 			__i915_request_unsubmit(rq);
 
-		RQ_TRACE(rq, "on hold\n");
 		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
 		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
 		i915_request_set_hold(rq);
+		RQ_TRACE(rq, "on hold\n");
 
-		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+		for_each_waiter(p, rq) {
 			struct i915_request *w =
 				container_of(p->waiter, typeof(*w), sched);
 
@@ -2385,7 +2512,7 @@ static void __execlists_hold(struct i915_request *rq)
 			if (i915_request_completed(w))
 				continue;
 
-			if (i915_request_on_hold(rq))
+			if (i915_request_on_hold(w))
 				continue;
 
 			list_move_tail(&w->sched.link, &list);
@@ -2443,6 +2570,7 @@ static bool execlists_hold(struct intel_engine_cs *engine,
 	GEM_BUG_ON(i915_request_on_hold(rq));
 	GEM_BUG_ON(rq->engine != engine);
 	__execlists_hold(rq);
+	GEM_BUG_ON(list_empty(&engine->active.hold));
 
 unlock:
 	spin_unlock_irq(&engine->active.lock);
@@ -2452,23 +2580,27 @@ unlock:
 static bool hold_request(const struct i915_request *rq)
 {
 	struct i915_dependency *p;
+	bool result = false;
 
 	/*
 	 * If one of our ancestors is on hold, we must also be on hold,
 	 * otherwise we will bypass it and execute before it.
 	 */
-	list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
+	rcu_read_lock();
+	for_each_signaler(p, rq) {
 		const struct i915_request *s =
 			container_of(p->signaler, typeof(*s), sched);
 
 		if (s->engine != rq->engine)
 			continue;
 
-		if (i915_request_on_hold(s))
-			return true;
+		result = i915_request_on_hold(s);
+		if (result)
+			break;
 	}
+	rcu_read_unlock();
 
-	return false;
+	return result;
 }
 
 static void __execlists_unhold(struct i915_request *rq)
@@ -2478,6 +2610,8 @@ static void __execlists_unhold(struct i915_request *rq)
 	do {
 		struct i915_dependency *p;
 
+		RQ_TRACE(rq, "hold release\n");
+
 		GEM_BUG_ON(!i915_request_on_hold(rq));
 		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
 
@@ -2486,21 +2620,24 @@ static void __execlists_unhold(struct i915_request *rq)
 			       i915_sched_lookup_priolist(rq->engine,
 							  rq_prio(rq)));
 		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-		RQ_TRACE(rq, "hold release\n");
 
 		/* Also release any children on this engine that are ready */
-		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+		for_each_waiter(p, rq) {
 			struct i915_request *w =
 				container_of(p->waiter, typeof(*w), sched);
 
+			/* Propagate any change in error status */
+			if (rq->fence.error)
+				i915_request_set_error_once(w, rq->fence.error);
+
 			if (w->engine != rq->engine)
 				continue;
 
-			if (!i915_request_on_hold(rq))
+			if (!i915_request_on_hold(w))
 				continue;
 
 			/* Check that no other parents are also on hold */
-			if (hold_request(rq))
+			if (hold_request(w))
 				continue;
 
 			list_move_tail(&w->sched.link, &list);
@@ -2615,13 +2752,13 @@ static bool execlists_capture(struct intel_engine_cs *engine)
 	if (!cap)
 		return true;
 
+	spin_lock_irq(&engine->active.lock);
 	cap->rq = execlists_active(&engine->execlists);
-	GEM_BUG_ON(!cap->rq);
-
-	rcu_read_lock();
-	cap->rq = active_request(cap->rq->context->timeline, cap->rq);
-	cap->rq = i915_request_get_rcu(cap->rq);
-	rcu_read_unlock();
+	if (cap->rq) {
+		cap->rq = active_request(cap->rq->context->timeline, cap->rq);
+		cap->rq = i915_request_get_rcu(cap->rq);
+	}
+	spin_unlock_irq(&engine->active.lock);
 	if (!cap->rq)
 		goto err_free;
 
@@ -2660,27 +2797,25 @@ err_free:
 	return false;
 }
 
-static noinline void preempt_reset(struct intel_engine_cs *engine)
+static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
 {
 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
 	unsigned long *lock = &engine->gt->reset.flags;
 
-	if (i915_modparams.reset < 3)
+	if (!intel_has_reset_engine(engine->gt))
 		return;
 
 	if (test_and_set_bit(bit, lock))
 		return;
 
+	ENGINE_TRACE(engine, "reset for %s\n", msg);
+
 	/* Mark this tasklet as disabled to avoid waiting for it to complete */
 	tasklet_disable_nosync(&engine->execlists.tasklet);
 
-	ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
-		     READ_ONCE(engine->props.preempt_timeout_ms),
-		     jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
-
 	ring_set_paused(engine, 1); /* Freeze the current request in place */
 	if (execlists_capture(engine))
-		intel_engine_reset(engine, "preemption time out");
+		intel_engine_reset(engine, msg);
 	else
 		ring_set_paused(engine, 0);
 
@@ -2711,6 +2846,13 @@ static void execlists_submission_tasklet(unsigned long data)
 	bool timeout = preempt_timeout(engine);
 
 	process_csb(engine);
+
+	if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
+		engine->execlists.error_interrupt = 0;
+		if (ENGINE_READ(engine, RING_ESR)) /* confirm the error */
+			execlists_reset(engine, "CS error");
+	}
+
 	if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
 		unsigned long flags;
 
@@ -2719,8 +2861,8 @@ static void execlists_submission_tasklet(unsigned long data)
 		spin_unlock_irqrestore(&engine->active.lock, flags);
 
 		/* Recheck after serialising with direct-submission */
-		if (timeout && preempt_timeout(engine))
-			preempt_reset(engine);
+		if (unlikely(timeout && preempt_timeout(engine)))
+			execlists_reset(engine, "preemption time out");
 	}
 }
 
@@ -2793,6 +2935,7 @@ static void execlists_submit_request(struct i915_request *request)
 	spin_lock_irqsave(&engine->active.lock, flags);
 
 	if (unlikely(ancestor_on_hold(engine, request))) {
+		RQ_TRACE(request, "ancestor on hold\n");
 		list_add_tail(&request->sched.link, &engine->active.hold);
 		i915_request_set_hold(request);
 	} else {
@@ -2874,6 +3017,7 @@ __execlists_update_reg_state(const struct intel_context *ce,
 	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
 	regs[CTX_RING_HEAD] = head;
 	regs[CTX_RING_TAIL] = ring->tail;
+	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
 
 	/* RPCS */
 	if (engine->class == RENDER_CLASS) {
@@ -2921,22 +3065,6 @@ static void execlists_context_reset(struct intel_context *ce)
 	CE_TRACE(ce, "reset\n");
 	GEM_BUG_ON(!intel_context_is_pinned(ce));
 
-	/*
-	 * Because we emit WA_TAIL_DWORDS there may be a disparity
-	 * between our bookkeeping in ce->ring->head and ce->ring->tail and
-	 * that stored in context. As we only write new commands from
-	 * ce->ring->tail onwards, everything before that is junk. If the GPU
-	 * starts reading from its RING_HEAD from the context, it may try to
-	 * execute that junk and die.
-	 *
-	 * The contexts that are stilled pinned on resume belong to the
-	 * kernel, and are local to each engine. All other contexts will
-	 * have their head/tail sanitized upon pinning before use, so they
-	 * will never see garbage,
-	 *
-	 * So to avoid that we reset the context images upon resume. For
-	 * simplicity, we just zero everything out.
-	 */
 	intel_ring_reset(ce->ring, ce->ring->emit);
 
 	/* Scrub away the garbage */
@@ -2964,7 +3092,8 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 {
 	u32 *cs;
 
-	GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
+	if (!i915_request_timeline(rq)->has_initial_breadcrumb)
+		return 0;
 
 	cs = intel_ring_begin(rq, 6);
 	if (IS_ERR(cs))
@@ -3257,7 +3386,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
 		goto err;
 	}
 
-	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	err = i915_ggtt_pin(vma, 0, PIN_HIGH);
 	if (err)
 		goto err;
 
@@ -3346,6 +3475,49 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 	return ret;
 }
 
+static void enable_error_interrupt(struct intel_engine_cs *engine)
+{
+	u32 status;
+
+	engine->execlists.error_interrupt = 0;
+	ENGINE_WRITE(engine, RING_EMR, ~0u);
+	ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
+
+	status = ENGINE_READ(engine, RING_ESR);
+	if (unlikely(status)) {
+		dev_err(engine->i915->drm.dev,
+			"engine '%s' resumed still in error: %08x\n",
+			engine->name, status);
+		__intel_gt_reset(engine->gt, engine->mask);
+	}
+
+	/*
+	 * On current gen8+, we have 2 signals to play with
+	 *
+	 * - I915_ERROR_INSTUCTION (bit 0)
+	 *
+	 *    Generate an error if the command parser encounters an invalid
+	 *    instruction
+	 *
+	 *    This is a fatal error.
+	 *
+	 * - CP_PRIV (bit 2)
+	 *
+	 *    Generate an error on privilege violation (where the CP replaces
+	 *    the instruction with a no-op). This also fires for writes into
+	 *    read-only scratch pages.
+	 *
+	 *    This is a non-fatal error, parsing continues.
+	 *
+	 * * there are a few others defined for odd HW that we do not use
+	 *
+	 * Since CP_PRIV fires for cases where we have chosen to ignore the
+	 * error (as the HW is validating and suppressing the mistakes), we
+	 * only unmask the instruction error bit.
+	 */
+	ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
+}
+
 static void enable_execlists(struct intel_engine_cs *engine)
 {
 	u32 mode;
@@ -3367,6 +3539,8 @@ static void enable_execlists(struct intel_engine_cs *engine)
 			i915_ggtt_offset(engine->status_page.vma));
 	ENGINE_POSTING_READ(engine, RING_HWS_PGA);
 
+	enable_error_interrupt(engine);
+
 	engine->context_tag = 0;
 }
 
@@ -3384,9 +3558,6 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine)
 
 static int execlists_resume(struct intel_engine_cs *engine)
 {
-	intel_engine_apply_workarounds(engine);
-	intel_engine_apply_whitelist(engine);
-
 	intel_mocs_init_engine(engine);
 
 	intel_engine_reset_breadcrumbs(engine);
@@ -3517,9 +3688,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	if (!rq)
 		goto unwind;
 
-	/* We still have requests in-flight; the engine should be active */
-	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
-
 	ce = rq->context;
 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
 
@@ -3529,8 +3697,12 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 		goto out_replay;
 	}
 
+	/* We still have requests in-flight; the engine should be active */
+	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+
 	/* Context has requests still in-flight; it should not be idle! */
 	GEM_BUG_ON(i915_active_is_idle(&ce->active));
+
 	rq = active_request(ce->timeline, rq);
 	head = intel_ring_wrap(ce->ring, rq->head);
 	GEM_BUG_ON(head == ce->ring->tail);
@@ -3604,7 +3776,10 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
 
 static void nop_submission_tasklet(unsigned long data)
 {
+	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
+
 	/* The driver is wedged; don't process any more events. */
+	WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
 }
 
 static void execlists_reset_cancel(struct intel_engine_cs *engine)
@@ -4273,6 +4448,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 
 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
+	engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
 }
 
 static void rcs_submission_override(struct intel_engine_cs *engine)
@@ -4514,8 +4690,13 @@ populate_lr_context(struct intel_context *ce,
 		inhibit = false;
 	}
 
-	/* The second page of the context object contains some fields which must
-	 * be set up prior to the first execution. */
+	/* Clear the ppHWSP (inc. per-context counters) */
+	memset(vaddr, 0, PAGE_SIZE);
+
+	/*
+	 * The second page of the context object contains some registers which
+	 * must be set up prior to the first execution.
+	 */
 	execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
 				 ce, engine, ring, inhibit);
 
@@ -4553,8 +4734,17 @@ static int __execlists_context_alloc(struct intel_context *ce,
 
 	if (!ce->timeline) {
 		struct intel_timeline *tl;
+		struct i915_vma *hwsp;
 
-		tl = intel_timeline_create(engine->gt, NULL);
+		/*
+		 * Use the static global HWSP for the kernel context, and
+		 * a dynamically allocated cacheline for everyone else.
+		 */
+		hwsp = NULL;
+		if (unlikely(intel_context_is_barrier(ce)))
+			hwsp = engine->status_page.vma;
+
+		tl = intel_timeline_create(engine->gt, hwsp);
 		if (IS_ERR(tl)) {
 			ret = PTR_ERR(tl);
 			goto error_deref_obj;
@@ -4723,7 +4913,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
 	mask = rq->execution_mask;
 	if (unlikely(!mask)) {
 		/* Invalid selection, submit to a random engine in error */
-		i915_request_skip(rq, -ENODEV);
+		i915_request_set_error_once(rq, -ENODEV);
 		mask = ve->siblings[0]->mask;
 	}
 
@@ -4737,7 +4927,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
 static void virtual_submission_tasklet(unsigned long data)
 {
 	struct virtual_engine * const ve = (struct virtual_engine *)data;
-	const int prio = ve->base.execlists.queue_priority_hint;
+	const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
 	intel_engine_mask_t mask;
 	unsigned int n;
 
@@ -5133,11 +5323,15 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 		show_request(m, last, "\t\tE ");
 	}
 
-	last = NULL;
-	count = 0;
+	if (execlists->switch_priority_hint != INT_MIN)
+		drm_printf(m, "\t\tSwitch priority hint: %d\n",
+			   READ_ONCE(execlists->switch_priority_hint));
 	if (execlists->queue_priority_hint != INT_MIN)
 		drm_printf(m, "\t\tQueue priority hint: %d\n",
-			   execlists->queue_priority_hint);
+			   READ_ONCE(execlists->queue_priority_hint));
+
+	last = NULL;
+	count = 0;
 	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		int i;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 08a3be65f700..d39b72590e40 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -17,6 +17,7 @@
 #define CTX_RING_CTL			(0x0a + 1)
 #define CTX_BB_STATE			(0x10 + 1)
 #define CTX_BB_PER_CTX_PTR		(0x18 + 1)
+#define CTX_TIMESTAMP			(0x22 + 1)
 #define CTX_PDP3_UDW			(0x24 + 1)
 #define CTX_PDP3_LDW			(0x26 + 1)
 #define CTX_PDP2_UDW			(0x28 + 1)
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index eeef90b55c64..632e08a4592b 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -280,9 +280,32 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = {
 	GEN11_MOCS_ENTRIES
 };
 
-static bool get_mocs_settings(const struct drm_i915_private *i915,
-			      struct drm_i915_mocs_table *table)
+enum {
+	HAS_GLOBAL_MOCS = BIT(0),
+	HAS_ENGINE_MOCS = BIT(1),
+	HAS_RENDER_L3CC = BIT(2),
+};
+
+static bool has_l3cc(const struct drm_i915_private *i915)
 {
+	return true;
+}
+
+static bool has_global_mocs(const struct drm_i915_private *i915)
+{
+	return HAS_GLOBAL_MOCS_REGISTERS(i915);
+}
+
+static bool has_mocs(const struct drm_i915_private *i915)
+{
+	return !IS_DGFX(i915);
+}
+
+static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
+				      struct drm_i915_mocs_table *table)
+{
+	unsigned int flags;
+
 	if (INTEL_GEN(i915) >= 12) {
 		table->size  = ARRAY_SIZE(tgl_mocs_table);
 		table->table = tgl_mocs_table;
@@ -300,13 +323,13 @@ static bool get_mocs_settings(const struct drm_i915_private *i915,
 		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
 		table->table = broxton_mocs_table;
 	} else {
-		WARN_ONCE(INTEL_GEN(i915) >= 9,
-			  "Platform that should have a MOCS table does not.\n");
-		return false;
+		drm_WARN_ONCE(&i915->drm, INTEL_GEN(i915) >= 9,
+			      "Platform that should have a MOCS table does not.\n");
+		return 0;
 	}
 
 	if (GEM_DEBUG_WARN_ON(table->size > table->n_entries))
-		return false;
+		return 0;
 
 	/* WaDisableSkipCaching:skl,bxt,kbl,glk */
 	if (IS_GEN(i915, 9)) {
@@ -315,10 +338,20 @@ static bool get_mocs_settings(const struct drm_i915_private *i915,
 		for (i = 0; i < table->size; i++)
 			if (GEM_DEBUG_WARN_ON(table->table[i].l3cc_value &
 					      (L3_ESC(1) | L3_SCC(0x7))))
-				return false;
+				return 0;
 	}
 
-	return true;
+	flags = 0;
+	if (has_mocs(i915)) {
+		if (has_global_mocs(i915))
+			flags |= HAS_GLOBAL_MOCS;
+		else
+			flags |= HAS_ENGINE_MOCS;
+	}
+	if (has_l3cc(i915))
+		flags |= HAS_RENDER_L3CC;
+
+	return flags;
 }
 
 /*
@@ -411,18 +444,20 @@ static void init_l3cc_table(struct intel_engine_cs *engine,
 void intel_mocs_init_engine(struct intel_engine_cs *engine)
 {
 	struct drm_i915_mocs_table table;
+	unsigned int flags;
 
 	/* Called under a blanket forcewake */
 	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
 
-	if (!get_mocs_settings(engine->i915, &table))
+	flags = get_mocs_settings(engine->i915, &table);
+	if (!flags)
 		return;
 
 	/* Platforms with global MOCS do not need per-engine initialization. */
-	if (!HAS_GLOBAL_MOCS_REGISTERS(engine->i915))
+	if (flags & HAS_ENGINE_MOCS)
 		init_mocs_table(engine, &table);
 
-	if (engine->class == RENDER_CLASS)
+	if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS)
 		init_l3cc_table(engine, &table);
 }
 
@@ -431,26 +466,17 @@ static u32 global_mocs_offset(void)
 	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
 }
 
-static void init_global_mocs(struct intel_gt *gt)
+void intel_mocs_init(struct intel_gt *gt)
 {
 	struct drm_i915_mocs_table table;
+	unsigned int flags;
 
 	/*
 	 * LLC and eDRAM control values are not applicable to dgfx
 	 */
-	if (IS_DGFX(gt->i915))
-		return;
-
-	if (!get_mocs_settings(gt->i915, &table))
-		return;
-
-	__init_mocs_table(gt->uncore, &table, global_mocs_offset());
-}
-
-void intel_mocs_init(struct intel_gt *gt)
-{
-	if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
-		init_global_mocs(gt);
+	flags = get_mocs_settings(gt->i915, &table);
+	if (flags & HAS_GLOBAL_MOCS)
+		__init_mocs_table(gt->uncore, &table, global_mocs_offset());
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 9e303c29d6e3..3847ee44b181 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -7,6 +7,7 @@
 #include <linux/pm_runtime.h>
 
 #include "i915_drv.h"
+#include "i915_vgpu.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
 #include "intel_rc6.h"
@@ -226,10 +227,7 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6)
 
 	set(uncore, GEN6_RC_SLEEP, 0);
 	set(uncore, GEN6_RC1e_THRESHOLD, 1000);
-	if (IS_IVYBRIDGE(i915))
-		set(uncore, GEN6_RC6_THRESHOLD, 125000);
-	else
-		set(uncore, GEN6_RC6_THRESHOLD, 50000);
+	set(uncore, GEN6_RC6_THRESHOLD, 50000);
 	set(uncore, GEN6_RC6p_THRESHOLD, 150000);
 	set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */
 
@@ -299,7 +297,6 @@ static int vlv_rc6_init(struct intel_rc6 *rc6)
 		pcbr_offset = (pcbr & ~4095) - i915->dsm.start;
 		pctx = i915_gem_object_create_stolen_for_preallocated(i915,
 								      pcbr_offset,
-								      I915_GTT_OFFSET_NONE,
 								      pctx_size);
 		if (IS_ERR(pctx))
 			return PTR_ERR(pctx);
@@ -323,10 +320,10 @@ static int vlv_rc6_init(struct intel_rc6 *rc6)
 		return PTR_ERR(pctx);
 	}
 
-	GEM_BUG_ON(range_overflows_t(u64,
-				     i915->dsm.start,
-				     pctx->stolen->start,
-				     U32_MAX));
+	GEM_BUG_ON(range_overflows_end_t(u64,
+					 i915->dsm.start,
+					 pctx->stolen->start,
+					 U32_MAX));
 	pctx_paddr = i915->dsm.start + pctx->stolen->start;
 	intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
 
@@ -542,6 +539,8 @@ void intel_rc6_init(struct intel_rc6 *rc6)
 
 void intel_rc6_sanitize(struct intel_rc6 *rc6)
 {
+	memset(rc6->prev_hw_residency, 0, sizeof(rc6->prev_hw_residency));
+
 	if (rc6->enabled) { /* unbalanced suspend/resume */
 		rpm_get(rc6);
 		rc6->enabled = false;
@@ -604,6 +603,7 @@ void intel_rc6_unpark(struct intel_rc6 *rc6)
 void intel_rc6_park(struct intel_rc6 *rc6)
 {
 	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+	unsigned int target;
 
 	if (!rc6->enabled)
 		return;
@@ -618,7 +618,14 @@ void intel_rc6_park(struct intel_rc6 *rc6)
 
 	/* Turn off the HW timers and go directly to rc6 */
 	set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);
-	set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT);
+
+	if (HAS_RC6pp(rc6_to_i915(rc6)))
+		target = 0x6; /* deepest rc6 */
+	else if (HAS_RC6p(rc6_to_i915(rc6)))
+		target = 0x5; /* deep rc6 */
+	else
+		target = 0x4; /* normal rc6 */
+	set(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT);
 }
 
 void intel_rc6_disable(struct intel_rc6 *rc6)
@@ -713,7 +720,7 @@ u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg)
 	 */
 	i = (i915_mmio_reg_offset(reg) -
 	     i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
-	if (WARN_ON_ONCE(i >= ARRAY_SIZE(rc6->cur_residency)))
+	if (drm_WARN_ON_ONCE(&i915->drm, i >= ARRAY_SIZE(rc6->cur_residency)))
 		return 0;
 
 	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index beee0cf89bce..80db3c9d785e 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -48,8 +48,10 @@ static void engine_skip_context(struct i915_request *rq)
 
 	lockdep_assert_held(&engine->active.lock);
 	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
-		if (rq->context == hung_ctx)
-			i915_request_skip(rq, -EIO);
+		if (rq->context == hung_ctx) {
+			i915_request_set_error_once(rq, -EIO);
+			__i915_request_skip(rq);
+		}
 }
 
 static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
@@ -72,9 +74,10 @@ static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
 	if (score) {
 		atomic_add(score, &file_priv->ban_score);
 
-		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
-				 ctx->name, score,
-				 atomic_read(&file_priv->ban_score));
+		drm_dbg(&ctx->i915->drm,
+			"client %s: gained %u ban score, now %u\n",
+			ctx->name, score,
+			atomic_read(&file_priv->ban_score));
 	}
 }
 
@@ -85,19 +88,18 @@ static bool mark_guilty(struct i915_request *rq)
 	bool banned;
 	int i;
 
+	if (intel_context_is_closed(rq->context)) {
+		intel_context_set_banned(rq->context);
+		return true;
+	}
+
 	rcu_read_lock();
 	ctx = rcu_dereference(rq->context->gem_context);
 	if (ctx && !kref_get_unless_zero(&ctx->ref))
 		ctx = NULL;
 	rcu_read_unlock();
 	if (!ctx)
-		return false;
-
-	if (i915_gem_context_is_closed(ctx)) {
-		intel_context_set_banned(rq->context);
-		banned = true;
-		goto out;
-	}
+		return intel_context_is_banned(rq->context);
 
 	atomic_inc(&ctx->guilty_count);
 
@@ -122,8 +124,8 @@ static bool mark_guilty(struct i915_request *rq)
 	if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES))
 		banned = true;
 	if (banned) {
-		DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
-				 ctx->name, atomic_read(&ctx->guilty_count));
+		drm_dbg(&ctx->i915->drm, "context %s: guilty %d, banned\n",
+			ctx->name, atomic_read(&ctx->guilty_count));
 		intel_context_set_banned(rq->context);
 	}
 
@@ -153,11 +155,12 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
 
 	rcu_read_lock(); /* protect the GEM context */
 	if (guilty) {
-		i915_request_skip(rq, -EIO);
+		i915_request_set_error_once(rq, -EIO);
+		__i915_request_skip(rq);
 		if (mark_guilty(rq))
 			engine_skip_context(rq);
 	} else {
-		dma_fence_set_error(&rq->fence, -EAGAIN);
+		i915_request_set_error_once(rq, -EAGAIN);
 		mark_innocent(rq);
 	}
 	rcu_read_unlock();
@@ -226,7 +229,7 @@ static int g4x_do_reset(struct intel_gt *gt,
 			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
 	ret =  wait_for_atomic(g4x_reset_complete(pdev), 50);
 	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+		drm_dbg(&gt->i915->drm, "Wait for media reset failed\n");
 		goto out;
 	}
 
@@ -234,7 +237,7 @@ static int g4x_do_reset(struct intel_gt *gt,
 			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
 	ret =  wait_for_atomic(g4x_reset_complete(pdev), 50);
 	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+		drm_dbg(&gt->i915->drm, "Wait for render reset failed\n");
 		goto out;
 	}
 
@@ -260,7 +263,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
 					   5000, 0,
 					   NULL);
 	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+		drm_dbg(&gt->i915->drm, "Wait for render reset failed\n");
 		goto out;
 	}
 
@@ -271,7 +274,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
 					   5000, 0,
 					   NULL);
 	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+		drm_dbg(&gt->i915->drm, "Wait for media reset failed\n");
 		goto out;
 	}
 
@@ -300,8 +303,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
 					   500, 0,
 					   NULL);
 	if (err)
-		DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
-				 hw_domain_mask);
+		drm_dbg(&gt->i915->drm,
+			"Wait for 0x%08x engines reset failed\n",
+			hw_domain_mask);
 
 	return err;
 }
@@ -401,7 +405,8 @@ static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask)
 		return 0;
 
 	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
+		drm_dbg(&engine->i915->drm,
+			"Wait for SFC forced lock ack failed\n");
 		return ret;
 	}
 
@@ -515,9 +520,10 @@ static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
 	ret = __intel_wait_for_register_fw(uncore, reg, mask, ack,
 					   700, 0, NULL);
 	if (ret)
-		DRM_ERROR("%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
-			  engine->name, request,
-			  intel_uncore_read_fw(uncore, reg));
+		drm_err(&engine->i915->drm,
+			"%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
+			engine->name, request,
+			intel_uncore_read_fw(uncore, reg));
 
 	return ret;
 }
@@ -781,7 +787,7 @@ static void nop_submit_request(struct i915_request *request)
 	unsigned long flags;
 
 	RQ_TRACE(request, "-EIO\n");
-	dma_fence_set_error(&request->fence, -EIO);
+	i915_request_set_error_once(request, -EIO);
 
 	spin_lock_irqsave(&engine->active.lock, flags);
 	__i915_request_submit(request);
@@ -800,13 +806,6 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
 	if (test_bit(I915_WEDGED, &gt->reset.flags))
 		return;
 
-	if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) {
-		struct drm_printer p = drm_debug_printer(__func__);
-
-		for_each_engine(engine, gt, id)
-			intel_engine_dump(engine, &p, "%s\n", engine->name);
-	}
-
 	GT_TRACE(gt, "start\n");
 
 	/*
@@ -845,10 +844,30 @@ void intel_gt_set_wedged(struct intel_gt *gt)
 {
 	intel_wakeref_t wakeref;
 
+	if (test_bit(I915_WEDGED, &gt->reset.flags))
+		return;
+
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
 	mutex_lock(&gt->reset.mutex);
-	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
-		__intel_gt_set_wedged(gt);
+
+	if (GEM_SHOW_DEBUG()) {
+		struct drm_printer p = drm_debug_printer(__func__);
+		struct intel_engine_cs *engine;
+		enum intel_engine_id id;
+
+		drm_printf(&p, "called from %pS\n", (void *)_RET_IP_);
+		for_each_engine(engine, gt, id) {
+			if (intel_engine_is_idle(engine))
+				continue;
+
+			intel_engine_dump(engine, &p, "%s\n", engine->name);
+		}
+	}
+
+	__intel_gt_set_wedged(gt);
+
 	mutex_unlock(&gt->reset.mutex);
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
 }
 
 static bool __intel_gt_unset_wedged(struct intel_gt *gt)
@@ -969,7 +988,7 @@ static int resume(struct intel_gt *gt)
 	int ret;
 
 	for_each_engine(engine, gt, id) {
-		ret = engine->resume(engine);
+		ret = intel_engine_resume(engine);
 		if (ret)
 			return ret;
 	}
@@ -1022,7 +1041,7 @@ void intel_gt_reset(struct intel_gt *gt,
 		if (i915_modparams.reset)
 			dev_err(gt->i915->drm.dev, "GPU reset not supported\n");
 		else
-			DRM_DEBUG_DRIVER("GPU reset disabled\n");
+			drm_dbg(&gt->i915->drm, "GPU reset disabled\n");
 		goto error;
 	}
 
@@ -1049,8 +1068,9 @@ void intel_gt_reset(struct intel_gt *gt,
 	 */
 	ret = intel_gt_init_hw(gt);
 	if (ret) {
-		DRM_ERROR("Failed to initialise HW following reset (%d)\n",
-			  ret);
+		drm_err(&gt->i915->drm,
+			"Failed to initialise HW following reset (%d)\n",
+			ret);
 		goto taint;
 	}
 
@@ -1126,9 +1146,8 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
 		ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
 	if (ret) {
 		/* If we fail here, we expect to fallback to a global reset */
-		DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
-				 uses_guc ? "GuC " : "",
-				 engine->name, ret);
+		drm_dbg(&gt->i915->drm, "%sFailed to reset %s, ret=%d\n",
+			uses_guc ? "GuC " : "", engine->name, ret);
 		goto out;
 	}
 
@@ -1144,7 +1163,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
 	 * have been reset to their default values. Follow the init_ring
 	 * process to program RING_MODE, HWSP and re-enable submission.
 	 */
-	ret = engine->resume(engine);
+	ret = intel_engine_resume(engine);
 
 out:
 	intel_engine_cancel_stop_cs(engine);
@@ -1165,7 +1184,7 @@ static void intel_gt_reset_global(struct intel_gt *gt,
 
 	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
 
-	DRM_DEBUG_DRIVER("resetting chip\n");
+	drm_dbg(&gt->i915->drm, "resetting chip, engines=%x\n", engine_mask);
 	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
 
 	/* Use a watchdog to ensure that our reset completes */
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 6ff803f397c4..8cda1b7e17ba 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring)
 	if (atomic_fetch_inc(&ring->pin_count))
 		return 0;
 
-	flags = PIN_GLOBAL;
-
 	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
-	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+	flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
 
 	if (vma->obj->stolen)
 		flags |= PIN_MAPPABLE;
 	else
 		flags |= PIN_HIGH;
 
-	ret = i915_vma_pin(vma, 0, 0, flags);
+	ret = i915_ggtt_pin(vma, 0, flags);
 	if (unlikely(ret))
 		goto err_unpin;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index bc44fe8e5ffa..fdc3f10e12aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -29,11 +29,10 @@
 
 #include <linux/log2.h>
 
-#include <drm/i915_drm.h>
-
 #include "gem/i915_gem_context.h"
 
 #include "gen6_ppgtt.h"
+#include "gen7_renderclear.h"
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "intel_context.h"
@@ -568,7 +567,8 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)
 		return;
 
 	/* ring should be idle before issuing a sync flush*/
-	WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
+	drm_WARN_ON(&dev_priv->drm,
+		    (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
 
 	ENGINE_WRITE(engine, RING_INSTPM,
 		     _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
@@ -626,6 +626,27 @@ static bool stop_ring(struct intel_engine_cs *engine)
 	return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0;
 }
 
+static struct i915_address_space *vm_alias(struct i915_address_space *vm)
+{
+	if (i915_is_ggtt(vm))
+		vm = &i915_vm_to_ggtt(vm)->alias->vm;
+
+	return vm;
+}
+
+static void set_pp_dir(struct intel_engine_cs *engine)
+{
+	struct i915_address_space *vm = vm_alias(engine->gt->vm);
+
+	if (vm) {
+		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
+		ENGINE_WRITE(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G);
+		ENGINE_WRITE(engine, RING_PP_DIR_BASE,
+			     px_base(ppgtt->pd)->ggtt_offset << 10);
+	}
+}
+
 static int xcs_resume(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -684,6 +705,8 @@ static int xcs_resume(struct intel_engine_cs *engine)
 	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
 	intel_ring_update_space(ring);
 
+	set_pp_dir(engine);
+
 	/* First wake the ring up to an empty/idle ring */
 	ENGINE_WRITE(engine, RING_HEAD, ring->head);
 	ENGINE_WRITE(engine, RING_TAIL, ring->head);
@@ -857,43 +880,6 @@ static int rcs_resume(struct intel_engine_cs *engine)
 		intel_uncore_write(uncore, ECOSKPD,
 			   _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE));
 
-	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
-	if (IS_GEN_RANGE(i915, 4, 6))
-		intel_uncore_write(uncore, MI_MODE,
-				   _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
-
-	/* We need to disable the AsyncFlip performance optimisations in order
-	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
-	 * programmed to '1' on all products.
-	 *
-	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
-	 */
-	if (IS_GEN_RANGE(i915, 6, 7))
-		intel_uncore_write(uncore, MI_MODE,
-				   _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
-
-	/* Required for the hardware to program scanline values for waiting */
-	/* WaEnableFlushTlbInvalidationMode:snb */
-	if (IS_GEN(i915, 6))
-		intel_uncore_write(uncore, GFX_MODE,
-			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
-
-	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
-	if (IS_GEN(i915, 7))
-		intel_uncore_write(uncore, GFX_MODE_GEN7,
-			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
-			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
-
-	if (IS_GEN(i915, 6)) {
-		/* From the Sandybridge PRM, volume 1 part 3, page 24:
-		 * "If this bit is set, STCunit will have LRA as replacement
-		 *  policy. [...] This bit must be reset.  LRA replacement
-		 *  policy is not supported."
-		 */
-		intel_uncore_write(uncore, CACHE_MODE_0,
-			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
-	}
-
 	if (IS_GEN_RANGE(i915, 6, 7))
 		intel_uncore_write(uncore, INSTPM,
 				   _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
@@ -910,9 +896,7 @@ static void reset_cancel(struct intel_engine_cs *engine)
 
 	/* Mark all submitted requests as skipped. */
 	list_for_each_entry(request, &engine->active.requests, sched.link) {
-		if (!i915_request_signaled(request))
-			dma_fence_set_error(&request->fence, -EIO);
-
+		i915_request_set_error_once(request, -EIO);
 		i915_request_mark_complete(request);
 	}
 
@@ -1197,23 +1181,12 @@ static void ring_context_destroy(struct kref *ref)
 	intel_context_free(ce);
 }
 
-static struct i915_address_space *vm_alias(struct intel_context *ce)
-{
-	struct i915_address_space *vm;
-
-	vm = ce->vm;
-	if (i915_is_ggtt(vm))
-		vm = &i915_vm_to_ggtt(vm)->alias->vm;
-
-	return vm;
-}
-
 static int __context_pin_ppgtt(struct intel_context *ce)
 {
 	struct i915_address_space *vm;
 	int err = 0;
 
-	vm = vm_alias(ce);
+	vm = vm_alias(ce->vm);
 	if (vm)
 		err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)));
 
@@ -1224,7 +1197,7 @@ static void __context_unpin_ppgtt(struct intel_context *ce)
 {
 	struct i915_address_space *vm;
 
-	vm = vm_alias(ce);
+	vm = vm_alias(ce->vm);
 	if (vm)
 		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
 }
@@ -1384,7 +1357,9 @@ static int load_pd_dir(struct i915_request *rq,
 	return rq->engine->emit_flush(rq, EMIT_FLUSH);
 }
 
-static inline int mi_set_context(struct i915_request *rq, u32 flags)
+static inline int mi_set_context(struct i915_request *rq,
+				 struct intel_context *ce,
+				 u32 flags)
 {
 	struct drm_i915_private *i915 = rq->i915;
 	struct intel_engine_cs *engine = rq->engine;
@@ -1459,7 +1434,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 
 	*cs++ = MI_NOOP;
 	*cs++ = MI_SET_CONTEXT;
-	*cs++ = i915_ggtt_offset(rq->context->state) | flags;
+	*cs++ = i915_ggtt_offset(ce->state) | flags;
 	/*
 	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 	 * WaMiSetContext_Hang:snb,ivb,vlv
@@ -1574,21 +1549,64 @@ static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
 	return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
 }
 
+static int clear_residuals(struct i915_request *rq)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	int ret;
+
+	ret = switch_mm(rq, vm_alias(engine->kernel_context->vm));
+	if (ret)
+		return ret;
+
+	if (engine->kernel_context->state) {
+		ret = mi_set_context(rq,
+				     engine->kernel_context,
+				     MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT);
+		if (ret)
+			return ret;
+	}
+
+	ret = engine->emit_bb_start(rq,
+				    engine->wa_ctx.vma->node.start, 0,
+				    0);
+	if (ret)
+		return ret;
+
+	ret = engine->emit_flush(rq, EMIT_FLUSH);
+	if (ret)
+		return ret;
+
+	/* Always invalidate before the next switch_mm() */
+	return engine->emit_flush(rq, EMIT_INVALIDATE);
+}
+
 static int switch_context(struct i915_request *rq)
 {
+	struct intel_engine_cs *engine = rq->engine;
 	struct intel_context *ce = rq->context;
+	void **residuals = NULL;
 	int ret;
 
 	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
-	ret = switch_mm(rq, vm_alias(ce));
+	if (engine->wa_ctx.vma && ce != engine->kernel_context) {
+		if (engine->wa_ctx.vma->private != ce) {
+			ret = clear_residuals(rq);
+			if (ret)
+				return ret;
+
+			residuals = &engine->wa_ctx.vma->private;
+		}
+	}
+
+	ret = switch_mm(rq, vm_alias(ce->vm));
 	if (ret)
 		return ret;
 
 	if (ce->state) {
 		u32 flags;
 
-		GEM_BUG_ON(rq->engine->id != RCS0);
+		GEM_BUG_ON(engine->id != RCS0);
 
 		/* For resource streamer on HSW+ and power context elsewhere */
 		BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
@@ -1600,7 +1618,7 @@ static int switch_context(struct i915_request *rq)
 		else
 			flags |= MI_RESTORE_INHIBIT;
 
-		ret = mi_set_context(rq, flags);
+		ret = mi_set_context(rq, ce, flags);
 		if (ret)
 			return ret;
 	}
@@ -1609,6 +1627,20 @@ static int switch_context(struct i915_request *rq)
 	if (ret)
 		return ret;
 
+	/*
+	 * Now past the point of no return, this request _will_ be emitted.
+	 *
+	 * Or at least this preamble will be emitted, the request may be
+	 * interrupted prior to submitting the user payload. If so, we
+	 * still submit the "empty" request in order to preserve global
+	 * state tracking such as this, our tracking of the current
+	 * dirty context.
+	 */
+	if (residuals) {
+		intel_context_put(*residuals);
+		*residuals = intel_context_get(ce);
+	}
+
 	return 0;
 }
 
@@ -1662,7 +1694,8 @@ static void gen6_bsd_submit_request(struct i915_request *request)
 					 GEN6_BSD_SLEEP_INDICATOR,
 					 0,
 					 1000, 0, NULL))
-		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
+		drm_err(&uncore->i915->drm,
+			"timed out waiting for the BSD ring to wake up\n");
 
 	/* Now that the ring is fully powered up, update the tail */
 	i9xx_submit_request(request);
@@ -1787,11 +1820,16 @@ static void ring_release(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
-	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
-		(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
+	drm_WARN_ON(&dev_priv->drm, INTEL_GEN(dev_priv) > 2 &&
+		    (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
 
 	intel_engine_cleanup_common(engine);
 
+	if (engine->wa_ctx.vma) {
+		intel_context_put(engine->wa_ctx.vma->private);
+		i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
+	}
+
 	intel_ring_unpin(engine->legacy.ring);
 	intel_ring_put(engine->legacy.ring);
 
@@ -1939,6 +1977,64 @@ static void setup_vecs(struct intel_engine_cs *engine)
 	engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
 }
 
+static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine,
+				    struct i915_vma * const vma)
+{
+	return gen7_setup_clear_gpr_bb(engine, vma);
+}
+
+static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int size;
+	int err;
+
+	size = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */);
+	if (size <= 0)
+		return size;
+
+	size = ALIGN(size, PAGE_SIZE);
+	obj = i915_gem_object_create_internal(engine->i915, size);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vma = i915_vma_instance(obj, engine->gt->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_obj;
+	}
+
+	vma->private = intel_context_create(engine); /* dummy residuals */
+	if (IS_ERR(vma->private)) {
+		err = PTR_ERR(vma->private);
+		goto err_obj;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+	if (err)
+		goto err_private;
+
+	err = i915_vma_sync(vma);
+	if (err)
+		goto err_unpin;
+
+	err = gen7_ctx_switch_bb_setup(engine, vma);
+	if (err)
+		goto err_unpin;
+
+	engine->wa_ctx.vma = vma;
+	return 0;
+
+err_unpin:
+	i915_vma_unpin(vma);
+err_private:
+	intel_context_put(vma->private);
+err_obj:
+	i915_gem_object_put(obj);
+	return err;
+}
+
 int intel_ring_submission_setup(struct intel_engine_cs *engine)
 {
 	struct intel_timeline *timeline;
@@ -1992,11 +2088,19 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
 
 	GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
 
+	if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) {
+		err = gen7_ctx_switch_bb_init(engine);
+		if (err)
+			goto err_ring_unpin;
+	}
+
 	/* Finally, take ownership and responsibility for cleanup! */
 	engine->release = ring_release;
 
 	return 0;
 
+err_ring_unpin:
+	intel_ring_unpin(ring);
 err_ring:
 	intel_ring_put(ring);
 err_timeline_unpin:
@@ -2007,3 +2111,7 @@ err:
 	intel_engine_cleanup_common(engine);
 	return err;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_ring_submission.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index d2a3d935d186..cfaf141bac4d 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -4,6 +4,8 @@
  * Copyright © 2019 Intel Corporation
  */
 
+#include <drm/i915_drm.h>
+
 #include "i915_drv.h"
 #include "intel_gt.h"
 #include "intel_gt_irq.h"
@@ -55,7 +57,7 @@ static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
 	if (val < rps->max_freq_softlimit)
 		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
 
-	mask &= rps->pm_events;
+	mask &= READ_ONCE(rps->pm_events);
 
 	return rps_pm_sanitize_mask(rps, ~mask);
 }
@@ -68,17 +70,19 @@ static void rps_reset_ei(struct intel_rps *rps)
 static void rps_enable_interrupts(struct intel_rps *rps)
 {
 	struct intel_gt *gt = rps_to_gt(rps);
+	u32 events;
 
 	rps_reset_ei(rps);
 
 	if (IS_VALLEYVIEW(gt->i915))
 		/* WaGsvRC0ResidencyMethod:vlv */
-		rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
+		events = GEN6_PM_RP_UP_EI_EXPIRED;
 	else
-		rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
-				  GEN6_PM_RP_DOWN_THRESHOLD |
-				  GEN6_PM_RP_DOWN_TIMEOUT);
+		events = (GEN6_PM_RP_UP_THRESHOLD |
+			  GEN6_PM_RP_DOWN_THRESHOLD |
+			  GEN6_PM_RP_DOWN_TIMEOUT);
 
+	WRITE_ONCE(rps->pm_events, events);
 	spin_lock_irq(&gt->irq_lock);
 	gen6_gt_pm_enable_irq(gt, rps->pm_events);
 	spin_unlock_irq(&gt->irq_lock);
@@ -115,8 +119,7 @@ static void rps_disable_interrupts(struct intel_rps *rps)
 {
 	struct intel_gt *gt = rps_to_gt(rps);
 
-	rps->pm_events = 0;
-
+	WRITE_ONCE(rps->pm_events, 0);
 	set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
 
 	spin_lock_irq(&gt->irq_lock);
@@ -642,7 +645,7 @@ void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
 {
 	mutex_lock(&rps->power.mutex);
 	if (interactive) {
-		if (!rps->power.interactive++ && rps->active)
+		if (!rps->power.interactive++ && READ_ONCE(rps->active))
 			rps_set_power(rps, HIGH_POWER);
 	} else {
 		GEM_BUG_ON(!rps->power.interactive);
@@ -719,11 +722,15 @@ void intel_rps_unpark(struct intel_rps *rps)
 	 * performance, jump directly to RPe as our starting frequency.
 	 */
 	mutex_lock(&rps->lock);
-	rps->active = true;
+
+	WRITE_ONCE(rps->active, true);
+
 	freq = max(rps->cur_freq, rps->efficient_freq),
 	freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit);
 	intel_rps_set(rps, freq);
+
 	rps->last_adj = 0;
+
 	mutex_unlock(&rps->lock);
 
 	if (INTEL_GEN(rps_to_i915(rps)) >= 6)
@@ -743,7 +750,7 @@ void intel_rps_park(struct intel_rps *rps)
 	if (INTEL_GEN(i915) >= 6)
 		rps_disable_interrupts(rps);
 
-	rps->active = false;
+	WRITE_ONCE(rps->active, false);
 	if (rps->last_freq <= rps->idle_freq)
 		return;
 
@@ -763,14 +770,27 @@ void intel_rps_park(struct intel_rps *rps)
 	intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
 	rps_set(rps, rps->idle_freq, false);
 	intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
+
+	/*
+	 * Since we will try and restart from the previously requested
+	 * frequency on unparking, treat this idle point as a downclock
+	 * interrupt and reduce the frequency for resume. If we park/unpark
+	 * more frequently than the rps worker can run, we will not respond
+	 * to any EI and never see a change in frequency.
+	 *
+	 * (Note we accommodate Cherryview's limitation of only using an
+	 * even bin by applying it to all.)
+	 */
+	rps->cur_freq =
+		max_t(int, round_down(rps->cur_freq - 1, 2), rps->min_freq);
 }
 
 void intel_rps_boost(struct i915_request *rq)
 {
-	struct intel_rps *rps = &rq->engine->gt->rps;
+	struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
 	unsigned long flags;
 
-	if (i915_request_signaled(rq) || !rps->active)
+	if (i915_request_signaled(rq) || !READ_ONCE(rps->active))
 		return;
 
 	/* Serializes with i915_request_retire() */
@@ -1026,7 +1046,8 @@ static bool chv_rps_enable(struct intel_rps *rps)
 	vlv_punit_put(i915);
 
 	/* RPS code assumes GPLL is used */
-	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
+		      "GPLL not enabled\n");
 
 	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
 	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
@@ -1123,7 +1144,8 @@ static bool vlv_rps_enable(struct intel_rps *rps)
 	vlv_punit_put(i915);
 
 	/* RPS code assumes GPLL is used */
-	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
+		      "GPLL not enabled\n");
 
 	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
 	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
@@ -1191,11 +1213,11 @@ void intel_rps_enable(struct intel_rps *rps)
 	if (!rps->enabled)
 		return;
 
-	WARN_ON(rps->max_freq < rps->min_freq);
-	WARN_ON(rps->idle_freq > rps->max_freq);
+	drm_WARN_ON(&i915->drm, rps->max_freq < rps->min_freq);
+	drm_WARN_ON(&i915->drm, rps->idle_freq > rps->max_freq);
 
-	WARN_ON(rps->efficient_freq < rps->min_freq);
-	WARN_ON(rps->efficient_freq > rps->max_freq);
+	drm_WARN_ON(&i915->drm, rps->efficient_freq < rps->min_freq);
+	drm_WARN_ON(&i915->drm, rps->efficient_freq > rps->max_freq);
 }
 
 static void gen6_rps_disable(struct intel_rps *rps)
@@ -1390,9 +1412,9 @@ static void chv_rps_init(struct intel_rps *rps)
 			BIT(VLV_IOSF_SB_NC) |
 			BIT(VLV_IOSF_SB_CCK));
 
-	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
-		   rps->min_freq) & 1,
-		  "Odd GPU freq values\n");
+	drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq |
+				   rps->rp1_freq | rps->min_freq) & 1,
+		      "Odd GPU freq values\n");
 }
 
 static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
@@ -1451,12 +1473,12 @@ static void rps_work(struct work_struct *work)
 	u32 pm_iir = 0;
 
 	spin_lock_irq(&gt->irq_lock);
-	pm_iir = fetch_and_zero(&rps->pm_iir);
+	pm_iir = fetch_and_zero(&rps->pm_iir) & READ_ONCE(rps->pm_events);
 	client_boost = atomic_read(&rps->num_waiters);
 	spin_unlock_irq(&gt->irq_lock);
 
 	/* Make sure we didn't queue anything we're not going to process. */
-	if ((pm_iir & rps->pm_events) == 0 && !client_boost)
+	if (!pm_iir && !client_boost)
 		goto out;
 
 	mutex_lock(&rps->lock);
@@ -1552,11 +1574,15 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
 void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
 {
 	struct intel_gt *gt = rps_to_gt(rps);
+	u32 events;
 
-	if (pm_iir & rps->pm_events) {
+	events = pm_iir & READ_ONCE(rps->pm_events);
+	if (events) {
 		spin_lock(&gt->irq_lock);
-		gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events);
-		rps->pm_iir |= pm_iir & rps->pm_events;
+
+		gen6_gt_pm_mask_irq(gt, events);
+		rps->pm_iir |= events;
+
 		schedule_work(&rps->work);
 		spin_unlock(&gt->irq_lock);
 	}
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index d8d9f1179c2b..91debbc97c9a 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -312,7 +312,7 @@ int intel_timeline_pin(struct intel_timeline *tl)
 	if (atomic_add_unless(&tl->pin_count, 1, 0))
 		return 0;
 
-	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
 	if (err)
 		return err;
 
@@ -410,6 +410,8 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 	void *vaddr;
 	int err;
 
+	might_lock(&tl->gt->ggtt->vm.mutex);
+
 	/*
 	 * If there is an outstanding GPU reference to this cacheline,
 	 * such as it being sampled by a HW semaphore on another timeline,
@@ -435,7 +437,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 		goto err_rollback;
 	}
 
-	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	err = i915_ggtt_pin(vma, 0, PIN_HIGH);
 	if (err) {
 		__idle_hwsp_free(vma->private, cacheline);
 		goto err_rollback;
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 6c2f8462e0f3..5176ad1a3976 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -116,17 +116,17 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
 		} else {
 			wa_ = &wal->list[mid];
 
-			if ((wa->mask & ~wa_->mask) == 0) {
-				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
+			if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
+				DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
 					  i915_mmio_reg_offset(wa_->reg),
-					  wa_->mask, wa_->val);
+					  wa_->clr, wa_->set);
 
-				wa_->val &= ~wa->mask;
+				wa_->set &= ~wa->clr;
 			}
 
 			wal->wa_count++;
-			wa_->val |= wa->val;
-			wa_->mask |= wa->mask;
+			wa_->set |= wa->set;
+			wa_->clr |= wa->clr;
 			wa_->read |= wa->read;
 			return;
 		}
@@ -147,13 +147,13 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
 	}
 }
 
-static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
-		   u32 val, u32 read_mask)
+static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
+		   u32 clear, u32 set, u32 read_mask)
 {
 	struct i915_wa wa = {
 		.reg  = reg,
-		.mask = mask,
-		.val  = val,
+		.clr  = clear,
+		.set  = set,
 		.read = read_mask,
 	};
 
@@ -161,38 +161,43 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
 }
 
 static void
-wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
-		   u32 val)
+wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
 {
-	wa_add(wal, reg, mask, val, mask);
+	wa_add(wal, reg, clear, set, clear);
 }
 
 static void
-wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
 {
-	wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
+	wa_write_masked_or(wal, reg, ~0, set);
 }
 
 static void
-wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
 {
-	wa_write_masked_or(wal, reg, ~0, val);
+	wa_write_masked_or(wal, reg, set, set);
 }
 
 static void
-wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 {
-	wa_write_masked_or(wal, reg, val, val);
+	wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
+}
+
+static void
+wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+{
+	wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val);
 }
 
 #define WA_SET_BIT_MASKED(addr, mask) \
-	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
+	wa_masked_en(wal, (addr), (mask))
 
 #define WA_CLR_BIT_MASKED(addr, mask) \
-	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))
+	wa_masked_dis(wal, (addr), (mask))
 
 #define WA_SET_FIELD_MASKED(addr, mask, value) \
-	wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
+	wa_write_masked_or(wal, (addr), 0, _MASKED_FIELD((mask), (value)))
 
 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
 				      struct i915_wa_list *wal)
@@ -570,12 +575,29 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
 	/* allow headerless messages for preemptible GPGPU context */
 	WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
 			  GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
+
+	/* Wa_1604278689:icl,ehl */
+	wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
+	wa_write_masked_or(wal, IVB_FBC_RT_BASE_UPPER,
+			   0, /* write-only register; skip validation */
+			   0xFFFFFFFF);
+
+	/* Wa_1406306137:icl,ehl */
+	wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
 }
 
 static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
 				     struct i915_wa_list *wal)
 {
-	/* Wa_1409142259:tgl */
+	/*
+	 * Wa_1409142259:tgl
+	 * Wa_1409347922:tgl
+	 * Wa_1409252684:tgl
+	 * Wa_1409217633:tgl
+	 * Wa_1409207793:tgl
+	 * Wa_1409178076:tgl
+	 * Wa_1408979724:tgl
+	 */
 	WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
 			  GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
 
@@ -588,6 +610,11 @@ static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
 	 */
 	wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
 	       FF_MODE2_TDS_TIMER_128, 0);
+
+	/* WaDisableGPGPUMidThreadPreemption:tgl */
+	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
 }
 
 static void
@@ -657,7 +684,7 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
 	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
 		*cs++ = i915_mmio_reg_offset(wa->reg);
-		*cs++ = wa->val;
+		*cs++ = wa->set;
 	}
 	*cs++ = MI_NOOP;
 
@@ -822,7 +849,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
 		DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
 			 intel_sseu_get_subslices(sseu, slice), l3_en);
 		subslice = fls(l3_en);
-		WARN_ON(!subslice);
+		drm_WARN_ON(&i915->drm, !subslice);
 	}
 	subslice--;
 
@@ -893,11 +920,6 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 			    SLICE_UNIT_LEVEL_CLKGATE,
 			    MSCUNIT_CLKGATE_DIS);
 
-	/* Wa_1406680159:icl */
-	wa_write_or(wal,
-		    SUBSLICE_UNIT_LEVEL_CLKGATE,
-		    GWUNIT_CLKGATE_DIS);
-
 	/* Wa_1406838659:icl (pre-prod) */
 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
 		wa_write_or(wal,
@@ -926,7 +948,7 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 			    SUBSLICE_UNIT_LEVEL_CLKGATE2,
 			    CPSSUNIT_CLKGATE_DIS);
 
-	/* Wa_1409180338:tgl */
+	/* Wa_1607087056:tgl also know as BUG:1409180338 */
 	if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
 		wa_write_or(wal,
 			    SLICE_UNIT_LEVEL_CLKGATE,
@@ -986,11 +1008,10 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
 static bool
 wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
 {
-	if ((cur ^ wa->val) & wa->read) {
-		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
+	if ((cur ^ wa->set) & wa->read) {
+		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x)\n",
 			  name, from, i915_mmio_reg_offset(wa->reg),
-			  cur, cur & wa->read,
-			  wa->val, wa->mask);
+			  cur, cur & wa->read, wa->set);
 
 		return false;
 	}
@@ -1015,7 +1036,10 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
 	intel_uncore_forcewake_get__locked(uncore, fw);
 
 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
-		intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
+		if (wa->clr)
+			intel_uncore_rmw_fw(uncore, wa->reg, wa->clr, wa->set);
+		else
+			intel_uncore_write_fw(uncore, wa->reg, wa->set);
 		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 			wa_verify(wa,
 				  intel_uncore_read_fw(uncore, wa->reg),
@@ -1239,6 +1263,7 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
 	case RENDER_CLASS:
 		/*
 		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
+		 * Wa_1408556865:tgl
 		 *
 		 * This covers 4 registers which are next to one another :
 		 *   - PS_INVOCATION_COUNT
@@ -1249,6 +1274,12 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
 		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
 				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
 				  RING_FORCE_TO_NONPRIV_RANGE_4);
+
+		/* Wa_1808121037:tgl */
+		whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
+
+		/* Wa_1806527549:tgl */
+		whitelist_reg(w, HIZ_CHICKEN);
 		break;
 	default:
 		break;
@@ -1315,19 +1346,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 	struct drm_i915_private *i915 = engine->i915;
 
 	if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
-		/* Wa_1606700617:tgl */
-		wa_masked_en(wal,
-			     GEN9_CS_DEBUG_MODE1,
-			     FF_DOP_CLOCK_GATE_DISABLE);
-
-		/* Wa_1607138336:tgl */
+		/*
+		 * Wa_1607138336:tgl
+		 * Wa_1607063988:tgl
+		 */
 		wa_write_or(wal,
 			    GEN9_CTX_PREEMPT_REG,
 			    GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
 
-		/* Wa_1607030317:tgl */
-		/* Wa_1607186500:tgl */
-		/* Wa_1607297627:tgl */
+		/*
+		 * Wa_1607030317:tgl
+		 * Wa_1607186500:tgl
+		 * Wa_1607297627:tgl there is 3 entries for this WA on BSpec, 2
+		 * of then says it is fixed on B0 the other one says it is
+		 * permanent
+		 */
 		wa_masked_en(wal,
 			     GEN6_RC_SLEEP_PSMI_CONTROL,
 			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
@@ -1340,6 +1373,35 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		wa_write_or(wal,
 			    GEN7_SARCHKMD,
 			    GEN7_DISABLE_SAMPLER_PREFETCH);
+
+		/* Wa_1407928979:tgl */
+		wa_write_or(wal,
+			    GEN7_FF_THREAD_MODE,
+			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
+
+		/*
+		 * Wa_1409085225:tgl
+		 * Wa_14010229206:tgl
+		 */
+		wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
+
+		/* Wa_1408615072:tgl */
+		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
+			    VSUNIT_CLKGATE_DIS_TGL);
+	}
+
+	if (IS_TIGERLAKE(i915)) {
+		/* Wa_1606931601:tgl */
+		wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
+
+		/* Wa_1409804808:tgl */
+		wa_masked_en(wal, GEN7_ROW_CHICKEN2,
+			     GEN12_PUSH_CONST_DEREF_HOLD_DIS);
+
+		/* Wa_1606700617:tgl */
+		wa_masked_en(wal,
+			     GEN9_CS_DEBUG_MODE1,
+			     FF_DOP_CLOCK_GATE_DISABLE);
 	}
 
 	if (IS_GEN(i915, 11)) {
@@ -1405,10 +1467,38 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 				   GEN11_SCRATCH2,
 				   GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
 				   0);
+
+		/* WaEnable32PlaneMode:icl */
+		wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
+			     GEN11_ENABLE_32_PLANE_MODE);
+
+		/*
+		 * Wa_1408615072:icl,ehl  (vsunit)
+		 * Wa_1407596294:icl,ehl  (hsunit)
+		 */
+		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+			    VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
+
+		/* Wa_1407352427:icl,ehl */
+		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
+			    PSDUNIT_CLKGATE_DIS);
+
+		/* Wa_1406680159:icl,ehl */
+		wa_write_or(wal,
+			    SUBSLICE_UNIT_LEVEL_CLKGATE,
+			    GWUNIT_CLKGATE_DIS);
+
+		/*
+		 * Wa_1408767742:icl[a2..forever],ehl[all]
+		 * Wa_1605460711:icl[a0..c0]
+		 */
+		wa_write_or(wal,
+			    GEN7_FF_THREAD_MODE,
+			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
 	}
 
-	if (IS_GEN_RANGE(i915, 9, 11)) {
-		/* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
+	if (IS_GEN_RANGE(i915, 9, 12)) {
+		/* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
 		wa_masked_en(wal,
 			     GEN7_FF_SLICE_CS_CHICKEN1,
 			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
@@ -1452,6 +1542,52 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 			    GEN8_L3SQCREG4,
 			    GEN8_LQSC_FLUSH_COHERENT_LINES);
 	}
+
+	if (IS_GEN(i915, 7))
+		/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
+		wa_masked_en(wal,
+			     GFX_MODE_GEN7,
+			     GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
+
+	if (IS_GEN_RANGE(i915, 6, 7))
+		/*
+		 * We need to disable the AsyncFlip performance optimisations in
+		 * order to use MI_WAIT_FOR_EVENT within the CS. It should
+		 * already be programmed to '1' on all products.
+		 *
+		 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
+		 */
+		wa_masked_en(wal,
+			     MI_MODE,
+			     ASYNC_FLIP_PERF_DISABLE);
+
+	if (IS_GEN(i915, 6)) {
+		/*
+		 * Required for the hardware to program scanline values for
+		 * waiting
+		 * WaEnableFlushTlbInvalidationMode:snb
+		 */
+		wa_masked_en(wal,
+			     GFX_MODE,
+			     GFX_TLB_INVALIDATE_EXPLICIT);
+
+		/*
+		 * From the Sandybridge PRM, volume 1 part 3, page 24:
+		 * "If this bit is set, STCunit will have LRA as replacement
+		 *  policy. [...] This bit must be reset. LRA replacement
+		 *  policy is not supported."
+		 */
+		wa_masked_dis(wal,
+			      CACHE_MODE_0,
+			      CM0_STC_EVICT_DISABLE_LRA_SNB);
+	}
+
+	if (IS_GEN_RANGE(i915, 4, 6))
+		/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
+		wa_add(wal, MI_MODE,
+		       0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
+		       /* XXX bit doesn't stick on Broadwater */
+		       IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH);
 }
 
 static void
@@ -1470,7 +1606,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 static void
 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 {
-	if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
+	if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 4))
 		return;
 
 	if (engine->class == RENDER_CLASS)
@@ -1483,7 +1619,7 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine)
 {
 	struct i915_wa_list *wal = &engine->wa_list;
 
-	if (INTEL_GEN(engine->i915) < 8)
+	if (INTEL_GEN(engine->i915) < 4)
 		return;
 
 	wa_init_start(wal, "engine", engine->name);
@@ -1626,6 +1762,16 @@ static int engine_wa_list_verify(struct intel_context *ce,
 		goto err_vma;
 	}
 
+	i915_vma_lock(vma);
+	err = i915_request_await_object(rq, vma->obj, true);
+	if (err == 0)
+		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
+	if (err) {
+		i915_request_add(rq);
+		goto err_vma;
+	}
+
 	err = wa_list_srm(rq, wal, vma);
 	if (err)
 		goto err_vma;
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
index e27ab1b710b3..d166a7145720 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
@@ -13,8 +13,8 @@
 
 struct i915_wa {
 	i915_reg_t	reg;
-	u32		mask;
-	u32		val;
+	u32		clr;
+	u32		set;
 	u32		read;
 };
 
diff --git a/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c b/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c
new file mode 100644
index 000000000000..610ca7687735
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Generated by: IGT Gpu Tools on Fri 21 Feb 2020 05:29:32 AM UTC
+ */
+
+static const u32 ivb_clear_kernel[] = {
+	0x00000001, 0x26020128, 0x00000024, 0x00000000,
+	0x00000040, 0x20280c21, 0x00000028, 0x00000001,
+	0x01000010, 0x20000c20, 0x0000002c, 0x00000000,
+	0x00010220, 0x34001c00, 0x00001400, 0x0000002c,
+	0x00600001, 0x20600061, 0x00000000, 0x00000000,
+	0x00000008, 0x20601c85, 0x00000e00, 0x0000000c,
+	0x00000005, 0x20601ca5, 0x00000060, 0x00000001,
+	0x00000008, 0x20641c85, 0x00000e00, 0x0000000d,
+	0x00000005, 0x20641ca5, 0x00000064, 0x00000003,
+	0x00000041, 0x207424a5, 0x00000064, 0x00000034,
+	0x00000040, 0x206014a5, 0x00000060, 0x00000074,
+	0x00000008, 0x20681c85, 0x00000e00, 0x00000008,
+	0x00000005, 0x20681ca5, 0x00000068, 0x0000000f,
+	0x00000041, 0x20701ca5, 0x00000060, 0x00000010,
+	0x00000040, 0x206814a5, 0x00000068, 0x00000070,
+	0x00600001, 0x20a00061, 0x00000000, 0x00000000,
+	0x00000005, 0x206c1c85, 0x00000e00, 0x00000007,
+	0x00000041, 0x206c1ca5, 0x0000006c, 0x00000004,
+	0x00600001, 0x20800021, 0x008d0000, 0x00000000,
+	0x00000001, 0x20800021, 0x0000006c, 0x00000000,
+	0x00000001, 0x20840021, 0x00000068, 0x00000000,
+	0x00000001, 0x20880061, 0x00000000, 0x00000003,
+	0x00000005, 0x208c0d21, 0x00000086, 0xffffffff,
+	0x05600032, 0x20a00fa1, 0x008d0080, 0x02190001,
+	0x00000040, 0x20a01ca5, 0x000000a0, 0x00000001,
+	0x05600032, 0x20a00fa1, 0x008d0080, 0x040a8001,
+	0x02000040, 0x20281c21, 0x00000028, 0xffffffff,
+	0x00010220, 0x34001c00, 0x00001400, 0xfffffffc,
+	0x00000001, 0x26020128, 0x00000024, 0x00000000,
+	0x00000001, 0x220010e4, 0x00000000, 0x00000000,
+	0x00000001, 0x220831ec, 0x00000000, 0x007f007f,
+	0x00600001, 0x20400021, 0x008d0000, 0x00000000,
+	0x00600001, 0x2fe00021, 0x008d0000, 0x00000000,
+	0x00200001, 0x20400121, 0x00450020, 0x00000000,
+	0x00000001, 0x20480061, 0x00000000, 0x000f000f,
+	0x00000005, 0x204c0d21, 0x00000046, 0xffffffef,
+	0x00800001, 0x20600061, 0x00000000, 0x00000000,
+	0x00800001, 0x20800061, 0x00000000, 0x00000000,
+	0x00800001, 0x20a00061, 0x00000000, 0x00000000,
+	0x00800001, 0x20c00061, 0x00000000, 0x00000000,
+	0x00800001, 0x20e00061, 0x00000000, 0x00000000,
+	0x00800001, 0x21000061, 0x00000000, 0x00000000,
+	0x00800001, 0x21200061, 0x00000000, 0x00000000,
+	0x00800001, 0x21400061, 0x00000000, 0x00000000,
+	0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000,
+	0x00000040, 0x20402d21, 0x00000020, 0x00100010,
+	0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000,
+	0x02000040, 0x22083d8c, 0x00000208, 0xffffffff,
+	0x00800001, 0xa0000109, 0x00000602, 0x00000000,
+	0x00000040, 0x22001c84, 0x00000200, 0x00000020,
+	0x00010220, 0x34001c00, 0x00001400, 0xfffffff8,
+	0x07600032, 0x20000fa0, 0x008d0fe0, 0x82000010,
+};
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index f2806381733f..4a53ded7c2dd 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -65,6 +65,9 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 		return NULL;
 	}
 	i915_active_init(&ring->vma->active, NULL, NULL);
+	__set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(ring->vma));
+	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &ring->vma->node.flags);
+	ring->vma->node.size = sz;
 
 	intel_ring_update_space(ring);
 
@@ -241,9 +244,7 @@ static void mock_reset_cancel(struct intel_engine_cs *engine)
 
 	/* Mark all submitted requests as skipped. */
 	list_for_each_entry(request, &engine->active.requests, sched.link) {
-		if (!i915_request_signaled(request))
-			dma_fence_set_error(&request->fence, -EIO);
-
+		i915_request_set_error_once(request, -EIO);
 		i915_request_mark_complete(request);
 	}
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 43d4d589749f..697114dd1f47 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -142,6 +142,24 @@ out:
 	return err;
 }
 
+static void engine_heartbeat_disable(struct intel_engine_cs *engine,
+				     unsigned long *saved)
+{
+	*saved = engine->props.heartbeat_interval_ms;
+	engine->props.heartbeat_interval_ms = 0;
+
+	intel_engine_pm_get(engine);
+	intel_engine_park_heartbeat(engine);
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+				    unsigned long saved)
+{
+	intel_engine_pm_put(engine);
+
+	engine->props.heartbeat_interval_ms = saved;
+}
+
 static int live_idle_flush(void *arg)
 {
 	struct intel_gt *gt = arg;
@@ -152,9 +170,11 @@ static int live_idle_flush(void *arg)
 	/* Check that we can flush the idle barriers */
 
 	for_each_engine(engine, gt, id) {
-		intel_engine_pm_get(engine);
+		unsigned long heartbeat;
+
+		engine_heartbeat_disable(engine, &heartbeat);
 		err = __live_idle_pulse(engine, intel_engine_flush_barriers);
-		intel_engine_pm_put(engine);
+		engine_heartbeat_enable(engine, heartbeat);
 		if (err)
 			break;
 	}
@@ -172,9 +192,11 @@ static int live_idle_pulse(void *arg)
 	/* Check that heartbeat pulses flush the idle barriers */
 
 	for_each_engine(engine, gt, id) {
-		intel_engine_pm_get(engine);
+		unsigned long heartbeat;
+
+		engine_heartbeat_disable(engine, &heartbeat);
 		err = __live_idle_pulse(engine, intel_engine_pulse);
-		intel_engine_pm_put(engine);
+		engine_heartbeat_enable(engine, heartbeat);
 		if (err && err != -ENODEV)
 			break;
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 3e5e6c86e843..2b2efff6e19d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -268,7 +268,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 
 cancel_rq:
 	if (err) {
-		i915_request_skip(rq, err);
+		i915_request_set_error_once(rq, err);
 		i915_request_add(rq);
 	}
 unpin_hws:
@@ -1640,7 +1640,7 @@ static int igt_reset_engines_atomic(void *arg)
 	if (!intel_has_reset_engine(gt))
 		return 0;
 
-	if (USES_GUC_SUBMISSION(gt->i915))
+	if (intel_uc_uses_guc_submission(&gt->uc))
 		return 0;
 
 	igt_global_reset_lock(gt);
diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c
index fd3770e48ac7..a912159693fd 100644
--- a/drivers/gpu/drm/i915/gt/selftest_llc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_llc.c
@@ -18,10 +18,8 @@ static int gen6_verify_ring_freq(struct intel_llc *llc)
 
 	wakeref = intel_runtime_pm_get(llc_to_gt(llc)->uncore->rpm);
 
-	if (!get_ia_constants(llc, &consts)) {
-		err = -ENODEV;
+	if (!get_ia_constants(llc, &consts))
 		goto out_rpm;
-	}
 
 	for (gpu_freq = consts.min_gpu_freq;
 	     gpu_freq <= consts.max_gpu_freq;
@@ -71,10 +69,5 @@ out_rpm:
 
 int st_llc_verify(struct intel_llc *llc)
 {
-	int err = 0;
-
-	if (HAS_LLC(llc_to_gt(llc)->i915))
-		err = gen6_verify_ring_freq(llc);
-
-	return err;
+	return gen6_verify_ring_freq(llc);
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index b292f8cbd0bf..6f06ba750a0a 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -68,6 +68,71 @@ static void engine_heartbeat_enable(struct intel_engine_cs *engine,
 	engine->props.heartbeat_interval_ms = saved;
 }
 
+static int wait_for_submit(struct intel_engine_cs *engine,
+			   struct i915_request *rq,
+			   unsigned long timeout)
+{
+	timeout += jiffies;
+	do {
+		cond_resched();
+		intel_engine_flush_submission(engine);
+
+		if (READ_ONCE(engine->execlists.pending[0]))
+			continue;
+
+		if (i915_request_is_active(rq))
+			return 0;
+
+		if (i915_request_started(rq)) /* that was quick! */
+			return 0;
+	} while (time_before(jiffies, timeout));
+
+	return -ETIME;
+}
+
+static int wait_for_reset(struct intel_engine_cs *engine,
+			  struct i915_request *rq,
+			  unsigned long timeout)
+{
+	timeout += jiffies;
+
+	do {
+		cond_resched();
+		intel_engine_flush_submission(engine);
+
+		if (READ_ONCE(engine->execlists.pending[0]))
+			continue;
+
+		if (i915_request_completed(rq))
+			break;
+
+		if (READ_ONCE(rq->fence.error))
+			break;
+	} while (time_before(jiffies, timeout));
+
+	flush_scheduled_work();
+
+	if (rq->fence.error != -EIO) {
+		pr_err("%s: hanging request %llx:%lld not reset\n",
+		       engine->name,
+		       rq->fence.context,
+		       rq->fence.seqno);
+		return -EINVAL;
+	}
+
+	/* Give the request a jiffie to complete after flushing the worker */
+	if (i915_request_wait(rq, 0,
+			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
+		pr_err("%s: hanging request %llx:%lld did not complete\n",
+		       engine->name,
+		       rq->fence.context,
+		       rq->fence.seqno);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
 static int live_sanitycheck(void *arg)
 {
 	struct intel_gt *gt = arg;
@@ -285,6 +350,84 @@ static int live_unlite_preempt(void *arg)
 	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
 }
 
+static int live_pin_rewind(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * We have to be careful not to trust intel_ring too much, for example
+	 * ring->head is updated upon retire which is out of sync with pinning
+	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
+	 * or else we risk writing an older, stale value.
+	 *
+	 * To simulate this, let's apply a bit of deliberate sabotague.
+	 */
+
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce;
+		struct i915_request *rq;
+		struct intel_ring *ring;
+		struct igt_live_test t;
+
+		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
+			err = -EIO;
+			break;
+		}
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			break;
+		}
+
+		err = intel_context_pin(ce);
+		if (err) {
+			intel_context_put(ce);
+			break;
+		}
+
+		/* Keep the context awake while we play games */
+		err = i915_active_acquire(&ce->active);
+		if (err) {
+			intel_context_unpin(ce);
+			intel_context_put(ce);
+			break;
+		}
+		ring = ce->ring;
+
+		/* Poison the ring, and offset the next request from HEAD */
+		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
+		ring->emit = ring->size / 2;
+		ring->tail = ring->emit;
+		GEM_BUG_ON(ring->head);
+
+		intel_context_unpin(ce);
+
+		/* Submit a simple nop request */
+		GEM_BUG_ON(intel_context_is_pinned(ce));
+		rq = intel_context_create_request(ce);
+		i915_active_release(&ce->active); /* e.g. async retire */
+		intel_context_put(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+		GEM_BUG_ON(!rq->head);
+		i915_request_add(rq);
+
+		/* Expect not to hang! */
+		if (igt_live_test_end(&t)) {
+			err = -EIO;
+			break;
+		}
+	}
+
+	return err;
+}
+
 static int live_hold_reset(void *arg)
 {
 	struct intel_gt *gt = arg;
@@ -386,6 +529,152 @@ out:
 	return err;
 }
 
+static const char *error_repr(int err)
+{
+	return err ? "bad" : "good";
+}
+
+static int live_error_interrupt(void *arg)
+{
+	static const struct error_phase {
+		enum { GOOD = 0, BAD = -EIO } error[2];
+	} phases[] = {
+		{ { BAD,  GOOD } },
+		{ { BAD,  BAD  } },
+		{ { BAD,  GOOD } },
+		{ { GOOD, GOOD } }, /* sentinel */
+	};
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/*
+	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
+	 * of invalid commands in user batches that will cause a GPU hang.
+	 * This is a faster mechanism than using hangcheck/heartbeats, but
+	 * only detects problems the HW knows about -- it will not warn when
+	 * we kill the HW!
+	 *
+	 * To verify our detection and reset, we throw some invalid commands
+	 * at the HW and wait for the interrupt.
+	 */
+
+	if (!intel_has_reset_engine(gt))
+		return 0;
+
+	for_each_engine(engine, gt, id) {
+		const struct error_phase *p;
+		unsigned long heartbeat;
+		int err = 0;
+
+		engine_heartbeat_disable(engine, &heartbeat);
+
+		for (p = phases; p->error[0] != GOOD; p++) {
+			struct i915_request *client[ARRAY_SIZE(phases->error)];
+			u32 *cs;
+			int i;
+
+			memset(client, 0, sizeof(*client));
+			for (i = 0; i < ARRAY_SIZE(client); i++) {
+				struct intel_context *ce;
+				struct i915_request *rq;
+
+				ce = intel_context_create(engine);
+				if (IS_ERR(ce)) {
+					err = PTR_ERR(ce);
+					goto out;
+				}
+
+				rq = intel_context_create_request(ce);
+				intel_context_put(ce);
+				if (IS_ERR(rq)) {
+					err = PTR_ERR(rq);
+					goto out;
+				}
+
+				if (rq->engine->emit_init_breadcrumb) {
+					err = rq->engine->emit_init_breadcrumb(rq);
+					if (err) {
+						i915_request_add(rq);
+						goto out;
+					}
+				}
+
+				cs = intel_ring_begin(rq, 2);
+				if (IS_ERR(cs)) {
+					i915_request_add(rq);
+					err = PTR_ERR(cs);
+					goto out;
+				}
+
+				if (p->error[i]) {
+					*cs++ = 0xdeadbeef;
+					*cs++ = 0xdeadbeef;
+				} else {
+					*cs++ = MI_NOOP;
+					*cs++ = MI_NOOP;
+				}
+
+				client[i] = i915_request_get(rq);
+				i915_request_add(rq);
+			}
+
+			err = wait_for_submit(engine, client[0], HZ / 2);
+			if (err) {
+				pr_err("%s: first request did not start within time!\n",
+				       engine->name);
+				err = -ETIME;
+				goto out;
+			}
+
+			for (i = 0; i < ARRAY_SIZE(client); i++) {
+				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
+					pr_debug("%s: %s request incomplete!\n",
+						 engine->name,
+						 error_repr(p->error[i]));
+
+				if (!i915_request_started(client[i])) {
+					pr_debug("%s: %s request not stated!\n",
+						 engine->name,
+						 error_repr(p->error[i]));
+					err = -ETIME;
+					goto out;
+				}
+
+				/* Kick the tasklet to process the error */
+				intel_engine_flush_submission(engine);
+				if (client[i]->fence.error != p->error[i]) {
+					pr_err("%s: %s request completed with wrong error code: %d\n",
+					       engine->name,
+					       error_repr(p->error[i]),
+					       client[i]->fence.error);
+					err = -EINVAL;
+					goto out;
+				}
+			}
+
+out:
+			for (i = 0; i < ARRAY_SIZE(client); i++)
+				if (client[i])
+					i915_request_put(client[i]);
+			if (err) {
+				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
+				       engine->name, p - phases,
+				       p->error[0], p->error[1]);
+				break;
+			}
+		}
+
+		engine_heartbeat_enable(engine, heartbeat);
+		if (err) {
+			intel_gt_set_wedged(gt);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
 static int
 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
 {
@@ -580,6 +869,10 @@ static int live_timeslice_preempt(void *arg)
 	if (err)
 		goto err_map;
 
+	err = i915_vma_sync(vma);
+	if (err)
+		goto err_pin;
+
 	for_each_prime_number_from(count, 1, 16) {
 		struct intel_engine_cs *engine;
 		enum intel_engine_id id;
@@ -614,33 +907,227 @@ err_obj:
 	return err;
 }
 
-static struct i915_request *nop_request(struct intel_engine_cs *engine)
+static struct i915_request *
+create_rewinder(struct intel_context *ce,
+		struct i915_request *wait,
+		void *slot, int idx)
 {
+	const u32 offset =
+		i915_ggtt_offset(ce->engine->status_page.vma) +
+		offset_in_page(slot);
 	struct i915_request *rq;
+	u32 *cs;
+	int err;
 
-	rq = intel_engine_create_kernel_request(engine);
+	rq = intel_context_create_request(ce);
 	if (IS_ERR(rq))
 		return rq;
 
+	if (wait) {
+		err = i915_request_await_dma_fence(rq, &wait->fence);
+		if (err)
+			goto err;
+	}
+
+	cs = intel_ring_begin(rq, 10);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err;
+	}
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	*cs++ = MI_NOOP;
+
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_GLOBAL_GTT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_NEQ_SDD;
+	*cs++ = 0;
+	*cs++ = offset;
+	*cs++ = 0;
+
+	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
+	*cs++ = offset + idx * sizeof(u32);
+	*cs++ = 0;
+
+	intel_ring_advance(rq, cs);
+
+	rq->sched.attr.priority = I915_PRIORITY_MASK;
+	err = 0;
+err:
 	i915_request_get(rq);
 	i915_request_add(rq);
+	if (err) {
+		i915_request_put(rq);
+		return ERR_PTR(err);
+	}
 
 	return rq;
 }
 
-static int wait_for_submit(struct intel_engine_cs *engine,
-			   struct i915_request *rq,
-			   unsigned long timeout)
+static int live_timeslice_rewind(void *arg)
 {
-	timeout += jiffies;
-	do {
-		cond_resched();
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/*
+	 * The usual presumption on timeslice expiration is that we replace
+	 * the active context with another. However, given a chain of
+	 * dependencies we may end up with replacing the context with itself,
+	 * but only a few of those requests, forcing us to rewind the
+	 * RING_TAIL of the original request.
+	 */
+	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+		return 0;
+
+	for_each_engine(engine, gt, id) {
+		enum { A1, A2, B1 };
+		enum { X = 1, Y, Z };
+		struct i915_request *rq[3] = {};
+		struct intel_context *ce;
+		unsigned long heartbeat;
+		unsigned long timeslice;
+		int i, err = 0;
+		u32 *slot;
+
+		if (!intel_engine_has_timeslices(engine))
+			continue;
+
+		/*
+		 * A:rq1 -- semaphore wait, timestamp X
+		 * A:rq2 -- write timestamp Y
+		 *
+		 * B:rq1 [await A:rq1] -- write timestamp Z
+		 *
+		 * Force timeslice, release semaphore.
+		 *
+		 * Expect execution/evaluation order XZY
+		 */
+
+		engine_heartbeat_disable(engine, &heartbeat);
+		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
+
+		slot = memset32(engine->status_page.addr + 1000, 0, 4);
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			goto err;
+		}
+
+		rq[0] = create_rewinder(ce, NULL, slot, 1);
+		if (IS_ERR(rq[0])) {
+			intel_context_put(ce);
+			goto err;
+		}
+
+		rq[1] = create_rewinder(ce, NULL, slot, 2);
+		intel_context_put(ce);
+		if (IS_ERR(rq[1]))
+			goto err;
+
+		err = wait_for_submit(engine, rq[1], HZ / 2);
+		if (err) {
+			pr_err("%s: failed to submit first context\n",
+			       engine->name);
+			goto err;
+		}
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			goto err;
+		}
+
+		rq[2] = create_rewinder(ce, rq[0], slot, 3);
+		intel_context_put(ce);
+		if (IS_ERR(rq[2]))
+			goto err;
+
+		err = wait_for_submit(engine, rq[2], HZ / 2);
+		if (err) {
+			pr_err("%s: failed to submit second context\n",
+			       engine->name);
+			goto err;
+		}
+		GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
+
+		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
+		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
+		GEM_BUG_ON(!i915_request_is_active(rq[A2]));
+		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
+
+		/* Wait for the timeslice to kick in */
+		del_timer(&engine->execlists.timer);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
 		intel_engine_flush_submission(engine);
-		if (i915_request_is_active(rq))
-			return 0;
-	} while (time_before(jiffies, timeout));
 
-	return -ETIME;
+		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
+		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
+		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
+		GEM_BUG_ON(i915_request_is_active(rq[A2]));
+
+		/* Release the hounds! */
+		slot[0] = 1;
+		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
+
+		for (i = 1; i <= 3; i++) {
+			unsigned long timeout = jiffies + HZ / 2;
+
+			while (!READ_ONCE(slot[i]) &&
+			       time_before(jiffies, timeout))
+				;
+
+			if (!time_before(jiffies, timeout)) {
+				pr_err("%s: rq[%d] timed out\n",
+				       engine->name, i - 1);
+				err = -ETIME;
+				goto err;
+			}
+
+			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
+		}
+
+		/* XZY: XZ < XY */
+		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
+			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
+			       engine->name,
+			       slot[Z] - slot[X],
+			       slot[Y] - slot[X]);
+			err = -EINVAL;
+		}
+
+err:
+		memset32(&slot[0], -1, 4);
+		wmb();
+
+		engine->props.timeslice_duration_ms = timeslice;
+		engine_heartbeat_enable(engine, heartbeat);
+		for (i = 0; i < 3; i++)
+			i915_request_put(rq[i]);
+		if (igt_flush_test(gt->i915))
+			err = -EIO;
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static struct i915_request *nop_request(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	rq = intel_engine_create_kernel_request(engine);
+	if (IS_ERR(rq))
+		return rq;
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	return rq;
 }
 
 static long timeslice_threshold(const struct intel_engine_cs *engine)
@@ -688,6 +1175,10 @@ static int live_timeslice_queue(void *arg)
 	if (err)
 		goto err_map;
 
+	err = i915_vma_sync(vma);
+	if (err)
+		goto err_pin;
+
 	for_each_engine(engine, gt, id) {
 		struct i915_sched_attr attr = {
 			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
@@ -774,6 +1265,7 @@ err_heartbeat:
 			break;
 	}
 
+err_pin:
 	i915_vma_unpin(vma);
 err_map:
 	i915_gem_object_unpin_map(obj);
@@ -832,6 +1324,10 @@ static int live_busywait_preempt(void *arg)
 	if (err)
 		goto err_map;
 
+	err = i915_vma_sync(vma);
+	if (err)
+		goto err_vma;
+
 	for_each_engine(engine, gt, id) {
 		struct i915_request *lo, *hi;
 		struct igt_live_test t;
@@ -1352,14 +1848,9 @@ static int __cancel_active0(struct live_preempt_cancel *arg)
 	if (err)
 		goto out;
 
-	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
-		err = -EIO;
-		goto out;
-	}
-
-	if (rq->fence.error != -EIO) {
-		pr_err("Cancelled inflight0 request did not report -EIO\n");
-		err = -EINVAL;
+	err = wait_for_reset(arg->engine, rq, HZ / 2);
+	if (err) {
+		pr_err("Cancelled inflight0 request did not reset\n");
 		goto out;
 	}
 
@@ -1417,10 +1908,9 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
 		goto out;
 
 	igt_spinner_end(&arg->a.spin);
-	if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
-		err = -EIO;
+	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
+	if (err)
 		goto out;
-	}
 
 	if (rq[0]->fence.error != 0) {
 		pr_err("Normal inflight0 request did not complete\n");
@@ -1500,10 +1990,9 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
 	if (err)
 		goto out;
 
-	if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
-		err = -EIO;
+	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
+	if (err)
 		goto out;
-	}
 
 	if (rq[0]->fence.error != -EIO) {
 		pr_err("Cancelled inflight0 request did not report -EIO\n");
@@ -1561,14 +2050,9 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
 	if (err)
 		goto out;
 
-	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
-		err = -EIO;
-		goto out;
-	}
-
-	if (rq->fence.error != -EIO) {
-		pr_err("Cancelled inflight0 request did not report -EIO\n");
-		err = -EINVAL;
+	err = wait_for_reset(arg->engine, rq, HZ / 2);
+	if (err) {
+		pr_err("Cancelled inflight0 request did not reset\n");
 		goto out;
 	}
 
@@ -1656,7 +2140,7 @@ static int live_suppress_self_preempt(void *arg)
 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
 		return 0;
 
-	if (USES_GUC_SUBMISSION(gt->i915))
+	if (intel_uc_uses_guc_submission(&gt->uc))
 		return 0; /* presume black blox */
 
 	if (intel_vgpu_active(gt->i915))
@@ -2279,117 +2763,6 @@ static int live_preempt_gang(void *arg)
 	return 0;
 }
 
-static int live_preempt_hang(void *arg)
-{
-	struct intel_gt *gt = arg;
-	struct i915_gem_context *ctx_hi, *ctx_lo;
-	struct igt_spinner spin_hi, spin_lo;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int err = -ENOMEM;
-
-	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
-		return 0;
-
-	if (!intel_has_reset_engine(gt))
-		return 0;
-
-	if (igt_spinner_init(&spin_hi, gt))
-		return -ENOMEM;
-
-	if (igt_spinner_init(&spin_lo, gt))
-		goto err_spin_hi;
-
-	ctx_hi = kernel_context(gt->i915);
-	if (!ctx_hi)
-		goto err_spin_lo;
-	ctx_hi->sched.priority =
-		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
-
-	ctx_lo = kernel_context(gt->i915);
-	if (!ctx_lo)
-		goto err_ctx_hi;
-	ctx_lo->sched.priority =
-		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
-
-	for_each_engine(engine, gt, id) {
-		struct i915_request *rq;
-
-		if (!intel_engine_has_preemption(engine))
-			continue;
-
-		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
-					    MI_ARB_CHECK);
-		if (IS_ERR(rq)) {
-			err = PTR_ERR(rq);
-			goto err_ctx_lo;
-		}
-
-		i915_request_add(rq);
-		if (!igt_wait_for_spinner(&spin_lo, rq)) {
-			GEM_TRACE("lo spinner failed to start\n");
-			GEM_TRACE_DUMP();
-			intel_gt_set_wedged(gt);
-			err = -EIO;
-			goto err_ctx_lo;
-		}
-
-		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
-					    MI_ARB_CHECK);
-		if (IS_ERR(rq)) {
-			igt_spinner_end(&spin_lo);
-			err = PTR_ERR(rq);
-			goto err_ctx_lo;
-		}
-
-		init_completion(&engine->execlists.preempt_hang.completion);
-		engine->execlists.preempt_hang.inject_hang = true;
-
-		i915_request_add(rq);
-
-		if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion,
-						 HZ / 10)) {
-			pr_err("Preemption did not occur within timeout!");
-			GEM_TRACE_DUMP();
-			intel_gt_set_wedged(gt);
-			err = -EIO;
-			goto err_ctx_lo;
-		}
-
-		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
-		intel_engine_reset(engine, NULL);
-		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
-
-		engine->execlists.preempt_hang.inject_hang = false;
-
-		if (!igt_wait_for_spinner(&spin_hi, rq)) {
-			GEM_TRACE("hi spinner failed to start\n");
-			GEM_TRACE_DUMP();
-			intel_gt_set_wedged(gt);
-			err = -EIO;
-			goto err_ctx_lo;
-		}
-
-		igt_spinner_end(&spin_hi);
-		igt_spinner_end(&spin_lo);
-		if (igt_flush_test(gt->i915)) {
-			err = -EIO;
-			goto err_ctx_lo;
-		}
-	}
-
-	err = 0;
-err_ctx_lo:
-	kernel_context_close(ctx_lo);
-err_ctx_hi:
-	kernel_context_close(ctx_hi);
-err_spin_lo:
-	igt_spinner_fini(&spin_lo);
-err_spin_hi:
-	igt_spinner_fini(&spin_hi);
-	return err;
-}
-
 static int live_preempt_timeout(void *arg)
 {
 	struct intel_gt *gt = arg;
@@ -2882,7 +3255,7 @@ static int live_virtual_engine(void *arg)
 	unsigned int class, inst;
 	int err;
 
-	if (USES_GUC_SUBMISSION(gt->i915))
+	if (intel_uc_uses_guc_submission(&gt->uc))
 		return 0;
 
 	for_each_engine(engine, gt, id) {
@@ -3015,7 +3388,7 @@ static int live_virtual_mask(void *arg)
 	unsigned int class, inst;
 	int err;
 
-	if (USES_GUC_SUBMISSION(gt->i915))
+	if (intel_uc_uses_guc_submission(&gt->uc))
 		return 0;
 
 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
@@ -3055,6 +3428,10 @@ static int preserved_virtual_engine(struct intel_gt *gt,
 	if (IS_ERR(scratch))
 		return PTR_ERR(scratch);
 
+	err = i915_vma_sync(scratch);
+	if (err)
+		goto out_scratch;
+
 	ve = intel_execlists_create_virtual(siblings, nsibling);
 	if (IS_ERR(ve)) {
 		err = PTR_ERR(ve);
@@ -3153,7 +3530,7 @@ static int live_virtual_preserved(void *arg)
 	 * are preserved.
 	 */
 
-	if (USES_GUC_SUBMISSION(gt->i915))
+	if (intel_uc_uses_guc_submission(&gt->uc))
 		return 0;
 
 	/* As we use CS_GPR we cannot run before they existed on all engines. */
@@ -3243,15 +3620,21 @@ static int bond_virtual_engine(struct intel_gt *gt,
 	rq[0] = ERR_PTR(-ENOMEM);
 	for_each_engine(master, gt, id) {
 		struct i915_sw_fence fence = {};
+		struct intel_context *ce;
 
 		if (master->class == class)
 			continue;
 
+		ce = intel_context_create(master);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			goto out;
+		}
+
 		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
 
-		rq[0] = igt_spinner_create_request(&spin,
-						   master->kernel_context,
-						   MI_NOOP);
+		rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
+		intel_context_put(ce);
 		if (IS_ERR(rq[0])) {
 			err = PTR_ERR(rq[0]);
 			goto out;
@@ -3377,7 +3760,7 @@ static int live_virtual_bond(void *arg)
 	unsigned int class, inst;
 	int err;
 
-	if (USES_GUC_SUBMISSION(gt->i915))
+	if (intel_uc_uses_guc_submission(&gt->uc))
 		return 0;
 
 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
@@ -3538,7 +3921,7 @@ static int live_virtual_reset(void *arg)
 	 * forgotten.
 	 */
 
-	if (USES_GUC_SUBMISSION(gt->i915))
+	if (intel_uc_uses_guc_submission(&gt->uc))
 		return 0;
 
 	if (!intel_has_reset_engine(gt))
@@ -3571,8 +3954,11 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_sanitycheck),
 		SUBTEST(live_unlite_switch),
 		SUBTEST(live_unlite_preempt),
+		SUBTEST(live_pin_rewind),
 		SUBTEST(live_hold_reset),
+		SUBTEST(live_error_interrupt),
 		SUBTEST(live_timeslice_preempt),
+		SUBTEST(live_timeslice_rewind),
 		SUBTEST(live_timeslice_queue),
 		SUBTEST(live_busywait_preempt),
 		SUBTEST(live_preempt),
@@ -3583,7 +3969,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_suppress_wait_preempt),
 		SUBTEST(live_chain_preempt),
 		SUBTEST(live_preempt_gang),
-		SUBTEST(live_preempt_hang),
 		SUBTEST(live_preempt_timeout),
 		SUBTEST(live_preempt_smoke),
 		SUBTEST(live_virtual_engine),
@@ -3631,6 +4016,62 @@ static void hexdump(const void *buf, size_t len)
 	}
 }
 
+static int emit_semaphore_signal(struct intel_context *ce, void *slot)
+{
+	const u32 offset =
+		i915_ggtt_offset(ce->engine->status_page.vma) +
+		offset_in_page(slot);
+	struct i915_request *rq;
+	u32 *cs;
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs)) {
+		i915_request_add(rq);
+		return PTR_ERR(cs);
+	}
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = offset;
+	*cs++ = 0;
+	*cs++ = 1;
+
+	intel_ring_advance(rq, cs);
+
+	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+	i915_request_add(rq);
+	return 0;
+}
+
+static int context_flush(struct intel_context *ce, long timeout)
+{
+	struct i915_request *rq;
+	struct dma_fence *fence;
+	int err = 0;
+
+	rq = intel_engine_create_kernel_request(ce->engine);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	fence = i915_active_fence_get(&ce->timeline->last_request);
+	if (fence) {
+		i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+	}
+
+	rq = i915_request_get(rq);
+	i915_request_add(rq);
+	if (i915_request_wait(rq, 0, timeout) < 0)
+		err = -ETIME;
+	i915_request_put(rq);
+
+	rmb(); /* We know the request is written, make sure all state is too! */
+	return err;
+}
+
 static int live_lrc_layout(void *arg)
 {
 	struct intel_gt *gt = arg;
@@ -3797,6 +4238,11 @@ static int live_lrc_fixed(void *arg)
 				CTX_BB_STATE - 1,
 				"BB_STATE"
 			},
+			{
+				i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
+				CTX_TIMESTAMP - 1,
+				"RING_CTX_TIMESTAMP"
+			},
 			{ },
 		}, *t;
 		u32 *hw;
@@ -3880,8 +4326,16 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
 	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
 	*cs++ = 0;
 
+	i915_vma_lock(scratch);
+	err = i915_request_await_object(rq, scratch->obj, true);
+	if (!err)
+		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(scratch);
+
 	i915_request_get(rq);
 	i915_request_add(rq);
+	if (err)
+		goto err_rq;
 
 	intel_engine_flush_submission(engine);
 	expected[RING_TAIL_IDX] = ce->ring->tail;
@@ -3947,13 +4401,13 @@ static int live_lrc_state(void *arg)
 	return err;
 }
 
-static int gpr_make_dirty(struct intel_engine_cs *engine)
+static int gpr_make_dirty(struct intel_context *ce)
 {
 	struct i915_request *rq;
 	u32 *cs;
 	int n;
 
-	rq = intel_engine_create_kernel_request(engine);
+	rq = intel_context_create_request(ce);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
@@ -3965,20 +4419,79 @@ static int gpr_make_dirty(struct intel_engine_cs *engine)
 
 	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
 	for (n = 0; n < NUM_GPR_DW; n++) {
-		*cs++ = CS_GPR(engine, n);
+		*cs++ = CS_GPR(ce->engine, n);
 		*cs++ = STACK_MAGIC;
 	}
 	*cs++ = MI_NOOP;
 
 	intel_ring_advance(rq, cs);
+
+	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 	i915_request_add(rq);
 
 	return 0;
 }
 
-static int __live_gpr_clear(struct intel_engine_cs *engine,
-			    struct i915_vma *scratch)
+static struct i915_request *
+__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
 {
+	const u32 offset =
+		i915_ggtt_offset(ce->engine->status_page.vma) +
+		offset_in_page(slot);
+	struct i915_request *rq;
+	u32 *cs;
+	int err;
+	int n;
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq))
+		return rq;
+
+	cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
+	if (IS_ERR(cs)) {
+		i915_request_add(rq);
+		return ERR_CAST(cs);
+	}
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	*cs++ = MI_NOOP;
+
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_GLOBAL_GTT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_NEQ_SDD;
+	*cs++ = 0;
+	*cs++ = offset;
+	*cs++ = 0;
+
+	for (n = 0; n < NUM_GPR_DW; n++) {
+		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+		*cs++ = CS_GPR(ce->engine, n);
+		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+		*cs++ = 0;
+	}
+
+	i915_vma_lock(scratch);
+	err = i915_request_await_object(rq, scratch->obj, true);
+	if (!err)
+		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(scratch);
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (err) {
+		i915_request_put(rq);
+		rq = ERR_PTR(err);
+	}
+
+	return rq;
+}
+
+static int __live_lrc_gpr(struct intel_engine_cs *engine,
+			  struct i915_vma *scratch,
+			  bool preempt)
+{
+	u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
 	struct intel_context *ce;
 	struct i915_request *rq;
 	u32 *cs;
@@ -3988,7 +4501,7 @@ static int __live_gpr_clear(struct intel_engine_cs *engine,
 	if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
 		return 0; /* GPR only on rcs0 for gen8 */
 
-	err = gpr_make_dirty(engine);
+	err = gpr_make_dirty(engine->kernel_context);
 	if (err)
 		return err;
 
@@ -3996,28 +4509,28 @@ static int __live_gpr_clear(struct intel_engine_cs *engine,
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
-	rq = intel_context_create_request(ce);
+	rq = __gpr_read(ce, scratch, slot);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_put;
 	}
 
-	cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
-	if (IS_ERR(cs)) {
-		err = PTR_ERR(cs);
-		i915_request_add(rq);
-		goto err_put;
-	}
+	err = wait_for_submit(engine, rq, HZ / 2);
+	if (err)
+		goto err_rq;
 
-	for (n = 0; n < NUM_GPR_DW; n++) {
-		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
-		*cs++ = CS_GPR(engine, n);
-		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
-		*cs++ = 0;
-	}
+	if (preempt) {
+		err = gpr_make_dirty(engine->kernel_context);
+		if (err)
+			goto err_rq;
 
-	i915_request_get(rq);
-	i915_request_add(rq);
+		err = emit_semaphore_signal(engine->kernel_context, slot);
+		if (err)
+			goto err_rq;
+	} else {
+		slot[0] = 1;
+		wmb();
+	}
 
 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 		err = -ETIME;
@@ -4044,13 +4557,15 @@ static int __live_gpr_clear(struct intel_engine_cs *engine,
 	i915_gem_object_unpin_map(scratch->obj);
 
 err_rq:
+	memset32(&slot[0], -1, 4);
+	wmb();
 	i915_request_put(rq);
 err_put:
 	intel_context_put(ce);
 	return err;
 }
 
-static int live_gpr_clear(void *arg)
+static int live_lrc_gpr(void *arg)
 {
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
@@ -4068,7 +4583,971 @@ static int live_gpr_clear(void *arg)
 		return PTR_ERR(scratch);
 
 	for_each_engine(engine, gt, id) {
-		err = __live_gpr_clear(engine, scratch);
+		unsigned long heartbeat;
+
+		engine_heartbeat_disable(engine, &heartbeat);
+
+		err = __live_lrc_gpr(engine, scratch, false);
+		if (err)
+			goto err;
+
+		err = __live_lrc_gpr(engine, scratch, true);
+		if (err)
+			goto err;
+
+err:
+		engine_heartbeat_enable(engine, heartbeat);
+		if (igt_flush_test(gt->i915))
+			err = -EIO;
+		if (err)
+			break;
+	}
+
+	i915_vma_unpin_and_release(&scratch, 0);
+	return err;
+}
+
+static struct i915_request *
+create_timestamp(struct intel_context *ce, void *slot, int idx)
+{
+	const u32 offset =
+		i915_ggtt_offset(ce->engine->status_page.vma) +
+		offset_in_page(slot);
+	struct i915_request *rq;
+	u32 *cs;
+	int err;
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq))
+		return rq;
+
+	cs = intel_ring_begin(rq, 10);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err;
+	}
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	*cs++ = MI_NOOP;
+
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_GLOBAL_GTT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_NEQ_SDD;
+	*cs++ = 0;
+	*cs++ = offset;
+	*cs++ = 0;
+
+	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
+	*cs++ = offset + idx * sizeof(u32);
+	*cs++ = 0;
+
+	intel_ring_advance(rq, cs);
+
+	rq->sched.attr.priority = I915_PRIORITY_MASK;
+	err = 0;
+err:
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (err) {
+		i915_request_put(rq);
+		return ERR_PTR(err);
+	}
+
+	return rq;
+}
+
+struct lrc_timestamp {
+	struct intel_engine_cs *engine;
+	struct intel_context *ce[2];
+	u32 poison;
+};
+
+static bool timestamp_advanced(u32 start, u32 end)
+{
+	return (s32)(end - start) > 0;
+}
+
+static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
+{
+	u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
+	struct i915_request *rq;
+	u32 timestamp;
+	int err = 0;
+
+	arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
+	rq = create_timestamp(arg->ce[0], slot, 1);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	err = wait_for_submit(rq->engine, rq, HZ / 2);
+	if (err)
+		goto err;
+
+	if (preempt) {
+		arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
+		err = emit_semaphore_signal(arg->ce[1], slot);
+		if (err)
+			goto err;
+	} else {
+		slot[0] = 1;
+		wmb();
+	}
+
+	/* And wait for switch to kernel (to save our context to memory) */
+	err = context_flush(arg->ce[0], HZ / 2);
+	if (err)
+		goto err;
+
+	if (!timestamp_advanced(arg->poison, slot[1])) {
+		pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
+		       arg->engine->name, preempt ? "preempt" : "simple",
+		       arg->poison, slot[1]);
+		err = -EINVAL;
+	}
+
+	timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
+	if (!timestamp_advanced(slot[1], timestamp)) {
+		pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
+		       arg->engine->name, preempt ? "preempt" : "simple",
+		       slot[1], timestamp);
+		err = -EINVAL;
+	}
+
+err:
+	memset32(slot, -1, 4);
+	i915_request_put(rq);
+	return err;
+}
+
+static int live_lrc_timestamp(void *arg)
+{
+	struct lrc_timestamp data = {};
+	struct intel_gt *gt = arg;
+	enum intel_engine_id id;
+	const u32 poison[] = {
+		0,
+		S32_MAX,
+		(u32)S32_MAX + 1,
+		U32_MAX,
+	};
+
+	/*
+	 * We want to verify that the timestamp is saved and restore across
+	 * context switches and is monotonic.
+	 *
+	 * So we do this with a little bit of LRC poisoning to check various
+	 * boundary conditions, and see what happens if we preempt the context
+	 * with a second request (carrying more poison into the timestamp).
+	 */
+
+	for_each_engine(data.engine, gt, id) {
+		unsigned long heartbeat;
+		int i, err = 0;
+
+		engine_heartbeat_disable(data.engine, &heartbeat);
+
+		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
+			struct intel_context *tmp;
+
+			tmp = intel_context_create(data.engine);
+			if (IS_ERR(tmp)) {
+				err = PTR_ERR(tmp);
+				goto err;
+			}
+
+			err = intel_context_pin(tmp);
+			if (err) {
+				intel_context_put(tmp);
+				goto err;
+			}
+
+			data.ce[i] = tmp;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(poison); i++) {
+			data.poison = poison[i];
+
+			err = __lrc_timestamp(&data, false);
+			if (err)
+				break;
+
+			err = __lrc_timestamp(&data, true);
+			if (err)
+				break;
+		}
+
+err:
+		engine_heartbeat_enable(data.engine, heartbeat);
+		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
+			if (!data.ce[i])
+				break;
+
+			intel_context_unpin(data.ce[i]);
+			intel_context_put(data.ce[i]);
+		}
+
+		if (igt_flush_test(gt->i915))
+			err = -EIO;
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static struct i915_vma *
+create_user_vma(struct i915_address_space *vm, unsigned long size)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int err;
+
+	obj = i915_gem_object_create_internal(vm->i915, size);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		i915_gem_object_put(obj);
+		return vma;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err) {
+		i915_gem_object_put(obj);
+		return ERR_PTR(err);
+	}
+
+	return vma;
+}
+
+static struct i915_vma *
+store_context(struct intel_context *ce, struct i915_vma *scratch)
+{
+	struct i915_vma *batch;
+	u32 dw, x, *cs, *hw;
+
+	batch = create_user_vma(ce->vm, SZ_64K);
+	if (IS_ERR(batch))
+		return batch;
+
+	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
+	if (IS_ERR(cs)) {
+		i915_vma_put(batch);
+		return ERR_CAST(cs);
+	}
+
+	x = 0;
+	dw = 0;
+	hw = ce->engine->pinned_default_state;
+	hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+	do {
+		u32 len = hw[dw] & 0x7f;
+
+		if (hw[dw] == 0) {
+			dw++;
+			continue;
+		}
+
+		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+			dw += len + 2;
+			continue;
+		}
+
+		dw++;
+		len = (len + 1) / 2;
+		while (len--) {
+			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
+			*cs++ = hw[dw];
+			*cs++ = lower_32_bits(scratch->node.start + x);
+			*cs++ = upper_32_bits(scratch->node.start + x);
+
+			dw += 2;
+			x += 4;
+		}
+	} while (dw < PAGE_SIZE / sizeof(u32) &&
+		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	i915_gem_object_flush_map(batch->obj);
+	i915_gem_object_unpin_map(batch->obj);
+
+	return batch;
+}
+
+static int move_to_active(struct i915_request *rq,
+			  struct i915_vma *vma,
+			  unsigned int flags)
+{
+	int err;
+
+	i915_vma_lock(vma);
+	err = i915_request_await_object(rq, vma->obj, flags);
+	if (!err)
+		err = i915_vma_move_to_active(vma, rq, flags);
+	i915_vma_unlock(vma);
+
+	return err;
+}
+
+static struct i915_request *
+record_registers(struct intel_context *ce,
+		 struct i915_vma *before,
+		 struct i915_vma *after,
+		 u32 *sema)
+{
+	struct i915_vma *b_before, *b_after;
+	struct i915_request *rq;
+	u32 *cs;
+	int err;
+
+	b_before = store_context(ce, before);
+	if (IS_ERR(b_before))
+		return ERR_CAST(b_before);
+
+	b_after = store_context(ce, after);
+	if (IS_ERR(b_after)) {
+		rq = ERR_CAST(b_after);
+		goto err_before;
+	}
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq))
+		goto err_after;
+
+	err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
+	if (err)
+		goto err_rq;
+
+	err = move_to_active(rq, b_before, 0);
+	if (err)
+		goto err_rq;
+
+	err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
+	if (err)
+		goto err_rq;
+
+	err = move_to_active(rq, b_after, 0);
+	if (err)
+		goto err_rq;
+
+	cs = intel_ring_begin(rq, 14);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_rq;
+	}
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
+	*cs++ = lower_32_bits(b_before->node.start);
+	*cs++ = upper_32_bits(b_before->node.start);
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_GLOBAL_GTT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_NEQ_SDD;
+	*cs++ = 0;
+	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
+		offset_in_page(sema);
+	*cs++ = 0;
+	*cs++ = MI_NOOP;
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
+	*cs++ = lower_32_bits(b_after->node.start);
+	*cs++ = upper_32_bits(b_after->node.start);
+
+	intel_ring_advance(rq, cs);
+
+	WRITE_ONCE(*sema, 0);
+	i915_request_get(rq);
+	i915_request_add(rq);
+err_after:
+	i915_vma_put(b_after);
+err_before:
+	i915_vma_put(b_before);
+	return rq;
+
+err_rq:
+	i915_request_add(rq);
+	rq = ERR_PTR(err);
+	goto err_after;
+}
+
+static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
+{
+	struct i915_vma *batch;
+	u32 dw, *cs, *hw;
+
+	batch = create_user_vma(ce->vm, SZ_64K);
+	if (IS_ERR(batch))
+		return batch;
+
+	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
+	if (IS_ERR(cs)) {
+		i915_vma_put(batch);
+		return ERR_CAST(cs);
+	}
+
+	dw = 0;
+	hw = ce->engine->pinned_default_state;
+	hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+	do {
+		u32 len = hw[dw] & 0x7f;
+
+		if (hw[dw] == 0) {
+			dw++;
+			continue;
+		}
+
+		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+			dw += len + 2;
+			continue;
+		}
+
+		dw++;
+		len = (len + 1) / 2;
+		*cs++ = MI_LOAD_REGISTER_IMM(len);
+		while (len--) {
+			*cs++ = hw[dw];
+			*cs++ = poison;
+			dw += 2;
+		}
+	} while (dw < PAGE_SIZE / sizeof(u32) &&
+		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	i915_gem_object_flush_map(batch->obj);
+	i915_gem_object_unpin_map(batch->obj);
+
+	return batch;
+}
+
+static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
+{
+	struct i915_request *rq;
+	struct i915_vma *batch;
+	u32 *cs;
+	int err;
+
+	batch = load_context(ce, poison);
+	if (IS_ERR(batch))
+		return PTR_ERR(batch);
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_batch;
+	}
+
+	err = move_to_active(rq, batch, 0);
+	if (err)
+		goto err_rq;
+
+	cs = intel_ring_begin(rq, 8);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_rq;
+	}
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
+	*cs++ = lower_32_bits(batch->node.start);
+	*cs++ = upper_32_bits(batch->node.start);
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
+		offset_in_page(sema);
+	*cs++ = 0;
+	*cs++ = 1;
+
+	intel_ring_advance(rq, cs);
+
+	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+err_rq:
+	i915_request_add(rq);
+err_batch:
+	i915_vma_put(batch);
+	return err;
+}
+
+static bool is_moving(u32 a, u32 b)
+{
+	return a != b;
+}
+
+static int compare_isolation(struct intel_engine_cs *engine,
+			     struct i915_vma *ref[2],
+			     struct i915_vma *result[2],
+			     struct intel_context *ce,
+			     u32 poison)
+{
+	u32 x, dw, *hw, *lrc;
+	u32 *A[2], *B[2];
+	int err = 0;
+
+	A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
+	if (IS_ERR(A[0]))
+		return PTR_ERR(A[0]);
+
+	A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
+	if (IS_ERR(A[1])) {
+		err = PTR_ERR(A[1]);
+		goto err_A0;
+	}
+
+	B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
+	if (IS_ERR(B[0])) {
+		err = PTR_ERR(B[0]);
+		goto err_A1;
+	}
+
+	B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
+	if (IS_ERR(B[1])) {
+		err = PTR_ERR(B[1]);
+		goto err_B0;
+	}
+
+	lrc = i915_gem_object_pin_map(ce->state->obj,
+				      i915_coherent_map_type(engine->i915));
+	if (IS_ERR(lrc)) {
+		err = PTR_ERR(lrc);
+		goto err_B1;
+	}
+	lrc += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+
+	x = 0;
+	dw = 0;
+	hw = engine->pinned_default_state;
+	hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+	do {
+		u32 len = hw[dw] & 0x7f;
+
+		if (hw[dw] == 0) {
+			dw++;
+			continue;
+		}
+
+		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+			dw += len + 2;
+			continue;
+		}
+
+		dw++;
+		len = (len + 1) / 2;
+		while (len--) {
+			if (!is_moving(A[0][x], A[1][x]) &&
+			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
+				switch (hw[dw] & 4095) {
+				case 0x30: /* RING_HEAD */
+				case 0x34: /* RING_TAIL */
+					break;
+
+				default:
+					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
+					       engine->name, dw,
+					       hw[dw], hw[dw + 1],
+					       A[0][x], B[0][x], B[1][x],
+					       poison, lrc[dw + 1]);
+					err = -EINVAL;
+					break;
+				}
+			}
+			dw += 2;
+			x++;
+		}
+	} while (dw < PAGE_SIZE / sizeof(u32) &&
+		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+	i915_gem_object_unpin_map(ce->state->obj);
+err_B1:
+	i915_gem_object_unpin_map(result[1]->obj);
+err_B0:
+	i915_gem_object_unpin_map(result[0]->obj);
+err_A1:
+	i915_gem_object_unpin_map(ref[1]->obj);
+err_A0:
+	i915_gem_object_unpin_map(ref[0]->obj);
+	return err;
+}
+
+static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
+{
+	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
+	struct i915_vma *ref[2], *result[2];
+	struct intel_context *A, *B;
+	struct i915_request *rq;
+	int err;
+
+	A = intel_context_create(engine);
+	if (IS_ERR(A))
+		return PTR_ERR(A);
+
+	B = intel_context_create(engine);
+	if (IS_ERR(B)) {
+		err = PTR_ERR(B);
+		goto err_A;
+	}
+
+	ref[0] = create_user_vma(A->vm, SZ_64K);
+	if (IS_ERR(ref[0])) {
+		err = PTR_ERR(ref[0]);
+		goto err_B;
+	}
+
+	ref[1] = create_user_vma(A->vm, SZ_64K);
+	if (IS_ERR(ref[1])) {
+		err = PTR_ERR(ref[1]);
+		goto err_ref0;
+	}
+
+	rq = record_registers(A, ref[0], ref[1], sema);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_ref1;
+	}
+
+	WRITE_ONCE(*sema, 1);
+	wmb();
+
+	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
+		i915_request_put(rq);
+		err = -ETIME;
+		goto err_ref1;
+	}
+	i915_request_put(rq);
+
+	result[0] = create_user_vma(A->vm, SZ_64K);
+	if (IS_ERR(result[0])) {
+		err = PTR_ERR(result[0]);
+		goto err_ref1;
+	}
+
+	result[1] = create_user_vma(A->vm, SZ_64K);
+	if (IS_ERR(result[1])) {
+		err = PTR_ERR(result[1]);
+		goto err_result0;
+	}
+
+	rq = record_registers(A, result[0], result[1], sema);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_result1;
+	}
+
+	err = poison_registers(B, poison, sema);
+	if (err) {
+		WRITE_ONCE(*sema, -1);
+		i915_request_put(rq);
+		goto err_result1;
+	}
+
+	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
+		i915_request_put(rq);
+		err = -ETIME;
+		goto err_result1;
+	}
+	i915_request_put(rq);
+
+	err = compare_isolation(engine, ref, result, A, poison);
+
+err_result1:
+	i915_vma_put(result[1]);
+err_result0:
+	i915_vma_put(result[0]);
+err_ref1:
+	i915_vma_put(ref[1]);
+err_ref0:
+	i915_vma_put(ref[0]);
+err_B:
+	intel_context_put(B);
+err_A:
+	intel_context_put(A);
+	return err;
+}
+
+static bool skip_isolation(const struct intel_engine_cs *engine)
+{
+	if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
+		return true;
+
+	if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
+		return true;
+
+	return false;
+}
+
+static int live_lrc_isolation(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	const u32 poison[] = {
+		STACK_MAGIC,
+		0x3a3a3a3a,
+		0x5c5c5c5c,
+		0xffffffff,
+		0xffff0000,
+	};
+
+	/*
+	 * Our goal is try and verify that per-context state cannot be
+	 * tampered with by another non-privileged client.
+	 *
+	 * We take the list of context registers from the LRI in the default
+	 * context image and attempt to modify that list from a remote context.
+	 */
+
+	for_each_engine(engine, gt, id) {
+		int err = 0;
+		int i;
+
+		/* Just don't even ask */
+		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
+		    skip_isolation(engine))
+			continue;
+
+		intel_engine_pm_get(engine);
+		if (engine->pinned_default_state) {
+			for (i = 0; i < ARRAY_SIZE(poison); i++) {
+				err = __lrc_isolation(engine, poison[i]);
+				if (err)
+					break;
+
+				err = __lrc_isolation(engine, ~poison[i]);
+				if (err)
+					break;
+			}
+		}
+		intel_engine_pm_put(engine);
+		if (igt_flush_test(gt->i915))
+			err = -EIO;
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void garbage_reset(struct intel_engine_cs *engine,
+			  struct i915_request *rq)
+{
+	const unsigned int bit = I915_RESET_ENGINE + engine->id;
+	unsigned long *lock = &engine->gt->reset.flags;
+
+	if (test_and_set_bit(bit, lock))
+		return;
+
+	tasklet_disable(&engine->execlists.tasklet);
+
+	if (!rq->fence.error)
+		intel_engine_reset(engine, NULL);
+
+	tasklet_enable(&engine->execlists.tasklet);
+	clear_and_wake_up_bit(bit, lock);
+}
+
+static struct i915_request *garbage(struct intel_context *ce,
+				    struct rnd_state *prng)
+{
+	struct i915_request *rq;
+	int err;
+
+	err = intel_context_pin(ce);
+	if (err)
+		return ERR_PTR(err);
+
+	prandom_bytes_state(prng,
+			    ce->lrc_reg_state,
+			    ce->engine->context_size -
+			    LRC_STATE_PN * PAGE_SIZE);
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_unpin;
+	}
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+	return rq;
+
+err_unpin:
+	intel_context_unpin(ce);
+	return ERR_PTR(err);
+}
+
+static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
+{
+	struct intel_context *ce;
+	struct i915_request *hang;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	hang = garbage(ce, prng);
+	if (IS_ERR(hang)) {
+		err = PTR_ERR(hang);
+		goto err_ce;
+	}
+
+	if (wait_for_submit(engine, hang, HZ / 2)) {
+		i915_request_put(hang);
+		err = -ETIME;
+		goto err_ce;
+	}
+
+	intel_context_set_banned(ce);
+	garbage_reset(engine, hang);
+
+	intel_engine_flush_submission(engine);
+	if (!hang->fence.error) {
+		i915_request_put(hang);
+		pr_err("%s: corrupted context was not reset\n",
+		       engine->name);
+		err = -EINVAL;
+		goto err_ce;
+	}
+
+	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
+		pr_err("%s: corrupted context did not recover\n",
+		       engine->name);
+		i915_request_put(hang);
+		err = -EIO;
+		goto err_ce;
+	}
+	i915_request_put(hang);
+
+err_ce:
+	intel_context_put(ce);
+	return err;
+}
+
+static int live_lrc_garbage(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/*
+	 * Verify that we can recover if one context state is completely
+	 * corrupted.
+	 */
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
+		return 0;
+
+	for_each_engine(engine, gt, id) {
+		I915_RND_STATE(prng);
+		int err = 0, i;
+
+		if (!intel_has_reset_engine(engine->gt))
+			continue;
+
+		intel_engine_pm_get(engine);
+		for (i = 0; i < 3; i++) {
+			err = __lrc_garbage(engine, &prng);
+			if (err)
+				break;
+		}
+		intel_engine_pm_put(engine);
+
+		if (igt_flush_test(gt->i915))
+			err = -EIO;
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
+{
+	struct intel_context *ce;
+	struct i915_request *rq;
+	IGT_TIMEOUT(end_time);
+	int err;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	ce->runtime.num_underflow = 0;
+	ce->runtime.max_underflow = 0;
+
+	do {
+		unsigned int loop = 1024;
+
+		while (loop) {
+			rq = intel_context_create_request(ce);
+			if (IS_ERR(rq)) {
+				err = PTR_ERR(rq);
+				goto err_rq;
+			}
+
+			if (--loop == 0)
+				i915_request_get(rq);
+
+			i915_request_add(rq);
+		}
+
+		if (__igt_timeout(end_time, NULL))
+			break;
+
+		i915_request_put(rq);
+	} while (1);
+
+	err = i915_request_wait(rq, 0, HZ / 5);
+	if (err < 0) {
+		pr_err("%s: request not completed!\n", engine->name);
+		goto err_wait;
+	}
+
+	igt_flush_test(engine->i915);
+
+	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
+		engine->name,
+		intel_context_get_total_runtime_ns(ce),
+		intel_context_get_avg_runtime_ns(ce));
+
+	err = 0;
+	if (ce->runtime.num_underflow) {
+		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
+		       engine->name,
+		       ce->runtime.num_underflow,
+		       ce->runtime.max_underflow);
+		GEM_TRACE_DUMP();
+		err = -EOVERFLOW;
+	}
+
+err_wait:
+	i915_request_put(rq);
+err_rq:
+	intel_context_put(ce);
+	return err;
+}
+
+static int live_pphwsp_runtime(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * Check that cumulative context runtime as stored in the pphwsp[16]
+	 * is monotonic.
+	 */
+
+	for_each_engine(engine, gt, id) {
+		err = __live_pphwsp_runtime(engine);
 		if (err)
 			break;
 	}
@@ -4076,7 +5555,6 @@ static int live_gpr_clear(void *arg)
 	if (igt_flush_test(gt->i915))
 		err = -EIO;
 
-	i915_vma_unpin_and_release(&scratch, 0);
 	return err;
 }
 
@@ -4086,7 +5564,11 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_lrc_layout),
 		SUBTEST(live_lrc_fixed),
 		SUBTEST(live_lrc_state),
-		SUBTEST(live_gpr_clear),
+		SUBTEST(live_lrc_gpr),
+		SUBTEST(live_lrc_isolation),
+		SUBTEST(live_lrc_timestamp),
+		SUBTEST(live_lrc_garbage),
+		SUBTEST(live_pphwsp_runtime),
 	};
 
 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
index de1f83100fb6..8831ffee2061 100644
--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -12,7 +12,8 @@
 #include "selftests/igt_spinner.h"
 
 struct live_mocs {
-	struct drm_i915_mocs_table table;
+	struct drm_i915_mocs_table mocs;
+	struct drm_i915_mocs_table l3cc;
 	struct i915_vma *scratch;
 	void *vaddr;
 };
@@ -70,11 +71,22 @@ static struct i915_vma *create_scratch(struct intel_gt *gt)
 
 static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt)
 {
+	struct drm_i915_mocs_table table;
+	unsigned int flags;
 	int err;
 
-	if (!get_mocs_settings(gt->i915, &arg->table))
+	memset(arg, 0, sizeof(*arg));
+
+	flags = get_mocs_settings(gt->i915, &table);
+	if (!flags)
 		return -EINVAL;
 
+	if (flags & HAS_RENDER_L3CC)
+		arg->l3cc = table;
+
+	if (flags & (HAS_GLOBAL_MOCS | HAS_ENGINE_MOCS))
+		arg->mocs = table;
+
 	arg->scratch = create_scratch(gt);
 	if (IS_ERR(arg->scratch))
 		return PTR_ERR(arg->scratch);
@@ -223,9 +235,9 @@ static int check_mocs_engine(struct live_mocs *arg,
 	/* Read the mocs tables back using SRM */
 	offset = i915_ggtt_offset(vma);
 	if (!err)
-		err = read_mocs_table(rq, &arg->table, &offset);
+		err = read_mocs_table(rq, &arg->mocs, &offset);
 	if (!err && ce->engine->class == RENDER_CLASS)
-		err = read_l3cc_table(rq, &arg->table, &offset);
+		err = read_l3cc_table(rq, &arg->l3cc, &offset);
 	offset -= i915_ggtt_offset(vma);
 	GEM_BUG_ON(offset > PAGE_SIZE);
 
@@ -236,9 +248,9 @@ static int check_mocs_engine(struct live_mocs *arg,
 	/* Compare the results against the expected tables */
 	vaddr = arg->vaddr;
 	if (!err)
-		err = check_mocs_table(ce->engine, &arg->table, &vaddr);
+		err = check_mocs_table(ce->engine, &arg->mocs, &vaddr);
 	if (!err && ce->engine->class == RENDER_CLASS)
-		err = check_l3cc_table(ce->engine, &arg->table, &vaddr);
+		err = check_l3cc_table(ce->engine, &arg->l3cc, &vaddr);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
index 8cc55a0e9e06..95b165faeba7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -12,6 +12,21 @@
 
 #include "selftests/i915_random.h"
 
+static u64 rc6_residency(struct intel_rc6 *rc6)
+{
+	u64 result;
+
+	/* XXX VLV_GT_MEDIA_RC6? */
+
+	result = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+	if (HAS_RC6p(rc6_to_i915(rc6)))
+		result += intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6p);
+	if (HAS_RC6pp(rc6_to_i915(rc6)))
+		result += intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6pp);
+
+	return result;
+}
+
 int live_rc6_manual(void *arg)
 {
 	struct intel_gt *gt = arg;
@@ -38,9 +53,9 @@ int live_rc6_manual(void *arg)
 	__intel_rc6_disable(rc6);
 	msleep(1); /* wakeup is not immediate, takes about 100us on icl */
 
-	res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+	res[0] = rc6_residency(rc6);
 	msleep(250);
-	res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+	res[1] = rc6_residency(rc6);
 	if ((res[1] - res[0]) >> 10) {
 		pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n",
 		       (res[1] - res[0]) >> 10);
@@ -51,14 +66,15 @@ int live_rc6_manual(void *arg)
 	/* Manually enter RC6 */
 	intel_rc6_park(rc6);
 
-	res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+	res[0] = rc6_residency(rc6);
 	msleep(100);
-	res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+	res[1] = rc6_residency(rc6);
 
 	if (res[1] == res[0]) {
-		pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x\n",
+		pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x, residency=%lld\n",
 		       intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE),
-		       intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL));
+		       intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL),
+		       res[0]);
 		err = -EINVAL;
 	}
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 6ad6aca315f6..35406ecdf0b2 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -115,7 +115,7 @@ static int igt_atomic_engine_reset(void *arg)
 	if (!intel_has_reset_engine(gt))
 		return 0;
 
-	if (USES_GUC_SUBMISSION(gt->i915))
+	if (intel_uc_uses_guc_submission(&gt->uc))
 		return 0;
 
 	intel_gt_pm_get(gt);
diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
new file mode 100644
index 000000000000..9995faadd7e8
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "intel_engine_pm.h"
+#include "selftests/igt_flush_test.h"
+
+static struct i915_vma *create_wally(struct intel_engine_cs *engine)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	u32 *cs;
+	int err;
+
+	obj = i915_gem_object_create_internal(engine->i915, 4096);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	vma = i915_vma_instance(obj, engine->gt->vm, NULL);
+	if (IS_ERR(vma)) {
+		i915_gem_object_put(obj);
+		return vma;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+	if (err) {
+		i915_gem_object_put(obj);
+		return ERR_PTR(err);
+	}
+
+	err = i915_vma_sync(vma);
+	if (err) {
+		i915_gem_object_put(obj);
+		return ERR_PTR(err);
+	}
+
+	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(cs)) {
+		i915_gem_object_put(obj);
+		return ERR_CAST(cs);
+	}
+
+	if (INTEL_GEN(engine->i915) >= 6) {
+		*cs++ = MI_STORE_DWORD_IMM_GEN4;
+		*cs++ = 0;
+	} else if (INTEL_GEN(engine->i915) >= 4) {
+		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+		*cs++ = 0;
+	} else {
+		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+	}
+	*cs++ = vma->node.start + 4000;
+	*cs++ = STACK_MAGIC;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+	i915_gem_object_unpin_map(obj);
+
+	vma->private = intel_context_create(engine); /* dummy residuals */
+	if (IS_ERR(vma->private)) {
+		vma = ERR_CAST(vma->private);
+		i915_gem_object_put(obj);
+	}
+
+	return vma;
+}
+
+static int context_sync(struct intel_context *ce)
+{
+	struct i915_request *rq;
+	int err = 0;
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	if (i915_request_wait(rq, 0, HZ / 5) < 0)
+		err = -ETIME;
+	i915_request_put(rq);
+
+	return err;
+}
+
+static int new_context_sync(struct intel_engine_cs *engine)
+{
+	struct intel_context *ce;
+	int err;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = context_sync(ce);
+	intel_context_put(ce);
+
+	return err;
+}
+
+static int mixed_contexts_sync(struct intel_engine_cs *engine, u32 *result)
+{
+	int pass;
+	int err;
+
+	for (pass = 0; pass < 2; pass++) {
+		WRITE_ONCE(*result, 0);
+		err = context_sync(engine->kernel_context);
+		if (err || READ_ONCE(*result)) {
+			if (!err) {
+				pr_err("pass[%d] wa_bb emitted for the kernel context\n",
+				       pass);
+				err = -EINVAL;
+			}
+			return err;
+		}
+
+		WRITE_ONCE(*result, 0);
+		err = new_context_sync(engine);
+		if (READ_ONCE(*result) != STACK_MAGIC) {
+			if (!err) {
+				pr_err("pass[%d] wa_bb *NOT* emitted after the kernel context\n",
+				       pass);
+				err = -EINVAL;
+			}
+			return err;
+		}
+
+		WRITE_ONCE(*result, 0);
+		err = new_context_sync(engine);
+		if (READ_ONCE(*result) != STACK_MAGIC) {
+			if (!err) {
+				pr_err("pass[%d] wa_bb *NOT* emitted for the user context switch\n",
+				       pass);
+				err = -EINVAL;
+			}
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int double_context_sync_00(struct intel_engine_cs *engine, u32 *result)
+{
+	struct intel_context *ce;
+	int err, i;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	for (i = 0; i < 2; i++) {
+		WRITE_ONCE(*result, 0);
+		err = context_sync(ce);
+		if (err)
+			break;
+	}
+	intel_context_put(ce);
+	if (err)
+		return err;
+
+	if (READ_ONCE(*result)) {
+		pr_err("wa_bb emitted between the same user context\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int kernel_context_sync_00(struct intel_engine_cs *engine, u32 *result)
+{
+	struct intel_context *ce;
+	int err, i;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	for (i = 0; i < 2; i++) {
+		WRITE_ONCE(*result, 0);
+		err = context_sync(ce);
+		if (err)
+			break;
+
+		err = context_sync(engine->kernel_context);
+		if (err)
+			break;
+	}
+	intel_context_put(ce);
+	if (err)
+		return err;
+
+	if (READ_ONCE(*result)) {
+		pr_err("wa_bb emitted between the same user context [with intervening kernel]\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __live_ctx_switch_wa(struct intel_engine_cs *engine)
+{
+	struct i915_vma *bb;
+	u32 *result;
+	int err;
+
+	bb = create_wally(engine);
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	result = i915_gem_object_pin_map(bb->obj, I915_MAP_WC);
+	if (IS_ERR(result)) {
+		intel_context_put(bb->private);
+		i915_vma_unpin_and_release(&bb, 0);
+		return PTR_ERR(result);
+	}
+	result += 1000;
+
+	engine->wa_ctx.vma = bb;
+
+	err = mixed_contexts_sync(engine, result);
+	if (err)
+		goto out;
+
+	err = double_context_sync_00(engine, result);
+	if (err)
+		goto out;
+
+	err = kernel_context_sync_00(engine, result);
+	if (err)
+		goto out;
+
+out:
+	intel_context_put(engine->wa_ctx.vma->private);
+	i915_vma_unpin_and_release(&engine->wa_ctx.vma, I915_VMA_RELEASE_MAP);
+	return err;
+}
+
+static int live_ctx_switch_wa(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/*
+	 * Exercise the inter-context wa batch.
+	 *
+	 * Between each user context we run a wa batch, and since it may
+	 * have implications for user visible state, we have to check that
+	 * we do actually execute it.
+	 *
+	 * The trick we use is to replace the normal wa batch with a custom
+	 * one that writes to a marker within it, and we can then look for
+	 * that marker to confirm if the batch was run when we expect it,
+	 * and equally important it was wasn't run when we don't!
+	 */
+
+	for_each_engine(engine, gt, id) {
+		struct i915_vma *saved_wa;
+		int err;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		if (IS_GEN_RANGE(gt->i915, 4, 5))
+			continue; /* MI_STORE_DWORD is privileged! */
+
+		saved_wa = fetch_and_zero(&engine->wa_ctx.vma);
+
+		intel_engine_pm_get(engine);
+		err = __live_ctx_switch_wa(engine);
+		intel_engine_pm_put(engine);
+		if (igt_flush_test(gt->i915))
+			err = -EIO;
+
+		engine->wa_ctx.vma = saved_wa;
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int intel_ring_submission_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_ctx_switch_wa),
+	};
+
+	if (HAS_EXECLISTS(i915))
+		return 0;
+
+	return intel_gt_live_subtests(tests, &i915->gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index e2d78cc22fb4..c2578a0f2f14 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -6,6 +6,8 @@
 
 #include <linux/prime_numbers.h>
 
+#include "intel_context.h"
+#include "intel_engine_heartbeat.h"
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 #include "intel_gt_requests.h"
@@ -604,7 +606,6 @@ static int live_hwsp_alternate(void *arg)
 
 			tl = checked_intel_timeline_create(gt);
 			if (IS_ERR(tl)) {
-				intel_engine_pm_put(engine);
 				err = PTR_ERR(tl);
 				goto out;
 			}
@@ -750,6 +751,189 @@ out_free:
 	return err;
 }
 
+static void engine_heartbeat_disable(struct intel_engine_cs *engine,
+				     unsigned long *saved)
+{
+	*saved = engine->props.heartbeat_interval_ms;
+	engine->props.heartbeat_interval_ms = 0;
+
+	intel_engine_pm_get(engine);
+	intel_engine_park_heartbeat(engine);
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+				    unsigned long saved)
+{
+	intel_engine_pm_put(engine);
+
+	engine->props.heartbeat_interval_ms = saved;
+}
+
+static int live_hwsp_rollover_kernel(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * Run the host for long enough, and even the kernel context will
+	 * see a seqno rollover.
+	 */
+
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce = engine->kernel_context;
+		struct intel_timeline *tl = ce->timeline;
+		struct i915_request *rq[3] = {};
+		unsigned long heartbeat;
+		int i;
+
+		engine_heartbeat_disable(engine, &heartbeat);
+		if (intel_gt_wait_for_idle(gt, HZ / 2)) {
+			err = -EIO;
+			goto out;
+		}
+
+		GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
+		tl->seqno = 0;
+		timeline_rollback(tl);
+		timeline_rollback(tl);
+		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
+
+		for (i = 0; i < ARRAY_SIZE(rq); i++) {
+			struct i915_request *this;
+
+			this = i915_request_create(ce);
+			if (IS_ERR(this)) {
+				err = PTR_ERR(this);
+				goto out;
+			}
+
+			pr_debug("%s: create fence.seqnp:%d\n",
+				 engine->name,
+				 lower_32_bits(this->fence.seqno));
+
+			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);
+
+			rq[i] = i915_request_get(this);
+			i915_request_add(this);
+		}
+
+		/* We expected a wrap! */
+		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);
+
+		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
+			pr_err("Wait for timeline wrap timed out!\n");
+			err = -EIO;
+			goto out;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(rq); i++) {
+			if (!i915_request_completed(rq[i])) {
+				pr_err("Pre-wrap request not completed!\n");
+				err = -EINVAL;
+				goto out;
+			}
+		}
+
+out:
+		for (i = 0; i < ARRAY_SIZE(rq); i++)
+			i915_request_put(rq[i]);
+		engine_heartbeat_enable(engine, heartbeat);
+		if (err)
+			break;
+	}
+
+	if (igt_flush_test(gt->i915))
+		err = -EIO;
+
+	return err;
+}
+
+static int live_hwsp_rollover_user(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * Simulate a long running user context, and force the seqno wrap
+	 * on the user's timeline.
+	 */
+
+	for_each_engine(engine, gt, id) {
+		struct i915_request *rq[3] = {};
+		struct intel_timeline *tl;
+		struct intel_context *ce;
+		int i;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce))
+			return PTR_ERR(ce);
+
+		err = intel_context_alloc_state(ce);
+		if (err)
+			goto out;
+
+		tl = ce->timeline;
+		if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
+			goto out;
+
+		timeline_rollback(tl);
+		timeline_rollback(tl);
+		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
+
+		for (i = 0; i < ARRAY_SIZE(rq); i++) {
+			struct i915_request *this;
+
+			this = intel_context_create_request(ce);
+			if (IS_ERR(this)) {
+				err = PTR_ERR(this);
+				goto out;
+			}
+
+			pr_debug("%s: create fence.seqnp:%d\n",
+				 engine->name,
+				 lower_32_bits(this->fence.seqno));
+
+			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);
+
+			rq[i] = i915_request_get(this);
+			i915_request_add(this);
+		}
+
+		/* We expected a wrap! */
+		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);
+
+		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
+			pr_err("Wait for timeline wrap timed out!\n");
+			err = -EIO;
+			goto out;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(rq); i++) {
+			if (!i915_request_completed(rq[i])) {
+				pr_err("Pre-wrap request not completed!\n");
+				err = -EINVAL;
+				goto out;
+			}
+		}
+
+out:
+		for (i = 0; i < ARRAY_SIZE(rq); i++)
+			i915_request_put(rq[i]);
+		intel_context_put(ce);
+		if (err)
+			break;
+	}
+
+	if (igt_flush_test(gt->i915))
+		err = -EIO;
+
+	return err;
+}
+
 static int live_hwsp_recycle(void *arg)
 {
 	struct intel_gt *gt = arg;
@@ -827,6 +1011,8 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_hwsp_engine),
 		SUBTEST(live_hwsp_alternate),
 		SUBTEST(live_hwsp_wrap),
+		SUBTEST(live_hwsp_rollover_kernel),
+		SUBTEST(live_hwsp_rollover_user),
 	};
 
 	if (intel_gt_is_wedged(&i915->gt))
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index ac1921854cbf..5ed323254ee1 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -583,6 +583,15 @@ static int check_dirty_whitelist(struct intel_context *ce)
 		if (err)
 			goto err_request;
 
+		i915_vma_lock(scratch);
+		err = i915_request_await_object(rq, scratch->obj, true);
+		if (err == 0)
+			err = i915_vma_move_to_active(scratch, rq,
+						      EXEC_OBJECT_WRITE);
+		i915_vma_unlock(scratch);
+		if (err)
+			goto err_request;
+
 		err = engine->emit_bb_start(rq,
 					    batch->node.start, PAGE_SIZE,
 					    0);
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c
new file mode 100644
index 000000000000..8f9b2f33dbaf
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include "i915_drv.h"
+#include "intel_engine.h"
+#include "intel_engine_heartbeat.h"
+#include "sysfs_engines.h"
+
+struct kobj_engine {
+	struct kobject base;
+	struct intel_engine_cs *engine;
+};
+
+static struct intel_engine_cs *kobj_to_engine(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_engine, base)->engine;
+}
+
+static ssize_t
+name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", kobj_to_engine(kobj)->name);
+}
+
+static struct kobj_attribute name_attr =
+__ATTR(name, 0444, name_show, NULL);
+
+static ssize_t
+class_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_class);
+}
+
+static struct kobj_attribute class_attr =
+__ATTR(class, 0444, class_show, NULL);
+
+static ssize_t
+inst_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_instance);
+}
+
+static struct kobj_attribute inst_attr =
+__ATTR(instance, 0444, inst_show, NULL);
+
+static ssize_t
+mmio_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "0x%x\n", kobj_to_engine(kobj)->mmio_base);
+}
+
+static struct kobj_attribute mmio_attr =
+__ATTR(mmio_base, 0444, mmio_show, NULL);
+
+static const char * const vcs_caps[] = {
+	[ilog2(I915_VIDEO_CLASS_CAPABILITY_HEVC)] = "hevc",
+	[ilog2(I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC)] = "sfc",
+};
+
+static const char * const vecs_caps[] = {
+	[ilog2(I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC)] = "sfc",
+};
+
+static ssize_t repr_trim(char *buf, ssize_t len)
+{
+	/* Trim off the trailing space and replace with a newline */
+	if (len > PAGE_SIZE)
+		len = PAGE_SIZE;
+	if (len > 0)
+		buf[len - 1] = '\n';
+
+	return len;
+}
+
+static ssize_t
+__caps_show(struct intel_engine_cs *engine,
+	    u32 caps, char *buf, bool show_unknown)
+{
+	const char * const *repr;
+	int count, n;
+	ssize_t len;
+
+	BUILD_BUG_ON(!typecheck(typeof(caps), engine->uabi_capabilities));
+
+	switch (engine->class) {
+	case VIDEO_DECODE_CLASS:
+		repr = vcs_caps;
+		count = ARRAY_SIZE(vcs_caps);
+		break;
+
+	case VIDEO_ENHANCEMENT_CLASS:
+		repr = vecs_caps;
+		count = ARRAY_SIZE(vecs_caps);
+		break;
+
+	default:
+		repr = NULL;
+		count = 0;
+		break;
+	}
+	GEM_BUG_ON(count > BITS_PER_TYPE(typeof(caps)));
+
+	len = 0;
+	for_each_set_bit(n,
+			 (unsigned long *)&caps,
+			 show_unknown ? BITS_PER_TYPE(typeof(caps)) : count) {
+		if (n >= count || !repr[n]) {
+			if (GEM_WARN_ON(show_unknown))
+				len += snprintf(buf + len, PAGE_SIZE - len,
+						"[%x] ", n);
+		} else {
+			len += snprintf(buf + len, PAGE_SIZE - len,
+					"%s ", repr[n]);
+		}
+		if (GEM_WARN_ON(len >= PAGE_SIZE))
+			break;
+	}
+	return repr_trim(buf, len);
+}
+
+static ssize_t
+caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return __caps_show(engine, engine->uabi_capabilities, buf, true);
+}
+
+static struct kobj_attribute caps_attr =
+__ATTR(capabilities, 0444, caps_show, NULL);
+
+static ssize_t
+all_caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return __caps_show(kobj_to_engine(kobj), -1, buf, false);
+}
+
+static struct kobj_attribute all_caps_attr =
+__ATTR(known_capabilities, 0444, all_caps_show, NULL);
+
+static ssize_t
+max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
+	       const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long long duration;
+	int err;
+
+	/*
+	 * When waiting for a request, if is it currently being executed
+	 * on the GPU, we busywait for a short while before sleeping. The
+	 * premise is that most requests are short, and if it is already
+	 * executing then there is a good chance that it will complete
+	 * before we can setup the interrupt handler and go to sleep.
+	 * We try to offset the cost of going to sleep, by first spinning
+	 * on the request -- if it completed in less time than it would take
+	 * to go sleep, process the interrupt and return back to the client,
+	 * then we have saved the client some latency, albeit at the cost
+	 * of spinning on an expensive CPU core.
+	 *
+	 * While we try to avoid waiting at all for a request that is unlikely
+	 * to complete, deciding how long it is worth spinning is for is an
+	 * arbitrary decision: trading off power vs latency.
+	 */
+
+	err = kstrtoull(buf, 0, &duration);
+	if (err)
+		return err;
+
+	if (duration > jiffies_to_nsecs(2))
+		return -EINVAL;
+
+	WRITE_ONCE(engine->props.max_busywait_duration_ns, duration);
+
+	return count;
+}
+
+static ssize_t
+max_spin_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%lu\n", engine->props.max_busywait_duration_ns);
+}
+
+static struct kobj_attribute max_spin_attr =
+__ATTR(max_busywait_duration_ns, 0644, max_spin_show, max_spin_store);
+
+static ssize_t
+timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long long duration;
+	int err;
+
+	/*
+	 * Execlists uses a scheduling quantum (a timeslice) to alternate
+	 * execution between ready-to-run contexts of equal priority. This
+	 * ensures that all users (though only if they of equal importance)
+	 * have the opportunity to run and prevents livelocks where contexts
+	 * may have implicit ordering due to userspace semaphores.
+	 */
+
+	err = kstrtoull(buf, 0, &duration);
+	if (err)
+		return err;
+
+	if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+		return -EINVAL;
+
+	WRITE_ONCE(engine->props.timeslice_duration_ms, duration);
+
+	if (execlists_active(&engine->execlists))
+		set_timer_ms(&engine->execlists.timer, duration);
+
+	return count;
+}
+
+static ssize_t
+timeslice_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%lu\n", engine->props.timeslice_duration_ms);
+}
+
+static struct kobj_attribute timeslice_duration_attr =
+__ATTR(timeslice_duration_ms, 0644, timeslice_show, timeslice_store);
+
+static ssize_t
+stop_store(struct kobject *kobj, struct kobj_attribute *attr,
+	   const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long long duration;
+	int err;
+
+	/*
+	 * When we allow ourselves to sleep before a GPU reset after disabling
+	 * submission, even for a few milliseconds, gives an innocent context
+	 * the opportunity to clear the GPU before the reset occurs. However,
+	 * how long to sleep depends on the typical non-preemptible duration
+	 * (a similar problem to determining the ideal preempt-reset timeout
+	 * or even the heartbeat interval).
+	 */
+
+	err = kstrtoull(buf, 0, &duration);
+	if (err)
+		return err;
+
+	if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+		return -EINVAL;
+
+	WRITE_ONCE(engine->props.stop_timeout_ms, duration);
+	return count;
+}
+
+static ssize_t
+stop_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%lu\n", engine->props.stop_timeout_ms);
+}
+
+static struct kobj_attribute stop_timeout_attr =
+__ATTR(stop_timeout_ms, 0644, stop_show, stop_store);
+
+static ssize_t
+preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
+		      const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long long timeout;
+	int err;
+
+	/*
+	 * After initialising a preemption request, we give the current
+	 * resident a small amount of time to vacate the GPU. The preemption
+	 * request is for a higher priority context and should be immediate to
+	 * maintain high quality of service (and avoid priority inversion).
+	 * However, the preemption granularity of the GPU can be quite coarse
+	 * and so we need a compromise.
+	 */
+
+	err = kstrtoull(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	if (timeout > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+		return -EINVAL;
+
+	WRITE_ONCE(engine->props.preempt_timeout_ms, timeout);
+
+	if (READ_ONCE(engine->execlists.pending[0]))
+		set_timer_ms(&engine->execlists.preempt, timeout);
+
+	return count;
+}
+
+static ssize_t
+preempt_timeout_show(struct kobject *kobj, struct kobj_attribute *attr,
+		     char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%lu\n", engine->props.preempt_timeout_ms);
+}
+
+static struct kobj_attribute preempt_timeout_attr =
+__ATTR(preempt_timeout_ms, 0644, preempt_timeout_show, preempt_timeout_store);
+
+static ssize_t
+heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long long delay;
+	int err;
+
+	/*
+	 * We monitor the health of the system via periodic heartbeat pulses.
+	 * The pulses also provide the opportunity to perform garbage
+	 * collection.  However, we interpret an incomplete pulse (a missed
+	 * heartbeat) as an indication that the system is no longer responsive,
+	 * i.e. hung, and perform an engine or full GPU reset. Given that the
+	 * preemption granularity can be very coarse on a system, the optimal
+	 * value for any workload is unknowable!
+	 */
+
+	err = kstrtoull(buf, 0, &delay);
+	if (err)
+		return err;
+
+	if (delay >= jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+		return -EINVAL;
+
+	err = intel_engine_set_heartbeat(engine, delay);
+	if (err)
+		return err;
+
+	return count;
+}
+
+static ssize_t
+heartbeat_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%lu\n", engine->props.heartbeat_interval_ms);
+}
+
+static struct kobj_attribute heartbeat_interval_attr =
+__ATTR(heartbeat_interval_ms, 0644, heartbeat_show, heartbeat_store);
+
+static void kobj_engine_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static struct kobj_type kobj_engine_type = {
+	.release = kobj_engine_release,
+	.sysfs_ops = &kobj_sysfs_ops
+};
+
+static struct kobject *
+kobj_engine(struct kobject *dir, struct intel_engine_cs *engine)
+{
+	struct kobj_engine *ke;
+
+	ke = kzalloc(sizeof(*ke), GFP_KERNEL);
+	if (!ke)
+		return NULL;
+
+	kobject_init(&ke->base, &kobj_engine_type);
+	ke->engine = engine;
+
+	if (kobject_add(&ke->base, dir, "%s", engine->name)) {
+		kobject_put(&ke->base);
+		return NULL;
+	}
+
+	/* xfer ownership to sysfs tree */
+	return &ke->base;
+}
+
+void intel_engines_add_sysfs(struct drm_i915_private *i915)
+{
+	static const struct attribute *files[] = {
+		&name_attr.attr,
+		&class_attr.attr,
+		&inst_attr.attr,
+		&mmio_attr.attr,
+		&caps_attr.attr,
+		&all_caps_attr.attr,
+		&max_spin_attr.attr,
+		&stop_timeout_attr.attr,
+#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL
+		&heartbeat_interval_attr.attr,
+#endif
+		NULL
+	};
+
+	struct device *kdev = i915->drm.primary->kdev;
+	struct intel_engine_cs *engine;
+	struct kobject *dir;
+
+	dir = kobject_create_and_add("engine", &kdev->kobj);
+	if (!dir)
+		return;
+
+	for_each_uabi_engine(engine, i915) {
+		struct kobject *kobj;
+
+		kobj = kobj_engine(dir, engine);
+		if (!kobj)
+			goto err_engine;
+
+		if (sysfs_create_files(kobj, files))
+			goto err_object;
+
+		if (intel_engine_has_timeslices(engine) &&
+		    sysfs_create_file(kobj, &timeslice_duration_attr.attr))
+			goto err_engine;
+
+		if (intel_engine_has_preempt_reset(engine) &&
+		    sysfs_create_file(kobj, &preempt_timeout_attr.attr))
+			goto err_engine;
+
+		if (0) {
+err_object:
+			kobject_put(kobj);
+err_engine:
+			dev_err(kdev, "Failed to add sysfs engine '%s'\n",
+				engine->name);
+			break;
+		}
+	}
+}
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.h b/drivers/gpu/drm/i915/gt/sysfs_engines.h
new file mode 100644
index 000000000000..9546fffe03a7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_SYSFS_H
+#define INTEL_ENGINE_SYSFS_H
+
+struct drm_i915_private;
+
+void intel_engines_add_sysfs(struct drm_i915_private *i915);
+
+#endif /* INTEL_ENGINE_SYSFS_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 5d00a3b2d914..819f09ef51fc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -207,7 +207,7 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc)
 {
 	u32 flags = 0;
 
-	if (!intel_guc_is_submission_supported(guc))
+	if (!intel_guc_submission_is_used(guc))
 		flags |= GUC_CTL_DISABLE_SCHEDULER;
 
 	return flags;
@@ -217,7 +217,7 @@ static u32 guc_ctl_ctxinfo_flags(struct intel_guc *guc)
 {
 	u32 flags = 0;
 
-	if (intel_guc_is_submission_supported(guc)) {
+	if (intel_guc_submission_is_used(guc)) {
 		u32 ctxnum, base;
 
 		base = intel_guc_ggtt_offset(guc, guc->stage_desc_pool);
@@ -333,7 +333,7 @@ int intel_guc_init(struct intel_guc *guc)
 
 	ret = intel_uc_fw_init(&guc->fw);
 	if (ret)
-		goto err_fetch;
+		goto out;
 
 	ret = intel_guc_log_create(&guc->log);
 	if (ret)
@@ -348,7 +348,7 @@ int intel_guc_init(struct intel_guc *guc)
 	if (ret)
 		goto err_ads;
 
-	if (intel_guc_is_submission_supported(guc)) {
+	if (intel_guc_submission_is_used(guc)) {
 		/*
 		 * This is stuff we need to have available at fw load time
 		 * if we are planning to enable submission later
@@ -364,6 +364,8 @@ int intel_guc_init(struct intel_guc *guc)
 	/* We need to notify the guc whenever we change the GGTT */
 	i915_ggtt_enable_guc(gt->ggtt);
 
+	intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_LOADABLE);
+
 	return 0;
 
 err_ct:
@@ -374,9 +376,8 @@ err_log:
 	intel_guc_log_destroy(&guc->log);
 err_fw:
 	intel_uc_fw_fini(&guc->fw);
-err_fetch:
-	intel_uc_fw_cleanup_fetch(&guc->fw);
-	DRM_DEV_DEBUG_DRIVER(gt->i915->drm.dev, "failed with %d\n", ret);
+out:
+	i915_probe_error(gt->i915, "failed with %d\n", ret);
 	return ret;
 }
 
@@ -384,12 +385,12 @@ void intel_guc_fini(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
 
-	if (!intel_uc_fw_is_available(&guc->fw))
+	if (!intel_uc_fw_is_loadable(&guc->fw))
 		return;
 
 	i915_ggtt_disable_guc(gt->ggtt);
 
-	if (intel_guc_is_submission_supported(guc))
+	if (intel_guc_submission_is_used(guc))
 		intel_guc_submission_fini(guc);
 
 	intel_guc_ct_fini(&guc->ct);
@@ -397,9 +398,6 @@ void intel_guc_fini(struct intel_guc *guc)
 	intel_guc_ads_destroy(guc);
 	intel_guc_log_destroy(&guc->log);
 	intel_uc_fw_fini(&guc->fw);
-	intel_uc_fw_cleanup_fetch(&guc->fw);
-
-	intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_DISABLED);
 }
 
 /*
@@ -544,7 +542,7 @@ int intel_guc_suspend(struct intel_guc *guc)
 	 * If GuC communication is enabled but submission is not supported,
 	 * we do not need to suspend the GuC.
 	 */
-	if (!intel_guc_submission_is_enabled(guc))
+	if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc))
 		return 0;
 
 	/*
@@ -609,7 +607,7 @@ int intel_guc_resume(struct intel_guc *guc)
 	 * we do not need to resume the GuC but we do need to enable the
 	 * GuC communication on resume (above).
 	 */
-	if (!intel_guc_submission_is_enabled(guc))
+	if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc))
 		return 0;
 
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
@@ -678,8 +676,8 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
 	if (IS_ERR(vma))
 		goto err;
 
-	flags = PIN_GLOBAL | PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-	ret = i915_vma_pin(vma, 0, 0, flags);
+	flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+	ret = i915_ggtt_pin(vma, 0, flags);
 	if (ret) {
 		vma = ERR_PTR(ret);
 		goto err;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 910d49590068..4594ccbeaa34 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -39,7 +39,7 @@ struct intel_guc {
 		void (*disable)(struct intel_guc *guc);
 	} interrupts;
 
-	bool submission_supported;
+	bool submission_selected;
 
 	struct i915_vma *ads_vma;
 	struct __guc_ads_blob *ads_blob;
@@ -143,29 +143,36 @@ static inline bool intel_guc_is_supported(struct intel_guc *guc)
 	return intel_uc_fw_is_supported(&guc->fw);
 }
 
-static inline bool intel_guc_is_enabled(struct intel_guc *guc)
+static inline bool intel_guc_is_wanted(struct intel_guc *guc)
 {
 	return intel_uc_fw_is_enabled(&guc->fw);
 }
 
-static inline bool intel_guc_is_running(struct intel_guc *guc)
+static inline bool intel_guc_is_used(struct intel_guc *guc)
+{
+	GEM_BUG_ON(__intel_uc_fw_status(&guc->fw) == INTEL_UC_FIRMWARE_SELECTED);
+	return intel_uc_fw_is_available(&guc->fw);
+}
+
+static inline bool intel_guc_is_fw_running(struct intel_guc *guc)
 {
 	return intel_uc_fw_is_running(&guc->fw);
 }
 
+static inline bool intel_guc_is_ready(struct intel_guc *guc)
+{
+	return intel_guc_is_fw_running(guc) && intel_guc_ct_enabled(&guc->ct);
+}
+
 static inline int intel_guc_sanitize(struct intel_guc *guc)
 {
 	intel_uc_fw_sanitize(&guc->fw);
+	intel_guc_ct_sanitize(&guc->ct);
 	guc->mmio_msg = 0;
 
 	return 0;
 }
 
-static inline bool intel_guc_is_submission_supported(struct intel_guc *guc)
-{
-	return guc->submission_supported;
-}
-
 static inline void intel_guc_enable_msg(struct intel_guc *guc, u32 mask)
 {
 	spin_lock_irq(&guc->irq_lock);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index c6f971a049f9..11742fca0e9e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -5,11 +5,15 @@
 
 #include "i915_drv.h"
 #include "intel_guc_ct.h"
+#include "gt/intel_gt.h"
 
+#define CT_ERROR(_ct, _fmt, ...) \
+	DRM_DEV_ERROR(ct_to_dev(_ct), "CT: " _fmt, ##__VA_ARGS__)
 #ifdef CONFIG_DRM_I915_DEBUG_GUC
-#define CT_DEBUG_DRIVER(...)	DRM_DEBUG_DRIVER(__VA_ARGS__)
+#define CT_DEBUG(_ct, _fmt, ...) \
+	DRM_DEV_DEBUG_DRIVER(ct_to_dev(_ct), "CT: " _fmt, ##__VA_ARGS__)
 #else
-#define CT_DEBUG_DRIVER(...)	do { } while (0)
+#define CT_DEBUG(...)	do { } while (0)
 #endif
 
 struct ct_request {
@@ -48,6 +52,21 @@ static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct)
 	return container_of(ct, struct intel_guc, ct);
 }
 
+static inline struct intel_gt *ct_to_gt(struct intel_guc_ct *ct)
+{
+	return guc_to_gt(ct_to_guc(ct));
+}
+
+static inline struct drm_i915_private *ct_to_i915(struct intel_guc_ct *ct)
+{
+	return ct_to_gt(ct)->i915;
+}
+
+static inline struct device *ct_to_dev(struct intel_guc_ct *ct)
+{
+	return ct_to_i915(ct)->drm.dev;
+}
+
 static inline const char *guc_ct_buffer_type_to_str(u32 type)
 {
 	switch (type) {
@@ -63,7 +82,6 @@ static inline const char *guc_ct_buffer_type_to_str(u32 type)
 static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
 				    u32 cmds_addr, u32 size)
 {
-	CT_DEBUG_DRIVER("CT: init addr=%#x size=%u\n", cmds_addr, size);
 	memset(desc, 0, sizeof(*desc));
 	desc->addr = cmds_addr;
 	desc->size = size;
@@ -72,8 +90,6 @@ static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
 
 static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc)
 {
-	CT_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n",
-			desc, desc->head, desc->tail);
 	desc->head = 0;
 	desc->tail = 0;
 	desc->is_in_error = 0;
@@ -89,31 +105,40 @@ static int guc_action_register_ct_buffer(struct intel_guc *guc,
 		sizeof(struct guc_ct_buffer_desc),
 		type
 	};
-	int err;
 
 	/* Can't use generic send(), CT registration must go over MMIO */
-	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
-	if (err)
-		DRM_ERROR("CT: register %s buffer failed; err=%d\n",
-			  guc_ct_buffer_type_to_str(type), err);
+	return intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
+}
+
+static int ct_register_buffer(struct intel_guc_ct *ct, u32 desc_addr, u32 type)
+{
+	int err = guc_action_register_ct_buffer(ct_to_guc(ct), desc_addr, type);
+
+	if (unlikely(err))
+		CT_ERROR(ct, "Failed to register %s buffer (err=%d)\n",
+			 guc_ct_buffer_type_to_str(type), err);
 	return err;
 }
 
-static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
-					   u32 type)
+static int guc_action_deregister_ct_buffer(struct intel_guc *guc, u32 type)
 {
 	u32 action[] = {
 		INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER,
 		CTB_OWNER_HOST,
 		type
 	};
-	int err;
 
 	/* Can't use generic send(), CT deregistration must go over MMIO */
-	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
-	if (err)
-		DRM_ERROR("CT: deregister %s buffer failed; err=%d\n",
-			  guc_ct_buffer_type_to_str(type), err);
+	return intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
+}
+
+static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type)
+{
+	int err = guc_action_deregister_ct_buffer(ct_to_guc(ct), type);
+
+	if (unlikely(err))
+		CT_ERROR(ct, "Failed to deregister %s buffer (err=%d)\n",
+			 guc_ct_buffer_type_to_str(type), err);
 	return err;
 }
 
@@ -157,13 +182,12 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
 	 */
 
 	err = intel_guc_allocate_and_map_vma(guc, PAGE_SIZE, &ct->vma, &blob);
-	if (err) {
-		DRM_ERROR("CT: channel allocation failed; err=%d\n", err);
+	if (unlikely(err)) {
+		CT_ERROR(ct, "Failed to allocate CT channel (err=%d)\n", err);
 		return err;
 	}
 
-	CT_DEBUG_DRIVER("CT: vma base=%#x\n",
-			intel_guc_ggtt_offset(guc, ct->vma));
+	CT_DEBUG(ct, "vma base=%#x\n", intel_guc_ggtt_offset(guc, ct->vma));
 
 	/* store pointers to desc and cmds */
 	for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) {
@@ -197,7 +221,7 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct)
 int intel_guc_ct_enable(struct intel_guc_ct *ct)
 {
 	struct intel_guc *guc = ct_to_guc(ct);
-	u32 base;
+	u32 base, cmds, size;
 	int err;
 	int i;
 
@@ -212,23 +236,23 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
 	 */
 	for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) {
 		GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
-		guc_ct_buffer_desc_init(ct->ctbs[i].desc,
-					base + PAGE_SIZE/4 * i + PAGE_SIZE/2,
-					PAGE_SIZE/4);
+		cmds = base + PAGE_SIZE / 4 * i + PAGE_SIZE / 2;
+		size = PAGE_SIZE / 4;
+		CT_DEBUG(ct, "%d: addr=%#x size=%u\n", i, cmds, size);
+		guc_ct_buffer_desc_init(ct->ctbs[i].desc, cmds, size);
 	}
 
-	/* register buffers, starting wirh RECV buffer
-	 * descriptors are in first half of the blob
+	/*
+	 * Register both CT buffers starting with RECV buffer.
+	 * Descriptors are in first half of the blob.
 	 */
-	err = guc_action_register_ct_buffer(guc,
-					    base + PAGE_SIZE/4 * CTB_RECV,
-					    INTEL_GUC_CT_BUFFER_TYPE_RECV);
+	err = ct_register_buffer(ct, base + PAGE_SIZE / 4 * CTB_RECV,
+				 INTEL_GUC_CT_BUFFER_TYPE_RECV);
 	if (unlikely(err))
 		goto err_out;
 
-	err = guc_action_register_ct_buffer(guc,
-					    base + PAGE_SIZE/4 * CTB_SEND,
-					    INTEL_GUC_CT_BUFFER_TYPE_SEND);
+	err = ct_register_buffer(ct, base + PAGE_SIZE / 4 * CTB_SEND,
+				 INTEL_GUC_CT_BUFFER_TYPE_SEND);
 	if (unlikely(err))
 		goto err_deregister;
 
@@ -237,10 +261,9 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
 	return 0;
 
 err_deregister:
-	guc_action_deregister_ct_buffer(guc,
-					INTEL_GUC_CT_BUFFER_TYPE_RECV);
+	ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_RECV);
 err_out:
-	DRM_ERROR("CT: can't open channel; err=%d\n", err);
+	CT_ERROR(ct, "Failed to open open CT channel (err=%d)\n", err);
 	return err;
 }
 
@@ -256,18 +279,16 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct)
 
 	ct->enabled = false;
 
-	if (intel_guc_is_running(guc)) {
-		guc_action_deregister_ct_buffer(guc,
-						INTEL_GUC_CT_BUFFER_TYPE_SEND);
-		guc_action_deregister_ct_buffer(guc,
-						INTEL_GUC_CT_BUFFER_TYPE_RECV);
+	if (intel_guc_is_fw_running(guc)) {
+		ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_SEND);
+		ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_RECV);
 	}
 }
 
 static u32 ct_get_next_fence(struct intel_guc_ct *ct)
 {
 	/* For now it's trivial */
-	return ++ct->requests.next_fence;
+	return ++ct->requests.last_fence;
 }
 
 /**
@@ -288,25 +309,33 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct)
  *                   ^-----------------len-------------------^
  */
 
-static int ctb_write(struct intel_guc_ct_buffer *ctb,
-		     const u32 *action,
-		     u32 len /* in dwords */,
-		     u32 fence,
-		     bool want_response)
+static int ct_write(struct intel_guc_ct *ct,
+		    const u32 *action,
+		    u32 len /* in dwords */,
+		    u32 fence,
+		    bool want_response)
 {
+	struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_SEND];
 	struct guc_ct_buffer_desc *desc = ctb->desc;
-	u32 head = desc->head / 4;	/* in dwords */
-	u32 tail = desc->tail / 4;	/* in dwords */
-	u32 size = desc->size / 4;	/* in dwords */
-	u32 used;			/* in dwords */
+	u32 head = desc->head;
+	u32 tail = desc->tail;
+	u32 size = desc->size;
+	u32 used;
 	u32 header;
 	u32 *cmds = ctb->cmds;
 	unsigned int i;
 
-	GEM_BUG_ON(desc->size % 4);
-	GEM_BUG_ON(desc->head % 4);
-	GEM_BUG_ON(desc->tail % 4);
-	GEM_BUG_ON(tail >= size);
+	if (unlikely(desc->is_in_error))
+		return -EPIPE;
+
+	if (unlikely(!IS_ALIGNED(head | tail | size, 4) ||
+		     (tail | head) >= size))
+		goto corrupted;
+
+	/* later calculations will be done in dwords */
+	head /= 4;
+	tail /= 4;
+	size /= 4;
 
 	/*
 	 * tail == head condition indicates empty. GuC FW does not support
@@ -332,9 +361,8 @@ static int ctb_write(struct intel_guc_ct_buffer *ctb,
 		 (want_response ? GUC_CT_MSG_SEND_STATUS : 0) |
 		 (action[0] << GUC_CT_MSG_ACTION_SHIFT);
 
-	CT_DEBUG_DRIVER("CT: writing %*ph %*ph %*ph\n",
-			4, &header, 4, &fence,
-			4 * (len - 1), &action[1]);
+	CT_DEBUG(ct, "writing %*ph %*ph %*ph\n",
+		 4, &header, 4, &fence, 4 * (len - 1), &action[1]);
 
 	cmds[tail] = header;
 	tail = (tail + 1) % size;
@@ -346,12 +374,17 @@ static int ctb_write(struct intel_guc_ct_buffer *ctb,
 		cmds[tail] = action[i];
 		tail = (tail + 1) % size;
 	}
+	GEM_BUG_ON(tail > size);
 
 	/* now update desc tail (back in bytes) */
 	desc->tail = tail * 4;
-	GEM_BUG_ON(desc->tail > desc->size);
-
 	return 0;
+
+corrupted:
+	CT_ERROR(ct, "Corrupted descriptor addr=%#x head=%u tail=%u size=%u\n",
+		 desc->addr, desc->head, desc->tail, desc->size);
+	desc->is_in_error = 1;
+	return -EPIPE;
 }
 
 /**
@@ -469,7 +502,7 @@ static int ct_send(struct intel_guc_ct *ct,
 	list_add_tail(&request.link, &ct->requests.pending);
 	spin_unlock_irqrestore(&ct->requests.lock, flags);
 
-	err = ctb_write(ctb, action, len, fence, !!response_buf);
+	err = ct_write(ct, action, len, fence, !!response_buf);
 	if (unlikely(err))
 		goto unlink;
 
@@ -526,11 +559,11 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
 
 	ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
 	if (unlikely(ret < 0)) {
-		DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n",
-			  action[0], ret, status);
+		CT_ERROR(ct, "Sending action %#x failed (err=%d status=%#X)\n",
+			 action[0], ret, status);
 	} else if (unlikely(ret)) {
-		CT_DEBUG_DRIVER("CT: send action %#x returned %d (%#x)\n",
-				action[0], ret, ret);
+		CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
+			 action[0], ret, ret);
 	}
 
 	mutex_unlock(&guc->send_mutex);
@@ -552,22 +585,29 @@ static inline bool ct_header_is_response(u32 header)
 	return !!(header & GUC_CT_MSG_IS_RESPONSE);
 }
 
-static int ctb_read(struct intel_guc_ct_buffer *ctb, u32 *data)
+static int ct_read(struct intel_guc_ct *ct, u32 *data)
 {
+	struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_RECV];
 	struct guc_ct_buffer_desc *desc = ctb->desc;
-	u32 head = desc->head / 4;	/* in dwords */
-	u32 tail = desc->tail / 4;	/* in dwords */
-	u32 size = desc->size / 4;	/* in dwords */
+	u32 head = desc->head;
+	u32 tail = desc->tail;
+	u32 size = desc->size;
 	u32 *cmds = ctb->cmds;
-	s32 available;			/* in dwords */
+	s32 available;
 	unsigned int len;
 	unsigned int i;
 
-	GEM_BUG_ON(desc->size % 4);
-	GEM_BUG_ON(desc->head % 4);
-	GEM_BUG_ON(desc->tail % 4);
-	GEM_BUG_ON(tail >= size);
-	GEM_BUG_ON(head >= size);
+	if (unlikely(desc->is_in_error))
+		return -EPIPE;
+
+	if (unlikely(!IS_ALIGNED(head | tail | size, 4) ||
+		     (tail | head) >= size))
+		goto corrupted;
+
+	/* later calculations will be done in dwords */
+	head /= 4;
+	tail /= 4;
+	size /= 4;
 
 	/* tail == head condition indicates empty */
 	available = tail - head;
@@ -577,7 +617,7 @@ static int ctb_read(struct intel_guc_ct_buffer *ctb, u32 *data)
 	/* beware of buffer wrap case */
 	if (unlikely(available < 0))
 		available += size;
-	CT_DEBUG_DRIVER("CT: available %d (%u:%u)\n", available, head, tail);
+	CT_DEBUG(ct, "available %d (%u:%u)\n", available, head, tail);
 	GEM_BUG_ON(available < 0);
 
 	data[0] = cmds[head];
@@ -586,23 +626,29 @@ static int ctb_read(struct intel_guc_ct_buffer *ctb, u32 *data)
 	/* message len with header */
 	len = ct_header_get_len(data[0]) + 1;
 	if (unlikely(len > (u32)available)) {
-		DRM_ERROR("CT: incomplete message %*ph %*ph %*ph\n",
-			  4, data,
-			  4 * (head + available - 1 > size ?
-			       size - head : available - 1), &cmds[head],
-			  4 * (head + available - 1 > size ?
-			       available - 1 - size + head : 0), &cmds[0]);
-		return -EPROTO;
+		CT_ERROR(ct, "Incomplete message %*ph %*ph %*ph\n",
+			 4, data,
+			 4 * (head + available - 1 > size ?
+			      size - head : available - 1), &cmds[head],
+			 4 * (head + available - 1 > size ?
+			      available - 1 - size + head : 0), &cmds[0]);
+		goto corrupted;
 	}
 
 	for (i = 1; i < len; i++) {
 		data[i] = cmds[head];
 		head = (head + 1) % size;
 	}
-	CT_DEBUG_DRIVER("CT: received %*ph\n", 4 * len, data);
+	CT_DEBUG(ct, "received %*ph\n", 4 * len, data);
 
 	desc->head = head * 4;
 	return 0;
+
+corrupted:
+	CT_ERROR(ct, "Corrupted descriptor addr=%#x head=%u tail=%u size=%u\n",
+		 desc->addr, desc->head, desc->tail, desc->size);
+	desc->is_in_error = 1;
+	return -EPIPE;
 }
 
 /**
@@ -627,7 +673,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
 {
 	u32 header = msg[0];
 	u32 len = ct_header_get_len(header);
-	u32 msglen = len + 1; /* total message length including header */
+	u32 msgsize = (len + 1) * sizeof(u32); /* msg size in bytes w/header */
 	u32 fence;
 	u32 status;
 	u32 datalen;
@@ -639,7 +685,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
 
 	/* Response payload shall at least include fence and status */
 	if (unlikely(len < 2)) {
-		DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg);
+		CT_ERROR(ct, "Corrupted response %*ph\n", msgsize, msg);
 		return -EPROTO;
 	}
 
@@ -649,22 +695,22 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
 
 	/* Format of the status follows RESPONSE message */
 	if (unlikely(!INTEL_GUC_MSG_IS_RESPONSE(status))) {
-		DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg);
+		CT_ERROR(ct, "Corrupted response %*ph\n", msgsize, msg);
 		return -EPROTO;
 	}
 
-	CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status);
+	CT_DEBUG(ct, "response fence %u status %#x\n", fence, status);
 
 	spin_lock(&ct->requests.lock);
 	list_for_each_entry(req, &ct->requests.pending, link) {
 		if (unlikely(fence != req->fence)) {
-			CT_DEBUG_DRIVER("CT: request %u awaits response\n",
-					req->fence);
+			CT_DEBUG(ct, "request %u awaits response\n",
+				 req->fence);
 			continue;
 		}
 		if (unlikely(datalen > req->response_len)) {
-			DRM_ERROR("CT: response %u too long %*ph\n",
-				  req->fence, 4 * msglen, msg);
+			CT_ERROR(ct, "Response for %u is too long %*ph\n",
+				 req->fence, msgsize, msg);
 			datalen = 0;
 		}
 		if (datalen)
@@ -677,7 +723,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
 	spin_unlock(&ct->requests.lock);
 
 	if (!found)
-		DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg);
+		CT_ERROR(ct, "Unsolicited response %*ph\n", msgsize, msg);
 	return 0;
 }
 
@@ -687,7 +733,7 @@ static void ct_process_request(struct intel_guc_ct *ct,
 	struct intel_guc *guc = ct_to_guc(ct);
 	int ret;
 
-	CT_DEBUG_DRIVER("CT: request %x %*ph\n", action, 4 * len, payload);
+	CT_DEBUG(ct, "request %x %*ph\n", action, 4 * len, payload);
 
 	switch (action) {
 	case INTEL_GUC_ACTION_DEFAULT:
@@ -698,8 +744,8 @@ static void ct_process_request(struct intel_guc_ct *ct,
 
 	default:
 fail_unexpected:
-		DRM_ERROR("CT: unexpected request %x %*ph\n",
-			  action, 4 * len, payload);
+		CT_ERROR(ct, "Unexpected request %x %*ph\n",
+			 action, 4 * len, payload);
 		break;
 	}
 }
@@ -767,18 +813,18 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg)
 {
 	u32 header = msg[0];
 	u32 len = ct_header_get_len(header);
-	u32 msglen = len + 1; /* total message length including header */
+	u32 msgsize = (len + 1) * sizeof(u32); /* msg size in bytes w/header */
 	struct ct_incoming_request *request;
 	unsigned long flags;
 
 	GEM_BUG_ON(ct_header_is_response(header));
 
-	request = kmalloc(sizeof(*request) + 4 * msglen, GFP_ATOMIC);
+	request = kmalloc(sizeof(*request) + msgsize, GFP_ATOMIC);
 	if (unlikely(!request)) {
-		DRM_ERROR("CT: dropping request %*ph\n", 4 * msglen, msg);
+		CT_ERROR(ct, "Dropping request %*ph\n", msgsize, msg);
 		return 0; /* XXX: -ENOMEM ? */
 	}
-	memcpy(request->msg, msg, 4 * msglen);
+	memcpy(request->msg, msg, msgsize);
 
 	spin_lock_irqsave(&ct->requests.lock, flags);
 	list_add_tail(&request->link, &ct->requests.incoming);
@@ -794,7 +840,6 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg)
  */
 void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
 {
-	struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_RECV];
 	u32 msg[GUC_CT_MSG_LEN_MASK + 1]; /* one extra dw for the header */
 	int err = 0;
 
@@ -804,7 +849,7 @@ void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
 	}
 
 	do {
-		err = ctb_read(ctb, msg);
+		err = ct_read(ct, msg);
 		if (err)
 			break;
 
@@ -813,10 +858,4 @@ void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
 		else
 			err = ct_handle_request(ct, msg);
 	} while (!err);
-
-	if (GEM_WARN_ON(err == -EPROTO)) {
-		DRM_ERROR("CT: corrupted message detected!\n");
-		ctb->desc->is_in_error = 1;
-	}
 }
-
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index 3e7fe237cfa5..494a51a5200f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -49,7 +49,7 @@ struct intel_guc_ct {
 	struct intel_guc_ct_buffer ctbs[2];
 
 	struct {
-		u32 next_fence; /* fence to be used with next request to send */
+		u32 last_fence; /* last fence used to send request */
 
 		spinlock_t lock; /* protects pending requests list */
 		struct list_head pending; /* requests waiting for response */
@@ -65,6 +65,11 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct);
 int intel_guc_ct_enable(struct intel_guc_ct *ct);
 void intel_guc_ct_disable(struct intel_guc_ct *ct);
 
+static inline void intel_guc_ct_sanitize(struct intel_guc_ct *ct)
+{
+	ct->enabled = false;
+}
+
 static inline bool intel_guc_ct_enabled(struct intel_guc_ct *ct)
 {
 	return ct->enabled;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 9e42324fdecd..fe7778c28d2d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -456,9 +456,7 @@ static void guc_reset_cancel(struct intel_engine_cs *engine)
 
 	/* Mark all executing requests as skipped. */
 	list_for_each_entry(rq, &engine->active.requests, sched.link) {
-		if (!i915_request_signaled(rq))
-			dma_fence_set_error(&rq->fence, -EIO);
-
+		i915_request_set_error_once(rq, -EIO);
 		i915_request_mark_complete(rq);
 	}
 
@@ -660,12 +658,9 @@ void intel_guc_submission_disable(struct intel_guc *guc)
 	guc_proc_desc_fini(guc);
 }
 
-static bool __guc_submission_support(struct intel_guc *guc)
+static bool __guc_submission_selected(struct intel_guc *guc)
 {
-	/* XXX: GuC submission is unavailable for now */
-	return false;
-
-	if (!intel_guc_is_supported(guc))
+	if (!intel_guc_submission_is_supported(guc))
 		return false;
 
 	return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION;
@@ -673,7 +668,7 @@ static bool __guc_submission_support(struct intel_guc *guc)
 
 void intel_guc_submission_init_early(struct intel_guc *guc)
 {
-	guc->submission_supported = __guc_submission_support(guc);
+	guc->submission_selected = __guc_submission_selected(guc);
 }
 
 bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index e402a2932592..4cf9d3e50263 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -8,7 +8,8 @@
 
 #include <linux/types.h>
 
-struct intel_guc;
+#include "intel_guc.h"
+
 struct intel_engine_cs;
 
 void intel_guc_submission_init_early(struct intel_guc *guc);
@@ -20,4 +21,20 @@ int intel_guc_preempt_work_create(struct intel_guc *guc);
 void intel_guc_preempt_work_destroy(struct intel_guc *guc);
 bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine);
 
+static inline bool intel_guc_submission_is_supported(struct intel_guc *guc)
+{
+	/* XXX: GuC submission is unavailable for now */
+	return false;
+}
+
+static inline bool intel_guc_submission_is_wanted(struct intel_guc *guc)
+{
+	return guc->submission_selected;
+}
+
+static inline bool intel_guc_submission_is_used(struct intel_guc *guc)
+{
+	return intel_guc_is_used(guc) && intel_guc_submission_is_wanted(guc);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 32a069841c14..a74b65694512 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -121,19 +121,20 @@ int intel_huc_init(struct intel_huc *huc)
 	if (err)
 		goto out_fini;
 
+	intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOADABLE);
+
 	return 0;
 
 out_fini:
 	intel_uc_fw_fini(&huc->fw);
 out:
-	intel_uc_fw_cleanup_fetch(&huc->fw);
-	DRM_DEV_DEBUG_DRIVER(i915->drm.dev, "failed with %d\n", err);
+	i915_probe_error(i915, "failed with %d\n", err);
 	return err;
 }
 
 void intel_huc_fini(struct intel_huc *huc)
 {
-	if (!intel_uc_fw_is_available(&huc->fw))
+	if (!intel_uc_fw_is_loadable(&huc->fw))
 		return;
 
 	intel_huc_rsa_data_destroy(huc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index 644c059fe01d..a40b9cfc6c22 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -41,11 +41,17 @@ static inline bool intel_huc_is_supported(struct intel_huc *huc)
 	return intel_uc_fw_is_supported(&huc->fw);
 }
 
-static inline bool intel_huc_is_enabled(struct intel_huc *huc)
+static inline bool intel_huc_is_wanted(struct intel_huc *huc)
 {
 	return intel_uc_fw_is_enabled(&huc->fw);
 }
 
+static inline bool intel_huc_is_used(struct intel_huc *huc)
+{
+	GEM_BUG_ON(__intel_uc_fw_status(&huc->fw) == INTEL_UC_FIRMWARE_SELECTED);
+	return intel_uc_fw_is_available(&huc->fw);
+}
+
 static inline bool intel_huc_is_authenticated(struct intel_huc *huc)
 {
 	return intel_uc_fw_is_running(&huc->fw);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
index eee193bf2cc4..9cdf4cbe691c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -20,7 +20,7 @@ void intel_huc_fw_init_early(struct intel_huc *huc)
 	struct drm_i915_private *i915 = gt->i915;
 
 	intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC,
-			       intel_uc_uses_guc(uc),
+			       intel_uc_wants_guc(uc),
 			       INTEL_INFO(i915)->platform, INTEL_REVID(i915));
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 64934a876a50..a4cbe06e06bd 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -48,17 +48,17 @@ static void __confirm_options(struct intel_uc *uc)
 	DRM_DEV_DEBUG_DRIVER(i915->drm.dev,
 			     "enable_guc=%d (guc:%s submission:%s huc:%s)\n",
 			     i915_modparams.enable_guc,
-			     yesno(intel_uc_uses_guc(uc)),
-			     yesno(intel_uc_uses_guc_submission(uc)),
-			     yesno(intel_uc_uses_huc(uc)));
+			     yesno(intel_uc_wants_guc(uc)),
+			     yesno(intel_uc_wants_guc_submission(uc)),
+			     yesno(intel_uc_wants_huc(uc)));
 
 	if (i915_modparams.enable_guc == -1)
 		return;
 
 	if (i915_modparams.enable_guc == 0) {
-		GEM_BUG_ON(intel_uc_uses_guc(uc));
-		GEM_BUG_ON(intel_uc_uses_guc_submission(uc));
-		GEM_BUG_ON(intel_uc_uses_huc(uc));
+		GEM_BUG_ON(intel_uc_wants_guc(uc));
+		GEM_BUG_ON(intel_uc_wants_guc_submission(uc));
+		GEM_BUG_ON(intel_uc_wants_huc(uc));
 		return;
 	}
 
@@ -93,7 +93,7 @@ void intel_uc_init_early(struct intel_uc *uc)
 
 	__confirm_options(uc);
 
-	if (intel_uc_uses_guc(uc))
+	if (intel_uc_wants_guc(uc))
 		uc->ops = &uc_ops_on;
 	else
 		uc->ops = &uc_ops_off;
@@ -257,13 +257,13 @@ static void __uc_fetch_firmwares(struct intel_uc *uc)
 {
 	int err;
 
-	GEM_BUG_ON(!intel_uc_uses_guc(uc));
+	GEM_BUG_ON(!intel_uc_wants_guc(uc));
 
 	err = intel_uc_fw_fetch(&uc->guc.fw);
 	if (err)
 		return;
 
-	if (intel_uc_uses_huc(uc))
+	if (intel_uc_wants_huc(uc))
 		intel_uc_fw_fetch(&uc->huc.fw);
 }
 
@@ -273,25 +273,38 @@ static void __uc_cleanup_firmwares(struct intel_uc *uc)
 	intel_uc_fw_cleanup_fetch(&uc->guc.fw);
 }
 
-static void __uc_init(struct intel_uc *uc)
+static int __uc_init(struct intel_uc *uc)
 {
 	struct intel_guc *guc = &uc->guc;
 	struct intel_huc *huc = &uc->huc;
 	int ret;
 
-	GEM_BUG_ON(!intel_uc_uses_guc(uc));
+	GEM_BUG_ON(!intel_uc_wants_guc(uc));
+
+	if (!intel_uc_uses_guc(uc))
+		return 0;
+
+	if (i915_inject_probe_failure(uc_to_gt(uc)->i915))
+		return -ENOMEM;
 
 	/* XXX: GuC submission is unavailable for now */
-	GEM_BUG_ON(intel_uc_supports_guc_submission(uc));
+	GEM_BUG_ON(intel_uc_uses_guc_submission(uc));
 
 	ret = intel_guc_init(guc);
-	if (ret) {
-		intel_uc_fw_cleanup_fetch(&huc->fw);
-		return;
+	if (ret)
+		return ret;
+
+	if (intel_uc_uses_huc(uc)) {
+		ret = intel_huc_init(huc);
+		if (ret)
+			goto out_guc;
 	}
 
-	if (intel_uc_uses_huc(uc))
-		intel_huc_init(huc);
+	return 0;
+
+out_guc:
+	intel_guc_fini(guc);
+	return ret;
 }
 
 static void __uc_fini(struct intel_uc *uc)
@@ -402,12 +415,12 @@ static int __uc_init_hw(struct intel_uc *uc)
 	int ret, attempts;
 
 	GEM_BUG_ON(!intel_uc_supports_guc(uc));
-	GEM_BUG_ON(!intel_uc_uses_guc(uc));
+	GEM_BUG_ON(!intel_uc_wants_guc(uc));
 
-	if (!intel_uc_fw_is_available(&guc->fw)) {
+	if (!intel_uc_fw_is_loadable(&guc->fw)) {
 		ret = __uc_check_hw(uc) ||
 		      intel_uc_fw_is_overridden(&guc->fw) ||
-		      intel_uc_supports_guc_submission(uc) ?
+		      intel_uc_wants_guc_submission(uc) ?
 		      intel_uc_fw_status_to_error(guc->fw.status) : 0;
 		goto err_out;
 	}
@@ -459,14 +472,14 @@ static int __uc_init_hw(struct intel_uc *uc)
 	if (ret)
 		goto err_communication;
 
-	if (intel_uc_supports_guc_submission(uc))
+	if (intel_uc_uses_guc_submission(uc))
 		intel_guc_submission_enable(guc);
 
 	dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
 		 intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path,
 		 guc->fw.major_ver_found, guc->fw.minor_ver_found,
 		 "submission",
-		 enableddisabled(intel_uc_supports_guc_submission(uc)));
+		 enableddisabled(intel_uc_uses_guc_submission(uc)));
 
 	if (intel_uc_uses_huc(uc)) {
 		dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
@@ -505,10 +518,10 @@ static void __uc_fini_hw(struct intel_uc *uc)
 {
 	struct intel_guc *guc = &uc->guc;
 
-	if (!intel_guc_is_running(guc))
+	if (!intel_guc_is_fw_running(guc))
 		return;
 
-	if (intel_uc_supports_guc_submission(uc))
+	if (intel_uc_uses_guc_submission(uc))
 		intel_guc_submission_disable(guc);
 
 	if (guc_communication_enabled(guc))
@@ -527,7 +540,7 @@ void intel_uc_reset_prepare(struct intel_uc *uc)
 {
 	struct intel_guc *guc = &uc->guc;
 
-	if (!intel_guc_is_running(guc))
+	if (!intel_guc_is_ready(guc))
 		return;
 
 	guc_disable_communication(guc);
@@ -539,7 +552,7 @@ void intel_uc_runtime_suspend(struct intel_uc *uc)
 	struct intel_guc *guc = &uc->guc;
 	int err;
 
-	if (!intel_guc_is_running(guc))
+	if (!intel_guc_is_ready(guc))
 		return;
 
 	err = intel_guc_suspend(guc);
@@ -554,7 +567,7 @@ void intel_uc_suspend(struct intel_uc *uc)
 	struct intel_guc *guc = &uc->guc;
 	intel_wakeref_t wakeref;
 
-	if (!intel_guc_is_running(guc))
+	if (!intel_guc_is_ready(guc))
 		return;
 
 	with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref)
@@ -566,7 +579,7 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication)
 	struct intel_guc *guc = &uc->guc;
 	int err;
 
-	if (!intel_guc_is_running(guc))
+	if (!intel_guc_is_fw_running(guc))
 		return 0;
 
 	/* Make sure we enable communication if and only if it's disabled */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index 49c913524686..5ae7b50b7dc1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -7,6 +7,7 @@
 #define _INTEL_UC_H_
 
 #include "intel_guc.h"
+#include "intel_guc_submission.h"
 #include "intel_huc.h"
 #include "i915_params.h"
 
@@ -16,7 +17,7 @@ struct intel_uc_ops {
 	int (*sanitize)(struct intel_uc *uc);
 	void (*init_fw)(struct intel_uc *uc);
 	void (*fini_fw)(struct intel_uc *uc);
-	void (*init)(struct intel_uc *uc);
+	int (*init)(struct intel_uc *uc);
 	void (*fini)(struct intel_uc *uc);
 	int (*init_hw)(struct intel_uc *uc);
 	void (*fini_hw)(struct intel_uc *uc);
@@ -40,35 +41,44 @@ void intel_uc_runtime_suspend(struct intel_uc *uc);
 int intel_uc_resume(struct intel_uc *uc);
 int intel_uc_runtime_resume(struct intel_uc *uc);
 
-static inline bool intel_uc_supports_guc(struct intel_uc *uc)
-{
-	return intel_guc_is_supported(&uc->guc);
-}
-
-static inline bool intel_uc_uses_guc(struct intel_uc *uc)
-{
-	return intel_guc_is_enabled(&uc->guc);
-}
+/*
+ * We need to know as early as possible if we're going to use GuC or not to
+ * take the correct setup paths. Additionally, once we've started loading the
+ * GuC, it is unsafe to keep executing without it because some parts of the HW,
+ * a subset of which is not cleaned on GT reset, will start expecting the GuC FW
+ * to be running.
+ * To solve both these requirements, we commit to using the microcontrollers if
+ * the relevant modparam is set and the blobs are found on the system. At this
+ * stage, the only thing that can stop us from attempting to load the blobs on
+ * the HW and use them is a fundamental issue (e.g. no memory for our
+ * structures); if we hit such a problem during driver load we're broken even
+ * without GuC, so there is no point in trying to fall back.
+ *
+ * Given the above, we can be in one of 4 states, with the last one implying
+ * we're committed to using the microcontroller:
+ * - Not supported: not available in HW and/or firmware not defined.
+ * - Supported: available in HW and firmware defined.
+ * - Wanted: supported + enabled in modparam.
+ * - In use: wanted + firmware found on the system and successfully fetched.
+ */
 
-static inline bool intel_uc_supports_guc_submission(struct intel_uc *uc)
-{
-	return intel_guc_is_submission_supported(&uc->guc);
+#define __uc_state_checker(x, func, state, required) \
+static inline bool intel_uc_##state##_##func(struct intel_uc *uc) \
+{ \
+	return intel_##func##_is_##required(&uc->x); \
 }
 
-static inline bool intel_uc_uses_guc_submission(struct intel_uc *uc)
-{
-	return intel_guc_is_submission_supported(&uc->guc);
-}
+#define uc_state_checkers(x, func) \
+__uc_state_checker(x, func, supports, supported) \
+__uc_state_checker(x, func, wants, wanted) \
+__uc_state_checker(x, func, uses, used)
 
-static inline bool intel_uc_supports_huc(struct intel_uc *uc)
-{
-	return intel_uc_supports_guc(uc);
-}
+uc_state_checkers(guc, guc);
+uc_state_checkers(huc, huc);
+uc_state_checkers(guc, guc_submission);
 
-static inline bool intel_uc_uses_huc(struct intel_uc *uc)
-{
-	return intel_huc_is_enabled(&uc->huc);
-}
+#undef uc_state_checkers
+#undef __uc_state_checker
 
 #define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \
 static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \
@@ -80,7 +90,7 @@ static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \
 intel_uc_ops_function(sanitize, sanitize, int, 0);
 intel_uc_ops_function(fetch_firmwares, init_fw, void, );
 intel_uc_ops_function(cleanup_firmwares, fini_fw, void, );
-intel_uc_ops_function(init, init, void, );
+intel_uc_ops_function(init, init, int, 0);
 intel_uc_ops_function(fini, fini, void, );
 intel_uc_ops_function(init_hw, init_hw, int, 0);
 intel_uc_ops_function(fini_hw, fini_hw, void, );
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 8ee0a0c7f447..18c755203688 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -43,7 +43,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
  * features.
  */
 #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
-	fw_def(TIGERLAKE,   0, guc_def(tgl, 35, 2, 0), huc_def(tgl,  7, 0, 3)) \
+	fw_def(TIGERLAKE,   0, guc_def(tgl, 35, 2, 0), huc_def(tgl,  7, 0, 12)) \
 	fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl,  9, 0, 0)) \
 	fw_def(ICELAKE,     0, guc_def(icl, 33, 0, 0), huc_def(icl,  9, 0, 0)) \
 	fw_def(COFFEELAKE,  5, guc_def(cml, 33, 0, 0), huc_def(cml,  4, 0, 0)) \
@@ -279,7 +279,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
 
 	err = i915_inject_probe_error(i915, -ENXIO);
 	if (err)
-		return err;
+		goto fail;
 
 	__force_fw_fetch_failures(uc_fw, -EINVAL);
 	__force_fw_fetch_failures(uc_fw, -ESTALE);
@@ -501,7 +501,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
 	if (err)
 		return err;
 
-	if (!intel_uc_fw_is_available(uc_fw))
+	if (!intel_uc_fw_is_loadable(uc_fw))
 		return -ENOEXEC;
 
 	/* Call custom loader */
@@ -544,7 +544,10 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
 
 void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
 {
-	intel_uc_fw_cleanup_fetch(uc_fw);
+	if (i915_gem_object_has_pinned_pages(uc_fw->obj))
+		i915_gem_object_unpin_pages(uc_fw->obj);
+
+	intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 1f30543d0d2d..888ff0de0244 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -29,8 +29,11 @@ struct intel_gt;
  * |            |                 SELECTED                          |
  * +------------+-               /   |   \                         -+
  * |            |    MISSING <--/    |    \--> ERROR                |
- * |   fetch    |                    |                              |
- * |            |        /------> AVAILABLE <---<-----------\       |
+ * |   fetch    |                    V                              |
+ * |            |                 AVAILABLE                         |
+ * +------------+-                   |                             -+
+ * |   init     |                    V                              |
+ * |            |        /------> LOADABLE <----<-----------\       |
  * +------------+-       \         /    \        \           \     -+
  * |            |         FAIL <--<      \--> TRANSFERRED     \     |
  * |   upload   |                  \           /   \          /     |
@@ -46,6 +49,7 @@ enum intel_uc_fw_status {
 	INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */
 	INTEL_UC_FIRMWARE_ERROR, /* invalid format or version */
 	INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
+	INTEL_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */
 	INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */
 	INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
 	INTEL_UC_FIRMWARE_RUNNING /* init/auth done */
@@ -115,6 +119,8 @@ const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
 		return "ERROR";
 	case INTEL_UC_FIRMWARE_AVAILABLE:
 		return "AVAILABLE";
+	case INTEL_UC_FIRMWARE_LOADABLE:
+		return "LOADABLE";
 	case INTEL_UC_FIRMWARE_FAIL:
 		return "FAIL";
 	case INTEL_UC_FIRMWARE_TRANSFERRED:
@@ -143,6 +149,7 @@ static inline int intel_uc_fw_status_to_error(enum intel_uc_fw_status status)
 	case INTEL_UC_FIRMWARE_SELECTED:
 		return -ESTALE;
 	case INTEL_UC_FIRMWARE_AVAILABLE:
+	case INTEL_UC_FIRMWARE_LOADABLE:
 	case INTEL_UC_FIRMWARE_TRANSFERRED:
 	case INTEL_UC_FIRMWARE_RUNNING:
 		return 0;
@@ -184,6 +191,11 @@ static inline bool intel_uc_fw_is_available(struct intel_uc_fw *uc_fw)
 	return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_AVAILABLE;
 }
 
+static inline bool intel_uc_fw_is_loadable(struct intel_uc_fw *uc_fw)
+{
+	return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_LOADABLE;
+}
+
 static inline bool intel_uc_fw_is_loaded(struct intel_uc_fw *uc_fw)
 {
 	return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_TRANSFERRED;
@@ -202,7 +214,7 @@ static inline bool intel_uc_fw_is_overridden(const struct intel_uc_fw *uc_fw)
 static inline void intel_uc_fw_sanitize(struct intel_uc_fw *uc_fw)
 {
 	if (intel_uc_fw_is_loaded(uc_fw))
-		intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE);
+		intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOADABLE);
 }
 
 static inline u32 __intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)