1 files changed, 437 insertions, 298 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 5dd84e148bba..3c97f0e7a003 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,8 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 
+#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
+
 /* This is a HW constraint. The value below is the largest known requirement
  * I've seen in a spec to date, and that was a workaround for a non-shipping
  * part. It should be safe to decrease this, but it's more future proof as is.
@@ -97,28 +99,27 @@
 #define GEN6_CONTEXT_ALIGN (64<<10)
 #define GEN7_CONTEXT_ALIGN 4096
 
-static size_t get_context_alignment(struct drm_device *dev)
+static size_t get_context_alignment(struct drm_i915_private *dev_priv)
 {
-	if (IS_GEN6(dev))
+	if (IS_GEN6(dev_priv))
 		return GEN6_CONTEXT_ALIGN;
 
 	return GEN7_CONTEXT_ALIGN;
 }
 
-static int get_context_size(struct drm_device *dev)
+static int get_context_size(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 	u32 reg;
 
-	switch (INTEL_INFO(dev)->gen) {
+	switch (INTEL_GEN(dev_priv)) {
 	case 6:
 		reg = I915_READ(CXT_SIZE);
 		ret = GEN6_CXT_TOTAL_SIZE(reg) * 64;
 		break;
 	case 7:
 		reg = I915_READ(GEN7_CXT_SIZE);
-		if (IS_HASWELL(dev))
+		if (IS_HASWELL(dev_priv))
 			ret = HSW_CXT_TOTAL_SIZE;
 		else
 			ret = GEN7_CXT_TOTAL_SIZE(reg) * 64;
@@ -133,7 +134,7 @@ static int get_context_size(struct drm_device *dev)
 	return ret;
 }
 
-static void i915_gem_context_clean(struct intel_context *ctx)
+static void i915_gem_context_clean(struct i915_gem_context *ctx)
 {
 	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
 	struct i915_vma *vma, *next;
@@ -150,13 +151,12 @@ static void i915_gem_context_clean(struct intel_context *ctx)
 
 void i915_gem_context_free(struct kref *ctx_ref)
 {
-	struct intel_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
+	struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
+	int i;
 
+	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	trace_i915_context_free(ctx);
 
-	if (i915.enable_execlists)
-		intel_lr_context_free(ctx);
-
 	/*
 	 * This context is going away and we need to remove all VMAs still
 	 * around. This is to handle imported shared objects for which
@@ -166,9 +166,22 @@ void i915_gem_context_free(struct kref *ctx_ref)
 
 	i915_ppgtt_put(ctx->ppgtt);
 
-	if (ctx->legacy_hw_ctx.rcs_state)
-		drm_gem_object_unreference(&ctx->legacy_hw_ctx.rcs_state->base);
+	for (i = 0; i < I915_NUM_ENGINES; i++) {
+		struct intel_context *ce = &ctx->engine[i];
+
+		if (!ce->state)
+			continue;
+
+		WARN_ON(ce->pin_count);
+		if (ce->ringbuf)
+			intel_ringbuffer_free(ce->ringbuf);
+
+		drm_gem_object_unreference(&ce->state->base);
+	}
+
 	list_del(&ctx->link);
+
+	ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
 	kfree(ctx);
 }
 
@@ -178,9 +191,11 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
 	struct drm_i915_gem_object *obj;
 	int ret;
 
-	obj = i915_gem_alloc_object(dev, size);
-	if (obj == NULL)
-		return ERR_PTR(-ENOMEM);
+	lockdep_assert_held(&dev->struct_mutex);
+
+	obj = i915_gem_object_create(dev, size);
+	if (IS_ERR(obj))
+		return obj;
 
 	/*
 	 * Try to make the context utilize L3 as well as LLC.
@@ -209,22 +224,52 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
 	return obj;
 }
 
-static struct intel_context *
+static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
+{
+	int ret;
+
+	ret = ida_simple_get(&dev_priv->context_hw_ida,
+			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
+	if (ret < 0) {
+		/* Contexts are only released when no longer active.
+		 * Flush any pending retires to hopefully release some
+		 * stale contexts and try again.
+		 */
+		i915_gem_retire_requests(dev_priv);
+		ret = ida_simple_get(&dev_priv->context_hw_ida,
+				     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	}
+
+	*out = ret;
+	return 0;
+}
+
+static struct i915_gem_context *
 __create_hw_context(struct drm_device *dev,
 		    struct drm_i915_file_private *file_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_context *ctx;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_gem_context *ctx;
 	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (ctx == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	ret = assign_hw_id(dev_priv, &ctx->hw_id);
+	if (ret) {
+		kfree(ctx);
+		return ERR_PTR(ret);
+	}
+
 	kref_init(&ctx->ref);
 	list_add_tail(&ctx->link, &dev_priv->context_list);
 	ctx->i915 = dev_priv;
 
+	ctx->ggtt_alignment = get_context_alignment(dev_priv);
+
 	if (dev_priv->hw_context_size) {
 		struct drm_i915_gem_object *obj =
 				i915_gem_alloc_context_obj(dev, dev_priv->hw_context_size);
@@ -232,7 +277,7 @@ __create_hw_context(struct drm_device *dev,
 			ret = PTR_ERR(obj);
 			goto err_out;
 		}
-		ctx->legacy_hw_ctx.rcs_state = obj;
+		ctx->engine[RCS].state = obj;
 	}
 
 	/* Default context will never have a file_priv */
@@ -249,9 +294,13 @@ __create_hw_context(struct drm_device *dev,
 	/* NB: Mark all slices as needing a remap so that when the context first
 	 * loads it will restore whatever remap state already exists. If there
 	 * is no remap info, it will be a NOP. */
-	ctx->remap_slice = (1 << NUM_L3_SLICES(dev)) - 1;
+	ctx->remap_slice = ALL_L3_SLICES(dev_priv);
 
 	ctx->hang_stats.ban_period_seconds = DRM_I915_CTX_BAN_PERIOD;
+	ctx->ring_size = 4 * PAGE_SIZE;
+	ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
+			     GEN8_CTX_ADDRESSING_MODE_SHIFT;
+	ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier);
 
 	return ctx;
 
@@ -265,44 +314,27 @@ err_out:
  * context state of the GPU for applications that don't utilize HW contexts, as
  * well as an idle case.
  */
-static struct intel_context *
+static struct i915_gem_context *
 i915_gem_create_context(struct drm_device *dev,
 			struct drm_i915_file_private *file_priv)
 {
-	const bool is_global_default_ctx = file_priv == NULL;
-	struct intel_context *ctx;
-	int ret = 0;
+	struct i915_gem_context *ctx;
 
-	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
+	lockdep_assert_held(&dev->struct_mutex);
 
 	ctx = __create_hw_context(dev, file_priv);
 	if (IS_ERR(ctx))
 		return ctx;
 
-	if (is_global_default_ctx && ctx->legacy_hw_ctx.rcs_state) {
-		/* We may need to do things with the shrinker which
-		 * require us to immediately switch back to the default
-		 * context. This can cause a problem as pinning the
-		 * default context also requires GTT space which may not
-		 * be available. To avoid this we always pin the default
-		 * context.
-		 */
-		ret = i915_gem_obj_ggtt_pin(ctx->legacy_hw_ctx.rcs_state,
-					    get_context_alignment(dev), 0);
-		if (ret) {
-			DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret);
-			goto err_destroy;
-		}
-	}
-
 	if (USES_FULL_PPGTT(dev)) {
 		struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv);
 
-		if (IS_ERR_OR_NULL(ppgtt)) {
+		if (IS_ERR(ppgtt)) {
 			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
 					 PTR_ERR(ppgtt));
-			ret = PTR_ERR(ppgtt);
-			goto err_unpin;
+			idr_remove(&file_priv->context_idr, ctx->user_handle);
+			i915_gem_context_unreference(ctx);
+			return ERR_CAST(ppgtt);
 		}
 
 		ctx->ppgtt = ppgtt;
@@ -311,76 +343,102 @@ i915_gem_create_context(struct drm_device *dev,
 	trace_i915_context_create(ctx);
 
 	return ctx;
+}
 
-err_unpin:
-	if (is_global_default_ctx && ctx->legacy_hw_ctx.rcs_state)
-		i915_gem_object_ggtt_unpin(ctx->legacy_hw_ctx.rcs_state);
-err_destroy:
-	idr_remove(&file_priv->context_idr, ctx->user_handle);
-	i915_gem_context_unreference(ctx);
-	return ERR_PTR(ret);
+/**
+ * i915_gem_context_create_gvt - create a GVT GEM context
+ * @dev: drm device *
+ *
+ * This function is used to create a GVT specific GEM context.
+ *
+ * Returns:
+ * pointer to i915_gem_context on success, error pointer if failed
+ *
+ */
+struct i915_gem_context *
+i915_gem_context_create_gvt(struct drm_device *dev)
+{
+	struct i915_gem_context *ctx;
+	int ret;
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
+		return ERR_PTR(-ENODEV);
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ctx = i915_gem_create_context(dev, NULL);
+	if (IS_ERR(ctx))
+		goto out;
+
+	ctx->execlists_force_single_submission = true;
+	ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
+out:
+	mutex_unlock(&dev->struct_mutex);
+	return ctx;
 }
 
-static void i915_gem_context_unpin(struct intel_context *ctx,
+static void i915_gem_context_unpin(struct i915_gem_context *ctx,
 				   struct intel_engine_cs *engine)
 {
 	if (i915.enable_execlists) {
 		intel_lr_context_unpin(ctx, engine);
 	} else {
-		if (engine->id == RCS && ctx->legacy_hw_ctx.rcs_state)
-			i915_gem_object_ggtt_unpin(ctx->legacy_hw_ctx.rcs_state);
+		struct intel_context *ce = &ctx->engine[engine->id];
+
+		if (ce->state)
+			i915_gem_object_ggtt_unpin(ce->state);
+
 		i915_gem_context_unreference(ctx);
 	}
 }
 
 void i915_gem_context_reset(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int i;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+
+	lockdep_assert_held(&dev->struct_mutex);
 
 	if (i915.enable_execlists) {
-		struct intel_context *ctx;
+		struct i915_gem_context *ctx;
 
 		list_for_each_entry(ctx, &dev_priv->context_list, link)
-			intel_lr_context_reset(dev, ctx);
+			intel_lr_context_reset(dev_priv, ctx);
 	}
 
-	for (i = 0; i < I915_NUM_RINGS; i++) {
-		struct intel_engine_cs *ring = &dev_priv->ring[i];
-
-		if (ring->last_context) {
-			i915_gem_context_unpin(ring->last_context, ring);
-			ring->last_context = NULL;
-		}
-	}
-
-	/* Force the GPU state to be reinitialised on enabling */
-	dev_priv->kernel_context->legacy_hw_ctx.initialized = false;
+	i915_gem_context_lost(dev_priv);
 }
 
 int i915_gem_context_init(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_context *ctx;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_gem_context *ctx;
 
 	/* Init should only be called once per module load. Eventually the
 	 * restriction on the context_disabled check can be loosened. */
 	if (WARN_ON(dev_priv->kernel_context))
 		return 0;
 
-	if (intel_vgpu_active(dev) && HAS_LOGICAL_RING_CONTEXTS(dev)) {
+	if (intel_vgpu_active(dev_priv) &&
+	    HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
 		if (!i915.enable_execlists) {
 			DRM_INFO("Only EXECLIST mode is supported in vgpu.\n");
 			return -EINVAL;
 		}
 	}
 
+	/* Using the simple ida interface, the max is limited by sizeof(int) */
+	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
+	ida_init(&dev_priv->context_hw_ida);
+
 	if (i915.enable_execlists) {
 		/* NB: intentionally left blank. We will allocate our own
 		 * backing objects as we need them, thank you very much */
 		dev_priv->hw_context_size = 0;
-	} else if (HAS_HW_CONTEXTS(dev)) {
-		dev_priv->hw_context_size = round_up(get_context_size(dev), 4096);
+	} else if (HAS_HW_CONTEXTS(dev_priv)) {
+		dev_priv->hw_context_size =
+			round_up(get_context_size(dev_priv), 4096);
 		if (dev_priv->hw_context_size > (1<<20)) {
 			DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n",
 					 dev_priv->hw_context_size);
@@ -403,67 +461,60 @@ int i915_gem_context_init(struct drm_device *dev)
 	return 0;
 }
 
-void i915_gem_context_fini(struct drm_device *dev)
+void i915_gem_context_lost(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_context *dctx = dev_priv->kernel_context;
-	int i;
-
-	if (dctx->legacy_hw_ctx.rcs_state) {
-		/* The only known way to stop the gpu from accessing the hw context is
-		 * to reset it. Do this as the very last operation to avoid confusing
-		 * other code, leading to spurious errors. */
-		intel_gpu_reset(dev);
+	struct intel_engine_cs *engine;
 
-		/* When default context is created and switched to, base object refcount
-		 * will be 2 (+1 from object creation and +1 from do_switch()).
-		 * i915_gem_context_fini() will be called after gpu_idle() has switched
-		 * to default context. So we need to unreference the base object once
-		 * to offset the do_switch part, so that i915_gem_context_unreference()
-		 * can then free the base object correctly. */
-		WARN_ON(!dev_priv->ring[RCS].last_context);
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-		i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
+	for_each_engine(engine, dev_priv) {
+		if (engine->last_context) {
+			i915_gem_context_unpin(engine->last_context, engine);
+			engine->last_context = NULL;
+		}
 	}
 
-	for (i = I915_NUM_RINGS; --i >= 0;) {
-		struct intel_engine_cs *ring = &dev_priv->ring[i];
+	/* Force the GPU state to be restored on enabling */
+	if (!i915.enable_execlists) {
+		struct i915_gem_context *ctx;
+
+		list_for_each_entry(ctx, &dev_priv->context_list, link) {
+			if (!i915_gem_context_is_default(ctx))
+				continue;
 
-		if (ring->last_context) {
-			i915_gem_context_unpin(ring->last_context, ring);
-			ring->last_context = NULL;
+			for_each_engine(engine, dev_priv)
+				ctx->engine[engine->id].initialised = false;
+
+			ctx->remap_slice = ALL_L3_SLICES(dev_priv);
 		}
-	}
 
-	i915_gem_context_unreference(dctx);
-	dev_priv->kernel_context = NULL;
+		for_each_engine(engine, dev_priv) {
+			struct intel_context *kce =
+				&dev_priv->kernel_context->engine[engine->id];
+
+			kce->initialised = true;
+		}
+	}
 }
 
-int i915_gem_context_enable(struct drm_i915_gem_request *req)
+void i915_gem_context_fini(struct drm_device *dev)
 {
-	struct intel_engine_cs *ring = req->ring;
-	int ret;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_gem_context *dctx = dev_priv->kernel_context;
 
-	if (i915.enable_execlists) {
-		if (ring->init_context == NULL)
-			return 0;
+	lockdep_assert_held(&dev->struct_mutex);
 
-		ret = ring->init_context(req);
-	} else
-		ret = i915_switch_context(req);
-
-	if (ret) {
-		DRM_ERROR("ring init context: %d\n", ret);
-		return ret;
-	}
+	i915_gem_context_unreference(dctx);
+	dev_priv->kernel_context = NULL;
 
-	return 0;
+	ida_destroy(&dev_priv->context_hw_ida);
 }
 
 static int context_idr_cleanup(int id, void *p, void *data)
 {
-	struct intel_context *ctx = p;
+	struct i915_gem_context *ctx = p;
 
+	ctx->file_priv = ERR_PTR(-EBADF);
 	i915_gem_context_unreference(ctx);
 	return 0;
 }
@@ -471,7 +522,7 @@ static int context_idr_cleanup(int id, void *p, void *data)
 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct intel_context *ctx;
+	struct i915_gem_context *ctx;
 
 	idr_init(&file_priv->context_idr);
 
@@ -491,152 +542,195 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
+	lockdep_assert_held(&dev->struct_mutex);
+
 	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
 	idr_destroy(&file_priv->context_idr);
 }
 
-struct intel_context *
-i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
-{
-	struct intel_context *ctx;
-
-	ctx = (struct intel_context *)idr_find(&file_priv->context_idr, id);
-	if (!ctx)
-		return ERR_PTR(-ENOENT);
-
-	return ctx;
-}
-
 static inline int
 mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 {
-	struct intel_engine_cs *ring = req->ring;
+	struct drm_i915_private *dev_priv = req->i915;
+	struct intel_engine_cs *engine = req->engine;
 	u32 flags = hw_flags | MI_MM_SPACE_GTT;
 	const int num_rings =
 		/* Use an extended w/a on ivb+ if signalling from other rings */
-		i915_semaphore_is_enabled(ring->dev) ?
-		hweight32(INTEL_INFO(ring->dev)->ring_mask) - 1 :
+		i915_semaphore_is_enabled(dev_priv) ?
+		hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 :
 		0;
-	int len, i, ret;
+	int len, ret;
 
 	/* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB
 	 * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value
 	 * explicitly, so we rely on the value at ring init, stored in
 	 * itlb_before_ctx_switch.
 	 */
-	if (IS_GEN6(ring->dev)) {
-		ret = ring->flush(req, I915_GEM_GPU_DOMAINS, 0);
+	if (IS_GEN6(dev_priv)) {
+		ret = engine->flush(req, I915_GEM_GPU_DOMAINS, 0);
 		if (ret)
 			return ret;
 	}
 
 	/* These flags are for resource streamer on HSW+ */
-	if (IS_HASWELL(ring->dev) || INTEL_INFO(ring->dev)->gen >= 8)
+	if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
 		flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN);
-	else if (INTEL_INFO(ring->dev)->gen < 8)
+	else if (INTEL_GEN(dev_priv) < 8)
 		flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
 
 
 	len = 4;
-	if (INTEL_INFO(ring->dev)->gen >= 7)
-		len += 2 + (num_rings ? 4*num_rings + 2 : 0);
+	if (INTEL_GEN(dev_priv) >= 7)
+		len += 2 + (num_rings ? 4*num_rings + 6 : 0);
 
 	ret = intel_ring_begin(req, len);
 	if (ret)
 		return ret;
 
 	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
-	if (INTEL_INFO(ring->dev)->gen >= 7) {
-		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
+	if (INTEL_GEN(dev_priv) >= 7) {
+		intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_DISABLE);
 		if (num_rings) {
 			struct intel_engine_cs *signaller;
 
-			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
-			for_each_ring(signaller, to_i915(ring->dev), i) {
-				if (signaller == ring)
+			intel_ring_emit(engine,
+					MI_LOAD_REGISTER_IMM(num_rings));
+			for_each_engine(signaller, dev_priv) {
+				if (signaller == engine)
 					continue;
 
-				intel_ring_emit_reg(ring, RING_PSMI_CTL(signaller->mmio_base));
-				intel_ring_emit(ring, _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
+				intel_ring_emit_reg(engine,
+						    RING_PSMI_CTL(signaller->mmio_base));
+				intel_ring_emit(engine,
+						_MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
 			}
 		}
 	}
 
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_emit(ring, MI_SET_CONTEXT);
-	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(req->ctx->legacy_hw_ctx.rcs_state) |
+	intel_ring_emit(engine, MI_NOOP);
+	intel_ring_emit(engine, MI_SET_CONTEXT);
+	intel_ring_emit(engine,
+			i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) |
 			flags);
 	/*
 	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 	 * WaMiSetContext_Hang:snb,ivb,vlv
 	 */
-	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_emit(engine, MI_NOOP);
 
-	if (INTEL_INFO(ring->dev)->gen >= 7) {
+	if (INTEL_GEN(dev_priv) >= 7) {
 		if (num_rings) {
 			struct intel_engine_cs *signaller;
+			i915_reg_t last_reg = {}; /* keep gcc quiet */
 
-			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
-			for_each_ring(signaller, to_i915(ring->dev), i) {
-				if (signaller == ring)
+			intel_ring_emit(engine,
+					MI_LOAD_REGISTER_IMM(num_rings));
+			for_each_engine(signaller, dev_priv) {
+				if (signaller == engine)
 					continue;
 
-				intel_ring_emit_reg(ring, RING_PSMI_CTL(signaller->mmio_base));
-				intel_ring_emit(ring, _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
+				last_reg = RING_PSMI_CTL(signaller->mmio_base);
+				intel_ring_emit_reg(engine, last_reg);
+				intel_ring_emit(engine,
+						_MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
 			}
+
+			/* Insert a delay before the next switch! */
+			intel_ring_emit(engine,
+					MI_STORE_REGISTER_MEM |
+					MI_SRM_LRM_GLOBAL_GTT);
+			intel_ring_emit_reg(engine, last_reg);
+			intel_ring_emit(engine, engine->scratch.gtt_offset);
+			intel_ring_emit(engine, MI_NOOP);
 		}
-		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
+		intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_ENABLE);
 	}
 
-	intel_ring_advance(ring);
+	intel_ring_advance(engine);
 
 	return ret;
 }
 
-static inline bool should_skip_switch(struct intel_engine_cs *ring,
-				      struct intel_context *from,
-				      struct intel_context *to)
+static int remap_l3(struct drm_i915_gem_request *req, int slice)
+{
+	u32 *remap_info = req->i915->l3_parity.remap_info[slice];
+	struct intel_engine_cs *engine = req->engine;
+	int i, ret;
+
+	if (!remap_info)
+		return 0;
+
+	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
+	if (ret)
+		return ret;
+
+	/*
+	 * Note: We do not worry about the concurrent register cacheline hang
+	 * here because no other code should access these registers other than
+	 * at initialization time.
+	 */
+	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
+	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
+		intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
+		intel_ring_emit(engine, remap_info[i]);
+	}
+	intel_ring_emit(engine, MI_NOOP);
+	intel_ring_advance(engine);
+
+	return 0;
+}
+
+static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
+				   struct intel_engine_cs *engine,
+				   struct i915_gem_context *to)
 {
 	if (to->remap_slice)
 		return false;
 
-	if (to->ppgtt && from == to &&
-	    !(intel_ring_flag(ring) & to->ppgtt->pd_dirty_rings))
-		return true;
+	if (!to->engine[RCS].initialised)
+		return false;
 
-	return false;
+	if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
+		return false;
+
+	return to == engine->last_context;
 }
 
 static bool
-needs_pd_load_pre(struct intel_engine_cs *ring, struct intel_context *to)
+needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt,
+		  struct intel_engine_cs *engine,
+		  struct i915_gem_context *to)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	if (!ppgtt)
+		return false;
+
+	/* Always load the ppgtt on first use */
+	if (!engine->last_context)
+		return true;
 
-	if (!to->ppgtt)
+	/* Same context without new entries, skip */
+	if (engine->last_context == to &&
+	    !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
 		return false;
 
-	if (INTEL_INFO(ring->dev)->gen < 8)
+	if (engine->id != RCS)
 		return true;
 
-	if (ring != &dev_priv->ring[RCS])
+	if (INTEL_GEN(engine->i915) < 8)
 		return true;
 
 	return false;
 }
 
 static bool
-needs_pd_load_post(struct intel_engine_cs *ring, struct intel_context *to,
-		u32 hw_flags)
+needs_pd_load_post(struct i915_hw_ppgtt *ppgtt,
+		   struct i915_gem_context *to,
+		   u32 hw_flags)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-
-	if (!to->ppgtt)
-		return false;
-
-	if (!IS_GEN8(ring->dev))
+	if (!ppgtt)
 		return false;
 
-	if (ring != &dev_priv->ring[RCS])
+	if (!IS_GEN8(to->i915))
 		return false;
 
 	if (hw_flags & MI_RESTORE_INHIBIT)
@@ -645,58 +739,33 @@ needs_pd_load_post(struct intel_engine_cs *ring, struct intel_context *to,
 	return false;
 }
 
-static int do_switch(struct drm_i915_gem_request *req)
+static int do_rcs_switch(struct drm_i915_gem_request *req)
 {
-	struct intel_context *to = req->ctx;
-	struct intel_engine_cs *ring = req->ring;
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	struct intel_context *from = ring->last_context;
-	u32 hw_flags = 0;
-	bool uninitialized = false;
+	struct i915_gem_context *to = req->ctx;
+	struct intel_engine_cs *engine = req->engine;
+	struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
+	struct i915_gem_context *from;
+	u32 hw_flags;
 	int ret, i;
 
-	if (from != NULL && ring == &dev_priv->ring[RCS]) {
-		BUG_ON(from->legacy_hw_ctx.rcs_state == NULL);
-		BUG_ON(!i915_gem_obj_is_pinned(from->legacy_hw_ctx.rcs_state));
-	}
-
-	if (should_skip_switch(ring, from, to))
+	if (skip_rcs_switch(ppgtt, engine, to))
 		return 0;
 
 	/* Trying to pin first makes error handling easier. */
-	if (ring == &dev_priv->ring[RCS]) {
-		ret = i915_gem_obj_ggtt_pin(to->legacy_hw_ctx.rcs_state,
-					    get_context_alignment(ring->dev), 0);
-		if (ret)
-			return ret;
-	}
+	ret = i915_gem_obj_ggtt_pin(to->engine[RCS].state,
+				    to->ggtt_alignment,
+				    0);
+	if (ret)
+		return ret;
 
 	/*
 	 * Pin can switch back to the default context if we end up calling into
 	 * evict_everything - as a last ditch gtt defrag effort that also
 	 * switches to the default context. Hence we need to reload from here.
+	 *
+	 * XXX: Doing so is painfully broken!
 	 */
-	from = ring->last_context;
-
-	if (needs_pd_load_pre(ring, to)) {
-		/* Older GENs and non render rings still want the load first,
-		 * "PP_DCLV followed by PP_DIR_BASE register through Load
-		 * Register Immediate commands in Ring Buffer before submitting
-		 * a context."*/
-		trace_switch_mm(ring, to);
-		ret = to->ppgtt->switch_mm(to->ppgtt, req);
-		if (ret)
-			goto unpin_out;
-
-		/* Doing a PD load always reloads the page dirs */
-		to->ppgtt->pd_dirty_rings &= ~intel_ring_flag(ring);
-	}
-
-	if (ring != &dev_priv->ring[RCS]) {
-		if (from)
-			i915_gem_context_unreference(from);
-		goto done;
-	}
+	from = engine->last_context;
 
 	/*
 	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
@@ -706,57 +775,36 @@ static int do_switch(struct drm_i915_gem_request *req)
 	 *
 	 * XXX: We need a real interface to do this instead of trickery.
 	 */
-	ret = i915_gem_object_set_to_gtt_domain(to->legacy_hw_ctx.rcs_state, false);
+	ret = i915_gem_object_set_to_gtt_domain(to->engine[RCS].state, false);
 	if (ret)
 		goto unpin_out;
 
-	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) {
-		hw_flags |= MI_RESTORE_INHIBIT;
+	if (needs_pd_load_pre(ppgtt, engine, to)) {
+		/* Older GENs and non render rings still want the load first,
+		 * "PP_DCLV followed by PP_DIR_BASE register through Load
+		 * Register Immediate commands in Ring Buffer before submitting
+		 * a context."*/
+		trace_switch_mm(engine, to);
+		ret = ppgtt->switch_mm(ppgtt, req);
+		if (ret)
+			goto unpin_out;
+	}
+
+	if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
 		/* NB: If we inhibit the restore, the context is not allowed to
 		 * die because future work may end up depending on valid address
 		 * space. This means we must enforce that a page table load
 		 * occur when this occurs. */
-	} else if (to->ppgtt &&
-		   (intel_ring_flag(ring) & to->ppgtt->pd_dirty_rings)) {
-		hw_flags |= MI_FORCE_RESTORE;
-		to->ppgtt->pd_dirty_rings &= ~intel_ring_flag(ring);
-	}
-
-	/* We should never emit switch_mm more than once */
-	WARN_ON(needs_pd_load_pre(ring, to) &&
-		needs_pd_load_post(ring, to, hw_flags));
-
-	ret = mi_set_context(req, hw_flags);
-	if (ret)
-		goto unpin_out;
-
-	/* GEN8 does *not* require an explicit reload if the PDPs have been
-	 * setup, and we do not wish to move them.
-	 */
-	if (needs_pd_load_post(ring, to, hw_flags)) {
-		trace_switch_mm(ring, to);
-		ret = to->ppgtt->switch_mm(to->ppgtt, req);
-		/* The hardware context switch is emitted, but we haven't
-		 * actually changed the state - so it's probably safe to bail
-		 * here. Still, let the user know something dangerous has
-		 * happened.
-		 */
-		if (ret) {
-			DRM_ERROR("Failed to change address space on context switch\n");
-			goto unpin_out;
-		}
-	}
-
-	for (i = 0; i < MAX_L3_SLICES; i++) {
-		if (!(to->remap_slice & (1<<i)))
-			continue;
-
-		ret = i915_gem_l3_remap(req, i);
-		/* If it failed, try again next round */
+		hw_flags = MI_RESTORE_INHIBIT;
+	else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
+		hw_flags = MI_FORCE_RESTORE;
+	else
+		hw_flags = 0;
+
+	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
+		ret = mi_set_context(req, hw_flags);
 		if (ret)
-			DRM_DEBUG_DRIVER("L3 remapping failed\n");
-		else
-			to->remap_slice &= ~(1<<i);
+			goto unpin_out;
 	}
 
 	/* The backing object for the context is done after switching to the
@@ -766,8 +814,8 @@ static int do_switch(struct drm_i915_gem_request *req)
 	 * MI_SET_CONTEXT instead of when the next seqno has completed.
 	 */
 	if (from != NULL) {
-		from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), req);
+		from->engine[RCS].state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->engine[RCS].state), req);
 		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 		 * whole damn pipeline, we don't need to explicitly mark the
 		 * object dirty. The only exception is that the context must be
@@ -775,33 +823,57 @@ static int do_switch(struct drm_i915_gem_request *req)
 		 * able to defer doing this until we know the object would be
 		 * swapped, but there is no way to do that yet.
 		 */
-		from->legacy_hw_ctx.rcs_state->dirty = 1;
+		from->engine[RCS].state->dirty = 1;
 
 		/* obj is kept alive until the next request by its active ref */
-		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
+		i915_gem_object_ggtt_unpin(from->engine[RCS].state);
 		i915_gem_context_unreference(from);
 	}
+	i915_gem_context_reference(to);
+	engine->last_context = to;
+
+	/* GEN8 does *not* require an explicit reload if the PDPs have been
+	 * setup, and we do not wish to move them.
+	 */
+	if (needs_pd_load_post(ppgtt, to, hw_flags)) {
+		trace_switch_mm(engine, to);
+		ret = ppgtt->switch_mm(ppgtt, req);
+		/* The hardware context switch is emitted, but we haven't
+		 * actually changed the state - so it's probably safe to bail
+		 * here. Still, let the user know something dangerous has
+		 * happened.
+		 */
+		if (ret)
+			return ret;
+	}
 
-	uninitialized = !to->legacy_hw_ctx.initialized;
-	to->legacy_hw_ctx.initialized = true;
+	if (ppgtt)
+		ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
 
-done:
-	i915_gem_context_reference(to);
-	ring->last_context = to;
+	for (i = 0; i < MAX_L3_SLICES; i++) {
+		if (!(to->remap_slice & (1<<i)))
+			continue;
 
-	if (uninitialized) {
-		if (ring->init_context) {
-			ret = ring->init_context(req);
+		ret = remap_l3(req, i);
+		if (ret)
+			return ret;
+
+		to->remap_slice &= ~(1<<i);
+	}
+
+	if (!to->engine[RCS].initialised) {
+		if (engine->init_context) {
+			ret = engine->init_context(req);
 			if (ret)
-				DRM_ERROR("ring init context: %d\n", ret);
+				return ret;
 		}
+		to->engine[RCS].initialised = true;
 	}
 
 	return 0;
 
 unpin_out:
-	if (ring->id == RCS)
-		i915_gem_object_ggtt_unpin(to->legacy_hw_ctx.rcs_state);
+	i915_gem_object_ggtt_unpin(to->engine[RCS].state);
 	return ret;
 }
 
@@ -820,23 +892,38 @@ unpin_out:
  */
 int i915_switch_context(struct drm_i915_gem_request *req)
 {
-	struct intel_engine_cs *ring = req->ring;
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct intel_engine_cs *engine = req->engine;
 
 	WARN_ON(i915.enable_execlists);
-	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
-
-	if (req->ctx->legacy_hw_ctx.rcs_state == NULL) { /* We have the fake context */
-		if (req->ctx != ring->last_context) {
-			i915_gem_context_reference(req->ctx);
-			if (ring->last_context)
-				i915_gem_context_unreference(ring->last_context);
-			ring->last_context = req->ctx;
+	lockdep_assert_held(&req->i915->drm.struct_mutex);
+
+	if (!req->ctx->engine[engine->id].state) {
+		struct i915_gem_context *to = req->ctx;
+		struct i915_hw_ppgtt *ppgtt =
+			to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
+
+		if (needs_pd_load_pre(ppgtt, engine, to)) {
+			int ret;
+
+			trace_switch_mm(engine, to);
+			ret = ppgtt->switch_mm(ppgtt, req);
+			if (ret)
+				return ret;
+
+			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
 		}
+
+		if (to != engine->last_context) {
+			i915_gem_context_reference(to);
+			if (engine->last_context)
+				i915_gem_context_unreference(engine->last_context);
+			engine->last_context = to;
+		}
+
 		return 0;
 	}
 
-	return do_switch(req);
+	return do_rcs_switch(req);
 }
 
 static bool contexts_enabled(struct drm_device *dev)
@@ -849,7 +936,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_context_create *args = data;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct intel_context *ctx;
+	struct i915_gem_context *ctx;
 	int ret;
 
 	if (!contexts_enabled(dev))
@@ -878,7 +965,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_context_destroy *args = data;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct intel_context *ctx;
+	struct i915_gem_context *ctx;
 	int ret;
 
 	if (args->pad != 0)
@@ -891,13 +978,13 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		return ret;
 
-	ctx = i915_gem_context_get(file_priv, args->ctx_id);
+	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
 	if (IS_ERR(ctx)) {
 		mutex_unlock(&dev->struct_mutex);
 		return PTR_ERR(ctx);
 	}
 
-	idr_remove(&ctx->file_priv->context_idr, ctx->user_handle);
+	idr_remove(&file_priv->context_idr, ctx->user_handle);
 	i915_gem_context_unreference(ctx);
 	mutex_unlock(&dev->struct_mutex);
 
@@ -910,14 +997,14 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_gem_context_param *args = data;
-	struct intel_context *ctx;
+	struct i915_gem_context *ctx;
 	int ret;
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
 		return ret;
 
-	ctx = i915_gem_context_get(file_priv, args->ctx_id);
+	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
 	if (IS_ERR(ctx)) {
 		mutex_unlock(&dev->struct_mutex);
 		return PTR_ERR(ctx);
@@ -937,7 +1024,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		else if (to_i915(dev)->mm.aliasing_ppgtt)
 			args->value = to_i915(dev)->mm.aliasing_ppgtt->base.total;
 		else
-			args->value = to_i915(dev)->gtt.base.total;
+			args->value = to_i915(dev)->ggtt.base.total;
+		break;
+	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+		args->value = !!(ctx->flags & CONTEXT_NO_ERROR_CAPTURE);
 		break;
 	default:
 		ret = -EINVAL;
@@ -953,14 +1043,14 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_gem_context_param *args = data;
-	struct intel_context *ctx;
+	struct i915_gem_context *ctx;
 	int ret;
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
 		return ret;
 
-	ctx = i915_gem_context_get(file_priv, args->ctx_id);
+	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
 	if (IS_ERR(ctx)) {
 		mutex_unlock(&dev->struct_mutex);
 		return PTR_ERR(ctx);
@@ -984,6 +1074,16 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 			ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
 		}
 		break;
+	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+		if (args->size) {
+			ret = -EINVAL;
+		} else {
+			if (args->value)
+				ctx->flags |= CONTEXT_NO_ERROR_CAPTURE;
+			else
+				ctx->flags &= ~CONTEXT_NO_ERROR_CAPTURE;
+		}
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -992,3 +1092,42 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 
 	return ret;
 }
+
+int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
+				       void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_reset_stats *args = data;
+	struct i915_ctx_hang_stats *hs;
+	struct i915_gem_context *ctx;
+	int ret;
+
+	if (args->flags || args->pad)
+		return -EINVAL;
+
+	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		return ret;
+
+	ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id);
+	if (IS_ERR(ctx)) {
+		mutex_unlock(&dev->struct_mutex);
+		return PTR_ERR(ctx);
+	}
+	hs = &ctx->hang_stats;
+
+	if (capable(CAP_SYS_ADMIN))
+		args->reset_count = i915_reset_count(&dev_priv->gpu_error);
+	else
+		args->reset_count = 0;
+
+	args->batch_active = hs->batch_active;
+	args->batch_pending = hs->batch_pending;
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}