Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_lrc.c')
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c  386
1 file changed, 290 insertions(+), 96 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 31455eceeb0c..683014e7bc51 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -176,8 +176,6 @@
/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
-#define WA_TAIL_DWORDS 2
-#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
struct virtual_engine {
struct intel_engine_cs base;
@@ -247,7 +245,7 @@ static void mark_eio(struct i915_request *rq)
GEM_BUG_ON(i915_request_signaled(rq));
- dma_fence_set_error(&rq->fence, -EIO);
+ i915_request_set_error_once(rq, -EIO);
i915_request_mark_complete(rq);
}
@@ -295,7 +293,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
static inline int rq_prio(const struct i915_request *rq)
{
- return rq->sched.attr.priority;
+ return READ_ONCE(rq->sched.attr.priority);
}
static int effective_prio(const struct i915_request *rq)
@@ -1006,7 +1004,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
i915_request_cancel_breadcrumb(rq);
spin_unlock(&rq->lock);
}
- rq->engine = owner;
+ WRITE_ONCE(rq->engine, owner);
owner->submit_request(rq);
active = NULL;
}
@@ -1197,6 +1195,48 @@ static void reset_active(struct i915_request *rq,
ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
}
+static u32 intel_context_get_runtime(const struct intel_context *ce)
+{
+ /*
+ * We can use either ppHWSP[16] which is recorded before the context
+ * switch (and so excludes the cost of context switches) or use the
+ * value from the context image itself, which is saved/restored earlier
+ * and so includes the cost of the save.
+ */
+ return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
+
+static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
+{
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+ ce->runtime.num_underflow += dt < 0;
+ ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
+#endif
+}
+
+static void intel_context_update_runtime(struct intel_context *ce)
+{
+ u32 old;
+ s32 dt;
+
+ if (intel_context_is_barrier(ce))
+ return;
+
+ old = ce->runtime.last;
+ ce->runtime.last = intel_context_get_runtime(ce);
+ dt = ce->runtime.last - old;
+
+ if (unlikely(dt <= 0)) {
+ CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
+ old, ce->runtime.last, dt);
+ st_update_runtime_underflow(ce, dt);
+ return;
+ }
+
+ ewma_runtime_add(&ce->runtime.avg, dt);
+ ce->runtime.total += dt;
+}
+
static inline struct intel_engine_cs *
__execlists_schedule_in(struct i915_request *rq)
{
@@ -1211,12 +1251,12 @@ __execlists_schedule_in(struct i915_request *rq)
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
execlists_check_context(ce, engine);
+ ce->lrc_desc &= ~GENMASK_ULL(47, 37);
if (ce->tag) {
/* Use a fixed tag for OA and friends */
ce->lrc_desc |= (u64)ce->tag << 32;
} else {
/* We don't need a strict matching tag, just different values */
- ce->lrc_desc &= ~GENMASK_ULL(47, 37);
ce->lrc_desc |=
(u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
GEN11_SW_CTX_ID_SHIFT;
@@ -1276,10 +1316,11 @@ __execlists_schedule_out(struct i915_request *rq,
* If we have just completed this context, the engine may now be
* idle and we want to re-enter powersaving.
*/
- if (list_is_last(&rq->link, &ce->timeline->requests) &&
+ if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
i915_request_completed(rq))
intel_engine_add_retire(engine, ce->timeline);
+ intel_context_update_runtime(ce);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
intel_gt_pm_put_async(engine->gt);
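For context, intel_context_update_runtime() added above folds CTX_TIMESTAMP samples into a per-context total and EWMA. A minimal sketch of the wrap-safe accumulation it relies on (standalone C; the function name here is illustrative, not part of the patch): the counter is a free-running 32-bit value, so the delta is taken in unsigned arithmetic and interpreted as signed, and non-positive deltas are discarded rather than folded into the average.

/* Minimal sketch of the wrap-safe delta accumulation used above. */
static u64 accumulate_runtime_sketch(u64 total, u32 *last, u32 now)
{
	s32 dt = now - *last;	/* u32 subtraction, signed interpretation */

	*last = now;
	if (dt > 0)		/* treat underflow / no progress as noise */
		total += dt;

	return total;
}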
@@ -1395,15 +1436,26 @@ trace_ports(const struct intel_engine_execlists *execlists,
ports[1] ? ports[1]->fence.seqno : 0);
}
+static inline bool
+reset_in_progress(const struct intel_engine_execlists *execlists)
+{
+ return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
+}
+
static __maybe_unused bool
assert_pending_valid(const struct intel_engine_execlists *execlists,
const char *msg)
{
struct i915_request * const *port, *rq;
struct intel_context *ce = NULL;
+ bool sentinel = false;
trace_ports(execlists, msg, execlists->pending);
+ /* We may be messing around with the lists during reset, lalala */
+ if (reset_in_progress(execlists))
+ return true;
+
if (!execlists->pending[0]) {
GEM_TRACE_ERR("Nothing pending for promotion!\n");
return false;
@@ -1430,6 +1482,26 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
}
ce = rq->context;
+ /*
+ * Sentinels are supposed to be lonely so they flush the
+ * current execution off the HW. Check that they are the
+ * only request in the pending submission.
+ */
+ if (sentinel) {
+ GEM_TRACE_ERR("context:%llx after sentinel in pending[%zd]\n",
+ ce->timeline->fence_context,
+ port - execlists->pending);
+ return false;
+ }
+
+ sentinel = i915_request_has_sentinel(rq);
+ if (sentinel && port != execlists->pending) {
+ GEM_TRACE_ERR("sentinel context:%llx not in prime position[%zd]\n",
+ ce->timeline->fence_context,
+ port - execlists->pending);
+ return false;
+ }
+
/* Hold tightly onto the lock to prevent concurrent retires! */
if (!spin_trylock_irqsave(&rq->lock, flags))
continue;
@@ -1525,6 +1597,11 @@ static bool can_merge_ctx(const struct intel_context *prev,
return true;
}
+static unsigned long i915_request_flags(const struct i915_request *rq)
+{
+ return READ_ONCE(rq->fence.flags);
+}
+
static bool can_merge_rq(const struct i915_request *prev,
const struct i915_request *next)
{
@@ -1542,7 +1619,7 @@ static bool can_merge_rq(const struct i915_request *prev,
if (i915_request_completed(next))
return true;
- if (unlikely((prev->fence.flags ^ next->fence.flags) &
+ if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
(BIT(I915_FENCE_FLAG_NOPREEMPT) |
BIT(I915_FENCE_FLAG_SENTINEL))))
return false;
@@ -1550,6 +1627,7 @@ static bool can_merge_rq(const struct i915_request *prev,
if (!can_merge_ctx(prev->context, next->context))
return false;
+ GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
return true;
}
@@ -1585,7 +1663,7 @@ static bool virtual_matches(const struct virtual_engine *ve,
}
static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
- struct intel_engine_cs *engine)
+ struct i915_request *rq)
{
struct intel_engine_cs *old = ve->siblings[0];
@@ -1593,9 +1671,19 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
spin_lock(&old->breadcrumbs.irq_lock);
if (!list_empty(&ve->context.signal_link)) {
- list_move_tail(&ve->context.signal_link,
- &engine->breadcrumbs.signalers);
- intel_engine_signal_breadcrumbs(engine);
+ list_del_init(&ve->context.signal_link);
+
+ /*
+ * We cannot acquire the new engine->breadcrumbs.irq_lock
+ * (as we are holding a breadcrumbs.irq_lock already),
+ * so attach this request to the signaler on submission.
+ * The queued irq_work will occur when we finally drop
+ * the engine->active.lock after dequeue.
+ */
+ set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags);
+
+ /* Also transfer the pending irq_work for the old breadcrumb. */
+ intel_engine_signal_breadcrumbs(rq->engine);
}
spin_unlock(&old->breadcrumbs.irq_lock);
}
@@ -1605,6 +1693,11 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
&(rq__)->sched.waiters_list, \
wait_link)
+#define for_each_signaler(p__, rq__) \
+ list_for_each_entry_rcu(p__, \
+ &(rq__)->sched.signalers_list, \
+ signal_link)
+
static void defer_request(struct i915_request *rq, struct list_head * const pl)
{
LIST_HEAD(list);
@@ -1693,12 +1786,13 @@ timeslice(const struct intel_engine_cs *engine)
static unsigned long
active_timeslice(const struct intel_engine_cs *engine)
{
- const struct i915_request *rq = *engine->execlists.active;
+ const struct intel_engine_execlists *execlists = &engine->execlists;
+ const struct i915_request *rq = *execlists->active;
if (!rq || i915_request_completed(rq))
return 0;
- if (engine->execlists.switch_priority_hint < effective_prio(rq))
+ if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
return 0;
return timeslice(engine);
@@ -1715,8 +1809,11 @@ static void set_timeslice(struct intel_engine_cs *engine)
static void start_timeslice(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
+ int prio = queue_prio(execlists);
- execlists->switch_priority_hint = execlists->queue_priority_hint;
+ WRITE_ONCE(execlists->switch_priority_hint, prio);
+ if (prio == INT_MIN)
+ return;
if (timer_pending(&execlists->timer))
return;
@@ -1938,13 +2035,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
"",
yesno(engine != ve->siblings[0]));
- ve->request = NULL;
- ve->base.execlists.queue_priority_hint = INT_MIN;
+ WRITE_ONCE(ve->request, NULL);
+ WRITE_ONCE(ve->base.execlists.queue_priority_hint,
+ INT_MIN);
rb_erase_cached(rb, &execlists->virtual);
RB_CLEAR_NODE(rb);
GEM_BUG_ON(!(rq->execution_mask & engine->mask));
- rq->engine = engine;
+ WRITE_ONCE(rq->engine, engine);
if (engine != ve->siblings[0]) {
u32 *regs = ve->context.lrc_reg_state;
@@ -1957,7 +2055,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
engine);
if (!list_empty(&ve->context.signals))
- virtual_xfer_breadcrumbs(ve, engine);
+ virtual_xfer_breadcrumbs(ve, rq);
/*
* Move the bound engine to the top of the list
@@ -2064,6 +2162,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(last &&
!can_merge_ctx(last->context,
rq->context));
+ GEM_BUG_ON(last &&
+ i915_seqno_passed(last->fence.seqno,
+ rq->fence.seqno));
submit = true;
last = rq;
@@ -2134,6 +2235,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
execlists_schedule_out(*port);
clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
+ smp_wmb(); /* complete the seqlock for execlists_active() */
WRITE_ONCE(execlists->active, execlists->inflight);
}
@@ -2144,12 +2246,6 @@ invalidate_csb_entries(const u32 *first, const u32 *last)
clflush((void *)last);
}
-static inline bool
-reset_in_progress(const struct intel_engine_execlists *execlists)
-{
- return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
-}
-
/*
* Starting with Gen12, the status has a new format:
*
@@ -2240,7 +2336,6 @@ static void process_csb(struct intel_engine_cs *engine)
*/
head = execlists->csb_head;
tail = READ_ONCE(*execlists->csb_write);
- ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
if (unlikely(head == tail))
return;
@@ -2254,6 +2349,7 @@ static void process_csb(struct intel_engine_cs *engine)
*/
rmb();
+ ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
do {
bool promote;
@@ -2288,11 +2384,13 @@ static void process_csb(struct intel_engine_cs *engine)
if (promote) {
struct i915_request * const *old = execlists->active;
+ GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
+
+ ring_set_paused(engine, 0);
+
/* Point active to the new ELSP; prevent overwriting */
WRITE_ONCE(execlists->active, execlists->pending);
-
- if (!inject_preempt_hang(execlists))
- ring_set_paused(engine, 0);
+ smp_wmb(); /* notify execlists_active() */
/* cancel old inflight, prepare for switch */
trace_ports(execlists, "preempted", old);
@@ -2300,12 +2398,12 @@ static void process_csb(struct intel_engine_cs *engine)
execlists_schedule_out(*old++);
/* switch pending to inflight */
- GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
- WRITE_ONCE(execlists->active,
- memcpy(execlists->inflight,
- execlists->pending,
- execlists_num_ports(execlists) *
- sizeof(*execlists->pending)));
+ memcpy(execlists->inflight,
+ execlists->pending,
+ execlists_num_ports(execlists) *
+ sizeof(*execlists->pending));
+ smp_wmb(); /* complete the seqlock */
+ WRITE_ONCE(execlists->active, execlists->inflight);
WRITE_ONCE(execlists->pending[0], NULL);
} else {
@@ -2320,8 +2418,37 @@ static void process_csb(struct intel_engine_cs *engine)
* coherent (visible from the CPU) before the
* user interrupt and CSB is processed.
*/
- GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
- !reset_in_progress(execlists));
+ if (GEM_SHOW_DEBUG() &&
+ !i915_request_completed(*execlists->active) &&
+ !reset_in_progress(execlists)) {
+ struct i915_request *rq __maybe_unused =
+ *execlists->active;
+ const u32 *regs __maybe_unused =
+ rq->context->lrc_reg_state;
+
+ ENGINE_TRACE(engine,
+ "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
+ ENGINE_READ(engine, RING_START),
+ ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
+ ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
+ ENGINE_READ(engine, RING_CTL),
+ ENGINE_READ(engine, RING_MI_MODE));
+ ENGINE_TRACE(engine,
+ "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
+ i915_ggtt_offset(rq->ring->vma),
+ rq->head, rq->tail,
+ rq->fence.context,
+ lower_32_bits(rq->fence.seqno),
+ hwsp_seqno(rq));
+ ENGINE_TRACE(engine,
+ "ctx:{start:%08x, head:%04x, tail:%04x}, ",
+ regs[CTX_RING_START],
+ regs[CTX_RING_HEAD],
+ regs[CTX_RING_TAIL]);
+
+ GEM_BUG_ON("context completed before request");
+ }
+
execlists_schedule_out(*execlists->active++);
GEM_BUG_ON(execlists->active - execlists->inflight >
@@ -2349,7 +2476,7 @@ static void process_csb(struct intel_engine_cs *engine)
static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
{
lockdep_assert_held(&engine->active.lock);
- if (!engine->execlists.pending[0]) {
+ if (!READ_ONCE(engine->execlists.pending[0])) {
rcu_read_lock(); /* protect peeking at execlists->active */
execlists_dequeue(engine);
rcu_read_unlock();
@@ -2366,12 +2493,12 @@ static void __execlists_hold(struct i915_request *rq)
if (i915_request_is_active(rq))
__i915_request_unsubmit(rq);
- RQ_TRACE(rq, "on hold\n");
clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
list_move_tail(&rq->sched.link, &rq->engine->active.hold);
i915_request_set_hold(rq);
+ RQ_TRACE(rq, "on hold\n");
- list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+ for_each_waiter(p, rq) {
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
@@ -2385,7 +2512,7 @@ static void __execlists_hold(struct i915_request *rq)
if (i915_request_completed(w))
continue;
- if (i915_request_on_hold(rq))
+ if (i915_request_on_hold(w))
continue;
list_move_tail(&w->sched.link, &list);
@@ -2443,6 +2570,7 @@ static bool execlists_hold(struct intel_engine_cs *engine,
GEM_BUG_ON(i915_request_on_hold(rq));
GEM_BUG_ON(rq->engine != engine);
__execlists_hold(rq);
+ GEM_BUG_ON(list_empty(&engine->active.hold));
unlock:
spin_unlock_irq(&engine->active.lock);
@@ -2452,23 +2580,27 @@ unlock:
static bool hold_request(const struct i915_request *rq)
{
struct i915_dependency *p;
+ bool result = false;
/*
* If one of our ancestors is on hold, we must also be on hold,
* otherwise we will bypass it and execute before it.
*/
- list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
+ rcu_read_lock();
+ for_each_signaler(p, rq) {
const struct i915_request *s =
container_of(p->signaler, typeof(*s), sched);
if (s->engine != rq->engine)
continue;
- if (i915_request_on_hold(s))
- return true;
+ result = i915_request_on_hold(s);
+ if (result)
+ break;
}
+ rcu_read_unlock();
- return false;
+ return result;
}
static void __execlists_unhold(struct i915_request *rq)
@@ -2478,6 +2610,8 @@ static void __execlists_unhold(struct i915_request *rq)
do {
struct i915_dependency *p;
+ RQ_TRACE(rq, "hold release\n");
+
GEM_BUG_ON(!i915_request_on_hold(rq));
GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
@@ -2486,21 +2620,24 @@ static void __execlists_unhold(struct i915_request *rq)
i915_sched_lookup_priolist(rq->engine,
rq_prio(rq)));
set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
- RQ_TRACE(rq, "hold release\n");
/* Also release any children on this engine that are ready */
- list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+ for_each_waiter(p, rq) {
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
+ /* Propagate any change in error status */
+ if (rq->fence.error)
+ i915_request_set_error_once(w, rq->fence.error);
+
if (w->engine != rq->engine)
continue;
- if (!i915_request_on_hold(rq))
+ if (!i915_request_on_hold(w))
continue;
/* Check that no other parents are also on hold */
- if (hold_request(rq))
+ if (hold_request(w))
continue;
list_move_tail(&w->sched.link, &list);
@@ -2615,13 +2752,13 @@ static bool execlists_capture(struct intel_engine_cs *engine)
if (!cap)
return true;
+ spin_lock_irq(&engine->active.lock);
cap->rq = execlists_active(&engine->execlists);
- GEM_BUG_ON(!cap->rq);
-
- rcu_read_lock();
- cap->rq = active_request(cap->rq->context->timeline, cap->rq);
- cap->rq = i915_request_get_rcu(cap->rq);
- rcu_read_unlock();
+ if (cap->rq) {
+ cap->rq = active_request(cap->rq->context->timeline, cap->rq);
+ cap->rq = i915_request_get_rcu(cap->rq);
+ }
+ spin_unlock_irq(&engine->active.lock);
if (!cap->rq)
goto err_free;
@@ -2660,27 +2797,25 @@ err_free:
return false;
}
-static noinline void preempt_reset(struct intel_engine_cs *engine)
+static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
{
const unsigned int bit = I915_RESET_ENGINE + engine->id;
unsigned long *lock = &engine->gt->reset.flags;
- if (i915_modparams.reset < 3)
+ if (!intel_has_reset_engine(engine->gt))
return;
if (test_and_set_bit(bit, lock))
return;
+ ENGINE_TRACE(engine, "reset for %s\n", msg);
+
/* Mark this tasklet as disabled to avoid waiting for it to complete */
tasklet_disable_nosync(&engine->execlists.tasklet);
- ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
- READ_ONCE(engine->props.preempt_timeout_ms),
- jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
-
ring_set_paused(engine, 1); /* Freeze the current request in place */
if (execlists_capture(engine))
- intel_engine_reset(engine, "preemption time out");
+ intel_engine_reset(engine, msg);
else
ring_set_paused(engine, 0);
@@ -2711,6 +2846,13 @@ static void execlists_submission_tasklet(unsigned long data)
bool timeout = preempt_timeout(engine);
process_csb(engine);
+
+ if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
+ engine->execlists.error_interrupt = 0;
+ if (ENGINE_READ(engine, RING_ESR)) /* confirm the error */
+ execlists_reset(engine, "CS error");
+ }
+
if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
unsigned long flags;
@@ -2719,8 +2861,8 @@ static void execlists_submission_tasklet(unsigned long data)
spin_unlock_irqrestore(&engine->active.lock, flags);
/* Recheck after serialising with direct-submission */
- if (timeout && preempt_timeout(engine))
- preempt_reset(engine);
+ if (unlikely(timeout && preempt_timeout(engine)))
+ execlists_reset(engine, "preemption time out");
}
}
@@ -2793,6 +2935,7 @@ static void execlists_submit_request(struct i915_request *request)
spin_lock_irqsave(&engine->active.lock, flags);
if (unlikely(ancestor_on_hold(engine, request))) {
+ RQ_TRACE(request, "ancestor on hold\n");
list_add_tail(&request->sched.link, &engine->active.hold);
i915_request_set_hold(request);
} else {
@@ -2874,6 +3017,7 @@ __execlists_update_reg_state(const struct intel_context *ce,
regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
regs[CTX_RING_HEAD] = head;
regs[CTX_RING_TAIL] = ring->tail;
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
/* RPCS */
if (engine->class == RENDER_CLASS) {
@@ -2921,22 +3065,6 @@ static void execlists_context_reset(struct intel_context *ce)
CE_TRACE(ce, "reset\n");
GEM_BUG_ON(!intel_context_is_pinned(ce));
- /*
- * Because we emit WA_TAIL_DWORDS there may be a disparity
- * between our bookkeeping in ce->ring->head and ce->ring->tail and
- * that stored in context. As we only write new commands from
- * ce->ring->tail onwards, everything before that is junk. If the GPU
- * starts reading from its RING_HEAD from the context, it may try to
- * execute that junk and die.
- *
- * The contexts that are stilled pinned on resume belong to the
- * kernel, and are local to each engine. All other contexts will
- * have their head/tail sanitized upon pinning before use, so they
- * will never see garbage,
- *
- * So to avoid that we reset the context images upon resume. For
- * simplicity, we just zero everything out.
- */
intel_ring_reset(ce->ring, ce->ring->emit);
/* Scrub away the garbage */
@@ -2964,7 +3092,8 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;
- GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
+ if (!i915_request_timeline(rq)->has_initial_breadcrumb)
+ return 0;
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
@@ -3257,7 +3386,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
goto err;
}
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (err)
goto err;
@@ -3346,6 +3475,49 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
return ret;
}
+static void enable_error_interrupt(struct intel_engine_cs *engine)
+{
+ u32 status;
+
+ engine->execlists.error_interrupt = 0;
+ ENGINE_WRITE(engine, RING_EMR, ~0u);
+ ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
+
+ status = ENGINE_READ(engine, RING_ESR);
+ if (unlikely(status)) {
+ dev_err(engine->i915->drm.dev,
+ "engine '%s' resumed still in error: %08x\n",
+ engine->name, status);
+ __intel_gt_reset(engine->gt, engine->mask);
+ }
+
+ /*
+ * On current gen8+, we have 2 signals to play with
+ *
+ * - I915_ERROR_INSTRUCTION (bit 0)
+ *
+ * Generate an error if the command parser encounters an invalid
+ * instruction
+ *
+ * This is a fatal error.
+ *
+ * - CP_PRIV (bit 2)
+ *
+ * Generate an error on privilege violation (where the CP replaces
+ * the instruction with a no-op). This also fires for writes into
+ * read-only scratch pages.
+ *
+ * This is a non-fatal error, parsing continues.
+ *
+ * * there are a few others defined for odd HW that we do not use
+ *
+ * Since CP_PRIV fires for cases where we have chosen to ignore the
+ * error (as the HW is validating and suppressing the mistakes), we
+ * only unmask the instruction error bit.
+ */
+ ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
+}
+
static void enable_execlists(struct intel_engine_cs *engine)
{
u32 mode;
@@ -3367,6 +3539,8 @@ static void enable_execlists(struct intel_engine_cs *engine)
i915_ggtt_offset(engine->status_page.vma));
ENGINE_POSTING_READ(engine, RING_HWS_PGA);
+ enable_error_interrupt(engine);
+
engine->context_tag = 0;
}
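enable_error_interrupt() unmasks only the instruction error, and the submission tasklet above confirms the fault via RING_ESR before resetting. The piece connecting the two lives in the interrupt code rather than in this file; a hedged sketch of how the CS handler might latch the event for the tasklet (the function name and structure here are illustrative only, not the patch's own code):

static void cs_error_irq_sketch(struct intel_engine_cs *engine, u32 iir)
{
	/* Latch the master error so the tasklet knows to check RING_ESR */
	if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT))
		WRITE_ONCE(engine->execlists.error_interrupt, 1);

	/* Kick the tasklet for either a context switch or an error */
	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT | GT_CS_MASTER_ERROR_INTERRUPT))
		tasklet_hi_schedule(&engine->execlists.tasklet);
}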
@@ -3384,9 +3558,6 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine)
static int execlists_resume(struct intel_engine_cs *engine)
{
- intel_engine_apply_workarounds(engine);
- intel_engine_apply_whitelist(engine);
-
intel_mocs_init_engine(engine);
intel_engine_reset_breadcrumbs(engine);
@@ -3517,9 +3688,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
if (!rq)
goto unwind;
- /* We still have requests in-flight; the engine should be active */
- GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
-
ce = rq->context;
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
@@ -3529,8 +3697,12 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
goto out_replay;
}
+ /* We still have requests in-flight; the engine should be active */
+ GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+
/* Context has requests still in-flight; it should not be idle! */
GEM_BUG_ON(i915_active_is_idle(&ce->active));
+
rq = active_request(ce->timeline, rq);
head = intel_ring_wrap(ce->ring, rq->head);
GEM_BUG_ON(head == ce->ring->tail);
@@ -3604,7 +3776,10 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
static void nop_submission_tasklet(unsigned long data)
{
+ struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
+
/* The driver is wedged; don't process any more events. */
+ WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
}
static void execlists_reset_cancel(struct intel_engine_cs *engine)
@@ -4273,6 +4448,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
+ engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
}
static void rcs_submission_override(struct intel_engine_cs *engine)
@@ -4514,8 +4690,13 @@ populate_lr_context(struct intel_context *ce,
inhibit = false;
}
- /* The second page of the context object contains some fields which must
- * be set up prior to the first execution. */
+ /* Clear the ppHWSP (inc. per-context counters) */
+ memset(vaddr, 0, PAGE_SIZE);
+
+ /*
+ * The second page of the context object contains some registers which
+ * must be set up prior to the first execution.
+ */
execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
ce, engine, ring, inhibit);
@@ -4553,8 +4734,17 @@ static int __execlists_context_alloc(struct intel_context *ce,
if (!ce->timeline) {
struct intel_timeline *tl;
+ struct i915_vma *hwsp;
- tl = intel_timeline_create(engine->gt, NULL);
+ /*
+ * Use the static global HWSP for the kernel context, and
+ * a dynamically allocated cacheline for everyone else.
+ */
+ hwsp = NULL;
+ if (unlikely(intel_context_is_barrier(ce)))
+ hwsp = engine->status_page.vma;
+
+ tl = intel_timeline_create(engine->gt, hwsp);
if (IS_ERR(tl)) {
ret = PTR_ERR(tl);
goto error_deref_obj;
@@ -4723,7 +4913,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
mask = rq->execution_mask;
if (unlikely(!mask)) {
/* Invalid selection, submit to a random engine in error */
- i915_request_skip(rq, -ENODEV);
+ i915_request_set_error_once(rq, -ENODEV);
mask = ve->siblings[0]->mask;
}
@@ -4737,7 +4927,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
static void virtual_submission_tasklet(unsigned long data)
{
struct virtual_engine * const ve = (struct virtual_engine *)data;
- const int prio = ve->base.execlists.queue_priority_hint;
+ const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
intel_engine_mask_t mask;
unsigned int n;
@@ -5133,11 +5323,15 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
show_request(m, last, "\t\tE ");
}
- last = NULL;
- count = 0;
+ if (execlists->switch_priority_hint != INT_MIN)
+ drm_printf(m, "\t\tSwitch priority hint: %d\n",
+ READ_ONCE(execlists->switch_priority_hint));
if (execlists->queue_priority_hint != INT_MIN)
drm_printf(m, "\t\tQueue priority hint: %d\n",
- execlists->queue_priority_hint);
+ READ_ONCE(execlists->queue_priority_hint));
+
+ last = NULL;
+ count = 0;
for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
int i;