Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_lrc.c')
 drivers/gpu/drm/i915/gt/intel_lrc.c | 696
 1 file changed, 435 insertions(+), 261 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9fdefbdc3546..0cf0f6fae675 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -133,12 +133,11 @@
*/
#include <linux/interrupt.h>
-#include "gem/i915_gem_context.h"
-
#include "i915_drv.h"
#include "i915_perf.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
+#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
@@ -489,17 +488,23 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
return desc;
}
-static u32 *set_offsets(u32 *regs,
+static inline unsigned int dword_in_page(void *addr)
+{
+ return offset_in_page(addr) / sizeof(u32);
+}
+
+static void set_offsets(u32 *regs,
const u8 *data,
- const struct intel_engine_cs *engine)
+ const struct intel_engine_cs *engine,
+ bool clear)
#define NOP(x) (BIT(7) | (x))
-#define LRI(count, flags) ((flags) << 6 | (count))
+#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
(((x) >> 2) & 0x7f)
-#define END() 0
+#define END(x) 0, (x)
{
const u32 base = engine->mmio_base;
@@ -507,7 +512,10 @@ static u32 *set_offsets(u32 *regs,
u8 count, flags;
if (*data & BIT(7)) { /* skip */
- regs += *data++ & ~BIT(7);
+ count = *data++ & ~BIT(7);
+ if (clear)
+ memset32(regs, MI_NOOP, count);
+ regs += count;
continue;
}
@@ -533,12 +541,25 @@ static u32 *set_offsets(u32 *regs,
offset |= v & ~BIT(7);
} while (v & BIT(7));
- *regs = base + (offset << 2);
+ regs[0] = base + (offset << 2);
+ if (clear)
+ regs[1] = 0;
regs += 2;
} while (--count);
}
- return regs;
+ if (clear) {
+ u8 count = *++data;
+
+ /* Clear past the tail for HW access */
+ GEM_BUG_ON(dword_in_page(regs) > count);
+ memset32(regs, MI_NOOP, count - dword_in_page(regs));
+
+ /* Close the batch; used mainly by live_lrc_layout() */
+ *regs = MI_BATCH_BUFFER_END;
+ if (INTEL_GEN(engine->i915) >= 10)
+ *regs |= BIT(0);
+ }
}
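
The tables that follow (gen8_xcs_offsets and friends) use the NOP/LRI/REG/REG16/END byte encoding defined above. Below is a minimal user-space sketch of how such a table decodes into mmio offsets; the demo table, base address and register names are illustrative assumptions, not the driver's data:

	/* Illustrative decoder for the packed offset-table format (sketch only). */
	#include <stdint.h>
	#include <stdio.h>

	#define BIT(n)            (1u << (n))
	#define NOP(x)            (BIT(7) | (x))
	#define LRI(count, flags) (((flags) << 6) | (count))
	#define POSTED            BIT(0)
	#define REG(x)            ((x) >> 2)
	#define REG16(x)          (((x) >> 9) | BIT(7)), (((x) >> 2) & 0x7f)
	#define END(x)            0, (x)

	static const uint8_t demo_offsets[] = {
		NOP(1),          /* skip one dword of the context image */
		LRI(3, POSTED),  /* load 3 registers, posted write */
		REG(0x034),      /* short form: offsets below 0x200 */
		REG(0x030),
		REG16(0x3a8),    /* two-byte form for larger offsets */
		END(80),         /* terminator + dword count used for clearing */
	};

	int main(void)
	{
		const uint8_t *data = demo_offsets;
		uint32_t base = 0x2000;  /* assumed engine->mmio_base for the demo */
		unsigned int dw = 0;     /* dword position within the register page */

		while (*data) {
			if (*data & BIT(7)) {        /* NOP(x): skip x dwords */
				dw += *data++ & ~BIT(7);
				continue;
			}

			unsigned int count = *data & 0x3f;
			unsigned int flags = *data >> 6;
			data++;
			printf("dw %3u: MI_LOAD_REGISTER_IMM(%u)%s\n",
			       dw++, count, flags & POSTED ? " [posted]" : "");

			do {  /* 7 bits of offset per byte, BIT(7) means "more" */
				uint32_t offset = 0;
				uint8_t v;

				do {
					v = *data++;
					offset = (offset << 7) | (v & ~BIT(7));
				} while (v & BIT(7));

				printf("dw %3u: reg 0x%05x\n", dw, base + (offset << 2));
				dw += 2;  /* each register is an offset/value pair */
			} while (--count);
		}

		printf("END: clear up to dword %u of the page\n", data[1]);
		return 0;
	}
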
static const u8 gen8_xcs_offsets[] = {
@@ -573,7 +594,7 @@ static const u8 gen8_xcs_offsets[] = {
REG16(0x200),
REG(0x028),
- END(),
+ END(80)
};
static const u8 gen9_xcs_offsets[] = {
@@ -657,7 +678,7 @@ static const u8 gen9_xcs_offsets[] = {
REG16(0x67c),
REG(0x068),
- END(),
+ END(176)
};
static const u8 gen12_xcs_offsets[] = {
@@ -689,7 +710,7 @@ static const u8 gen12_xcs_offsets[] = {
REG16(0x274),
REG16(0x270),
- END(),
+ END(80)
};
static const u8 gen8_rcs_offsets[] = {
@@ -726,7 +747,91 @@ static const u8 gen8_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END(),
+ END(80)
+};
+
+static const u8 gen9_rcs_offsets[] = {
+ NOP(1),
+ LRI(14, POSTED),
+ REG16(0x244),
+ REG(0x34),
+ REG(0x30),
+ REG(0x38),
+ REG(0x3c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+
+ NOP(3),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(1, 0),
+ REG(0xc8),
+
+ NOP(13),
+ LRI(44, POSTED),
+ REG(0x28),
+ REG(0x9c),
+ REG(0xc0),
+ REG(0x178),
+ REG(0x17c),
+ REG16(0x358),
+ REG(0x170),
+ REG(0x150),
+ REG(0x154),
+ REG(0x158),
+ REG16(0x41c),
+ REG16(0x600),
+ REG16(0x604),
+ REG16(0x608),
+ REG16(0x60c),
+ REG16(0x610),
+ REG16(0x614),
+ REG16(0x618),
+ REG16(0x61c),
+ REG16(0x620),
+ REG16(0x624),
+ REG16(0x628),
+ REG16(0x62c),
+ REG16(0x630),
+ REG16(0x634),
+ REG16(0x638),
+ REG16(0x63c),
+ REG16(0x640),
+ REG16(0x644),
+ REG16(0x648),
+ REG16(0x64c),
+ REG16(0x650),
+ REG16(0x654),
+ REG16(0x658),
+ REG16(0x65c),
+ REG16(0x660),
+ REG16(0x664),
+ REG16(0x668),
+ REG16(0x66c),
+ REG16(0x670),
+ REG16(0x674),
+ REG16(0x678),
+ REG16(0x67c),
+ REG(0x68),
+
+ END(176)
};
static const u8 gen11_rcs_offsets[] = {
@@ -767,7 +872,7 @@ static const u8 gen11_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END(),
+ END(80)
};
static const u8 gen12_rcs_offsets[] = {
@@ -808,7 +913,7 @@ static const u8 gen12_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END(),
+ END(80)
};
#undef END
@@ -833,6 +938,8 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
return gen12_rcs_offsets;
else if (INTEL_GEN(engine->i915) >= 11)
return gen11_rcs_offsets;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return gen9_rcs_offsets;
else
return gen8_rcs_offsets;
} else {
@@ -845,12 +952,6 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
}
}
-static void unwind_wa_tail(struct i915_request *rq)
-{
- rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
- assert_ring_tail_valid(rq->ring, rq->tail);
-}
-
static struct i915_request *
__unwind_incomplete_requests(struct intel_engine_cs *engine)
{
@@ -863,12 +964,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_for_each_entry_safe_reverse(rq, rn,
&engine->active.requests,
sched.link) {
-
if (i915_request_completed(rq))
continue; /* XXX */
__i915_request_unsubmit(rq);
- unwind_wa_tail(rq);
/*
* Push the request back into the queue for later resubmission.
@@ -888,7 +987,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_move(&rq->sched.link, pl);
active = rq;
} else {
- struct intel_engine_cs *owner = rq->hw_context->engine;
+ struct intel_engine_cs *owner = rq->context->engine;
/*
* Decouple the virtual breadcrumb before moving it
@@ -991,6 +1090,58 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
write_sequnlock_irqrestore(&engine->stats.lock, flags);
}
+static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x60;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return 0x54;
+ else if (engine->class == RENDER_CLASS)
+ return 0x58;
+ else
+ return -1;
+}
+
+static void
+execlists_check_context(const struct intel_context *ce,
+ const struct intel_engine_cs *engine)
+{
+ const struct intel_ring *ring = ce->ring;
+ u32 *regs = ce->lrc_reg_state;
+ bool valid = true;
+ int x;
+
+ if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
+ pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
+ engine->name,
+ regs[CTX_RING_START],
+ i915_ggtt_offset(ring->vma));
+ regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
+ valid = false;
+ }
+
+ if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
+ (RING_CTL_SIZE(ring->size) | RING_VALID)) {
+ pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
+ engine->name,
+ regs[CTX_RING_CTL],
+ (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+ valid = false;
+ }
+
+ x = lrc_ring_mi_mode(engine);
+ if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
+ pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
+ engine->name, regs[x + 1]);
+ regs[x + 1] &= ~STOP_RING;
+ regs[x + 1] |= STOP_RING << 16;
+ valid = false;
+ }
+
+ WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
+}
+
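The STOP_RING test above leans on the masked-register convention, where the upper 16 bits of the value select which of the low 16 bits a write may change. A self-contained sketch of that convention follows; the macro bodies are assumed to mirror the driver's _MASKED_BIT_ENABLE()/_MASKED_BIT_DISABLE() helpers, and STOP_RING is taken to be bit 8 of MI_MODE:

	#include <assert.h>
	#include <stdint.h>

	#define MASKED_BIT_ENABLE(a)   (((a) << 16) | (a))  /* set bit + its mask */
	#define MASKED_BIT_DISABLE(a)  ((a) << 16)          /* clear bit via mask */
	#define STOP_RING              (1u << 8)

	int main(void)
	{
		uint32_t reg;

		/* A saved context image carrying a masked STOP_RING enable... */
		reg = MASKED_BIT_ENABLE(STOP_RING);
		/* ...is what execlists_check_context() flags before submission: */
		assert(reg & (reg >> 16) & STOP_RING);

		/*
		 * The repair clears the value bit and keeps the mask bit set, so
		 * the context restore explicitly disables STOP_RING rather than
		 * leaving the stale request in place.
		 */
		reg &= ~STOP_RING;
		reg |= STOP_RING << 16;
		assert(!(reg & (reg >> 16) & STOP_RING));
		assert(reg == MASKED_BIT_DISABLE(STOP_RING));
		return 0;
	}
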
static void restore_default_state(struct intel_context *ce,
struct intel_engine_cs *engine)
{
@@ -1007,7 +1158,7 @@ static void restore_default_state(struct intel_context *ce,
static void reset_active(struct i915_request *rq,
struct intel_engine_cs *engine)
{
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
u32 head;
/*
@@ -1025,8 +1176,8 @@ static void reset_active(struct i915_request *rq,
* remain correctly ordered. And we defer to __i915_request_submit()
* so that all asynchronous waits are correctly handled.
*/
- GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
- __func__, engine->name, rq->fence.context, rq->fence.seqno);
+ ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
+ rq->fence.context, rq->fence.seqno);
/* On resubmission of the active request, payload will be scrubbed */
if (i915_request_completed(rq))
@@ -1048,13 +1199,16 @@ static inline struct intel_engine_cs *
__execlists_schedule_in(struct i915_request *rq)
{
struct intel_engine_cs * const engine = rq->engine;
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
intel_context_get(ce);
- if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
+ if (unlikely(intel_context_is_banned(ce)))
reset_active(rq, engine);
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ execlists_check_context(ce, engine);
+
if (ce->tag) {
/* Use a fixed tag for OA and friends */
ce->lrc_desc |= (u64)ce->tag << 32;
@@ -1062,12 +1216,12 @@ __execlists_schedule_in(struct i915_request *rq)
/* We don't need a strict matching tag, just different values */
ce->lrc_desc &= ~GENMASK_ULL(47, 37);
ce->lrc_desc |=
- (u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
+ (u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
GEN11_SW_CTX_ID_SHIFT;
BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
}
- intel_gt_pm_get(engine->gt);
+ __intel_gt_pm_get(engine->gt);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(engine);
@@ -1077,7 +1231,7 @@ __execlists_schedule_in(struct i915_request *rq)
static inline struct i915_request *
execlists_schedule_in(struct i915_request *rq, int idx)
{
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
struct intel_engine_cs *old;
GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
@@ -1108,7 +1262,7 @@ static inline void
__execlists_schedule_out(struct i915_request *rq,
struct intel_engine_cs * const engine)
{
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
/*
* NB process_csb() is not under the engine->active.lock and hence
@@ -1146,7 +1300,7 @@ __execlists_schedule_out(struct i915_request *rq,
static inline void
execlists_schedule_out(struct i915_request *rq)
{
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
struct intel_engine_cs *cur, *old;
trace_i915_request_out(rq);
@@ -1161,13 +1315,29 @@ execlists_schedule_out(struct i915_request *rq)
i915_request_put(rq);
}
-static u64 execlists_update_context(const struct i915_request *rq)
+static u64 execlists_update_context(struct i915_request *rq)
{
- struct intel_context *ce = rq->hw_context;
- u64 desc;
+ struct intel_context *ce = rq->context;
+ u64 desc = ce->lrc_desc;
+ u32 tail;
- ce->lrc_reg_state[CTX_RING_TAIL] =
- intel_ring_set_tail(rq->ring, rq->tail);
+ /*
+ * WaIdleLiteRestore:bdw,skl
+ *
+ * We should never submit the context with the same RING_TAIL twice
+ * just in case we submit an empty ring, which confuses the HW.
+ *
+ * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
+ * the normal request to be able to always advance the RING_TAIL on
+ * subsequent resubmissions (for lite restore). Should that fail us,
+ * and we try and submit the same tail again, force the context
+ * reload.
+ */
+ tail = intel_ring_set_tail(rq->ring, rq->tail);
+ if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
+ desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc_reg_state[CTX_RING_TAIL] = tail;
+ rq->tail = rq->wa_tail;
/*
* Make sure the context image is complete before we submit it to HW.
@@ -1178,21 +1348,10 @@ static u64 execlists_update_context(const struct i915_request *rq)
* may not be visible to the HW prior to the completion of the UC
* register write and that we may begin execution from the context
* before its image is complete leading to invalid PD chasing.
- *
- * Furthermore, Braswell, at least, wants a full mb to be sure that
- * the writes are coherent in memory (visible to the GPU) prior to
- * execution, and not just visible to other CPUs (as is the result of
- * wmb).
*/
- mb();
+ wmb();
- desc = ce->lrc_desc;
ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
-
- /* Wa_1607138340:tgl */
- if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0))
- desc |= CTX_DESC_FORCE_RESTORE;
-
return desc;
}
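
A minimal sketch of the guard above, with an assumed CTX_DESC_FORCE_RESTORE bit and demo types standing in for the real structures:

	#include <assert.h>
	#include <stdint.h>

	#define CTX_DESC_FORCE_RESTORE (1ull << 2)  /* assumed bit, sketch only */

	struct demo_ctx {
		uint32_t ring_tail;  /* CTX_RING_TAIL as kept in the image */
		uint64_t lrc_desc;
	};

	/* Mirror of the guard above: an unchanged RING_TAIL forces a full restore. */
	static uint64_t submit_tail(struct demo_ctx *ce, uint32_t tail)
	{
		uint64_t desc = ce->lrc_desc;

		if (ce->ring_tail == tail)
			desc |= CTX_DESC_FORCE_RESTORE;
		ce->ring_tail = tail;

		return desc;
	}

	int main(void)
	{
		struct demo_ctx ce = { .ring_tail = 0x40, .lrc_desc = 0 };

		/* New work moved the tail: no forced restore needed. */
		assert(!(submit_tail(&ce, 0x80) & CTX_DESC_FORCE_RESTORE));
		/* Same tail resubmitted (empty ring): force the context reload. */
		assert(submit_tail(&ce, 0x80) & CTX_DESC_FORCE_RESTORE);
		return 0;
	}
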
@@ -1218,15 +1377,14 @@ trace_ports(const struct intel_engine_execlists *execlists,
if (!ports[0])
return;
- GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
- engine->name, msg,
- ports[0]->fence.context,
- ports[0]->fence.seqno,
- i915_request_completed(ports[0]) ? "!" :
- i915_request_started(ports[0]) ? "*" :
- "",
- ports[1] ? ports[1]->fence.context : 0,
- ports[1] ? ports[1]->fence.seqno : 0);
+ ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
+ ports[0]->fence.context,
+ ports[0]->fence.seqno,
+ i915_request_completed(ports[0]) ? "!" :
+ i915_request_started(ports[0]) ? "*" :
+ "",
+ ports[1] ? ports[1]->fence.context : 0,
+ ports[1] ? ports[1]->fence.seqno : 0);
}
static __maybe_unused bool
@@ -1250,33 +1408,56 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
}
for (port = execlists->pending; (rq = *port); port++) {
- if (ce == rq->hw_context) {
- GEM_TRACE_ERR("Duplicate context in pending[%zd]\n",
+ unsigned long flags;
+ bool ok = true;
+
+ GEM_BUG_ON(!kref_read(&rq->fence.refcount));
+ GEM_BUG_ON(!i915_request_is_active(rq));
+
+ if (ce == rq->context) {
+ GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
return false;
}
+ ce = rq->context;
- ce = rq->hw_context;
- if (i915_request_completed(rq))
+ /* Hold tightly onto the lock to prevent concurrent retires! */
+ if (!spin_trylock_irqsave(&rq->lock, flags))
continue;
- if (i915_active_is_idle(&ce->active)) {
- GEM_TRACE_ERR("Inactive context in pending[%zd]\n",
+ if (i915_request_completed(rq))
+ goto unlock;
+
+ if (i915_active_is_idle(&ce->active) &&
+ !intel_context_is_barrier(ce)) {
+ GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
- return false;
+ ok = false;
+ goto unlock;
}
if (!i915_vma_is_pinned(ce->state)) {
- GEM_TRACE_ERR("Unpinned context in pending[%zd]\n",
+ GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
- return false;
+ ok = false;
+ goto unlock;
}
if (!i915_vma_is_pinned(ce->ring->vma)) {
- GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n",
+ GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
- return false;
+ ok = false;
+ goto unlock;
}
+
+unlock:
+ spin_unlock_irqrestore(&rq->lock, flags);
+ if (!ok)
+ return false;
}
return ce;
@@ -1321,7 +1502,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
static bool ctx_single_port_submission(const struct intel_context *ce)
{
return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
- i915_gem_context_force_single_submission(ce->gem_context));
+ intel_context_force_single_submission(ce));
}
static bool can_merge_ctx(const struct intel_context *prev,
@@ -1353,11 +1534,11 @@ static bool can_merge_rq(const struct i915_request *prev,
if (i915_request_completed(next))
return true;
- if (unlikely((prev->flags ^ next->flags) &
- (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL)))
+ if (unlikely((prev->fence.flags ^ next->fence.flags) &
+ (I915_FENCE_FLAG_NOPREEMPT | I915_FENCE_FLAG_SENTINEL)))
return false;
- if (!can_merge_ctx(prev->hw_context, next->hw_context))
+ if (!can_merge_ctx(prev->context, next->context))
return false;
return true;
@@ -1366,7 +1547,7 @@ static bool can_merge_rq(const struct i915_request *prev,
static void virtual_update_register_offsets(u32 *regs,
struct intel_engine_cs *engine)
{
- set_offsets(regs, reg_offsets(engine), engine);
+ set_offsets(regs, reg_offsets(engine), engine, false);
}
static bool virtual_matches(const struct virtual_engine *ve,
@@ -1405,7 +1586,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
if (!list_empty(&ve->context.signal_link)) {
list_move_tail(&ve->context.signal_link,
&engine->breadcrumbs.signalers);
- intel_engine_queue_breadcrumbs(engine);
+ intel_engine_signal_breadcrumbs(engine);
}
spin_unlock(&old->breadcrumbs.irq_lock);
}
@@ -1513,7 +1694,7 @@ active_timeslice(const struct intel_engine_cs *engine)
{
const struct i915_request *rq = *engine->execlists.active;
- if (i915_request_completed(rq))
+ if (!rq || i915_request_completed(rq))
return 0;
if (engine->execlists.switch_priority_hint < effective_prio(rq))
@@ -1544,7 +1725,7 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
return 0;
/* Force a fast reset for terminated contexts (ignoring sysfs!) */
- if (unlikely(i915_gem_context_is_banned(rq->gem_context)))
+ if (unlikely(intel_context_is_banned(rq->context)))
return 1;
return READ_ONCE(engine->props.preempt_timeout_ms);
@@ -1559,6 +1740,11 @@ static void set_preempt_timeout(struct intel_engine_cs *engine)
active_preempt_timeout(engine));
}
+static inline void clear_ports(struct i915_request **ports, int count)
+{
+ memset_p((void **)ports, NULL, count);
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1621,12 +1807,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
last = last_active(execlists);
if (last) {
if (need_preempt(engine, last, rb)) {
- GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n",
- engine->name,
- last->fence.context,
- last->fence.seqno,
- last->sched.attr.priority,
- execlists->queue_priority_hint);
+ ENGINE_TRACE(engine,
+ "preempting last=%llx:%lld, prio=%d, hint=%d\n",
+ last->fence.context,
+ last->fence.seqno,
+ last->sched.attr.priority,
+ execlists->queue_priority_hint);
record_preemption(execlists);
/*
@@ -1652,16 +1838,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* tendency to ignore us rewinding the TAIL to the
* end of an earlier request.
*/
- last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+ last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
last = NULL;
} else if (need_timeslice(engine, last) &&
timer_expired(&engine->execlists.timer)) {
- GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n",
- engine->name,
- last->fence.context,
- last->fence.seqno,
- last->sched.attr.priority,
- execlists->queue_priority_hint);
+ ENGINE_TRACE(engine,
+ "expired last=%llx:%lld, prio=%d, hint=%d\n",
+ last->fence.context,
+ last->fence.seqno,
+ last->sched.attr.priority,
+ execlists->queue_priority_hint);
ring_set_paused(engine, 1);
defer_active(engine);
@@ -1703,16 +1889,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
return;
}
-
- /*
- * WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent
- * ring:HEAD == rq:TAIL as we resubmit the
- * request. See gen8_emit_fini_breadcrumb() for
- * where we prepare the padding after the
- * end of the request.
- */
- last->tail = last->wa_tail;
}
}
@@ -1734,7 +1910,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(rq != ve->request);
GEM_BUG_ON(rq->engine != &ve->base);
- GEM_BUG_ON(rq->hw_context != &ve->context);
+ GEM_BUG_ON(rq->context != &ve->context);
if (rq_prio(rq) >= queue_prio(execlists)) {
if (!virtual_matches(ve, rq, engine)) {
@@ -1748,14 +1924,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
return; /* leave this for another */
}
- GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
- engine->name,
- rq->fence.context,
- rq->fence.seqno,
- i915_request_completed(rq) ? "!" :
- i915_request_started(rq) ? "*" :
- "",
- yesno(engine != ve->siblings[0]));
+ ENGINE_TRACE(engine,
+ "virtual rq=%llx:%lld%s, new engine? %s\n",
+ rq->fence.context,
+ rq->fence.seqno,
+ i915_request_completed(rq) ? "!" :
+ i915_request_started(rq) ? "*" :
+ "",
+ yesno(engine != ve->siblings[0]));
ve->request = NULL;
ve->base.execlists.queue_priority_hint = INT_MIN;
@@ -1853,7 +2029,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* same LRCA, i.e. we must submit 2 different
* contexts if we submit 2 ELSP.
*/
- if (last->hw_context == rq->hw_context)
+ if (last->context == rq->context)
goto done;
if (i915_request_has_sentinel(last))
@@ -1866,8 +2042,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* the same context (even though a different
* request) to the second port.
*/
- if (ctx_single_port_submission(last->hw_context) ||
- ctx_single_port_submission(rq->hw_context))
+ if (ctx_single_port_submission(last->context) ||
+ ctx_single_port_submission(rq->context))
goto done;
merge = false;
@@ -1881,8 +2057,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
GEM_BUG_ON(last &&
- !can_merge_ctx(last->hw_context,
- rq->hw_context));
+ !can_merge_ctx(last->context,
+ rq->context));
submit = true;
last = rq;
@@ -1911,9 +2087,6 @@ done:
* interrupt for secondary ports).
*/
execlists->queue_priority_hint = queue_prio(execlists);
- GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n",
- engine->name, execlists->queue_priority_hint,
- yesno(submit));
if (submit) {
*port = execlists_schedule_in(last, port - execlists->pending);
@@ -1932,10 +2105,9 @@ done:
goto skip_submit;
}
+ clear_ports(port + 1, last_port - port);
- memset(port + 1, 0, (last_port - port) * sizeof(*port));
execlists_submit_ports(engine);
-
set_preempt_timeout(engine);
} else {
skip_submit:
@@ -1950,13 +2122,14 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
for (port = execlists->pending; *port; port++)
execlists_schedule_out(*port);
- memset(execlists->pending, 0, sizeof(execlists->pending));
+ clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
/* Mark the end of active before we overwrite *active */
for (port = xchg(&execlists->active, execlists->pending); *port; port++)
execlists_schedule_out(*port);
- WRITE_ONCE(execlists->active,
- memset(execlists->inflight, 0, sizeof(execlists->inflight)));
+ clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
+
+ WRITE_ONCE(execlists->active, execlists->inflight);
}
static inline void
@@ -2062,7 +2235,7 @@ static void process_csb(struct intel_engine_cs *engine)
*/
head = execlists->csb_head;
tail = READ_ONCE(*execlists->csb_write);
- GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
+ ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
if (unlikely(head == tail))
return;
@@ -2100,9 +2273,8 @@ static void process_csb(struct intel_engine_cs *engine)
* status notifier.
*/
- GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n",
- engine->name, head,
- buf[2 * head + 0], buf[2 * head + 1]);
+ ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
+ head, buf[2 * head + 0], buf[2 * head + 1]);
if (INTEL_GEN(engine->i915) >= 12)
promote = gen12_csb_parse(execlists, buf + 2 * head);
@@ -2113,7 +2285,6 @@ static void process_csb(struct intel_engine_cs *engine)
/* Point active to the new ELSP; prevent overwriting */
WRITE_ONCE(execlists->active, execlists->pending);
- set_timeslice(engine);
if (!inject_preempt_hang(execlists))
ring_set_paused(engine, 0);
@@ -2154,6 +2325,7 @@ static void process_csb(struct intel_engine_cs *engine)
} while (head != tail);
execlists->csb_head = head;
+ set_timeslice(engine);
/*
* Gen11 has proven to fail wrt global observation point between
@@ -2193,10 +2365,9 @@ static noinline void preempt_reset(struct intel_engine_cs *engine)
/* Mark this tasklet as disabled to avoid waiting for it to complete */
tasklet_disable_nosync(&engine->execlists.tasklet);
- GEM_TRACE("%s: preempt timeout %lu+%ums\n",
- engine->name,
- READ_ONCE(engine->props.preempt_timeout_ms),
- jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
+ ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
+ READ_ONCE(engine->props.preempt_timeout_ms),
+ jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
intel_engine_reset(engine, "preemption time out");
tasklet_enable(&engine->execlists.tasklet);
@@ -2337,7 +2508,7 @@ set_redzone(void *vaddr, const struct intel_engine_cs *engine)
vaddr += engine->context_size;
- memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
+ memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}
static void
@@ -2348,7 +2519,7 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
vaddr += engine->context_size;
- if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE))
+ if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
dev_err_once(engine->i915->drm.dev,
"%s context redzone overwritten!\n",
engine->name);
@@ -2373,7 +2544,7 @@ __execlists_update_reg_state(const struct intel_context *ce,
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
- regs[CTX_RING_BUFFER_START] = i915_ggtt_offset(ring->vma);
+ regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
regs[CTX_RING_HEAD] = ring->head;
regs[CTX_RING_TAIL] = ring->tail;
@@ -2391,33 +2562,21 @@ __execlists_context_pin(struct intel_context *ce,
struct intel_engine_cs *engine)
{
void *vaddr;
- int ret;
GEM_BUG_ON(!ce->state);
-
- ret = intel_context_active_acquire(ce);
- if (ret)
- goto err;
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
vaddr = i915_gem_object_pin_map(ce->state->obj,
i915_coherent_map_type(engine->i915) |
I915_MAP_OVERRIDE);
- if (IS_ERR(vaddr)) {
- ret = PTR_ERR(vaddr);
- goto unpin_active;
- }
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
- ce->lrc_desc = lrc_descriptor(ce, engine);
+ ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
__execlists_update_reg_state(ce, engine);
return 0;
-
-unpin_active:
- intel_context_active_release(ce);
-err:
- return ret;
}
static int execlists_context_pin(struct intel_context *ce)
@@ -2432,6 +2591,9 @@ static int execlists_context_alloc(struct intel_context *ce)
static void execlists_context_reset(struct intel_context *ce)
{
+ CE_TRACE(ce, "reset\n");
+ GEM_BUG_ON(!intel_context_is_pinned(ce));
+
/*
* Because we emit WA_TAIL_DWORDS there may be a disparity
* between our bookkeeping in ce->ring->head and ce->ring->tail and
@@ -2448,8 +2610,14 @@ static void execlists_context_reset(struct intel_context *ce)
* So to avoid that we reset the context images upon resume. For
* simplicity, we just zero everything out.
*/
- intel_ring_reset(ce->ring, 0);
+ intel_ring_reset(ce->ring, ce->ring->emit);
+
+ /* Scrub away the garbage */
+ execlists_init_reg_state(ce->lrc_reg_state,
+ ce, ce->engine, ce->ring, true);
__execlists_update_reg_state(ce, ce->engine);
+
+ ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
}
static const struct intel_context_ops execlists_context_ops = {
@@ -2501,7 +2669,7 @@ static int execlists_request_alloc(struct i915_request *request)
{
int ret;
- GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
+ GEM_BUG_ON(!intel_context_is_pinned(request->context));
/*
* Flush enough space to reduce the likelihood of waiting after
@@ -2668,6 +2836,14 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
batch = gen8_emit_flush_coherentl3_wa(engine, batch);
+ /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
+ batch = gen8_emit_pipe_control(batch,
+ PIPE_CONTROL_FLUSH_L3 |
+ PIPE_CONTROL_STORE_DATA_INDEX |
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_QW_WRITE,
+ LRC_PPHWSP_SCRATCH_ADDR);
+
batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
/* WaMediaPoolStateCmdInWABB:bxt,glk */
@@ -2863,6 +3039,8 @@ static void enable_execlists(struct intel_engine_cs *engine)
RING_HWS_PGA,
i915_ggtt_offset(engine->status_page.vma));
ENGINE_POSTING_READ(engine, RING_HWS_PGA);
+
+ engine->context_tag = 0;
}
static bool unexpected_starting_state(struct intel_engine_cs *engine)
@@ -2902,8 +3080,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
struct intel_engine_execlists * const execlists = &engine->execlists;
unsigned long flags;
- GEM_TRACE("%s: depth<-%d\n", engine->name,
- atomic_read(&execlists->tasklet.count));
+ ENGINE_TRACE(engine, "depth<-%d\n",
+ atomic_read(&execlists->tasklet.count));
/*
* Prevent request submission to the hardware until we have
@@ -2956,26 +3134,20 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
WRITE_ONCE(*execlists->csb_write, reset_value);
wmb(); /* Make sure this is visible to HW (paranoia?) */
+ /*
+ * Sometimes Icelake forgets to reset its pointers on a GPU reset.
+ * Bludgeon them with a mmio update to be sure.
+ */
+ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
+ reset_value << 8 | reset_value);
+ ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
+
invalidate_csb_entries(&execlists->csb_status[0],
&execlists->csb_status[reset_value]);
}
-static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
-{
- if (INTEL_GEN(engine->i915) >= 12)
- return 0x60;
- else if (INTEL_GEN(engine->i915) >= 9)
- return 0x54;
- else if (engine->class == RENDER_CLASS)
- return 0x58;
- else
- return -1;
-}
-
-static void __execlists_reset_reg_state(const struct intel_context *ce,
- const struct intel_engine_cs *engine)
+static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
- u32 *regs = ce->lrc_reg_state;
int x;
x = lrc_ring_mi_mode(engine);
@@ -2985,6 +3157,14 @@ static void __execlists_reset_reg_state(const struct intel_context *ce,
}
}
+static void __execlists_reset_reg_state(const struct intel_context *ce,
+ const struct intel_engine_cs *engine)
+{
+ u32 *regs = ce->lrc_reg_state;
+
+ __reset_stop_ring(regs, engine);
+}
+
static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -3012,7 +3192,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
/* We still have requests in-flight; the engine should be active */
GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
- ce = rq->hw_context;
+ ce = rq->context;
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
if (i915_request_completed(rq)) {
@@ -3069,8 +3249,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
restore_default_state(ce, engine);
out_replay:
- GEM_TRACE("%s replay {head:%04x, tail:%04x}\n",
- engine->name, ce->ring->head, ce->ring->tail);
+ ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
+ ce->ring->head, ce->ring->tail);
intel_ring_update_space(ce->ring);
__execlists_reset_reg_state(ce, engine);
__execlists_update_reg_state(ce, engine);
@@ -3082,11 +3262,11 @@ unwind:
__unwind_incomplete_requests(engine);
}
-static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
+static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
unsigned long flags;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
spin_lock_irqsave(&engine->active.lock, flags);
@@ -3100,14 +3280,14 @@ static void nop_submission_tasklet(unsigned long data)
/* The driver is wedged; don't process any more events. */
}
-static void execlists_cancel_requests(struct intel_engine_cs *engine)
+static void execlists_reset_cancel(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request *rq, *rn;
struct rb_node *rb;
unsigned long flags;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
/*
* Before we call engine->cancel_requests(), we should have exclusive
@@ -3194,13 +3374,13 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
if (__tasklet_enable(&execlists->tasklet))
/* And kick in case we missed a new request submission. */
tasklet_hi_schedule(&execlists->tasklet);
- GEM_TRACE("%s: depth->%d\n", engine->name,
- atomic_read(&execlists->tasklet.count));
+ ENGINE_TRACE(engine, "depth->%d\n",
+ atomic_read(&execlists->tasklet.count));
}
-static int gen8_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- const unsigned int flags)
+static int gen8_emit_bb_start_noarb(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
{
u32 *cs;
@@ -3234,7 +3414,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
return 0;
}
-static int gen9_emit_bb_start(struct i915_request *rq,
+static int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
{
@@ -3689,12 +3869,12 @@ static void execlists_park(struct intel_engine_cs *engine)
void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = execlists_submit_request;
- engine->cancel_requests = execlists_cancel_requests;
engine->schedule = i915_schedule;
engine->execlists.tasklet.func = execlists_submission_tasklet;
engine->reset.prepare = execlists_reset_prepare;
- engine->reset.reset = execlists_reset;
+ engine->reset.rewind = execlists_reset_rewind;
+ engine->reset.cancel = execlists_reset_cancel;
engine->reset.finish = execlists_reset_finish;
engine->park = execlists_park;
@@ -3709,13 +3889,27 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
if (INTEL_GEN(engine->i915) >= 12)
engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
+
+ if (intel_engine_has_preemption(engine))
+ engine->emit_bb_start = gen8_emit_bb_start;
+ else
+ engine->emit_bb_start = gen8_emit_bb_start_noarb;
+}
+
+static void execlists_shutdown(struct intel_engine_cs *engine)
+{
+ /* Synchronise with residual timers and any softirq they raise */
+ del_timer_sync(&engine->execlists.timer);
+ del_timer_sync(&engine->execlists.preempt);
+ tasklet_kill(&engine->execlists.tasklet);
}
-static void execlists_destroy(struct intel_engine_cs *engine)
+static void execlists_release(struct intel_engine_cs *engine)
{
+ execlists_shutdown(engine);
+
intel_engine_cleanup_common(engine);
lrc_destroy_wa_ctx(engine);
- kfree(engine);
}
static void
@@ -3723,13 +3917,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
{
/* Default vfuncs which can be overriden by each engine. */
- engine->destroy = execlists_destroy;
engine->resume = execlists_resume;
- engine->reset.prepare = execlists_reset_prepare;
- engine->reset.reset = execlists_reset;
- engine->reset.finish = execlists_reset_finish;
-
engine->cops = &execlists_context_ops;
engine->request_alloc = execlists_request_alloc;
@@ -3752,10 +3941,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
* until a more refined solution exists.
*/
}
- if (IS_GEN(engine->i915, 8))
- engine->emit_bb_start = gen8_emit_bb_start;
- else
- engine->emit_bb_start = gen9_emit_bb_start;
}
static inline void
@@ -3799,6 +3984,11 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
int intel_execlists_submission_setup(struct intel_engine_cs *engine)
{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct drm_i915_private *i915 = engine->i915;
+ struct intel_uncore *uncore = engine->uncore;
+ u32 base = engine->mmio_base;
+
tasklet_init(&engine->execlists.tasklet,
execlists_submission_tasklet, (unsigned long)engine);
timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
@@ -3810,21 +4000,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
if (engine->class == RENDER_CLASS)
rcs_submission_override(engine);
- return 0;
-}
-
-int intel_execlists_submission_init(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
- struct drm_i915_private *i915 = engine->i915;
- struct intel_uncore *uncore = engine->uncore;
- u32 base = engine->mmio_base;
- int ret;
-
- ret = intel_engine_init_common(engine);
- if (ret)
- return ret;
-
if (intel_init_workaround_bb(engine))
/*
* We continue even if we fail to initialize WA batch
@@ -3856,6 +4031,9 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
reset_csb_pointers(engine);
+ /* Finally, take ownership and responsibility for cleanup! */
+ engine->release = execlists_release;
+
return 0;
}
@@ -3895,18 +4073,21 @@ static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
static void init_common_reg_state(u32 * const regs,
const struct intel_engine_cs *engine,
- const struct intel_ring *ring)
+ const struct intel_ring *ring,
+ bool inhibit)
{
- regs[CTX_CONTEXT_CONTROL] =
- _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
- _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+ u32 ctl;
+
+ ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+ ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+ if (inhibit)
+ ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
if (INTEL_GEN(engine->i915) < 11)
- regs[CTX_CONTEXT_CONTROL] |=
- _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
- CTX_CTRL_RS_CTX_ENABLE);
+ ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
+ CTX_CTRL_RS_CTX_ENABLE);
+ regs[CTX_CONTEXT_CONTROL] = ctl;
- regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID;
- regs[CTX_BB_STATE] = RING_BB_PPGTT;
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
}
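
Note that the inhibit case above ORs in the bare bit rather than using _MASKED_BIT_ENABLE(): the preceding _MASKED_BIT_DISABLE() already placed the bit in the write mask, so setting only the value bit is enough. A small sketch, assuming illustrative bit positions for the two control bits:

	#include <assert.h>
	#include <stdint.h>

	#define MASKED_BIT_ENABLE(a)   (((a) << 16) | (a))
	#define MASKED_BIT_DISABLE(a)  ((a) << 16)
	#define CTX_RESTORE_INHIBIT    (1u << 0)  /* assumed bit position */
	#define INHIBIT_SYN_CTX_SWITCH (1u << 3)  /* assumed bit position */

	int main(void)
	{
		uint32_t ctl;

		ctl = MASKED_BIT_ENABLE(INHIBIT_SYN_CTX_SWITCH);
		ctl |= MASKED_BIT_DISABLE(CTX_RESTORE_INHIBIT);

		/* inhibit == true: flip just the value bit, the mask is in place */
		ctl |= CTX_RESTORE_INHIBIT;

		assert((ctl & MASKED_BIT_ENABLE(CTX_RESTORE_INHIBIT)) ==
		       MASKED_BIT_ENABLE(CTX_RESTORE_INHIBIT));
		return 0;
	}
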
static void init_wa_bb_reg_state(u32 * const regs,
@@ -3962,7 +4143,7 @@ static void execlists_init_reg_state(u32 *regs,
const struct intel_context *ce,
const struct intel_engine_cs *engine,
const struct intel_ring *ring,
- bool close)
+ bool inhibit)
{
/*
* A context is actually a big batch buffer with several
@@ -3974,21 +4155,17 @@ static void execlists_init_reg_state(u32 *regs,
*
* Must keep consistent with virtual_update_register_offsets().
*/
- u32 *bbe = set_offsets(regs, reg_offsets(engine), engine);
+ set_offsets(regs, reg_offsets(engine), engine, inhibit);
- if (close) { /* Close the batch; used mainly by live_lrc_layout() */
- *bbe = MI_BATCH_BUFFER_END;
- if (INTEL_GEN(engine->i915) >= 10)
- *bbe |= BIT(0);
- }
-
- init_common_reg_state(regs, engine, ring);
+ init_common_reg_state(regs, engine, ring, inhibit);
init_ppgtt_reg_state(regs, vm_alias(ce->vm));
init_wa_bb_reg_state(regs, engine,
INTEL_GEN(engine->i915) >= 12 ?
GEN12_CTX_BB_PER_CTX_PTR :
CTX_BB_PER_CTX_PTR);
+
+ __reset_stop_ring(regs, engine);
}
static int
@@ -3999,7 +4176,6 @@ populate_lr_context(struct intel_context *ce,
{
bool inhibit = true;
void *vaddr;
- u32 *regs;
int ret;
vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
@@ -4023,16 +4199,14 @@ populate_lr_context(struct intel_context *ce,
memcpy(vaddr, defaults, engine->context_size);
i915_gem_object_unpin_map(engine->default_state);
+ __set_bit(CONTEXT_VALID_BIT, &ce->flags);
inhibit = false;
}
/* The second page of the context object contains some fields which must
* be set up prior to the first execution. */
- regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
- execlists_init_reg_state(regs, ce, engine, ring, inhibit);
- if (inhibit)
- regs[CTX_CONTEXT_CONTROL] |=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+ execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
+ ce, engine, ring, inhibit);
ret = 0;
err_unpin_ctx:
@@ -4120,17 +4294,18 @@ static void virtual_context_destroy(struct kref *kref)
for (n = 0; n < ve->num_siblings; n++) {
struct intel_engine_cs *sibling = ve->siblings[n];
struct rb_node *node = &ve->nodes[sibling->id].rb;
+ unsigned long flags;
if (RB_EMPTY_NODE(node))
continue;
- spin_lock_irq(&sibling->active.lock);
+ spin_lock_irqsave(&sibling->active.lock, flags);
/* Detachment is lazily performed in the execlists tasklet */
if (!RB_EMPTY_NODE(node))
rb_erase_cached(node, &sibling->execlists.virtual);
- spin_unlock_irq(&sibling->active.lock);
+ spin_unlock_irqrestore(&sibling->active.lock, flags);
}
GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
@@ -4169,6 +4344,13 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve)
ve->siblings[0]);
}
+static int virtual_context_alloc(struct intel_context *ce)
+{
+ struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+
+ return __execlists_context_alloc(ce, ve->siblings[0]);
+}
+
static int virtual_context_pin(struct intel_context *ce)
{
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
@@ -4206,6 +4388,8 @@ static void virtual_context_exit(struct intel_context *ce)
}
static const struct intel_context_ops virtual_context_ops = {
+ .alloc = virtual_context_alloc,
+
.pin = virtual_context_pin,
.unpin = execlists_context_unpin,
@@ -4232,10 +4416,9 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
mask = ve->siblings[0]->mask;
}
- GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
- ve->base.name,
- rq->fence.context, rq->fence.seqno,
- mask, ve->base.execlists.queue_priority_hint);
+ ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
+ rq->fence.context, rq->fence.seqno,
+ mask, ve->base.execlists.queue_priority_hint);
return mask;
}
@@ -4326,10 +4509,9 @@ static void virtual_submit_request(struct i915_request *rq)
struct i915_request *old;
unsigned long flags;
- GEM_TRACE("%s: rq=%llx:%lld\n",
- ve->base.name,
- rq->fence.context,
- rq->fence.seqno);
+ ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
+ rq->fence.context,
+ rq->fence.seqno);
GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
@@ -4397,8 +4579,7 @@ virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
}
struct intel_context *
-intel_execlists_create_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs **siblings,
+intel_execlists_create_virtual(struct intel_engine_cs **siblings,
unsigned int count)
{
struct virtual_engine *ve;
@@ -4409,19 +4590,21 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
return ERR_PTR(-EINVAL);
if (count == 1)
- return intel_context_create(ctx, siblings[0]);
+ return intel_context_create(siblings[0]);
ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
if (!ve)
return ERR_PTR(-ENOMEM);
- ve->base.i915 = ctx->i915;
+ ve->base.i915 = siblings[0]->i915;
ve->base.gt = siblings[0]->gt;
ve->base.uncore = siblings[0]->uncore;
ve->base.id = -1;
+
ve->base.class = OTHER_CLASS;
ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+ ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
/*
* The decision on whether to submit a request using semaphores
@@ -4442,7 +4625,6 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
intel_engine_init_breadcrumbs(&ve->base);
-
intel_engine_init_execlists(&ve->base);
ve->base.cops = &virtual_context_ops;
@@ -4458,7 +4640,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
virtual_submission_tasklet,
(unsigned long)ve);
- intel_context_init(&ve->context, ctx, &ve->base);
+ intel_context_init(&ve->context, &ve->base);
for (n = 0; n < count; n++) {
struct intel_engine_cs *sibling = siblings[n];
@@ -4525,12 +4707,6 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
- err = __execlists_context_alloc(&ve->context, siblings[0]);
- if (err)
- goto err_put;
-
- __set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags);
-
return &ve->context;
err_put:
@@ -4539,14 +4715,12 @@ err_put:
}
struct intel_context *
-intel_execlists_clone_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs *src)
+intel_execlists_clone_virtual(struct intel_engine_cs *src)
{
struct virtual_engine *se = to_virtual_engine(src);
struct intel_context *dst;
- dst = intel_execlists_create_virtual(ctx,
- se->siblings,
+ dst = intel_execlists_create_virtual(se->siblings,
se->num_siblings);
if (IS_ERR(dst))
return dst;