Diffstat (limited to 'drivers/gpu/drm/i915/gt')
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 19
 drivers/gpu/drm/i915/gt/intel_context.c | 25
 drivers/gpu/drm/i915/gt/intel_context.h | 1
 drivers/gpu/drm/i915/gt/intel_context_types.h | 1
 drivers/gpu/drm/i915/gt/intel_engine.h | 245
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 262
 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 234
 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h | 23
 drivers/gpu/drm/i915/gt/intel_engine_pm.c | 30
 drivers/gpu/drm/i915/gt/intel_engine_pool.c | 17
 drivers/gpu/drm/i915/gt/intel_engine_pool.h | 4
 drivers/gpu/drm/i915/gt/intel_engine_types.h | 104
 drivers/gpu/drm/i915/gt/intel_engine_user.c | 18
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 37
 drivers/gpu/drm/i915/gt/intel_gt.c | 160
 drivers/gpu/drm/i915/gt/intel_gt.h | 16
 drivers/gpu/drm/i915/gt/intel_gt_irq.c | 5
 drivers/gpu/drm/i915/gt/intel_gt_pm.c | 207
 drivers/gpu/drm/i915/gt/intel_gt_pm.h | 16
 drivers/gpu/drm/i915/gt/intel_gt_requests.c | 137
 drivers/gpu/drm/i915/gt/intel_gt_requests.h | 24
 drivers/gpu/drm/i915/gt/intel_gt_types.h | 36
 drivers/gpu/drm/i915/gt/intel_hangcheck.c | 360
 drivers/gpu/drm/i915/gt/intel_llc.c | 161
 drivers/gpu/drm/i915/gt/intel_llc.h | 15
 drivers/gpu/drm/i915/gt/intel_llc_types.h | 13
 drivers/gpu/drm/i915/gt/intel_lrc.c | 1650
 drivers/gpu/drm/i915/gt/intel_lrc.h | 39
 drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 66
 drivers/gpu/drm/i915/gt/intel_mocs.c | 287
 drivers/gpu/drm/i915/gt/intel_mocs.h | 3
 drivers/gpu/drm/i915/gt/intel_rc6.c | 787
 drivers/gpu/drm/i915/gt/intel_rc6.h | 28
 drivers/gpu/drm/i915/gt/intel_rc6_types.h | 29
 drivers/gpu/drm/i915/gt/intel_renderstate.c | 1
 drivers/gpu/drm/i915/gt/intel_reset.c | 184
 drivers/gpu/drm/i915/gt/intel_reset.h | 16
 drivers/gpu/drm/i915/gt/intel_reset_types.h | 6
 drivers/gpu/drm/i915/gt/intel_ring.c | 323
 drivers/gpu/drm/i915/gt/intel_ring.h | 131
 drivers/gpu/drm/i915/gt/intel_ring_submission.c (renamed from drivers/gpu/drm/i915/gt/intel_ringbuffer.c) | 406
 drivers/gpu/drm/i915/gt/intel_ring_types.h | 51
 drivers/gpu/drm/i915/gt/intel_rps.c | 1872
 drivers/gpu/drm/i915/gt/intel_rps.h | 38
 drivers/gpu/drm/i915/gt/intel_rps_types.h | 93
 drivers/gpu/drm/i915/gt/intel_sseu.c | 37
 drivers/gpu/drm/i915/gt/intel_sseu.h | 37
 drivers/gpu/drm/i915/gt/intel_timeline.c | 52
 drivers/gpu/drm/i915/gt/intel_timeline_types.h | 10
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 70
 drivers/gpu/drm/i915/gt/mock_engine.c | 7
 drivers/gpu/drm/i915/gt/selftest_context.c | 71
 drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c | 350
 drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 2
 drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 60
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 207
 drivers/gpu/drm/i915/gt/selftest_llc.c | 80
 drivers/gpu/drm/i915/gt/selftest_llc.h | 14
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 1895
 drivers/gpu/drm/i915/gt/selftest_reset.c | 16
 drivers/gpu/drm/i915/gt/selftest_timeline.c | 138
 drivers/gpu/drm/i915/gt/selftest_workarounds.c | 270
 drivers/gpu/drm/i915/gt/selftests/mock_timeline.c | 2
 drivers/gpu/drm/i915/gt/uc/intel_guc.c | 185
 drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 2
 drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 56
 drivers/gpu/drm/i915/gt/uc/intel_guc_log.h | 4
 drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h | 3
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 21
 drivers/gpu/drm/i915/gt/uc/intel_huc.c | 41
 drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c | 15
 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 38
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 76
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h | 11
 drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 46
 76 files changed, 9155 insertions(+), 2843 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 09c68dda2098..55317081d48b 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -120,7 +120,6 @@ __dma_fence_signal__notify(struct dma_fence *fence,
struct dma_fence_cb *cur, *tmp;
lockdep_assert_held(fence->lock);
- lockdep_assert_irqs_disabled();
list_for_each_entry_safe(cur, tmp, list, node) {
INIT_LIST_HEAD(&cur->node);
@@ -134,9 +133,10 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
const ktime_t timestamp = ktime_get();
struct intel_context *ce, *cn;
struct list_head *pos, *next;
+ unsigned long flags;
LIST_HEAD(signal);
- spin_lock(&b->irq_lock);
+ spin_lock_irqsave(&b->irq_lock, flags);
if (b->irq_armed && list_empty(&b->signalers))
__intel_breadcrumbs_disarm_irq(b);
@@ -182,30 +182,23 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
}
}
- spin_unlock(&b->irq_lock);
+ spin_unlock_irqrestore(&b->irq_lock, flags);
list_for_each_safe(pos, next, &signal) {
struct i915_request *rq =
list_entry(pos, typeof(*rq), signal_link);
struct list_head cb_list;
- spin_lock(&rq->lock);
+ spin_lock_irqsave(&rq->lock, flags);
list_replace(&rq->fence.cb_list, &cb_list);
__dma_fence_signal__timestamp(&rq->fence, timestamp);
__dma_fence_signal__notify(&rq->fence, &cb_list);
- spin_unlock(&rq->lock);
+ spin_unlock_irqrestore(&rq->lock, flags);
i915_request_put(rq);
}
}
-void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
-{
- local_irq_disable();
- intel_engine_breadcrumbs_irq(engine);
- local_irq_enable();
-}
-
static void signal_irq_work(struct irq_work *work)
{
struct intel_engine_cs *engine =
@@ -275,7 +268,6 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
lockdep_assert_held(&rq->lock);
- lockdep_assert_irqs_disabled();
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
@@ -325,7 +317,6 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
lockdep_assert_held(&rq->lock);
- lockdep_assert_irqs_disabled();
/*
* We must wait for b->irq_lock so that we know the interrupt handler
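Note: with intel_engine_signal_breadcrumbs() removed, intel_engine_breadcrumbs_irq() is no longer guaranteed to run with interrupts already disabled, which is why the lockdep_assert_irqs_disabled() checks are dropped and the plain spin_lock()/spin_unlock() pairs become the _irqsave/_irqrestore variants. A minimal sketch of that locking pattern, using a hypothetical lock and list that are not part of this patch:

#include <linux/spinlock.h>
#include <linux/list.h>

/*
 * Hypothetical example: state touched from both hard-irq and process
 * context. Saving and restoring the irq flags keeps the locking correct
 * regardless of which context the caller happens to be in.
 */
static DEFINE_SPINLOCK(example_lock);
static LIST_HEAD(example_signals);

static void example_drain(struct list_head *out)
{
        unsigned long flags;

        spin_lock_irqsave(&example_lock, flags);
        list_splice_init(&example_signals, out);
        spin_unlock_irqrestore(&example_lock, flags);
}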
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index f55691d151ae..ee9d2bcd2c13 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -13,6 +13,7 @@
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
+#include "intel_ring.h"
static struct i915_global_context {
struct i915_global base;
@@ -62,7 +63,7 @@ int __intel_context_do_pin(struct intel_context *ce)
}
err = 0;
- with_intel_runtime_pm(&ce->engine->i915->runtime_pm, wakeref)
+ with_intel_runtime_pm(ce->engine->uncore->rpm, wakeref)
err = ce->ops->pin(ce);
if (err)
goto err;
@@ -134,10 +135,11 @@ static int __context_pin_state(struct i915_vma *vma)
static void __context_unpin_state(struct i915_vma *vma)
{
- __i915_vma_unpin(vma);
i915_vma_make_shrinkable(vma);
+ __i915_vma_unpin(vma);
}
+__i915_active_call
static void __intel_context_retire(struct i915_active *active)
{
struct intel_context *ce = container_of(active, typeof(*ce), active);
@@ -150,6 +152,7 @@ static void __intel_context_retire(struct i915_active *active)
intel_timeline_unpin(ce->timeline);
intel_ring_unpin(ce->ring);
+
intel_context_put(ce);
}
@@ -219,12 +222,20 @@ intel_context_init(struct intel_context *ce,
struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
+ struct i915_address_space *vm;
+
GEM_BUG_ON(!engine->cops);
kref_init(&ce->ref);
ce->gem_context = ctx;
- ce->vm = i915_vm_get(ctx->vm ?: &engine->gt->ggtt->vm);
+ rcu_read_lock();
+ vm = rcu_dereference(ctx->vm);
+ if (vm)
+ ce->vm = i915_vm_get(vm);
+ else
+ ce->vm = i915_vm_get(&engine->gt->ggtt->vm);
+ rcu_read_unlock();
if (ctx->timeline)
ce->timeline = intel_timeline_get(ctx->timeline);
@@ -238,7 +249,7 @@ intel_context_init(struct intel_context *ce,
mutex_init(&ce->pin_mutex);
- i915_active_init(ctx->i915, &ce->active,
+ i915_active_init(&ce->active,
__intel_context_active, __intel_context_retire);
}
@@ -298,14 +309,14 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
/* Only suitable for use in remotely modifying this context */
GEM_BUG_ON(rq->hw_context == ce);
- if (rq->timeline != tl) { /* beware timeline sharing */
+ if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
err = mutex_lock_interruptible_nested(&tl->mutex,
SINGLE_DEPTH_NESTING);
if (err)
return err;
/* Queue this switch after current activity by this context. */
- err = i915_active_request_set(&tl->last_request, rq);
+ err = i915_active_fence_set(&tl->last_request, rq);
mutex_unlock(&tl->mutex);
if (err)
return err;
@@ -319,7 +330,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
* words transfer the pinned ce object to tracked active request.
*/
GEM_BUG_ON(i915_active_is_idle(&ce->active));
- return i915_active_ref(&ce->active, rq->timeline, rq);
+ return i915_active_add_request(&ce->active, rq);
}
struct i915_request *intel_context_create_request(struct intel_context *ce)
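Note: i915_gem_context.vm is now an RCU-protected pointer, so intel_context_init() resolves it under rcu_read_lock() and falls back to the GGTT address space when the context has no private VM. A minimal sketch of that "dereference under RCU, take a reference, fall back to a default" shape, with hypothetical types, assuming (as the patch does) that the object cannot be released while the caller still holds the context:

#include <linux/rcupdate.h>
#include <linux/kref.h>

struct example_vm { struct kref ref; };

static struct example_vm *example_vm_get(struct example_vm *vm)
{
        kref_get(&vm->ref);
        return vm;
}

static struct example_vm *
example_pick_vm(struct example_vm __rcu **slot, struct example_vm *fallback)
{
        struct example_vm *vm;

        rcu_read_lock();
        vm = rcu_dereference(*slot);    /* may be swapped concurrently */
        vm = example_vm_get(vm ?: fallback);
        rcu_read_unlock();

        return vm;
}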
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index dd742ac2fbdb..68b3d317d959 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,6 +12,7 @@
#include "i915_active.h"
#include "intel_context_types.h"
#include "intel_engine_types.h"
+#include "intel_ring_types.h"
#include "intel_timeline_types.h"
void intel_context_init(struct intel_context *ce,
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index bf9cedfccbf0..6959b05ae5f8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -58,6 +58,7 @@ struct intel_context {
u32 *lrc_reg_state;
u64 lrc_desc;
+ u32 tag; /* cookie passed to HW to track this context on submission */
unsigned int active_count; /* protected by timeline->mutex */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index d3c6993f4f46..bc3b72bfa9e3 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -19,6 +19,7 @@
#include "intel_workarounds.h"
struct drm_printer;
+struct intel_gt;
/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
* but keeps the logic simple. Indeed, the whole purpose of this macro is just
@@ -89,38 +90,6 @@ struct drm_printer;
/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
* do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
*/
-enum intel_engine_hangcheck_action {
- ENGINE_IDLE = 0,
- ENGINE_WAIT,
- ENGINE_ACTIVE_SEQNO,
- ENGINE_ACTIVE_HEAD,
- ENGINE_ACTIVE_SUBUNITS,
- ENGINE_WAIT_KICK,
- ENGINE_DEAD,
-};
-
-static inline const char *
-hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
-{
- switch (a) {
- case ENGINE_IDLE:
- return "idle";
- case ENGINE_WAIT:
- return "wait";
- case ENGINE_ACTIVE_SEQNO:
- return "active seqno";
- case ENGINE_ACTIVE_HEAD:
- return "active head";
- case ENGINE_ACTIVE_SUBUNITS:
- return "active subunits";
- case ENGINE_WAIT_KICK:
- return "wait kick";
- case ENGINE_DEAD:
- return "dead";
- }
-
- return "unknown";
-}
static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
@@ -136,6 +105,20 @@ execlists_active(const struct intel_engine_execlists *execlists)
return READ_ONCE(*execlists->active);
}
+static inline void
+execlists_active_lock_bh(struct intel_engine_execlists *execlists)
+{
+ local_bh_disable(); /* prevent local softirq and lock recursion */
+ tasklet_lock(&execlists->tasklet);
+}
+
+static inline void
+execlists_active_unlock_bh(struct intel_engine_execlists *execlists)
+{
+ tasklet_unlock(&execlists->tasklet);
+ local_bh_enable(); /* restore softirq, and kick ksoftirqd! */
+}
+
struct i915_request *
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
@@ -192,126 +175,13 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
#define I915_HWS_CSB_WRITE_INDEX 0x1f
#define CNL_HWS_CSB_WRITE_INDEX 0x2f
-struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size);
-int intel_ring_pin(struct intel_ring *ring);
-void intel_ring_reset(struct intel_ring *ring, u32 tail);
-unsigned int intel_ring_update_space(struct intel_ring *ring);
-void intel_ring_unpin(struct intel_ring *ring);
-void intel_ring_free(struct kref *ref);
-
-static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
-{
- kref_get(&ring->ref);
- return ring;
-}
-
-static inline void intel_ring_put(struct intel_ring *ring)
-{
- kref_put(&ring->ref, intel_ring_free);
-}
-
void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);
-int __must_check intel_ring_cacheline_align(struct i915_request *rq);
-
-u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
-
-static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
-{
- /* Dummy function.
- *
- * This serves as a placeholder in the code so that the reader
- * can compare against the preceding intel_ring_begin() and
- * check that the number of dwords emitted matches the space
- * reserved for the command packet (i.e. the value passed to
- * intel_ring_begin()).
- */
- GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
-}
-
-static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
-{
- return pos & (ring->size - 1);
-}
-
-static inline bool
-intel_ring_offset_valid(const struct intel_ring *ring,
- unsigned int pos)
-{
- if (pos & -ring->size) /* must be strictly within the ring */
- return false;
-
- if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
- return false;
-
- return true;
-}
-
-static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
-{
- /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
- u32 offset = addr - rq->ring->vaddr;
- GEM_BUG_ON(offset > rq->ring->size);
- return intel_ring_wrap(rq->ring, offset);
-}
-
-static inline void
-assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
-{
- GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
-
- /*
- * "Ring Buffer Use"
- * Gen2 BSpec "1. Programming Environment" / 1.4.4.6
- * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
- * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
- * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
- * same cacheline, the Head Pointer must not be greater than the Tail
- * Pointer."
- *
- * We use ring->head as the last known location of the actual RING_HEAD,
- * it may have advanced but in the worst case it is equally the same
- * as ring->head and so we should never program RING_TAIL to advance
- * into the same cacheline as ring->head.
- */
-#define cacheline(a) round_down(a, CACHELINE_BYTES)
- GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
- tail < ring->head);
-#undef cacheline
-}
-
-static inline unsigned int
-intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
-{
- /* Whilst writes to the tail are strictly order, there is no
- * serialisation between readers and the writers. The tail may be
- * read by i915_request_retire() just as it is being updated
- * by execlists, as although the breadcrumb is complete, the context
- * switch hasn't been seen.
- */
- assert_ring_tail_valid(ring, tail);
- ring->tail = tail;
- return tail;
-}
-
-static inline unsigned int
-__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
-{
- /*
- * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
- * same cacheline, the Head Pointer must not be greater than the Tail
- * Pointer."
- */
- GEM_BUG_ON(!is_power_of_2(size));
- return (head - tail - CACHELINE_BYTES) & (size - 1);
-}
-
-int intel_engines_init_mmio(struct drm_i915_private *i915);
-int intel_engines_setup(struct drm_i915_private *i915);
-int intel_engines_init(struct drm_i915_private *i915);
-void intel_engines_cleanup(struct drm_i915_private *i915);
+int intel_engines_init_mmio(struct intel_gt *gt);
+int intel_engines_setup(struct intel_gt *gt);
+int intel_engines_init(struct intel_gt *gt);
+void intel_engines_cleanup(struct intel_gt *gt);
int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);
@@ -335,7 +205,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine);
void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
static inline void
@@ -408,8 +277,9 @@ static inline void __intel_engine_reset(struct intel_engine_cs *engine,
engine->serial++; /* contexts lost */
}
-bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct intel_gt *gt);
+bool intel_engine_is_idle(struct intel_engine_cs *engine);
+void intel_engine_flush_submission(struct intel_engine_cs *engine);
void intel_engines_reset_default_submission(struct intel_gt *gt);
@@ -420,61 +290,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
struct drm_printer *m,
const char *header, ...);
-static inline void intel_engine_context_in(struct intel_engine_cs *engine)
-{
- unsigned long flags;
-
- if (READ_ONCE(engine->stats.enabled) == 0)
- return;
-
- write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (engine->stats.enabled > 0) {
- if (engine->stats.active++ == 0)
- engine->stats.start = ktime_get();
- GEM_BUG_ON(engine->stats.active == 0);
- }
-
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
-static inline void intel_engine_context_out(struct intel_engine_cs *engine)
-{
- unsigned long flags;
-
- if (READ_ONCE(engine->stats.enabled) == 0)
- return;
-
- write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (engine->stats.enabled > 0) {
- ktime_t last;
-
- if (engine->stats.active && --engine->stats.active == 0) {
- /*
- * Decrement the active context count and in case GPU
- * is now idle add up to the running total.
- */
- last = ktime_sub(ktime_get(), engine->stats.start);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
- } else if (engine->stats.active == 0) {
- /*
- * After turning on engine stats, context out might be
- * the first event in which case we account from the
- * time stats gathering was turned on.
- */
- last = ktime_sub(ktime_get(), engine->stats.enabled_at);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
- }
- }
-
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);
@@ -511,4 +326,22 @@ void intel_engine_init_active(struct intel_engine_cs *engine,
#define ENGINE_MOCK 1
#define ENGINE_VIRTUAL 2
+static inline bool
+intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
+{
+ if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ return false;
+
+ return intel_engine_has_preemption(engine);
+}
+
+static inline bool
+intel_engine_has_timeslices(const struct intel_engine_cs *engine)
+{
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return false;
+
+ return intel_engine_has_semaphores(engine);
+}
+
#endif /* _INTEL_RINGBUFFER_H_ */
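Note: execlists_active_lock_bh()/unlock_bh() package the local_bh_disable() + tasklet_lock() sequence: disabling bottom halves first keeps the submission tasklet from running (and recursing on its own lock) on this CPU, so the caller gets a stable view of the execlists state. A minimal sketch of the pattern with a hypothetical tasklet and inspection hook:

#include <linux/interrupt.h>

/*
 * Hypothetical example: peek at state that is normally only modified
 * from a tasklet (softirq context).
 */
static void example_peek(struct tasklet_struct *t, void (*show)(void))
{
        local_bh_disable();     /* keep the tasklet off this CPU */
        tasklet_lock(t);        /* and wait for it on any other CPU */

        show();                 /* inspect the tasklet-owned state */

        tasklet_unlock(t);
        local_bh_enable();      /* may immediately run pending softirqs */
}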
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 82630db0394b..5ca3ec911e50 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -37,6 +37,7 @@
#include "intel_context.h"
#include "intel_lrc.h"
#include "intel_reset.h"
+#include "intel_ring.h"
/* Haswell does have the CXT_SIZE register however it does not appear to be
* valid. Now, docs explain in dwords what is in the context object. The full
@@ -277,6 +278,9 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
+ if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine)))
+ return -EINVAL;
+
if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
return -EINVAL;
@@ -293,6 +297,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
engine->id = id;
+ engine->legacy_idx = INVALID_ENGINE;
engine->mask = BIT(id);
engine->i915 = gt->i915;
engine->gt = gt;
@@ -304,6 +309,15 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->instance = info->instance;
__sprint_engine_name(engine);
+ engine->props.heartbeat_interval_ms =
+ CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
+ engine->props.preempt_timeout_ms =
+ CONFIG_DRM_I915_PREEMPT_TIMEOUT;
+ engine->props.stop_timeout_ms =
+ CONFIG_DRM_I915_STOP_TIMEOUT;
+ engine->props.timeslice_duration_ms =
+ CONFIG_DRM_I915_TIMESLICE_DURATION;
+
/*
* To be overridden by the backend on setup. However to facilitate
* cleanup on error during setup, we always provide the destroy vfunc.
@@ -328,6 +342,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
intel_engine_sanitize_mmio(engine);
gt->engine_class[info->class][info->instance] = engine;
+ gt->engine[id] = engine;
intel_engine_add_user(engine);
gt->i915->engine[id] = engine;
@@ -365,38 +380,40 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine)
}
}
-static void intel_setup_engine_capabilities(struct drm_i915_private *i915)
+static void intel_setup_engine_capabilities(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, i915, id)
+ for_each_engine(engine, gt, id)
__setup_engine_capabilities(engine);
}
/**
* intel_engines_cleanup() - free the resources allocated for Command Streamers
- * @i915: the i915 devic
+ * @gt: pointer to struct intel_gt
*/
-void intel_engines_cleanup(struct drm_i915_private *i915)
+void intel_engines_cleanup(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
engine->destroy(engine);
- i915->engine[id] = NULL;
+ gt->engine[id] = NULL;
+ gt->i915->engine[id] = NULL;
}
}
/**
* intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
- * @i915: the i915 device
+ * @gt: pointer to struct intel_gt
*
* Return: non-zero if the initialization failed.
*/
-int intel_engines_init_mmio(struct drm_i915_private *i915)
+int intel_engines_init_mmio(struct intel_gt *gt)
{
+ struct drm_i915_private *i915 = gt->i915;
struct intel_device_info *device_info = mkwrite_device_info(i915);
const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
unsigned int mask = 0;
@@ -414,7 +431,7 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
if (!HAS_ENGINE(i915, i))
continue;
- err = intel_engine_setup(&i915->gt, i);
+ err = intel_engine_setup(gt, i);
if (err)
goto cleanup;
@@ -431,36 +448,36 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
RUNTIME_INFO(i915)->num_engines = hweight32(mask);
- intel_gt_check_and_clear_faults(&i915->gt);
+ intel_gt_check_and_clear_faults(gt);
- intel_setup_engine_capabilities(i915);
+ intel_setup_engine_capabilities(gt);
return 0;
cleanup:
- intel_engines_cleanup(i915);
+ intel_engines_cleanup(gt);
return err;
}
/**
* intel_engines_init() - init the Engine Command Streamers
- * @i915: i915 device private
+ * @gt: pointer to struct intel_gt
*
* Return: non-zero if the initialization failed.
*/
-int intel_engines_init(struct drm_i915_private *i915)
+int intel_engines_init(struct intel_gt *gt)
{
int (*init)(struct intel_engine_cs *engine);
struct intel_engine_cs *engine;
enum intel_engine_id id;
int err;
- if (HAS_EXECLISTS(i915))
+ if (HAS_EXECLISTS(gt->i915))
init = intel_execlists_submission_init;
else
init = intel_ring_submission_init;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
err = init(engine);
if (err)
goto cleanup;
@@ -469,7 +486,7 @@ int intel_engines_init(struct drm_i915_private *i915)
return 0;
cleanup:
- intel_engines_cleanup(i915);
+ intel_engines_cleanup(gt);
return err;
}
@@ -513,7 +530,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
unsigned int flags;
flags = PIN_GLOBAL;
- if (!HAS_LLC(engine->i915))
+ if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
/*
* On g33, we cannot place HWS above 256MiB, so
* restrict its pinning to the low mappable arena.
@@ -597,7 +614,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_active(engine, ENGINE_PHYSICAL);
intel_engine_init_breadcrumbs(engine);
intel_engine_init_execlists(engine);
- intel_engine_init_hangcheck(engine);
intel_engine_init_cmd_parser(engine);
intel_engine_init__pm(engine);
@@ -616,26 +632,26 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
/**
* intel_engines_setup- setup engine state not requiring hw access
- * @i915: Device to setup.
+ * @gt: pointer to struct intel_gt
*
* Initializes engine structure members shared between legacy and execlists
* submission modes which do not require hardware access.
*
* Typically done early in the submission mode specific engine setup stage.
*/
-int intel_engines_setup(struct drm_i915_private *i915)
+int intel_engines_setup(struct intel_gt *gt)
{
int (*setup)(struct intel_engine_cs *engine);
struct intel_engine_cs *engine;
enum intel_engine_id id;
int err;
- if (HAS_EXECLISTS(i915))
+ if (HAS_EXECLISTS(gt->i915))
setup = intel_execlists_submission_setup;
else
setup = intel_ring_submission_setup;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
err = intel_engine_setup_common(engine);
if (err)
goto cleanup;
@@ -653,7 +669,7 @@ int intel_engines_setup(struct drm_i915_private *i915)
return 0;
cleanup:
- intel_engines_cleanup(i915);
+ intel_engines_cleanup(gt);
return err;
}
@@ -680,6 +696,8 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
engine->status_page.vma))
goto out_frame;
+ mutex_lock(&frame->timeline.mutex);
+
frame->ring.vaddr = frame->cs;
frame->ring.size = sizeof(frame->cs);
frame->ring.effective_size = frame->ring.size;
@@ -688,18 +706,22 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
frame->rq.i915 = engine->i915;
frame->rq.engine = engine;
frame->rq.ring = &frame->ring;
- frame->rq.timeline = &frame->timeline;
+ rcu_assign_pointer(frame->rq.timeline, &frame->timeline);
dw = intel_timeline_pin(&frame->timeline);
if (dw < 0)
goto out_timeline;
+ spin_lock_irq(&engine->active.lock);
dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
+ spin_unlock_irq(&engine->active.lock);
+
GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */
intel_timeline_unpin(&frame->timeline);
out_timeline:
+ mutex_unlock(&frame->timeline.mutex);
intel_timeline_fini(&frame->timeline);
out_frame:
kfree(frame);
@@ -730,6 +752,7 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
static struct intel_context *
create_kernel_context(struct intel_engine_cs *engine)
{
+ static struct lock_class_key kernel;
struct intel_context *ce;
int err;
@@ -745,6 +768,14 @@ create_kernel_context(struct intel_engine_cs *engine)
return ERR_PTR(err);
}
+ /*
+ * Give our perma-pinned kernel timelines a separate lockdep class,
+ * so that we can use them from within the normal user timelines
+ * should we need to inject GPU operations during their request
+ * construction.
+ */
+ lockdep_set_class(&ce->timeline->mutex, &kernel);
+
return ce;
}
@@ -814,8 +845,10 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
if (engine->default_state)
i915_gem_object_put(engine->default_state);
- intel_context_unpin(engine->kernel_context);
- intel_context_put(engine->kernel_context);
+ if (engine->kernel_context) {
+ intel_context_unpin(engine->kernel_context);
+ intel_context_put(engine->kernel_context);
+ }
GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
intel_wa_list_free(&engine->ctx_wa_list);
@@ -851,6 +884,21 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
return bbaddr;
}
+static unsigned long stop_timeout(const struct intel_engine_cs *engine)
+{
+ if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */
+ return 0;
+
+ /*
+ * If we are doing a normal GPU reset, we can take our time and allow
+ * the engine to quiesce. We've stopped submission to the engine, and
+ * if we wait long enough an innocent context should complete and
+ * leave the engine idle. So they should not be caught unaware by
+ * the forthcoming GPU reset (which usually follows the stop_cs)!
+ */
+ return READ_ONCE(engine->props.stop_timeout_ms);
+}
+
int intel_engine_stop_cs(struct intel_engine_cs *engine)
{
struct intel_uncore *uncore = engine->uncore;
@@ -868,7 +916,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
err = 0;
if (__intel_wait_for_register_fw(uncore,
mode, MODE_IDLE, MODE_IDLE,
- 1000, 0,
+ 1000, stop_timeout(engine),
NULL)) {
GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
err = -ETIMEDOUT;
@@ -948,6 +996,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
struct intel_instdone *instdone)
{
struct drm_i915_private *i915 = engine->i915;
+ const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
struct intel_uncore *uncore = engine->uncore;
u32 mmio_base = engine->mmio_base;
int slice;
@@ -965,7 +1014,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
instdone->slice_common =
intel_uncore_read(uncore, GEN7_SC_INSTDONE);
- for_each_instdone_slice_subslice(i915, slice, subslice) {
+ for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
instdone->sampler[slice][subslice] =
read_subslice_reg(engine, slice, subslice,
GEN7_SAMPLER_INSTDONE);
@@ -1031,6 +1080,25 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
return idle;
}
+void intel_engine_flush_submission(struct intel_engine_cs *engine)
+{
+ struct tasklet_struct *t = &engine->execlists.tasklet;
+
+ if (__tasklet_is_scheduled(t)) {
+ local_bh_disable();
+ if (tasklet_trylock(t)) {
+ /* Must wait for any GPU reset in progress. */
+ if (__tasklet_is_enabled(t))
+ t->func(t->data);
+ tasklet_unlock(t);
+ }
+ local_bh_enable();
+ }
+
+ /* Otherwise flush the tasklet if it was running on another cpu */
+ tasklet_unlock_wait(t);
+}
+
/**
* intel_engine_is_idle() - Report if the engine has finished process all work
* @engine: the intel_engine_cs
@@ -1049,21 +1117,9 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
/* Waiting to drain ELSP? */
if (execlists_active(&engine->execlists)) {
- struct tasklet_struct *t = &engine->execlists.tasklet;
-
synchronize_hardirq(engine->i915->drm.pdev->irq);
- local_bh_disable();
- if (tasklet_trylock(t)) {
- /* Must wait for any GPU reset in progress. */
- if (__tasklet_is_enabled(t))
- t->func(t->data);
- tasklet_unlock(t);
- }
- local_bh_enable();
-
- /* Otherwise flush the tasklet if it was on another cpu */
- tasklet_unlock_wait(t);
+ intel_engine_flush_submission(engine);
if (execlists_active(&engine->execlists))
return false;
@@ -1093,7 +1149,7 @@ bool intel_engines_are_idle(struct intel_gt *gt)
if (!READ_ONCE(gt->awake))
return true;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
if (!intel_engine_is_idle(engine))
return false;
}
@@ -1106,7 +1162,7 @@ void intel_engines_reset_default_submission(struct intel_gt *gt)
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, gt->i915, id)
+ for_each_engine(engine, gt, id)
engine->set_default_submission(engine);
}
@@ -1118,6 +1174,8 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
case 3:
/* maybe only uses physical not virtual addresses */
return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
+ case 4:
+ return !IS_I965G(engine->i915); /* who knows! */
case 6:
return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
default:
@@ -1193,13 +1251,43 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len)
}
}
+static struct intel_timeline *get_timeline(struct i915_request *rq)
+{
+ struct intel_timeline *tl;
+
+ /*
+ * Even though we are holding the engine->active.lock here, there
+ * is no control over the submission queue per-se and we are
+ * inspecting the active state at a random point in time, with an
+ * unknown queue. Play safe and make sure the timeline remains valid.
+ * (Only being used for pretty printing, one extra kref shouldn't
+ * cause a camel stampede!)
+ */
+ rcu_read_lock();
+ tl = rcu_dereference(rq->timeline);
+ if (!kref_get_unless_zero(&tl->kref))
+ tl = NULL;
+ rcu_read_unlock();
+
+ return tl;
+}
+
+static const char *repr_timer(const struct timer_list *t)
+{
+ if (!READ_ONCE(t->expires))
+ return "inactive";
+
+ if (timer_pending(t))
+ return "active";
+
+ return "expired";
+}
+
static void intel_engine_print_registers(struct intel_engine_cs *engine,
struct drm_printer *m)
{
struct drm_i915_private *dev_priv = engine->i915;
- const struct intel_engine_execlists * const execlists =
- &engine->execlists;
- unsigned long flags;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
u64 addr;
if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
@@ -1256,19 +1344,21 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
unsigned int idx;
u8 read, write;
- drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n",
- ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
- ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
- num_entries);
+ drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n",
+ yesno(test_bit(TASKLET_STATE_SCHED,
+ &engine->execlists.tasklet.state)),
+ enableddisabled(!atomic_read(&engine->execlists.tasklet.count)),
+ repr_timer(&engine->execlists.preempt),
+ repr_timer(&engine->execlists.timer));
read = execlists->csb_head;
write = READ_ONCE(*execlists->csb_write);
- drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? %s (%s)\n",
- read, write,
- yesno(test_bit(TASKLET_STATE_SCHED,
- &engine->execlists.tasklet.state)),
- enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
+ drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n",
+ ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
+ ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
+ read, write, num_entries);
+
if (read >= num_entries)
read = 0;
if (write >= num_entries)
@@ -1281,35 +1371,47 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
idx, hws[idx * 2], hws[idx * 2 + 1]);
}
- spin_lock_irqsave(&engine->active.lock, flags);
+ execlists_active_lock_bh(execlists);
+ rcu_read_lock();
for (port = execlists->active; (rq = *port); port++) {
char hdr[80];
int len;
len = snprintf(hdr, sizeof(hdr),
- "\t\tActive[%d: ",
+ "\t\tActive[%d]: ",
(int)(port - execlists->active));
- if (!i915_request_signaled(rq))
+ if (!i915_request_signaled(rq)) {
+ struct intel_timeline *tl = get_timeline(rq);
+
len += snprintf(hdr + len, sizeof(hdr) - len,
"ring:{start:%08x, hwsp:%08x, seqno:%08x}, ",
i915_ggtt_offset(rq->ring->vma),
- rq->timeline->hwsp_offset,
+ tl ? tl->hwsp_offset : 0,
hwsp_seqno(rq));
+
+ if (tl)
+ intel_timeline_put(tl);
+ }
snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
print_request(m, rq, hdr);
}
for (port = execlists->pending; (rq = *port); port++) {
+ struct intel_timeline *tl = get_timeline(rq);
char hdr[80];
snprintf(hdr, sizeof(hdr),
"\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
(int)(port - execlists->pending),
i915_ggtt_offset(rq->ring->vma),
- rq->timeline->hwsp_offset,
+ tl ? tl->hwsp_offset : 0,
hwsp_seqno(rq));
print_request(m, rq, hdr);
+
+ if (tl)
+ intel_timeline_put(tl);
}
- spin_unlock_irqrestore(&engine->active.lock, flags);
+ rcu_read_unlock();
+ execlists_active_unlock_bh(execlists);
} else if (INTEL_GEN(dev_priv) > 6) {
drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
ENGINE_READ(engine, RING_PP_DIR_BASE));
@@ -1374,8 +1476,13 @@ void intel_engine_dump(struct intel_engine_cs *engine,
drm_printf(m, "*** WEDGED ***\n");
drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
- drm_printf(m, "\tHangcheck: %d ms ago\n",
- jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
+
+ rcu_read_lock();
+ rq = READ_ONCE(engine->heartbeat.systole);
+ if (rq)
+ drm_printf(m, "\tHeartbeat: %d ms ago\n",
+ jiffies_to_msecs(jiffies - rq->emitted_jiffies));
+ rcu_read_unlock();
drm_printf(m, "\tReset count: %d (global %d)\n",
i915_reset_engine_count(error, engine),
i915_reset_count(error));
@@ -1385,6 +1492,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
spin_lock_irqsave(&engine->active.lock, flags);
rq = intel_engine_find_active_request(engine);
if (rq) {
+ struct intel_timeline *tl = get_timeline(rq);
+
print_request(m, rq, "\t\tactive ");
drm_printf(m, "\t\tring->start: 0x%08x\n",
@@ -1397,18 +1506,27 @@ void intel_engine_dump(struct intel_engine_cs *engine,
rq->ring->emit);
drm_printf(m, "\t\tring->space: 0x%08x\n",
rq->ring->space);
- drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
- rq->timeline->hwsp_offset);
+
+ if (tl) {
+ drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
+ tl->hwsp_offset);
+ intel_timeline_put(tl);
+ }
print_request_ring(m, rq);
+
+ if (rq->hw_context->lrc_reg_state) {
+ drm_printf(m, "Logical Ring Context:\n");
+ hexdump(m, rq->hw_context->lrc_reg_state, PAGE_SIZE);
+ }
}
spin_unlock_irqrestore(&engine->active.lock, flags);
drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base);
- wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm);
+ wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
if (wakeref) {
intel_engine_print_registers(engine, m);
- intel_runtime_pm_put(&engine->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(engine->uncore->rpm, wakeref);
} else {
drm_printf(m, "\tDevice is asleep; skipping register dump\n");
}
@@ -1440,8 +1558,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
if (!intel_engine_supports_stats(engine))
return -ENODEV;
- spin_lock_irqsave(&engine->active.lock, flags);
- write_seqlock(&engine->stats.lock);
+ execlists_active_lock_bh(execlists);
+ write_seqlock_irqsave(&engine->stats.lock, flags);
if (unlikely(engine->stats.enabled == ~0)) {
err = -EBUSY;
@@ -1469,8 +1587,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
}
unlock:
- write_sequnlock(&engine->stats.lock);
- spin_unlock_irqrestore(&engine->active.lock, flags);
+ write_sequnlock_irqrestore(&engine->stats.lock, flags);
+ execlists_active_unlock_bh(execlists);
return err;
}
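Note: get_timeline() is called from the engine dump path, which samples requests at an arbitrary point with no guarantee the timeline stays alive, so it only claims a reference if the refcount has not already dropped to zero. A minimal sketch of that RCU + kref_get_unless_zero() lookup, with hypothetical types:

#include <linux/rcupdate.h>
#include <linux/kref.h>

struct example_obj { struct kref ref; };

/*
 * Hypothetical lookup: the slot may be cleared and the object freed
 * (after a grace period) at any time, so the reference is taken only
 * if the object is still live.
 */
static struct example_obj *example_get(struct example_obj __rcu **slot)
{
        struct example_obj *obj;

        rcu_read_lock();
        obj = rcu_dereference(*slot);
        if (obj && !kref_get_unless_zero(&obj->ref))
                obj = NULL;
        rcu_read_unlock();

        return obj;
}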
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
new file mode 100644
index 000000000000..06aa14c7aa8c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -0,0 +1,234 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_request.h"
+
+#include "intel_context.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
+#include "intel_engine.h"
+#include "intel_gt.h"
+#include "intel_reset.h"
+
+/*
+ * While the engine is active, we send a periodic pulse along the engine
+ * to check on its health and to flush any idle-barriers. If that request
+ * is stuck, and we fail to preempt it, we declare the engine hung and
+ * issue a reset -- in the hope that restores progress.
+ */
+
+static bool next_heartbeat(struct intel_engine_cs *engine)
+{
+ long delay;
+
+ delay = READ_ONCE(engine->props.heartbeat_interval_ms);
+ if (!delay)
+ return false;
+
+ delay = msecs_to_jiffies_timeout(delay);
+ if (delay >= HZ)
+ delay = round_jiffies_up_relative(delay);
+ schedule_delayed_work(&engine->heartbeat.work, delay);
+
+ return true;
+}
+
+static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
+{
+ engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
+ i915_request_add_active_barriers(rq);
+}
+
+static void show_heartbeat(const struct i915_request *rq,
+ struct intel_engine_cs *engine)
+{
+ struct drm_printer p = drm_debug_printer("heartbeat");
+
+ intel_engine_dump(engine, &p,
+ "%s heartbeat {prio:%d} not ticking\n",
+ engine->name,
+ rq->sched.attr.priority);
+}
+
+static void heartbeat(struct work_struct *wrk)
+{
+ struct i915_sched_attr attr = {
+ .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
+ };
+ struct intel_engine_cs *engine =
+ container_of(wrk, typeof(*engine), heartbeat.work.work);
+ struct intel_context *ce = engine->kernel_context;
+ struct i915_request *rq;
+
+ if (!intel_engine_pm_get_if_awake(engine))
+ return;
+
+ rq = engine->heartbeat.systole;
+ if (rq && i915_request_completed(rq)) {
+ i915_request_put(rq);
+ engine->heartbeat.systole = NULL;
+ }
+
+ if (intel_gt_is_wedged(engine->gt))
+ goto out;
+
+ if (engine->heartbeat.systole) {
+ if (engine->schedule &&
+ rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
+ /*
+ * Gradually raise the priority of the heartbeat to
+ * give high priority work [which presumably desires
+ * low latency and no jitter] the chance to naturally
+ * complete before being preempted.
+ */
+ attr.priority = I915_PRIORITY_MASK;
+ if (rq->sched.attr.priority >= attr.priority)
+ attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT);
+ if (rq->sched.attr.priority >= attr.priority)
+ attr.priority = I915_PRIORITY_BARRIER;
+
+ local_bh_disable();
+ engine->schedule(rq, &attr);
+ local_bh_enable();
+ } else {
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ show_heartbeat(rq, engine);
+
+ intel_gt_handle_error(engine->gt, engine->mask,
+ I915_ERROR_CAPTURE,
+ "stopped heartbeat on %s",
+ engine->name);
+ }
+ goto out;
+ }
+
+ if (engine->wakeref_serial == engine->serial)
+ goto out;
+
+ mutex_lock(&ce->timeline->mutex);
+
+ intel_context_enter(ce);
+ rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+ intel_context_exit(ce);
+ if (IS_ERR(rq))
+ goto unlock;
+
+ idle_pulse(engine, rq);
+ if (i915_modparams.enable_hangcheck)
+ engine->heartbeat.systole = i915_request_get(rq);
+
+ __i915_request_commit(rq);
+ __i915_request_queue(rq, &attr);
+
+unlock:
+ mutex_unlock(&ce->timeline->mutex);
+out:
+ if (!next_heartbeat(engine))
+ i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
+ intel_engine_pm_put(engine);
+}
+
+void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
+{
+ if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
+ return;
+
+ next_heartbeat(engine);
+}
+
+void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
+{
+ if (cancel_delayed_work(&engine->heartbeat.work))
+ i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
+}
+
+void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
+{
+ INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
+}
+
+int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
+ unsigned long delay)
+{
+ int err;
+
+ /* Send one last pulse before to cleanup persistent hogs */
+ if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) {
+ err = intel_engine_pulse(engine);
+ if (err)
+ return err;
+ }
+
+ WRITE_ONCE(engine->props.heartbeat_interval_ms, delay);
+
+ if (intel_engine_pm_get_if_awake(engine)) {
+ if (delay)
+ intel_engine_unpark_heartbeat(engine);
+ else
+ intel_engine_park_heartbeat(engine);
+ intel_engine_pm_put(engine);
+ }
+
+ return 0;
+}
+
+int intel_engine_pulse(struct intel_engine_cs *engine)
+{
+ struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
+ struct intel_context *ce = engine->kernel_context;
+ struct i915_request *rq;
+ int err = 0;
+
+ if (!intel_engine_has_preemption(engine))
+ return -ENODEV;
+
+ if (!intel_engine_pm_get_if_awake(engine))
+ return 0;
+
+ if (mutex_lock_interruptible(&ce->timeline->mutex))
+ goto out_rpm;
+
+ intel_context_enter(ce);
+ rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+ intel_context_exit(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_unlock;
+ }
+
+ rq->flags |= I915_REQUEST_SENTINEL;
+ idle_pulse(engine, rq);
+
+ __i915_request_commit(rq);
+ __i915_request_queue(rq, &attr);
+
+out_unlock:
+ mutex_unlock(&ce->timeline->mutex);
+out_rpm:
+ intel_engine_pm_put(engine);
+ return err;
+}
+
+int intel_engine_flush_barriers(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ if (llist_empty(&engine->barrier_tasks))
+ return 0;
+
+ rq = i915_request_create(engine->kernel_context);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ idle_pulse(engine, rq);
+ i915_request_add(rq);
+
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_engine_heartbeat.c"
+#endif
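Note: the heartbeat is a self-rescheduling delayed work: each tick either sends a low-priority pulse along the engine or escalates (raise priority, then reset) when the previous pulse is still stuck, and then re-arms itself via next_heartbeat(). A minimal sketch of the self-rescheduling skeleton, with hypothetical names standing in for the engine fields:

#include <linux/workqueue.h>
#include <linux/jiffies.h>

/* Hypothetical periodic monitor modelled on the heartbeat worker. */
struct example_monitor {
        struct delayed_work work;
        unsigned long interval_ms;      /* 0 disables the monitor */
};

static void example_tick(struct work_struct *wrk)
{
        struct example_monitor *mon =
                container_of(wrk, typeof(*mon), work.work);
        unsigned long delay = READ_ONCE(mon->interval_ms);

        /* ...check progress, escalate if the last pulse never landed... */

        if (delay)      /* re-arm only while still enabled */
                schedule_delayed_work(&mon->work, msecs_to_jiffies(delay));
}

static void example_start(struct example_monitor *mon, unsigned long ms)
{
        mon->interval_ms = ms;
        INIT_DELAYED_WORK(&mon->work, example_tick);
        schedule_delayed_work(&mon->work, msecs_to_jiffies(ms));
}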
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
new file mode 100644
index 000000000000..a7b8c0f9e005
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -0,0 +1,23 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_HEARTBEAT_H
+#define INTEL_ENGINE_HEARTBEAT_H
+
+struct intel_engine_cs;
+
+void intel_engine_init_heartbeat(struct intel_engine_cs *engine);
+
+int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
+ unsigned long delay);
+
+void intel_engine_park_heartbeat(struct intel_engine_cs *engine);
+void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine);
+
+int intel_engine_pulse(struct intel_engine_cs *engine);
+int intel_engine_flush_barriers(struct intel_engine_cs *engine);
+
+#endif /* INTEL_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 65b5ca74b394..874d82677179 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -7,10 +7,13 @@
#include "i915_drv.h"
#include "intel_engine.h"
+#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_rc6.h"
+#include "intel_ring.h"
static int __engine_unpark(struct intel_wakeref *wf)
{
@@ -33,7 +36,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
if (engine->unpark)
engine->unpark(engine);
- intel_engine_init_hangcheck(engine);
+ intel_engine_unpark_heartbeat(engine);
return 0;
}
@@ -52,7 +55,7 @@ static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
static inline void __timeline_mark_unlock(struct intel_context *ce,
unsigned long flags)
{
- mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_);
+ mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
local_irq_restore(flags);
}
@@ -103,14 +106,14 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
/* Context switch failed, hope for the best! Maybe reset? */
goto out_unlock;
- intel_timeline_enter(rq->timeline);
+ intel_timeline_enter(i915_request_timeline(rq));
/* Check again on the next retirement. */
engine->wakeref_serial = engine->serial + 1;
i915_request_add_active_barriers(rq);
/* Install ourselves as a preemption barrier */
- rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE;
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
__i915_request_commit(rq);
/* Release our exclusive hold on the engine */
@@ -123,6 +126,19 @@ out_unlock:
return result;
}
+static void call_idle_barriers(struct intel_engine_cs *engine)
+{
+ struct llist_node *node, *next;
+
+ llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+ struct dma_fence_cb *cb =
+ container_of((struct list_head *)node,
+ typeof(*cb), node);
+
+ cb->func(NULL, cb);
+ }
+}
+
static int __engine_park(struct intel_wakeref *wf)
{
struct intel_engine_cs *engine =
@@ -142,6 +158,9 @@ static int __engine_park(struct intel_wakeref *wf)
GEM_TRACE("%s\n", engine->name);
+ call_idle_barriers(engine); /* cleanup after wedging */
+
+ intel_engine_park_heartbeat(engine);
intel_engine_disarm_breadcrumbs(engine);
intel_engine_pool_park(&engine->pool);
@@ -169,9 +188,10 @@ static const struct intel_wakeref_ops wf_ops = {
void intel_engine_init__pm(struct intel_engine_cs *engine)
{
- struct intel_runtime_pm *rpm = &engine->i915->runtime_pm;
+ struct intel_runtime_pm *rpm = engine->uncore->rpm;
intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
+ intel_engine_init_heartbeat(engine);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
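Note: call_idle_barriers() drains a lock-free llist in a single atomic step and then walks the detached chain, the usual llist_del_all() + llist_for_each_safe() idiom. A minimal sketch with a hypothetical node type:

#include <linux/llist.h>

struct example_task {
        struct llist_node node;
        void (*fn)(struct example_task *tsk);
};

/*
 * Hypothetical drain: detach the whole list atomically, then walk the
 * now-private chain. The _safe variant allows each node to be consumed
 * (freed, or reused as a callback as the patch does) while iterating.
 */
static void example_drain(struct llist_head *head)
{
        struct llist_node *node, *next;

        llist_for_each_safe(node, next, llist_del_all(head)) {
                struct example_task *tsk =
                        llist_entry(node, struct example_task, node);

                tsk->fn(tsk);
        }
}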
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
index 4cd54c569911..397186818305 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -61,6 +61,7 @@ static int pool_active(struct i915_active *ref)
return 0;
}
+__i915_active_call
static void pool_retire(struct i915_active *ref)
{
struct intel_engine_pool_node *node =
@@ -94,7 +95,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
return ERR_PTR(-ENOMEM);
node->pool = pool;
- i915_active_init(engine->i915, &node->active, pool_active, pool_retire);
+ i915_active_init(&node->active, pool_active, pool_retire);
obj = i915_gem_object_create_internal(engine->i915, sz);
if (IS_ERR(obj)) {
@@ -103,13 +104,25 @@ node_create(struct intel_engine_pool *pool, size_t sz)
return ERR_CAST(obj);
}
+ i915_gem_object_set_readonly(obj);
+
node->obj = obj;
return node;
}
+static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine)
+{
+ if (intel_engine_is_virtual(engine))
+ engine = intel_virtual_engine_get_sibling(engine, 0);
+
+ GEM_BUG_ON(!engine);
+ return &engine->pool;
+}
+
struct intel_engine_pool_node *
-intel_engine_pool_get(struct intel_engine_pool *pool, size_t size)
+intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
{
+ struct intel_engine_pool *pool = lookup_pool(engine);
struct intel_engine_pool_node *node;
struct list_head *list;
unsigned long flags;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
index 8d069efd9457..1bd89cadc3b7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
@@ -12,13 +12,13 @@
#include "i915_request.h"
struct intel_engine_pool_node *
-intel_engine_pool_get(struct intel_engine_pool *pool, size_t size);
+intel_engine_get_pool(struct intel_engine_cs *engine, size_t size);
static inline int
intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
struct i915_request *rq)
{
- return i915_active_ref(&node->active, rq->timeline, rq);
+ return i915_active_add_request(&node->active, rq);
}
static inline void
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index a82cea95c2f2..758f0e8ec672 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -15,6 +15,7 @@
#include <linux/rbtree.h>
#include <linux/timer.h>
#include <linux/types.h>
+#include <linux/workqueue.h>
#include "i915_gem.h"
#include "i915_pmu.h"
@@ -58,6 +59,7 @@ struct i915_gem_context;
struct i915_request;
struct i915_sched_attr;
struct intel_gt;
+struct intel_ring;
struct intel_uncore;
typedef u8 intel_engine_mask_t;
@@ -76,40 +78,6 @@ struct intel_instdone {
u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};
-struct intel_engine_hangcheck {
- u64 acthd;
- u32 last_ring;
- u32 last_head;
- unsigned long action_timestamp;
- struct intel_instdone instdone;
-};
-
-struct intel_ring {
- struct kref ref;
- struct i915_vma *vma;
- void *vaddr;
-
- /*
- * As we have two types of rings, one global to the engine used
- * by ringbuffer submission and those that are exclusive to a
- * context used by execlists, we have to play safe and allow
- * atomic updates to the pin_count. However, the actual pinning
- * of the context is either done during initialisation for
- * ringbuffer submission or serialised as part of the context
- * pinning for execlists, and so we do not need a mutex ourselves
- * to serialise intel_ring_pin/intel_ring_unpin.
- */
- atomic_t pin_count;
-
- u32 head;
- u32 tail;
- u32 emit;
-
- u32 space;
- u32 size;
- u32 effective_size;
-};
-
/*
* we use a single page to load ctx workarounds so all of these
* values are referred in terms of dwords
@@ -148,6 +116,7 @@ enum intel_engine_id {
VECS1,
#define _VECS(n) (VECS0 + (n))
I915_NUM_ENGINES
+#define INVALID_ENGINE ((enum intel_engine_id)-1)
};
struct st_preempt_hang {
@@ -174,6 +143,11 @@ struct intel_engine_execlists {
struct timer_list timer;
/**
+ * @preempt: reset the current context if it fails to give way
+ */
+ struct timer_list preempt;
+
+ /**
* @default_priolist: priority list for I915_PRIORITY_NORMAL
*/
struct i915_priolist default_priolist;
@@ -303,10 +277,12 @@ struct intel_engine_cs {
u8 uabi_class;
u8 uabi_instance;
+ u32 uabi_capabilities;
u32 context_size;
u32 mmio_base;
- u32 uabi_capabilities;
+ unsigned int context_tag;
+#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
struct rb_node uabi_node;
@@ -323,6 +299,11 @@ struct intel_engine_cs {
intel_engine_mask_t saturated; /* submitting semaphores too late? */
+ struct {
+ struct delayed_work work;
+ struct i915_request *systole;
+ } heartbeat;
+
unsigned long serial;
unsigned long wakeref_serial;
@@ -473,14 +454,14 @@ struct intel_engine_cs {
/* status_notifier: list of callbacks for context-switch changes */
struct atomic_notifier_head context_status_notifier;
- struct intel_engine_hangcheck hangcheck;
-
-#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_USING_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
#define I915_ENGINE_IS_VIRTUAL BIT(5)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
unsigned int flags;
/*
@@ -538,12 +519,25 @@ struct intel_engine_cs {
*/
ktime_t total;
} stats;
+
+ struct {
+ unsigned long heartbeat_interval_ms;
+ unsigned long preempt_timeout_ms;
+ unsigned long stop_timeout_ms;
+ unsigned long timeslice_duration_ms;
+ } props;
};
static inline bool
-intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
+intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
{
- return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+ return engine->flags & I915_ENGINE_USING_CMD_PARSER;
+}
+
+static inline bool
+intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}
static inline bool
@@ -576,20 +570,24 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine)
return engine->flags & I915_ENGINE_IS_VIRTUAL;
}
-#define instdone_slice_mask(dev_priv__) \
- (IS_GEN(dev_priv__, 7) ? \
- 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
+static inline bool
+intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
+{
+ return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO;
+}
-#define instdone_subslice_mask(dev_priv__) \
- (IS_GEN(dev_priv__, 7) ? \
- 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
+#define instdone_has_slice(dev_priv___, sseu___, slice___) \
+ ((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))
-#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
- for ((slice__) = 0, (subslice__) = 0; \
- (slice__) < I915_MAX_SLICES; \
- (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
- (slice__) += ((subslice__) == 0)) \
- for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
- (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
+#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
+ (IS_GEN(dev_priv__, 7) ? (1 & BIT(subslice__)) : \
+ intel_sseu_has_subslice(sseu__, 0, subslice__))
+#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \
+ for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \
+ (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \
+ (slice_) += ((subslice_) == 0)) \
+ for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \
+ (instdone_has_subslice(dev_priv_, sseu_, slice_, \
+ subslice_)))
#endif /* __INTEL_ENGINE_TYPES_H__ */
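
The flag split above replaces I915_ENGINE_NEEDS_CMD_PARSER with two predicates, so callers can distinguish an engine where the command parser is merely active from one where it is mandatory. A minimal sketch of how a submission path might use them; the helper name and the exact condition are hypothetical and not taken from this patch:

static bool needs_batch_scan(const struct intel_engine_cs *engine,
			     bool privileged)
{
	/* REQUIRES: the parser must run on every batch for this engine. */
	if (intel_engine_requires_cmd_parser(engine))
		return true;

	/* USING: the parser is available; scan only when the batch needs it. */
	return intel_engine_using_cmd_parser(engine) && privileged;
}
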
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 77cd5de83930..7f7150a733f4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -160,10 +160,10 @@ static int legacy_ring_idx(const struct legacy_ring *ring)
};
if (GEM_DEBUG_WARN_ON(ring->class >= ARRAY_SIZE(map)))
- return -1;
+ return INVALID_ENGINE;
if (GEM_DEBUG_WARN_ON(ring->instance >= map[ring->class].max))
- return -1;
+ return INVALID_ENGINE;
return map[ring->class].base + ring->instance;
}
@@ -171,23 +171,15 @@ static int legacy_ring_idx(const struct legacy_ring *ring)
static void add_legacy_ring(struct legacy_ring *ring,
struct intel_engine_cs *engine)
{
- int idx;
-
if (engine->gt != ring->gt || engine->class != ring->class) {
ring->gt = engine->gt;
ring->class = engine->class;
ring->instance = 0;
}
- idx = legacy_ring_idx(ring);
- if (unlikely(idx == -1))
- return;
-
- GEM_BUG_ON(idx >= ARRAY_SIZE(ring->gt->engine));
- ring->gt->engine[idx] = engine;
- ring->instance++;
-
- engine->legacy_idx = idx;
+ engine->legacy_idx = legacy_ring_idx(ring);
+ if (engine->legacy_idx != INVALID_ENGINE)
+ ring->instance++;
}
void intel_engines_driver_register(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 86e00a2db8a4..4294f146f13c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -112,6 +112,7 @@
#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */
#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
+#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */
#define MI_SEMAPHORE_POLL (1 << 15)
#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12)
#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
@@ -119,6 +120,8 @@
#define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12)
#define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12)
#define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12)
+#define MI_SEMAPHORE_TOKEN_MASK REG_GENMASK(9, 5)
+#define MI_SEMAPHORE_TOKEN_SHIFT 5
#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1)
#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2)
#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */
@@ -132,7 +135,10 @@
 * address/value pairs. Don't overdo it, though, x <= 2^4 must hold!
*/
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
+/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
+#define MI_LRI_CS_MMIO (1<<19)
#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
@@ -147,6 +153,7 @@
#define MI_FLUSH_DW_USE_PPGTT (0<<2)
#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1)
#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2)
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
#define MI_BATCH_NON_SECURE (1)
/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
@@ -156,7 +163,8 @@
#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0)
#define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
-#define MI_BATCH_RESOURCE_STREAMER (1<<10)
+#define MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
+#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only */
/*
* 3D instructions used by the kernel
@@ -217,6 +225,7 @@
#define PIPE_CONTROL_CS_STALL (1<<20)
#define PIPE_CONTROL_TLB_INVALIDATE (1<<18)
#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16)
+#define PIPE_CONTROL_WRITE_TIMESTAMP (3<<14)
#define PIPE_CONTROL_QW_WRITE (1<<14)
#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14)
#define PIPE_CONTROL_DEPTH_STALL (1<<13)
@@ -224,7 +233,9 @@
#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */
+#define PIPE_CONTROL_L3_RO_CACHE_INVALIDATE REG_BIT(10) /* gen12 */
#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9)
+#define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */
#define PIPE_CONTROL_NOTIFY (1<<8)
#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */
#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5)
@@ -235,6 +246,29 @@
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0)
#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
/*
* Commands used only by the command parser
*/
@@ -251,7 +285,6 @@
#define MI_CLFLUSH MI_INSTR(0x27, 0)
#define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0)
#define MI_REPORT_PERF_COUNT_GGTT (1<<0)
-#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0)
#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0)
#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0)
#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0)
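
The MI_MATH opcodes above drive the command streamer's ALU: operands are loaded into SRCA/SRCB, an operation leaves its result in ACCU, and a store copies it back to a general-purpose register. A minimal sketch of packing GPR0 = GPR0 - GPR1 into a batch, assuming a hypothetical helper and a cs pointer obtained from the usual ring-begin path:

static u32 *emit_gpr_sub(u32 *cs)
{
	*cs++ = MI_MATH(4);	/* four ALU dwords follow */
	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(0));
	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(1));
	*cs++ = MI_MATH_SUB;	/* ACCU = SRCA - SRCB */
	*cs++ = MI_MATH_STORE(MI_MATH_REG(0), MI_MATH_REG_ACCU);
	return cs;
}
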
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index d48ec9a76ed1..4c26daf7ee46 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -6,7 +6,12 @@
#include "i915_drv.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_mocs.h"
+#include "intel_rc6.h"
+#include "intel_rps.h"
#include "intel_uncore.h"
+#include "intel_pm.h"
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
{
@@ -18,15 +23,108 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
- intel_gt_init_hangcheck(gt);
intel_gt_init_reset(gt);
+ intel_gt_init_requests(gt);
intel_gt_pm_init_early(gt);
+
+ intel_rps_init_early(&gt->rps);
intel_uc_init_early(&gt->uc);
}
-void intel_gt_init_hw(struct drm_i915_private *i915)
+void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt)
+{
+ gt->ggtt = ggtt;
+
+ intel_gt_sanitize(gt, false);
+}
+
+static void init_unused_ring(struct intel_gt *gt, u32 base)
{
- i915->gt.ggtt = &i915->ggtt;
+ struct intel_uncore *uncore = gt->uncore;
+
+ intel_uncore_write(uncore, RING_CTL(base), 0);
+ intel_uncore_write(uncore, RING_HEAD(base), 0);
+ intel_uncore_write(uncore, RING_TAIL(base), 0);
+ intel_uncore_write(uncore, RING_START(base), 0);
+}
+
+static void init_unused_rings(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+
+ if (IS_I830(i915)) {
+ init_unused_ring(gt, PRB1_BASE);
+ init_unused_ring(gt, SRB0_BASE);
+ init_unused_ring(gt, SRB1_BASE);
+ init_unused_ring(gt, SRB2_BASE);
+ init_unused_ring(gt, SRB3_BASE);
+ } else if (IS_GEN(i915, 2)) {
+ init_unused_ring(gt, SRB0_BASE);
+ init_unused_ring(gt, SRB1_BASE);
+ } else if (IS_GEN(i915, 3)) {
+ init_unused_ring(gt, PRB1_BASE);
+ init_unused_ring(gt, PRB2_BASE);
+ }
+}
+
+int intel_gt_init_hw(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ int ret;
+
+ BUG_ON(!i915->kernel_context);
+ ret = intel_gt_terminally_wedged(gt);
+ if (ret)
+ return ret;
+
+ gt->last_init_time = ktime_get();
+
+ /* Double layer security blanket, see i915_gem_init() */
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9)
+ intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf));
+
+ if (IS_HASWELL(i915))
+ intel_uncore_write(uncore,
+ MI_PREDICATE_RESULT_2,
+ IS_HSW_GT3(i915) ?
+ LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
+
+ /* Apply the GT workarounds... */
+ intel_gt_apply_workarounds(gt);
+ /* ...and determine whether they are sticking. */
+ intel_gt_verify_workarounds(gt, "init");
+
+ intel_gt_init_swizzling(gt);
+
+ /*
+ * At least 830 can leave some of the unused rings
+ * "active" (i.e. head != tail) after resume, which
+ * will prevent c3 entry. Make sure all unused rings
+ * are totally idle.
+ */
+ init_unused_rings(gt);
+
+ ret = i915_ppgtt_init_hw(gt);
+ if (ret) {
+ DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
+ goto out;
+ }
+
+ /* We can't enable contexts until all firmware is loaded */
+ ret = intel_uc_init_hw(&gt->uc);
+ if (ret) {
+ i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
+ goto out;
+ }
+
+ intel_mocs_init(gt);
+
+out:
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+ return ret;
}
static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
@@ -89,7 +187,7 @@ intel_gt_clear_error_registers(struct intel_gt *gt,
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine_masked(engine, i915, engine_mask, id)
+ for_each_engine_masked(engine, gt, engine_mask, id)
gen8_clear_engine_error_register(engine);
}
}
@@ -100,7 +198,7 @@ static void gen6_check_faults(struct intel_gt *gt)
enum intel_engine_id id;
u32 fault;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
fault = GEN6_RING_FAULT_REG_READ(engine);
if (fault & RING_FAULT_VALID) {
DRM_DEBUG_DRIVER("Unexpected fault\n"
@@ -176,7 +274,7 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt)
void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
{
- struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
intel_wakeref_t wakeref;
/*
@@ -200,18 +298,18 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
wmb();
- if (INTEL_INFO(i915)->has_coherent_ggtt)
+ if (INTEL_INFO(gt->i915)->has_coherent_ggtt)
return;
intel_gt_chipset_flush(gt);
- with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
- struct intel_uncore *uncore = gt->uncore;
+ with_intel_runtime_pm(uncore->rpm, wakeref) {
+ unsigned long flags;
- spin_lock_irq(&uncore->lock);
+ spin_lock_irqsave(&uncore->lock, flags);
intel_uncore_posting_read_fw(uncore,
RING_HEAD(RENDER_RING_BASE));
- spin_unlock_irq(&uncore->lock);
+ spin_unlock_irqrestore(&uncore->lock, flags);
}
}
@@ -222,7 +320,12 @@ void intel_gt_chipset_flush(struct intel_gt *gt)
intel_gtt_chipset_flush();
}
-int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
+void intel_gt_driver_register(struct intel_gt *gt)
+{
+ intel_rps_driver_register(&gt->rps);
+}
+
+static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
{
struct drm_i915_private *i915 = gt->i915;
struct drm_i915_gem_object *obj;
@@ -230,7 +333,7 @@ int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
int ret;
obj = i915_gem_object_create_stolen(i915, size);
- if (!obj)
+ if (IS_ERR(obj))
obj = i915_gem_object_create_internal(i915, size);
if (IS_ERR(obj)) {
DRM_ERROR("Failed to allocate scratch page\n");
@@ -256,11 +359,40 @@ err_unref:
return ret;
}
-void intel_gt_fini_scratch(struct intel_gt *gt)
+static void intel_gt_fini_scratch(struct intel_gt *gt)
{
i915_vma_unpin_and_release(&gt->scratch, 0);
}
+int intel_gt_init(struct intel_gt *gt)
+{
+ int err;
+
+ err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K);
+ if (err)
+ return err;
+
+ intel_gt_pm_init(gt);
+
+ return 0;
+}
+
+void intel_gt_driver_remove(struct intel_gt *gt)
+{
+ GEM_BUG_ON(gt->awake);
+}
+
+void intel_gt_driver_unregister(struct intel_gt *gt)
+{
+ intel_rps_driver_unregister(&gt->rps);
+}
+
+void intel_gt_driver_release(struct intel_gt *gt)
+{
+ intel_gt_pm_fini(gt);
+ intel_gt_fini_scratch(gt);
+}
+
void intel_gt_driver_late_release(struct intel_gt *gt)
{
intel_uc_driver_late_release(&gt->uc);
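
Taken together, the intel_gt entry points above are meant to be called in a fixed order from the driver's probe and remove paths. The call sites are outside this file, so the sequence below is an inferred sketch, not a copy of the callers:

/*
 * intel_gt_init_early(gt, i915);     - early probe, before MMIO/GGTT setup
 * intel_gt_init_hw_early(gt, ggtt);  - once the GGTT exists; sanitizes the GT
 * intel_gt_init(gt);                 - allocates scratch, initialises GT PM
 * intel_gt_init_hw(gt);              - programs the HW, loads uC firmware, MOCS
 * intel_gt_driver_register(gt);      - registers RPS with the rest of the driver
 *   ...
 * intel_gt_driver_unregister(gt);
 * intel_gt_driver_remove(gt);
 * intel_gt_driver_release(gt);       - tears down GT PM and the scratch vma
 * intel_gt_driver_late_release(gt);
 */
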
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 4920cb351f10..5436f8c30708 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -28,7 +28,14 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc)
}
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
-void intel_gt_init_hw(struct drm_i915_private *i915);
+void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt);
+int __must_check intel_gt_init_hw(struct intel_gt *gt);
+int intel_gt_init(struct intel_gt *gt);
+void intel_gt_driver_register(struct intel_gt *gt);
+
+void intel_gt_driver_unregister(struct intel_gt *gt);
+void intel_gt_driver_remove(struct intel_gt *gt);
+void intel_gt_driver_release(struct intel_gt *gt);
void intel_gt_driver_late_release(struct intel_gt *gt);
@@ -39,11 +46,6 @@ void intel_gt_clear_error_registers(struct intel_gt *gt,
void intel_gt_flush_ggtt_writes(struct intel_gt *gt);
void intel_gt_chipset_flush(struct intel_gt *gt);
-void intel_gt_init_hangcheck(struct intel_gt *gt);
-
-int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size);
-void intel_gt_fini_scratch(struct intel_gt *gt);
-
static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt,
enum intel_gt_scratch_field field)
{
@@ -55,6 +57,4 @@ static inline bool intel_gt_is_wedged(struct intel_gt *gt)
return __intel_reset_failed(&gt->reset);
}
-void intel_gt_queue_hangcheck(struct intel_gt *gt);
-
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 34a4fb624bf7..973ee7eded64 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -11,6 +11,7 @@
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_uncore.h"
+#include "intel_rps.h"
static void guc_irq_handler(struct intel_guc *guc, u16 iir)
{
@@ -77,7 +78,7 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance,
return guc_irq_handler(&gt->uc.guc, iir);
if (instance == OTHER_GTPM_INSTANCE)
- return gen11_rps_irq_handler(gt, iir);
+ return gen11_rps_irq_handler(&gt->rps, iir);
WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
instance, iir);
@@ -336,7 +337,7 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4])
}
if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
- gen6_rps_irq_handler(gt->i915, gt_iir[2]);
+ gen6_rps_irq_handler(&gt->rps, gt_iir[2]);
guc_irq_handler(&gt->uc.guc, gt_iir[2] >> 16);
}
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 1363e069ec83..6187cdd06646 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -4,17 +4,38 @@
* Copyright © 2019 Intel Corporation
*/
+#include <linux/suspend.h>
+
#include "i915_drv.h"
+#include "i915_globals.h"
#include "i915_params.h"
+#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_llc.h"
#include "intel_pm.h"
+#include "intel_rc6.h"
+#include "intel_rps.h"
#include "intel_wakeref.h"
-static void pm_notify(struct drm_i915_private *i915, int state)
+static void user_forcewake(struct intel_gt *gt, bool suspend)
{
- blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915);
+ int count = atomic_read(&gt->user_wakeref);
+
+ /* Inside suspend/resume, so single-threaded: no races to worry about. */
+ if (likely(!count))
+ return;
+
+ intel_gt_pm_get(gt);
+ if (suspend) {
+ GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
+ atomic_sub(count, &gt->wakeref.count);
+ } else {
+ atomic_add(count, &gt->wakeref.count);
+ }
+ intel_gt_pm_put(gt);
}
static int __gt_unpark(struct intel_wakeref *wf)
@@ -24,6 +45,8 @@ static int __gt_unpark(struct intel_wakeref *wf)
GEM_TRACE("\n");
+ i915_globals_unpark();
+
/*
* It seems that the DMC likes to transition between the DC states a lot
* when there are no connected displays (no active power domains) during
@@ -38,41 +61,44 @@ static int __gt_unpark(struct intel_wakeref *wf)
gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
GEM_BUG_ON(!gt->awake);
- intel_enable_gt_powersave(i915);
-
- i915_update_gfx_val(i915);
- if (INTEL_GEN(i915) >= 6)
- gen6_rps_busy(i915);
+ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
+ intel_rps_unpark(&gt->rps);
i915_pmu_gt_unparked(i915);
- intel_gt_queue_hangcheck(gt);
-
- pm_notify(i915, INTEL_GT_UNPARK);
+ intel_gt_unpark_requests(gt);
return 0;
}
static int __gt_park(struct intel_wakeref *wf)
{
- struct drm_i915_private *i915 =
- container_of(wf, typeof(*i915), gt.wakeref);
- intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake);
+ struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
+ intel_wakeref_t wakeref = fetch_and_zero(&gt->awake);
+ struct drm_i915_private *i915 = gt->i915;
GEM_TRACE("\n");
- pm_notify(i915, INTEL_GT_PARK);
+ intel_gt_park_requests(gt);
+ i915_vma_parked(gt);
i915_pmu_gt_parked(i915);
- if (INTEL_GEN(i915) >= 6)
- gen6_rps_idle(i915);
+ intel_rps_park(&gt->rps);
/* Everything switched off, flush any residual interrupt just in case */
intel_synchronize_irq(i915);
+ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) {
+ intel_rc6_ctx_wa_check(&i915->gt.rc6);
+ intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+ }
+
GEM_BUG_ON(!wakeref);
intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
+ i915_globals_park();
+
return 0;
}
@@ -84,9 +110,18 @@ static const struct intel_wakeref_ops wf_ops = {
void intel_gt_pm_init_early(struct intel_gt *gt)
{
- intel_wakeref_init(&gt->wakeref, &gt->i915->runtime_pm, &wf_ops);
+ intel_wakeref_init(&gt->wakeref, gt->uncore->rpm, &wf_ops);
+}
- BLOCKING_INIT_NOTIFIER_HEAD(&gt->pm_notifications);
+void intel_gt_pm_init(struct intel_gt *gt)
+{
+ /*
+ * Enabling power-management should be "self-healing". If we cannot
+ * enable a feature, simply leave it disabled with a notice to the
+ * user.
+ */
+ intel_rc6_init(&gt->rc6);
+ intel_rps_init(&gt->rps);
}
static bool reset_engines(struct intel_gt *gt)
@@ -111,16 +146,47 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
- GEM_TRACE("\n");
+ GEM_TRACE("force:%s\n", yesno(force));
+
+ /* Use a raw wakeref to avoid calling intel_display_power_get early */
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+
+ /*
+ * As we have just resumed the machine and woken the device up from
+ * deep PCI sleep (presumably D3_cold), assume the HW has been reset
+ * back to defaults, recovering from whatever wedged state we left it
+ * in, and so it is worth trying to use the device once more.
+ */
+ if (intel_gt_is_wedged(gt))
+ intel_gt_unset_wedged(gt);
intel_uc_sanitize(&gt->uc);
- if (!reset_engines(gt) && !force)
- return;
+ for_each_engine(engine, gt, id)
+ if (engine->reset.prepare)
+ engine->reset.prepare(engine);
+
+ intel_uc_reset_prepare(&gt->uc);
+
+ if (reset_engines(gt) || force) {
+ for_each_engine(engine, gt, id)
+ __intel_engine_reset(engine, false);
+ }
+
+ for_each_engine(engine, gt, id)
+ if (engine->reset.finish)
+ engine->reset.finish(engine);
- for_each_engine(engine, gt->i915, id)
- __intel_engine_reset(engine, false);
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+}
+
+void intel_gt_pm_fini(struct intel_gt *gt)
+{
+ intel_rc6_fini(&gt->rc6);
}
int intel_gt_resume(struct intel_gt *gt)
@@ -129,6 +195,8 @@ int intel_gt_resume(struct intel_gt *gt)
enum intel_engine_id id;
int err = 0;
+ GEM_TRACE("\n");
+
/*
* After resume, we may need to poke into the pinned kernel
* contexts to paper over any damage caused by the sudden suspend.
@@ -136,14 +204,23 @@ int intel_gt_resume(struct intel_gt *gt)
* allowing us to fixup the user contexts on their first pin.
*/
intel_gt_pm_get(gt);
- for_each_engine(engine, gt->i915, id) {
+
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+ intel_rc6_sanitize(&gt->rc6);
+
+ intel_rps_enable(&gt->rps);
+ intel_llc_enable(&gt->llc);
+
+ for_each_engine(engine, gt, id) {
struct intel_context *ce;
intel_engine_pm_get(engine);
ce = engine->kernel_context;
- if (ce)
+ if (ce) {
+ GEM_BUG_ON(!intel_context_is_pinned(ce));
ce->ops->reset(ce);
+ }
engine->serial++; /* kernel context lost */
err = engine->resume(engine);
@@ -156,19 +233,99 @@ int intel_gt_resume(struct intel_gt *gt)
break;
}
}
+
+ intel_rc6_enable(&gt->rc6);
+
+ intel_uc_resume(&gt->uc);
+
+ user_forcewake(gt, false);
+
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
intel_gt_pm_put(gt);
return err;
}
+static void wait_for_suspend(struct intel_gt *gt)
+{
+ if (!intel_gt_pm_is_awake(gt))
+ return;
+
+ if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+ /*
+ * Forcibly cancel outstanding work and leave
+ * the gpu quiet.
+ */
+ intel_gt_set_wedged(gt);
+ }
+
+ intel_gt_pm_wait_for_idle(gt);
+}
+
+void intel_gt_suspend_prepare(struct intel_gt *gt)
+{
+ user_forcewake(gt, true);
+ wait_for_suspend(gt);
+
+ intel_uc_suspend(&gt->uc);
+}
+
+static suspend_state_t pm_suspend_target(void)
+{
+#if IS_ENABLED(CONFIG_PM_SLEEP)
+ return pm_suspend_target_state;
+#else
+ return PM_SUSPEND_TO_IDLE;
+#endif
+}
+
+void intel_gt_suspend_late(struct intel_gt *gt)
+{
+ intel_wakeref_t wakeref;
+
+ /* We expect to be idle already, but also want to be independent */
+ wait_for_suspend(gt);
+
+ /*
+ * On disabling the device, we want to turn off HW access to memory
+ * that we no longer own.
+ *
+ * However, not all suspend-states disable the device. S0 (s2idle)
+ * is effectively runtime-suspend: the device is left powered on
+ * but needs to be put into a low-power state. We need to keep
+ * power management enabled, but we also retain system state, and
+ * so it remains safe to keep on using our allocated memory.
+ */
+ if (pm_suspend_target() == PM_SUSPEND_TO_IDLE)
+ return;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
+ intel_rps_disable(&gt->rps);
+ intel_rc6_disable(&gt->rc6);
+ intel_llc_disable(&gt->llc);
+ }
+
+ intel_gt_sanitize(gt, false);
+
+ GEM_TRACE("\n");
+}
+
void intel_gt_runtime_suspend(struct intel_gt *gt)
{
intel_uc_runtime_suspend(&gt->uc);
+
+ GEM_TRACE("\n");
}
int intel_gt_runtime_resume(struct intel_gt *gt)
{
+ GEM_TRACE("\n");
+
intel_gt_init_swizzling(gt);
return intel_uc_runtime_resume(&gt->uc);
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_gt_pm.c"
+#endif
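
The new suspend helpers are expected to bracket the device-level suspend steps; the ordering below is a sketch inferred from the code above rather than from the (unshown) call sites:

/*
 * intel_gt_suspend_prepare(gt);  - drop user forcewake, wait for (or wedge)
 *                                  outstanding work, suspend the uC
 *   ... device/display suspend ...
 * intel_gt_suspend_late(gt);     - for a real suspend target, disable
 *                                  rps/rc6/llc under a runtime-pm wakeref and
 *                                  re-sanitize the GT
 *
 * For suspend-to-idle (PM_SUSPEND_TO_IDLE) the late step returns early,
 * leaving GT power management configured since the device stays powered.
 */
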
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index fb39d99cd6ee..b3e17399be9b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -12,11 +12,6 @@
#include "intel_gt_types.h"
#include "intel_wakeref.h"
-enum {
- INTEL_GT_UNPARK,
- INTEL_GT_PARK,
-};
-
static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt)
{
return intel_wakeref_is_active(&gt->wakeref);
@@ -43,10 +38,21 @@ static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
}
void intel_gt_pm_init_early(struct intel_gt *gt);
+void intel_gt_pm_init(struct intel_gt *gt);
+void intel_gt_pm_fini(struct intel_gt *gt);
void intel_gt_sanitize(struct intel_gt *gt, bool force);
+
+void intel_gt_suspend_prepare(struct intel_gt *gt);
+void intel_gt_suspend_late(struct intel_gt *gt);
int intel_gt_resume(struct intel_gt *gt);
+
void intel_gt_runtime_suspend(struct intel_gt *gt);
int intel_gt_runtime_resume(struct intel_gt *gt);
+static inline bool is_mock_gt(const struct intel_gt *gt)
+{
+ return I915_SELFTEST_ONLY(gt->awake == -ENODEV);
+}
+
#endif /* INTEL_GT_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
new file mode 100644
index 000000000000..353809ac2754
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -0,0 +1,137 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h" /* for_each_engine() */
+#include "i915_request.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_timeline.h"
+
+static void retire_requests(struct intel_timeline *tl)
+{
+ struct i915_request *rq, *rn;
+
+ list_for_each_entry_safe(rq, rn, &tl->requests, link)
+ if (!i915_request_retire(rq))
+ break;
+}
+
+static void flush_submission(struct intel_gt *gt)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, gt, id)
+ intel_engine_flush_submission(engine);
+}
+
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
+{
+ struct intel_gt_timelines *timelines = &gt->timelines;
+ struct intel_timeline *tl, *tn;
+ unsigned long active_count = 0;
+ unsigned long flags;
+ bool interruptible;
+ LIST_HEAD(free);
+
+ interruptible = true;
+ if (unlikely(timeout < 0))
+ timeout = -timeout, interruptible = false;
+
+ flush_submission(gt); /* kick the ksoftirqd tasklets */
+
+ spin_lock_irqsave(&timelines->lock, flags);
+ list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+ if (!mutex_trylock(&tl->mutex)) {
+ active_count++; /* report busy to caller, try again? */
+ continue;
+ }
+
+ intel_timeline_get(tl);
+ GEM_BUG_ON(!tl->active_count);
+ tl->active_count++; /* pin the list element */
+ spin_unlock_irqrestore(&timelines->lock, flags);
+
+ if (timeout > 0) {
+ struct dma_fence *fence;
+
+ fence = i915_active_fence_get(&tl->last_request);
+ if (fence) {
+ timeout = dma_fence_wait_timeout(fence,
+ interruptible,
+ timeout);
+ dma_fence_put(fence);
+ }
+ }
+
+ retire_requests(tl);
+
+ spin_lock_irqsave(&timelines->lock, flags);
+
+ /* Resume iteration after dropping lock */
+ list_safe_reset_next(tl, tn, link);
+ if (!--tl->active_count)
+ list_del(&tl->link);
+ else
+ active_count += !!rcu_access_pointer(tl->last_request.fence);
+
+ mutex_unlock(&tl->mutex);
+
+ /* Defer the final release to after the spinlock */
+ if (refcount_dec_and_test(&tl->kref.refcount)) {
+ GEM_BUG_ON(tl->active_count);
+ list_add(&tl->link, &free);
+ }
+ }
+ spin_unlock_irqrestore(&timelines->lock, flags);
+
+ list_for_each_entry_safe(tl, tn, &free, link)
+ __intel_timeline_free(&tl->kref);
+
+ return active_count ? timeout : 0;
+}
+
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
+{
+ /* If the device is asleep, we have no requests outstanding */
+ if (!intel_gt_pm_is_awake(gt))
+ return 0;
+
+ while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) {
+ cond_resched();
+ if (signal_pending(current))
+ return -EINTR;
+ }
+
+ return timeout;
+}
+
+static void retire_work_handler(struct work_struct *work)
+{
+ struct intel_gt *gt =
+ container_of(work, typeof(*gt), requests.retire_work.work);
+
+ intel_gt_retire_requests(gt);
+ schedule_delayed_work(&gt->requests.retire_work,
+ round_jiffies_up_relative(HZ));
+}
+
+void intel_gt_init_requests(struct intel_gt *gt)
+{
+ INIT_DELAYED_WORK(&gt->requests.retire_work, retire_work_handler);
+}
+
+void intel_gt_park_requests(struct intel_gt *gt)
+{
+ cancel_delayed_work(&gt->requests.retire_work);
+}
+
+void intel_gt_unpark_requests(struct intel_gt *gt)
+{
+ schedule_delayed_work(&gt->requests.retire_work,
+ round_jiffies_up_relative(HZ));
+}
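
Note the timeout convention in intel_gt_retire_requests_timeout(): a positive value waits interruptibly on each timeline's last request, a negative value waits uninterruptibly, and zero simply retires whatever has already completed. A minimal, hypothetical caller (not part of this patch) might look like:

static int example_drain(struct intel_gt *gt)
{
	long remaining;

	/* Bounded, interruptible wait for the GT to go idle. */
	remaining = intel_gt_wait_for_idle(gt, msecs_to_jiffies(200));
	if (remaining < 0)
		return remaining; /* e.g. -EINTR if a signal arrived */

	/* Non-blocking pass: retire already-completed requests. */
	intel_gt_retire_requests(gt);
	return 0;
}
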
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
new file mode 100644
index 000000000000..bd31cbce47e0
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_GT_REQUESTS_H
+#define INTEL_GT_REQUESTS_H
+
+struct intel_gt;
+
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
+static inline void intel_gt_retire_requests(struct intel_gt *gt)
+{
+ intel_gt_retire_requests_timeout(gt, 0);
+}
+
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
+
+void intel_gt_init_requests(struct intel_gt *gt);
+void intel_gt_park_requests(struct intel_gt *gt);
+void intel_gt_unpark_requests(struct intel_gt *gt);
+
+#endif /* INTEL_GT_REQUESTS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index dc295c196d11..d4e14dbd172e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -17,7 +17,10 @@
#include "i915_vma.h"
#include "intel_engine_types.h"
+#include "intel_llc_types.h"
#include "intel_reset_types.h"
+#include "intel_rc6_types.h"
+#include "intel_rps_types.h"
#include "intel_wakeref.h"
struct drm_i915_private;
@@ -25,14 +28,6 @@ struct i915_ggtt;
struct intel_engine_cs;
struct intel_uncore;
-struct intel_hangcheck {
- /* For hangcheck timer */
-#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
-#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
-
- struct delayed_work work;
-};
-
struct intel_gt {
struct drm_i915_private *i915;
struct intel_uncore *uncore;
@@ -49,12 +44,23 @@ struct intel_gt {
struct list_head hwsp_free_list;
} timelines;
+ struct intel_gt_requests {
+ /**
+ * We leave the user IRQ off as much as possible,
+ * but this means that requests will finish and never
+ * be retired once the system goes idle. Set a timer to
+ * fire periodically while the ring is running. When it
+ * fires, go retire requests.
+ */
+ struct delayed_work retire_work;
+ } requests;
+
struct intel_wakeref wakeref;
+ atomic_t user_wakeref;
struct list_head closed_vma;
spinlock_t closed_lock; /* guards the list of closed_vma */
- struct intel_hangcheck hangcheck;
struct intel_reset reset;
/**
@@ -66,7 +72,9 @@ struct intel_gt {
*/
intel_wakeref_t awake;
- struct blocking_notifier_head pm_notifications;
+ struct intel_llc llc;
+ struct intel_rc6 rc6;
+ struct intel_rps rps;
ktime_t last_init_time;
@@ -89,14 +97,16 @@ enum intel_gt_scratch_field {
INTEL_GT_SCRATCH_FIELD_DEFAULT = 0,
/* 8 bytes */
- INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA = 128,
-
- /* 8 bytes */
INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH = 128,
/* 8 bytes */
INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
+ /* 6 * 8 bytes */
+ INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
+
+ /* 4 bytes */
+ INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
};
#endif /* __INTEL_GT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
deleted file mode 100644
index 05d042cdefe2..000000000000
--- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "i915_drv.h"
-#include "intel_engine.h"
-#include "intel_gt.h"
-#include "intel_reset.h"
-
-struct hangcheck {
- u64 acthd;
- u32 ring;
- u32 head;
- enum intel_engine_hangcheck_action action;
- unsigned long action_timestamp;
- int deadlock;
- struct intel_instdone instdone;
- bool wedged:1;
- bool stalled:1;
-};
-
-static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
-{
- u32 tmp = current_instdone | *old_instdone;
- bool unchanged;
-
- unchanged = tmp == *old_instdone;
- *old_instdone |= tmp;
-
- return unchanged;
-}
-
-static bool subunits_stuck(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- struct intel_instdone instdone;
- struct intel_instdone *accu_instdone = &engine->hangcheck.instdone;
- bool stuck;
- int slice;
- int subslice;
-
- intel_engine_get_instdone(engine, &instdone);
-
- /* There might be unstable subunit states even when
- * actual head is not moving. Filter out the unstable ones by
- * accumulating the undone -> done transitions and only
- * consider those as progress.
- */
- stuck = instdone_unchanged(instdone.instdone,
- &accu_instdone->instdone);
- stuck &= instdone_unchanged(instdone.slice_common,
- &accu_instdone->slice_common);
-
- for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
- stuck &= instdone_unchanged(instdone.sampler[slice][subslice],
- &accu_instdone->sampler[slice][subslice]);
- stuck &= instdone_unchanged(instdone.row[slice][subslice],
- &accu_instdone->row[slice][subslice]);
- }
-
- return stuck;
-}
-
-static enum intel_engine_hangcheck_action
-head_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
- if (acthd != engine->hangcheck.acthd) {
-
- /* Clear subunit states on head movement */
- memset(&engine->hangcheck.instdone, 0,
- sizeof(engine->hangcheck.instdone));
-
- return ENGINE_ACTIVE_HEAD;
- }
-
- if (!subunits_stuck(engine))
- return ENGINE_ACTIVE_SUBUNITS;
-
- return ENGINE_DEAD;
-}
-
-static enum intel_engine_hangcheck_action
-engine_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
- enum intel_engine_hangcheck_action ha;
- u32 tmp;
-
- ha = head_stuck(engine, acthd);
- if (ha != ENGINE_DEAD)
- return ha;
-
- if (IS_GEN(engine->i915, 2))
- return ENGINE_DEAD;
-
- /* Is the chip hanging on a WAIT_FOR_EVENT?
- * If so we can simply poke the RB_WAIT bit
- * and break the hang. This should work on
- * all but the second generation chipsets.
- */
- tmp = ENGINE_READ(engine, RING_CTL);
- if (tmp & RING_WAIT) {
- intel_gt_handle_error(engine->gt, engine->mask, 0,
- "stuck wait on %s", engine->name);
- ENGINE_WRITE(engine, RING_CTL, tmp);
- return ENGINE_WAIT_KICK;
- }
-
- return ENGINE_DEAD;
-}
-
-static void hangcheck_load_sample(struct intel_engine_cs *engine,
- struct hangcheck *hc)
-{
- hc->acthd = intel_engine_get_active_head(engine);
- hc->ring = ENGINE_READ(engine, RING_START);
- hc->head = ENGINE_READ(engine, RING_HEAD);
-}
-
-static void hangcheck_store_sample(struct intel_engine_cs *engine,
- const struct hangcheck *hc)
-{
- engine->hangcheck.acthd = hc->acthd;
- engine->hangcheck.last_ring = hc->ring;
- engine->hangcheck.last_head = hc->head;
-}
-
-static enum intel_engine_hangcheck_action
-hangcheck_get_action(struct intel_engine_cs *engine,
- const struct hangcheck *hc)
-{
- if (intel_engine_is_idle(engine))
- return ENGINE_IDLE;
-
- if (engine->hangcheck.last_ring != hc->ring)
- return ENGINE_ACTIVE_SEQNO;
-
- if (engine->hangcheck.last_head != hc->head)
- return ENGINE_ACTIVE_SEQNO;
-
- return engine_stuck(engine, hc->acthd);
-}
-
-static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
- struct hangcheck *hc)
-{
- unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT;
-
- hc->action = hangcheck_get_action(engine, hc);
-
- /* We always increment the progress
- * if the engine is busy and still processing
- * the same request, so that no single request
- * can run indefinitely (such as a chain of
- * batches). The only time we do not increment
- * the hangcheck score on this ring, if this
- * engine is in a legitimate wait for another
- * engine. In that case the waiting engine is a
- * victim and we want to be sure we catch the
- * right culprit. Then every time we do kick
- * the ring, make it as a progress as the seqno
- * advancement might ensure and if not, it
- * will catch the hanging engine.
- */
-
- switch (hc->action) {
- case ENGINE_IDLE:
- case ENGINE_ACTIVE_SEQNO:
- /* Clear head and subunit states on seqno movement */
- hc->acthd = 0;
-
- memset(&engine->hangcheck.instdone, 0,
- sizeof(engine->hangcheck.instdone));
-
- /* Intentional fall through */
- case ENGINE_WAIT_KICK:
- case ENGINE_WAIT:
- engine->hangcheck.action_timestamp = jiffies;
- break;
-
- case ENGINE_ACTIVE_HEAD:
- case ENGINE_ACTIVE_SUBUNITS:
- /*
- * Seqno stuck with still active engine gets leeway,
- * in hopes that it is just a long shader.
- */
- timeout = I915_SEQNO_DEAD_TIMEOUT;
- break;
-
- case ENGINE_DEAD:
- break;
-
- default:
- MISSING_CASE(hc->action);
- }
-
- hc->stalled = time_after(jiffies,
- engine->hangcheck.action_timestamp + timeout);
- hc->wedged = time_after(jiffies,
- engine->hangcheck.action_timestamp +
- I915_ENGINE_WEDGED_TIMEOUT);
-}
-
-static void hangcheck_declare_hang(struct intel_gt *gt,
- intel_engine_mask_t hung,
- intel_engine_mask_t stuck)
-{
- struct intel_engine_cs *engine;
- intel_engine_mask_t tmp;
- char msg[80];
- int len;
-
- /* If some rings hung but others were still busy, only
- * blame the hanging rings in the synopsis.
- */
- if (stuck != hung)
- hung &= ~stuck;
- len = scnprintf(msg, sizeof(msg),
- "%s on ", stuck == hung ? "no progress" : "hang");
- for_each_engine_masked(engine, gt->i915, hung, tmp)
- len += scnprintf(msg + len, sizeof(msg) - len,
- "%s, ", engine->name);
- msg[len-2] = '\0';
-
- return intel_gt_handle_error(gt, hung, I915_ERROR_CAPTURE, "%s", msg);
-}
-
-/*
- * This is called when the chip hasn't reported back with completed
- * batchbuffers in a long time. We keep track per ring seqno progress and
- * if there are no progress, hangcheck score for that ring is increased.
- * Further, acthd is inspected to see if the ring is stuck. On stuck case
- * we kick the ring. If we see no progress on three subsequent calls
- * we assume chip is wedged and try to fix it by resetting the chip.
- */
-static void hangcheck_elapsed(struct work_struct *work)
-{
- struct intel_gt *gt =
- container_of(work, typeof(*gt), hangcheck.work.work);
- intel_engine_mask_t hung = 0, stuck = 0, wedged = 0;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
-
- if (!i915_modparams.enable_hangcheck)
- return;
-
- if (!READ_ONCE(gt->awake))
- return;
-
- if (intel_gt_is_wedged(gt))
- return;
-
- wakeref = intel_runtime_pm_get_if_in_use(&gt->i915->runtime_pm);
- if (!wakeref)
- return;
-
- /* As enabling the GPU requires fairly extensive mmio access,
- * periodically arm the mmio checker to see if we are triggering
- * any invalid access.
- */
- intel_uncore_arm_unclaimed_mmio_detection(gt->uncore);
-
- for_each_engine(engine, gt->i915, id) {
- struct hangcheck hc;
-
- intel_engine_signal_breadcrumbs(engine);
-
- hangcheck_load_sample(engine, &hc);
- hangcheck_accumulate_sample(engine, &hc);
- hangcheck_store_sample(engine, &hc);
-
- if (hc.stalled) {
- hung |= engine->mask;
- if (hc.action != ENGINE_DEAD)
- stuck |= engine->mask;
- }
-
- if (hc.wedged)
- wedged |= engine->mask;
- }
-
- if (GEM_SHOW_DEBUG() && (hung | stuck)) {
- struct drm_printer p = drm_debug_printer("hangcheck");
-
- for_each_engine(engine, gt->i915, id) {
- if (intel_engine_is_idle(engine))
- continue;
-
- intel_engine_dump(engine, &p, "%s\n", engine->name);
- }
- }
-
- if (wedged) {
- dev_err(gt->i915->drm.dev,
- "GPU recovery timed out,"
- " cancelling all in-flight rendering.\n");
- GEM_TRACE_DUMP();
- intel_gt_set_wedged(gt);
- }
-
- if (hung)
- hangcheck_declare_hang(gt, hung, stuck);
-
- intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
-
- /* Reset timer in case GPU hangs without another request being added */
- intel_gt_queue_hangcheck(gt);
-}
-
-void intel_gt_queue_hangcheck(struct intel_gt *gt)
-{
- unsigned long delay;
-
- if (unlikely(!i915_modparams.enable_hangcheck))
- return;
-
- /*
- * Don't continually defer the hangcheck so that it is always run at
- * least once after work has been scheduled on any ring. Otherwise,
- * we will ignore a hung ring if a second ring is kept busy.
- */
-
- delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES);
- queue_delayed_work(system_long_wq, &gt->hangcheck.work, delay);
-}
-
-void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
-{
- memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
- engine->hangcheck.action_timestamp = jiffies;
-}
-
-void intel_gt_init_hangcheck(struct intel_gt *gt)
-{
- INIT_DELAYED_WORK(&gt->hangcheck.work, hangcheck_elapsed);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftest_hangcheck.c"
-#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c
new file mode 100644
index 000000000000..ceb785b75c25
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_llc.c
@@ -0,0 +1,161 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/cpufreq.h>
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_llc.h"
+#include "intel_sideband.h"
+
+struct ia_constants {
+ unsigned int min_gpu_freq;
+ unsigned int max_gpu_freq;
+
+ unsigned int min_ring_freq;
+ unsigned int max_ia_freq;
+};
+
+static struct intel_gt *llc_to_gt(struct intel_llc *llc)
+{
+ return container_of(llc, struct intel_gt, llc);
+}
+
+static unsigned int cpu_max_MHz(void)
+{
+ struct cpufreq_policy *policy;
+ unsigned int max_khz;
+
+ policy = cpufreq_cpu_get(0);
+ if (policy) {
+ max_khz = policy->cpuinfo.max_freq;
+ cpufreq_cpu_put(policy);
+ } else {
+ /*
+ * Default to measured freq if none found; the PCU will ensure
+ * we don't go over.
+ */
+ max_khz = tsc_khz;
+ }
+
+ return max_khz / 1000;
+}
+
+static bool get_ia_constants(struct intel_llc *llc,
+ struct ia_constants *consts)
+{
+ struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+ struct intel_rps *rps = &llc_to_gt(llc)->rps;
+
+ if (rps->max_freq <= rps->min_freq)
+ return false;
+
+ consts->max_ia_freq = cpu_max_MHz();
+
+ consts->min_ring_freq =
+ intel_uncore_read(llc_to_gt(llc)->uncore, DCLK) & 0xf;
+ /* convert DDR frequency from units of 266.6MHz to bandwidth */
+ consts->min_ring_freq = mult_frac(consts->min_ring_freq, 8, 3);
+
+ consts->min_gpu_freq = rps->min_freq;
+ consts->max_gpu_freq = rps->max_freq;
+ if (INTEL_GEN(i915) >= 9) {
+ /* Convert GT frequency to 50 HZ units */
+ consts->min_gpu_freq /= GEN9_FREQ_SCALER;
+ consts->max_gpu_freq /= GEN9_FREQ_SCALER;
+ }
+
+ return true;
+}
+
+static void calc_ia_freq(struct intel_llc *llc,
+ unsigned int gpu_freq,
+ const struct ia_constants *consts,
+ unsigned int *out_ia_freq,
+ unsigned int *out_ring_freq)
+{
+ struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+ const int diff = consts->max_gpu_freq - gpu_freq;
+ unsigned int ia_freq = 0, ring_freq = 0;
+
+ if (INTEL_GEN(i915) >= 9) {
+ /*
+ * ring_freq = 2 * GT. ring_freq is in 100MHz units
+ * No floor required for ring frequency on SKL.
+ */
+ ring_freq = gpu_freq;
+ } else if (INTEL_GEN(i915) >= 8) {
+ /* max(2 * GT, DDR). NB: GT is 50MHz units */
+ ring_freq = max(consts->min_ring_freq, gpu_freq);
+ } else if (IS_HASWELL(i915)) {
+ ring_freq = mult_frac(gpu_freq, 5, 4);
+ ring_freq = max(consts->min_ring_freq, ring_freq);
+ /* leave ia_freq as the default, chosen by cpufreq */
+ } else {
+ const int min_freq = 15;
+ const int scale = 180;
+
+ /*
+ * On older processors, there is no separate ring
+ * clock domain, so in order to boost the bandwidth
+ * of the ring, we need to upclock the CPU (ia_freq).
+ *
+ * For GPU frequencies less than 750MHz,
+ * just use the lowest ring freq.
+ */
+ if (gpu_freq < min_freq)
+ ia_freq = 800;
+ else
+ ia_freq = consts->max_ia_freq - diff * scale / 2;
+ ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
+ }
+
+ *out_ia_freq = ia_freq;
+ *out_ring_freq = ring_freq;
+}
+
+static void gen6_update_ring_freq(struct intel_llc *llc)
+{
+ struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+ struct ia_constants consts;
+ unsigned int gpu_freq;
+
+ if (!get_ia_constants(llc, &consts))
+ return;
+
+ /*
+ * For each potential GPU frequency, load a ring frequency we'd like
+ * to use for memory access. We do this by specifying the IA frequency
+ * the PCU should use as a reference to determine the ring frequency.
+ */
+ for (gpu_freq = consts.max_gpu_freq;
+ gpu_freq >= consts.min_gpu_freq;
+ gpu_freq--) {
+ unsigned int ia_freq, ring_freq;
+
+ calc_ia_freq(llc, gpu_freq, &consts, &ia_freq, &ring_freq);
+ sandybridge_pcode_write(i915,
+ GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
+ ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
+ ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
+ gpu_freq);
+ }
+}
+
+void intel_llc_enable(struct intel_llc *llc)
+{
+ if (HAS_LLC(llc_to_gt(llc)->i915))
+ gen6_update_ring_freq(llc);
+}
+
+void intel_llc_disable(struct intel_llc *llc)
+{
+ /* Currently there is no HW configuration to be done to disable. */
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_llc.c"
+#endif
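
To make the pre-Haswell branch of calc_ia_freq() concrete, take hypothetical constants of max_ia_freq = 3000 MHz and max_gpu_freq = 22 (in 50 MHz units):

 - gpu_freq = 22: diff = 0, so ia_freq = 3000 and DIV_ROUND_CLOSEST(3000, 100) = 30, i.e. 3.0 GHz expressed in 100 MHz units.
 - gpu_freq = 18: diff = 4, so ia_freq = 3000 - 4 * 180 / 2 = 2640, giving 26.
 - gpu_freq = 10: below the min_freq of 15, so ia_freq = 800, giving 8 (an 800 MHz floor).

On these older parts ring_freq is left at zero, matching the comment that there is no separate ring clock domain to program.
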
diff --git a/drivers/gpu/drm/i915/gt/intel_llc.h b/drivers/gpu/drm/i915/gt/intel_llc.h
new file mode 100644
index 000000000000..ef09a890d2b7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_llc.h
@@ -0,0 +1,15 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_LLC_H
+#define INTEL_LLC_H
+
+struct intel_llc;
+
+void intel_llc_enable(struct intel_llc *llc);
+void intel_llc_disable(struct intel_llc *llc);
+
+#endif /* INTEL_LLC_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_llc_types.h b/drivers/gpu/drm/i915/gt/intel_llc_types.h
new file mode 100644
index 000000000000..ecad4687b930
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_llc_types.h
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_LLC_TYPES_H
+#define INTEL_LLC_TYPES_H
+
+struct intel_llc {
+};
+
+#endif /* INTEL_LLC_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d42584439f51..0ac3b26674ad 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -145,6 +145,7 @@
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#include "intel_reset.h"
+#include "intel_ring.h"
#include "intel_workarounds.h"
#define RING_EXECLIST_QFULL (1 << 0x2)
@@ -230,9 +231,41 @@ static int __execlists_context_alloc(struct intel_context *ce,
struct intel_engine_cs *engine);
static void execlists_init_reg_state(u32 *reg_state,
- struct intel_context *ce,
- struct intel_engine_cs *engine,
- struct intel_ring *ring);
+ const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ const struct intel_ring *ring,
+ bool close);
+static void
+__execlists_update_reg_state(const struct intel_context *ce,
+ const struct intel_engine_cs *engine);
+
+static void mark_eio(struct i915_request *rq)
+{
+ if (i915_request_completed(rq))
+ return;
+
+ GEM_BUG_ON(i915_request_signaled(rq));
+
+ dma_fence_set_error(&rq->fence, -EIO);
+ i915_request_mark_complete(rq);
+}
+
+static struct i915_request *
+active_request(const struct intel_timeline * const tl, struct i915_request *rq)
+{
+ struct i915_request *active = rq;
+
+ rcu_read_lock();
+ list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
+ if (i915_request_completed(rq))
+ break;
+
+ active = rq;
+ }
+ rcu_read_unlock();
+
+ return active;
+}
static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
{
@@ -330,10 +363,15 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
* However, the priority hint is a mere hint that we may need to
* preempt. If that hint is stale or we may be trying to preempt
* ourselves, ignore the request.
+ *
+ * More naturally we would write
+ * prio >= max(0, last);
+ * except that we wish to prevent triggering preemption at the same
+ * priority level: the task that is running should remain running
+ * to preserve FIFO ordering of dependencies.
*/
- last_prio = effective_prio(rq);
- if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
- last_prio))
+ last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
+ if (engine->execlists.queue_priority_hint <= last_prio)
return false;
/*
@@ -422,12 +460,8 @@ assert_priority_queue(const struct i915_request *prev,
static u64
lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
{
- struct i915_gem_context *ctx = ce->gem_context;
u64 desc;
- BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
- BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
-
desc = INTEL_LEGACY_32B_CONTEXT;
if (i915_vm_is_4lvl(ce->vm))
desc = INTEL_LEGACY_64B_CONTEXT;
@@ -437,33 +471,379 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
if (IS_GEN(engine->i915, 8))
desc |= GEN8_CTX_L3LLC_COHERENT;
- desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
- /* bits 12-31 */
+ desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
/*
* The following 32bits are copied into the OA reports (dword 2).
* Consider updating oa_get_render_ctx_id in i915_perf.c when changing
* anything below.
*/
if (INTEL_GEN(engine->i915) >= 11) {
- GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
- desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
- /* bits 37-47 */
-
desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
/* bits 48-53 */
- /* TODO: decide what to do with SW counter (bits 55-60) */
-
desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
/* bits 61-63 */
- } else {
- GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
- desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */
}
return desc;
}
+static u32 *set_offsets(u32 *regs,
+ const u8 *data,
+ const struct intel_engine_cs *engine)
+#define NOP(x) (BIT(7) | (x))
+#define LRI(count, flags) ((flags) << 6 | (count))
+#define POSTED BIT(0)
+#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
+#define REG16(x) \
+ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
+ (((x) >> 2) & 0x7f)
+#define END() 0
+{
+ const u32 base = engine->mmio_base;
+
+ while (*data) {
+ u8 count, flags;
+
+ if (*data & BIT(7)) { /* skip */
+ regs += *data++ & ~BIT(7);
+ continue;
+ }
+
+ count = *data & 0x3f;
+ flags = *data >> 6;
+ data++;
+
+ *regs = MI_LOAD_REGISTER_IMM(count);
+ if (flags & POSTED)
+ *regs |= MI_LRI_FORCE_POSTED;
+ if (INTEL_GEN(engine->i915) >= 11)
+ *regs |= MI_LRI_CS_MMIO;
+ regs++;
+
+ GEM_BUG_ON(!count);
+ do {
+ u32 offset = 0;
+ u8 v;
+
+ do {
+ v = *data++;
+ offset <<= 7;
+ offset |= v & ~BIT(7);
+ } while (v & BIT(7));
+
+ *regs = base + (offset << 2);
+ regs += 2;
+ } while (--count);
+ }
+
+ return regs;
+}
+
+static const u8 gen8_xcs_offsets[] = {
+ NOP(1),
+ LRI(11, 0),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+
+ NOP(9),
+ LRI(9, 0),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(2, 0),
+ REG16(0x200),
+ REG(0x028),
+
+ END(),
+};
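
The byte tables above are a compact encoding that set_offsets() expands into MI_LRI register/value slots of the context image. Decoding the first few entries of gen8_xcs_offsets as a worked sketch (the register names are the usual interpretation of those offsets and are not spelled out in the patch):

/*
 * NOP(1)       -> 0x81: skip one dword in the destination (regs += 1).
 * LRI(11, 0)   -> 0x0b: emit MI_LOAD_REGISTER_IMM(11), no FORCE_POSTED
 *                 (MI_LRI_CS_MMIO is OR'ed in on gen11+).
 * REG16(0x244) -> 0x81, 0x11: the 7-bit groups reassemble to 0x244 >> 2, so
 *                 the register dword becomes mmio_base + 0x244 (typically
 *                 RING_CONTEXT_CONTROL); the paired value dword is skipped,
 *                 to be filled in later, e.g. by execlists_init_reg_state().
 * REG(0x034)   -> 0x0d: a single 7-bit group, giving mmio_base + 0x34
 *                 (RING_HEAD).
 */
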
+
+static const u8 gen9_xcs_offsets[] = {
+ NOP(1),
+ LRI(14, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+
+ NOP(3),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(1, POSTED),
+ REG16(0x200),
+
+ NOP(13),
+ LRI(44, POSTED),
+ REG(0x028),
+ REG(0x09c),
+ REG(0x0c0),
+ REG(0x178),
+ REG(0x17c),
+ REG16(0x358),
+ REG(0x170),
+ REG(0x150),
+ REG(0x154),
+ REG(0x158),
+ REG16(0x41c),
+ REG16(0x600),
+ REG16(0x604),
+ REG16(0x608),
+ REG16(0x60c),
+ REG16(0x610),
+ REG16(0x614),
+ REG16(0x618),
+ REG16(0x61c),
+ REG16(0x620),
+ REG16(0x624),
+ REG16(0x628),
+ REG16(0x62c),
+ REG16(0x630),
+ REG16(0x634),
+ REG16(0x638),
+ REG16(0x63c),
+ REG16(0x640),
+ REG16(0x644),
+ REG16(0x648),
+ REG16(0x64c),
+ REG16(0x650),
+ REG16(0x654),
+ REG16(0x658),
+ REG16(0x65c),
+ REG16(0x660),
+ REG16(0x664),
+ REG16(0x668),
+ REG16(0x66c),
+ REG16(0x670),
+ REG16(0x674),
+ REG16(0x678),
+ REG16(0x67c),
+ REG(0x068),
+
+ END(),
+};
+
+static const u8 gen12_xcs_offsets[] = {
+ NOP(1),
+ LRI(13, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+
+ NOP(5),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ END(),
+};
+
+static const u8 gen8_rcs_offsets[] = {
+ NOP(1),
+ LRI(14, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+
+ NOP(3),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END(),
+};
+
+static const u8 gen11_rcs_offsets[] = {
+ NOP(1),
+ LRI(15, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+
+ NOP(1),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ LRI(1, POSTED),
+ REG(0x1b0),
+
+ NOP(10),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END(),
+};
+
+static const u8 gen12_rcs_offsets[] = {
+ NOP(1),
+ LRI(13, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+
+ NOP(5),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ LRI(3, POSTED),
+ REG(0x1b0),
+ REG16(0x5a8),
+ REG16(0x5ac),
+
+ NOP(6),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END(),
+};
+
+#undef END
+#undef REG16
+#undef REG
+#undef LRI
+#undef NOP
+
+static const u8 *reg_offsets(const struct intel_engine_cs *engine)
+{
+ /*
+ * The gen12+ lists only have the registers we program in the basic
+ * default state. We rely on the context image using relative
+ * addressing to automatically fix up the register state between the
+ * physical engines backing a virtual engine.
+ */
+ GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
+ !intel_engine_has_relative_mmio(engine));
+
+ if (engine->class == RENDER_CLASS) {
+ if (INTEL_GEN(engine->i915) >= 12)
+ return gen12_rcs_offsets;
+ else if (INTEL_GEN(engine->i915) >= 11)
+ return gen11_rcs_offsets;
+ else
+ return gen8_rcs_offsets;
+ } else {
+ if (INTEL_GEN(engine->i915) >= 12)
+ return gen12_xcs_offsets;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return gen9_xcs_offsets;
+ else
+ return gen8_xcs_offsets;
+ }
+}
+
static void unwind_wa_tail(struct i915_request *rq)
{
rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
@@ -482,7 +862,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_for_each_entry_safe_reverse(rq, rn,
&engine->active.requests,
sched.link) {
- struct intel_engine_cs *owner;
if (i915_request_completed(rq))
continue; /* XXX */
@@ -497,8 +876,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
* engine so that it can be moved across onto another physical
* engine as load dictates.
*/
- owner = rq->hw_context->engine;
- if (likely(owner == engine)) {
+ if (likely(rq->execution_mask == engine->mask)) {
GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
if (rq_prio(rq) != prio) {
prio = rq_prio(rq);
@@ -509,6 +887,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_move(&rq->sched.link, pl);
active = rq;
} else {
+ struct intel_engine_cs *owner = rq->hw_context->engine;
+
/*
* Decouple the virtual breadcrumb before moving it
* back to the virtual engine -- we don't want the
@@ -518,7 +898,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
*/
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
&rq->fence.flags)) {
- spin_lock(&rq->lock);
+ spin_lock_nested(&rq->lock,
+ SINGLE_DEPTH_NESTING);
i915_request_cancel_breadcrumb(rq);
spin_unlock(&rq->lock);
}
@@ -554,6 +935,114 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
status, rq);
}
+static void intel_engine_context_in(struct intel_engine_cs *engine)
+{
+ unsigned long flags;
+
+ if (READ_ONCE(engine->stats.enabled) == 0)
+ return;
+
+ write_seqlock_irqsave(&engine->stats.lock, flags);
+
+ if (engine->stats.enabled > 0) {
+ if (engine->stats.active++ == 0)
+ engine->stats.start = ktime_get();
+ GEM_BUG_ON(engine->stats.active == 0);
+ }
+
+ write_sequnlock_irqrestore(&engine->stats.lock, flags);
+}
+
+static void intel_engine_context_out(struct intel_engine_cs *engine)
+{
+ unsigned long flags;
+
+ if (READ_ONCE(engine->stats.enabled) == 0)
+ return;
+
+ write_seqlock_irqsave(&engine->stats.lock, flags);
+
+ if (engine->stats.enabled > 0) {
+ ktime_t last;
+
+ if (engine->stats.active && --engine->stats.active == 0) {
+ /*
+ * Decrement the active context count and, if the GPU is
+ * now idle, add the elapsed time to the running total.
+ */
+ last = ktime_sub(ktime_get(), engine->stats.start);
+
+ engine->stats.total = ktime_add(engine->stats.total,
+ last);
+ } else if (engine->stats.active == 0) {
+ /*
+ * After turning on engine stats, context out might be
+ * the first event, in which case we account from the
+ * time stats gathering was turned on.
+ */
+ last = ktime_sub(ktime_get(), engine->stats.enabled_at);
+
+ engine->stats.total = ktime_add(engine->stats.total,
+ last);
+ }
+ }
+
+ write_sequnlock_irqrestore(&engine->stats.lock, flags);
+}
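The busyness accumulated above is read back under the same seqlock. A minimal reader sketch, assuming only the stats fields visible in this hunk (the driver's own accessor for this is intel_engine_get_busy_time()):

static ktime_t engine_busy_time_sketch(struct intel_engine_cs *engine)
{
	unsigned int seq;
	ktime_t total;

	/* Retry until the writer side above leaves us a stable snapshot */
	do {
		seq = read_seqbegin(&engine->stats.lock);

		total = engine->stats.total;
		if (engine->stats.active)	/* a context is still in flight */
			total = ktime_add(total,
					  ktime_sub(ktime_get(),
						    engine->stats.start));
	} while (read_seqretry(&engine->stats.lock, seq));

	return total;
}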
+
+static void restore_default_state(struct intel_context *ce,
+ struct intel_engine_cs *engine)
+{
+ u32 *regs = ce->lrc_reg_state;
+
+ if (engine->pinned_default_state)
+ memcpy(regs, /* skip restoring the vanilla PPHWSP */
+ engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+ engine->context_size - PAGE_SIZE);
+
+ execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+}
+
+static void reset_active(struct i915_request *rq,
+ struct intel_engine_cs *engine)
+{
+ struct intel_context * const ce = rq->hw_context;
+ u32 head;
+
+ /*
+ * The executing context has been cancelled. We want to prevent
+ * further execution along this context and propagate the error on
+ * to anything depending on its results.
+ *
+ * In __i915_request_submit(), we apply the -EIO and remove the
+ * requests' payloads for any banned requests. But first, we must
+ * rewind the context back to the start of the incomplete request so
+ * that we do not jump back into the middle of the batch.
+ *
+ * We preserve the breadcrumbs and semaphores of the incomplete
+ * requests so that inter-timeline dependencies (i.e. other timelines)
+ * remain correctly ordered. And we defer to __i915_request_submit()
+ * so that all asynchronous waits are correctly handled.
+ */
+ GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
+ __func__, engine->name, rq->fence.context, rq->fence.seqno);
+
+ /* On resubmission of the active request, payload will be scrubbed */
+ if (i915_request_completed(rq))
+ head = rq->tail;
+ else
+ head = active_request(ce->timeline, rq)->head;
+ ce->ring->head = intel_ring_wrap(ce->ring, head);
+ intel_ring_update_space(ce->ring);
+
+ /* Scrub the context image to prevent replaying the previous batch */
+ restore_default_state(ce, engine);
+ __execlists_update_reg_state(ce, engine);
+
+ /* We've switched away, so this should be a no-op, but intent matters */
+ ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+}
+
static inline struct intel_engine_cs *
__execlists_schedule_in(struct i915_request *rq)
{
@@ -562,6 +1051,21 @@ __execlists_schedule_in(struct i915_request *rq)
intel_context_get(ce);
+ if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
+ reset_active(rq, engine);
+
+ if (ce->tag) {
+ /* Use a fixed tag for OA and friends */
+ ce->lrc_desc |= (u64)ce->tag << 32;
+ } else {
+ /* We don't need a strict matching tag, just different values */
+ ce->lrc_desc &= ~GENMASK_ULL(47, 37);
+ ce->lrc_desc |=
+ (u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
+ GEN11_SW_CTX_ID_SHIFT;
+ BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
+ }
+
intel_gt_pm_get(engine->gt);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(engine);
@@ -605,6 +1109,12 @@ __execlists_schedule_out(struct i915_request *rq,
{
struct intel_context * const ce = rq->hw_context;
+ /*
+ * NB process_csb() is not under the engine->active.lock and hence
+ * schedule_out can race with schedule_in, meaning that we should
+ * refrain from doing non-trivial work here.
+ */
+
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
intel_gt_pm_put(engine->gt);
@@ -631,7 +1141,6 @@ execlists_schedule_out(struct i915_request *rq)
struct intel_engine_cs *cur, *old;
trace_i915_request_out(rq);
- GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
old = READ_ONCE(ce->inflight);
do
@@ -648,7 +1157,7 @@ static u64 execlists_update_context(const struct i915_request *rq)
struct intel_context *ce = rq->hw_context;
u64 desc;
- ce->lrc_reg_state[CTX_RING_TAIL + 1] =
+ ce->lrc_reg_state[CTX_RING_TAIL] =
intel_ring_set_tail(rq->ring, rq->tail);
/*
@@ -671,6 +1180,10 @@ static u64 execlists_update_context(const struct i915_request *rq)
desc = ce->lrc_desc;
ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+ /* Wa_1607138340:tgl */
+ if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0))
+ desc |= CTX_DESC_FORCE_RESTORE;
+
return desc;
}
@@ -693,6 +1206,9 @@ trace_ports(const struct intel_engine_execlists *execlists,
const struct intel_engine_cs *engine =
container_of(execlists, typeof(*engine), execlists);
+ if (!ports[0])
+ return;
+
GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
engine->name, msg,
ports[0]->fence.context,
@@ -713,25 +1229,45 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
trace_ports(execlists, msg, execlists->pending);
- if (!execlists->pending[0])
+ if (!execlists->pending[0]) {
+ GEM_TRACE_ERR("Nothing pending for promotion!\n");
return false;
+ }
- if (execlists->pending[execlists_num_ports(execlists)])
+ if (execlists->pending[execlists_num_ports(execlists)]) {
+ GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
+ execlists_num_ports(execlists));
return false;
+ }
for (port = execlists->pending; (rq = *port); port++) {
- if (ce == rq->hw_context)
+ if (ce == rq->hw_context) {
+ GEM_TRACE_ERR("Duplicate context in pending[%zd]\n",
+ port - execlists->pending);
return false;
+ }
ce = rq->hw_context;
if (i915_request_completed(rq))
continue;
- if (i915_active_is_idle(&ce->active))
+ if (i915_active_is_idle(&ce->active)) {
+ GEM_TRACE_ERR("Inactive context in pending[%zd]\n",
+ port - execlists->pending);
+ return false;
+ }
+
+ if (!i915_vma_is_pinned(ce->state)) {
+ GEM_TRACE_ERR("Unpinned context in pending[%zd]\n",
+ port - execlists->pending);
return false;
+ }
- if (!i915_vma_is_pinned(ce->state))
+ if (!i915_vma_is_pinned(ce->ring->vma)) {
+ GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n",
+ port - execlists->pending);
return false;
+ }
}
return ce;
@@ -797,6 +1333,21 @@ static bool can_merge_rq(const struct i915_request *prev,
GEM_BUG_ON(prev == next);
GEM_BUG_ON(!assert_priority_queue(prev, next));
+ /*
+ * We do not submit known completed requests. Therefore if the next
+ * request is already completed, we can pretend to merge it in
+ * with the previous context (and we will skip updating the ELSP
+ * and tracking). This hopefully keeps the ELSP full of active
+ * contexts, despite the best efforts of preempt-to-busy to confuse
+ * us.
+ */
+ if (i915_request_completed(next))
+ return true;
+
+ if (unlikely((prev->flags ^ next->flags) &
+ (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL)))
+ return false;
+
if (!can_merge_ctx(prev->hw_context, next->hw_context))
return false;
@@ -806,47 +1357,7 @@ static bool can_merge_rq(const struct i915_request *prev,
static void virtual_update_register_offsets(u32 *regs,
struct intel_engine_cs *engine)
{
- u32 base = engine->mmio_base;
-
- /* Must match execlists_init_reg_state()! */
-
- regs[CTX_CONTEXT_CONTROL] =
- i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
- regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
- regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
- regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
- regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
-
- regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
- regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
- regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
- regs[CTX_SECOND_BB_HEAD_U] =
- i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
- regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
- regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
-
- regs[CTX_CTX_TIMESTAMP] =
- i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
- regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
- regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
- regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
- regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
- regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
- regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
- regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
- regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
-
- if (engine->class == RENDER_CLASS) {
- regs[CTX_RCS_INDIRECT_CTX] =
- i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
- regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
- i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
- regs[CTX_BB_PER_CTX_PTR] =
- i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
-
- regs[CTX_R_PWR_CLK_STATE] =
- i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
- }
+ set_offsets(regs, reg_offsets(engine), engine);
}
static bool virtual_matches(const struct virtual_engine *ve,
@@ -893,7 +1404,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
static struct i915_request *
last_active(const struct intel_engine_execlists *execlists)
{
- struct i915_request * const *last = execlists->active;
+ struct i915_request * const *last = READ_ONCE(execlists->active);
while (*last && i915_request_completed(*last))
last++;
@@ -961,7 +1472,7 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
{
int hint;
- if (!intel_engine_has_semaphores(engine))
+ if (!intel_engine_has_timeslices(engine))
return false;
if (list_is_last(&rq->sched.link, &engine->active.requests))
@@ -982,15 +1493,32 @@ switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
return rq_prio(list_next_entry(rq, sched.link));
}
-static bool
-enable_timeslice(const struct intel_engine_execlists *execlists)
+static inline unsigned long
+timeslice(const struct intel_engine_cs *engine)
{
- const struct i915_request *rq = *execlists->active;
+ return READ_ONCE(engine->props.timeslice_duration_ms);
+}
+
+static unsigned long
+active_timeslice(const struct intel_engine_cs *engine)
+{
+ const struct i915_request *rq = *engine->execlists.active;
if (i915_request_completed(rq))
- return false;
+ return 0;
- return execlists->switch_priority_hint >= effective_prio(rq);
+ if (engine->execlists.switch_priority_hint < effective_prio(rq))
+ return 0;
+
+ return timeslice(engine);
+}
+
+static void set_timeslice(struct intel_engine_cs *engine)
+{
+ if (!intel_engine_has_timeslices(engine))
+ return;
+
+ set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
}
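The zero returned by active_timeslice() is meaningful: set_timer_ms() is a small i915 utility introduced elsewhere in this series, and the code here assumes that a zero duration disarms the timer. A sketch of that assumed contract, not the driver's actual implementation:

/*
 * Sketch only: duration 0 disarms the timer, and t->expires == 0 is kept
 * as the "disarmed" marker. A real implementation must also avoid arming
 * with an expiry value of exactly 0, which would collide with the marker.
 */
static void set_timer_ms_sketch(struct timer_list *t, unsigned long ms)
{
	if (!ms) {
		del_timer(t);
		WRITE_ONCE(t->expires, 0);	/* mark as disarmed */
		return;
	}

	mod_timer(t, jiffies + msecs_to_jiffies(ms));
}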
static void record_preemption(struct intel_engine_execlists *execlists)
@@ -998,6 +1526,30 @@ static void record_preemption(struct intel_engine_execlists *execlists)
(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
}
+static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ rq = last_active(&engine->execlists);
+ if (!rq)
+ return 0;
+
+ /* Force a fast reset for terminated contexts (ignoring sysfs!) */
+ if (unlikely(i915_gem_context_is_banned(rq->gem_context)))
+ return 1;
+
+ return READ_ONCE(engine->props.preempt_timeout_ms);
+}
+
+static void set_preempt_timeout(struct intel_engine_cs *engine)
+{
+ if (!intel_engine_has_preempt_reset(engine))
+ return;
+
+ set_timer_ms(&engine->execlists.preempt,
+ active_preempt_timeout(engine));
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1094,7 +1646,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
last = NULL;
} else if (need_timeslice(engine, last) &&
- !timer_pending(&engine->execlists.timer)) {
+ timer_expired(&engine->execlists.timer)) {
GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n",
engine->name,
last->fence.context,
@@ -1130,8 +1682,18 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* submission.
*/
if (!list_is_last(&last->sched.link,
- &engine->active.requests))
+ &engine->active.requests)) {
+ /*
+ * Even if ELSP[1] is occupied and not worthy
+ * of timeslices, our queue might be.
+ */
+ if (!execlists->timer.expires &&
+ need_timeslice(engine, last))
+ set_timer_ms(&execlists->timer,
+ timeslice(engine));
+
return;
+ }
/*
* WaIdleLiteRestore:bdw,skl
@@ -1172,21 +1734,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
continue;
}
- if (i915_request_completed(rq)) {
- ve->request = NULL;
- ve->base.execlists.queue_priority_hint = INT_MIN;
- rb_erase_cached(rb, &execlists->virtual);
- RB_CLEAR_NODE(rb);
-
- rq->engine = engine;
- __i915_request_submit(rq);
-
- spin_unlock(&ve->base.active.lock);
-
- rb = rb_first_cached(&execlists->virtual);
- continue;
- }
-
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock);
return; /* leave this for another */
@@ -1214,7 +1761,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
unsigned int n;
GEM_BUG_ON(READ_ONCE(ve->context.inflight));
- virtual_update_register_offsets(regs, engine);
+
+ if (!intel_engine_has_relative_mmio(engine))
+ virtual_update_register_offsets(regs,
+ engine);
if (!list_empty(&ve->context.signals))
virtual_xfer_breadcrumbs(ve, engine);
@@ -1237,11 +1787,24 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(ve->siblings[0] != engine);
}
- __i915_request_submit(rq);
- if (!i915_request_completed(rq)) {
+ if (__i915_request_submit(rq)) {
submit = true;
last = rq;
}
+ i915_request_put(rq);
+
+ /*
+ * Hmm, we have a bunch of virtual engine requests,
+ * but the first one was already completed (thanks
+ * preempt-to-busy!). Keep looking at the veng queue
+ * until we have no more relevant requests (i.e.
+ * the normal submit queue has higher priority).
+ */
+ if (!submit) {
+ spin_unlock(&ve->base.active.lock);
+ rb = rb_first_cached(&execlists->virtual);
+ continue;
+ }
}
spin_unlock(&ve->base.active.lock);
@@ -1254,8 +1817,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
- if (i915_request_completed(rq))
- goto skip;
+ bool merge = true;
/*
* Can we combine this request with the current port?
@@ -1285,6 +1847,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (last->hw_context == rq->hw_context)
goto done;
+ if (i915_request_has_sentinel(last))
+ goto done;
+
/*
* If GVT overrides us we only ever submit
* port[0], leaving port[1] empty. Note that we
@@ -1296,14 +1861,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
ctx_single_port_submission(rq->hw_context))
goto done;
- *port = execlists_schedule_in(last, port - execlists->pending);
- port++;
+ merge = false;
}
- last = rq;
- submit = true;
-skip:
- __i915_request_submit(rq);
+ if (__i915_request_submit(rq)) {
+ if (!merge) {
+ *port = execlists_schedule_in(last, port - execlists->pending);
+ port++;
+ last = NULL;
+ }
+
+ GEM_BUG_ON(last &&
+ !can_merge_ctx(last->hw_context,
+ rq->hw_context));
+
+ submit = true;
+ last = rq;
+ }
}
rb_erase_cached(&p->node, &execlists->queue);
@@ -1334,11 +1908,28 @@ done:
if (submit) {
*port = execlists_schedule_in(last, port - execlists->pending);
- memset(port + 1, 0, (last_port - port) * sizeof(*port));
execlists->switch_priority_hint =
switch_prio(engine, *execlists->pending);
+
+ /*
+ * Skip if we ended up with exactly the same set of requests,
+ * e.g. trying to timeslice a pair of ordered contexts
+ */
+ if (!memcmp(execlists->active, execlists->pending,
+ (port - execlists->pending + 1) * sizeof(*port))) {
+ do
+ execlists_schedule_out(fetch_and_zero(port));
+ while (port-- != execlists->pending);
+
+ goto skip_submit;
+ }
+
+ memset(port + 1, 0, (last_port - port) * sizeof(*port));
execlists_submit_ports(engine);
+
+ set_preempt_timeout(engine);
} else {
+skip_submit:
ring_set_paused(engine, 0);
}
}
@@ -1371,13 +1962,6 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
}
-enum csb_step {
- CSB_NOP,
- CSB_PROMOTE,
- CSB_PREEMPT,
- CSB_COMPLETE,
-};
-
/*
* Starting with Gen12, the status has a new format:
*
@@ -1404,7 +1988,7 @@ enum csb_step {
* bits 47-57: sw context id of the lrc the GT switched away from
* bits 58-63: sw counter of the lrc the GT switched away from
*/
-static inline enum csb_step
+static inline bool
gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
{
u32 lower_dw = csb[0];
@@ -1413,9 +1997,6 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
- if (!ctx_away_valid && ctx_to_valid)
- return CSB_PROMOTE;
-
/*
* The context switch detail is not guaranteed to be 5 when a preemption
* occurs, so we can't just check for that. The check below works for
@@ -1423,8 +2004,10 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
* instructions and lite-restore. Preempt-to-idle via the CTRL register
* would require some extra handling, but we don't support that.
*/
- if (new_queue && ctx_away_valid)
- return CSB_PREEMPT;
+ if (!ctx_away_valid || new_queue) {
+ GEM_BUG_ON(!ctx_to_valid);
+ return true;
+ }
/*
* switch detail = 5 is covered by the case above and we do not expect a
@@ -1432,30 +2015,13 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
* use polling mode.
*/
GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
-
- if (*execlists->active) {
- GEM_BUG_ON(!ctx_away_valid);
- return CSB_COMPLETE;
- }
-
- return CSB_NOP;
+ return false;
}
-static inline enum csb_step
+static inline bool
gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
{
- unsigned int status = *csb;
-
- if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
- return CSB_PROMOTE;
-
- if (status & GEN8_CTX_STATUS_PREEMPTED)
- return CSB_PREEMPT;
-
- if (*execlists->active)
- return CSB_COMPLETE;
-
- return CSB_NOP;
+ return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
}
static void process_csb(struct intel_engine_cs *engine)
@@ -1465,7 +2031,14 @@ static void process_csb(struct intel_engine_cs *engine)
const u8 num_entries = execlists->csb_size;
u8 head, tail;
- GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
+ /*
+ * As we modify our execlists state tracking we require exclusive
+ * access. Either we are inside the tasklet, or the tasklet is disabled
+ * and we assume that is only inside the reset paths and so serialised.
+ */
+ GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
+ !reset_in_progress(execlists));
+ GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
/*
* Note that csb_write, csb_status may be either in HWSP or mmio.
@@ -1494,7 +2067,7 @@ static void process_csb(struct intel_engine_cs *engine)
rmb();
do {
- enum csb_step csb_step;
+ bool promote;
if (++head == num_entries)
head = 0;
@@ -1522,20 +2095,19 @@ static void process_csb(struct intel_engine_cs *engine)
buf[2 * head + 0], buf[2 * head + 1]);
if (INTEL_GEN(engine->i915) >= 12)
- csb_step = gen12_csb_parse(execlists, buf + 2 * head);
+ promote = gen12_csb_parse(execlists, buf + 2 * head);
else
- csb_step = gen8_csb_parse(execlists, buf + 2 * head);
+ promote = gen8_csb_parse(execlists, buf + 2 * head);
+ if (promote) {
+ if (!inject_preempt_hang(execlists))
+ ring_set_paused(engine, 0);
- switch (csb_step) {
- case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
+ /* cancel old inflight, prepare for switch */
trace_ports(execlists, "preempted", execlists->active);
-
while (*execlists->active)
execlists_schedule_out(*execlists->active++);
- /* fallthrough */
- case CSB_PROMOTE: /* switch pending to inflight */
- GEM_BUG_ON(*execlists->active);
+ /* switch pending to inflight */
GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
execlists->active =
memcpy(execlists->inflight,
@@ -1543,16 +2115,13 @@ static void process_csb(struct intel_engine_cs *engine)
execlists_num_ports(execlists) *
sizeof(*execlists->pending));
- if (enable_timeslice(execlists))
- mod_timer(&execlists->timer, jiffies + 1);
-
- if (!inject_preempt_hang(execlists))
- ring_set_paused(engine, 0);
+ set_timeslice(engine);
WRITE_ONCE(execlists->pending[0], NULL);
- break;
+ } else {
+ GEM_BUG_ON(!*execlists->active);
- case CSB_COMPLETE: /* port0 completed, advanced to port1 */
+ /* port0 completed, advanced to port1 */
trace_ports(execlists, "completed", execlists->active);
/*
@@ -1567,10 +2136,6 @@ static void process_csb(struct intel_engine_cs *engine)
GEM_BUG_ON(execlists->active - execlists->inflight >
execlists_num_ports(execlists));
- break;
-
- case CSB_NOP:
- break;
}
} while (head != tail);
@@ -1593,8 +2158,48 @@ static void process_csb(struct intel_engine_cs *engine)
static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
{
lockdep_assert_held(&engine->active.lock);
- if (!engine->execlists.pending[0])
+ if (!engine->execlists.pending[0]) {
+ rcu_read_lock(); /* protect peeking at execlists->active */
execlists_dequeue(engine);
+ rcu_read_unlock();
+ }
+}
+
+static noinline void preempt_reset(struct intel_engine_cs *engine)
+{
+ const unsigned int bit = I915_RESET_ENGINE + engine->id;
+ unsigned long *lock = &engine->gt->reset.flags;
+
+ if (i915_modparams.reset < 3)
+ return;
+
+ if (test_and_set_bit(bit, lock))
+ return;
+
+ /* Mark this tasklet as disabled to avoid waiting for it to complete */
+ tasklet_disable_nosync(&engine->execlists.tasklet);
+
+ GEM_TRACE("%s: preempt timeout %lu+%ums\n",
+ engine->name,
+ READ_ONCE(engine->props.preempt_timeout_ms),
+ jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
+ intel_engine_reset(engine, "preemption time out");
+
+ tasklet_enable(&engine->execlists.tasklet);
+ clear_and_wake_up_bit(bit, lock);
+}
+
+static bool preempt_timeout(const struct intel_engine_cs *const engine)
+{
+ const struct timer_list *t = &engine->execlists.preempt;
+
+ if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
+ return false;
+
+ if (!timer_expired(t))
+ return false;
+
+ return READ_ONCE(engine->execlists.pending[0]);
}
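timer_expired(), used here and in execlists_dequeue() above, is likewise an i915 utility not shown in this diff. Given the expires == 0 convention sketched earlier, the behaviour assumed by preempt_timeout() is roughly:

/* Sketch: the timer was armed at some point and is no longer pending */
static bool timer_expired_sketch(const struct timer_list *t)
{
	return READ_ONCE(t->expires) && !timer_pending(t);
}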
/*
@@ -1604,23 +2209,39 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
static void execlists_submission_tasklet(unsigned long data)
{
struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
- unsigned long flags;
+ bool timeout = preempt_timeout(engine);
process_csb(engine);
- if (!READ_ONCE(engine->execlists.pending[0])) {
+ if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
+ unsigned long flags;
+
spin_lock_irqsave(&engine->active.lock, flags);
__execlists_submission_tasklet(engine);
spin_unlock_irqrestore(&engine->active.lock, flags);
+
+ /* Recheck after serialising with direct-submission */
+ if (timeout && preempt_timeout(engine))
+ preempt_reset(engine);
}
}
-static void execlists_submission_timer(struct timer_list *timer)
+static void __execlists_kick(struct intel_engine_execlists *execlists)
{
- struct intel_engine_cs *engine =
- from_timer(engine, timer, execlists.timer);
-
/* Kick the tasklet for some interrupt coalescing and reset handling */
- tasklet_hi_schedule(&engine->execlists.tasklet);
+ tasklet_hi_schedule(&execlists->tasklet);
+}
+
+#define execlists_kick(t, member) \
+ __execlists_kick(container_of(t, struct intel_engine_execlists, member))
+
+static void execlists_timeslice(struct timer_list *timer)
+{
+ execlists_kick(timer, timer);
+}
+
+static void execlists_preempt(struct timer_list *timer)
+{
+ execlists_kick(timer, preempt);
}
static void queue_request(struct intel_engine_cs *engine,
@@ -1700,7 +2321,6 @@ set_redzone(void *vaddr, const struct intel_engine_cs *engine)
if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
return;
- vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
vaddr += engine->context_size;
memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
@@ -1712,7 +2332,6 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
return;
- vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
vaddr += engine->context_size;
if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE))
@@ -1726,14 +2345,13 @@ static void execlists_context_unpin(struct intel_context *ce)
check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
ce->engine);
- i915_gem_context_unpin_hw_id(ce->gem_context);
i915_gem_object_unpin_map(ce->state->obj);
intel_ring_reset(ce->ring, ce->ring->tail);
}
static void
-__execlists_update_reg_state(struct intel_context *ce,
- struct intel_engine_cs *engine)
+__execlists_update_reg_state(const struct intel_context *ce,
+ const struct intel_engine_cs *engine)
{
struct intel_ring *ring = ce->ring;
u32 *regs = ce->lrc_reg_state;
@@ -1741,16 +2359,16 @@ __execlists_update_reg_state(struct intel_context *ce,
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
- regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma);
- regs[CTX_RING_HEAD + 1] = ring->head;
- regs[CTX_RING_TAIL + 1] = ring->tail;
+ regs[CTX_RING_BUFFER_START] = i915_ggtt_offset(ring->vma);
+ regs[CTX_RING_HEAD] = ring->head;
+ regs[CTX_RING_TAIL] = ring->tail;
/* RPCS */
if (engine->class == RENDER_CLASS) {
- regs[CTX_R_PWR_CLK_STATE + 1] =
+ regs[CTX_R_PWR_CLK_STATE] =
intel_sseu_make_rpcs(engine->i915, &ce->sseu);
- i915_oa_init_reg_state(engine, ce, regs);
+ i915_oa_init_reg_state(ce, engine);
}
}
@@ -1776,18 +2394,12 @@ __execlists_context_pin(struct intel_context *ce,
goto unpin_active;
}
- ret = i915_gem_context_pin_hw_id(ce->gem_context);
- if (ret)
- goto unpin_map;
-
ce->lrc_desc = lrc_descriptor(ce, engine);
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
__execlists_update_reg_state(ce, engine);
return 0;
-unpin_map:
- i915_gem_object_unpin_map(ce->state->obj);
unpin_active:
intel_context_active_release(ce);
err:
@@ -1843,7 +2455,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;
- GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb);
+ GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
@@ -1859,7 +2471,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
*cs++ = MI_NOOP;
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *cs++ = rq->timeline->hwsp_offset;
+ *cs++ = i915_request_timeline(rq)->hwsp_offset;
*cs++ = 0;
*cs++ = rq->fence.seqno - 1;
@@ -1871,60 +2483,6 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
return 0;
}
-static int emit_pdps(struct i915_request *rq)
-{
- const struct intel_engine_cs * const engine = rq->engine;
- struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->hw_context->vm);
- int err, i;
- u32 *cs;
-
- GEM_BUG_ON(intel_vgpu_active(rq->i915));
-
- /*
- * Beware ye of the dragons, this sequence is magic!
- *
- * Small changes to this sequence can cause anything from
- * GPU hangs to forcewake errors and machine lockups!
- */
-
- /* Flush any residual operations from the context load */
- err = engine->emit_flush(rq, EMIT_FLUSH);
- if (err)
- return err;
-
- /* Magic required to prevent forcewake errors! */
- err = engine->emit_flush(rq, EMIT_INVALIDATE);
- if (err)
- return err;
-
- cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /* Ensure the LRI have landed before we invalidate & continue */
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
- for (i = GEN8_3LVL_PDPES; i--; ) {
- const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
- u32 base = engine->mmio_base;
-
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
- *cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
- *cs++ = lower_32_bits(pd_daddr);
- }
- *cs++ = MI_NOOP;
-
- intel_ring_advance(rq, cs);
-
- /* Be doubly sure the LRI have landed before proceeding */
- err = engine->emit_flush(rq, EMIT_FLUSH);
- if (err)
- return err;
-
- /* Re-invalidate the TLB for luck */
- return engine->emit_flush(rq, EMIT_INVALIDATE);
-}
-
static int execlists_request_alloc(struct i915_request *request)
{
int ret;
@@ -1947,10 +2505,7 @@ static int execlists_request_alloc(struct i915_request *request)
*/
/* Unconditionally invalidate GPU caches and TLBs. */
- if (i915_vm_is_4lvl(request->hw_context->vm))
- ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
- else
- ret = emit_pdps(request);
+ ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
if (ret)
return ret;
@@ -2002,12 +2557,6 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
return batch;
}
-static u32 slm_offset(struct intel_engine_cs *engine)
-{
- return intel_gt_scratch_offset(engine->gt,
- INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA);
-}
-
/*
* Typically we only have one indirect_ctx and per_ctx batch buffer which are
* initialized at the beginning and shared across all contexts but this field
@@ -2036,10 +2585,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
/* Actual scratch location is at 128 bytes offset */
batch = gen8_emit_pipe_control(batch,
PIPE_CONTROL_FLUSH_L3 |
- PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_STORE_DATA_INDEX |
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_QW_WRITE,
- slm_offset(engine));
+ LRC_PPHWSP_SCRATCH_ADDR);
*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -2397,23 +2946,29 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
&execlists->csb_status[reset_value]);
}
-static struct i915_request *active_request(struct i915_request *rq)
+static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
- const struct list_head * const list = &rq->timeline->requests;
- const struct intel_context * const ce = rq->hw_context;
- struct i915_request *active = NULL;
-
- list_for_each_entry_from_reverse(rq, list, link) {
- if (i915_request_completed(rq))
- break;
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x60;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return 0x54;
+ else if (engine->class == RENDER_CLASS)
+ return 0x58;
+ else
+ return -1;
+}
- if (rq->hw_context != ce)
- break;
+static void __execlists_reset_reg_state(const struct intel_context *ce,
+ const struct intel_engine_cs *engine)
+{
+ u32 *regs = ce->lrc_reg_state;
+ int x;
- active = rq;
+ x = lrc_ring_mi_mode(engine);
+ if (x != -1) {
+ regs[x + 1] &= ~STOP_RING;
+ regs[x + 1] |= STOP_RING << 16;
}
-
- return active;
}
static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
@@ -2421,7 +2976,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
struct intel_engine_execlists * const execlists = &engine->execlists;
struct intel_context *ce;
struct i915_request *rq;
- u32 *regs;
+
+ mb(); /* paranoia: read the CSB pointers from after the reset */
+ clflush(execlists->csb_write);
+ mb();
process_csb(engine); /* drain preemption events */
@@ -2437,16 +2995,23 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
if (!rq)
goto unwind;
+ /* We still have requests in-flight; the engine should be active */
+ GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+
ce = rq->hw_context;
- GEM_BUG_ON(i915_active_is_idle(&ce->active));
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
- rq = active_request(rq);
- if (!rq) {
- ce->ring->head = ce->ring->tail;
+
+ if (i915_request_completed(rq)) {
+ /* Idle context; tidy up the ring so we can restart afresh */
+ ce->ring->head = intel_ring_wrap(ce->ring, rq->tail);
goto out_replay;
}
+ /* Context has requests still in-flight; it should not be idle! */
+ GEM_BUG_ON(i915_active_is_idle(&ce->active));
+ rq = active_request(ce->timeline, rq);
ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
+ GEM_BUG_ON(ce->ring->head == ce->ring->tail);
/*
* If this request hasn't started yet, e.g. it is waiting on a
@@ -2486,19 +3051,16 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
- regs = ce->lrc_reg_state;
- if (engine->pinned_default_state) {
- memcpy(regs, /* skip restoring the vanilla PPHWSP */
- engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
- engine->context_size - PAGE_SIZE);
- }
- execlists_init_reg_state(regs, ce, engine, ce->ring);
+ GEM_BUG_ON(!intel_context_is_pinned(ce));
+ restore_default_state(ce, engine);
out_replay:
- GEM_TRACE("%s replay {head:%04x, tail:%04x\n",
+ GEM_TRACE("%s replay {head:%04x, tail:%04x}\n",
engine->name, ce->ring->head, ce->ring->tail);
intel_ring_update_space(ce->ring);
+ __execlists_reset_reg_state(ce, engine);
__execlists_update_reg_state(ce, engine);
+ ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
unwind:
/* Push back any incomplete requests for replay after the reset. */
@@ -2552,12 +3114,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
__execlists_reset(engine, true);
/* Mark all executing requests as skipped. */
- list_for_each_entry(rq, &engine->active.requests, sched.link) {
- if (!i915_request_signaled(rq))
- dma_fence_set_error(&rq->fence, -EIO);
-
- i915_request_mark_complete(rq);
- }
+ list_for_each_entry(rq, &engine->active.requests, sched.link)
+ mark_eio(rq);
/* Flush the queued requests to the timeline list (for retiring). */
while ((rb = rb_first_cached(&execlists->queue))) {
@@ -2565,10 +3123,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
- list_del_init(&rq->sched.link);
+ mark_eio(rq);
__i915_request_submit(rq);
- dma_fence_set_error(&rq->fence, -EIO);
- i915_request_mark_complete(rq);
}
rb_erase_cached(&p->node, &execlists->queue);
@@ -2584,13 +3140,15 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
RB_CLEAR_NODE(rb);
spin_lock(&ve->base.active.lock);
- if (ve->request) {
- ve->request->engine = engine;
- __i915_request_submit(ve->request);
- dma_fence_set_error(&ve->request->fence, -EIO);
- i915_request_mark_complete(ve->request);
+ rq = fetch_and_zero(&ve->request);
+ if (rq) {
+ mark_eio(rq);
+
+ rq->engine = engine;
+ __i915_request_submit(rq);
+ i915_request_put(rq);
+
ve->base.execlists.queue_priority_hint = INT_MIN;
- ve->request = NULL;
}
spin_unlock(&ve->base.active.lock);
}
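mark_eio(), substituted above for the removed open-coded error handling, is defined earlier in this patch. Judging purely from the lines it replaces, it amounts to roughly:

static void mark_eio_sketch(struct i915_request *rq)
{
	if (!i915_request_signaled(rq))
		dma_fence_set_error(&rq->fence, -EIO);

	i915_request_mark_complete(rq);
}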
@@ -2723,7 +3281,7 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
}
*cs++ = cmd;
- *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+ *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
*cs++ = 0; /* upper addr */
*cs++ = 0; /* value */
intel_ring_advance(request, cs);
@@ -2734,10 +3292,6 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
static int gen8_emit_flush_render(struct i915_request *request,
u32 mode)
{
- struct intel_engine_cs *engine = request->engine;
- u32 scratch_addr =
- intel_gt_scratch_offset(engine->gt,
- INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
bool vf_flush_wa = false, dc_flush_wa = false;
u32 *cs, flags = 0;
int len;
@@ -2759,7 +3313,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
/*
* On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
@@ -2792,7 +3346,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
0);
- cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
if (dc_flush_wa)
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
@@ -2805,11 +3359,6 @@ static int gen8_emit_flush_render(struct i915_request *request,
static int gen11_emit_flush_render(struct i915_request *request,
u32 mode)
{
- struct intel_engine_cs *engine = request->engine;
- const u32 scratch_addr =
- intel_gt_scratch_offset(engine->gt,
- INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
-
if (mode & EMIT_FLUSH) {
u32 *cs;
u32 flags = 0;
@@ -2822,13 +3371,13 @@ static int gen11_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
cs = intel_ring_begin(request, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(request, cs);
}
@@ -2846,19 +3395,111 @@ static int gen11_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
cs = intel_ring_begin(request, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(request, cs);
}
return 0;
}
+static u32 preparser_disable(bool state)
+{
+ return MI_ARB_CHECK | 1 << 8 | state;
+}
+
+static int gen12_emit_flush_render(struct i915_request *request,
+ u32 mode)
+{
+ if (mode & EMIT_FLUSH) {
+ u32 flags = 0;
+ u32 *cs;
+
+ flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ /* Wa_1409600907:tgl */
+ flags |= PIPE_CONTROL_DEPTH_STALL;
+ flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+ flags |= PIPE_CONTROL_FLUSH_ENABLE;
+ flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
+
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+ flags |= PIPE_CONTROL_QW_WRITE;
+
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ cs = intel_ring_begin(request, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+ intel_ring_advance(request, cs);
+ }
+
+ if (mode & EMIT_INVALIDATE) {
+ u32 flags = 0;
+ u32 *cs;
+
+ flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TLB_INVALIDATE;
+ flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
+
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+ flags |= PIPE_CONTROL_QW_WRITE;
+
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ cs = intel_ring_begin(request, 8);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /*
+ * Prevent the pre-parser from skipping past the TLB
+ * invalidate and loading a stale page for the batch
+ * buffer / request payload.
+ */
+ *cs++ = preparser_disable(true);
+
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+
+ *cs++ = preparser_disable(false);
+ intel_ring_advance(request, cs);
+
+ /*
+ * Wa_1604544889:tgl
+ */
+ if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
+ flags = 0;
+ flags |= PIPE_CONTROL_CS_STALL;
+ flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
+
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+ flags |= PIPE_CONTROL_QW_WRITE;
+
+ cs = intel_ring_begin(request, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cs = gen8_emit_pipe_control(cs, flags,
+ LRC_PPHWSP_SCRATCH_ADDR);
+ intel_ring_advance(request, cs);
+ }
+ }
+
+ return 0;
+}
+
/*
* Reserve space for 2 NOOPs at the end of each request to be
* used as a workaround for not being allowed to do lite
@@ -2907,7 +3548,7 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
{
cs = gen8_emit_ggtt_write(cs,
request->fence.seqno,
- request->timeline->hwsp_offset,
+ i915_request_active_timeline(request)->hwsp_offset,
0);
return gen8_emit_fini_breadcrumb_footer(request, cs);
@@ -2915,28 +3556,28 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
- cs = gen8_emit_ggtt_write_rcs(cs,
- request->fence.seqno,
- request->timeline->hwsp_offset,
- PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_DC_FLUSH_ENABLE);
-
- /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
cs = gen8_emit_pipe_control(cs,
- PIPE_CONTROL_FLUSH_ENABLE |
- PIPE_CONTROL_CS_STALL,
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE,
0);
+ /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
+ cs = gen8_emit_ggtt_write_rcs(cs,
+ request->fence.seqno,
+ i915_request_active_timeline(request)->hwsp_offset,
+ PIPE_CONTROL_FLUSH_ENABLE |
+ PIPE_CONTROL_CS_STALL);
+
return gen8_emit_fini_breadcrumb_footer(request, cs);
}
-static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
- u32 *cs)
+static u32 *
+gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
- request->timeline->hwsp_offset,
+ i915_request_active_timeline(request)->hwsp_offset,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@@ -2947,9 +3588,88 @@ static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
return gen8_emit_fini_breadcrumb_footer(request, cs);
}
+/*
+ * Note that the CS instruction pre-parser will not stall on the breadcrumb
+ * flush and will continue pre-fetching the instructions after it before the
+ * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
+ * BB_START/END instructions, so, even though we might pre-fetch the preamble
+ * of the next request before the memory has been flushed, we're guaranteed that
+ * we won't access the batch itself too early.
+ * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
+ * so, if the current request is modifying an instruction in the next request on
+ * the same intel_context, we might pre-fetch and then execute the pre-update
+ * instruction. To avoid this, the users of self-modifying code should either
+ * disable the parser around the code emitting the memory writes, via a new flag
+ * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
+ * the in-kernel use-cases we've opted to use a separate context, see
+ * reloc_gpu() as an example.
+ * All the above applies only to the instructions themselves. Non-inline data
+ * used by the instructions is not pre-fetched.
+ */
+
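A minimal sketch of the first option mentioned above, bracketing the self-modifying writes with the preparser_disable() helper added by this patch; the payload between the brackets is illustrative only:

static u32 *bracket_smc_writes_sketch(u32 *cs)
{
	*cs++ = preparser_disable(true);	/* stall pre-fetching */
	/* ... emit the MI_STORE_DWORD_IMM writes that patch later code ... */
	*cs++ = preparser_disable(false);	/* resume pre-fetching */

	return cs;
}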
+static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
+{
+ *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = intel_hws_preempt_address(request->engine);
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = MI_NOOP;
+
+ return cs;
+}
+
+static __always_inline u32*
+gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
+{
+ *cs++ = MI_USER_INTERRUPT;
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ if (intel_engine_has_semaphores(request->engine))
+ cs = gen12_emit_preempt_busywait(request, cs);
+
+ request->tail = intel_ring_offset(request, cs);
+ assert_ring_tail_valid(request->ring, request->tail);
+
+ return gen8_emit_wa_tail(request, cs);
+}
+
+static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+{
+ cs = gen8_emit_ggtt_write(cs,
+ request->fence.seqno,
+ i915_request_active_timeline(request)->hwsp_offset,
+ 0);
+
+ return gen12_emit_fini_breadcrumb_footer(request, cs);
+}
+
+static u32 *
+gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
+{
+ cs = gen8_emit_ggtt_write_rcs(cs,
+ request->fence.seqno,
+ i915_request_active_timeline(request)->hwsp_offset,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ /* Wa_1409600907:tgl */
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE |
+ PIPE_CONTROL_HDC_PIPELINE_FLUSH);
+
+ return gen12_emit_fini_breadcrumb_footer(request, cs);
+}
+
static void execlists_park(struct intel_engine_cs *engine)
{
- del_timer(&engine->execlists.timer);
+ cancel_timer(&engine->execlists.timer);
+ cancel_timer(&engine->execlists.preempt);
}
void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
@@ -2972,6 +3692,9 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
}
+
+ if (INTEL_GEN(engine->i915) >= 12)
+ engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
}
static void execlists_destroy(struct intel_engine_cs *engine)
@@ -2999,6 +3722,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_flush = gen8_emit_flush;
engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
+ if (INTEL_GEN(engine->i915) >= 12)
+ engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
engine->set_default_submission = intel_execlists_set_default_submission;
@@ -3044,6 +3769,9 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
{
switch (INTEL_GEN(engine->i915)) {
case 12:
+ engine->emit_flush = gen12_emit_flush_render;
+ engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
+ break;
case 11:
engine->emit_flush = gen11_emit_flush_render;
engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
@@ -3059,7 +3787,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
{
tasklet_init(&engine->execlists.tasklet,
execlists_submission_tasklet, (unsigned long)engine);
- timer_setup(&engine->execlists.timer, execlists_submission_timer, 0);
+ timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
+ timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
logical_ring_default_vfuncs(engine);
logical_ring_default_irqs(engine);
@@ -3116,7 +3845,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
return 0;
}
-static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
+static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
{
u32 indirect_ctx_offset;
@@ -3149,86 +3878,50 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
return indirect_ctx_offset;
}
-static void execlists_init_reg_state(u32 *regs,
- struct intel_context *ce,
- struct intel_engine_cs *engine,
- struct intel_ring *ring)
-{
- struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
- bool rcs = engine->class == RENDER_CLASS;
- u32 base = engine->mmio_base;
-
- /*
- * A context is actually a big batch buffer with several
- * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
- * values we are setting here are only for the first context restore:
- * on a subsequent save, the GPU will recreate this batchbuffer with new
- * values (including all the missing MI_LOAD_REGISTER_IMM commands that
- * we are not initializing here).
- *
- * Must keep consistent with virtual_update_register_offsets().
- */
- regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
- MI_LRI_FORCE_POSTED;
- CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
+static void init_common_reg_state(u32 * const regs,
+ const struct intel_engine_cs *engine,
+ const struct intel_ring *ring)
+{
+ regs[CTX_CONTEXT_CONTROL] =
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
- _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
- if (INTEL_GEN(engine->i915) < 11) {
- regs[CTX_CONTEXT_CONTROL + 1] |=
+ _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+ if (INTEL_GEN(engine->i915) < 11)
+ regs[CTX_CONTEXT_CONTROL] |=
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
- }
- CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
- CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
- RING_CTL_SIZE(ring->size) | RING_VALID);
- CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
- CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
- if (rcs) {
- struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
-
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
- RING_INDIRECT_CTX_OFFSET(base), 0);
- if (wa_ctx->indirect_ctx.size) {
- u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
-
- regs[CTX_RCS_INDIRECT_CTX + 1] =
- (ggtt_offset + wa_ctx->indirect_ctx.offset) |
- (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
-
- regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
- intel_lr_indirect_ctx_offset(engine) << 6;
- }
- CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
- if (wa_ctx->per_ctx.size) {
- u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+ regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+ regs[CTX_BB_STATE] = RING_BB_PPGTT;
+}
- regs[CTX_BB_PER_CTX_PTR + 1] =
- (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
- }
+static void init_wa_bb_reg_state(u32 * const regs,
+ const struct intel_engine_cs *engine,
+ u32 pos_bb_per_ctx)
+{
+ const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
+
+ if (wa_ctx->per_ctx.size) {
+ const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+
+ regs[pos_bb_per_ctx] =
+ (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
}
- regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+ if (wa_ctx->indirect_ctx.size) {
+ const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
- CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
- /* PDP values well be assigned later if needed */
- CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
- CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
- CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
- CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
- CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
- CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
- CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
- CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
+ regs[pos_bb_per_ctx + 2] =
+ (ggtt_offset + wa_ctx->indirect_ctx.offset) |
+ (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
+ regs[pos_bb_per_ctx + 4] =
+ intel_lr_indirect_ctx_offset(engine) << 6;
+ }
+}
+
+static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
+{
if (i915_vm_is_4lvl(&ppgtt->vm)) {
/* 64b PPGTT (48bit canonical)
* PDP0_DESCRIPTOR contains the base address to PML4 and
@@ -3241,15 +3934,47 @@ static void execlists_init_reg_state(u32 *regs,
ASSIGN_CTX_PDP(ppgtt, regs, 1);
ASSIGN_CTX_PDP(ppgtt, regs, 0);
}
+}
+
+static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
+{
+ if (i915_is_ggtt(vm))
+ return i915_vm_to_ggtt(vm)->alias;
+ else
+ return i915_vm_to_ppgtt(vm);
+}
- if (rcs) {
- regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
- CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
+static void execlists_init_reg_state(u32 *regs,
+ const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ const struct intel_ring *ring,
+ bool close)
+{
+ /*
+ * A context is actually a big batch buffer with several
+ * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
+ * values we are setting here are only for the first context restore:
+ * on a subsequent save, the GPU will recreate this batchbuffer with new
+ * values (including all the missing MI_LOAD_REGISTER_IMM commands that
+ * we are not initializing here).
+ *
+ * Must keep consistent with virtual_update_register_offsets().
+ */
+ u32 *bbe = set_offsets(regs, reg_offsets(engine), engine);
+
+ if (close) { /* Close the batch; used mainly by live_lrc_layout() */
+ *bbe = MI_BATCH_BUFFER_END;
+ if (INTEL_GEN(engine->i915) >= 10)
+ *bbe |= BIT(0);
}
- regs[CTX_END] = MI_BATCH_BUFFER_END;
- if (INTEL_GEN(engine->i915) >= 10)
- regs[CTX_END] |= BIT(0);
+ init_common_reg_state(regs, engine, ring);
+ init_ppgtt_reg_state(regs, vm_alias(ce->vm));
+
+ init_wa_bb_reg_state(regs, engine,
+ INTEL_GEN(engine->i915) >= 12 ?
+ GEN12_CTX_BB_PER_CTX_PTR :
+ CTX_BB_PER_CTX_PTR);
}
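The comment at the top of execlists_init_reg_state() describes the register state as a stream of MI_LOAD_REGISTER_IMM packets. Purely as an illustration of that layout (the offsets and counts below are made up, not the per-gen tables encoded earlier in this patch):

static const u32 image_layout_sketch[] = {
	MI_LOAD_REGISTER_IMM(2) | MI_LRI_FORCE_POSTED,
	0x2244, 0,	/* e.g. RING_CONTEXT_CONTROL of the render engine */
	0x2034, 0,	/* e.g. RING_HEAD of the render engine */
	MI_BATCH_BUFFER_END,
};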
static int
@@ -3258,6 +3983,7 @@ populate_lr_context(struct intel_context *ce,
struct intel_engine_cs *engine,
struct intel_ring *ring)
{
+ bool inhibit = true;
void *vaddr;
u32 *regs;
int ret;
@@ -3272,12 +3998,6 @@ populate_lr_context(struct intel_context *ce,
set_redzone(vaddr, engine);
if (engine->default_state) {
- /*
- * We only want to copy over the template context state;
- * skipping over the headers reserved for GuC communication,
- * leaving those as zero.
- */
- const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE;
void *defaults;
defaults = i915_gem_object_pin_map(engine->default_state,
@@ -3287,23 +4007,22 @@ populate_lr_context(struct intel_context *ce,
goto err_unpin_ctx;
}
- memcpy(vaddr + start, defaults + start, engine->context_size);
+ memcpy(vaddr, defaults, engine->context_size);
i915_gem_object_unpin_map(engine->default_state);
+ inhibit = false;
}
/* The second page of the context object contains some fields which must
* be set up prior to the first execution. */
regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
- execlists_init_reg_state(regs, ce, engine, ring);
- if (!engine->default_state)
- regs[CTX_CONTEXT_CONTROL + 1] |=
+ execlists_init_reg_state(regs, ce, engine, ring, inhibit);
+ if (inhibit)
+ regs[CTX_CONTEXT_CONTROL] |=
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
ret = 0;
err_unpin_ctx:
- __i915_gem_object_flush_map(ctx_obj,
- LRC_HEADER_PAGES * PAGE_SIZE,
- engine->context_size);
+ __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
i915_gem_object_unpin_map(ctx_obj);
return ret;
}
@@ -3320,11 +4039,6 @@ static int __execlists_context_alloc(struct intel_context *ce,
GEM_BUG_ON(ce->state);
context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
- /*
- * Before the actual start of the context image, we insert a few pages
- * for our own use and for sharing with the GuC.
- */
- context_size += LRC_HEADER_PAGES * PAGE_SIZE;
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
context_size += I915_GTT_PAGE_SIZE; /* for redzone */
@@ -3436,8 +4150,9 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve)
return;
swap(ve->siblings[swp], ve->siblings[0]);
- virtual_update_register_offsets(ve->context.lrc_reg_state,
- ve->siblings[0]);
+ if (!intel_engine_has_relative_mmio(ve->siblings[0]))
+ virtual_update_register_offsets(ve->context.lrc_reg_state,
+ ve->siblings[0]);
}
static int virtual_context_pin(struct intel_context *ce)
@@ -3594,6 +4309,8 @@ submit_engine:
static void virtual_submit_request(struct i915_request *rq)
{
struct virtual_engine *ve = to_virtual_engine(rq->engine);
+ struct i915_request *old;
+ unsigned long flags;
GEM_TRACE("%s: rq=%llx:%lld\n",
ve->base.name,
@@ -3602,15 +4319,31 @@ static void virtual_submit_request(struct i915_request *rq)
GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
- GEM_BUG_ON(ve->request);
- GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+ spin_lock_irqsave(&ve->base.active.lock, flags);
- ve->base.execlists.queue_priority_hint = rq_prio(rq);
- WRITE_ONCE(ve->request, rq);
+ old = ve->request;
+ if (old) { /* background completion event from preempt-to-busy */
+ GEM_BUG_ON(!i915_request_completed(old));
+ __i915_request_submit(old);
+ i915_request_put(old);
+ }
+
+ if (i915_request_completed(rq)) {
+ __i915_request_submit(rq);
+
+ ve->base.execlists.queue_priority_hint = INT_MIN;
+ ve->request = NULL;
+ } else {
+ ve->base.execlists.queue_priority_hint = rq_prio(rq);
+ ve->request = i915_request_get(rq);
- list_move_tail(&rq->sched.link, virtual_queue(ve));
+ GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+ list_move_tail(&rq->sched.link, virtual_queue(ve));
- tasklet_schedule(&ve->base.execlists.tasklet);
+ tasklet_schedule(&ve->base.execlists.tasklet);
+ }
+
+ spin_unlock_irqrestore(&ve->base.active.lock, flags);
}
static struct ve_bond *
@@ -3631,18 +4364,22 @@ static void
virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
{
struct virtual_engine *ve = to_virtual_engine(rq->engine);
+ intel_engine_mask_t allowed, exec;
struct ve_bond *bond;
+ allowed = ~to_request(signal)->engine->mask;
+
bond = virtual_find_bond(ve, to_request(signal)->engine);
- if (bond) {
- intel_engine_mask_t old, new, cmp;
+ if (bond)
+ allowed &= bond->sibling_mask;
- cmp = READ_ONCE(rq->execution_mask);
- do {
- old = cmp;
- new = cmp & bond->sibling_mask;
- } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
- }
+ /* Restrict the bonded request to run on only the available engines */
+ exec = READ_ONCE(rq->execution_mask);
+ while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
+ ;
+
+ /* Prevent the master from being re-run on the bonded engines */
+ to_request(signal)->execution_mask &= ~allowed;
}
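A minimal sketch (not part of the patch) of the lockless narrowing used above: try_cmpxchg() retries until the request's execution_mask has been ANDed with the allowed set, with the expected value refreshed automatically on each failed attempt.

static void narrow_execution_mask(unsigned int *mask, unsigned int allowed)
{
	unsigned int exec = READ_ONCE(*mask);

	/* on failure, try_cmpxchg() reloads 'exec' with the current value */
	while (!try_cmpxchg(mask, &exec, exec & allowed))
		;
}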
struct intel_context *
@@ -3666,6 +4403,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
ve->base.i915 = ctx->i915;
ve->base.gt = siblings[0]->gt;
+ ve->base.uncore = siblings[0]->uncore;
ve->base.id = -1;
ve->base.class = OTHER_CLASS;
ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
@@ -3689,6 +4427,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
+ intel_engine_init_breadcrumbs(&ve->base);
intel_engine_init_execlists(&ve->base);
@@ -3851,6 +4590,18 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
return 0;
}
+struct intel_engine_cs *
+intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
+ unsigned int sibling)
+{
+ struct virtual_engine *ve = to_virtual_engine(engine);
+
+ if (sibling >= ve->num_siblings)
+ return NULL;
+
+ return ve->siblings[sibling];
+}
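A minimal usage sketch for the new accessor (assumption: the caller has already confirmed the engine is virtual, e.g. with intel_engine_is_virtual()); out-of-range indices return NULL, so iteration is a simple loop:

static void example_for_each_sibling(struct intel_engine_cs *engine)
{
	struct intel_engine_cs *sibling;
	unsigned int i;

	for (i = 0; (sibling = intel_virtual_engine_get_sibling(engine, i)); i++)
		pr_debug("sibling %u: %s\n", i, sibling->name);
}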
+
void intel_execlists_show_requests(struct intel_engine_cs *engine,
struct drm_printer *m,
void (*show_request)(struct drm_printer *m,
@@ -3939,6 +4690,8 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
u32 head,
bool scrub)
{
+ GEM_BUG_ON(!intel_context_is_pinned(ce));
+
/*
* We want a simple context + ring to execute the breadcrumb update.
* We cannot rely on the context being intact across the GPU hang,
@@ -3947,16 +4700,8 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
- if (scrub) {
- u32 *regs = ce->lrc_reg_state;
-
- if (engine->pinned_default_state) {
- memcpy(regs, /* skip restoring the vanilla PPHWSP */
- engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
- engine->context_size - PAGE_SIZE);
- }
- execlists_init_reg_state(regs, ce, engine, ce->ring);
- }
+ if (scrub)
+ restore_default_state(ce, engine);
/* Rerun the request; its payload has been neutered (if guilty). */
ce->ring->head = head;
@@ -3965,6 +4710,13 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
__execlists_update_reg_state(ce, engine);
}
+bool
+intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
+{
+ return engine->set_default_submission ==
+ intel_execlists_set_default_submission;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index c2bba82bcc16..04511d8ebdc1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -43,6 +43,7 @@ struct intel_engine_cs;
#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0)
#define CTX_CTRL_RS_CTX_ENABLE (1 << 1)
#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2)
+#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8)
#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0)
#define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510)
#define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550)
@@ -66,6 +67,12 @@ struct intel_engine_cs;
#define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8)
#define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0)
+#define MAX_CONTEXT_HW_ID (1 << 21) /* exclusive */
+#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
+#define GEN11_MAX_CONTEXT_HW_ID (1 << 11) /* exclusive */
+/* In Gen12, ID 0x7FF is reserved to indicate idle */
+#define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1)
+
enum {
INTEL_CONTEXT_SCHEDULE_IN = 0,
INTEL_CONTEXT_SCHEDULE_OUT,
@@ -79,30 +86,15 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine);
int intel_execlists_submission_init(struct intel_engine_cs *engine);
/* Logical Ring Contexts */
-
-/*
- * We allocate a header at the start of the context image for our own
- * use, therefore the actual location of the logical state is offset
- * from the start of the VMA. The layout is
- *
- * | [guc] | [hwsp] [logical state] |
- * |<- our header ->|<- context image ->|
- *
- */
-/* The first page is used for sharing data with the GuC */
-#define LRC_GUCSHR_PN (0)
-#define LRC_GUCSHR_SZ (1)
/* At the start of the context image is its per-process HWS page */
-#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + LRC_GUCSHR_SZ)
+#define LRC_PPHWSP_PN (0)
#define LRC_PPHWSP_SZ (1)
-/* Finally we have the logical state for the context */
+/* After the PPHWSP we have the logical state for the context */
#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ)
-/*
- * Currently we include the PPHWSP in __intel_engine_context_size() so
- * the size of the header is synonymous with the start of the PPHWSP.
- */
-#define LRC_HEADER_PAGES LRC_PPHWSP_PN
+/* Space within PPHWSP reserved to be used as scratch */
+#define LRC_PPHWSP_SCRATCH 0x34
+#define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32))
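A quick sanity check of the simplified layout above (illustrative, not part of the patch): page 0 of the context object is now the per-process HWSP (LRC_PPHWSP_PN = 0), page 1 holds the logical register state (LRC_STATE_PN = 1), and the scratch area sits at dword 0x34 of the PPHWSP, i.e. byte offset 0x34 * sizeof(u32) = 0xd0 from the start of the context image.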
void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
@@ -131,4 +123,11 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
const struct intel_engine_cs *master,
const struct intel_engine_cs *sibling);
+struct intel_engine_cs *
+intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
+ unsigned int sibling);
+
+bool
+intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
+
#endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index b8f20ad71169..06ab0276e10e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -9,55 +9,41 @@
#include <linux/types.h>
-/* GEN8+ Reg State Context */
-#define CTX_LRI_HEADER_0 0x01
-#define CTX_CONTEXT_CONTROL 0x02
-#define CTX_RING_HEAD 0x04
-#define CTX_RING_TAIL 0x06
-#define CTX_RING_BUFFER_START 0x08
-#define CTX_RING_BUFFER_CONTROL 0x0a
-#define CTX_BB_HEAD_U 0x0c
-#define CTX_BB_HEAD_L 0x0e
-#define CTX_BB_STATE 0x10
-#define CTX_SECOND_BB_HEAD_U 0x12
-#define CTX_SECOND_BB_HEAD_L 0x14
-#define CTX_SECOND_BB_STATE 0x16
-#define CTX_BB_PER_CTX_PTR 0x18
-#define CTX_RCS_INDIRECT_CTX 0x1a
-#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c
-#define CTX_LRI_HEADER_1 0x21
-#define CTX_CTX_TIMESTAMP 0x22
-#define CTX_PDP3_UDW 0x24
-#define CTX_PDP3_LDW 0x26
-#define CTX_PDP2_UDW 0x28
-#define CTX_PDP2_LDW 0x2a
-#define CTX_PDP1_UDW 0x2c
-#define CTX_PDP1_LDW 0x2e
-#define CTX_PDP0_UDW 0x30
-#define CTX_PDP0_LDW 0x32
-#define CTX_LRI_HEADER_2 0x41
-#define CTX_R_PWR_CLK_STATE 0x42
-#define CTX_END 0x44
-
-#define CTX_REG(reg_state, pos, reg, val) do { \
- u32 *reg_state__ = (reg_state); \
- const u32 pos__ = (pos); \
- (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
- (reg_state__)[(pos__) + 1] = (val); \
-} while (0)
+/* GEN8 to GEN11 Reg State Context */
+#define CTX_CONTEXT_CONTROL (0x02 + 1)
+#define CTX_RING_HEAD (0x04 + 1)
+#define CTX_RING_TAIL (0x06 + 1)
+#define CTX_RING_BUFFER_START (0x08 + 1)
+#define CTX_RING_BUFFER_CONTROL (0x0a + 1)
+#define CTX_BB_STATE (0x10 + 1)
+#define CTX_BB_PER_CTX_PTR (0x18 + 1)
+#define CTX_PDP3_UDW (0x24 + 1)
+#define CTX_PDP3_LDW (0x26 + 1)
+#define CTX_PDP2_UDW (0x28 + 1)
+#define CTX_PDP2_LDW (0x2a + 1)
+#define CTX_PDP1_UDW (0x2c + 1)
+#define CTX_PDP1_LDW (0x2e + 1)
+#define CTX_PDP0_UDW (0x30 + 1)
+#define CTX_PDP0_LDW (0x32 + 1)
+#define CTX_R_PWR_CLK_STATE (0x42 + 1)
+
+#define GEN9_CTX_RING_MI_MODE 0x54
+
+/* GEN12+ Reg State Context */
+#define GEN12_CTX_BB_PER_CTX_PTR (0x12 + 1)
#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
u32 *reg_state__ = (reg_state); \
const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \
- (reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); \
- (reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \
+ (reg_state__)[CTX_PDP ## n ## _UDW] = upper_32_bits(addr__); \
+ (reg_state__)[CTX_PDP ## n ## _LDW] = lower_32_bits(addr__); \
} while (0)
#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
u32 *reg_state__ = (reg_state); \
const u64 addr__ = px_dma(ppgtt->pd); \
- (reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \
- (reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \
+ (reg_state__)[CTX_PDP0_UDW] = upper_32_bits(addr__); \
+ (reg_state__)[CTX_PDP0_LDW] = lower_32_bits(addr__); \
} while (0)
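Because the CTX_* offsets above now index the value slot directly (the old "+ 1" is folded into each define), updating a field is a plain array store; a minimal sketch assuming regs points at a pinned context's lrc_reg_state:

static void example_set_ring_tail(u32 *regs, u32 tail)
{
	/* CTX_RING_TAIL addresses the value, not the preceding LRI header */
	regs[CTX_RING_TAIL] = tail;
}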
#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 728704bbbe18..2b977991b785 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -26,6 +26,7 @@
#include "intel_gt.h"
#include "intel_mocs.h"
#include "intel_lrc.h"
+#include "intel_ring.h"
/* structures required */
struct drm_i915_mocs_entry {
@@ -199,14 +200,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
MOCS_ENTRY(15, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
L3_3_WB), \
- /* Bypass LLC - Uncached (EHL+) */ \
- MOCS_ENTRY(16, \
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
- L3_1_UC), \
- /* Bypass LLC - L3 (Read-Only) (EHL+) */ \
- MOCS_ENTRY(17, \
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
- L3_3_WB), \
/* Self-Snoop - L3 + LLC */ \
MOCS_ENTRY(18, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
@@ -270,7 +263,7 @@ static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = {
L3_1_UC),
/* HW Special Case (Displayable) */
MOCS_ENTRY(61,
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1),
+ LE_1_UC | LE_TC_1_LLC,
L3_3_WB),
};
@@ -287,10 +280,9 @@ static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
GEN11_MOCS_ENTRIES
};
-static bool get_mocs_settings(struct intel_gt *gt,
+static bool get_mocs_settings(const struct drm_i915_private *i915,
struct drm_i915_mocs_table *table)
{
- struct drm_i915_private *i915 = gt->i915;
bool result = false;
if (INTEL_GEN(i915) >= 12) {
@@ -331,9 +323,9 @@ static bool get_mocs_settings(struct intel_gt *gt,
return result;
}
-static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index)
+static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index)
{
- switch (engine_id) {
+ switch (engine->id) {
case RCS0:
return GEN9_GFX_MOCS(index);
case VCS0:
@@ -347,7 +339,7 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index)
case VCS2:
return GEN11_MFX2_MOCS(index);
default:
- MISSING_CASE(engine_id);
+ MISSING_CASE(engine->id);
return INVALID_MMIO_REG;
}
}
@@ -365,118 +357,25 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table,
return table->table[I915_MOCS_PTE].control_value;
}
-/**
- * intel_mocs_init_engine() - emit the mocs control table
- * @engine: The engine for whom to emit the registers.
- *
- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
- * given table starting at the given address.
- */
-void intel_mocs_init_engine(struct intel_engine_cs *engine)
+static void init_mocs_table(struct intel_engine_cs *engine,
+ const struct drm_i915_mocs_table *table)
{
- struct intel_gt *gt = engine->gt;
- struct intel_uncore *uncore = gt->uncore;
- struct drm_i915_mocs_table table;
- unsigned int index;
- u32 unused_value;
-
- /* Platforms with global MOCS do not need per-engine initialization. */
- if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
- return;
-
- /* Called under a blanket forcewake */
- assert_forcewakes_active(uncore, FORCEWAKE_ALL);
-
- if (!get_mocs_settings(gt, &table))
- return;
-
- /* Set unused values to PTE */
- unused_value = table.table[I915_MOCS_PTE].control_value;
-
- for (index = 0; index < table.size; index++) {
- u32 value = get_entry_control(&table, index);
+ struct intel_uncore *uncore = engine->uncore;
+ u32 unused_value = table->table[I915_MOCS_PTE].control_value;
+ unsigned int i;
+ for (i = 0; i < table->size; i++)
intel_uncore_write_fw(uncore,
- mocs_register(engine->id, index),
- value);
- }
+ mocs_register(engine, i),
+ get_entry_control(table, i));
- /* All remaining entries are also unused */
- for (; index < table.n_entries; index++)
+ /* All remaining entries are unused */
+ for (; i < table->n_entries; i++)
intel_uncore_write_fw(uncore,
- mocs_register(engine->id, index),
+ mocs_register(engine, i),
unused_value);
}
-static void intel_mocs_init_global(struct intel_gt *gt)
-{
- struct intel_uncore *uncore = gt->uncore;
- struct drm_i915_mocs_table table;
- unsigned int index;
-
- GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915));
-
- if (!get_mocs_settings(gt, &table))
- return;
-
- if (GEM_DEBUG_WARN_ON(table.size > table.n_entries))
- return;
-
- for (index = 0; index < table.size; index++)
- intel_uncore_write(uncore,
- GEN12_GLOBAL_MOCS(index),
- table.table[index].control_value);
-
- /*
- * Ok, now set the unused entries to the invalid entry (index 0). These
- * entries are officially undefined and no contract for the contents and
- * settings is given for these entries.
- */
- for (; index < table.n_entries; index++)
- intel_uncore_write(uncore,
- GEN12_GLOBAL_MOCS(index),
- table.table[0].control_value);
-}
-
-static int emit_mocs_control_table(struct i915_request *rq,
- const struct drm_i915_mocs_table *table)
-{
- enum intel_engine_id engine = rq->engine->id;
- unsigned int index;
- u32 unused_value;
- u32 *cs;
-
- if (GEM_WARN_ON(table->size > table->n_entries))
- return -ENODEV;
-
- /* Set unused values to PTE */
- unused_value = table->table[I915_MOCS_PTE].control_value;
-
- cs = intel_ring_begin(rq, 2 + 2 * table->n_entries);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
-
- for (index = 0; index < table->size; index++) {
- u32 value = get_entry_control(table, index);
-
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
- *cs++ = value;
- }
-
- /* All remaining entries are also unused */
- for (; index < table->n_entries; index++) {
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
- *cs++ = unused_value;
- }
-
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
/*
* Get l3cc_value from MOCS entry taking into account when it's not used:
* I915_MOCS_PTE's value is returned in this case.
@@ -494,141 +393,99 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
u16 low,
u16 high)
{
- return low | high << 16;
+ return low | (u32)high << 16;
}
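Worked example for the packing above (illustrative values): with low = 0x0030 for entry 2*i and high = 0x0010 for entry 2*i + 1, l3cc_combine() returns 0x0030 | (0x0010 << 16) = 0x00100030, i.e. two 16-bit l3cc fields per 32-bit GEN9_LNCFCMOCS write.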
-static int emit_mocs_l3cc_table(struct i915_request *rq,
- const struct drm_i915_mocs_table *table)
+static void init_l3cc_table(struct intel_engine_cs *engine,
+ const struct drm_i915_mocs_table *table)
{
- u16 unused_value;
+ struct intel_uncore *uncore = engine->uncore;
+ u16 unused_value = table->table[I915_MOCS_PTE].l3cc_value;
unsigned int i;
- u32 *cs;
-
- if (GEM_WARN_ON(table->size > table->n_entries))
- return -ENODEV;
-
- /* Set unused values to PTE */
- unused_value = table->table[I915_MOCS_PTE].l3cc_value;
-
- cs = intel_ring_begin(rq, 2 + table->n_entries);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
for (i = 0; i < table->size / 2; i++) {
u16 low = get_entry_l3cc(table, 2 * i);
u16 high = get_entry_l3cc(table, 2 * i + 1);
- *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
- *cs++ = l3cc_combine(table, low, high);
+ intel_uncore_write(uncore,
+ GEN9_LNCFCMOCS(i),
+ l3cc_combine(table, low, high));
}
/* Odd table size - 1 left over */
- if (table->size & 0x01) {
+ if (table->size & 1) {
u16 low = get_entry_l3cc(table, 2 * i);
- *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
- *cs++ = l3cc_combine(table, low, unused_value);
+ intel_uncore_write(uncore,
+ GEN9_LNCFCMOCS(i),
+ l3cc_combine(table, low, unused_value));
i++;
}
/* All remaining entries are also unused */
- for (; i < table->n_entries / 2; i++) {
- *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
- *cs++ = l3cc_combine(table, unused_value, unused_value);
- }
+ for (; i < table->n_entries / 2; i++)
+ intel_uncore_write(uncore,
+ GEN9_LNCFCMOCS(i),
+ l3cc_combine(table, unused_value,
+ unused_value));
+}
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
+void intel_mocs_init_engine(struct intel_engine_cs *engine)
+{
+ struct drm_i915_mocs_table table;
+
+ /* Called under a blanket forcewake */
+ assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
+
+ if (!get_mocs_settings(engine->i915, &table))
+ return;
+
+ /* Platforms with global MOCS do not need per-engine initialization. */
+ if (!HAS_GLOBAL_MOCS_REGISTERS(engine->i915))
+ init_mocs_table(engine, &table);
- return 0;
+ if (engine->class == RENDER_CLASS)
+ init_l3cc_table(engine, &table);
}
-static void intel_mocs_init_l3cc_table(struct intel_gt *gt)
+static void intel_mocs_init_global(struct intel_gt *gt)
{
struct intel_uncore *uncore = gt->uncore;
struct drm_i915_mocs_table table;
- unsigned int i;
- u16 unused_value;
+ unsigned int index;
- if (!get_mocs_settings(gt, &table))
+ /* LLC and eDRAM control values are not applicable to dgfx */
+ if (IS_DGFX(gt->i915))
return;
- /* Set unused values to PTE */
- unused_value = table.table[I915_MOCS_PTE].l3cc_value;
-
- for (i = 0; i < table.size / 2; i++) {
- u16 low = get_entry_l3cc(&table, 2 * i);
- u16 high = get_entry_l3cc(&table, 2 * i + 1);
+ GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915));
- intel_uncore_write(uncore,
- GEN9_LNCFCMOCS(i),
- l3cc_combine(&table, low, high));
- }
+ if (!get_mocs_settings(gt->i915, &table))
+ return;
- /* Odd table size - 1 left over */
- if (table.size & 0x01) {
- u16 low = get_entry_l3cc(&table, 2 * i);
+ if (GEM_DEBUG_WARN_ON(table.size > table.n_entries))
+ return;
+ for (index = 0; index < table.size; index++)
intel_uncore_write(uncore,
- GEN9_LNCFCMOCS(i),
- l3cc_combine(&table, low, unused_value));
- i++;
- }
+ GEN12_GLOBAL_MOCS(index),
+ table.table[index].control_value);
- /* All remaining entries are also unused */
- for (; i < table.n_entries / 2; i++)
+ /*
+ * Ok, now set the unused entries to the invalid entry (index 0). These
+ * entries are officially undefined and no contract for the contents and
+ * settings is given for these entries.
+ */
+ for (; index < table.n_entries; index++)
intel_uncore_write(uncore,
- GEN9_LNCFCMOCS(i),
- l3cc_combine(&table, unused_value,
- unused_value));
-}
-
-/**
- * intel_mocs_emit() - program the MOCS register.
- * @rq: Request to use to set up the MOCS tables.
- *
- * This function will emit a batch buffer with the values required for
- * programming the MOCS register values for all the currently supported
- * rings.
- *
- * These registers are partially stored in the RCS context, so they are
- * emitted at the same time so that when a context is created these registers
- * are set up. These registers have to be emitted into the start of the
- * context as setting the ELSP will re-init some of these registers back
- * to the hw values.
- *
- * Return: 0 on success, otherwise the error status.
- */
-int intel_mocs_emit(struct i915_request *rq)
-{
- struct drm_i915_mocs_table t;
- int ret;
-
- if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915) ||
- rq->engine->class != RENDER_CLASS)
- return 0;
-
- if (get_mocs_settings(rq->engine->gt, &t)) {
- /* Program the RCS control registers */
- ret = emit_mocs_control_table(rq, &t);
- if (ret)
- return ret;
-
- /* Now program the l3cc registers */
- ret = emit_mocs_l3cc_table(rq, &t);
- if (ret)
- return ret;
- }
-
- return 0;
+ GEN12_GLOBAL_MOCS(index),
+ table.table[0].control_value);
}
void intel_mocs_init(struct intel_gt *gt)
{
- intel_mocs_init_l3cc_table(gt);
-
if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
intel_mocs_init_global(gt);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
index 2ae816b7ca19..83371f3e6ba1 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.h
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
@@ -49,13 +49,10 @@
* context handling keep the MOCS in step.
*/
-struct i915_request;
struct intel_engine_cs;
struct intel_gt;
void intel_mocs_init(struct intel_gt *gt);
void intel_mocs_init_engine(struct intel_engine_cs *engine);
-int intel_mocs_emit(struct i915_request *rq);
-
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
new file mode 100644
index 000000000000..700104b90163
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -0,0 +1,787 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/pm_runtime.h>
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
+#include "intel_rc6.h"
+#include "intel_sideband.h"
+
+/**
+ * DOC: RC6
+ *
+ * RC6 is a special power stage which allows the GPU to enter a very
+ * low-voltage mode when idle, drawing down to 0V while in this state.
+ * It is entered automatically when the GPU is idle and RC6 support is
+ * enabled, and the GPU wakes up automatically as soon as a new workload
+ * arrives.
+ *
+ * Intel GPUs provide different RC6 modes, which differ from one another
+ * in the latency required to enter and leave RC6, and in the voltage
+ * consumed by the GPU while in each state.
+ *
+ * The combination of the following flags defines which states the GPU is
+ * allowed to enter: RC6 is the normal RC6 state, RC6p is deep RC6 and
+ * RC6pp is the deepest RC6. Hardware support for each of them varies with
+ * the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
+ * the one which brings the most power savings; deeper states save more
+ * power, but require a higher latency to switch to and wake up from.
+ */
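A minimal lifecycle sketch for the new interface (assumptions: a single caller on the gt init/teardown paths, error handling elided), based on the declarations added in intel_rc6.h:

static void example_rc6_lifecycle(struct intel_gt *gt)
{
	struct intel_rc6 *rc6 = &gt->rc6;

	intel_rc6_init(rc6);	/* takes a runtime-pm wakeref, sanitizes HW */
	intel_rc6_enable(rc6);	/* programs thresholds, then drops the wakeref */

	/* ... normal use: RC6 is entered by the HW whenever the GPU idles ... */

	intel_rc6_disable(rc6);	/* re-takes the wakeref, clears GEN6_RC_CONTROL */
	intel_rc6_fini(rc6);	/* releases the VLV/CHV power context and wakeref */
}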
+
+static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6)
+{
+ return container_of(rc6, struct intel_gt, rc6);
+}
+
+static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc)
+{
+ return rc6_to_gt(rc)->uncore;
+}
+
+static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
+{
+ return rc6_to_gt(rc)->i915;
+}
+
+static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
+{
+ intel_uncore_write_fw(uncore, reg, val);
+}
+
+static void gen11_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /* 2b: Program RC6 thresholds. */
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
+ set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
+
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+ for_each_engine(engine, rc6_to_gt(rc6), id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+ set(uncore, GUC_MAX_IDLE_COUNT, 0xA);
+
+ set(uncore, GEN6_RC_SLEEP, 0);
+
+ set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
+
+ /*
+ * 2c: Program Coarse Power Gating Policies.
+ *
+ * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
+ * use instead is a more conservative estimate for the maximum time
+ * it takes us to service a CS interrupt and submit a new ELSP - that
+ * is the time which the GPU is idle waiting for the CPU to select the
+ * next request to execute. If the idle hysteresis is less than that
+ * interrupt service latency, the hardware will automatically gate
+ * the power well and we will then incur the wake up cost on top of
+ * the service latency. A similar guide from plane_state is that we
+ * do not want the enable hysteresis to be less than the wakeup latency.
+ *
+ * igt/gem_exec_nop/sequential provides a rough estimate for the
+ * service latency, and puts it around 10us for Broadwell (and other
+ * big core) and around 40us for Broxton (and other low power cores).
+ * [Note that for legacy ringbuffer submission, this is less than 1us!]
+ * However, the wakeup latency on Broxton is closer to 100us. To be
+ * conservative, we have to factor in a context switch on top (due
+ * to ksoftirqd).
+ */
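A quick check of the values programmed below (the hysteresis unit of 1280ns follows from the "25 * 1280ns" note above): 250 * 1280ns = 320us, comfortably above the ~10-100us interrupt service latencies quoted, whereas the Bspec-suggested 25 would give only 32us.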
+ set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
+ set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
+
+ /* 3a: Enable RC6 */
+ set(uncore, GEN6_RC_CONTROL,
+ GEN6_RC_CTL_HW_ENABLE |
+ GEN6_RC_CTL_RC6_ENABLE |
+ GEN6_RC_CTL_EI_MODE(1));
+
+ set(uncore, GEN9_PG_ENABLE,
+ GEN9_RENDER_PG_ENABLE |
+ GEN9_MEDIA_PG_ENABLE |
+ GEN11_MEDIA_SAMPLER_PG_ENABLE);
+}
+
+static void gen9_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ u32 rc6_mode;
+
+ /* 2b: Program RC6 thresholds. */
+ if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) {
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
+ set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
+ } else if (IS_SKYLAKE(rc6_to_i915(rc6))) {
+ /*
+ * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
+ * when CPG is enabled
+ */
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
+ } else {
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
+ }
+
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+ for_each_engine(engine, rc6_to_gt(rc6), id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+ set(uncore, GUC_MAX_IDLE_COUNT, 0xA);
+
+ set(uncore, GEN6_RC_SLEEP, 0);
+
+ /*
+ * 2c: Program Coarse Power Gating Policies.
+ *
+ * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
+ * use instead is a more conservative estimate for the maximum time
+ * it takes us to service a CS interrupt and submit a new ELSP - that
+ * is the time which the GPU is idle waiting for the CPU to select the
+ * next request to execute. If the idle hysteresis is less than that
+ * interrupt service latency, the hardware will automatically gate
+ * the power well and we will then incur the wake up cost on top of
+ * the service latency. A similar guide from plane_state is that we
+ * do not want the enable hysteresis to be less than the wakeup latency.
+ *
+ * igt/gem_exec_nop/sequential provides a rough estimate for the
+ * service latency, and puts it around 10us for Broadwell (and other
+ * big core) and around 40us for Broxton (and other low power cores).
+ * [Note that for legacy ringbuffer submission, this is less than 1us!]
+ * However, the wakeup latency on Broxton is closer to 100us. To be
+ * conservative, we have to factor in a context switch on top (due
+ * to ksoftirqd).
+ */
+ set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
+ set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
+
+ /* 3a: Enable RC6 */
+ set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
+
+ /* WaRsUseTimeoutMode:cnl (pre-prod) */
+ if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0))
+ rc6_mode = GEN7_RC_CTL_TO_MODE;
+ else
+ rc6_mode = GEN6_RC_CTL_EI_MODE(1);
+
+ set(uncore, GEN6_RC_CONTROL,
+ GEN6_RC_CTL_HW_ENABLE |
+ GEN6_RC_CTL_RC6_ENABLE |
+ rc6_mode);
+
+ /*
+ * WaRsDisableCoarsePowerGating:skl,cnl
+ * - Render/Media PG need to be disabled with RC6.
+ */
+ if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6)))
+ set(uncore, GEN9_PG_ENABLE,
+ GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
+}
+
+static void gen8_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /* 2b: Program RC6 thresholds. */
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+ for_each_engine(engine, rc6_to_gt(rc6), id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+ set(uncore, GEN6_RC_SLEEP, 0);
+ set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
+
+ /* 3: Enable RC6 */
+ set(uncore, GEN6_RC_CONTROL,
+ GEN6_RC_CTL_HW_ENABLE |
+ GEN7_RC_CTL_TO_MODE |
+ GEN6_RC_CTL_RC6_ENABLE);
+}
+
+static void gen6_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ u32 rc6vids, rc6_mask;
+ int ret;
+
+ set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
+ set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
+
+ for_each_engine(engine, rc6_to_gt(rc6), id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+ set(uncore, GEN6_RC_SLEEP, 0);
+ set(uncore, GEN6_RC1e_THRESHOLD, 1000);
+ if (IS_IVYBRIDGE(i915))
+ set(uncore, GEN6_RC6_THRESHOLD, 125000);
+ else
+ set(uncore, GEN6_RC6_THRESHOLD, 50000);
+ set(uncore, GEN6_RC6p_THRESHOLD, 150000);
+ set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */
+
+ /* We don't use those on Haswell */
+ rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
+ if (HAS_RC6p(i915))
+ rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
+ if (HAS_RC6pp(i915))
+ rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
+ set(uncore, GEN6_RC_CONTROL,
+ rc6_mask |
+ GEN6_RC_CTL_EI_MODE(1) |
+ GEN6_RC_CTL_HW_ENABLE);
+
+ rc6vids = 0;
+ ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
+ &rc6vids, NULL);
+ if (IS_GEN(i915, 6) && ret) {
+ DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
+ } else if (IS_GEN(i915, 6) &&
+ (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
+ DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
+ GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
+ rc6vids &= 0xffff00;
+ rc6vids |= GEN6_ENCODE_RC6_VID(450);
+ ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
+ if (ret)
+ DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
+ }
+}
+
+/* Check that the pcbr address is not empty. */
+static int chv_rc6_init(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ resource_size_t pctx_paddr, paddr;
+ resource_size_t pctx_size = 32 * SZ_1K;
+ u32 pcbr;
+
+ pcbr = intel_uncore_read(uncore, VLV_PCBR);
+ if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
+ DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+ paddr = rc6_to_i915(rc6)->dsm.end + 1 - pctx_size;
+ GEM_BUG_ON(paddr > U32_MAX);
+
+ pctx_paddr = (paddr & ~4095);
+ intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
+ }
+
+ return 0;
+}
+
+static int vlv_rc6_init(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct drm_i915_gem_object *pctx;
+ resource_size_t pctx_paddr;
+ resource_size_t pctx_size = 24 * SZ_1K;
+ u32 pcbr;
+
+ pcbr = intel_uncore_read(uncore, VLV_PCBR);
+ if (pcbr) {
+ /* BIOS set it up already, grab the pre-alloc'd space */
+ resource_size_t pcbr_offset;
+
+ pcbr_offset = (pcbr & ~4095) - i915->dsm.start;
+ pctx = i915_gem_object_create_stolen_for_preallocated(i915,
+ pcbr_offset,
+ I915_GTT_OFFSET_NONE,
+ pctx_size);
+ if (IS_ERR(pctx))
+ return PTR_ERR(pctx);
+
+ goto out;
+ }
+
+ DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+
+ /*
+ * From the Gunit register HAS:
+ * The Gfx driver is expected to program this register and ensure
+ * proper allocation within Gfx stolen memory. For example, this
+ * register should be programmed such that the PCBR range does not
+ * overlap with other ranges, such as the frame buffer, protected
+ * memory, or any other relevant ranges.
+ */
+ pctx = i915_gem_object_create_stolen(i915, pctx_size);
+ if (IS_ERR(pctx)) {
+ DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
+ return PTR_ERR(pctx);
+ }
+
+ GEM_BUG_ON(range_overflows_t(u64,
+ i915->dsm.start,
+ pctx->stolen->start,
+ U32_MAX));
+ pctx_paddr = i915->dsm.start + pctx->stolen->start;
+ intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
+
+out:
+ rc6->pctx = pctx;
+ return 0;
+}
+
+static void chv_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /* 2a: Program RC6 thresholds. */
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+
+ for_each_engine(engine, rc6_to_gt(rc6), id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+ set(uncore, GEN6_RC_SLEEP, 0);
+
+ /* TO threshold set to 500 us (0x186 * 1.28 us) */
+ set(uncore, GEN6_RC6_THRESHOLD, 0x186);
+
+ /* Allows RC6 residency counter to work */
+ set(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+ VLV_MEDIA_RC6_COUNT_EN |
+ VLV_RENDER_RC6_COUNT_EN));
+
+ /* 3: Enable RC6 */
+ set(uncore, GEN6_RC_CONTROL, GEN7_RC_CTL_TO_MODE);
+}
+
+static void vlv_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
+
+ for_each_engine(engine, rc6_to_gt(rc6), id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+ set(uncore, GEN6_RC6_THRESHOLD, 0x557);
+
+ /* Allows RC6 residency counter to work */
+ set(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+ VLV_MEDIA_RC0_COUNT_EN |
+ VLV_RENDER_RC0_COUNT_EN |
+ VLV_MEDIA_RC6_COUNT_EN |
+ VLV_RENDER_RC6_COUNT_EN));
+
+ set(uncore, GEN6_RC_CONTROL,
+ GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
+}
+
+static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ u32 rc6_ctx_base, rc_ctl, rc_sw_target;
+ bool enable_rc6 = true;
+
+ rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+ rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE);
+ rc_sw_target &= RC_SW_TARGET_STATE_MASK;
+ rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT;
+ DRM_DEBUG_DRIVER("BIOS enabled RC states: "
+ "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
+ onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
+ onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
+ rc_sw_target);
+
+ if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
+ DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
+ enable_rc6 = false;
+ }
+
+ /*
+ * The exact context size is not known for BXT, so assume a page size
+ * for this check.
+ */
+ rc6_ctx_base =
+ intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
+ if (!(rc6_ctx_base >= i915->dsm_reserved.start &&
+ rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) {
+ DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
+ enable_rc6 = false;
+ }
+
+ if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1 &&
+ (intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 &&
+ (intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 &&
+ (intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) {
+ DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
+ enable_rc6 = false;
+ }
+
+ if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) ||
+ !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) ||
+ !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) {
+ DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
+ enable_rc6 = false;
+ }
+
+ if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) {
+ DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
+ enable_rc6 = false;
+ }
+
+ if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) {
+ DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
+ enable_rc6 = false;
+ }
+
+ return enable_rc6;
+}
+
+static bool rc6_supported(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+
+ if (!HAS_RC6(i915))
+ return false;
+
+ if (intel_vgpu_active(i915))
+ return false;
+
+ if (is_mock_gt(rc6_to_gt(rc6)))
+ return false;
+
+ if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) {
+ dev_notice(i915->drm.dev,
+ "RC6 and powersaving disabled by BIOS\n");
+ return false;
+ }
+
+ return true;
+}
+
+static void rpm_get(struct intel_rc6 *rc6)
+{
+ GEM_BUG_ON(rc6->wakeref);
+ pm_runtime_get_sync(&rc6_to_i915(rc6)->drm.pdev->dev);
+ rc6->wakeref = true;
+}
+
+static void rpm_put(struct intel_rc6 *rc6)
+{
+ GEM_BUG_ON(!rc6->wakeref);
+ pm_runtime_put(&rc6_to_i915(rc6)->drm.pdev->dev);
+ rc6->wakeref = false;
+}
+
+static bool intel_rc6_ctx_corrupted(struct intel_rc6 *rc6)
+{
+ return !intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO);
+}
+
+static void intel_rc6_ctx_wa_init(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+
+ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ return;
+
+ if (intel_rc6_ctx_corrupted(rc6)) {
+ DRM_INFO("RC6 context corrupted, disabling runtime power management\n");
+ rc6->ctx_corrupted = true;
+ }
+}
+
+/**
+ * intel_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA
+ * @rc6: rc6 state
+ *
+ * Perform any steps needed to re-init the RC6 CTX WA after system resume.
+ */
+void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6)
+{
+ if (rc6->ctx_corrupted && !intel_rc6_ctx_corrupted(rc6)) {
+ DRM_INFO("RC6 context restored, re-enabling runtime power management\n");
+ rc6->ctx_corrupted = false;
+ }
+}
+
+/**
+ * intel_rc6_ctx_wa_check - check for a new RC6 CTX corruption
+ * @rc6: rc6 state
+ *
+ * Check if an RC6 CTX corruption has happened since the last check and if so
+ * disable RC6 and runtime power management.
+ */
+void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+
+ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ return;
+
+ if (rc6->ctx_corrupted)
+ return;
+
+ if (!intel_rc6_ctx_corrupted(rc6))
+ return;
+
+ DRM_NOTE("RC6 context corruption, disabling runtime power management\n");
+
+ intel_rc6_disable(rc6);
+ rc6->ctx_corrupted = true;
+}
+
+static void __intel_rc6_disable(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ if (INTEL_GEN(i915) >= 9)
+ set(uncore, GEN9_PG_ENABLE, 0);
+ set(uncore, GEN6_RC_CONTROL, 0);
+ set(uncore, GEN6_RC_STATE, 0);
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+}
+
+void intel_rc6_init(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ int err;
+
+ /* Disable runtime-pm until we can save the GPU state with rc6 pctx */
+ rpm_get(rc6);
+
+ if (!rc6_supported(rc6))
+ return;
+
+ intel_rc6_ctx_wa_init(rc6);
+
+ if (IS_CHERRYVIEW(i915))
+ err = chv_rc6_init(rc6);
+ else if (IS_VALLEYVIEW(i915))
+ err = vlv_rc6_init(rc6);
+ else
+ err = 0;
+
+ /* Sanitize rc6, ensure it is disabled before we are ready. */
+ __intel_rc6_disable(rc6);
+
+ rc6->supported = err == 0;
+}
+
+void intel_rc6_sanitize(struct intel_rc6 *rc6)
+{
+ if (rc6->enabled) { /* unbalanced suspend/resume */
+ rpm_get(rc6);
+ rc6->enabled = false;
+ }
+
+ if (rc6->supported)
+ __intel_rc6_disable(rc6);
+}
+
+void intel_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+ if (!rc6->supported)
+ return;
+
+ GEM_BUG_ON(rc6->enabled);
+
+ if (rc6->ctx_corrupted)
+ return;
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ if (IS_CHERRYVIEW(i915))
+ chv_rc6_enable(rc6);
+ else if (IS_VALLEYVIEW(i915))
+ vlv_rc6_enable(rc6);
+ else if (INTEL_GEN(i915) >= 11)
+ gen11_rc6_enable(rc6);
+ else if (INTEL_GEN(i915) >= 9)
+ gen9_rc6_enable(rc6);
+ else if (IS_BROADWELL(i915))
+ gen8_rc6_enable(rc6);
+ else if (INTEL_GEN(i915) >= 6)
+ gen6_rc6_enable(rc6);
+
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+ /* rc6 is ready, runtime-pm is go! */
+ rpm_put(rc6);
+ rc6->enabled = true;
+}
+
+void intel_rc6_disable(struct intel_rc6 *rc6)
+{
+ if (!rc6->enabled)
+ return;
+
+ rpm_get(rc6);
+ rc6->enabled = false;
+
+ __intel_rc6_disable(rc6);
+}
+
+void intel_rc6_fini(struct intel_rc6 *rc6)
+{
+ struct drm_i915_gem_object *pctx;
+
+ intel_rc6_disable(rc6);
+
+ pctx = fetch_and_zero(&rc6->pctx);
+ if (pctx)
+ i915_gem_object_put(pctx);
+
+ if (rc6->wakeref)
+ rpm_put(rc6);
+}
+
+static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg)
+{
+ u32 lower, upper, tmp;
+ int loop = 2;
+
+ /*
+ * The registers accessed do not need forcewake. We borrow the
+ * uncore lock to prevent concurrent access to the range registers.
+ */
+ lockdep_assert_held(&uncore->lock);
+
+ /*
+ * vlv and chv residency counters are 40 bits in width.
+ * With a control bit, we can choose between the upper or lower
+ * 32-bit window into this counter.
+ *
+ * Although we always use the counter in high-range mode elsewhere,
+ * userspace may attempt to read the value before rc6 is initialised,
+ * before we have set the default VLV_COUNTER_CONTROL value. So always
+ * set the high bit to be safe.
+ */
+ set(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
+ upper = intel_uncore_read_fw(uncore, reg);
+ do {
+ tmp = upper;
+
+ set(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
+ lower = intel_uncore_read_fw(uncore, reg);
+
+ set(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
+ upper = intel_uncore_read_fw(uncore, reg);
+ } while (upper != tmp && --loop);
+
+ /*
+ * Everywhere else we always use VLV_COUNTER_CONTROL with the
+ * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
+ * now.
+ */
+
+ return lower | (u64)upper << 8;
+}
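Worked example of the 40-bit reassembly above (illustrative values, assuming the high window exposes counter bits [39:8] and the low window bits [31:0], as the shift implies): with upper = 0x12345678 and lower = 0x345678ab, the function returns (0x12345678 << 8) | 0x345678ab = 0x12345678ab; the retry loop ensures the overlapping bits [31:8] agree between the two reads.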
+
+u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ u64 time_hw, prev_hw, overflow_hw;
+ unsigned int fw_domains;
+ unsigned long flags;
+ unsigned int i;
+ u32 mul, div;
+
+ if (!rc6->supported)
+ return 0;
+
+ /*
+ * Store previous hw counter values for counter wrap-around handling.
+ *
+ * There are only four interesting registers and they live next to each
+ * other, so we can use the offset relative to the smallest one as the
+ * index into driver storage.
+ */
+ i = (i915_mmio_reg_offset(reg) -
+ i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
+ if (WARN_ON_ONCE(i >= ARRAY_SIZE(rc6->cur_residency)))
+ return 0;
+
+ fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);
+
+ spin_lock_irqsave(&uncore->lock, flags);
+ intel_uncore_forcewake_get__locked(uncore, fw_domains);
+
+ /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+ mul = 1000000;
+ div = i915->czclk_freq;
+ overflow_hw = BIT_ULL(40);
+ time_hw = vlv_residency_raw(uncore, reg);
+ } else {
+ /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
+ if (IS_GEN9_LP(i915)) {
+ mul = 10000;
+ div = 12;
+ } else {
+ mul = 1280;
+ div = 1;
+ }
+
+ overflow_hw = BIT_ULL(32);
+ time_hw = intel_uncore_read_fw(uncore, reg);
+ }
+
+ /*
+ * Counter wrap handling.
+ *
+ * This relies on a sufficient frequency of queries, otherwise the
+ * counters can still wrap between two samples.
+ */
+ prev_hw = rc6->prev_hw_residency[i];
+ rc6->prev_hw_residency[i] = time_hw;
+
+ /* RC6 delta from last sample. */
+ if (time_hw >= prev_hw)
+ time_hw -= prev_hw;
+ else
+ time_hw += overflow_hw - prev_hw;
+
+ /* Add delta to RC6 extended raw driver copy. */
+ time_hw += rc6->cur_residency[i];
+ rc6->cur_residency[i] = time_hw;
+
+ intel_uncore_forcewake_put__locked(uncore, fw_domains);
+ spin_unlock_irqrestore(&uncore->lock, flags);
+
+ return mul_u64_u32_div(time_hw, mul, div);
+}
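Worked conversions for the mul/div pairs above: on most platforms a tick is 1.28us (mul = 1280, div = 1), so time_hw = 1000 ticks yields 1,280,000 ns; on Gen9 LP a tick is 10000/12 ns (~833.33ns), so the same 1000 ticks yield mul_u64_u32_div(1000, 10000, 12) = 833,333 ns; on VLV/CHV the tick is a CZ clock period, hence mul = 1000000 and div = czclk_freq (in kHz).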
+
+u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg)
+{
+ return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h
new file mode 100644
index 000000000000..1370f6834a4c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.h
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RC6_H
+#define INTEL_RC6_H
+
+#include "i915_reg.h"
+
+struct intel_engine_cs;
+struct intel_rc6;
+
+void intel_rc6_init(struct intel_rc6 *rc6);
+void intel_rc6_fini(struct intel_rc6 *rc6);
+
+void intel_rc6_sanitize(struct intel_rc6 *rc6);
+void intel_rc6_enable(struct intel_rc6 *rc6);
+void intel_rc6_disable(struct intel_rc6 *rc6);
+
+u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg);
+u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg);
+
+void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6);
+void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6);
+
+#endif /* INTEL_RC6_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
new file mode 100644
index 000000000000..89ad5697a8d4
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
@@ -0,0 +1,29 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RC6_TYPES_H
+#define INTEL_RC6_TYPES_H
+
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include "intel_engine_types.h"
+
+struct drm_i915_gem_object;
+
+struct intel_rc6 {
+ u64 prev_hw_residency[4];
+ u64 cur_residency[4];
+
+ struct drm_i915_gem_object *pctx;
+
+ bool supported : 1;
+ bool enabled : 1;
+ bool wakeref : 1;
+ bool ctx_corrupted : 1;
+};
+
+#endif /* INTEL_RC6_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 6d05f9c64178..c4edc35e7d89 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -27,6 +27,7 @@
#include "i915_drv.h"
#include "intel_renderstate.h"
+#include "intel_ring.h"
struct intel_renderstate {
const struct intel_renderstate_rodata *rodata;
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index b9d84d52e986..f03e000051c1 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -42,11 +42,10 @@ static void engine_skip_context(struct i915_request *rq)
struct intel_engine_cs *engine = rq->engine;
struct i915_gem_context *hung_ctx = rq->gem_context;
- lockdep_assert_held(&engine->active.lock);
-
if (!i915_request_is_active(rq))
return;
+ lockdep_assert_held(&engine->active.lock);
list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
if (rq->gem_context == hung_ctx)
i915_request_skip(rq, -EIO);
@@ -123,7 +122,6 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
rq->fence.seqno,
yesno(guilty));
- lockdep_assert_held(&rq->engine->active.lock);
GEM_BUG_ON(i915_request_completed(rq));
if (guilty) {
@@ -284,14 +282,14 @@ static int gen6_reset_engines(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
- struct intel_engine_cs *engine;
- const u32 hw_engine_mask[] = {
+ static const u32 hw_engine_mask[] = {
[RCS0] = GEN6_GRDOM_RENDER,
[BCS0] = GEN6_GRDOM_BLT,
[VCS0] = GEN6_GRDOM_MEDIA,
[VCS1] = GEN8_GRDOM_MEDIA2,
[VECS0] = GEN6_GRDOM_VECS,
};
+ struct intel_engine_cs *engine;
u32 hw_mask;
if (engine_mask == ALL_ENGINES) {
@@ -300,7 +298,7 @@ static int gen6_reset_engines(struct intel_gt *gt,
intel_engine_mask_t tmp;
hw_mask = 0;
- for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
+ for_each_engine_masked(engine, gt, engine_mask, tmp) {
GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
hw_mask |= hw_engine_mask[engine->id];
}
@@ -309,7 +307,7 @@ static int gen6_reset_engines(struct intel_gt *gt,
return gen6_hw_domain_reset(gt, hw_mask);
}
-static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
+static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask)
{
struct intel_uncore *uncore = engine->uncore;
u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
@@ -318,6 +316,7 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
i915_reg_t sfc_usage;
u32 sfc_usage_bit;
u32 sfc_reset_bit;
+ int ret;
switch (engine->class) {
case VIDEO_DECODE_CLASS:
@@ -352,27 +351,33 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
}
/*
- * Tell the engine that a software reset is going to happen. The engine
- * will then try to force lock the SFC (if currently locked, it will
- * remain so until we tell the engine it is safe to unlock; if currently
- * unlocked, it will ignore this and all new lock requests). If SFC
- * ends up being locked to the engine we want to reset, we have to reset
- * it as well (we will unlock it once the reset sequence is completed).
+ * If the engine is using a SFC, tell the engine that a software reset
+ * is going to happen. The engine will then try to force lock the SFC.
+ * If SFC ends up being locked to the engine we want to reset, we have
+ * to reset it as well (we will unlock it once the reset sequence is
+ * completed).
*/
+ if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
+ return 0;
+
rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
- if (__intel_wait_for_register_fw(uncore,
- sfc_forced_lock_ack,
- sfc_forced_lock_ack_bit,
- sfc_forced_lock_ack_bit,
- 1000, 0, NULL)) {
- DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
+ ret = __intel_wait_for_register_fw(uncore,
+ sfc_forced_lock_ack,
+ sfc_forced_lock_ack_bit,
+ sfc_forced_lock_ack_bit,
+ 1000, 0, NULL);
+
+ /* Was the SFC released while we were trying to lock it? */
+ if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
return 0;
- }
- if (intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)
- return sfc_reset_bit;
+ if (ret) {
+ DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
+ return ret;
+ }
+ *hw_mask |= sfc_reset_bit;
return 0;
}
@@ -408,7 +413,7 @@ static int gen11_reset_engines(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
- const u32 hw_engine_mask[] = {
+ static const u32 hw_engine_mask[] = {
[RCS0] = GEN11_GRDOM_RENDER,
[BCS0] = GEN11_GRDOM_BLT,
[VCS0] = GEN11_GRDOM_MEDIA,
@@ -427,17 +432,26 @@ static int gen11_reset_engines(struct intel_gt *gt,
hw_mask = GEN11_GRDOM_FULL;
} else {
hw_mask = 0;
- for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
+ for_each_engine_masked(engine, gt, engine_mask, tmp) {
GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
hw_mask |= hw_engine_mask[engine->id];
- hw_mask |= gen11_lock_sfc(engine);
+ ret = gen11_lock_sfc(engine, &hw_mask);
+ if (ret)
+ goto sfc_unlock;
}
}
ret = gen6_hw_domain_reset(gt, hw_mask);
+sfc_unlock:
+ /*
+ * We unlock the SFC based on the lock status and not the result of
+ * gen11_lock_sfc to make sure that we clean up properly if something
+ * went wrong during the lock (e.g. the lock was acquired after the
+ * timeout expired).
+ */
if (engine_mask != ALL_ENGINES)
- for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
+ for_each_engine_masked(engine, gt, engine_mask, tmp)
gen11_unlock_sfc(engine);
return ret;
@@ -496,7 +510,7 @@ static int gen8_reset_engines(struct intel_gt *gt,
intel_engine_mask_t tmp;
int ret;
- for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
+ for_each_engine_masked(engine, gt, engine_mask, tmp) {
ret = gen8_engine_reset_prepare(engine);
if (ret && !reset_non_ready)
goto skip_reset;
@@ -522,19 +536,30 @@ static int gen8_reset_engines(struct intel_gt *gt,
ret = gen6_reset_engines(gt, engine_mask, retry);
skip_reset:
- for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
+ for_each_engine_masked(engine, gt, engine_mask, tmp)
gen8_engine_reset_cancel(engine);
return ret;
}
+static int mock_reset(struct intel_gt *gt,
+ intel_engine_mask_t mask,
+ unsigned int retry)
+{
+ return 0;
+}
+
typedef int (*reset_func)(struct intel_gt *,
intel_engine_mask_t engine_mask,
unsigned int retry);
-static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
+static reset_func intel_get_gpu_reset(const struct intel_gt *gt)
{
- if (INTEL_GEN(i915) >= 8)
+ struct drm_i915_private *i915 = gt->i915;
+
+ if (is_mock_gt(gt))
+ return mock_reset;
+ else if (INTEL_GEN(i915) >= 8)
return gen8_reset_engines;
else if (INTEL_GEN(i915) >= 6)
return gen6_reset_engines;
@@ -557,7 +582,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
int ret = -ETIMEDOUT;
int retry;
- reset = intel_get_gpu_reset(gt->i915);
+ reset = intel_get_gpu_reset(gt);
if (!reset)
return -ENODEV;
@@ -577,17 +602,20 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
return ret;
}
-bool intel_has_gpu_reset(struct drm_i915_private *i915)
+bool intel_has_gpu_reset(const struct intel_gt *gt)
{
if (!i915_modparams.reset)
return NULL;
- return intel_get_gpu_reset(i915);
+ return intel_get_gpu_reset(gt);
}
-bool intel_has_reset_engine(struct drm_i915_private *i915)
+bool intel_has_reset_engine(const struct intel_gt *gt)
{
- return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2;
+ if (i915_modparams.reset < 2)
+ return false;
+
+ return INTEL_INFO(gt->i915)->has_reset_engine;
}
int intel_reset_guc(struct intel_gt *gt)
@@ -654,7 +682,7 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
intel_engine_mask_t awake = 0;
enum intel_engine_id id;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
if (intel_engine_pm_get_if_awake(engine))
awake |= engine->mask;
reset_prepare_engine(engine);
@@ -684,10 +712,10 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
if (err)
return err;
- for_each_engine(engine, gt->i915, id)
+ for_each_engine(engine, gt, id)
__intel_engine_reset(engine, stalled_mask & engine->mask);
- i915_gem_restore_fences(gt->i915);
+ i915_gem_restore_fences(gt->ggtt);
return err;
}
@@ -697,7 +725,7 @@ static void reset_finish_engine(struct intel_engine_cs *engine)
engine->reset.finish(engine);
intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
- intel_engine_signal_breadcrumbs(engine);
+ intel_engine_breadcrumbs_irq(engine);
}
static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
@@ -705,7 +733,7 @@ static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
reset_finish_engine(engine);
if (awake & engine->mask)
intel_engine_pm_put(engine);
@@ -741,7 +769,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) {
struct drm_printer p = drm_debug_printer(__func__);
- for_each_engine(engine, gt->i915, id)
+ for_each_engine(engine, gt, id)
intel_engine_dump(engine, &p, "%s\n", engine->name);
}
@@ -758,7 +786,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
__intel_gt_reset(gt, ALL_ENGINES);
- for_each_engine(engine, gt->i915, id)
+ for_each_engine(engine, gt, id)
engine->submit_request = nop_submit_request;
/*
@@ -770,7 +798,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
set_bit(I915_WEDGED, &gt->reset.flags);
/* Mark all executing requests as skipped */
- for_each_engine(engine, gt->i915, id)
+ for_each_engine(engine, gt, id)
engine->cancel_requests(engine);
reset_finish(gt, awake);
@@ -783,7 +811,7 @@ void intel_gt_set_wedged(struct intel_gt *gt)
intel_wakeref_t wakeref;
mutex_lock(&gt->reset.mutex);
- with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
__intel_gt_set_wedged(gt);
mutex_unlock(&gt->reset.mutex);
}
@@ -793,11 +821,13 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
struct intel_gt_timelines *timelines = &gt->timelines;
struct intel_timeline *tl;
unsigned long flags;
+ bool ok;
if (!test_bit(I915_WEDGED, &gt->reset.flags))
return true;
- if (!gt->scratch) /* Never full initialised, recovery impossible */
+ /* Never fully initialised, recovery impossible */
+ if (test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags))
return false;
GEM_TRACE("start\n");
@@ -814,10 +844,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
*/
spin_lock_irqsave(&timelines->lock, flags);
list_for_each_entry(tl, &timelines->active_list, link) {
- struct i915_request *rq;
+ struct dma_fence *fence;
- rq = i915_active_request_get_unlocked(&tl->last_request);
- if (!rq)
+ fence = i915_active_fence_get(&tl->last_request);
+ if (!fence)
continue;
spin_unlock_irqrestore(&timelines->lock, flags);
@@ -829,8 +859,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
* (I915_FENCE_TIMEOUT) so this wait should not be unbounded
* in the worst case.
*/
- dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
- i915_request_put(rq);
+ dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
+ dma_fence_put(fence);
/* Restart iteration after dropping lock */
spin_lock_irqsave(&timelines->lock, flags);
@@ -838,7 +868,18 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
}
spin_unlock_irqrestore(&timelines->lock, flags);
- intel_gt_sanitize(gt, false);
+ /* We must reset pending GPU events before restoring our submission */
+ ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
+ if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
+ ok = __intel_gt_reset(gt, ALL_ENGINES) == 0;
+ if (!ok) {
+ /*
+ * Warn CI about the unrecoverable wedged condition.
+ * Time for a reboot.
+ */
+ add_taint_for_CI(TAINT_WARN);
+ return false;
+ }
/*
* Undo nop_submit_request. We prevent all new i915 requests from
@@ -893,7 +934,7 @@ static int resume(struct intel_gt *gt)
enum intel_engine_id id;
int ret;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
ret = engine->resume(engine);
if (ret)
return ret;
@@ -943,7 +984,7 @@ void intel_gt_reset(struct intel_gt *gt,
awake = reset_prepare(gt);
- if (!intel_has_gpu_reset(gt->i915)) {
+ if (!intel_has_gpu_reset(gt)) {
if (i915_modparams.reset)
dev_err(gt->i915->drm.dev, "GPU reset not supported\n");
else
@@ -972,7 +1013,7 @@ void intel_gt_reset(struct intel_gt *gt,
* was running at the time of the reset (i.e. we weren't VT
* switched away).
*/
- ret = i915_gem_init_hw(gt->i915);
+ ret = intel_gt_init_hw(gt);
if (ret) {
DRM_ERROR("Failed to initialise HW following reset (%d)\n",
ret);
@@ -983,8 +1024,6 @@ void intel_gt_reset(struct intel_gt *gt,
if (ret)
goto taint;
- intel_gt_queue_hangcheck(gt);
-
finish:
reset_finish(gt, awake);
unlock:
@@ -1151,7 +1190,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
* isn't the case at least when we get here by doing a
* simulated reset via debugfs, so get an RPM reference.
*/
- wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
engine_mask &= INTEL_INFO(gt->i915)->engine_mask;
@@ -1164,8 +1203,8 @@ void intel_gt_handle_error(struct intel_gt *gt,
* Try engine reset when available. We fall back to full reset if
* single reset fails.
*/
- if (intel_has_reset_engine(gt->i915) && !intel_gt_is_wedged(gt)) {
- for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
+ if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
+ for_each_engine_masked(engine, gt, engine_mask, tmp) {
BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
&gt->reset.flags))
@@ -1193,7 +1232,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
synchronize_rcu_expedited();
/* Prevent any other reset-engine attempt. */
- for_each_engine(engine, gt->i915, tmp) {
+ for_each_engine(engine, gt, tmp) {
while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
&gt->reset.flags))
wait_on_bit(&gt->reset.flags,
@@ -1203,7 +1242,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
intel_gt_reset_global(gt, engine_mask, msg);
- for_each_engine(engine, gt->i915, tmp)
+ for_each_engine(engine, gt, tmp)
clear_bit_unlock(I915_RESET_ENGINE + engine->id,
&gt->reset.flags);
clear_bit_unlock(I915_RESET_BACKOFF, &gt->reset.flags);
@@ -1211,13 +1250,11 @@ void intel_gt_handle_error(struct intel_gt *gt,
wake_up_all(&gt->reset.queue);
out:
- intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
}
-int intel_gt_reset_trylock(struct intel_gt *gt)
+int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
{
- int srcu;
-
might_lock(&gt->reset.backoff_srcu);
might_sleep();
@@ -1232,10 +1269,10 @@ int intel_gt_reset_trylock(struct intel_gt *gt)
rcu_read_lock();
}
- srcu = srcu_read_lock(&gt->reset.backoff_srcu);
+ *srcu = srcu_read_lock(&gt->reset.backoff_srcu);
rcu_read_unlock();
- return srcu;
+ return 0;
}
void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
@@ -1255,10 +1292,6 @@ int intel_gt_terminally_wedged(struct intel_gt *gt)
if (!test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
return -EIO;
- /* XXX intel_reset_finish() still takes struct_mutex!!! */
- if (mutex_is_locked(&gt->i915->drm.struct_mutex))
- return -EAGAIN;
-
if (wait_event_interruptible(gt->reset.queue,
!test_bit(I915_RESET_BACKOFF,
&gt->reset.flags)))
@@ -1267,6 +1300,14 @@ int intel_gt_terminally_wedged(struct intel_gt *gt)
return intel_gt_is_wedged(gt) ? -EIO : 0;
}
+void intel_gt_set_wedged_on_init(struct intel_gt *gt)
+{
+ BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES >
+ I915_WEDGED_ON_INIT);
+ intel_gt_set_wedged(gt);
+ set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
+}
+
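A minimal standalone sketch (illustrative only, not part of the patch) of the flag layout that the BUILD_BUG_ON above enforces: the per-engine reset bits must stay below the I915_WEDGED_ON_INIT bit parked near the top of the word. The engine count here is an assumption for the example.

#include <assert.h>
#include <limits.h>
#include <stdio.h>

#define EXAMPLE_RESET_ENGINE	2			/* one bit per engine from here */
#define EXAMPLE_NUM_ENGINES	8			/* assumed engine count */
#define EXAMPLE_WEDGED_ON_INIT	(sizeof(long) * CHAR_BIT - 2)
#define EXAMPLE_WEDGED		(sizeof(long) * CHAR_BIT - 1)

int main(void)
{
	/* Mirrors the BUILD_BUG_ON: engine bits must not reach the wedge flags. */
	assert(EXAMPLE_RESET_ENGINE + EXAMPLE_NUM_ENGINES <= EXAMPLE_WEDGED_ON_INIT);
	printf("engine bits %d..%d, wedged-on-init bit %zu, wedged bit %zu\n",
	       EXAMPLE_RESET_ENGINE,
	       EXAMPLE_RESET_ENGINE + EXAMPLE_NUM_ENGINES - 1,
	       EXAMPLE_WEDGED_ON_INIT, EXAMPLE_WEDGED);
	return 0;
}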
void intel_gt_init_reset(struct intel_gt *gt)
{
init_waitqueue_head(&gt->reset.queue);
@@ -1310,4 +1351,5 @@ void __intel_fini_wedge(struct intel_wedge_me *w)
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_reset.c"
+#include "selftest_hangcheck.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index 37a987b17108..8e8d5f761166 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -14,7 +14,6 @@
#include "intel_engine_types.h"
#include "intel_reset_types.h"
-struct drm_i915_private;
struct i915_request;
struct intel_engine_cs;
struct intel_gt;
@@ -38,13 +37,19 @@ int intel_engine_reset(struct intel_engine_cs *engine,
void __i915_request_reset(struct i915_request *rq, bool guilty);
-int __must_check intel_gt_reset_trylock(struct intel_gt *gt);
+int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu);
void intel_gt_reset_unlock(struct intel_gt *gt, int tag);
void intel_gt_set_wedged(struct intel_gt *gt);
bool intel_gt_unset_wedged(struct intel_gt *gt);
int intel_gt_terminally_wedged(struct intel_gt *gt);
+/*
+ * There's no unset_wedged_on_init paired with this one.
+ * Once we're wedged on init, there's no going back.
+ */
+void intel_gt_set_wedged_on_init(struct intel_gt *gt);
+
int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask);
int intel_reset_guc(struct intel_gt *gt);
@@ -68,10 +73,13 @@ void __intel_fini_wedge(struct intel_wedge_me *w);
static inline bool __intel_reset_failed(const struct intel_reset *reset)
{
+ GEM_BUG_ON(test_bit(I915_WEDGED_ON_INIT, &reset->flags) ?
+ !test_bit(I915_WEDGED, &reset->flags) : false);
+
return unlikely(test_bit(I915_WEDGED, &reset->flags));
}
-bool intel_has_gpu_reset(struct drm_i915_private *i915);
-bool intel_has_reset_engine(struct drm_i915_private *i915);
+bool intel_has_gpu_reset(const struct intel_gt *gt);
+bool intel_has_reset_engine(const struct intel_gt *gt);
#endif /* I915_RESET_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h
index 31968356e0c0..f43bc3a0fe4f 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h
@@ -29,11 +29,17 @@ struct intel_reset {
* we set the #I915_WEDGED bit. Prior to command submission, e.g.
* i915_request_alloc(), this bit is checked and the sequence
* aborted (with -EIO reported to userspace) if set.
+ *
+ * #I915_WEDGED_ON_INIT - If we fail to initialize the GPU we can no
+ * longer use the GPU - similar to the #I915_WEDGED bit. The difference is
+ * in the way we handle a "forced" unwedge (e.g. through debugfs),
+ * which is not allowed if we failed to initialize.
*/
unsigned long flags;
#define I915_RESET_BACKOFF 0
#define I915_RESET_MODESET 1
#define I915_RESET_ENGINE 2
+#define I915_WEDGED_ON_INIT (BITS_PER_LONG - 2)
#define I915_WEDGED (BITS_PER_LONG - 1)
struct mutex mutex; /* serialises wedging/unwedging */
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
new file mode 100644
index 000000000000..ece20504d240
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -0,0 +1,323 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gem/i915_gem_object.h"
+#include "i915_drv.h"
+#include "i915_vma.h"
+#include "intel_engine.h"
+#include "intel_ring.h"
+#include "intel_timeline.h"
+
+unsigned int intel_ring_update_space(struct intel_ring *ring)
+{
+ unsigned int space;
+
+ space = __intel_ring_space(ring->head, ring->emit, ring->size);
+
+ ring->space = space;
+ return space;
+}
+
+int intel_ring_pin(struct intel_ring *ring)
+{
+ struct i915_vma *vma = ring->vma;
+ unsigned int flags;
+ void *addr;
+ int ret;
+
+ if (atomic_fetch_inc(&ring->pin_count))
+ return 0;
+
+ flags = PIN_GLOBAL;
+
+ /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
+ flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+
+ if (vma->obj->stolen)
+ flags |= PIN_MAPPABLE;
+ else
+ flags |= PIN_HIGH;
+
+ ret = i915_vma_pin(vma, 0, 0, flags);
+ if (unlikely(ret))
+ goto err_unpin;
+
+ if (i915_vma_is_map_and_fenceable(vma))
+ addr = (void __force *)i915_vma_pin_iomap(vma);
+ else
+ addr = i915_gem_object_pin_map(vma->obj,
+ i915_coherent_map_type(vma->vm->i915));
+ if (IS_ERR(addr)) {
+ ret = PTR_ERR(addr);
+ goto err_ring;
+ }
+
+ i915_vma_make_unshrinkable(vma);
+
+ GEM_BUG_ON(ring->vaddr);
+ ring->vaddr = addr;
+
+ return 0;
+
+err_ring:
+ i915_vma_unpin(vma);
+err_unpin:
+ atomic_dec(&ring->pin_count);
+ return ret;
+}
+
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+ tail = intel_ring_wrap(ring, tail);
+ ring->tail = tail;
+ ring->head = tail;
+ ring->emit = tail;
+ intel_ring_update_space(ring);
+}
+
+void intel_ring_unpin(struct intel_ring *ring)
+{
+ struct i915_vma *vma = ring->vma;
+
+ if (!atomic_dec_and_test(&ring->pin_count))
+ return;
+
+ /* Discard any unused bytes beyond that submitted to hw. */
+ intel_ring_reset(ring, ring->emit);
+
+ i915_vma_unset_ggtt_write(vma);
+ if (i915_vma_is_map_and_fenceable(vma))
+ i915_vma_unpin_iomap(vma);
+ else
+ i915_gem_object_unpin_map(vma->obj);
+
+ GEM_BUG_ON(!ring->vaddr);
+ ring->vaddr = NULL;
+
+ i915_vma_unpin(vma);
+ i915_vma_make_purgeable(vma);
+}
+
+static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
+{
+ struct i915_address_space *vm = &ggtt->vm;
+ struct drm_i915_private *i915 = vm->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+
+ obj = ERR_PTR(-ENODEV);
+ if (i915_ggtt_has_aperture(ggtt))
+ obj = i915_gem_object_create_stolen(i915, size);
+ if (IS_ERR(obj))
+ obj = i915_gem_object_create_internal(i915, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ /*
+ * Mark ring buffers as read-only from GPU side (so no stray overwrites)
+ * if supported by the platform's GGTT.
+ */
+ if (vm->has_read_only)
+ i915_gem_object_set_readonly(obj);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma))
+ goto err;
+
+ return vma;
+
+err:
+ i915_gem_object_put(obj);
+ return vma;
+}
+
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine, int size)
+{
+ struct drm_i915_private *i915 = engine->i915;
+ struct intel_ring *ring;
+ struct i915_vma *vma;
+
+ GEM_BUG_ON(!is_power_of_2(size));
+ GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
+
+ ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+ if (!ring)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&ring->ref);
+ ring->size = size;
+
+ /*
+ * Work around an erratum on the i830 which causes a hang if
+ * the TAIL pointer points to within the last 2 cachelines
+ * of the buffer.
+ */
+ ring->effective_size = size;
+ if (IS_I830(i915) || IS_I845G(i915))
+ ring->effective_size -= 2 * CACHELINE_BYTES;
+
+ intel_ring_update_space(ring);
+
+ vma = create_ring_vma(engine->gt->ggtt, size);
+ if (IS_ERR(vma)) {
+ kfree(ring);
+ return ERR_CAST(vma);
+ }
+ ring->vma = vma;
+
+ return ring;
+}
+
+void intel_ring_free(struct kref *ref)
+{
+ struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
+
+ i915_vma_put(ring->vma);
+ kfree(ring);
+}
+
+static noinline int
+wait_for_space(struct intel_ring *ring,
+ struct intel_timeline *tl,
+ unsigned int bytes)
+{
+ struct i915_request *target;
+ long timeout;
+
+ if (intel_ring_update_space(ring) >= bytes)
+ return 0;
+
+ GEM_BUG_ON(list_empty(&tl->requests));
+ list_for_each_entry(target, &tl->requests, link) {
+ if (target->ring != ring)
+ continue;
+
+ /* Would completion of this request free enough space? */
+ if (bytes <= __intel_ring_space(target->postfix,
+ ring->emit, ring->size))
+ break;
+ }
+
+ if (GEM_WARN_ON(&target->link == &tl->requests))
+ return -ENOSPC;
+
+ timeout = i915_request_wait(target,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
+ if (timeout < 0)
+ return timeout;
+
+ i915_request_retire_upto(target);
+
+ intel_ring_update_space(ring);
+ GEM_BUG_ON(ring->space < bytes);
+ return 0;
+}
+
+u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
+{
+ struct intel_ring *ring = rq->ring;
+ const unsigned int remain_usable = ring->effective_size - ring->emit;
+ const unsigned int bytes = num_dwords * sizeof(u32);
+ unsigned int need_wrap = 0;
+ unsigned int total_bytes;
+ u32 *cs;
+
+ /* Packets must be qword aligned. */
+ GEM_BUG_ON(num_dwords & 1);
+
+ total_bytes = bytes + rq->reserved_space;
+ GEM_BUG_ON(total_bytes > ring->effective_size);
+
+ if (unlikely(total_bytes > remain_usable)) {
+ const int remain_actual = ring->size - ring->emit;
+
+ if (bytes > remain_usable) {
+ /*
+ * Not enough space for the basic request. So need to
+ * flush out the remainder and then wait for
+ * base + reserved.
+ */
+ total_bytes += remain_actual;
+ need_wrap = remain_actual | 1;
+ } else {
+ /*
+ * The base request will fit but the reserved space
+ * falls off the end. So we don't need an immediate
+ * wrap and only need to effectively wait for the
+ * reserved size from the start of ringbuffer.
+ */
+ total_bytes = rq->reserved_space + remain_actual;
+ }
+ }
+
+ if (unlikely(total_bytes > ring->space)) {
+ int ret;
+
+ /*
+ * Space is reserved in the ringbuffer for finalising the
+ * request, as that cannot be allowed to fail. During request
+ * finalisation, reserved_space is set to 0 to stop the
+ * overallocation and the assumption is that then we never need
+ * to wait (which has the risk of failing with EINTR).
+ *
+ * See also i915_request_alloc() and i915_request_add().
+ */
+ GEM_BUG_ON(!rq->reserved_space);
+
+ ret = wait_for_space(ring,
+ i915_request_timeline(rq),
+ total_bytes);
+ if (unlikely(ret))
+ return ERR_PTR(ret);
+ }
+
+ if (unlikely(need_wrap)) {
+ need_wrap &= ~1;
+ GEM_BUG_ON(need_wrap > ring->space);
+ GEM_BUG_ON(ring->emit + need_wrap > ring->size);
+ GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
+
+ /* Fill the tail with MI_NOOP */
+ memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
+ ring->space -= need_wrap;
+ ring->emit = 0;
+ }
+
+ GEM_BUG_ON(ring->emit > ring->size - bytes);
+ GEM_BUG_ON(ring->space < bytes);
+ cs = ring->vaddr + ring->emit;
+ GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
+ ring->emit += bytes;
+ ring->space -= bytes;
+
+ return cs;
+}
+
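Callers such as the flush and MI_SET_CONTEXT emitters in intel_ring_submission.c follow a fixed pattern around intel_ring_begin(): reserve an even number of dwords, write exactly that many, then let intel_ring_advance() assert the bookkeeping matches. A condensed sketch of that pattern (kernel context assumed, the helper name is invented):

static int emit_two_noops(struct i915_request *rq)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);	/* dword count must be even */
	if (IS_ERR(cs))
		return PTR_ERR(cs);	/* e.g. -ENOSPC, or -EINTR from the wait */

	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;

	/* Asserts that exactly the reserved dwords were written. */
	intel_ring_advance(rq, cs);
	return 0;
}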
+/* Align the ring tail to a cacheline boundary */
+int intel_ring_cacheline_align(struct i915_request *rq)
+{
+ int num_dwords;
+ void *cs;
+
+ num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
+ if (num_dwords == 0)
+ return 0;
+
+ num_dwords = CACHELINE_DWORDS - num_dwords;
+ GEM_BUG_ON(num_dwords & 1);
+
+ cs = intel_ring_begin(rq, num_dwords);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
+ intel_ring_advance(rq, cs + num_dwords);
+
+ GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
new file mode 100644
index 000000000000..ea2839d9e044
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -0,0 +1,131 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RING_H
+#define INTEL_RING_H
+
+#include "i915_gem.h" /* GEM_BUG_ON */
+#include "i915_request.h"
+#include "intel_ring_types.h"
+
+struct intel_engine_cs;
+
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine, int size);
+
+u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords);
+int intel_ring_cacheline_align(struct i915_request *rq);
+
+unsigned int intel_ring_update_space(struct intel_ring *ring);
+
+int intel_ring_pin(struct intel_ring *ring);
+void intel_ring_unpin(struct intel_ring *ring);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+
+void intel_ring_free(struct kref *ref);
+
+static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
+{
+ kref_get(&ring->ref);
+ return ring;
+}
+
+static inline void intel_ring_put(struct intel_ring *ring)
+{
+ kref_put(&ring->ref, intel_ring_free);
+}
+
+static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
+{
+ /* Dummy function.
+ *
+ * This serves as a placeholder in the code so that the reader
+ * can compare against the preceding intel_ring_begin() and
+ * check that the number of dwords emitted matches the space
+ * reserved for the command packet (i.e. the value passed to
+ * intel_ring_begin()).
+ */
+ GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
+}
+
+static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+{
+ return pos & (ring->size - 1);
+}
+
+static inline bool
+intel_ring_offset_valid(const struct intel_ring *ring,
+ unsigned int pos)
+{
+ if (pos & -ring->size) /* must be strictly within the ring */
+ return false;
+
+ if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
+ return false;
+
+ return true;
+}
+
+static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
+{
+ /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
+ u32 offset = addr - rq->ring->vaddr;
+ GEM_BUG_ON(offset > rq->ring->size);
+ return intel_ring_wrap(rq->ring, offset);
+}
+
+static inline void
+assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
+{
+ GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
+
+ /*
+ * "Ring Buffer Use"
+ * Gen2 BSpec "1. Programming Environment" / 1.4.4.6
+ * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
+ * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
+ * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+ * same cacheline, the Head Pointer must not be greater than the Tail
+ * Pointer."
+ *
+ * We use ring->head as the last known location of the actual RING_HEAD,
+ * the real RING_HEAD may have advanced beyond it, but in the worst
+ * case it is exactly ring->head, and so we should never program
+ * RING_TAIL to advance into the same cacheline as ring->head.
+ */
+#define cacheline(a) round_down(a, CACHELINE_BYTES)
+ GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
+ tail < ring->head);
+#undef cacheline
+}
+
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+ /* Whilst writes to the tail are strictly ordered, there is no
+ * serialisation between readers and the writers. The tail may be
+ * read by i915_request_retire() just as it is being updated
+ * by execlists, as although the breadcrumb is complete, the context
+ * switch hasn't been seen.
+ */
+ assert_ring_tail_valid(ring, tail);
+ ring->tail = tail;
+ return tail;
+}
+
+static inline unsigned int
+__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
+{
+ /*
+ * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+ * same cacheline, the Head Pointer must not be greater than the Tail
+ * Pointer."
+ */
+ GEM_BUG_ON(!is_power_of_2(size));
+ return (head - tail - CACHELINE_BYTES) & (size - 1);
+}
+
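A worked example of the free-space arithmetic: with a power-of-two ring the unsigned subtraction wraps modulo the ring size, and one cacheline is always held back so that HEAD and TAIL can never end up in the same cacheline. A standalone sketch (offsets and sizes invented):

#include <stdio.h>

#define EXAMPLE_CACHELINE 64u

/* Same arithmetic as __intel_ring_space(), outside the driver. */
static unsigned int example_ring_space(unsigned int head,
				       unsigned int tail,
				       unsigned int size)
{
	return (head - tail - EXAMPLE_CACHELINE) & (size - 1);
}

int main(void)
{
	/* 4 KiB ring, writer (tail/emit) ahead of the last known head. */
	printf("%u\n", example_ring_space(256, 1024, 4096));	/* 3264 */
	/* Wrapped case: head just past the start, tail near the end. */
	printf("%u\n", example_ring_space(64, 4032, 4096));	/* 64 */
	return 0;
}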
+#endif /* INTEL_RING_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 601c16239fdf..a47d5a7c32c9 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -40,6 +40,7 @@
#include "intel_gt_irq.h"
#include "intel_gt_pm_irq.h"
#include "intel_reset.h"
+#include "intel_ring.h"
#include "intel_workarounds.h"
/* Rough estimate of the typical request size, performing a flush,
@@ -47,16 +48,6 @@
*/
#define LEGACY_REQUEST_SIZE 200
-unsigned int intel_ring_update_space(struct intel_ring *ring)
-{
- unsigned int space;
-
- space = __intel_ring_space(ring->head, ring->emit, ring->size);
-
- ring->space = space;
- return space;
-}
-
static int
gen2_render_ring_flush(struct i915_request *rq, u32 mode)
{
@@ -322,7 +313,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_CS_STALL);
- *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = i915_request_active_timeline(rq)->hwsp_offset |
+ PIPE_CONTROL_GLOBAL_GTT;
*cs++ = rq->fence.seqno;
*cs++ = MI_USER_INTERRUPT;
@@ -425,7 +417,7 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_CS_STALL);
- *cs++ = rq->timeline->hwsp_offset;
+ *cs++ = i915_request_active_timeline(rq)->hwsp_offset;
*cs++ = rq->fence.seqno;
*cs++ = MI_USER_INTERRUPT;
@@ -439,8 +431,8 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+ GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
@@ -459,8 +451,8 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
int i;
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+ GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
@@ -930,6 +922,7 @@ static void cancel_requests(struct intel_engine_cs *engine)
static void i9xx_submit_request(struct i915_request *request)
{
i915_request_submit(request);
+ wmb(); /* paranoid flush writes out of the WCB before mmio */
ENGINE_WRITE(request->engine, RING_TAIL,
intel_ring_set_tail(request->ring, request->tail));
@@ -937,8 +930,8 @@ static void i9xx_submit_request(struct i915_request *request)
static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+ GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH;
@@ -960,8 +953,8 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
int i;
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+ GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH;
@@ -1184,167 +1177,9 @@ i915_emit_bb_start(struct i915_request *rq,
return 0;
}
-int intel_ring_pin(struct intel_ring *ring)
-{
- struct i915_vma *vma = ring->vma;
- unsigned int flags;
- void *addr;
- int ret;
-
- if (atomic_fetch_inc(&ring->pin_count))
- return 0;
-
- flags = PIN_GLOBAL;
-
- /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
- flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-
- if (vma->obj->stolen)
- flags |= PIN_MAPPABLE;
- else
- flags |= PIN_HIGH;
-
- ret = i915_vma_pin(vma, 0, 0, flags);
- if (unlikely(ret))
- goto err_unpin;
-
- if (i915_vma_is_map_and_fenceable(vma))
- addr = (void __force *)i915_vma_pin_iomap(vma);
- else
- addr = i915_gem_object_pin_map(vma->obj,
- i915_coherent_map_type(vma->vm->i915));
- if (IS_ERR(addr)) {
- ret = PTR_ERR(addr);
- goto err_ring;
- }
-
- i915_vma_make_unshrinkable(vma);
-
- GEM_BUG_ON(ring->vaddr);
- ring->vaddr = addr;
-
- return 0;
-
-err_ring:
- i915_vma_unpin(vma);
-err_unpin:
- atomic_dec(&ring->pin_count);
- return ret;
-}
-
-void intel_ring_reset(struct intel_ring *ring, u32 tail)
-{
- tail = intel_ring_wrap(ring, tail);
- ring->tail = tail;
- ring->head = tail;
- ring->emit = tail;
- intel_ring_update_space(ring);
-}
-
-void intel_ring_unpin(struct intel_ring *ring)
-{
- struct i915_vma *vma = ring->vma;
-
- if (!atomic_dec_and_test(&ring->pin_count))
- return;
-
- /* Discard any unused bytes beyond that submitted to hw. */
- intel_ring_reset(ring, ring->emit);
-
- i915_vma_unset_ggtt_write(vma);
- if (i915_vma_is_map_and_fenceable(vma))
- i915_vma_unpin_iomap(vma);
- else
- i915_gem_object_unpin_map(vma->obj);
-
- GEM_BUG_ON(!ring->vaddr);
- ring->vaddr = NULL;
-
- i915_vma_unpin(vma);
- i915_vma_make_purgeable(vma);
-}
-
-static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
-{
- struct i915_address_space *vm = &ggtt->vm;
- struct drm_i915_private *i915 = vm->i915;
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
-
- obj = i915_gem_object_create_stolen(i915, size);
- if (!obj)
- obj = i915_gem_object_create_internal(i915, size);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
-
- /*
- * Mark ring buffers as read-only from GPU side (so no stray overwrites)
- * if supported by the platform's GGTT.
- */
- if (vm->has_read_only)
- i915_gem_object_set_readonly(obj);
-
- vma = i915_vma_instance(obj, vm, NULL);
- if (IS_ERR(vma))
- goto err;
-
- return vma;
-
-err:
- i915_gem_object_put(obj);
- return vma;
-}
-
-struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size)
-{
- struct drm_i915_private *i915 = engine->i915;
- struct intel_ring *ring;
- struct i915_vma *vma;
-
- GEM_BUG_ON(!is_power_of_2(size));
- GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
-
- ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (!ring)
- return ERR_PTR(-ENOMEM);
-
- kref_init(&ring->ref);
-
- ring->size = size;
- /* Workaround an erratum on the i830 which causes a hang if
- * the TAIL pointer points to within the last 2 cachelines
- * of the buffer.
- */
- ring->effective_size = size;
- if (IS_I830(i915) || IS_I845G(i915))
- ring->effective_size -= 2 * CACHELINE_BYTES;
-
- intel_ring_update_space(ring);
-
- vma = create_ring_vma(engine->gt->ggtt, size);
- if (IS_ERR(vma)) {
- kfree(ring);
- return ERR_CAST(vma);
- }
- ring->vma = vma;
-
- return ring;
-}
-
-void intel_ring_free(struct kref *ref)
-{
- struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
-
- i915_vma_close(ring->vma);
- i915_vma_put(ring->vma);
-
- kfree(ring);
-}
-
static void __ring_context_fini(struct intel_context *ce)
{
- i915_gem_object_put(ce->state->obj);
+ i915_vma_put(ce->state);
}
static void ring_context_destroy(struct kref *ref)
@@ -1573,7 +1408,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *engine = rq->engine;
enum intel_engine_id id;
const int num_engines =
- IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
+ IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
bool force_restore = false;
int len;
u32 *cs;
@@ -1609,7 +1444,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *signaller;
*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
- for_each_engine(signaller, i915, id) {
+ for_each_engine(signaller, engine->gt, id) {
if (signaller == engine)
continue;
@@ -1663,7 +1498,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
i915_reg_t last_reg = {}; /* keep gcc quiet */
*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
- for_each_engine(signaller, i915, id) {
+ for_each_engine(signaller, engine->gt, id) {
if (signaller == engine)
continue;
@@ -1676,7 +1511,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
/* Insert a delay before the next switch! */
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
*cs++ = i915_mmio_reg_offset(last_reg);
- *cs++ = intel_gt_scratch_offset(rq->engine->gt,
+ *cs++ = intel_gt_scratch_offset(engine->gt,
INTEL_GT_SCRATCH_FIELD_DEFAULT);
*cs++ = MI_NOOP;
}
@@ -1741,46 +1576,22 @@ static int remap_l3(struct i915_request *rq)
static int switch_context(struct i915_request *rq)
{
- struct intel_engine_cs *engine = rq->engine;
- struct i915_address_space *vm = vm_alias(rq->hw_context);
- unsigned int unwind_mm = 0;
- u32 hw_flags = 0;
+ struct intel_context *ce = rq->hw_context;
+ struct i915_address_space *vm = vm_alias(ce);
int ret;
GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
if (vm) {
- struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- int loops;
-
- /*
- * Baytail takes a little more convincing that it really needs
- * to reload the PD between contexts. It is not just a little
- * longer, as adding more stalls after the load_pd_dir (i.e.
- * adding a long loop around flush_pd_dir) is not as effective
- * as reloading the PD umpteen times. 32 is derived from
- * experimentation (gem_exec_parallel/fds) and has no good
- * explanation.
- */
- loops = 1;
- if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915))
- loops = 32;
-
- do {
- ret = load_pd_dir(rq, ppgtt);
- if (ret)
- goto err;
- } while (--loops);
-
- if (ppgtt->pd_dirty_engines & engine->mask) {
- unwind_mm = engine->mask;
- ppgtt->pd_dirty_engines &= ~unwind_mm;
- hw_flags = MI_FORCE_RESTORE;
- }
+ ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm));
+ if (ret)
+ return ret;
}
- if (rq->hw_context->state) {
- GEM_BUG_ON(engine->id != RCS0);
+ if (ce->state) {
+ u32 hw_flags;
+
+ GEM_BUG_ON(rq->engine->id != RCS0);
/*
* The kernel context(s) is treated as pure scratch and is not
@@ -1789,22 +1600,25 @@ static int switch_context(struct i915_request *rq)
* as nothing actually executes using the kernel context; it
* is purely used for flushing user contexts.
*/
+ hw_flags = 0;
if (i915_gem_context_is_kernel(rq->gem_context))
hw_flags = MI_RESTORE_INHIBIT;
ret = mi_set_context(rq, hw_flags);
if (ret)
- goto err_mm;
+ return ret;
}
if (vm) {
+ struct intel_engine_cs *engine = rq->engine;
+
ret = engine->emit_flush(rq, EMIT_INVALIDATE);
if (ret)
- goto err_mm;
+ return ret;
ret = flush_pd_dir(rq);
if (ret)
- goto err_mm;
+ return ret;
/*
* Not only do we need a full barrier (post-sync write) after
@@ -1816,24 +1630,18 @@ static int switch_context(struct i915_request *rq)
*/
ret = engine->emit_flush(rq, EMIT_INVALIDATE);
if (ret)
- goto err_mm;
+ return ret;
ret = engine->emit_flush(rq, EMIT_FLUSH);
if (ret)
- goto err_mm;
+ return ret;
}
ret = remap_l3(rq);
if (ret)
- goto err_mm;
+ return ret;
return 0;
-
-err_mm:
- if (unwind_mm)
- i915_vm_to_ppgtt(vm)->pd_dirty_engines |= unwind_mm;
-err:
- return ret;
}
static int ring_request_alloc(struct i915_request *request)
@@ -1841,7 +1649,7 @@ static int ring_request_alloc(struct i915_request *request)
int ret;
GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
- GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
+ GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);
/*
* Flush enough space to reduce the likelihood of waiting after
@@ -1863,146 +1671,6 @@ static int ring_request_alloc(struct i915_request *request)
return 0;
}
-static noinline int
-wait_for_space(struct intel_ring *ring,
- struct intel_timeline *tl,
- unsigned int bytes)
-{
- struct i915_request *target;
- long timeout;
-
- if (intel_ring_update_space(ring) >= bytes)
- return 0;
-
- GEM_BUG_ON(list_empty(&tl->requests));
- list_for_each_entry(target, &tl->requests, link) {
- if (target->ring != ring)
- continue;
-
- /* Would completion of this request free enough space? */
- if (bytes <= __intel_ring_space(target->postfix,
- ring->emit, ring->size))
- break;
- }
-
- if (GEM_WARN_ON(&target->link == &tl->requests))
- return -ENOSPC;
-
- timeout = i915_request_wait(target,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT);
- if (timeout < 0)
- return timeout;
-
- i915_request_retire_upto(target);
-
- intel_ring_update_space(ring);
- GEM_BUG_ON(ring->space < bytes);
- return 0;
-}
-
-u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
-{
- struct intel_ring *ring = rq->ring;
- const unsigned int remain_usable = ring->effective_size - ring->emit;
- const unsigned int bytes = num_dwords * sizeof(u32);
- unsigned int need_wrap = 0;
- unsigned int total_bytes;
- u32 *cs;
-
- /* Packets must be qword aligned. */
- GEM_BUG_ON(num_dwords & 1);
-
- total_bytes = bytes + rq->reserved_space;
- GEM_BUG_ON(total_bytes > ring->effective_size);
-
- if (unlikely(total_bytes > remain_usable)) {
- const int remain_actual = ring->size - ring->emit;
-
- if (bytes > remain_usable) {
- /*
- * Not enough space for the basic request. So need to
- * flush out the remainder and then wait for
- * base + reserved.
- */
- total_bytes += remain_actual;
- need_wrap = remain_actual | 1;
- } else {
- /*
- * The base request will fit but the reserved space
- * falls off the end. So we don't need an immediate
- * wrap and only need to effectively wait for the
- * reserved size from the start of ringbuffer.
- */
- total_bytes = rq->reserved_space + remain_actual;
- }
- }
-
- if (unlikely(total_bytes > ring->space)) {
- int ret;
-
- /*
- * Space is reserved in the ringbuffer for finalising the
- * request, as that cannot be allowed to fail. During request
- * finalisation, reserved_space is set to 0 to stop the
- * overallocation and the assumption is that then we never need
- * to wait (which has the risk of failing with EINTR).
- *
- * See also i915_request_alloc() and i915_request_add().
- */
- GEM_BUG_ON(!rq->reserved_space);
-
- ret = wait_for_space(ring, rq->timeline, total_bytes);
- if (unlikely(ret))
- return ERR_PTR(ret);
- }
-
- if (unlikely(need_wrap)) {
- need_wrap &= ~1;
- GEM_BUG_ON(need_wrap > ring->space);
- GEM_BUG_ON(ring->emit + need_wrap > ring->size);
- GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
-
- /* Fill the tail with MI_NOOP */
- memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
- ring->space -= need_wrap;
- ring->emit = 0;
- }
-
- GEM_BUG_ON(ring->emit > ring->size - bytes);
- GEM_BUG_ON(ring->space < bytes);
- cs = ring->vaddr + ring->emit;
- GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
- ring->emit += bytes;
- ring->space -= bytes;
-
- return cs;
-}
-
-/* Align the ring tail to a cacheline boundary */
-int intel_ring_cacheline_align(struct i915_request *rq)
-{
- int num_dwords;
- void *cs;
-
- num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
- if (num_dwords == 0)
- return 0;
-
- num_dwords = CACHELINE_DWORDS - num_dwords;
- GEM_BUG_ON(num_dwords & 1);
-
- cs = intel_ring_begin(rq, num_dwords);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
- intel_ring_advance(rq, cs);
-
- GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
- return 0;
-}
-
static void gen6_bsd_submit_request(struct i915_request *request)
{
struct intel_uncore *uncore = request->engine->uncore;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h
new file mode 100644
index 000000000000..d9f17f38e0cc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h
@@ -0,0 +1,51 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RING_TYPES_H
+#define INTEL_RING_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/kref.h>
+#include <linux/types.h>
+
+/*
+ * Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
+ * but keeps the logic simple. Indeed, the whole purpose of this macro is just
+ * to give some indication of the magic values used in the various
+ * workarounds!
+ */
+#define CACHELINE_BYTES 64
+#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
+
+struct i915_vma;
+
+struct intel_ring {
+ struct kref ref;
+ struct i915_vma *vma;
+ void *vaddr;
+
+ /*
+ * As we have two types of rings, one global to the engine used
+ * by ringbuffer submission and those that are exclusive to a
+ * context used by execlists, we have to play safe and allow
+ * atomic updates to the pin_count. However, the actual pinning
+ * of the context is either done during initialisation for
+ * ringbuffer submission or serialised as part of the context
+ * pinning for execlists, and so we do not need a mutex ourselves
+ * to serialise intel_ring_pin/intel_ring_unpin.
+ */
+ atomic_t pin_count;
+
+ u32 head;
+ u32 tail;
+ u32 emit;
+
+ u32 space;
+ u32 size;
+ u32 effective_size;
+};
+
+#endif /* INTEL_RING_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
new file mode 100644
index 000000000000..20d6ee148afc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -0,0 +1,1872 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_irq.h"
+#include "intel_gt_pm_irq.h"
+#include "intel_rps.h"
+#include "intel_sideband.h"
+#include "../../../platform/x86/intel_ips.h"
+
+/*
+ * Lock protecting IPS related data structures
+ */
+static DEFINE_SPINLOCK(mchdev_lock);
+
+static struct intel_gt *rps_to_gt(struct intel_rps *rps)
+{
+ return container_of(rps, struct intel_gt, rps);
+}
+
+static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
+{
+ return rps_to_gt(rps)->i915;
+}
+
+static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
+{
+ return rps_to_gt(rps)->uncore;
+}
+
+static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
+{
+ return mask & ~rps->pm_intrmsk_mbz;
+}
+
+static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
+{
+ u32 mask = 0;
+
+ /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
+ if (val > rps->min_freq_softlimit)
+ mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
+ GEN6_PM_RP_DOWN_THRESHOLD |
+ GEN6_PM_RP_DOWN_TIMEOUT);
+
+ if (val < rps->max_freq_softlimit)
+ mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
+
+ mask &= rps->pm_events;
+
+ return rps_pm_sanitize_mask(rps, ~mask);
+}
+
+static void rps_reset_ei(struct intel_rps *rps)
+{
+ memset(&rps->ei, 0, sizeof(rps->ei));
+}
+
+static void rps_enable_interrupts(struct intel_rps *rps)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+
+ rps_reset_ei(rps);
+
+ if (IS_VALLEYVIEW(gt->i915))
+ /* WaGsvRC0ResidencyMethod:vlv */
+ rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
+ else
+ rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
+ GEN6_PM_RP_DOWN_THRESHOLD |
+ GEN6_PM_RP_DOWN_TIMEOUT);
+
+ spin_lock_irq(&gt->irq_lock);
+ gen6_gt_pm_enable_irq(gt, rps->pm_events);
+ spin_unlock_irq(&gt->irq_lock);
+
+ intel_uncore_write(gt->uncore, GEN6_PMINTRMSK,
+ rps_pm_mask(rps, rps->cur_freq));
+}
+
+static void gen6_rps_reset_interrupts(struct intel_rps *rps)
+{
+ gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
+}
+
+static void gen11_rps_reset_interrupts(struct intel_rps *rps)
+{
+ while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
+ ;
+}
+
+static void rps_reset_interrupts(struct intel_rps *rps)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+
+ spin_lock_irq(&gt->irq_lock);
+ if (INTEL_GEN(gt->i915) >= 11)
+ gen11_rps_reset_interrupts(rps);
+ else
+ gen6_rps_reset_interrupts(rps);
+
+ rps->pm_iir = 0;
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+static void rps_disable_interrupts(struct intel_rps *rps)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+
+ rps->pm_events = 0;
+
+ intel_uncore_write(gt->uncore, GEN6_PMINTRMSK,
+ rps_pm_sanitize_mask(rps, ~0u));
+
+ spin_lock_irq(&gt->irq_lock);
+ gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
+ spin_unlock_irq(&gt->irq_lock);
+
+ intel_synchronize_irq(gt->i915);
+
+ /*
+ * Now that we will not be generating any more work, flush any
+ * outstanding tasks. As we are called on the RPS idle path,
+ * we will reset the GPU to minimum frequencies, so the current
+ * state of the worker can be discarded.
+ */
+ cancel_work_sync(&rps->work);
+
+ rps_reset_interrupts(rps);
+}
+
+static const struct cparams {
+ u16 i;
+ u16 t;
+ u16 m;
+ u16 c;
+} cparams[] = {
+ { 1, 1333, 301, 28664 },
+ { 1, 1066, 294, 24460 },
+ { 1, 800, 294, 25192 },
+ { 0, 1333, 276, 27605 },
+ { 0, 1066, 276, 27605 },
+ { 0, 800, 231, 23784 },
+};
+
+static void gen5_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u8 fmax, fmin, fstart;
+ u32 rgvmodectl;
+ int c_m, i;
+
+ if (i915->fsb_freq <= 3200)
+ c_m = 0;
+ else if (i915->fsb_freq <= 4800)
+ c_m = 1;
+ else
+ c_m = 2;
+
+ for (i = 0; i < ARRAY_SIZE(cparams); i++) {
+ if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
+ rps->ips.m = cparams[i].m;
+ rps->ips.c = cparams[i].c;
+ break;
+ }
+ }
+
+ rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
+
+ /* Set up min, max, and cur for interrupt handling */
+ fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
+ fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
+ fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
+ MEMMODE_FSTART_SHIFT;
+ DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
+ fmax, fmin, fstart);
+
+ rps->min_freq = fmax;
+ rps->max_freq = fmin;
+
+ rps->idle_freq = rps->min_freq;
+ rps->cur_freq = rps->idle_freq;
+}
+
+static unsigned long
+__ips_chipset_val(struct intel_ips *ips)
+{
+ struct intel_uncore *uncore =
+ rps_to_uncore(container_of(ips, struct intel_rps, ips));
+ unsigned long now = jiffies_to_msecs(jiffies), dt;
+ unsigned long result;
+ u64 total, delta;
+
+ lockdep_assert_held(&mchdev_lock);
+
+ /*
+ * Prevent division-by-zero if we are asking too fast.
+ * Also, we don't get interesting results if we are polling
+ * faster than once in 10ms, so just return the saved value
+ * in such cases.
+ */
+ dt = now - ips->last_time1;
+ if (dt <= 10)
+ return ips->chipset_power;
+
+ /* FIXME: handle per-counter overflow */
+ total = intel_uncore_read(uncore, DMIEC);
+ total += intel_uncore_read(uncore, DDREC);
+ total += intel_uncore_read(uncore, CSIEC);
+
+ delta = total - ips->last_count1;
+
+ result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10);
+
+ ips->last_count1 = total;
+ ips->last_time1 = now;
+
+ ips->chipset_power = result;
+
+ return result;
+}
+
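__ips_chipset_val() is a linear fit over the delta of the three energy counters: result = (m * delta / dt + c) / 10, with m and c taken from the matching cparams[] row above. A standalone arithmetic sketch with invented counter values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Coefficients from one cparams[] row (m = 301, c = 28664). */
	const uint64_t m = 301, c = 28664;
	/* Hypothetical counter delta over a 100ms polling window. */
	const uint64_t delta = 1500000, dt = 100;

	/* Same formula as __ips_chipset_val(). */
	printf("chipset power ~ %llu\n",
	       (unsigned long long)((m * delta / dt + c) / 10));
	return 0;
}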
+static unsigned long ips_mch_val(struct intel_uncore *uncore)
+{
+ unsigned int m, x, b;
+ u32 tsfs;
+
+ tsfs = intel_uncore_read(uncore, TSFS);
+ x = intel_uncore_read8(uncore, TR1);
+
+ b = tsfs & TSFS_INTR_MASK;
+ m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;
+
+ return m * x / 127 - b;
+}
+
+static int _pxvid_to_vd(u8 pxvid)
+{
+ if (pxvid == 0)
+ return 0;
+
+ if (pxvid >= 8 && pxvid < 31)
+ pxvid = 31;
+
+ return (pxvid + 2) * 125;
+}
+
+static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
+{
+ const int vd = _pxvid_to_vd(pxvid);
+
+ if (INTEL_INFO(i915)->is_mobile)
+ return max(vd - 1125, 0);
+
+ return vd;
+}
+
+static void __gen5_ips_update(struct intel_ips *ips)
+{
+ struct intel_uncore *uncore =
+ rps_to_uncore(container_of(ips, struct intel_rps, ips));
+ u64 now, delta, dt;
+ u32 count;
+
+ lockdep_assert_held(&mchdev_lock);
+
+ now = ktime_get_raw_ns();
+ dt = now - ips->last_time2;
+ do_div(dt, NSEC_PER_MSEC);
+
+ /* Don't divide by 0 */
+ if (dt <= 10)
+ return;
+
+ count = intel_uncore_read(uncore, GFXEC);
+ delta = count - ips->last_count2;
+
+ ips->last_count2 = count;
+ ips->last_time2 = now;
+
+ /* More magic constants... */
+ ips->gfx_power = div_u64(delta * 1181, dt * 10);
+}
+
+static void gen5_rps_update(struct intel_rps *rps)
+{
+ spin_lock_irq(&mchdev_lock);
+ __gen5_ips_update(&rps->ips);
+ spin_unlock_irq(&mchdev_lock);
+}
+
+static bool gen5_rps_set(struct intel_rps *rps, u8 val)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u16 rgvswctl;
+
+ lockdep_assert_held(&mchdev_lock);
+
+ rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
+ if (rgvswctl & MEMCTL_CMD_STS) {
+ DRM_DEBUG("gpu busy, RCS change rejected\n");
+ return false; /* still busy with another command */
+ }
+
+ /* Invert the frequency bin into an ips delay */
+ val = rps->max_freq - val;
+ val = rps->min_freq + val;
+
+ rgvswctl =
+ (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
+ (val << MEMCTL_FREQ_SHIFT) |
+ MEMCTL_SFCAVM;
+ intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
+ intel_uncore_posting_read16(uncore, MEMSWCTL);
+
+ rgvswctl |= MEMCTL_CMD_STS;
+ intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
+
+ return true;
+}
+
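gen5_rps_set() does not program the requested value directly: it reflects it between min_freq and max_freq ("invert the frequency bin into an ips delay") before packing it into MEMSWCTL. A minimal sketch of just that remapping, with assumed endpoints:

#include <stdio.h>

/* Same two-step remap as gen5_rps_set(): val' = min + (max - val). */
static unsigned int example_invert_bin(unsigned int val,
				       unsigned int min_bin,
				       unsigned int max_bin)
{
	val = max_bin - val;
	return min_bin + val;
}

int main(void)
{
	/* Assumed endpoints standing in for rps->min_freq/rps->max_freq. */
	const unsigned int min_bin = 3, max_bin = 12;
	unsigned int val;

	for (val = min_bin; val <= max_bin; val++)
		printf("requested %2u -> programmed %2u\n",
		       val, example_invert_bin(val, min_bin, max_bin));
	return 0;
}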
+static unsigned long intel_pxfreq(u32 vidfreq)
+{
+ int div = (vidfreq & 0x3f0000) >> 16;
+ int post = (vidfreq & 0x3000) >> 12;
+ int pre = (vidfreq & 0x7);
+
+ if (!pre)
+ return 0;
+
+ return div * 133333 / (pre << post);
+}
+
+static unsigned int init_emon(struct intel_uncore *uncore)
+{
+ u8 pxw[16];
+ int i;
+
+ /* Disable to program */
+ intel_uncore_write(uncore, ECR, 0);
+ intel_uncore_posting_read(uncore, ECR);
+
+ /* Program energy weights for various events */
+ intel_uncore_write(uncore, SDEW, 0x15040d00);
+ intel_uncore_write(uncore, CSIEW0, 0x007f0000);
+ intel_uncore_write(uncore, CSIEW1, 0x1e220004);
+ intel_uncore_write(uncore, CSIEW2, 0x04000004);
+
+ for (i = 0; i < 5; i++)
+ intel_uncore_write(uncore, PEW(i), 0);
+ for (i = 0; i < 3; i++)
+ intel_uncore_write(uncore, DEW(i), 0);
+
+ /* Program P-state weights to account for frequency power adjustment */
+ for (i = 0; i < 16; i++) {
+ u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i));
+ unsigned int freq = intel_pxfreq(pxvidfreq);
+ unsigned int vid =
+ (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
+ unsigned int val;
+
+ val = vid * vid * freq / 1000 * 255;
+ val /= 127 * 127 * 900;
+
+ pxw[i] = val;
+ }
+ /* Render standby states get 0 weight */
+ pxw[14] = 0;
+ pxw[15] = 0;
+
+ for (i = 0; i < 4; i++) {
+ intel_uncore_write(uncore, PXW(i),
+ pxw[i * 4 + 0] << 24 |
+ pxw[i * 4 + 1] << 16 |
+ pxw[i * 4 + 2] << 8 |
+ pxw[i * 4 + 3] << 0);
+ }
+
+ /* Adjust magic regs to magic values (more experimental results) */
+ intel_uncore_write(uncore, OGW0, 0);
+ intel_uncore_write(uncore, OGW1, 0);
+ intel_uncore_write(uncore, EG0, 0x00007f00);
+ intel_uncore_write(uncore, EG1, 0x0000000e);
+ intel_uncore_write(uncore, EG2, 0x000e0000);
+ intel_uncore_write(uncore, EG3, 0x68000300);
+ intel_uncore_write(uncore, EG4, 0x42000000);
+ intel_uncore_write(uncore, EG5, 0x00140031);
+ intel_uncore_write(uncore, EG6, 0);
+ intel_uncore_write(uncore, EG7, 0);
+
+ for (i = 0; i < 8; i++)
+ intel_uncore_write(uncore, PXWL(i), 0);
+
+ /* Enable PMON + select events */
+ intel_uncore_write(uncore, ECR, 0x80000019);
+
+ return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK;
+}
+
+static bool gen5_rps_enable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u8 fstart, vstart;
+ u32 rgvmodectl;
+
+ spin_lock_irq(&mchdev_lock);
+
+ rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
+
+ /* Enable temp reporting */
+ intel_uncore_write16(uncore, PMMISC,
+ intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN);
+ intel_uncore_write16(uncore, TSC1,
+ intel_uncore_read16(uncore, TSC1) | TSE);
+
+ /* 100ms RC evaluation intervals */
+ intel_uncore_write(uncore, RCUPEI, 100000);
+ intel_uncore_write(uncore, RCDNEI, 100000);
+
+ /* Set max/min thresholds to 90ms and 80ms respectively */
+ intel_uncore_write(uncore, RCBMAXAVG, 90000);
+ intel_uncore_write(uncore, RCBMINAVG, 80000);
+
+ intel_uncore_write(uncore, MEMIHYST, 1);
+
+ /* Set up min, max, and cur for interrupt handling */
+ fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
+ MEMMODE_FSTART_SHIFT;
+
+ vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
+ PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
+
+ intel_uncore_write(uncore,
+ MEMINTREN,
+ MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
+
+ intel_uncore_write(uncore, VIDSTART, vstart);
+ intel_uncore_posting_read(uncore, VIDSTART);
+
+ rgvmodectl |= MEMMODE_SWMODE_EN;
+ intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);
+
+ if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
+ MEMCTL_CMD_STS) == 0, 10))
+ DRM_ERROR("stuck trying to change perf mode\n");
+ mdelay(1);
+
+ gen5_rps_set(rps, rps->cur_freq);
+
+ rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
+ rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
+ rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC);
+ rps->ips.last_time1 = jiffies_to_msecs(jiffies);
+
+ rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
+ rps->ips.last_time2 = ktime_get_raw_ns();
+
+ spin_unlock_irq(&mchdev_lock);
+
+ rps->ips.corr = init_emon(uncore);
+
+ return true;
+}
+
+static void gen5_rps_disable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u16 rgvswctl;
+
+ spin_lock_irq(&mchdev_lock);
+
+ rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
+
+ /* Ack interrupts, disable EFC interrupt */
+ intel_uncore_write(uncore, MEMINTREN,
+ intel_uncore_read(uncore, MEMINTREN) &
+ ~MEMINT_EVAL_CHG_EN);
+ intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
+ intel_uncore_write(uncore, DEIER,
+ intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT);
+ intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT);
+ intel_uncore_write(uncore, DEIMR,
+ intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT);
+
+ /* Go back to the starting frequency */
+ gen5_rps_set(rps, rps->idle_freq);
+ mdelay(1);
+ rgvswctl |= MEMCTL_CMD_STS;
+ intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
+ mdelay(1);
+
+ spin_unlock_irq(&mchdev_lock);
+}
+
+static u32 rps_limits(struct intel_rps *rps, u8 val)
+{
+ u32 limits;
+
+ /*
+ * Only set the down limit when we've reached the lowest level to avoid
+ * getting more interrupts, otherwise leave this clear. This prevents a
+ * race in the hw when coming out of rc6: There's a tiny window where
+ * the hw runs at the minimal clock before selecting the desired
+ * frequency, if the down threshold expires in that window we will not
+ * receive a down interrupt.
+ */
+ if (INTEL_GEN(rps_to_i915(rps)) >= 9) {
+ limits = rps->max_freq_softlimit << 23;
+ if (val <= rps->min_freq_softlimit)
+ limits |= rps->min_freq_softlimit << 14;
+ } else {
+ limits = rps->max_freq_softlimit << 24;
+ if (val <= rps->min_freq_softlimit)
+ limits |= rps->min_freq_softlimit << 16;
+ }
+
+ return limits;
+}
+
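GEN6_RP_INTERRUPT_LIMITS packs the two soft limits at generation-dependent bit offsets (23/14 on gen9+, 24/16 before that), and the down limit is only written once the request has reached the floor, per the comment above. A standalone sketch of the packing with invented limit values:

#include <stdio.h>
#include <stdint.h>

/* Same packing as rps_limits(); gen9plus selects the 23/14 layout. */
static uint32_t example_rps_limits(int gen9plus, uint8_t val,
				   uint8_t min_soft, uint8_t max_soft)
{
	uint32_t limits;

	if (gen9plus) {
		limits = (uint32_t)max_soft << 23;
		if (val <= min_soft)
			limits |= (uint32_t)min_soft << 14;
	} else {
		limits = (uint32_t)max_soft << 24;
		if (val <= min_soft)
			limits |= (uint32_t)min_soft << 16;
	}
	return limits;
}

int main(void)
{
	/* Hypothetical softlimits in hardware frequency units. */
	const uint8_t min_soft = 0x0b, max_soft = 0x16;

	printf("gen9+, above the floor: %08x\n",
	       (unsigned int)example_rps_limits(1, 0x10, min_soft, max_soft));
	printf("gen9+, at the floor:    %08x\n",
	       (unsigned int)example_rps_limits(1, min_soft, min_soft, max_soft));
	return 0;
}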
+static void rps_set_power(struct intel_rps *rps, int new_power)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 threshold_up = 0, threshold_down = 0; /* in % */
+ u32 ei_up = 0, ei_down = 0;
+
+ lockdep_assert_held(&rps->power.mutex);
+
+ if (new_power == rps->power.mode)
+ return;
+
+ /* Note the units here are not exactly 1us, but 1280ns. */
+ switch (new_power) {
+ case LOW_POWER:
+ /* Upclock if more than 95% busy over 16ms */
+ ei_up = 16000;
+ threshold_up = 95;
+
+ /* Downclock if less than 85% busy over 32ms */
+ ei_down = 32000;
+ threshold_down = 85;
+ break;
+
+ case BETWEEN:
+ /* Upclock if more than 90% busy over 13ms */
+ ei_up = 13000;
+ threshold_up = 90;
+
+ /* Downclock if less than 75% busy over 32ms */
+ ei_down = 32000;
+ threshold_down = 75;
+ break;
+
+ case HIGH_POWER:
+ /* Upclock if more than 85% busy over 10ms */
+ ei_up = 10000;
+ threshold_up = 85;
+
+ /* Downclock if less than 60% busy over 32ms */
+ ei_down = 32000;
+ threshold_down = 60;
+ break;
+ }
+
+ /* When Baytrail can survive dynamic sw freq adjustments without
+ * hanging the system, this restriction can be lifted.
+ */
+ if (IS_VALLEYVIEW(i915))
+ goto skip_hw_write;
+
+ intel_uncore_write(uncore, GEN6_RP_UP_EI,
+ GT_INTERVAL_FROM_US(i915, ei_up));
+ intel_uncore_write(uncore, GEN6_RP_UP_THRESHOLD,
+ GT_INTERVAL_FROM_US(i915,
+ ei_up * threshold_up / 100));
+
+ intel_uncore_write(uncore, GEN6_RP_DOWN_EI,
+ GT_INTERVAL_FROM_US(i915, ei_down));
+ intel_uncore_write(uncore, GEN6_RP_DOWN_THRESHOLD,
+ GT_INTERVAL_FROM_US(i915,
+ ei_down * threshold_down / 100));
+
+ intel_uncore_write(uncore, GEN6_RP_CONTROL,
+ (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
+ GEN6_RP_MEDIA_HW_NORMAL_MODE |
+ GEN6_RP_MEDIA_IS_GFX |
+ GEN6_RP_ENABLE |
+ GEN6_RP_UP_BUSY_AVG |
+ GEN6_RP_DOWN_IDLE_AVG);
+
+skip_hw_write:
+ rps->power.mode = new_power;
+ rps->power.up_threshold = threshold_up;
+ rps->power.down_threshold = threshold_down;
+}
+
+static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
+{
+ int new_power;
+
+ new_power = rps->power.mode;
+ switch (rps->power.mode) {
+ case LOW_POWER:
+ if (val > rps->efficient_freq + 1 &&
+ val > rps->cur_freq)
+ new_power = BETWEEN;
+ break;
+
+ case BETWEEN:
+ if (val <= rps->efficient_freq &&
+ val < rps->cur_freq)
+ new_power = LOW_POWER;
+ else if (val >= rps->rp0_freq &&
+ val > rps->cur_freq)
+ new_power = HIGH_POWER;
+ break;
+
+ case HIGH_POWER:
+ if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
+ val < rps->cur_freq)
+ new_power = BETWEEN;
+ break;
+ }
+ /* Max/min bins are special */
+ if (val <= rps->min_freq_softlimit)
+ new_power = LOW_POWER;
+ if (val >= rps->max_freq_softlimit)
+ new_power = HIGH_POWER;
+
+ mutex_lock(&rps->power.mutex);
+ if (rps->power.interactive)
+ new_power = HIGH_POWER;
+ rps_set_power(rps, new_power);
+ mutex_unlock(&rps->power.mutex);
+}
+
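gen6_rps_set_thresholds() keeps a small hysteresis state machine (LOW_POWER / BETWEEN / HIGH_POWER) so the evaluation intervals and thresholds only change once the requested frequency has clearly crossed RPe or RP0. A standalone restatement of just the transition rules (the softlimit clamping and the interactive override are left out, and all frequencies are invented):

#include <stdio.h>

enum example_mode { EX_LOW, EX_BETWEEN, EX_HIGH };

/* Transition rules mirrored from gen6_rps_set_thresholds(). */
static enum example_mode
example_next_mode(enum example_mode mode, unsigned int val, unsigned int cur,
		  unsigned int rpe, unsigned int rp1, unsigned int rp0)
{
	switch (mode) {
	case EX_LOW:
		if (val > rpe + 1 && val > cur)
			mode = EX_BETWEEN;
		break;
	case EX_BETWEEN:
		if (val <= rpe && val < cur)
			mode = EX_LOW;
		else if (val >= rp0 && val > cur)
			mode = EX_HIGH;
		break;
	case EX_HIGH:
		if (val < (rp1 + rp0) / 2 && val < cur)
			mode = EX_BETWEEN;
		break;
	}
	return mode;
}

int main(void)
{
	/* Invented operating points: RPe = 11, RP1 = 13, RP0 = 22. */
	const unsigned int rpe = 11, rp1 = 13, rp0 = 22;
	const unsigned int reqs[] = { 8, 14, 22, 16, 9 };
	enum example_mode mode = EX_LOW;
	unsigned int cur = 5;
	unsigned int i;

	for (i = 0; i < sizeof(reqs) / sizeof(reqs[0]); i++) {
		mode = example_next_mode(mode, reqs[i], cur, rpe, rp1, rp0);
		cur = reqs[i];
		printf("request %2u -> mode %d\n", reqs[i], mode);
	}
	return 0;
}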
+void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
+{
+ mutex_lock(&rps->power.mutex);
+ if (interactive) {
+ if (!rps->power.interactive++ && rps->active)
+ rps_set_power(rps, HIGH_POWER);
+ } else {
+ GEM_BUG_ON(!rps->power.interactive);
+ rps->power.interactive--;
+ }
+ mutex_unlock(&rps->power.mutex);
+}
+
+static int gen6_rps_set(struct intel_rps *rps, u8 val)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 swreq;
+
+ if (INTEL_GEN(i915) >= 9)
+ swreq = GEN9_FREQUENCY(val);
+ else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+ swreq = HSW_FREQUENCY(val);
+ else
+ swreq = (GEN6_FREQUENCY(val) |
+ GEN6_OFFSET(0) |
+ GEN6_AGGRESSIVE_TURBO);
+ intel_uncore_write(uncore, GEN6_RPNSWREQ, swreq);
+
+ return 0;
+}
+
+static int vlv_rps_set(struct intel_rps *rps, u8 val)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ int err;
+
+ vlv_punit_get(i915);
+ err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
+ vlv_punit_put(i915);
+
+ return err;
+}
+
+static int rps_set(struct intel_rps *rps, u8 val)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ int err;
+
+ if (INTEL_GEN(i915) < 6)
+ return 0;
+
+ if (val == rps->last_freq)
+ return 0;
+
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+ err = vlv_rps_set(rps, val);
+ else
+ err = gen6_rps_set(rps, val);
+ if (err)
+ return err;
+
+ gen6_rps_set_thresholds(rps, val);
+ rps->last_freq = val;
+
+ return 0;
+}
+
+void intel_rps_unpark(struct intel_rps *rps)
+{
+ u8 freq;
+
+ if (!rps->enabled)
+ return;
+
+ /*
+ * Use the user's desired frequency as a guide, but for better
+ * performance, jump directly to RPe as our starting frequency.
+ */
+ mutex_lock(&rps->lock);
+ rps->active = true;
+ freq = max(rps->cur_freq, rps->efficient_freq);
+ freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit);
+ intel_rps_set(rps, freq);
+ rps->last_adj = 0;
+ mutex_unlock(&rps->lock);
+
+ if (INTEL_GEN(rps_to_i915(rps)) >= 6)
+ rps_enable_interrupts(rps);
+
+ if (IS_GEN(rps_to_i915(rps), 5))
+ gen5_rps_update(rps);
+}
+
+void intel_rps_park(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (!rps->enabled)
+ return;
+
+ if (INTEL_GEN(i915) >= 6)
+ rps_disable_interrupts(rps);
+
+ rps->active = false;
+ if (rps->last_freq <= rps->idle_freq)
+ return;
+
+ /*
+ * The punit delays the write of the frequency and voltage until it
+ * determines the GPU is awake. During normal usage we don't want to
+ * waste power changing the frequency if the GPU is sleeping (rc6).
+ * However, the GPU and driver are now idle and we do not want to delay
+ * switching to minimum voltage (reducing power whilst idle) as we do
+ * not expect to be woken in the near future and so must flush the
+ * change by waking the device.
+ *
+ * We choose to take the media powerwell (either would do to trick the
+ * punit into committing the voltage change) as that takes a lot less
+ * power than the render powerwell.
+ */
+ intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
+ rps_set(rps, rps->idle_freq);
+ intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
+}
+
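+/*
+ * Waitboost: if @rq has not yet completed, flag it for a one-off boost and
+ * kick the RPS worker so the GPU is raised towards boost_freq while a client
+ * is waiting on it. A request may only contribute a single boost.
+ */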
+void intel_rps_boost(struct i915_request *rq)
+{
+ struct intel_rps *rps = &rq->engine->gt->rps;
+ unsigned long flags;
+
+ if (i915_request_signaled(rq) || !rps->active)
+ return;
+
+ /* Serializes with i915_request_retire() */
+ spin_lock_irqsave(&rq->lock, flags);
+ if (!i915_request_has_waitboost(rq) &&
+ !dma_fence_is_signaled_locked(&rq->fence)) {
+ rq->flags |= I915_REQUEST_WAITBOOST;
+
+ if (!atomic_fetch_inc(&rps->num_waiters) &&
+ READ_ONCE(rps->cur_freq) < rps->boost_freq)
+ schedule_work(&rps->work);
+
+ atomic_inc(&rps->boosts);
+ }
+ spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+int intel_rps_set(struct intel_rps *rps, u8 val)
+{
+ int err = 0;
+
+ lockdep_assert_held(&rps->lock);
+ GEM_BUG_ON(val > rps->max_freq);
+ GEM_BUG_ON(val < rps->min_freq);
+
+ if (rps->active) {
+ err = rps_set(rps, val);
+
+ /*
+ * Make sure we continue to get interrupts
+ * until we hit the minimum or maximum frequencies.
+ */
+ if (INTEL_GEN(rps_to_i915(rps)) >= 6) {
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ intel_uncore_write(uncore, GEN6_RP_INTERRUPT_LIMITS,
+ rps_limits(rps, val));
+
+ intel_uncore_write(uncore, GEN6_PMINTRMSK,
+ rps_pm_mask(rps, val));
+ }
+ }
+
+ if (err == 0)
+ rps->cur_freq = val;
+
+ return err;
+}
+
+static void gen6_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ /* All of these values are in units of 50MHz */
+
+ /* static values from HW: RP0 > RP1 > RPn (min_freq) */
+ if (IS_GEN9_LP(i915)) {
+ u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+
+ rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
+ rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
+ rps->min_freq = (rp_state_cap >> 0) & 0xff;
+ } else {
+ u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+
+ rps->rp0_freq = (rp_state_cap >> 0) & 0xff;
+ rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
+ rps->min_freq = (rp_state_cap >> 16) & 0xff;
+ }
+
+ /* hw_max = RP0 until we check for overclocking */
+ rps->max_freq = rps->rp0_freq;
+
+ rps->efficient_freq = rps->rp1_freq;
+ if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
+ IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
+ u32 ddcc_status = 0;
+
+ if (sandybridge_pcode_read(i915,
+ HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
+ &ddcc_status, NULL) == 0)
+ rps->efficient_freq =
+ clamp_t(u8,
+ (ddcc_status >> 8) & 0xff,
+ rps->min_freq,
+ rps->max_freq);
+ }
+
+ if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
+ /*
+ * Store the frequency values in 16.66 MHz units, which is
+ * the natural hardware unit for SKL.
+ */
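+ /*
+ * e.g. an RP0 field of 22 (22 * 50 MHz = 1100 MHz) becomes
+ * 22 * GEN9_FREQ_SCALER = 66 units of ~16.67 MHz.
+ */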
+ rps->rp0_freq *= GEN9_FREQ_SCALER;
+ rps->rp1_freq *= GEN9_FREQ_SCALER;
+ rps->min_freq *= GEN9_FREQ_SCALER;
+ rps->max_freq *= GEN9_FREQ_SCALER;
+ rps->efficient_freq *= GEN9_FREQ_SCALER;
+ }
+}
+
+static bool rps_reset(struct intel_rps *rps)
+{
+ /* force a reset */
+ rps->power.mode = -1;
+ rps->last_freq = -1;
+
+ if (rps_set(rps, rps->min_freq)) {
+ DRM_ERROR("Failed to reset RPS to initial values\n");
+ return false;
+ }
+
+ rps->cur_freq = rps->min_freq;
+ return true;
+}
+
+/* See the Gen9_GT_PM_Programming_Guide doc for the below */
+static bool gen9_rps_enable(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ /* Program defaults and thresholds for RPS */
+ if (IS_GEN(i915, 9))
+ intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
+ GEN9_FREQUENCY(rps->rp1_freq));
+
+ /* 1 second timeout */
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
+ GT_INTERVAL_FROM_US(i915, 1000000));
+
+ intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
+
+ return rps_reset(rps);
+}
+
+static bool gen8_rps_enable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
+ HSW_FREQUENCY(rps->rp1_freq));
+
+ /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
+ 100000000 / 128); /* 1 second timeout */
+
+ intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+ return rps_reset(rps);
+}
+
+static bool gen6_rps_enable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ /* Power down if completely idle for over 50ms */
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
+ intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+ return rps_reset(rps);
+}
+
+static int chv_rps_max_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+
+ switch (RUNTIME_INFO(i915)->sseu.eu_total) {
+ case 8:
+ /* (2 * 4) config */
+ val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
+ break;
+ case 12:
+ /* (2 * 6) config */
+ val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT;
+ break;
+ case 16:
+ /* (2 * 8) config */
+ default:
+ /* Setting (2 * 8) Min RP0 for any other combination */
+ val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT;
+ break;
+ }
+
+ return val & FB_GFX_FREQ_FUSE_MASK;
+}
+
+static int chv_rps_rpe_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
+ val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;
+
+ return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
+}
+
+static int chv_rps_guar_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+
+ return val & FB_GFX_FREQ_FUSE_MASK;
+}
+
+static u32 chv_rps_min_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
+ val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;
+
+ return val & FB_GFX_FREQ_FUSE_MASK;
+}
+
+static bool chv_rps_enable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ /* 1: Program defaults and thresholds for RPS */
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
+ intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
+ intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
+
+ intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+ /* 2: Enable RPS */
+ intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
+ GEN6_RP_MEDIA_HW_NORMAL_MODE |
+ GEN6_RP_MEDIA_IS_GFX |
+ GEN6_RP_ENABLE |
+ GEN6_RP_UP_BUSY_AVG |
+ GEN6_RP_DOWN_IDLE_AVG);
+
+ /* Setting Fixed Bias */
+ vlv_punit_get(i915);
+
+ val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
+ vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
+
+ val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+
+ vlv_punit_put(i915);
+
+ /* RPS code assumes GPLL is used */
+ WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+
+ DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+
+ return rps_reset(rps);
+}
+
+static int vlv_rps_guar_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val, rp1;
+
+ val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+
+ rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
+ rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
+
+ return rp1;
+}
+
+static int vlv_rps_max_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val, rp0;
+
+ val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+
+ rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
+ /* Clamp to max */
+ rp0 = min_t(u32, rp0, 0xea);
+
+ return rp0;
+}
+
+static int vlv_rps_rpe_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val, rpe;
+
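+ /* The RPe opcode is split across two fuses: low 5 bits in LO, the rest in HI */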
+ val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
+ rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
+ val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
+ rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
+
+ return rpe;
+}
+
+static int vlv_rps_min_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
+ /*
+ * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
+ * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
+ * a BYT-M B0 the above register contains 0xbf. Moreover when setting
+ * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
+ * to make sure it matches what Punit accepts.
+ */
+ return max_t(u32, val, 0xc0);
+}
+
+static bool vlv_rps_enable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
+ intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
+ intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
+
+ intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+ intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
+ GEN6_RP_MEDIA_TURBO |
+ GEN6_RP_MEDIA_HW_NORMAL_MODE |
+ GEN6_RP_MEDIA_IS_GFX |
+ GEN6_RP_ENABLE |
+ GEN6_RP_UP_BUSY_AVG |
+ GEN6_RP_DOWN_IDLE_CONT);
+
+ vlv_punit_get(i915);
+
+ /* Setting Fixed Bias */
+ val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
+ vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
+
+ val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+
+ vlv_punit_put(i915);
+
+ /* RPS code assumes GPLL is used */
+ WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+
+ DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+
+ return rps_reset(rps);
+}
+
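+/*
+ * Estimate the current graphics power draw (in mW) for the ilk intel_ips
+ * interface; caller must hold mchdev_lock.
+ */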
+static unsigned long __ips_gfx_val(struct intel_ips *ips)
+{
+ struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ unsigned long t, corr, state1, corr2, state2;
+ u32 pxvid, ext_v;
+
+ lockdep_assert_held(&mchdev_lock);
+
+ pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq));
+ pxvid = (pxvid >> 24) & 0x7f;
+ ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid);
+
+ state1 = ext_v;
+
+ /* Revel in the empirically derived constants */
+
+ /* Correction factor in 1/100000 units */
+ t = ips_mch_val(uncore);
+ if (t > 80)
+ corr = t * 2349 + 135940;
+ else if (t >= 50)
+ corr = t * 964 + 29317;
+ else /* < 50 */
+ corr = t * 301 + 1004;
+
+ corr = corr * 150142 * state1 / 10000 - 78642;
+ corr /= 100000;
+ corr2 = corr * ips->corr;
+
+ state2 = corr2 * state1 / 10000;
+ state2 /= 100; /* convert to mW */
+
+ __gen5_ips_update(ips);
+
+ return ips->gfx_power + state2;
+}
+
+void intel_rps_enable(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ if (IS_CHERRYVIEW(i915))
+ rps->enabled = chv_rps_enable(rps);
+ else if (IS_VALLEYVIEW(i915))
+ rps->enabled = vlv_rps_enable(rps);
+ else if (INTEL_GEN(i915) >= 9)
+ rps->enabled = gen9_rps_enable(rps);
+ else if (INTEL_GEN(i915) >= 8)
+ rps->enabled = gen8_rps_enable(rps);
+ else if (INTEL_GEN(i915) >= 6)
+ rps->enabled = gen6_rps_enable(rps);
+ else if (IS_IRONLAKE_M(i915))
+ rps->enabled = gen5_rps_enable(rps);
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+ if (!rps->enabled)
+ return;
+
+ WARN_ON(rps->max_freq < rps->min_freq);
+ WARN_ON(rps->idle_freq > rps->max_freq);
+
+ WARN_ON(rps->efficient_freq < rps->min_freq);
+ WARN_ON(rps->efficient_freq > rps->max_freq);
+}
+
+static void gen6_rps_disable(struct intel_rps *rps)
+{
+ intel_uncore_write(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
+}
+
+void intel_rps_disable(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ rps->enabled = false;
+
+ if (INTEL_GEN(i915) >= 6)
+ gen6_rps_disable(rps);
+ else if (IS_IRONLAKE_M(i915))
+ gen5_rps_disable(rps);
+}
+
+static int byt_gpu_freq(struct intel_rps *rps, int val)
+{
+ /*
+ * N = val - 0xb7
+ * Slow = Fast = GPLL ref * N
+ */
+ return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
+}
+
+static int byt_freq_opcode(struct intel_rps *rps, int val)
+{
+ return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
+}
+
+static int chv_gpu_freq(struct intel_rps *rps, int val)
+{
+ /*
+ * N = val / 2
+ * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
+ */
+ return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
+}
+
+static int chv_freq_opcode(struct intel_rps *rps, int val)
+{
+ /* CHV needs even values */
+ return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
+}
+
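+/*
+ * Convert a platform-specific RPS opcode into MHz: ~16.67 MHz units on gen9+
+ * (GEN9_FREQ_SCALER), a GPLL-derived encoding on vlv/chv (gpll_ref_freq is in
+ * kHz, hence the /1000 above), and 50 MHz units (GT_FREQUENCY_MULTIPLIER)
+ * elsewhere.
+ */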
+int intel_gpu_freq(struct intel_rps *rps, int val)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (INTEL_GEN(i915) >= 9)
+ return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
+ GEN9_FREQ_SCALER);
+ else if (IS_CHERRYVIEW(i915))
+ return chv_gpu_freq(rps, val);
+ else if (IS_VALLEYVIEW(i915))
+ return byt_gpu_freq(rps, val);
+ else
+ return val * GT_FREQUENCY_MULTIPLIER;
+}
+
+int intel_freq_opcode(struct intel_rps *rps, int val)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (INTEL_GEN(i915) >= 9)
+ return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
+ GT_FREQUENCY_MULTIPLIER);
+ else if (IS_CHERRYVIEW(i915))
+ return chv_freq_opcode(rps, val);
+ else if (IS_VALLEYVIEW(i915))
+ return byt_freq_opcode(rps, val);
+ else
+ return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
+}
+
+static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ rps->gpll_ref_freq =
+ vlv_get_cck_clock(i915, "GPLL ref",
+ CCK_GPLL_CLOCK_CONTROL,
+ i915->czclk_freq);
+
+ DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq);
+}
+
+static void vlv_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ vlv_iosf_sb_get(i915,
+ BIT(VLV_IOSF_SB_PUNIT) |
+ BIT(VLV_IOSF_SB_NC) |
+ BIT(VLV_IOSF_SB_CCK));
+
+ vlv_init_gpll_ref_freq(rps);
+
+ val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+ switch ((val >> 6) & 3) {
+ case 0:
+ case 1:
+ i915->mem_freq = 800;
+ break;
+ case 2:
+ i915->mem_freq = 1066;
+ break;
+ case 3:
+ i915->mem_freq = 1333;
+ break;
+ }
+ DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+
+ rps->max_freq = vlv_rps_max_freq(rps);
+ rps->rp0_freq = rps->max_freq;
+ DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->max_freq),
+ rps->max_freq);
+
+ rps->efficient_freq = vlv_rps_rpe_freq(rps);
+ DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->efficient_freq),
+ rps->efficient_freq);
+
+ rps->rp1_freq = vlv_rps_guar_freq(rps);
+ DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->rp1_freq),
+ rps->rp1_freq);
+
+ rps->min_freq = vlv_rps_min_freq(rps);
+ DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->min_freq),
+ rps->min_freq);
+
+ vlv_iosf_sb_put(i915,
+ BIT(VLV_IOSF_SB_PUNIT) |
+ BIT(VLV_IOSF_SB_NC) |
+ BIT(VLV_IOSF_SB_CCK));
+}
+
+static void chv_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ vlv_iosf_sb_get(i915,
+ BIT(VLV_IOSF_SB_PUNIT) |
+ BIT(VLV_IOSF_SB_NC) |
+ BIT(VLV_IOSF_SB_CCK));
+
+ vlv_init_gpll_ref_freq(rps);
+
+ val = vlv_cck_read(i915, CCK_FUSE_REG);
+
+ switch ((val >> 2) & 0x7) {
+ case 3:
+ i915->mem_freq = 2000;
+ break;
+ default:
+ i915->mem_freq = 1600;
+ break;
+ }
+ DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+
+ rps->max_freq = chv_rps_max_freq(rps);
+ rps->rp0_freq = rps->max_freq;
+ DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->max_freq),
+ rps->max_freq);
+
+ rps->efficient_freq = chv_rps_rpe_freq(rps);
+ DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->efficient_freq),
+ rps->efficient_freq);
+
+ rps->rp1_freq = chv_rps_guar_freq(rps);
+ DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->rp1_freq),
+ rps->rp1_freq);
+
+ rps->min_freq = chv_rps_min_freq(rps);
+ DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->min_freq),
+ rps->min_freq);
+
+ vlv_iosf_sb_put(i915,
+ BIT(VLV_IOSF_SB_PUNIT) |
+ BIT(VLV_IOSF_SB_NC) |
+ BIT(VLV_IOSF_SB_CCK));
+
+ WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
+ rps->min_freq) & 1,
+ "Odd GPU freq values\n");
+}
+
+static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
+{
+ ei->ktime = ktime_get_raw();
+ ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT);
+ ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT);
+}
+
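+/*
+ * vlv/chv workaround: derive the up/down threshold events in software by
+ * comparing the C0 residency accumulated since the last EI-expired interrupt
+ * (taking the busier of render and media) against the current power-zone
+ * thresholds.
+ */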
+static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ const struct intel_rps_ei *prev = &rps->ei;
+ struct intel_rps_ei now;
+ u32 events = 0;
+
+ if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+ return 0;
+
+ vlv_c0_read(uncore, &now);
+
+ if (prev->ktime) {
+ u64 time, c0;
+ u32 render, media;
+
+ time = ktime_us_delta(now.ktime, prev->ktime);
+
+ time *= rps_to_i915(rps)->czclk_freq;
+
+ /* Workload can be split between render + media,
+ * e.g. SwapBuffers being blitted in X after being rendered in
+ * mesa. To account for this we need to combine both engines
+ * into our activity counter.
+ */
+ render = now.render_c0 - prev->render_c0;
+ media = now.media_c0 - prev->media_c0;
+ c0 = max(render, media);
+ c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
+
+ if (c0 > time * rps->power.up_threshold)
+ events = GEN6_PM_RP_UP_THRESHOLD;
+ else if (c0 < time * rps->power.down_threshold)
+ events = GEN6_PM_RP_DOWN_THRESHOLD;
+ }
+
+ rps->ei = now;
+ return events;
+}
+
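+/*
+ * Bottom half for the RPS interrupts: fold in any pending client boosts and
+ * threshold events, then step cur_freq. Consecutive steps in the same
+ * direction double last_adj (1, 2, 4, ...) so we ramp quickly under sustained
+ * load, while a down-timeout snaps us back to RPe and then the soft minimum.
+ */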
+static void rps_work(struct work_struct *work)
+{
+ struct intel_rps *rps = container_of(work, typeof(*rps), work);
+ struct intel_gt *gt = rps_to_gt(rps);
+ bool client_boost = false;
+ int new_freq, adj, min, max;
+ u32 pm_iir = 0;
+
+ spin_lock_irq(&gt->irq_lock);
+ pm_iir = fetch_and_zero(&rps->pm_iir);
+ client_boost = atomic_read(&rps->num_waiters);
+ spin_unlock_irq(&gt->irq_lock);
+
+ /* Make sure we didn't queue anything we're not going to process. */
+ if ((pm_iir & rps->pm_events) == 0 && !client_boost)
+ goto out;
+
+ mutex_lock(&rps->lock);
+
+ pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
+
+ adj = rps->last_adj;
+ new_freq = rps->cur_freq;
+ min = rps->min_freq_softlimit;
+ max = rps->max_freq_softlimit;
+ if (client_boost)
+ max = rps->max_freq;
+ if (client_boost && new_freq < rps->boost_freq) {
+ new_freq = rps->boost_freq;
+ adj = 0;
+ } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
+ if (adj > 0)
+ adj *= 2;
+ else /* CHV needs even encode values */
+ adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1;
+
+ if (new_freq >= rps->max_freq_softlimit)
+ adj = 0;
+ } else if (client_boost) {
+ adj = 0;
+ } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
+ if (rps->cur_freq > rps->efficient_freq)
+ new_freq = rps->efficient_freq;
+ else if (rps->cur_freq > rps->min_freq_softlimit)
+ new_freq = rps->min_freq_softlimit;
+ adj = 0;
+ } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
+ if (adj < 0)
+ adj *= 2;
+ else /* CHV needs even encode values */
+ adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1;
+
+ if (new_freq <= rps->min_freq_softlimit)
+ adj = 0;
+ } else { /* unknown event */
+ adj = 0;
+ }
+
+ rps->last_adj = adj;
+
+ /*
+ * Limit deboosting and boosting to keep ourselves at the extremes
+ * when in the respective power modes (i.e. slowly decrease frequencies
+ * while in the HIGH_POWER zone and slowly increase frequencies while
+ * in the LOW_POWER zone). On idle, we will hit the timeout and drop
+ * to the next level quickly, and conversely if busy we expect to
+ * hit a waitboost and rapidly switch into max power.
+ */
+ if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
+ (adj > 0 && rps->power.mode == LOW_POWER))
+ rps->last_adj = 0;
+
+ /* sysfs frequency interfaces may have snuck in while servicing the
+ * interrupt
+ */
+ new_freq += adj;
+ new_freq = clamp_t(int, new_freq, min, max);
+
+ if (intel_rps_set(rps, new_freq)) {
+ DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
+ rps->last_adj = 0;
+ }
+
+ mutex_unlock(&rps->lock);
+
+out:
+ spin_lock_irq(&gt->irq_lock);
+ gen6_gt_pm_unmask_irq(gt, rps->pm_events);
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+ const u32 events = rps->pm_events & pm_iir;
+
+ lockdep_assert_held(&gt->irq_lock);
+
+ if (unlikely(!events))
+ return;
+
+ gen6_gt_pm_mask_irq(gt, events);
+
+ rps->pm_iir |= events;
+ schedule_work(&rps->work);
+}
+
+void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+
+ if (pm_iir & rps->pm_events) {
+ spin_lock(&gt->irq_lock);
+ gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events);
+ rps->pm_iir |= pm_iir & rps->pm_events;
+ schedule_work(&rps->work);
+ spin_unlock(&gt->irq_lock);
+ }
+
+ if (INTEL_GEN(gt->i915) >= 8)
+ return;
+
+ if (pm_iir & PM_VEBOX_USER_INTERRUPT)
+ intel_engine_breadcrumbs_irq(gt->engine[VECS0]);
+
+ if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
+ DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
+}
+
+void gen5_rps_irq_handler(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u32 busy_up, busy_down, max_avg, min_avg;
+ u8 new_freq;
+
+ spin_lock(&mchdev_lock);
+
+ intel_uncore_write16(uncore,
+ MEMINTRSTS,
+ intel_uncore_read(uncore, MEMINTRSTS));
+
+ intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
+ busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
+ busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
+ max_avg = intel_uncore_read(uncore, RCBMAXAVG);
+ min_avg = intel_uncore_read(uncore, RCBMINAVG);
+
+ /* Handle RCS change request from hw */
+ new_freq = rps->cur_freq;
+ if (busy_up > max_avg)
+ new_freq++;
+ else if (busy_down < min_avg)
+ new_freq--;
+ new_freq = clamp(new_freq,
+ rps->min_freq_softlimit,
+ rps->max_freq_softlimit);
+
+ if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq))
+ rps->cur_freq = new_freq;
+
+ spin_unlock(&mchdev_lock);
+}
+
+void intel_rps_init_early(struct intel_rps *rps)
+{
+ mutex_init(&rps->lock);
+ mutex_init(&rps->power.mutex);
+
+ INIT_WORK(&rps->work, rps_work);
+
+ atomic_set(&rps->num_waiters, 0);
+}
+
+void intel_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (IS_CHERRYVIEW(i915))
+ chv_rps_init(rps);
+ else if (IS_VALLEYVIEW(i915))
+ vlv_rps_init(rps);
+ else if (INTEL_GEN(i915) >= 6)
+ gen6_rps_init(rps);
+ else if (IS_IRONLAKE_M(i915))
+ gen5_rps_init(rps);
+
+ /* Derive initial user preferences/limits from the hardware limits */
+ rps->max_freq_softlimit = rps->max_freq;
+ rps->min_freq_softlimit = rps->min_freq;
+
+ /* After setting max-softlimit, find the overclock max freq */
+ if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
+ u32 params = 0;
+
+ sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
+ &params, NULL);
+ if (params & BIT(31)) { /* OC supported */
+ DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
+ (rps->max_freq & 0xff) * 50,
+ (params & 0xff) * 50);
+ rps->max_freq = params & 0xff;
+ }
+ }
+
+ /* Finally allow us to boost to max by default */
+ rps->boost_freq = rps->max_freq;
+ rps->idle_freq = rps->min_freq;
+ rps->cur_freq = rps->idle_freq;
+
+ rps->pm_intrmsk_mbz = 0;
+
+ /*
+ * SNB, IVB and HSW can, while VLV and CHV may, hard hang on a looping
+ * batchbuffer if GEN6_PM_RP_UP_EI_EXPIRED is masked.
+ *
+ * TODO: verify if this can be reproduced on VLV,CHV.
+ */
+ if (INTEL_GEN(i915) <= 7)
+ rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
+
+ if (INTEL_GEN(i915) >= 8)
+ rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+}
+
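+/* Decode the Current Actual Graphics Frequency (CAGF) opcode from RPSTAT */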
+u32 intel_get_cagf(struct intel_rps *rps, u32 rpstat)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 cagf;
+
+ if (INTEL_GEN(i915) >= 9)
+ cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
+ else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+ cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
+ else
+ cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
+
+ return cagf;
+}
+
+/* External interface for intel_ips.ko */
+
+static struct drm_i915_private __rcu *ips_mchdev;
+
+/**
+ * ips_ping_for_i915_load - tell intel_ips that i915 is now loaded
+ *
+ * Tells the intel_ips driver that the i915 driver is now loaded, if
+ * IPS got loaded first.
+ *
+ * This awkward dance is so that neither module has to depend on the
+ * other in order for IPS to do the appropriate communication of
+ * GPU turbo limits to i915.
+ */
+static void
+ips_ping_for_i915_load(void)
+{
+ void (*link)(void);
+
+ link = symbol_get(ips_link_to_i915_driver);
+ if (link) {
+ link();
+ symbol_put(ips_link_to_i915_driver);
+ }
+}
+
+void intel_rps_driver_register(struct intel_rps *rps)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+
+ /*
+ * We only register the i915 ips part with intel-ips once everything is
+ * set up, to avoid intel-ips sneaking in and reading bogus values.
+ */
+ if (IS_GEN(gt->i915, 5)) {
+ rcu_assign_pointer(ips_mchdev, gt->i915);
+ ips_ping_for_i915_load();
+ }
+}
+
+void intel_rps_driver_unregister(struct intel_rps *rps)
+{
+ rcu_assign_pointer(ips_mchdev, NULL);
+}
+
+static struct drm_i915_private *mchdev_get(void)
+{
+ struct drm_i915_private *i915;
+
+ rcu_read_lock();
+ i915 = rcu_dereference(ips_mchdev);
+ if (!kref_get_unless_zero(&i915->drm.ref))
+ i915 = NULL;
+ rcu_read_unlock();
+
+ return i915;
+}
+
+/**
+ * i915_read_mch_val - return value for IPS use
+ *
+ * Calculate and return a value for the IPS driver to use when deciding whether
+ * we have thermal and power headroom to increase CPU or GPU power budget.
+ */
+unsigned long i915_read_mch_val(void)
+{
+ struct drm_i915_private *i915;
+ unsigned long chipset_val = 0;
+ unsigned long graphics_val = 0;
+ intel_wakeref_t wakeref;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return 0;
+
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+ struct intel_ips *ips = &i915->gt.rps.ips;
+
+ spin_lock_irq(&mchdev_lock);
+ chipset_val = __ips_chipset_val(ips);
+ graphics_val = __ips_gfx_val(ips);
+ spin_unlock_irq(&mchdev_lock);
+ }
+
+ drm_dev_put(&i915->drm);
+ return chipset_val + graphics_val;
+}
+EXPORT_SYMBOL_GPL(i915_read_mch_val);
+
+/**
+ * i915_gpu_raise - raise GPU frequency limit
+ *
+ * Raise the limit; IPS indicates we have thermal headroom.
+ */
+bool i915_gpu_raise(void)
+{
+ struct drm_i915_private *i915;
+ struct intel_rps *rps;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return false;
+
+ rps = &i915->gt.rps;
+
+ spin_lock_irq(&mchdev_lock);
+ if (rps->max_freq_softlimit < rps->max_freq)
+ rps->max_freq_softlimit++;
+ spin_unlock_irq(&mchdev_lock);
+
+ drm_dev_put(&i915->drm);
+ return true;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_raise);
+
+/**
+ * i915_gpu_lower - lower GPU frequency limit
+ *
+ * IPS indicates we're close to a thermal limit, so throttle back the GPU
+ * frequency maximum.
+ */
+bool i915_gpu_lower(void)
+{
+ struct drm_i915_private *i915;
+ struct intel_rps *rps;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return false;
+
+ rps = &i915->gt.rps;
+
+ spin_lock_irq(&mchdev_lock);
+ if (rps->max_freq_softlimit > rps->min_freq)
+ rps->max_freq_softlimit--;
+ spin_unlock_irq(&mchdev_lock);
+
+ drm_dev_put(&i915->drm);
+ return true;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_lower);
+
+/**
+ * i915_gpu_busy - indicate GPU business to IPS
+ *
+ * Tell the IPS driver whether or not the GPU is busy.
+ */
+bool i915_gpu_busy(void)
+{
+ struct drm_i915_private *i915;
+ bool ret;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return false;
+
+ ret = i915->gt.awake;
+
+ drm_dev_put(&i915->drm);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_busy);
+
+/**
+ * i915_gpu_turbo_disable - disable graphics turbo
+ *
+ * Disable graphics turbo by resetting the max frequency and setting the
+ * current frequency to the default.
+ */
+bool i915_gpu_turbo_disable(void)
+{
+ struct drm_i915_private *i915;
+ struct intel_rps *rps;
+ bool ret;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return false;
+
+ rps = &i915->gt.rps;
+
+ spin_lock_irq(&mchdev_lock);
+ rps->max_freq_softlimit = rps->min_freq;
+ ret = gen5_rps_set(&i915->gt.rps, rps->min_freq);
+ spin_unlock_irq(&mchdev_lock);
+
+ drm_dev_put(&i915->drm);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
new file mode 100644
index 000000000000..9518c66c9792
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -0,0 +1,38 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RPS_H
+#define INTEL_RPS_H
+
+#include "intel_rps_types.h"
+
+struct i915_request;
+
+void intel_rps_init_early(struct intel_rps *rps);
+void intel_rps_init(struct intel_rps *rps);
+
+void intel_rps_driver_register(struct intel_rps *rps);
+void intel_rps_driver_unregister(struct intel_rps *rps);
+
+void intel_rps_enable(struct intel_rps *rps);
+void intel_rps_disable(struct intel_rps *rps);
+
+void intel_rps_park(struct intel_rps *rps);
+void intel_rps_unpark(struct intel_rps *rps);
+void intel_rps_boost(struct i915_request *rq);
+
+int intel_rps_set(struct intel_rps *rps, u8 val);
+void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive);
+
+int intel_gpu_freq(struct intel_rps *rps, int val);
+int intel_freq_opcode(struct intel_rps *rps, int val);
+u32 intel_get_cagf(struct intel_rps *rps, u32 rpstat1);
+
+void gen5_rps_irq_handler(struct intel_rps *rps);
+void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
+void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
+
+#endif /* INTEL_RPS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h
new file mode 100644
index 000000000000..c2e279154bd5
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h
@@ -0,0 +1,93 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RPS_TYPES_H
+#define INTEL_RPS_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/ktime.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+struct intel_ips {
+ u64 last_count1;
+ unsigned long last_time1;
+ unsigned long chipset_power;
+ u64 last_count2;
+ u64 last_time2;
+ unsigned long gfx_power;
+ u8 corr;
+
+ int c, m;
+};
+
+struct intel_rps_ei {
+ ktime_t ktime;
+ u32 render_c0;
+ u32 media_c0;
+};
+
+struct intel_rps {
+ struct mutex lock; /* protects enabling and the worker */
+
+ /*
+ * work and pm_iir are protected by gt->irq_lock
+ */
+ struct work_struct work;
+ bool enabled;
+ bool active;
+ u32 pm_iir;
+
+ /* PM interrupt bits that should never be masked */
+ u32 pm_intrmsk_mbz;
+ u32 pm_events;
+
+ /*
+ * Frequencies are stored in potentially platform-dependent multiples,
+ * i.e. *_freq must be converted via intel_gpu_freq() to obtain MHz.
+ * Soft limits are those which are used for the dynamic reclocking done
+ * by the driver (raise frequencies under heavy loads, and lower for
+ * lighter loads). Hard limits are those imposed by the hardware.
+ *
+ * A distinction is made for overclocking, which is never enabled by
+ * default, and is considered to be above the hard limit if it's
+ * possible at all.
+ */
+ u8 cur_freq; /* Current frequency (cached, may not == HW) */
+ u8 last_freq; /* Last SWREQ frequency */
+ u8 min_freq_softlimit; /* Minimum frequency permitted by the driver */
+ u8 max_freq_softlimit; /* Max frequency permitted by the driver */
+ u8 max_freq; /* Maximum frequency, RP0 if not overclocking */
+ u8 min_freq; /* AKA RPn. Minimum frequency */
+ u8 boost_freq; /* Frequency to request when wait boosting */
+ u8 idle_freq; /* Frequency to request when we are idle */
+ u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */
+ u8 rp1_freq; /* "less than" RP0 power/frequency */
+ u8 rp0_freq; /* Non-overclocked max frequency. */
+ u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */
+
+ int last_adj;
+
+ struct {
+ struct mutex mutex;
+
+ enum { LOW_POWER, BETWEEN, HIGH_POWER } mode;
+ unsigned int interactive;
+
+ u8 up_threshold; /* Current %busy required to upclock */
+ u8 down_threshold; /* Current %busy required to downclock */
+ } power;
+
+ atomic_t num_waiters;
+ atomic_t boosts;
+
+ /* manual wa residency calculations */
+ struct intel_rps_ei ei;
+ struct intel_ips ips;
+};
+
+#endif /* INTEL_RPS_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 6bf2d87da109..74f793423231 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -8,6 +8,19 @@
#include "intel_lrc_reg.h"
#include "intel_sseu.h"
+void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
+ u8 max_subslices, u8 max_eus_per_subslice)
+{
+ sseu->max_slices = max_slices;
+ sseu->max_subslices = max_subslices;
+ sseu->max_eus_per_subslice = max_eus_per_subslice;
+
+ sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
+ GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
+ sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
+ GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
+}
+
unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
@@ -19,10 +32,32 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
return total;
}
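+/* Collect the byte-strided per-slice subslice mask into a single u32 bitmask */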
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
+{
+ int i, offset = slice * sseu->ss_stride;
+ u32 mask = 0;
+
+ GEM_BUG_ON(slice >= sseu->max_slices);
+
+ for (i = 0; i < sseu->ss_stride; i++)
+ mask |= (u32)sseu->subslice_mask[offset + i] <<
+ i * BITS_PER_BYTE;
+
+ return mask;
+}
+
+void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
+ u32 ss_mask)
+{
+ int offset = slice * sseu->ss_stride;
+
+ memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride);
+}
+
unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
{
- return hweight8(sseu->subslice_mask[slice]);
+ return hweight32(intel_sseu_get_subslices(sseu, slice));
}
u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index b50d0401a4e2..d1d225204f09 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -10,15 +10,21 @@
#include <linux/types.h>
#include <linux/kernel.h>
+#include "i915_gem.h"
+
struct drm_i915_private;
#define GEN_MAX_SLICES (6) /* CNL upper bound */
#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */
#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
+#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
+#define GEN_MAX_EUS (16) /* TGL upper bound */
+#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
struct sseu_dev_info {
u8 slice_mask;
- u8 subslice_mask[GEN_MAX_SLICES];
+ u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
+ u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
u16 eu_total;
u8 eu_per_subslice;
u8 min_eu_in_pool;
@@ -33,11 +39,8 @@ struct sseu_dev_info {
u8 max_subslices;
u8 max_eus_per_subslice;
- /* We don't have more than 8 eus per subslice at the moment and as we
- * store eus enabled using bits, no need to multiply by eus per
- * subslice.
- */
- u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
+ u8 ss_stride;
+ u8 eu_stride;
};
/*
@@ -63,12 +66,34 @@ intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
return value;
}
+static inline bool
+intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice,
+ int subslice)
+{
+ u8 mask;
+ int ss_idx = subslice / BITS_PER_BYTE;
+
+ GEM_BUG_ON(ss_idx >= sseu->ss_stride);
+
+ mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx];
+
+ return mask & BIT(subslice % BITS_PER_BYTE);
+}
+
+void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
+ u8 max_subslices, u8 max_eus_per_subslice);
+
unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
+
+void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
+ u32 ss_mask);
+
u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
const struct intel_sseu *req_sseu);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 9cb01d9828f1..14ad10acd548 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -4,13 +4,13 @@
* Copyright © 2016-2018 Intel Corporation
*/
-#include "gt/intel_gt_types.h"
-
#include "i915_drv.h"
#include "i915_active.h"
#include "i915_syncmap.h"
-#include "gt/intel_timeline.h"
+#include "intel_gt.h"
+#include "intel_ring.h"
+#include "intel_timeline.h"
#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
@@ -136,6 +136,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
kfree(cl);
}
+__i915_active_call
static void __cacheline_retire(struct i915_active *active)
{
struct intel_timeline_cacheline *cl =
@@ -177,8 +178,7 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
cl->hwsp = hwsp;
cl->vaddr = page_pack_bits(vaddr, cacheline);
- i915_active_init(hwsp->gt->i915, &cl->active,
- __cacheline_active, __cacheline_retire);
+ i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);
return cl;
}
@@ -254,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
mutex_init(&timeline->mutex);
- INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
+ INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
INIT_LIST_HEAD(&timeline->requests);
i915_syncmap_init(&timeline->sync);
@@ -442,7 +442,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
* free it after the current request is retired, which ensures that
* all writes into the cacheline from previous requests are complete.
*/
- err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq);
+ err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
if (err)
goto err_cacheline;
@@ -493,24 +493,39 @@ int intel_timeline_get_seqno(struct intel_timeline *tl,
static int cacheline_ref(struct intel_timeline_cacheline *cl,
struct i915_request *rq)
{
- return i915_active_ref(&cl->active, rq->timeline, rq);
+ return i915_active_add_request(&cl->active, rq);
}
int intel_timeline_read_hwsp(struct i915_request *from,
struct i915_request *to,
u32 *hwsp)
{
- struct intel_timeline_cacheline *cl = from->hwsp_cacheline;
- struct intel_timeline *tl = from->timeline;
+ struct intel_timeline *tl;
int err;
- GEM_BUG_ON(to->timeline == tl);
+ rcu_read_lock();
+ tl = rcu_dereference(from->timeline);
+ if (i915_request_completed(from) || !kref_get_unless_zero(&tl->kref))
+ tl = NULL;
+ rcu_read_unlock();
+ if (!tl) /* already completed */
+ return 1;
+
+ GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl);
+
+ err = -EBUSY;
+ if (mutex_trylock(&tl->mutex)) {
+ struct intel_timeline_cacheline *cl = from->hwsp_cacheline;
+
+ if (i915_request_completed(from)) {
+ err = 1;
+ goto unlock;
+ }
- mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
- err = i915_request_completed(from);
- if (!err)
err = cacheline_ref(cl, to);
- if (!err) {
+ if (err)
+ goto unlock;
+
if (likely(cl == tl->hwsp_cacheline)) {
*hwsp = tl->hwsp_offset;
} else { /* across a seqno wrap, recover the original offset */
@@ -518,8 +533,11 @@ int intel_timeline_read_hwsp(struct i915_request *from,
ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
CACHELINE_BYTES;
}
+
+unlock:
+ mutex_unlock(&tl->mutex);
}
- mutex_unlock(&tl->mutex);
+ intel_timeline_put(tl);
return err;
}
@@ -541,7 +559,7 @@ void __intel_timeline_free(struct kref *kref)
container_of(kref, typeof(*timeline), kref);
intel_timeline_fini(timeline);
- kfree(timeline);
+ kfree_rcu(timeline, rcu);
}
static void timelines_fini(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 2b1baf2fcc8e..98d9ee166379 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -58,12 +58,13 @@ struct intel_timeline {
*/
struct list_head requests;
- /* Contains an RCU guarded pointer to the last request. No reference is
+ /*
+ * Contains an RCU guarded pointer to the last request. No reference is
* held to the request, users must carefully acquire a reference to
- * the request using i915_active_request_get_request_rcu(), or hold the
- * struct_mutex.
+ * the request using i915_active_fence_get(), or manage the RCU
+ * protection themselves (cf the i915_active_fence API).
*/
- struct i915_active_request last_request;
+ struct i915_active_fence last_request;
/**
* We track the most recent seqno that we wait on in every context so
@@ -80,6 +81,7 @@ struct intel_timeline {
struct intel_gt *gt;
struct kref kref;
+ struct rcu_head rcu;
};
#endif /* __I915_TIMELINE_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 45481eb1fa3c..e4bccc14602f 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -7,6 +7,7 @@
#include "i915_drv.h"
#include "intel_context.h"
#include "intel_gt.h"
+#include "intel_ring.h"
#include "intel_workarounds.h"
/**
@@ -567,6 +568,9 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
+ /* Wa_1409142259:tgl */
+ WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
+ GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
}
static void
@@ -796,11 +800,10 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
slice = fls(sseu->slice_mask) - 1;
- GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
- subslice = fls(l3_en & sseu->subslice_mask[slice]);
+ subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
if (!subslice) {
DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
- sseu->subslice_mask[slice], l3_en);
+ intel_sseu_get_subslices(sseu, slice), l3_en);
subslice = fls(l3_en);
WARN_ON(!subslice);
}
@@ -890,11 +893,27 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
wa_write_or(wal,
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_L3_COH_PIPE);
+
+ /* Wa_1607087056:icl */
+ wa_write_or(wal,
+ SLICE_UNIT_LEVEL_CLKGATE,
+ L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
}
static void
tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
+ /* Wa_1409420604:tgl */
+ if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
+ wa_write_or(wal,
+ SUBSLICE_UNIT_LEVEL_CLKGATE2,
+ CPSSUNIT_CLKGATE_DIS);
+
+ /* Wa_1409180338:tgl */
+ if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
+ wa_write_or(wal,
+ SLICE_UNIT_LEVEL_CLKGATE,
+ L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
}
static void
@@ -1063,6 +1082,9 @@ static void gen9_whitelist_build(struct i915_wa_list *w)
/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
whitelist_reg(w, GEN8_HDC_CHICKEN1);
+
+ /* WaSendPushConstantsFromMMIO:skl,bxt */
+ whitelist_reg(w, COMMON_SLICE_CHICKEN2);
}
static void skl_whitelist_build(struct intel_engine_cs *engine)
@@ -1194,6 +1216,26 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
static void tgl_whitelist_build(struct intel_engine_cs *engine)
{
+ struct i915_wa_list *w = &engine->whitelist;
+
+ switch (engine->class) {
+ case RENDER_CLASS:
+ /*
+ * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
+ *
+ * This covers 4 registers which are next to one another:
+ * - PS_INVOCATION_COUNT
+ * - PS_INVOCATION_COUNT_UDW
+ * - PS_DEPTH_COUNT
+ * - PS_DEPTH_COUNT_UDW
+ */
+ whitelist_reg_ext(w, PS_INVOCATION_COUNT,
+ RING_FORCE_TO_NONPRIV_ACCESS_RD |
+ RING_FORCE_TO_NONPRIV_RANGE_4);
+ break;
+ default:
+ break;
+ }
}
void intel_engine_init_whitelist(struct intel_engine_cs *engine)
@@ -1255,6 +1297,26 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
+ if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
+ /* Wa_1606700617:tgl */
+ wa_masked_en(wal,
+ GEN9_CS_DEBUG_MODE1,
+ FF_DOP_CLOCK_GATE_DISABLE);
+
+ /* Wa_1607138336:tgl */
+ wa_write_or(wal,
+ GEN9_CTX_PREEMPT_REG,
+ GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
+
+ /* Wa_1607030317:tgl */
+ /* Wa_1607186500:tgl */
+ /* Wa_1607297627:tgl */
+ wa_masked_en(wal,
+ GEN6_RC_SLEEP_PSMI_CONTROL,
+ GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+ GEN8_RC_SEMA_IDLE_MSG_DISABLE);
+ }
+
if (IS_GEN(i915, 11)) {
/* This is not an Wa. Enable for better image quality */
wa_masked_en(wal,
@@ -1449,7 +1511,7 @@ static bool mcr_range(struct drm_i915_private *i915, u32 offset)
* which only controls CPU initiated MMIO. Routing does not
* work for CS access so we cannot verify them on this path.
*/
- if (INTEL_GEN(i915) >= 8 && (offset >= 0xb100 && offset <= 0xb3ff))
+ if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff))
return true;
return false;
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 5d43cbc3f345..83f549d203a0 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -23,6 +23,7 @@
*/
#include "gem/i915_gem_context.h"
+#include "gt/intel_ring.h"
#include "i915_drv.h"
#include "intel_context.h"
@@ -240,6 +241,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
struct mock_engine *engine;
GEM_BUG_ON(id >= I915_NUM_ENGINES);
+ GEM_BUG_ON(!i915->gt.uncore);
engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL);
if (!engine)
@@ -248,9 +250,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
/* minimal engine setup for requests */
engine->base.i915 = i915;
engine->base.gt = &i915->gt;
+ engine->base.uncore = i915->gt.uncore;
snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
engine->base.id = id;
engine->base.mask = BIT(id);
+ engine->base.legacy_idx = INVALID_ENGINE;
engine->base.instance = id;
engine->base.status_page.addr = (void *)(engine + 1);
@@ -265,6 +269,9 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
engine->base.reset.finish = mock_reset_finish;
engine->base.cancel_requests = mock_cancel_requests;
+ i915->gt.engine[id] = &engine->base;
+ i915->gt.engine_class[0][id] = &engine->base;
+
/* fake hw queue */
spin_lock_init(&engine->hw_lock);
timer_setup(&engine->hw_delay, hw_delay_complete, 0);
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 9d1ea26c7a2d..bc720defc6b8 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -14,22 +14,28 @@
static int request_sync(struct i915_request *rq)
{
+ struct intel_timeline *tl = i915_request_timeline(rq);
long timeout;
int err = 0;
+ intel_timeline_get(tl);
i915_request_get(rq);
- i915_request_add(rq);
+ /* Opencode i915_request_add() so we can keep the timeline locked. */
+ __i915_request_commit(rq);
+ __i915_request_queue(rq, NULL);
+
timeout = i915_request_wait(rq, 0, HZ / 10);
- if (timeout < 0) {
+ if (timeout < 0)
err = timeout;
- } else {
- mutex_lock(&rq->timeline->mutex);
+ else
i915_request_retire_upto(rq);
- mutex_unlock(&rq->timeline->mutex);
- }
+
+ lockdep_unpin_lock(&tl->mutex, rq->cookie);
+ mutex_unlock(&tl->mutex);
i915_request_put(rq);
+ intel_timeline_put(tl);
return err;
}
@@ -41,24 +47,20 @@ static int context_sync(struct intel_context *ce)
mutex_lock(&tl->mutex);
do {
- struct i915_request *rq;
+ struct dma_fence *fence;
long timeout;
- rcu_read_lock();
- rq = rcu_dereference(tl->last_request.request);
- if (rq)
- rq = i915_request_get_rcu(rq);
- rcu_read_unlock();
- if (!rq)
+ fence = i915_active_fence_get(&tl->last_request);
+ if (!fence)
break;
- timeout = i915_request_wait(rq, 0, HZ / 10);
+ timeout = dma_fence_wait_timeout(fence, false, HZ / 10);
if (timeout < 0)
err = timeout;
else
- i915_request_retire_upto(rq);
+ i915_request_retire_upto(to_request(fence));
- i915_request_put(rq);
+ dma_fence_put(fence);
} while (!err);
mutex_unlock(&tl->mutex);
@@ -101,9 +103,6 @@ static int __live_context_size(struct intel_engine_cs *engine,
*
* TLDR; this overlaps with the execlists redzone.
*/
- if (HAS_EXECLISTS(engine->i915))
- vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
-
vaddr += engine->context_size - I915_GTT_PAGE_SIZE;
memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
@@ -153,15 +152,11 @@ static int live_context_size(void *arg)
* HW tries to write past the end of one.
*/
- mutex_lock(&gt->i915->drm.struct_mutex);
-
fixme = kernel_context(gt->i915);
- if (IS_ERR(fixme)) {
- err = PTR_ERR(fixme);
- goto unlock;
- }
+ if (IS_ERR(fixme))
+ return PTR_ERR(fixme);
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
struct {
struct drm_i915_gem_object *state;
void *pinned;
@@ -199,8 +194,6 @@ static int live_context_size(void *arg)
}
kernel_context_close(fixme);
-unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
return err;
}
@@ -303,26 +296,23 @@ static int live_active_context(void *arg)
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&gt->i915->drm.struct_mutex);
-
fixme = live_context(gt->i915, file);
if (IS_ERR(fixme)) {
err = PTR_ERR(fixme);
- goto unlock;
+ goto out_file;
}
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
err = __live_active_context(engine, fixme);
if (err)
break;
- err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+ err = igt_flush_test(gt->i915);
if (err)
break;
}
-unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
+out_file:
mock_file_free(gt->i915, file);
return err;
}
@@ -416,26 +406,23 @@ static int live_remote_context(void *arg)
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&gt->i915->drm.struct_mutex);
-
fixme = live_context(gt->i915, file);
if (IS_ERR(fixme)) {
err = PTR_ERR(fixme);
- goto unlock;
+ goto out_file;
}
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
err = __live_remote_context(engine, fixme);
if (err)
break;
- err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+ err = igt_flush_test(gt->i915);
if (err)
break;
}
-unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
+out_file:
mock_file_free(gt->i915, file);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
new file mode 100644
index 000000000000..e864406bd2d9
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -0,0 +1,350 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/sort.h>
+
+#include "i915_drv.h"
+
+#include "intel_gt_requests.h"
+#include "i915_selftest.h"
+
+struct pulse {
+ struct i915_active active;
+ struct kref kref;
+};
+
+static int pulse_active(struct i915_active *active)
+{
+ kref_get(&container_of(active, struct pulse, active)->kref);
+ return 0;
+}
+
+static void pulse_free(struct kref *kref)
+{
+ kfree(container_of(kref, struct pulse, kref));
+}
+
+static void pulse_put(struct pulse *p)
+{
+ kref_put(&p->kref, pulse_free);
+}
+
+static void pulse_retire(struct i915_active *active)
+{
+ pulse_put(container_of(active, struct pulse, active));
+}
+
+static struct pulse *pulse_create(void)
+{
+ struct pulse *p;
+
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return p;
+
+ kref_init(&p->kref);
+ i915_active_init(&p->active, pulse_active, pulse_retire);
+
+ return p;
+}
+
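+/* Flush any concurrent retirement: cycle the active's mutex and its worker */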
+static void pulse_unlock_wait(struct pulse *p)
+{
+ mutex_lock(&p->active.mutex);
+ mutex_unlock(&p->active.mutex);
+ flush_work(&p->active.work);
+}
+
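+/*
+ * Install idle barriers on @engine via @p->active, run @fn (a heartbeat pulse
+ * or an explicit barrier flush) and then verify the barriers were consumed,
+ * the pulse retired and the i915_active returned to idle.
+ */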
+static int __live_idle_pulse(struct intel_engine_cs *engine,
+ int (*fn)(struct intel_engine_cs *cs))
+{
+ struct pulse *p;
+ int err;
+
+ GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+
+ p = pulse_create();
+ if (!p)
+ return -ENOMEM;
+
+ err = i915_active_acquire(&p->active);
+ if (err)
+ goto out;
+
+ err = i915_active_acquire_preallocate_barrier(&p->active, engine);
+ if (err) {
+ i915_active_release(&p->active);
+ goto out;
+ }
+
+ i915_active_acquire_barrier(&p->active);
+ i915_active_release(&p->active);
+
+ GEM_BUG_ON(i915_active_is_idle(&p->active));
+ GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
+
+ err = fn(engine);
+ if (err)
+ goto out;
+
+ GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
+
+ if (intel_gt_retire_requests_timeout(engine->gt, HZ / 5)) {
+ err = -ETIME;
+ goto out;
+ }
+
+ GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
+
+ pulse_unlock_wait(p); /* synchronize with the retirement callback */
+
+ if (!i915_active_is_idle(&p->active)) {
+ struct drm_printer m = drm_err_printer("pulse");
+
+ pr_err("%s: heartbeat pulse did not flush idle tasks\n",
+ engine->name);
+ i915_active_print(&p->active, &m);
+
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ pulse_put(p);
+ return err;
+}
+
+static int live_idle_flush(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /* Check that we can flush the idle barriers */
+
+ for_each_engine(engine, gt, id) {
+ intel_engine_pm_get(engine);
+ err = __live_idle_pulse(engine, intel_engine_flush_barriers);
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static int live_idle_pulse(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /* Check that heartbeat pulses flush the idle barriers */
+
+ for_each_engine(engine, gt, id) {
+ intel_engine_pm_get(engine);
+ err = __live_idle_pulse(engine, intel_engine_pulse);
+ intel_engine_pm_put(engine);
+ if (err && err != -ENODEV)
+ break;
+
+ err = 0;
+ }
+
+ return err;
+}
+
+static int cmp_u32(const void *_a, const void *_b)
+{
+ const u32 *a = _a, *b = _b;
+
+ return *a - *b;
+}
+
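+/*
+ * With the heartbeat interval set to its minimum, measure the delay between
+ * consecutive heartbeat requests and check the median stays within a few
+ * jiffies of the expected tick.
+ */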
+static int __live_heartbeat_fast(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+ ktime_t t0, t1;
+ u32 times[5];
+ int err;
+ int i;
+
+ ce = intel_context_create(engine->kernel_context->gem_context,
+ engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ intel_engine_pm_get(engine);
+
+ err = intel_engine_set_heartbeat(engine, 1);
+ if (err)
+ goto err_pm;
+
+ for (i = 0; i < ARRAY_SIZE(times); i++) {
+ /* Manufacture a tick */
+ do {
+ while (READ_ONCE(engine->heartbeat.systole))
+ flush_delayed_work(&engine->heartbeat.work);
+
+ engine->serial++; /* quick, pretend we are not idle! */
+ flush_delayed_work(&engine->heartbeat.work);
+ if (!delayed_work_pending(&engine->heartbeat.work)) {
+ pr_err("%s: heartbeat did not start\n",
+ engine->name);
+ err = -EINVAL;
+ goto err_pm;
+ }
+
+ rcu_read_lock();
+ rq = READ_ONCE(engine->heartbeat.systole);
+ if (rq)
+ rq = i915_request_get_rcu(rq);
+ rcu_read_unlock();
+ } while (!rq);
+
+ t0 = ktime_get();
+ while (rq == READ_ONCE(engine->heartbeat.systole))
+ yield(); /* work is on the local cpu! */
+ t1 = ktime_get();
+
+ i915_request_put(rq);
+ times[i] = ktime_us_delta(t1, t0);
+ }
+
+ sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
+
+ pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
+ engine->name,
+ times[ARRAY_SIZE(times) / 2],
+ times[0],
+ times[ARRAY_SIZE(times) - 1]);
+
+ /* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */
+ if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) {
+ pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
+ engine->name,
+ times[ARRAY_SIZE(times) / 2],
+ jiffies_to_usecs(6));
+ err = -EINVAL;
+ }
+
+ intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
+err_pm:
+ intel_engine_pm_put(engine);
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_heartbeat_fast(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /* Check that the heartbeat ticks at the desired rate. */
+ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
+ return 0;
+
+ for_each_engine(engine, gt, id) {
+ err = __live_heartbeat_fast(engine);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static int __live_heartbeat_off(struct intel_engine_cs *engine)
+{
+ int err;
+
+ intel_engine_pm_get(engine);
+
+ engine->serial++;
+ flush_delayed_work(&engine->heartbeat.work);
+ if (!delayed_work_pending(&engine->heartbeat.work)) {
+ pr_err("%s: heartbeat not running\n",
+ engine->name);
+ err = -EINVAL;
+ goto err_pm;
+ }
+
+ err = intel_engine_set_heartbeat(engine, 0);
+ if (err)
+ goto err_pm;
+
+ engine->serial++;
+ flush_delayed_work(&engine->heartbeat.work);
+ if (delayed_work_pending(&engine->heartbeat.work)) {
+ pr_err("%s: heartbeat still running\n",
+ engine->name);
+ err = -EINVAL;
+ goto err_beat;
+ }
+
+ if (READ_ONCE(engine->heartbeat.systole)) {
+ pr_err("%s: heartbeat still allocated\n",
+ engine->name);
+ err = -EINVAL;
+ goto err_beat;
+ }
+
+err_beat:
+ intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
+err_pm:
+ intel_engine_pm_put(engine);
+ return err;
+}
+
+static int live_heartbeat_off(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /* Check that we can turn off heartbeat and not interrupt VIP */
+ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
+ return 0;
+
+ for_each_engine(engine, gt, id) {
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ err = __live_heartbeat_off(engine);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_idle_flush),
+ SUBTEST(live_idle_pulse),
+ SUBTEST(live_heartbeat_fast),
+ SUBTEST(live_heartbeat_off),
+ };
+ int saved_hangcheck;
+ int err;
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ saved_hangcheck = i915_modparams.enable_hangcheck;
+ i915_modparams.enable_hangcheck = INT_MAX;
+
+ err = intel_gt_live_subtests(tests, &i915->gt);
+
+ i915_modparams.enable_hangcheck = saved_hangcheck;
+ return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
index 3a1419376912..20b9c83f43ad 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
@@ -25,7 +25,7 @@ static int live_engine_pm(void *arg)
}
GEM_BUG_ON(intel_gt_pm_is_awake(gt));
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
const typeof(*igt_atomic_phases) *p;
for (p = igt_atomic_phases; p->name; p++) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
new file mode 100644
index 000000000000..d1752f15702a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -0,0 +1,60 @@
+
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "selftest_llc.h"
+
+static int live_gt_resume(void *arg)
+{
+ struct intel_gt *gt = arg;
+ IGT_TIMEOUT(end_time);
+ int err;
+
+ /* Do several suspend/resume cycles to check we don't explode! */
+ do {
+ intel_gt_suspend_prepare(gt);
+ intel_gt_suspend_late(gt);
+
+ if (gt->rc6.enabled) {
+ pr_err("rc6 still enabled after suspend!\n");
+ intel_gt_set_wedged_on_init(gt);
+ err = -EINVAL;
+ break;
+ }
+
+ err = intel_gt_resume(gt);
+ if (err)
+ break;
+
+ if (gt->rc6.supported && !gt->rc6.enabled) {
+ pr_err("rc6 not enabled upon resume!\n");
+ intel_gt_set_wedged_on_init(gt);
+ err = -EINVAL;
+ break;
+ }
+
+ err = st_llc_verify(&gt->llc);
+ if (err) {
+ pr_err("llc state not restored upon resume!\n");
+ intel_gt_set_wedged_on_init(gt);
+ break;
+ }
+ } while (!__igt_timeout(end_time, NULL));
+
+ return err;
+}
+
+int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_gt_resume),
+ };
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
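
live_gt_resume() above is a bounded stress loop: repeat the suspend/resume transition and re-check the power-management invariants (rc6 off while suspended, rc6 and the LLC table restored afterwards) until the IGT timeout expires. A minimal userspace sketch of that shape, with stand-in transitions and a fixed iteration budget in place of IGT_TIMEOUT (all names below are illustrative assumptions, not driver code):

#include <stdbool.h>
#include <stdio.h>

static bool suspended;

static int do_suspend(void) { suspended = true; return 0; }
static int do_resume(void) { suspended = false; return 0; }
static bool rc6_enabled(void) { return !suspended; } /* toy model */

int main(void)
{
	int cycles = 1000; /* stands in for the IGT timeout */
	int err = 0;

	do {
		err = do_suspend();
		if (err)
			break;

		if (rc6_enabled()) { /* invariant: rc6 off while suspended */
			fprintf(stderr, "rc6 still enabled after suspend!\n");
			err = -1;
			break;
		}

		err = do_resume();
		if (err)
			break;

		if (!rc6_enabled()) { /* invariant: rc6 back on after resume */
			fprintf(stderr, "rc6 not enabled upon resume!\n");
			err = -1;
			break;
		}
	} while (--cycles);

	return err ? 1 : 0;
}
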
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index a0098fc35921..85e9ccf5c304 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -131,7 +131,7 @@ static struct i915_request *
hang_create_request(struct hang *h, struct intel_engine_cs *engine)
{
struct intel_gt *gt = h->gt;
- struct i915_address_space *vm = h->ctx->vm ?: &engine->gt->ggtt->vm;
+ struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx);
struct drm_i915_gem_object *obj;
struct i915_request *rq = NULL;
struct i915_vma *hws, *vma;
@@ -141,12 +141,15 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
int err;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
- if (IS_ERR(obj))
+ if (IS_ERR(obj)) {
+ i915_vm_put(vm);
return ERR_CAST(obj);
+ }
vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(gt->i915));
if (IS_ERR(vaddr)) {
i915_gem_object_put(obj);
+ i915_vm_put(vm);
return ERR_CAST(vaddr);
}
@@ -157,16 +160,22 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
h->batch = vaddr;
vma = i915_vma_instance(h->obj, vm, NULL);
- if (IS_ERR(vma))
+ if (IS_ERR(vma)) {
+ i915_vm_put(vm);
return ERR_CAST(vma);
+ }
hws = i915_vma_instance(h->hws, vm, NULL);
- if (IS_ERR(hws))
+ if (IS_ERR(hws)) {
+ i915_vm_put(vm);
return ERR_CAST(hws);
+ }
err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err)
+ if (err) {
+ i915_vm_put(vm);
return ERR_PTR(err);
+ }
err = i915_vma_pin(hws, 0, 0, PIN_USER);
if (err)
@@ -264,6 +273,7 @@ unpin_hws:
i915_vma_unpin(hws);
unpin_vma:
i915_vma_unpin(vma);
+ i915_vm_put(vm);
return err ? ERR_PTR(err) : rq;
}
@@ -285,7 +295,7 @@ static void hang_fini(struct hang *h)
kernel_context_close(h->ctx);
- igt_flush_test(h->gt->i915, I915_WAIT_LOCKED);
+ igt_flush_test(h->gt->i915);
}
static bool wait_until_running(struct hang *h, struct i915_request *rq)
@@ -309,12 +319,11 @@ static int igt_hang_sanitycheck(void *arg)
/* Basic check that we can execute our hanging batch */
- mutex_lock(&gt->i915->drm.struct_mutex);
err = hang_init(&h, gt);
if (err)
- goto unlock;
+ return err;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
struct intel_wedge_me w;
long timeout;
@@ -355,8 +364,6 @@ static int igt_hang_sanitycheck(void *arg)
fini:
hang_fini(&h);
-unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
return err;
}
@@ -383,9 +390,7 @@ static int igt_reset_nop(void *arg)
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&gt->i915->drm.struct_mutex);
ctx = live_context(gt->i915, file);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto out;
@@ -395,9 +400,7 @@ static int igt_reset_nop(void *arg)
reset_count = i915_reset_count(global);
count = 0;
do {
- mutex_lock(&gt->i915->drm.struct_mutex);
-
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
int i;
for (i = 0; i < 16; i++) {
@@ -417,7 +420,6 @@ static int igt_reset_nop(void *arg)
intel_gt_reset(gt, ALL_ENGINES, NULL);
igt_global_reset_unlock(gt);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (intel_gt_is_wedged(gt)) {
err = -EIO;
break;
@@ -429,16 +431,13 @@ static int igt_reset_nop(void *arg)
break;
}
- err = igt_flush_test(gt->i915, 0);
+ err = igt_flush_test(gt->i915);
if (err)
break;
} while (time_before(jiffies, end_time));
pr_info("%s: %d resets\n", __func__, count);
- mutex_lock(&gt->i915->drm.struct_mutex);
- err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
- mutex_unlock(&gt->i915->drm.struct_mutex);
-
+ err = igt_flush_test(gt->i915);
out:
mock_file_free(gt->i915, file);
if (intel_gt_is_wedged(gt))
@@ -458,23 +457,21 @@ static int igt_reset_nop_engine(void *arg)
/* Check that we can engine-reset during non-user portions */
- if (!intel_has_reset_engine(gt->i915))
+ if (!intel_has_reset_engine(gt))
return 0;
file = mock_file(gt->i915);
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&gt->i915->drm.struct_mutex);
ctx = live_context(gt->i915, file);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto out;
}
i915_gem_context_clear_bannable(ctx);
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
unsigned int reset_count, reset_engine_count;
unsigned int count;
IGT_TIMEOUT(end_time);
@@ -494,7 +491,6 @@ static int igt_reset_nop_engine(void *arg)
break;
}
- mutex_lock(&gt->i915->drm.struct_mutex);
for (i = 0; i < 16; i++) {
struct i915_request *rq;
@@ -507,7 +503,6 @@ static int igt_reset_nop_engine(void *arg)
i915_request_add(rq);
}
err = intel_engine_reset(engine, NULL);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (err) {
pr_err("i915_reset_engine failed\n");
break;
@@ -533,15 +528,12 @@ static int igt_reset_nop_engine(void *arg)
if (err)
break;
- err = igt_flush_test(gt->i915, 0);
+ err = igt_flush_test(gt->i915);
if (err)
break;
}
- mutex_lock(&gt->i915->drm.struct_mutex);
- err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
- mutex_unlock(&gt->i915->drm.struct_mutex);
-
+ err = igt_flush_test(gt->i915);
out:
mock_file_free(gt->i915, file);
if (intel_gt_is_wedged(gt))
@@ -559,18 +551,16 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
/* Check that we can issue an engine reset on an idle engine (no-op) */
- if (!intel_has_reset_engine(gt->i915))
+ if (!intel_has_reset_engine(gt))
return 0;
if (active) {
- mutex_lock(&gt->i915->drm.struct_mutex);
err = hang_init(&h, gt);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (err)
return err;
}
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
unsigned int reset_count, reset_engine_count;
IGT_TIMEOUT(end_time);
@@ -593,17 +583,14 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
if (active) {
struct i915_request *rq;
- mutex_lock(&gt->i915->drm.struct_mutex);
rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- mutex_unlock(&gt->i915->drm.struct_mutex);
break;
}
i915_request_get(rq);
i915_request_add(rq);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (!wait_until_running(&h, rq)) {
struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
@@ -647,7 +634,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
if (err)
break;
- err = igt_flush_test(gt->i915, 0);
+ err = igt_flush_test(gt->i915);
if (err)
break;
}
@@ -655,11 +642,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
if (intel_gt_is_wedged(gt))
err = -EIO;
- if (active) {
- mutex_lock(&gt->i915->drm.struct_mutex);
+ if (active)
hang_fini(&h);
- mutex_unlock(&gt->i915->drm.struct_mutex);
- }
return err;
}
@@ -725,9 +709,7 @@ static int active_engine(void *data)
return PTR_ERR(file);
for (count = 0; count < ARRAY_SIZE(ctx); count++) {
- mutex_lock(&engine->i915->drm.struct_mutex);
ctx[count] = live_context(engine->i915, file);
- mutex_unlock(&engine->i915->drm.struct_mutex);
if (IS_ERR(ctx[count])) {
err = PTR_ERR(ctx[count]);
while (--count)
@@ -741,10 +723,8 @@ static int active_engine(void *data)
struct i915_request *old = rq[idx];
struct i915_request *new;
- mutex_lock(&engine->i915->drm.struct_mutex);
new = igt_request_alloc(ctx[idx], engine);
if (IS_ERR(new)) {
- mutex_unlock(&engine->i915->drm.struct_mutex);
err = PTR_ERR(new);
break;
}
@@ -755,7 +735,6 @@ static int active_engine(void *data)
rq[idx] = i915_request_get(new);
i915_request_add(new);
- mutex_unlock(&engine->i915->drm.struct_mutex);
err = active_request_put(old);
if (err)
@@ -791,13 +770,11 @@ static int __igt_reset_engines(struct intel_gt *gt,
* with any other engine.
*/
- if (!intel_has_reset_engine(gt->i915))
+ if (!intel_has_reset_engine(gt))
return 0;
if (flags & TEST_ACTIVE) {
- mutex_lock(&gt->i915->drm.struct_mutex);
err = hang_init(&h, gt);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (err)
return err;
@@ -805,7 +782,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
h.ctx->sched.priority = 1024;
}
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
struct active_engine threads[I915_NUM_ENGINES] = {};
unsigned long device = i915_reset_count(global);
unsigned long count = 0, reported;
@@ -823,7 +800,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
}
memset(threads, 0, sizeof(threads));
- for_each_engine(other, gt->i915, tmp) {
+ for_each_engine(other, gt, tmp) {
struct task_struct *tsk;
threads[tmp].resets =
@@ -849,23 +826,22 @@ static int __igt_reset_engines(struct intel_gt *gt,
get_task_struct(tsk);
}
+ yield(); /* start all threads before we begin */
+
intel_engine_pm_get(engine);
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
struct i915_request *rq = NULL;
if (flags & TEST_ACTIVE) {
- mutex_lock(&gt->i915->drm.struct_mutex);
rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- mutex_unlock(&gt->i915->drm.struct_mutex);
break;
}
i915_request_get(rq);
i915_request_add(rq);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (!wait_until_running(&h, rq)) {
struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
@@ -940,7 +916,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
}
unwind:
- for_each_engine(other, gt->i915, tmp) {
+ for_each_engine(other, gt, tmp) {
int ret;
if (!threads[tmp].task)
@@ -977,9 +953,7 @@ unwind:
if (err)
break;
- mutex_lock(&gt->i915->drm.struct_mutex);
- err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
- mutex_unlock(&gt->i915->drm.struct_mutex);
+ err = igt_flush_test(gt->i915);
if (err)
break;
}
@@ -987,11 +961,8 @@ unwind:
if (intel_gt_is_wedged(gt))
err = -EIO;
- if (flags & TEST_ACTIVE) {
- mutex_lock(&gt->i915->drm.struct_mutex);
+ if (flags & TEST_ACTIVE)
hang_fini(&h);
- mutex_unlock(&gt->i915->drm.struct_mutex);
- }
return err;
}
@@ -1047,7 +1018,7 @@ static int igt_reset_wait(void *arg)
{
struct intel_gt *gt = arg;
struct i915_gpu_error *global = &gt->i915->gpu_error;
- struct intel_engine_cs *engine = gt->i915->engine[RCS0];
+ struct intel_engine_cs *engine = gt->engine[RCS0];
struct i915_request *rq;
unsigned int reset_count;
struct hang h;
@@ -1061,7 +1032,6 @@ static int igt_reset_wait(void *arg)
igt_global_reset_lock(gt);
- mutex_lock(&gt->i915->drm.struct_mutex);
err = hang_init(&h, gt);
if (err)
goto unlock;
@@ -1109,7 +1079,6 @@ out_rq:
fini:
hang_fini(&h);
unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
igt_global_reset_unlock(gt);
if (intel_gt_is_wedged(gt))
@@ -1127,15 +1096,14 @@ static int evict_vma(void *data)
{
struct evict_vma *arg = data;
struct i915_address_space *vm = arg->vma->vm;
- struct drm_i915_private *i915 = vm->i915;
struct drm_mm_node evict = arg->vma->node;
int err;
complete(&arg->completion);
- mutex_lock(&i915->drm.struct_mutex);
+ mutex_lock(&vm->mutex);
err = i915_gem_evict_for_node(vm, &evict, 0);
- mutex_unlock(&i915->drm.struct_mutex);
+ mutex_unlock(&vm->mutex);
return err;
}
@@ -1143,39 +1111,33 @@ static int evict_vma(void *data)
static int evict_fence(void *data)
{
struct evict_vma *arg = data;
- struct drm_i915_private *i915 = arg->vma->vm->i915;
int err;
complete(&arg->completion);
- mutex_lock(&i915->drm.struct_mutex);
-
/* Mark the fence register as dirty to force the mmio update. */
err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512);
if (err) {
pr_err("Invalid Y-tiling settings; err:%d\n", err);
- goto out_unlock;
+ return err;
}
err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE);
if (err) {
pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err);
- goto out_unlock;
+ return err;
}
err = i915_vma_pin_fence(arg->vma);
i915_vma_unpin(arg->vma);
if (err) {
pr_err("Unable to pin Y-tiled fence; err:%d\n", err);
- goto out_unlock;
+ return err;
}
i915_vma_unpin_fence(arg->vma);
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
-
- return err;
+ return 0;
}
static int __igt_reset_evict_vma(struct intel_gt *gt,
@@ -1183,23 +1145,26 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
int (*fn)(void *),
unsigned int flags)
{
- struct intel_engine_cs *engine = gt->i915->engine[RCS0];
+ struct intel_engine_cs *engine = gt->engine[RCS0];
struct drm_i915_gem_object *obj;
struct task_struct *tsk = NULL;
struct i915_request *rq;
struct evict_vma arg;
struct hang h;
+ unsigned int pin_flags;
int err;
+ if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE)
+ return 0;
+
if (!engine || !intel_engine_can_store_dword(engine))
return 0;
/* Check that we can recover an unbind stuck on a hanging request */
- mutex_lock(&gt->i915->drm.struct_mutex);
err = hang_init(&h, gt);
if (err)
- goto unlock;
+ return err;
obj = i915_gem_object_create_internal(gt->i915, SZ_1M);
if (IS_ERR(obj)) {
@@ -1227,10 +1192,12 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
goto out_obj;
}
- err = i915_vma_pin(arg.vma, 0, 0,
- i915_vma_is_ggtt(arg.vma) ?
- PIN_GLOBAL | PIN_MAPPABLE :
- PIN_USER);
+ pin_flags = i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER;
+
+ if (flags & EXEC_OBJECT_NEEDS_FENCE)
+ pin_flags |= PIN_MAPPABLE;
+
+ err = i915_vma_pin(arg.vma, 0, 0, pin_flags);
if (err) {
i915_request_add(rq);
goto out_obj;
@@ -1262,8 +1229,6 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
if (err)
goto out_rq;
- mutex_unlock(&gt->i915->drm.struct_mutex);
-
if (!wait_until_running(&h, rq)) {
struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
@@ -1312,16 +1277,12 @@ out_reset:
put_task_struct(tsk);
}
- mutex_lock(&gt->i915->drm.struct_mutex);
out_rq:
i915_request_put(rq);
out_obj:
i915_gem_object_put(obj);
fini:
hang_fini(&h);
-unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
-
if (intel_gt_is_wedged(gt))
return -EIO;
@@ -1340,6 +1301,7 @@ static int igt_reset_evict_ppgtt(void *arg)
{
struct intel_gt *gt = arg;
struct i915_gem_context *ctx;
+ struct i915_address_space *vm;
struct drm_file *file;
int err;
@@ -1347,18 +1309,20 @@ static int igt_reset_evict_ppgtt(void *arg)
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&gt->i915->drm.struct_mutex);
ctx = live_context(gt->i915, file);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto out;
}
err = 0;
- if (ctx->vm) /* aliasing == global gtt locking, covered above */
- err = __igt_reset_evict_vma(gt, ctx->vm,
+ vm = i915_gem_context_get_vm_rcu(ctx);
+ if (!i915_is_ggtt(vm)) {
+ /* aliasing == global gtt locking, covered above */
+ err = __igt_reset_evict_vma(gt, vm,
evict_vma, EXEC_OBJECT_WRITE);
+ }
+ i915_vm_put(vm);
out:
mock_file_free(gt->i915, file);
@@ -1379,7 +1343,7 @@ static int wait_for_others(struct intel_gt *gt,
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
if (engine == exclude)
continue;
@@ -1403,12 +1367,11 @@ static int igt_reset_queue(void *arg)
igt_global_reset_lock(gt);
- mutex_lock(&gt->i915->drm.struct_mutex);
err = hang_init(&h, gt);
if (err)
goto unlock;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_request *prev;
IGT_TIMEOUT(end_time);
unsigned int count;
@@ -1518,7 +1481,7 @@ static int igt_reset_queue(void *arg)
i915_request_put(prev);
- err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+ err = igt_flush_test(gt->i915);
if (err)
break;
}
@@ -1526,7 +1489,6 @@ static int igt_reset_queue(void *arg)
fini:
hang_fini(&h);
unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
igt_global_reset_unlock(gt);
if (intel_gt_is_wedged(gt))
@@ -1539,7 +1501,7 @@ static int igt_handle_error(void *arg)
{
struct intel_gt *gt = arg;
struct i915_gpu_error *global = &gt->i915->gpu_error;
- struct intel_engine_cs *engine = gt->i915->engine[RCS0];
+ struct intel_engine_cs *engine = gt->engine[RCS0];
struct hang h;
struct i915_request *rq;
struct i915_gpu_state *error;
@@ -1547,17 +1509,15 @@ static int igt_handle_error(void *arg)
/* Check that we can issue a global GPU and engine reset */
- if (!intel_has_reset_engine(gt->i915))
+ if (!intel_has_reset_engine(gt))
return 0;
if (!engine || !intel_engine_can_store_dword(engine))
return 0;
- mutex_lock(&gt->i915->drm.struct_mutex);
-
err = hang_init(&h, gt);
if (err)
- goto err_unlock;
+ return err;
rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
@@ -1581,8 +1541,6 @@ static int igt_handle_error(void *arg)
goto err_request;
}
- mutex_unlock(&gt->i915->drm.struct_mutex);
-
/* Temporarily disable error capture */
error = xchg(&global->first_error, (void *)-1);
@@ -1590,8 +1548,6 @@ static int igt_handle_error(void *arg)
xchg(&global->first_error, error);
- mutex_lock(&gt->i915->drm.struct_mutex);
-
if (rq->fence.error != -EIO) {
pr_err("Guilty request not identified!\n");
err = -EINVAL;
@@ -1602,8 +1558,6 @@ err_request:
i915_request_put(rq);
err_fini:
hang_fini(&h);
-err_unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
return err;
}
@@ -1617,7 +1571,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
GEM_TRACE("i915_reset_engine(%s:%s) under %s\n",
engine->name, mode, p->name);
- tasklet_disable_nosync(t);
+ tasklet_disable(t);
p->critical_section_begin();
err = intel_engine_reset(engine, NULL);
@@ -1689,14 +1643,13 @@ static int igt_reset_engines_atomic(void *arg)
/* Check that the engines resets are usable from atomic context */
- if (!intel_has_reset_engine(gt->i915))
+ if (!intel_has_reset_engine(gt))
return 0;
if (USES_GUC_SUBMISSION(gt->i915))
return 0;
igt_global_reset_lock(gt);
- mutex_lock(&gt->i915->drm.struct_mutex);
/* Flush any requests before we get started and check basics */
if (!igt_force_reset(gt))
@@ -1706,7 +1659,7 @@ static int igt_reset_engines_atomic(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
err = igt_atomic_reset_engine(engine, p);
if (err)
goto out;
@@ -1716,9 +1669,7 @@ static int igt_reset_engines_atomic(void *arg)
out:
/* As we poke around the guts, do a full reset before continuing. */
igt_force_reset(gt);
-
unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
igt_global_reset_unlock(gt);
return err;
@@ -1743,27 +1694,19 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
};
struct intel_gt *gt = &i915->gt;
intel_wakeref_t wakeref;
- bool saved_hangcheck;
int err;
- if (!intel_has_gpu_reset(gt->i915))
+ if (!intel_has_gpu_reset(gt))
return 0;
if (intel_gt_is_wedged(gt))
return -EIO; /* we're long past hope of a successful reset */
- wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
- saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
- drain_delayed_work(&gt->hangcheck.work); /* flush param */
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
err = intel_gt_live_subtests(tests, gt);
- mutex_lock(&gt->i915->drm.struct_mutex);
- igt_flush_test(gt->i915, I915_WAIT_LOCKED);
- mutex_unlock(&gt->i915->drm.struct_mutex);
-
- i915_modparams.enable_hangcheck = saved_hangcheck;
- intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
return err;
}
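
A note on the hang_create_request() hunks above: switching from the bare ctx->vm pointer to i915_gem_context_get_vm_rcu() means the helper now owns a reference on the address space, so every early return gains a matching i915_vm_put(), and the common exit path drops it as well. A generic sketch of that get/put discipline, here folded into a single unwind label rather than per-return puts (the struct and helpers are illustrative stand-ins, not i915 API):

#include <stdio.h>
#include <stdlib.h>

struct vm {
	int refcount;
};

static struct vm *vm_get(struct vm *vm)
{
	vm->refcount++;
	return vm;
}

static void vm_put(struct vm *vm)
{
	if (--vm->refcount == 0)
		printf("vm released\n");
}

static int setup_request(struct vm *shared)
{
	struct vm *vm = vm_get(shared); /* reference held for this scope */
	void *obj, *map;
	int err = 0;

	obj = malloc(64);
	if (!obj) {
		err = -1;
		goto out_vm; /* each failure path still drops the reference */
	}

	map = malloc(64);
	if (!map) {
		err = -1;
		goto out_obj;
	}

	free(map);
out_obj:
	free(obj);
out_vm:
	vm_put(vm);
	return err;
}

int main(void)
{
	struct vm shared = { .refcount = 1 };

	setup_request(&shared);
	vm_put(&shared); /* final put releases the object */
	return 0;
}
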
diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c
new file mode 100644
index 000000000000..fd3770e48ac7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_llc.c
@@ -0,0 +1,80 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_pm.h" /* intel_gpu_freq() */
+#include "selftest_llc.h"
+#include "intel_rps.h"
+
+static int gen6_verify_ring_freq(struct intel_llc *llc)
+{
+ struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+ struct ia_constants consts;
+ intel_wakeref_t wakeref;
+ unsigned int gpu_freq;
+ int err = 0;
+
+ wakeref = intel_runtime_pm_get(llc_to_gt(llc)->uncore->rpm);
+
+ if (!get_ia_constants(llc, &consts)) {
+ err = -ENODEV;
+ goto out_rpm;
+ }
+
+ for (gpu_freq = consts.min_gpu_freq;
+ gpu_freq <= consts.max_gpu_freq;
+ gpu_freq++) {
+ struct intel_rps *rps = &llc_to_gt(llc)->rps;
+
+ unsigned int ia_freq, ring_freq, found;
+ u32 val;
+
+ calc_ia_freq(llc, gpu_freq, &consts, &ia_freq, &ring_freq);
+
+ val = gpu_freq;
+ if (sandybridge_pcode_read(i915,
+ GEN6_PCODE_READ_MIN_FREQ_TABLE,
+ &val, NULL)) {
+ pr_err("Failed to read freq table[%d], range [%d, %d]\n",
+ gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq);
+ err = -ENXIO;
+ break;
+ }
+
+ found = (val >> 0) & 0xff;
+ if (found != ia_freq) {
+ pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected CPU freq, found %d, expected %d\n",
+ gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq,
+ intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)),
+ found, ia_freq);
+ err = -EINVAL;
+ break;
+ }
+
+ found = (val >> 8) & 0xff;
+ if (found != ring_freq) {
+ pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected ring freq, found %d, expected %d\n",
+ gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq,
+ intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)),
+ found, ring_freq);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+out_rpm:
+ intel_runtime_pm_put(llc_to_gt(llc)->uncore->rpm, wakeref);
+ return err;
+}
+
+int st_llc_verify(struct intel_llc *llc)
+{
+ int err = 0;
+
+ if (HAS_LLC(llc_to_gt(llc)->i915))
+ err = gen6_verify_ring_freq(llc);
+
+ return err;
+}
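
For the table check above: each GEN6_PCODE_READ_MIN_FREQ_TABLE reply packs the expected IA frequency in bits 7:0 and the expected ring frequency in bits 15:8, which is what the two mask-and-shift comparisons extract. A small self-contained decode of that layout (the reply value in the demo is made up):

#include <stdint.h>
#include <stdio.h>

struct freq_entry {
	unsigned int ia_freq;   /* bits 7:0 of the pcode reply */
	unsigned int ring_freq; /* bits 15:8 of the pcode reply */
};

static struct freq_entry decode_freq_entry(uint32_t val)
{
	return (struct freq_entry){
		.ia_freq = (val >> 0) & 0xff,
		.ring_freq = (val >> 8) & 0xff,
	};
}

int main(void)
{
	uint32_t reply = 0x1622; /* hypothetical reply for one gpu_freq */
	struct freq_entry e = decode_freq_entry(reply);

	printf("ia:%u ring:%u\n", e.ia_freq, e.ring_freq);
	return 0;
}
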
diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.h b/drivers/gpu/drm/i915/gt/selftest_llc.h
new file mode 100644
index 000000000000..873f896e72f2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_llc.h
@@ -0,0 +1,14 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef SELFTEST_LLC_H
+#define SELFTEST_LLC_H
+
+struct intel_llc;
+
+int st_llc_verify(struct intel_llc *llc);
+
+#endif /* SELFTEST_LLC_H */
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index d791158988d6..eb71ac2f992c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -7,6 +7,7 @@
#include <linux/prime_numbers.h>
#include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"
#include "i915_selftest.h"
@@ -19,26 +20,52 @@
#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"
+#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
+#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
+
+static struct i915_vma *create_scratch(struct intel_gt *gt)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int err;
+
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err) {
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+ }
+
+ return vma;
+}
+
static int live_sanitycheck(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_engines_iter it;
struct i915_gem_context *ctx;
struct intel_context *ce;
struct igt_spinner spin;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
- if (!HAS_LOGICAL_RING_CONTEXTS(i915))
+ if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- if (igt_spinner_init(&spin, &i915->gt))
- goto err_unlock;
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
- ctx = kernel_context(i915);
+ ctx = kernel_context(gt->i915);
if (!ctx)
goto err_spin;
@@ -55,13 +82,13 @@ static int live_sanitycheck(void *arg)
if (!igt_wait_for_spinner(&spin, rq)) {
GEM_TRACE("spinner failed to start\n");
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx;
}
igt_spinner_end(&spin);
- if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+ if (igt_flush_test(gt->i915)) {
err = -EIO;
goto err_ctx;
}
@@ -73,12 +100,175 @@ err_ctx:
kernel_context_close(ctx);
err_spin:
igt_spinner_fini(&spin);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
}
+static int live_unlite_restore(struct intel_gt *gt, int prio)
+{
+ struct intel_engine_cs *engine;
+ struct i915_gem_context *ctx;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = -ENOMEM;
+
+ /*
+ * Check that we can correctly context switch between 2 instances
+ * on the same engine from the same parent context.
+ */
+
+ if (igt_spinner_init(&spin, gt))
+ return err;
+
+ ctx = kernel_context(gt->i915);
+ if (!ctx)
+ goto err_spin;
+
+ err = 0;
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce[2] = {};
+ struct i915_request *rq[2];
+ struct igt_live_test t;
+ int n;
+
+ if (prio && !intel_engine_has_preemption(engine))
+ continue;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
+ err = -EIO;
+ break;
+ }
+
+ for (n = 0; n < ARRAY_SIZE(ce); n++) {
+ struct intel_context *tmp;
+
+ tmp = intel_context_create(ctx, engine);
+ if (IS_ERR(tmp)) {
+ err = PTR_ERR(tmp);
+ goto err_ce;
+ }
+
+ err = intel_context_pin(tmp);
+ if (err) {
+ intel_context_put(tmp);
+ goto err_ce;
+ }
+
+ /*
+ * Setup the pair of contexts such that if we
+ * lite-restore using the RING_TAIL from ce[1] it
+ * will execute garbage from ce[0]->ring.
+ */
+ memset(tmp->ring->vaddr,
+ POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
+ tmp->ring->vma->size);
+
+ ce[n] = tmp;
+ }
+ GEM_BUG_ON(!ce[1]->ring->size);
+ intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
+ __execlists_update_reg_state(ce[1], engine);
+
+ rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
+ if (IS_ERR(rq[0])) {
+ err = PTR_ERR(rq[0]);
+ goto err_ce;
+ }
+
+ i915_request_get(rq[0]);
+ i915_request_add(rq[0]);
+ GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
+
+ if (!igt_wait_for_spinner(&spin, rq[0])) {
+ i915_request_put(rq[0]);
+ goto err_ce;
+ }
+
+ rq[1] = i915_request_create(ce[1]);
+ if (IS_ERR(rq[1])) {
+ err = PTR_ERR(rq[1]);
+ i915_request_put(rq[0]);
+ goto err_ce;
+ }
+
+ if (!prio) {
+ /*
+ * Ensure we do the switch to ce[1] on completion.
+ *
+ * rq[0] is already submitted, so this should reduce
+ * to a no-op (a wait on a request on the same engine
+ * uses the submit fence, not the completion fence),
+ * but it will install a dependency on rq[1] for rq[0]
+ * that will prevent the pair being reordered by
+ * timeslicing.
+ */
+ i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+ }
+
+ i915_request_get(rq[1]);
+ i915_request_add(rq[1]);
+ GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
+ i915_request_put(rq[0]);
+
+ if (prio) {
+ struct i915_sched_attr attr = {
+ .priority = prio,
+ };
+
+ /* Alternatively preempt the spinner with ce[1] */
+ engine->schedule(rq[1], &attr);
+ }
+
+ /* And switch back to ce[0] for good measure */
+ rq[0] = i915_request_create(ce[0]);
+ if (IS_ERR(rq[0])) {
+ err = PTR_ERR(rq[0]);
+ i915_request_put(rq[1]);
+ goto err_ce;
+ }
+
+ i915_request_await_dma_fence(rq[0], &rq[1]->fence);
+ i915_request_get(rq[0]);
+ i915_request_add(rq[0]);
+ GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
+ i915_request_put(rq[1]);
+ i915_request_put(rq[0]);
+
+err_ce:
+ tasklet_kill(&engine->execlists.tasklet); /* flush submission */
+ igt_spinner_end(&spin);
+ for (n = 0; n < ARRAY_SIZE(ce); n++) {
+ if (IS_ERR_OR_NULL(ce[n]))
+ break;
+
+ intel_context_unpin(ce[n]);
+ intel_context_put(ce[n]);
+ }
+
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ kernel_context_close(ctx);
+err_spin:
+ igt_spinner_fini(&spin);
+ return err;
+}
+
+static int live_unlite_switch(void *arg)
+{
+ return live_unlite_restore(arg, 0);
+}
+
+static int live_unlite_preempt(void *arg)
+{
+ return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
+}
+
static int
emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
{
@@ -131,7 +321,13 @@ semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
if (IS_ERR(rq))
goto out_ctx;
- err = emit_semaphore_chain(rq, vma, idx);
+ err = 0;
+ if (rq->engine->emit_init_breadcrumb)
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err == 0)
+ err = emit_semaphore_chain(rq, vma, idx);
+ if (err == 0)
+ i915_request_get(rq);
i915_request_add(rq);
if (err)
rq = ERR_PTR(err);
@@ -144,10 +340,10 @@ out_ctx:
static int
release_queue(struct intel_engine_cs *engine,
struct i915_vma *vma,
- int idx)
+ int idx, int prio)
{
struct i915_sched_attr attr = {
- .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
+ .priority = prio,
};
struct i915_request *rq;
u32 *cs;
@@ -168,9 +364,15 @@ release_queue(struct intel_engine_cs *engine,
*cs++ = 1;
intel_ring_advance(rq, cs);
+
+ i915_request_get(rq);
i915_request_add(rq);
+ local_bh_disable();
engine->schedule(rq, &attr);
+ local_bh_enable(); /* kick tasklet */
+
+ i915_request_put(rq);
return 0;
}
@@ -189,8 +391,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
if (IS_ERR(head))
return PTR_ERR(head);
- i915_request_get(head);
- for_each_engine(engine, outer->i915, id) {
+ for_each_engine(engine, outer->gt, id) {
for (i = 0; i < count; i++) {
struct i915_request *rq;
@@ -199,15 +400,16 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
err = PTR_ERR(rq);
goto out;
}
+
+ i915_request_put(rq);
}
}
- err = release_queue(outer, vma, n);
+ err = release_queue(outer, vma, n, INT_MAX);
if (err)
goto out;
- if (i915_request_wait(head,
- I915_WAIT_LOCKED,
+ if (i915_request_wait(head, 0,
2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
count, n);
@@ -223,9 +425,8 @@ out:
static int live_timeslice_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct drm_i915_gem_object *obj;
- intel_wakeref_t wakeref;
struct i915_vma *vma;
void *vaddr;
int err = 0;
@@ -239,17 +440,14 @@ static int live_timeslice_preempt(void *arg)
* need to preempt the current task and replace it with another
* ready task.
*/
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(obj)) {
- err = PTR_ERR(obj);
- goto err_unlock;
- }
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
@@ -269,7 +467,7 @@ static int live_timeslice_preempt(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
if (!intel_engine_has_preemption(engine))
continue;
@@ -279,7 +477,7 @@ static int live_timeslice_preempt(void *arg)
if (err)
goto err_pin;
- if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+ if (igt_flush_test(gt->i915)) {
err = -EIO;
goto err_pin;
}
@@ -292,22 +490,168 @@ err_map:
i915_gem_object_unpin_map(obj);
err_obj:
i915_gem_object_put(obj);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+}
+
+static struct i915_request *nop_request(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ rq = i915_request_create(engine->kernel_context);
+ if (IS_ERR(rq))
+ return rq;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ return rq;
+}
+
+static void wait_for_submit(struct intel_engine_cs *engine,
+ struct i915_request *rq)
+{
+ do {
+ cond_resched();
+ intel_engine_flush_submission(engine);
+ } while (!i915_request_is_active(rq));
+}
+
+static long timeslice_threshold(const struct intel_engine_cs *engine)
+{
+ return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
+}
+
+static int live_timeslice_queue(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct drm_i915_gem_object *obj;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct i915_vma *vma;
+ void *vaddr;
+ int err = 0;
+
+ /*
+ * Make sure that even if ELSP[0] and ELSP[1] are filled with
+ * timeslicing between them disabled, we *do* enable timeslicing
+ * if the queue demands it. (Normally, we do not submit if
+ * ELSP[1] is already occupied, so must rely on timeslicing to
+ * eject ELSP[0] in favour of the queue.)
+ */
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return 0;
+
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_obj;
+ }
+
+ vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto err_obj;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err)
+ goto err_map;
+
+ for_each_engine(engine, gt, id) {
+ struct i915_sched_attr attr = {
+ .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
+ };
+ struct i915_request *rq, *nop;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ memset(vaddr, 0, PAGE_SIZE);
+
+ /* ELSP[0]: semaphore wait */
+ rq = semaphore_queue(engine, vma, 0);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_pin;
+ }
+ engine->schedule(rq, &attr);
+ wait_for_submit(engine, rq);
+
+ /* ELSP[1]: nop request */
+ nop = nop_request(engine);
+ if (IS_ERR(nop)) {
+ err = PTR_ERR(nop);
+ i915_request_put(rq);
+ goto err_pin;
+ }
+ wait_for_submit(engine, nop);
+ i915_request_put(nop);
+
+ GEM_BUG_ON(i915_request_completed(rq));
+ GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+ /* Queue: semaphore signal, matching priority as semaphore */
+ err = release_queue(engine, vma, 1, effective_prio(rq));
+ if (err) {
+ i915_request_put(rq);
+ goto err_pin;
+ }
+
+ intel_engine_flush_submission(engine);
+ if (!READ_ONCE(engine->execlists.timer.expires) &&
+ !i915_request_completed(rq)) {
+ struct drm_printer p =
+ drm_info_printer(gt->i915->drm.dev);
+
+ GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
+ engine->name);
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+ GEM_TRACE_DUMP();
+
+ memset(vaddr, 0xff, PAGE_SIZE);
+ err = -EINVAL;
+ }
+
+ /* Timeslice every jiffy, so within 2 we should signal */
+ if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
+ struct drm_printer p =
+ drm_info_printer(gt->i915->drm.dev);
+
+ pr_err("%s: Failed to timeslice into queue\n",
+ engine->name);
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+
+ memset(vaddr, 0xff, PAGE_SIZE);
+ err = -EIO;
+ }
+ i915_request_put(rq);
+ if (err)
+ break;
+ }
+
+err_pin:
+ i915_vma_unpin(vma);
+err_map:
+ i915_gem_object_unpin_map(obj);
+err_obj:
+ i915_gem_object_put(obj);
return err;
}
static int live_busywait_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_context *ctx_hi, *ctx_lo;
struct intel_engine_cs *engine;
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
u32 *map;
@@ -316,22 +660,19 @@ static int live_busywait_preempt(void *arg)
* preempt the busywaits used to synchronise between rings.
*/
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- ctx_hi = kernel_context(i915);
+ ctx_hi = kernel_context(gt->i915);
if (!ctx_hi)
- goto err_unlock;
+ return -ENOMEM;
ctx_hi->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
- ctx_lo = kernel_context(i915);
+ ctx_lo = kernel_context(gt->i915);
if (!ctx_lo)
goto err_ctx_hi;
ctx_lo->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
- obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err_ctx_lo;
@@ -343,7 +684,7 @@ static int live_busywait_preempt(void *arg)
goto err_obj;
}
- vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_map;
@@ -353,7 +694,7 @@ static int live_busywait_preempt(void *arg)
if (err)
goto err_map;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_request *lo, *hi;
struct igt_live_test t;
u32 *cs;
@@ -364,7 +705,7 @@ static int live_busywait_preempt(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_vma;
}
@@ -444,7 +785,7 @@ static int live_busywait_preempt(void *arg)
i915_request_add(hi);
if (i915_request_wait(lo, 0, HZ / 5) < 0) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
+ struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
pr_err("%s: Failed to preempt semaphore busywait!\n",
engine->name);
@@ -452,7 +793,7 @@ static int live_busywait_preempt(void *arg)
intel_engine_dump(engine, &p, "%s\n", engine->name);
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_vma;
}
@@ -475,9 +816,6 @@ err_ctx_lo:
kernel_context_close(ctx_lo);
err_ctx_hi:
kernel_context_close(ctx_hi);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
}
@@ -501,49 +839,45 @@ spinner_create_request(struct igt_spinner *spin,
static int live_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_context *ctx_hi, *ctx_lo;
struct igt_spinner spin_hi, spin_lo;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+ if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
pr_err("Logical preemption supported, but not exposed\n");
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- if (igt_spinner_init(&spin_hi, &i915->gt))
- goto err_unlock;
+ if (igt_spinner_init(&spin_hi, gt))
+ return -ENOMEM;
- if (igt_spinner_init(&spin_lo, &i915->gt))
+ if (igt_spinner_init(&spin_lo, gt))
goto err_spin_hi;
- ctx_hi = kernel_context(i915);
+ ctx_hi = kernel_context(gt->i915);
if (!ctx_hi)
goto err_spin_lo;
ctx_hi->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
- ctx_lo = kernel_context(i915);
+ ctx_lo = kernel_context(gt->i915);
if (!ctx_lo)
goto err_ctx_hi;
ctx_lo->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct igt_live_test t;
struct i915_request *rq;
if (!intel_engine_has_preemption(engine))
continue;
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_ctx_lo;
}
@@ -559,7 +893,7 @@ static int live_preempt(void *arg)
if (!igt_wait_for_spinner(&spin_lo, rq)) {
GEM_TRACE("lo spinner failed to start\n");
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
@@ -576,7 +910,7 @@ static int live_preempt(void *arg)
if (!igt_wait_for_spinner(&spin_hi, rq)) {
GEM_TRACE("hi spinner failed to start\n");
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
@@ -599,54 +933,47 @@ err_spin_lo:
igt_spinner_fini(&spin_lo);
err_spin_hi:
igt_spinner_fini(&spin_hi);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
}
static int live_late_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_context *ctx_hi, *ctx_lo;
struct igt_spinner spin_hi, spin_lo;
struct intel_engine_cs *engine;
struct i915_sched_attr attr = {};
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- if (igt_spinner_init(&spin_hi, &i915->gt))
- goto err_unlock;
+ if (igt_spinner_init(&spin_hi, gt))
+ return -ENOMEM;
- if (igt_spinner_init(&spin_lo, &i915->gt))
+ if (igt_spinner_init(&spin_lo, gt))
goto err_spin_hi;
- ctx_hi = kernel_context(i915);
+ ctx_hi = kernel_context(gt->i915);
if (!ctx_hi)
goto err_spin_lo;
- ctx_lo = kernel_context(i915);
+ ctx_lo = kernel_context(gt->i915);
if (!ctx_lo)
goto err_ctx_hi;
/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
ctx_lo->sched.priority = I915_USER_PRIORITY(1);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct igt_live_test t;
struct i915_request *rq;
if (!intel_engine_has_preemption(engine))
continue;
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_ctx_lo;
}
@@ -705,15 +1032,12 @@ err_spin_lo:
igt_spinner_fini(&spin_lo);
err_spin_hi:
igt_spinner_fini(&spin_hi);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
err_wedged:
igt_spinner_end(&spin_hi);
igt_spinner_end(&spin_lo);
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
@@ -723,14 +1047,13 @@ struct preempt_client {
struct i915_gem_context *ctx;
};
-static int preempt_client_init(struct drm_i915_private *i915,
- struct preempt_client *c)
+static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
{
- c->ctx = kernel_context(i915);
+ c->ctx = kernel_context(gt->i915);
if (!c->ctx)
return -ENOMEM;
- if (igt_spinner_init(&c->spin, &i915->gt))
+ if (igt_spinner_init(&c->spin, gt))
goto err_ctx;
return 0;
@@ -748,11 +1071,10 @@ static void preempt_client_fini(struct preempt_client *c)
static int live_nopreempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct preempt_client a, b;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
/*
@@ -760,19 +1082,16 @@ static int live_nopreempt(void *arg)
* that may be being observed and not want to be interrupted.
*/
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- if (preempt_client_init(i915, &a))
- goto err_unlock;
- if (preempt_client_init(i915, &b))
+ if (preempt_client_init(gt, &a))
+ return -ENOMEM;
+ if (preempt_client_init(gt, &b))
goto err_client_a;
b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_request *rq_a, *rq_b;
if (!intel_engine_has_preemption(engine))
@@ -832,7 +1151,7 @@ static int live_nopreempt(void *arg)
goto err_wedged;
}
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(gt->i915))
goto err_wedged;
}
@@ -841,29 +1160,344 @@ err_client_b:
preempt_client_fini(&b);
err_client_a:
preempt_client_fini(&a);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
err_wedged:
igt_spinner_end(&b.spin);
igt_spinner_end(&a.spin);
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_b;
}
+struct live_preempt_cancel {
+ struct intel_engine_cs *engine;
+ struct preempt_client a, b;
+};
+
+static int __cancel_active0(struct live_preempt_cancel *arg)
+{
+ struct i915_request *rq;
+ struct igt_live_test t;
+ int err;
+
+ /* Preempt cancel of ELSP0 */
+ GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+ if (igt_live_test_begin(&t, arg->engine->i915,
+ __func__, arg->engine->name))
+ return -EIO;
+
+ clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
+ rq = spinner_create_request(&arg->a.spin,
+ arg->a.ctx, arg->engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
+ err = -EIO;
+ goto out;
+ }
+
+ i915_gem_context_set_banned(arg->a.ctx);
+ err = intel_engine_pulse(arg->engine);
+ if (err)
+ goto out;
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (rq->fence.error != -EIO) {
+ pr_err("Cancelled inflight0 request did not report -EIO\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ i915_request_put(rq);
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ return err;
+}
+
+static int __cancel_active1(struct live_preempt_cancel *arg)
+{
+ struct i915_request *rq[2] = {};
+ struct igt_live_test t;
+ int err;
+
+ /* Preempt cancel of ELSP1 */
+ GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+ if (igt_live_test_begin(&t, arg->engine->i915,
+ __func__, arg->engine->name))
+ return -EIO;
+
+ clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
+ rq[0] = spinner_create_request(&arg->a.spin,
+ arg->a.ctx, arg->engine,
+ MI_NOOP); /* no preemption */
+ if (IS_ERR(rq[0]))
+ return PTR_ERR(rq[0]);
+
+ i915_request_get(rq[0]);
+ i915_request_add(rq[0]);
+ if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
+ err = -EIO;
+ goto out;
+ }
+
+ clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags);
+ rq[1] = spinner_create_request(&arg->b.spin,
+ arg->b.ctx, arg->engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq[1])) {
+ err = PTR_ERR(rq[1]);
+ goto out;
+ }
+
+ i915_request_get(rq[1]);
+ err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+ i915_request_add(rq[1]);
+ if (err)
+ goto out;
+
+ i915_gem_context_set_banned(arg->b.ctx);
+ err = intel_engine_pulse(arg->engine);
+ if (err)
+ goto out;
+
+ igt_spinner_end(&arg->a.spin);
+ if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (rq[0]->fence.error != 0) {
+ pr_err("Normal inflight0 request did not complete\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rq[1]->fence.error != -EIO) {
+ pr_err("Cancelled inflight1 request did not report -EIO\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ i915_request_put(rq[1]);
+ i915_request_put(rq[0]);
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ return err;
+}
+
+static int __cancel_queued(struct live_preempt_cancel *arg)
+{
+ struct i915_request *rq[3] = {};
+ struct igt_live_test t;
+ int err;
+
+ /* Full ELSP and one in the wings */
+ GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+ if (igt_live_test_begin(&t, arg->engine->i915,
+ __func__, arg->engine->name))
+ return -EIO;
+
+ clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
+ rq[0] = spinner_create_request(&arg->a.spin,
+ arg->a.ctx, arg->engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq[0]))
+ return PTR_ERR(rq[0]);
+
+ i915_request_get(rq[0]);
+ i915_request_add(rq[0]);
+ if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
+ err = -EIO;
+ goto out;
+ }
+
+ clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags);
+ rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
+ if (IS_ERR(rq[1])) {
+ err = PTR_ERR(rq[1]);
+ goto out;
+ }
+
+ i915_request_get(rq[1]);
+ err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+ i915_request_add(rq[1]);
+ if (err)
+ goto out;
+
+ rq[2] = spinner_create_request(&arg->b.spin,
+ arg->a.ctx, arg->engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq[2])) {
+ err = PTR_ERR(rq[2]);
+ goto out;
+ }
+
+ i915_request_get(rq[2]);
+ err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
+ i915_request_add(rq[2]);
+ if (err)
+ goto out;
+
+ i915_gem_context_set_banned(arg->a.ctx);
+ err = intel_engine_pulse(arg->engine);
+ if (err)
+ goto out;
+
+ if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (rq[0]->fence.error != -EIO) {
+ pr_err("Cancelled inflight0 request did not report -EIO\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rq[1]->fence.error != 0) {
+ pr_err("Normal inflight1 request did not complete\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rq[2]->fence.error != -EIO) {
+ pr_err("Cancelled queued request did not report -EIO\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ i915_request_put(rq[2]);
+ i915_request_put(rq[1]);
+ i915_request_put(rq[0]);
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ return err;
+}
+
+static int __cancel_hostile(struct live_preempt_cancel *arg)
+{
+ struct i915_request *rq;
+ int err;
+
+ /* Preempt cancel non-preemptible spinner in ELSP0 */
+ if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ return 0;
+
+ GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+ clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
+ rq = spinner_create_request(&arg->a.spin,
+ arg->a.ctx, arg->engine,
+ MI_NOOP); /* preemption disabled */
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
+ err = -EIO;
+ goto out;
+ }
+
+ i915_gem_context_set_banned(arg->a.ctx);
+ err = intel_engine_pulse(arg->engine); /* force reset */
+ if (err)
+ goto out;
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (rq->fence.error != -EIO) {
+ pr_err("Cancelled inflight0 request did not report -EIO\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ i915_request_put(rq);
+ if (igt_flush_test(arg->engine->i915))
+ err = -EIO;
+ return err;
+}
+
+static int live_preempt_cancel(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct live_preempt_cancel data;
+ enum intel_engine_id id;
+ int err = -ENOMEM;
+
+ /*
+ * To cancel an inflight context, we need to first remove it from the
+ * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
+ */
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+ return 0;
+
+ if (preempt_client_init(gt, &data.a))
+ return -ENOMEM;
+ if (preempt_client_init(gt, &data.b))
+ goto err_client_a;
+
+ for_each_engine(data.engine, gt, id) {
+ if (!intel_engine_has_preemption(data.engine))
+ continue;
+
+ err = __cancel_active0(&data);
+ if (err)
+ goto err_wedged;
+
+ err = __cancel_active1(&data);
+ if (err)
+ goto err_wedged;
+
+ err = __cancel_queued(&data);
+ if (err)
+ goto err_wedged;
+
+ err = __cancel_hostile(&data);
+ if (err)
+ goto err_wedged;
+ }
+
+ err = 0;
+err_client_b:
+ preempt_client_fini(&data.b);
+err_client_a:
+ preempt_client_fini(&data.a);
+ return err;
+
+err_wedged:
+ GEM_TRACE_DUMP();
+ igt_spinner_end(&data.b.spin);
+ igt_spinner_end(&data.a.spin);
+ intel_gt_set_wedged(gt);
+ goto err_client_b;
+}
+
static int live_suppress_self_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct i915_sched_attr attr = {
.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
};
struct preempt_client a, b;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
/*
@@ -873,30 +1507,31 @@ static int live_suppress_self_preempt(void *arg)
* completion event.
*/
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (USES_GUC_SUBMISSION(i915))
+ if (USES_GUC_SUBMISSION(gt->i915))
return 0; /* presume black blox */
- if (intel_vgpu_active(i915))
+ if (intel_vgpu_active(gt->i915))
return 0; /* GVT forces single port & request submission */
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- if (preempt_client_init(i915, &a))
- goto err_unlock;
- if (preempt_client_init(i915, &b))
+ if (preempt_client_init(gt, &a))
+ return -ENOMEM;
+ if (preempt_client_init(gt, &b))
goto err_client_a;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_request *rq_a, *rq_b;
int depth;
if (!intel_engine_has_preemption(engine))
continue;
+ if (igt_flush_test(gt->i915))
+ goto err_wedged;
+
+ intel_engine_pm_get(engine);
engine->execlists.preempt_hang.count = 0;
rq_a = spinner_create_request(&a.spin,
@@ -904,12 +1539,14 @@ static int live_suppress_self_preempt(void *arg)
MI_NOOP);
if (IS_ERR(rq_a)) {
err = PTR_ERR(rq_a);
+ intel_engine_pm_put(engine);
goto err_client_b;
}
i915_request_add(rq_a);
if (!igt_wait_for_spinner(&a.spin, rq_a)) {
pr_err("First client failed to start\n");
+ intel_engine_pm_put(engine);
goto err_wedged;
}
@@ -921,6 +1558,7 @@ static int live_suppress_self_preempt(void *arg)
MI_NOOP);
if (IS_ERR(rq_b)) {
err = PTR_ERR(rq_b);
+ intel_engine_pm_put(engine);
goto err_client_b;
}
i915_request_add(rq_b);
@@ -931,6 +1569,7 @@ static int live_suppress_self_preempt(void *arg)
if (!igt_wait_for_spinner(&b.spin, rq_b)) {
pr_err("Second client failed to start\n");
+ intel_engine_pm_put(engine);
goto err_wedged;
}
@@ -944,11 +1583,13 @@ static int live_suppress_self_preempt(void *arg)
engine->name,
engine->execlists.preempt_hang.count,
depth);
+ intel_engine_pm_put(engine);
err = -EINVAL;
goto err_client_b;
}
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ intel_engine_pm_put(engine);
+ if (igt_flush_test(gt->i915))
goto err_wedged;
}
@@ -957,15 +1598,12 @@ err_client_b:
preempt_client_fini(&b);
err_client_a:
preempt_client_fini(&a);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
err_wedged:
igt_spinner_end(&b.spin);
igt_spinner_end(&a.spin);
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_b;
}
@@ -984,9 +1622,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
if (!rq)
return NULL;
- INIT_LIST_HEAD(&rq->active_list);
rq->engine = engine;
+ spin_lock_init(&rq->lock);
+ INIT_LIST_HEAD(&rq->fence.cb_list);
+ rq->fence.lock = &rq->lock;
+ rq->fence.ops = &i915_fence_ops;
+
i915_sched_node_init(&rq->sched);
/* mark this request as permanently incomplete */
@@ -1021,11 +1663,10 @@ static void dummy_request_free(struct i915_request *dummy)
static int live_suppress_wait_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct preempt_client client[4];
struct intel_engine_cs *engine;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
int i;
@@ -1035,22 +1676,19 @@ static int live_suppress_wait_preempt(void *arg)
* not needlessly generate preempt-to-idle cycles.
*/
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
- goto err_unlock;
- if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
+ if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
+ return -ENOMEM;
+ if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
goto err_client_0;
- if (preempt_client_init(i915, &client[2])) /* head of queue */
+ if (preempt_client_init(gt, &client[2])) /* head of queue */
goto err_client_1;
- if (preempt_client_init(i915, &client[3])) /* bystander */
+ if (preempt_client_init(gt, &client[3])) /* bystander */
goto err_client_2;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
int depth;
if (!intel_engine_has_preemption(engine))
@@ -1079,8 +1717,8 @@ static int live_suppress_wait_preempt(void *arg)
}
/* Disable NEWCLIENT promotion */
- __i915_active_request_set(&rq[i]->timeline->last_request,
- dummy);
+ __i915_active_fence_set(&i915_request_timeline(rq[i])->last_request,
+ &dummy->fence);
i915_request_add(rq[i]);
}
@@ -1105,7 +1743,7 @@ static int live_suppress_wait_preempt(void *arg)
for (i = 0; i < ARRAY_SIZE(client); i++)
igt_spinner_end(&client[i].spin);
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(gt->i915))
goto err_wedged;
if (engine->execlists.preempt_hang.count) {
@@ -1128,26 +1766,22 @@ err_client_1:
preempt_client_fini(&client[1]);
err_client_0:
preempt_client_fini(&client[0]);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
err_wedged:
for (i = 0; i < ARRAY_SIZE(client); i++)
igt_spinner_end(&client[i].spin);
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_3;
}
static int live_chain_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct preempt_client hi, lo;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
/*
@@ -1156,19 +1790,16 @@ static int live_chain_preempt(void *arg)
* the previously submitted spinner in B.
*/
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- if (preempt_client_init(i915, &hi))
- goto err_unlock;
+ if (preempt_client_init(gt, &hi))
+ return -ENOMEM;
- if (preempt_client_init(i915, &lo))
+ if (preempt_client_init(gt, &lo))
goto err_client_hi;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_sched_attr attr = {
.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
};
@@ -1199,7 +1830,7 @@ static int live_chain_preempt(void *arg)
goto err_wedged;
}
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_wedged;
}
@@ -1237,7 +1868,7 @@ static int live_chain_preempt(void *arg)
igt_spinner_end(&hi.spin);
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
- drm_info_printer(i915->drm.dev);
+ drm_info_printer(gt->i915->drm.dev);
pr_err("Failed to preempt over chain of %d\n",
count);
@@ -1253,7 +1884,7 @@ static int live_chain_preempt(void *arg)
i915_request_add(rq);
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
- drm_info_printer(i915->drm.dev);
+ drm_info_printer(gt->i915->drm.dev);
pr_err("Failed to flush low priority chain of %d requests\n",
count);
@@ -1274,57 +1905,50 @@ err_client_lo:
preempt_client_fini(&lo);
err_client_hi:
preempt_client_fini(&hi);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
err_wedged:
igt_spinner_end(&hi.spin);
igt_spinner_end(&lo.spin);
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_lo;
}
static int live_preempt_hang(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_context *ctx_hi, *ctx_lo;
struct igt_spinner spin_hi, spin_lo;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (!intel_has_reset_engine(i915))
+ if (!intel_has_reset_engine(gt))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- if (igt_spinner_init(&spin_hi, &i915->gt))
- goto err_unlock;
+ if (igt_spinner_init(&spin_hi, gt))
+ return -ENOMEM;
- if (igt_spinner_init(&spin_lo, &i915->gt))
+ if (igt_spinner_init(&spin_lo, gt))
goto err_spin_hi;
- ctx_hi = kernel_context(i915);
+ ctx_hi = kernel_context(gt->i915);
if (!ctx_hi)
goto err_spin_lo;
ctx_hi->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
- ctx_lo = kernel_context(i915);
+ ctx_lo = kernel_context(gt->i915);
if (!ctx_lo)
goto err_ctx_hi;
ctx_lo->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_request *rq;
if (!intel_engine_has_preemption(engine))
@@ -1341,7 +1965,7 @@ static int live_preempt_hang(void *arg)
if (!igt_wait_for_spinner(&spin_lo, rq)) {
GEM_TRACE("lo spinner failed to start\n");
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
@@ -1363,28 +1987,28 @@ static int live_preempt_hang(void *arg)
HZ / 10)) {
pr_err("Preemption did not occur within timeout!");
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
- set_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags);
+ set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
intel_engine_reset(engine, NULL);
- clear_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags);
+ clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
engine->execlists.preempt_hang.inject_hang = false;
if (!igt_wait_for_spinner(&spin_hi, rq)) {
GEM_TRACE("hi spinner failed to start\n");
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
igt_spinner_end(&spin_hi);
igt_spinner_end(&spin_lo);
- if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+ if (igt_flush_test(gt->i915)) {
err = -EIO;
goto err_ctx_lo;
}
@@ -1399,9 +2023,105 @@ err_spin_lo:
igt_spinner_fini(&spin_lo);
err_spin_hi:
igt_spinner_fini(&spin_hi);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+}
+
+static int live_preempt_timeout(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct i915_gem_context *ctx_hi, *ctx_lo;
+ struct igt_spinner spin_lo;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = -ENOMEM;
+
+ /*
+ * Check that we force preemption to occur by cancelling the previous
+ * context if it refuses to yield the GPU.
+ */
+ if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ return 0;
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+ return 0;
+
+ if (!intel_has_reset_engine(gt))
+ return 0;
+
+ if (igt_spinner_init(&spin_lo, gt))
+ return -ENOMEM;
+
+ ctx_hi = kernel_context(gt->i915);
+ if (!ctx_hi)
+ goto err_spin_lo;
+ ctx_hi->sched.priority =
+ I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
+
+ ctx_lo = kernel_context(gt->i915);
+ if (!ctx_lo)
+ goto err_ctx_hi;
+ ctx_lo->sched.priority =
+ I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_timeout;
+ struct i915_request *rq;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ rq = spinner_create_request(&spin_lo, ctx_lo, engine,
+ MI_NOOP); /* preemption disabled */
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_ctx_lo;
+ }
+
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&spin_lo, rq)) {
+ intel_gt_set_wedged(gt);
+ err = -EIO;
+ goto err_ctx_lo;
+ }
+
+ rq = igt_request_alloc(ctx_hi, engine);
+ if (IS_ERR(rq)) {
+ igt_spinner_end(&spin_lo);
+ err = PTR_ERR(rq);
+ goto err_ctx_lo;
+ }
+
+ /* Flush the previous CS ack before changing timeouts */
+ while (READ_ONCE(engine->execlists.pending[0]))
+ cpu_relax();
+
+ saved_timeout = engine->props.preempt_timeout_ms;
+ engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ intel_engine_flush_submission(engine);
+ engine->props.preempt_timeout_ms = saved_timeout;
+
+ if (i915_request_wait(rq, 0, HZ / 10) < 0) {
+ intel_gt_set_wedged(gt);
+ i915_request_put(rq);
+ err = -ETIME;
+ goto err_ctx_lo;
+ }
+
+ igt_spinner_end(&spin_lo);
+ i915_request_put(rq);
+ }
+
+ err = 0;
+err_ctx_lo:
+ kernel_context_close(ctx_lo);
+err_ctx_hi:
+ kernel_context_close(ctx_hi);
+err_spin_lo:
+ igt_spinner_fini(&spin_lo);
return err;
}
@@ -1416,7 +2136,7 @@ static int random_priority(struct rnd_state *rnd)
}
struct preempt_smoke {
- struct drm_i915_private *i915;
+ struct intel_gt *gt;
struct i915_gem_context **contexts;
struct intel_engine_cs *engine;
struct drm_i915_gem_object *batch;
@@ -1440,7 +2160,11 @@ static int smoke_submit(struct preempt_smoke *smoke,
int err = 0;
if (batch) {
- vma = i915_vma_instance(batch, ctx->vm, NULL);
+ struct i915_address_space *vm;
+
+ vm = i915_gem_context_get_vm_rcu(ctx);
+ vma = i915_vma_instance(batch, vm, NULL);
+ i915_vm_put(vm);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -1489,11 +2213,9 @@ static int smoke_crescendo_thread(void *arg)
struct i915_gem_context *ctx = smoke_context(smoke);
int err;
- mutex_lock(&smoke->i915->drm.struct_mutex);
err = smoke_submit(smoke,
ctx, count % I915_PRIORITY_MAX,
smoke->batch);
- mutex_unlock(&smoke->i915->drm.struct_mutex);
if (err)
return err;
@@ -1514,9 +2236,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
unsigned long count;
int err = 0;
- mutex_unlock(&smoke->i915->drm.struct_mutex);
-
- for_each_engine(engine, smoke->i915, id) {
+ for_each_engine(engine, smoke->gt, id) {
arg[id] = *smoke;
arg[id].engine = engine;
if (!(flags & BATCH))
@@ -1532,8 +2252,10 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
get_task_struct(tsk[id]);
}
+ yield(); /* start all threads before we kthread_stop() */
+
count = 0;
- for_each_engine(engine, smoke->i915, id) {
+ for_each_engine(engine, smoke->gt, id) {
int status;
if (IS_ERR_OR_NULL(tsk[id]))
@@ -1548,11 +2270,9 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
put_task_struct(tsk[id]);
}
- mutex_lock(&smoke->i915->drm.struct_mutex);
-
pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
count, flags,
- RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
+ RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
return 0;
}
@@ -1564,7 +2284,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
count = 0;
do {
- for_each_engine(smoke->engine, smoke->i915, id) {
+ for_each_engine(smoke->engine, smoke->gt, id) {
struct i915_gem_context *ctx = smoke_context(smoke);
int err;
@@ -1580,25 +2300,24 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
count, flags,
- RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
+ RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
return 0;
}
static int live_preempt_smoke(void *arg)
{
struct preempt_smoke smoke = {
- .i915 = arg,
+ .gt = arg,
.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
.ncontext = 1024,
};
const unsigned int phase[] = { 0, BATCH };
- intel_wakeref_t wakeref;
struct igt_live_test t;
int err = -ENOMEM;
u32 *cs;
int n;
- if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
return 0;
smoke.contexts = kmalloc_array(smoke.ncontext,
@@ -1607,13 +2326,11 @@ static int live_preempt_smoke(void *arg)
if (!smoke.contexts)
return -ENOMEM;
- mutex_lock(&smoke.i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&smoke.i915->runtime_pm);
-
- smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE);
+ smoke.batch =
+ i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
if (IS_ERR(smoke.batch)) {
err = PTR_ERR(smoke.batch);
- goto err_unlock;
+ goto err_free;
}
cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
@@ -1627,13 +2344,13 @@ static int live_preempt_smoke(void *arg)
i915_gem_object_flush_map(smoke.batch);
i915_gem_object_unpin_map(smoke.batch);
- if (igt_live_test_begin(&t, smoke.i915, __func__, "all")) {
+ if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
err = -EIO;
goto err_batch;
}
for (n = 0; n < smoke.ncontext; n++) {
- smoke.contexts[n] = kernel_context(smoke.i915);
+ smoke.contexts[n] = kernel_context(smoke.gt->i915);
if (!smoke.contexts[n])
goto err_ctx;
}
@@ -1660,15 +2377,13 @@ err_ctx:
err_batch:
i915_gem_object_put(smoke.batch);
-err_unlock:
- intel_runtime_pm_put(&smoke.i915->runtime_pm, wakeref);
- mutex_unlock(&smoke.i915->drm.struct_mutex);
+err_free:
kfree(smoke.contexts);
return err;
}
-static int nop_virtual_engine(struct drm_i915_private *i915,
+static int nop_virtual_engine(struct intel_gt *gt,
struct intel_engine_cs **siblings,
unsigned int nsibling,
unsigned int nctx,
@@ -1687,7 +2402,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
for (n = 0; n < nctx; n++) {
- ctx[n] = kernel_context(i915);
+ ctx[n] = kernel_context(gt->i915);
if (!ctx[n]) {
err = -ENOMEM;
nctx = n;
@@ -1712,7 +2427,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
}
}
- err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name);
+ err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
if (err)
goto out;
@@ -1759,7 +2474,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
request[nc]->fence.context,
request[nc]->fence.seqno);
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
break;
}
}
@@ -1781,7 +2496,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
prime, div64_u64(ktime_to_ns(times[1]), prime));
out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(gt->i915))
err = -EIO;
for (nc = 0; nc < nctx; nc++) {
@@ -1794,25 +2509,22 @@ out:
static int live_virtual_engine(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
struct intel_engine_cs *engine;
- struct intel_gt *gt = &i915->gt;
enum intel_engine_id id;
unsigned int class, inst;
- int err = -ENODEV;
+ int err;
- if (USES_GUC_SUBMISSION(i915))
+ if (USES_GUC_SUBMISSION(gt->i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
-
- for_each_engine(engine, i915, id) {
- err = nop_virtual_engine(i915, &engine, 1, 1, 0);
+ for_each_engine(engine, gt, id) {
+ err = nop_virtual_engine(gt, &engine, 1, 1, 0);
if (err) {
pr_err("Failed to wrap engine %s: err=%d\n",
engine->name, err);
- goto out_unlock;
+ return err;
}
}
@@ -1830,23 +2542,21 @@ static int live_virtual_engine(void *arg)
continue;
for (n = 1; n <= nsibling + 1; n++) {
- err = nop_virtual_engine(i915, siblings, nsibling,
+ err = nop_virtual_engine(gt, siblings, nsibling,
n, 0);
if (err)
- goto out_unlock;
+ return err;
}
- err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
+ err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
if (err)
- goto out_unlock;
+ return err;
}
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
+ return 0;
}
-static int mask_virtual_engine(struct drm_i915_private *i915,
+static int mask_virtual_engine(struct intel_gt *gt,
struct intel_engine_cs **siblings,
unsigned int nsibling)
{
@@ -1862,7 +2572,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
* restrict it to our desired engine within the virtual engine.
*/
- ctx = kernel_context(i915);
+ ctx = kernel_context(gt->i915);
if (!ctx)
return -ENOMEM;
@@ -1876,7 +2586,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
if (err)
goto out_put;
- err = igt_live_test_begin(&t, i915, __func__, ve->engine->name);
+ err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
if (err)
goto out_unpin;
@@ -1907,7 +2617,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
request[n]->fence.context,
request[n]->fence.seqno);
GEM_TRACE_DUMP();
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto out;
}
@@ -1922,11 +2632,8 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
}
err = igt_live_test_end(&t);
- if (err)
- goto out;
-
out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(gt->i915))
err = -EIO;
for (n = 0; n < nsibling; n++)
@@ -1943,17 +2650,14 @@ out_close:
static int live_virtual_mask(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
- struct intel_gt *gt = &i915->gt;
unsigned int class, inst;
- int err = 0;
+ int err;
- if (USES_GUC_SUBMISSION(i915))
+ if (USES_GUC_SUBMISSION(gt->i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
-
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
unsigned int nsibling;
@@ -1967,17 +2671,166 @@ static int live_virtual_mask(void *arg)
if (nsibling < 2)
continue;
- err = mask_virtual_engine(i915, siblings, nsibling);
+ err = mask_virtual_engine(gt, siblings, nsibling);
if (err)
- goto out_unlock;
+ return err;
+ }
+
+ return 0;
+}
+
+static int preserved_virtual_engine(struct intel_gt *gt,
+ struct intel_engine_cs **siblings,
+ unsigned int nsibling)
+{
+ struct i915_request *last = NULL;
+ struct i915_gem_context *ctx;
+ struct intel_context *ve;
+ struct i915_vma *scratch;
+ struct igt_live_test t;
+ unsigned int n;
+ int err = 0;
+ u32 *cs;
+
+ ctx = kernel_context(gt->i915);
+ if (!ctx)
+ return -ENOMEM;
+
+ scratch = create_scratch(siblings[0]->gt);
+ if (IS_ERR(scratch)) {
+ err = PTR_ERR(scratch);
+ goto out_close;
}
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
+ ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+ if (IS_ERR(ve)) {
+ err = PTR_ERR(ve);
+ goto out_scratch;
+ }
+
+ err = intel_context_pin(ve);
+ if (err)
+ goto out_put;
+
+ err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
+ if (err)
+ goto out_unpin;
+
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ struct intel_engine_cs *engine = siblings[n % nsibling];
+ struct i915_request *rq;
+
+ rq = i915_request_create(ve);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_end;
+ }
+
+ i915_request_put(last);
+ last = i915_request_get(rq);
+
+ cs = intel_ring_begin(rq, 8);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ err = PTR_ERR(cs);
+ goto out_end;
+ }
+
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = CS_GPR(engine, n);
+ *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
+ *cs++ = n + 1;
+
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ /* Restrict this request to run on a particular engine */
+ rq->execution_mask = engine->mask;
+ i915_request_add(rq);
+ }
+
+ if (i915_request_wait(last, 0, HZ / 5) < 0) {
+ err = -ETIME;
+ goto out_end;
+ }
+
+ cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto out_end;
+ }
+
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ if (cs[n] != n) {
+ pr_err("Incorrect value[%d] found for GPR[%d]\n",
+ cs[n], n);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ i915_gem_object_unpin_map(scratch->obj);
+
+out_end:
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ i915_request_put(last);
+out_unpin:
+ intel_context_unpin(ve);
+out_put:
+ intel_context_put(ve);
+out_scratch:
+ i915_vma_unpin_and_release(&scratch, 0);
+out_close:
+ kernel_context_close(ctx);
return err;
}
-static int bond_virtual_engine(struct drm_i915_private *i915,
+static int live_virtual_preserved(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+ unsigned int class, inst;
+
+ /*
+ * Check that the context image retains non-privileged (user) registers
+ * from one engine to the next. For this we check that the CS_GPR
+ * registers are preserved.
+ */
+
+ if (USES_GUC_SUBMISSION(gt->i915))
+ return 0;
+
+ /* As we use CS_GPR, we cannot run on platforms before they existed on all engines. */
+ if (INTEL_GEN(gt->i915) < 9)
+ return 0;
+
+ for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+ int nsibling, err;
+
+ nsibling = 0;
+ for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+ if (!gt->engine_class[class][inst])
+ continue;
+
+ siblings[nsibling++] = gt->engine_class[class][inst];
+ }
+ if (nsibling < 2)
+ continue;
+
+ err = preserved_virtual_engine(gt, siblings, nsibling);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int bond_virtual_engine(struct intel_gt *gt,
unsigned int class,
struct intel_engine_cs **siblings,
unsigned int nsibling,
@@ -1993,13 +2846,13 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
- ctx = kernel_context(i915);
+ ctx = kernel_context(gt->i915);
if (!ctx)
return -ENOMEM;
err = 0;
rq[0] = ERR_PTR(-ENOMEM);
- for_each_engine(master, i915, id) {
+ for_each_engine(master, gt, id) {
struct i915_sw_fence fence = {};
if (master->class == class)
@@ -2104,7 +2957,7 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
out:
for (n = 0; !IS_ERR(rq[n]); n++)
i915_request_put(rq[n]);
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(gt->i915))
err = -EIO;
kernel_context_close(ctx);
@@ -2121,17 +2974,14 @@ static int live_virtual_bond(void *arg)
{ "schedule", BOND_SCHEDULE },
{ },
};
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
- struct intel_gt *gt = &i915->gt;
unsigned int class, inst;
- int err = 0;
+ int err;
- if (USES_GUC_SUBMISSION(i915))
+ if (USES_GUC_SUBMISSION(gt->i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
-
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
const struct phase *p;
int nsibling;
@@ -2148,38 +2998,42 @@ static int live_virtual_bond(void *arg)
continue;
for (p = phases; p->name; p++) {
- err = bond_virtual_engine(i915,
+ err = bond_virtual_engine(gt,
class, siblings, nsibling,
p->flags);
if (err) {
pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
__func__, p->name, class, nsibling, err);
- goto out_unlock;
+ return err;
}
}
}
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
+ return 0;
}
int intel_execlists_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_sanitycheck),
+ SUBTEST(live_unlite_switch),
+ SUBTEST(live_unlite_preempt),
SUBTEST(live_timeslice_preempt),
+ SUBTEST(live_timeslice_queue),
SUBTEST(live_busywait_preempt),
SUBTEST(live_preempt),
SUBTEST(live_late_preempt),
SUBTEST(live_nopreempt),
+ SUBTEST(live_preempt_cancel),
SUBTEST(live_suppress_self_preempt),
SUBTEST(live_suppress_wait_preempt),
SUBTEST(live_chain_preempt),
SUBTEST(live_preempt_hang),
+ SUBTEST(live_preempt_timeout),
SUBTEST(live_preempt_smoke),
SUBTEST(live_virtual_engine),
SUBTEST(live_virtual_mask),
+ SUBTEST(live_virtual_preserved),
SUBTEST(live_virtual_bond),
};
@@ -2189,5 +3043,512 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
if (intel_gt_is_wedged(&i915->gt))
return 0;
- return i915_live_subtests(tests, i915);
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
+
+static void hexdump(const void *buf, size_t len)
+{
+ const size_t rowsize = 8 * sizeof(u32);
+ const void *prev = NULL;
+ bool skip = false;
+ size_t pos;
+
+ for (pos = 0; pos < len; pos += rowsize) {
+ char line[128];
+
+ if (prev && !memcmp(prev, buf + pos, rowsize)) {
+ if (!skip) {
+ pr_info("*\n");
+ skip = true;
+ }
+ continue;
+ }
+
+ WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
+ rowsize, sizeof(u32),
+ line, sizeof(line),
+ false) >= sizeof(line));
+ pr_info("[%04zx] %s\n", pos, line);
+
+ prev = buf + pos;
+ skip = false;
+ }
+}
+
+static int live_lrc_layout(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ u32 *mem;
+ int err;
+
+ /*
+ * Check the register offsets we use to create the initial reg state
+ * match the layout saved by HW.
+ */
+
+ mem = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!mem)
+ return -ENOMEM;
+
+ err = 0;
+ for_each_engine(engine, gt, id) {
+ u32 *hw, *lrc;
+ int dw;
+
+ if (!engine->default_state)
+ continue;
+
+ hw = i915_gem_object_pin_map(engine->default_state,
+ I915_MAP_WB);
+ if (IS_ERR(hw)) {
+ err = PTR_ERR(hw);
+ break;
+ }
+ hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+
+ lrc = memset(mem, 0, PAGE_SIZE);
+ execlists_init_reg_state(lrc,
+ engine->kernel_context,
+ engine,
+ engine->kernel_context->ring,
+ true);
+
+ dw = 0;
+ do {
+ u32 lri = hw[dw];
+
+ if (lri == 0) {
+ dw++;
+ continue;
+ }
+
+ if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+ pr_err("%s: Expected LRI command at dword %d, found %08x\n",
+ engine->name, dw, lri);
+ err = -EINVAL;
+ break;
+ }
+
+ if (lrc[dw] != lri) {
+ pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
+ engine->name, dw, lri, lrc[dw]);
+ err = -EINVAL;
+ break;
+ }
+
+ lri &= 0x7f;
+ lri++;
+ dw++;
+
+ while (lri) {
+ if (hw[dw] != lrc[dw]) {
+ pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
+ engine->name, dw, hw[dw], lrc[dw]);
+ err = -EINVAL;
+ break;
+ }
+
+ /*
+ * Skip over the actual register value as we
+ * expect that to differ.
+ */
+ dw += 2;
+ lri -= 2;
+ }
+ } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+ if (err) {
+ pr_info("%s: HW register image:\n", engine->name);
+ hexdump(hw, PAGE_SIZE);
+
+ pr_info("%s: SW register image:\n", engine->name);
+ hexdump(lrc, PAGE_SIZE);
+ }
+
+ i915_gem_object_unpin_map(engine->default_state);
+ if (err)
+ break;
+ }
+
+ kfree(mem);
+ return err;
+}
+
+static int find_offset(const u32 *lri, u32 offset)
+{
+ int i;
+
+ for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
+ if (lri[i] == offset)
+ return i;
+
+ return -1;
+}
+
+static int live_lrc_fixed(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check the assumed register offsets match the actual locations in
+ * the context image.
+ */
+
+ for_each_engine(engine, gt, id) {
+ const struct {
+ u32 reg;
+ u32 offset;
+ const char *name;
+ } tbl[] = {
+ {
+ i915_mmio_reg_offset(RING_START(engine->mmio_base)),
+ CTX_RING_BUFFER_START - 1,
+ "RING_START"
+ },
+ {
+ i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
+ CTX_RING_BUFFER_CONTROL - 1,
+ "RING_CTL"
+ },
+ {
+ i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
+ CTX_RING_HEAD - 1,
+ "RING_HEAD"
+ },
+ {
+ i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
+ CTX_RING_TAIL - 1,
+ "RING_TAIL"
+ },
+ {
+ i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
+ lrc_ring_mi_mode(engine),
+ "RING_MI_MODE"
+ },
+ {
+ engine->mmio_base + 0x110,
+ CTX_BB_STATE - 1,
+ "BB_STATE"
+ },
+ { },
+ }, *t;
+ u32 *hw;
+
+ if (!engine->default_state)
+ continue;
+
+ hw = i915_gem_object_pin_map(engine->default_state,
+ I915_MAP_WB);
+ if (IS_ERR(hw)) {
+ err = PTR_ERR(hw);
+ break;
+ }
+ hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+
+ for (t = tbl; t->name; t++) {
+ int dw = find_offset(hw, t->reg);
+
+ if (dw != t->offset) {
+ pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
+ engine->name,
+ t->name,
+ t->reg,
+ dw,
+ t->offset);
+ err = -EINVAL;
+ }
+ }
+
+ i915_gem_object_unpin_map(engine->default_state);
+ }
+
+ return err;
+}
+
+static int __live_lrc_state(struct i915_gem_context *fixme,
+ struct intel_engine_cs *engine,
+ struct i915_vma *scratch)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+ enum {
+ RING_START_IDX = 0,
+ RING_TAIL_IDX,
+ MAX_IDX
+ };
+ u32 expected[MAX_IDX];
+ u32 *cs;
+ int err;
+ int n;
+
+ ce = intel_context_create(fixme, engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ err = intel_context_pin(ce);
+ if (err)
+ goto err_put;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_unpin;
+ }
+
+ cs = intel_ring_begin(rq, 4 * MAX_IDX);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ i915_request_add(rq);
+ goto err_unpin;
+ }
+
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
+ *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
+ *cs++ = 0;
+
+ expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
+
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
+ *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
+ *cs++ = 0;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ intel_engine_flush_submission(engine);
+ expected[RING_TAIL_IDX] = ce->ring->tail;
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = -ETIME;
+ goto err_rq;
+ }
+
+ cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_rq;
+ }
+
+ for (n = 0; n < MAX_IDX; n++) {
+ if (cs[n] != expected[n]) {
+ pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
+ engine->name, n, cs[n], expected[n]);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+ i915_request_put(rq);
+err_unpin:
+ intel_context_unpin(ce);
+err_put:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_lrc_state(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ struct i915_gem_context *fixme;
+ struct i915_vma *scratch;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check the live register state matches what we expect for this
+ * intel_context.
+ */
+
+ fixme = kernel_context(gt->i915);
+ if (!fixme)
+ return -ENOMEM;
+
+ scratch = create_scratch(gt);
+ if (IS_ERR(scratch)) {
+ err = PTR_ERR(scratch);
+ goto out_close;
+ }
+
+ for_each_engine(engine, gt, id) {
+ err = __live_lrc_state(fixme, engine, scratch);
+ if (err)
+ break;
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ i915_vma_unpin_and_release(&scratch, 0);
+out_close:
+ kernel_context_close(fixme);
+ return err;
+}
+
+static int gpr_make_dirty(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+ u32 *cs;
+ int n;
+
+ rq = i915_request_create(engine->kernel_context);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ return PTR_ERR(cs);
+ }
+
+ *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ *cs++ = CS_GPR(engine, n);
+ *cs++ = STACK_MAGIC;
+ }
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+ i915_request_add(rq);
+
+ return 0;
+}
+
+static int __live_gpr_clear(struct i915_gem_context *fixme,
+ struct intel_engine_cs *engine,
+ struct i915_vma *scratch)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+ u32 *cs;
+ int err;
+ int n;
+
+ if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
+ return 0; /* GPR only on rcs0 for gen8 */
+
+ err = gpr_make_dirty(engine);
+ if (err)
+ return err;
+
+ ce = intel_context_create(fixme, engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_put;
+ }
+
+ cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ i915_request_add(rq);
+ goto err_put;
+ }
+
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = CS_GPR(engine, n);
+ *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+ *cs++ = 0;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = -ETIME;
+ goto err_rq;
+ }
+
+ cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_rq;
+ }
+
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ if (cs[n]) {
+ pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
+ engine->name,
+ n / 2, n & 1 ? "udw" : "ldw",
+ cs[n]);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+ i915_request_put(rq);
+err_put:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_gpr_clear(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ struct i915_gem_context *fixme;
+ struct i915_vma *scratch;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check that GPR registers are cleared in new contexts as we need
+ * to avoid leaking any information from previous contexts.
+ */
+
+ fixme = kernel_context(gt->i915);
+ if (!fixme)
+ return -ENOMEM;
+
+ scratch = create_scratch(gt);
+ if (IS_ERR(scratch)) {
+ err = PTR_ERR(scratch);
+ goto out_close;
+ }
+
+ for_each_engine(engine, gt, id) {
+ err = __live_gpr_clear(fixme, engine, scratch);
+ if (err)
+ break;
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ i915_vma_unpin_and_release(&scratch, 0);
+out_close:
+ kernel_context_close(fixme);
+ return err;
+}
+
+int intel_lrc_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_lrc_layout),
+ SUBTEST(live_lrc_fixed),
+ SUBTEST(live_lrc_state),
+ SUBTEST(live_gpr_clear),
+ };
+
+ if (!HAS_LOGICAL_RING_CONTEXTS(i915))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 00a4f60cdfd5..6ad6aca315f6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -17,7 +17,7 @@ static int igt_global_reset(void *arg)
/* Check that we can issue a global GPU reset */
igt_global_reset_lock(gt);
- wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
reset_count = i915_reset_count(&gt->i915->gpu_error);
@@ -28,7 +28,7 @@ static int igt_global_reset(void *arg)
err = -EINVAL;
}
- intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
igt_global_reset_unlock(gt);
if (intel_gt_is_wedged(gt))
@@ -45,14 +45,14 @@ static int igt_wedged_reset(void *arg)
/* Check that we can recover a wedged device with a GPU reset */
igt_global_reset_lock(gt);
- wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
intel_gt_set_wedged(gt);
GEM_BUG_ON(!intel_gt_is_wedged(gt));
intel_gt_reset(gt, ALL_ENGINES, NULL);
- intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
igt_global_reset_unlock(gt);
return intel_gt_is_wedged(gt) ? -EIO : 0;
@@ -112,7 +112,7 @@ static int igt_atomic_engine_reset(void *arg)
/* Check that the resets are usable from atomic context */
- if (!intel_has_reset_engine(gt->i915))
+ if (!intel_has_reset_engine(gt))
return 0;
if (USES_GUC_SUBMISSION(gt->i915))
@@ -125,8 +125,8 @@ static int igt_atomic_engine_reset(void *arg)
if (!igt_force_reset(gt))
goto out_unlock;
- for_each_engine(engine, gt->i915, id) {
- tasklet_disable_nosync(&engine->execlists.tasklet);
+ for_each_engine(engine, gt, id) {
+ tasklet_disable(&engine->execlists.tasklet);
intel_engine_pm_get(engine);
for (p = igt_atomic_phases; p->name; p++) {
@@ -170,7 +170,7 @@ int intel_reset_live_selftests(struct drm_i915_private *i915)
};
struct intel_gt *gt = &i915->gt;
- if (!intel_has_gpu_reset(gt->i915))
+ if (!intel_has_gpu_reset(gt))
return 0;
if (intel_gt_is_wedged(gt))
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 321481403165..f04a59fe5d2c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -6,8 +6,10 @@
#include <linux/prime_numbers.h>
-#include "gem/i915_gem_pm.h"
+#include "intel_engine_pm.h"
#include "intel_gt.h"
+#include "intel_gt_requests.h"
+#include "intel_ring.h"
#include "../selftests/i915_random.h"
#include "../i915_selftest.h"
@@ -34,7 +36,7 @@ static unsigned long hwsp_cacheline(struct intel_timeline *tl)
#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)
struct mock_hwsp_freelist {
- struct drm_i915_private *i915;
+ struct intel_gt *gt;
struct radix_tree_root cachelines;
struct intel_timeline **history;
unsigned long count, max;
@@ -67,7 +69,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
unsigned long cacheline;
int err;
- tl = intel_timeline_create(&state->i915->gt, NULL);
+ tl = intel_timeline_create(state->gt, NULL);
if (IS_ERR(tl))
return PTR_ERR(tl);
@@ -105,6 +107,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
static int mock_hwsp_freelist(void *arg)
{
struct mock_hwsp_freelist state;
+ struct drm_i915_private *i915;
const struct {
const char *name;
unsigned int flags;
@@ -116,12 +119,14 @@ static int mock_hwsp_freelist(void *arg)
unsigned int na;
int err = 0;
+ i915 = mock_gem_device();
+ if (!i915)
+ return -ENOMEM;
+
INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
- state.i915 = mock_gem_device();
- if (!state.i915)
- return -ENOMEM;
+ state.gt = &i915->gt;
/*
 * Create a bunch of timelines and check that their HWSPs do not overlap.
@@ -136,7 +141,6 @@ static int mock_hwsp_freelist(void *arg)
goto err_put;
}
- mutex_lock(&state.i915->drm.struct_mutex);
for (p = phases; p->name; p++) {
pr_debug("%s(%s)\n", __func__, p->name);
for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
@@ -149,10 +153,9 @@ static int mock_hwsp_freelist(void *arg)
out:
for (na = 0; na < state.max; na++)
__mock_hwsp_record(&state, na, NULL);
- mutex_unlock(&state.i915->drm.struct_mutex);
kfree(state.history);
err_put:
- drm_dev_put(&state.i915->drm);
+ drm_dev_put(&i915->drm);
return err;
}
@@ -449,8 +452,6 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
struct i915_request *rq;
int err;
- lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */
-
err = intel_timeline_pin(tl);
if (err) {
rq = ERR_PTR(err);
@@ -461,10 +462,14 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
if (IS_ERR(rq))
goto out_unpin;
+ i915_request_get(rq);
+
err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
i915_request_add(rq);
- if (err)
+ if (err) {
+ i915_request_put(rq);
rq = ERR_PTR(err);
+ }
out_unpin:
intel_timeline_unpin(tl);
@@ -475,11 +480,11 @@ out:
}
static struct intel_timeline *
-checked_intel_timeline_create(struct drm_i915_private *i915)
+checked_intel_timeline_create(struct intel_gt *gt)
{
struct intel_timeline *tl;
- tl = intel_timeline_create(&i915->gt, NULL);
+ tl = intel_timeline_create(gt, NULL);
if (IS_ERR(tl))
return tl;
@@ -496,11 +501,10 @@ checked_intel_timeline_create(struct drm_i915_private *i915)
static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_timeline **timelines;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
unsigned long count, n;
int err = 0;
@@ -515,37 +519,40 @@ static int live_hwsp_engine(void *arg)
if (!timelines)
return -ENOMEM;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
count = 0;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
if (!intel_engine_can_store_dword(engine))
continue;
+ intel_engine_pm_get(engine);
+
for (n = 0; n < NUM_TIMELINES; n++) {
struct intel_timeline *tl;
struct i915_request *rq;
- tl = checked_intel_timeline_create(i915);
+ tl = checked_intel_timeline_create(gt);
if (IS_ERR(tl)) {
err = PTR_ERR(tl);
- goto out;
+ break;
}
rq = tl_write(tl, engine, count);
if (IS_ERR(rq)) {
intel_timeline_put(tl);
err = PTR_ERR(rq);
- goto out;
+ break;
}
timelines[count++] = tl;
+ i915_request_put(rq);
}
+
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
}
-out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(gt->i915))
err = -EIO;
for (n = 0; n < count; n++) {
@@ -559,11 +566,7 @@ out:
intel_timeline_put(tl);
}
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
-
kvfree(timelines);
-
return err;
#undef NUM_TIMELINES
}
@@ -571,11 +574,10 @@ out:
static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_timeline **timelines;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
unsigned long count, n;
int err = 0;
@@ -591,25 +593,25 @@ static int live_hwsp_alternate(void *arg)
if (!timelines)
return -ENOMEM;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
count = 0;
for (n = 0; n < NUM_TIMELINES; n++) {
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct intel_timeline *tl;
struct i915_request *rq;
if (!intel_engine_can_store_dword(engine))
continue;
- tl = checked_intel_timeline_create(i915);
+ tl = checked_intel_timeline_create(gt);
if (IS_ERR(tl)) {
+ intel_engine_pm_put(engine);
err = PTR_ERR(tl);
goto out;
}
+ intel_engine_pm_get(engine);
rq = tl_write(tl, engine, count);
+ intel_engine_pm_put(engine);
if (IS_ERR(rq)) {
intel_timeline_put(tl);
err = PTR_ERR(rq);
@@ -617,11 +619,12 @@ static int live_hwsp_alternate(void *arg)
}
timelines[count++] = tl;
+ i915_request_put(rq);
}
}
out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(gt->i915))
err = -EIO;
for (n = 0; n < count; n++) {
@@ -635,22 +638,17 @@ out:
intel_timeline_put(tl);
}
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
-
kvfree(timelines);
-
return err;
#undef NUM_TIMELINES
}
static int live_hwsp_wrap(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct intel_timeline *tl;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = 0;
/*
@@ -658,14 +656,10 @@ static int live_hwsp_wrap(void *arg)
* foreign GPU references.
*/
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ tl = intel_timeline_create(gt, NULL);
+ if (IS_ERR(tl))
+ return PTR_ERR(tl);
- tl = intel_timeline_create(&i915->gt, NULL);
- if (IS_ERR(tl)) {
- err = PTR_ERR(tl);
- goto out_rpm;
- }
if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
goto out_free;
@@ -673,7 +667,7 @@ static int live_hwsp_wrap(void *arg)
if (err)
goto out_free;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
const u32 *hwsp_seqno[2];
struct i915_request *rq;
u32 seqno[2];
@@ -681,7 +675,9 @@ static int live_hwsp_wrap(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
+ intel_engine_pm_get(engine);
rq = i915_request_create(engine->kernel_context);
+ intel_engine_pm_put(engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out;
@@ -743,29 +739,24 @@ static int live_hwsp_wrap(void *arg)
goto out;
}
- i915_retire_requests(i915); /* recycle HWSP */
+ intel_gt_retire_requests(gt); /* recycle HWSP */
}
out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(gt->i915))
err = -EIO;
intel_timeline_unpin(tl);
out_free:
intel_timeline_put(tl);
-out_rpm:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
-
return err;
}
static int live_hwsp_recycle(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
unsigned long count;
int err = 0;
@@ -775,38 +766,38 @@ static int live_hwsp_recycle(void *arg)
* want to confuse ourselves or the GPU.
*/
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
count = 0;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
IGT_TIMEOUT(end_time);
if (!intel_engine_can_store_dword(engine))
continue;
+ intel_engine_pm_get(engine);
+
do {
struct intel_timeline *tl;
struct i915_request *rq;
- tl = checked_intel_timeline_create(i915);
+ tl = checked_intel_timeline_create(gt);
if (IS_ERR(tl)) {
err = PTR_ERR(tl);
- goto out;
+ break;
}
rq = tl_write(tl, engine, count);
if (IS_ERR(rq)) {
intel_timeline_put(tl);
err = PTR_ERR(rq);
- goto out;
+ break;
}
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
pr_err("Wait for timeline writes timed out!\n");
+ i915_request_put(rq);
intel_timeline_put(tl);
err = -EIO;
- goto out;
+ break;
}
if (*tl->hwsp_seqno != count) {
@@ -815,17 +806,18 @@ static int live_hwsp_recycle(void *arg)
err = -EINVAL;
}
+ i915_request_put(rq);
intel_timeline_put(tl);
count++;
if (err)
- goto out;
+ break;
} while (!__igt_timeout(end_time, NULL));
- }
-out:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+ }
return err;
}
@@ -842,5 +834,5 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915)
if (intel_gt_is_wedged(&i915->gt))
return 0;
- return i915_live_subtests(tests, i915);
+ return intel_gt_live_subtests(tests, &i915->gt);
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index d06d68ac2a3b..abce6e4ec9c0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -33,8 +33,32 @@ struct wa_lists {
} engine[I915_NUM_ENGINES];
};
+static int request_add_sync(struct i915_request *rq, int err)
+{
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -EIO;
+ i915_request_put(rq);
+
+ return err;
+}
+
+static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
+{
+ int err = 0;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (spin && !igt_wait_for_spinner(spin, rq))
+ err = -ETIMEDOUT;
+ i915_request_put(rq);
+
+ return err;
+}
+
static void
-reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists)
+reference_lists_init(struct intel_gt *gt, struct wa_lists *lists)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -42,10 +66,10 @@ reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists)
memset(lists, 0, sizeof(*lists));
wa_init_start(&lists->gt_wa_list, "GT_REF", "global");
- gt_init_workarounds(i915, &lists->gt_wa_list);
+ gt_init_workarounds(gt->i915, &lists->gt_wa_list);
wa_init_finish(&lists->gt_wa_list);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_wa_list *wal = &lists->engine[id].wa_list;
wa_init_start(wal, "REF", engine->name);
@@ -59,12 +83,12 @@ reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists)
}
static void
-reference_lists_fini(struct drm_i915_private *i915, struct wa_lists *lists)
+reference_lists_fini(struct intel_gt *gt, struct wa_lists *lists)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, i915, id)
+ for_each_engine(engine, gt, id)
intel_wa_list_free(&lists->engine[id].wa_list);
intel_wa_list_free(&lists->gt_wa_list);
@@ -191,10 +215,10 @@ static int check_whitelist(struct i915_gem_context *ctx,
err = 0;
i915_gem_object_lock(results);
- intel_wedge_on_timeout(&wedge, &ctx->i915->gt, HZ / 5) /* safety net! */
+ intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */
err = i915_gem_object_set_to_cpu_domain(results, false);
i915_gem_object_unlock(results);
- if (intel_gt_is_wedged(&ctx->i915->gt))
+ if (intel_gt_is_wedged(engine->gt))
err = -EIO;
if (err)
goto out_put;
@@ -243,7 +267,6 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
struct i915_gem_context *ctx;
struct intel_context *ce;
struct i915_request *rq;
- intel_wakeref_t wakeref;
int err = 0;
ctx = kernel_context(engine->i915);
@@ -255,12 +278,9 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
GEM_BUG_ON(IS_ERR(ce));
- rq = ERR_PTR(-ENODEV);
- with_intel_runtime_pm(&engine->i915->runtime_pm, wakeref)
- rq = igt_spinner_create_request(spin, ce, MI_NOOP);
+ rq = igt_spinner_create_request(spin, ce, MI_NOOP);
intel_context_put(ce);
- kernel_context_close(ctx);
if (IS_ERR(rq)) {
spin = NULL;
@@ -268,17 +288,12 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
goto err;
}
- i915_request_add(rq);
-
- if (spin && !igt_wait_for_spinner(spin, rq)) {
- pr_err("Spinner failed to start\n");
- err = -ETIMEDOUT;
- }
-
+ err = request_add_spin(rq, spin);
err:
if (err && spin)
igt_spinner_end(spin);
+ kernel_context_close(ctx);
return err;
}
@@ -313,7 +328,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
if (err)
goto out_spin;
- with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+ with_intel_runtime_pm(engine->uncore->rpm, wakeref)
err = reset(engine);
igt_spinner_end(&spin);
@@ -355,6 +370,7 @@ out_ctx:
static struct i915_vma *create_batch(struct i915_gem_context *ctx)
{
struct drm_i915_gem_object *obj;
+ struct i915_address_space *vm;
struct i915_vma *vma;
int err;
@@ -362,7 +378,9 @@ static struct i915_vma *create_batch(struct i915_gem_context *ctx)
if (IS_ERR(obj))
return ERR_CAST(obj);
- vma = i915_vma_instance(obj, ctx->vm, NULL);
+ vm = i915_gem_context_get_vm_rcu(ctx);
+ vma = i915_vma_instance(obj, vm, NULL);
+ i915_vm_put(vm);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
@@ -463,12 +481,15 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
0xffff00ff,
0xffffffff,
};
+ struct i915_address_space *vm;
struct i915_vma *scratch;
struct i915_vma *batch;
int err = 0, i, v;
u32 *cs, *results;
- scratch = create_scratch(ctx->vm, 2 * ARRAY_SIZE(values) + 1);
+ vm = i915_gem_context_get_vm_rcu(ctx);
+ scratch = create_scratch(vm, 2 * ARRAY_SIZE(values) + 1);
+ i915_vm_put(vm);
if (IS_ERR(scratch))
return PTR_ERR(scratch);
@@ -492,6 +513,9 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
ro_reg = ro_register(reg);
+ /* Clear non-priv flags */
+ reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK;
+
srm = MI_STORE_REGISTER_MEM;
lrm = MI_LOAD_REGISTER_MEM;
if (INTEL_GEN(ctx->i915) >= 8)
@@ -565,6 +589,14 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
goto err_request;
}
+ i915_vma_lock(batch);
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(batch, rq, 0);
+ i915_vma_unlock(batch);
+ if (err)
+ goto err_request;
+
err = engine->emit_bb_start(rq,
batch->node.start, PAGE_SIZE,
0);
@@ -572,15 +604,11 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
goto err_request;
err_request:
- i915_request_add(rq);
- if (err)
- goto out_batch;
-
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = request_add_sync(rq, err);
+ if (err) {
pr_err("%s: Futzing %x timedout; cancelling test\n",
engine->name, reg);
- intel_gt_set_wedged(&ctx->i915->gt);
- err = -EIO;
+ intel_gt_set_wedged(engine->gt);
goto out_batch;
}
@@ -668,7 +696,7 @@ out_unpin:
break;
}
- if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(ctx->i915))
err = -EIO;
out_batch:
i915_vma_unpin_and_release(&batch, 0);
@@ -679,36 +707,29 @@ out_scratch:
static int live_dirty_whitelist(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct i915_gem_context *ctx;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
struct drm_file *file;
int err = 0;
/* Can the user write to the whitelisted registers? */
- if (INTEL_GEN(i915) < 7) /* minimum requirement for LRI, SRM, LRM */
+ if (INTEL_GEN(gt->i915) < 7) /* minimum requirement for LRI, SRM, LRM */
return 0;
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ file = mock_file(gt->i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
- mutex_unlock(&i915->drm.struct_mutex);
- file = mock_file(i915);
- mutex_lock(&i915->drm.struct_mutex);
- if (IS_ERR(file)) {
- err = PTR_ERR(file);
- goto out_rpm;
- }
-
- ctx = live_context(i915, file);
+ ctx = live_context(gt->i915, file);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto out_file;
}
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
if (engine->whitelist.count == 0)
continue;
@@ -718,45 +739,43 @@ static int live_dirty_whitelist(void *arg)
}
out_file:
- mutex_unlock(&i915->drm.struct_mutex);
- mock_file_free(i915, file);
- mutex_lock(&i915->drm.struct_mutex);
-out_rpm:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+ mock_file_free(gt->i915, file);
return err;
}
static int live_reset_whitelist(void *arg)
{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine = i915->engine[RCS0];
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
int err = 0;
/* If we reset the gpu, we should not lose the RING_NONPRIV */
+ igt_global_reset_lock(gt);
- if (!engine || engine->whitelist.count == 0)
- return 0;
-
- igt_global_reset_lock(&i915->gt);
+ for_each_engine(engine, gt, id) {
+ if (engine->whitelist.count == 0)
+ continue;
- if (intel_has_reset_engine(i915)) {
- err = check_whitelist_across_reset(engine,
- do_engine_reset,
- "engine");
- if (err)
- goto out;
- }
+ if (intel_has_reset_engine(gt)) {
+ err = check_whitelist_across_reset(engine,
+ do_engine_reset,
+ "engine");
+ if (err)
+ goto out;
+ }
- if (intel_has_gpu_reset(i915)) {
- err = check_whitelist_across_reset(engine,
- do_device_reset,
- "device");
- if (err)
- goto out;
+ if (intel_has_gpu_reset(gt)) {
+ err = check_whitelist_across_reset(engine,
+ do_device_reset,
+ "device");
+ if (err)
+ goto out;
+ }
}
out:
- igt_global_reset_unlock(&i915->gt);
+ igt_global_reset_unlock(gt);
return err;
}
@@ -772,6 +791,14 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
if (IS_ERR(rq))
return PTR_ERR(rq);
+ i915_vma_lock(results);
+ err = i915_request_await_object(rq, results->obj, true);
+ if (err == 0)
+ err = i915_vma_move_to_active(results, rq, EXEC_OBJECT_WRITE);
+ i915_vma_unlock(results);
+ if (err)
+ goto err_req;
+
srm = MI_STORE_REGISTER_MEM;
if (INTEL_GEN(ctx->i915) >= 8)
srm++;
@@ -786,8 +813,8 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
u64 offset = results->node.start + sizeof(u32) * i;
u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
- /* Clear access permission field */
- reg &= ~RING_FORCE_TO_NONPRIV_ACCESS_MASK;
+ /* Clear non-priv flags */
+ reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK;
*cs++ = srm;
*cs++ = reg;
@@ -797,12 +824,7 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
intel_ring_advance(rq, cs);
err_req:
- i915_request_add(rq);
-
- if (i915_request_wait(rq, 0, HZ / 5) < 0)
- err = -EIO;
-
- return err;
+ return request_add_sync(rq, err);
}
static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
@@ -830,6 +852,9 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
if (ro_register(reg))
continue;
+ /* Clear non-priv flags */
+ reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK;
+
*cs++ = reg;
*cs++ = 0xffffffff;
}
@@ -850,13 +875,19 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
goto err_request;
}
+ i915_vma_lock(batch);
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(batch, rq, 0);
+ i915_vma_unlock(batch);
+ if (err)
+ goto err_request;
+
/* Perform the writes from an unprivileged "user" batch */
err = engine->emit_bb_start(rq, batch->node.start, 0, 0);
err_request:
- i915_request_add(rq);
- if (i915_request_wait(rq, 0, HZ / 5) < 0)
- err = -EIO;
+ err = request_add_sync(rq, err);
err_unpin:
i915_gem_object_unpin_map(batch->obj);
@@ -973,7 +1004,7 @@ err_a:
static int live_isolated_whitelist(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct {
struct i915_gem_context *ctx;
struct i915_vma *scratch[2];
@@ -987,40 +1018,46 @@ static int live_isolated_whitelist(void *arg)
* invisible to a second context.
*/
- if (!intel_engines_has_context_isolation(i915))
- return 0;
-
- if (!i915->kernel_context->vm)
+ if (!intel_engines_has_context_isolation(gt->i915))
return 0;
for (i = 0; i < ARRAY_SIZE(client); i++) {
+ struct i915_address_space *vm;
struct i915_gem_context *c;
- c = kernel_context(i915);
+ c = kernel_context(gt->i915);
if (IS_ERR(c)) {
err = PTR_ERR(c);
goto err;
}
- client[i].scratch[0] = create_scratch(c->vm, 1024);
+ vm = i915_gem_context_get_vm_rcu(c);
+
+ client[i].scratch[0] = create_scratch(vm, 1024);
if (IS_ERR(client[i].scratch[0])) {
err = PTR_ERR(client[i].scratch[0]);
+ i915_vm_put(vm);
kernel_context_close(c);
goto err;
}
- client[i].scratch[1] = create_scratch(c->vm, 1024);
+ client[i].scratch[1] = create_scratch(vm, 1024);
if (IS_ERR(client[i].scratch[1])) {
err = PTR_ERR(client[i].scratch[1]);
i915_vma_unpin_and_release(&client[i].scratch[0], 0);
+ i915_vm_put(vm);
kernel_context_close(c);
goto err;
}
client[i].ctx = c;
+ i915_vm_put(vm);
}
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
+ if (!engine->kernel_context->vm)
+ continue;
+
if (!whitelist_writable_count(engine))
continue;
@@ -1074,7 +1111,7 @@ err:
kernel_context_close(client[i].ctx);
}
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(gt->i915))
err = -EIO;
return err;
@@ -1109,16 +1146,16 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists,
static int
live_gpu_reset_workarounds(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_context *ctx;
intel_wakeref_t wakeref;
struct wa_lists lists;
bool ok;
- if (!intel_has_gpu_reset(i915))
+ if (!intel_has_gpu_reset(gt))
return 0;
- ctx = kernel_context(i915);
+ ctx = kernel_context(gt->i915);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1126,25 +1163,25 @@ live_gpu_reset_workarounds(void *arg)
pr_info("Verifying after GPU reset...\n");
- igt_global_reset_lock(&i915->gt);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ igt_global_reset_lock(gt);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
- reference_lists_init(i915, &lists);
+ reference_lists_init(gt, &lists);
ok = verify_wa_lists(ctx, &lists, "before reset");
if (!ok)
goto out;
- intel_gt_reset(&i915->gt, ALL_ENGINES, "live_workarounds");
+ intel_gt_reset(gt, ALL_ENGINES, "live_workarounds");
ok = verify_wa_lists(ctx, &lists, "after reset");
out:
i915_gem_context_unlock_engines(ctx);
kernel_context_close(ctx);
- reference_lists_fini(i915, &lists);
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- igt_global_reset_unlock(&i915->gt);
+ reference_lists_fini(gt, &lists);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ igt_global_reset_unlock(gt);
return ok ? 0 : -ESRCH;
}
@@ -1152,7 +1189,7 @@ out:
static int
live_engine_reset_workarounds(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_engines_iter it;
struct i915_gem_context *ctx;
struct intel_context *ce;
@@ -1162,17 +1199,17 @@ live_engine_reset_workarounds(void *arg)
struct wa_lists lists;
int ret = 0;
- if (!intel_has_reset_engine(i915))
+ if (!intel_has_reset_engine(gt))
return 0;
- ctx = kernel_context(i915);
+ ctx = kernel_context(gt->i915);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- igt_global_reset_lock(&i915->gt);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ igt_global_reset_lock(gt);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
- reference_lists_init(i915, &lists);
+ reference_lists_init(gt, &lists);
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
struct intel_engine_cs *engine = ce->engine;
@@ -1205,12 +1242,10 @@ live_engine_reset_workarounds(void *arg)
goto err;
}
- i915_request_add(rq);
-
- if (!igt_wait_for_spinner(&spin, rq)) {
+ ret = request_add_spin(rq, &spin);
+ if (ret) {
pr_err("Spinner failed to start\n");
igt_spinner_fini(&spin);
- ret = -ETIMEDOUT;
goto err;
}
@@ -1227,12 +1262,12 @@ live_engine_reset_workarounds(void *arg)
}
err:
i915_gem_context_unlock_engines(ctx);
- reference_lists_fini(i915, &lists);
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- igt_global_reset_unlock(&i915->gt);
+ reference_lists_fini(gt, &lists);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ igt_global_reset_unlock(gt);
kernel_context_close(ctx);
- igt_flush_test(i915, I915_WAIT_LOCKED);
+ igt_flush_test(gt->i915);
return ret;
}
@@ -1246,14 +1281,9 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_gpu_reset_workarounds),
SUBTEST(live_engine_reset_workarounds),
};
- int err;
if (intel_gt_is_wedged(&i915->gt))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- err = i915_subtests(tests, i915);
- mutex_unlock(&i915->drm.struct_mutex);
-
- return err;
+ return intel_gt_live_subtests(tests, &i915->gt);
}
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
index 598170efcaf6..2a77c051f36a 100644
--- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
@@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)
mutex_init(&timeline->mutex);
- INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
+ INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
INIT_LIST_HEAD(&timeline->requests);
i915_syncmap_init(&timeline->sync);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 249c747e9756..3ee4a4e7689d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -4,11 +4,34 @@
*/
#include "gt/intel_gt.h"
+#include "gt/intel_gt_irq.h"
+#include "gt/intel_gt_pm_irq.h"
#include "intel_guc.h"
#include "intel_guc_ads.h"
#include "intel_guc_submission.h"
#include "i915_drv.h"
+/**
+ * DOC: GuC
+ *
+ * The GuC is a microcontroller inside the GT HW, introduced in gen9. The GuC is
+ * designed to offload some of the functionality usually performed by the host
+ * driver; currently the main operations it can take care of are:
+ *
+ * - Authentication of the HuC, which is required to fully enable HuC usage.
+ * - Low latency graphics context scheduling (a.k.a. GuC submission).
+ * - GT Power management.
+ *
+ * The enable_guc module parameter can be used to select which of those
+ * operations to enable within GuC. Note that not all the operations are
+ * supported on all gen9+ platforms.
+ *
+ * Enabling the GuC is not mandatory and therefore the firmware is only loaded
+ * if at least one of the operations is selected. However, not loading the GuC
+ * might result in the loss of some features that do require the GuC (currently
+ * just the HuC, but more are expected to land in the future).
+ */
+
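As a rough sketch of the selection policy described in the DOC comment above (the GuC firmware is only loaded when at least one GuC operation is requested), the decision might look like the code below. The helper name and the exact bit layout of the enable_guc module parameter are assumptions made for illustration, not taken from this patch.

static bool guc_is_wanted(int enable_guc)
{
	/* Assumed bit layout: bit 0 selects GuC submission, bit 1 HuC loading. */
	bool wants_submission = enable_guc & BIT(0);
	bool wants_huc_load = enable_guc & BIT(1);

	/* Only fetch and load the GuC blob if some GuC feature is selected. */
	return wants_submission || wants_huc_load;
}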
static void gen8_guc_raise_irq(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@@ -56,6 +79,93 @@ void intel_guc_init_send_regs(struct intel_guc *guc)
guc->send_regs.fw_domains = fw_domains;
}
+static void gen9_reset_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ assert_rpm_wakelock_held(&gt->i915->runtime_pm);
+
+ spin_lock_irq(&gt->irq_lock);
+ gen6_gt_pm_reset_iir(gt, gt->pm_guc_events);
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen9_enable_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ assert_rpm_wakelock_held(&gt->i915->runtime_pm);
+
+ spin_lock_irq(&gt->irq_lock);
+ if (!guc->interrupts.enabled) {
+ WARN_ON_ONCE(intel_uncore_read(gt->uncore, GEN8_GT_IIR(2)) &
+ gt->pm_guc_events);
+ guc->interrupts.enabled = true;
+ gen6_gt_pm_enable_irq(gt, gt->pm_guc_events);
+ }
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen9_disable_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ assert_rpm_wakelock_held(&gt->i915->runtime_pm);
+
+ spin_lock_irq(&gt->irq_lock);
+ guc->interrupts.enabled = false;
+
+ gen6_gt_pm_disable_irq(gt, gt->pm_guc_events);
+
+ spin_unlock_irq(&gt->irq_lock);
+ intel_synchronize_irq(gt->i915);
+
+ gen9_reset_guc_interrupts(guc);
+}
+
+static void gen11_reset_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ spin_lock_irq(&gt->irq_lock);
+ gen11_gt_reset_one_iir(gt, 0, GEN11_GUC);
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen11_enable_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ spin_lock_irq(&gt->irq_lock);
+ if (!guc->interrupts.enabled) {
+ u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
+
+ WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC));
+ intel_uncore_write(gt->uncore,
+ GEN11_GUC_SG_INTR_ENABLE, events);
+ intel_uncore_write(gt->uncore,
+ GEN11_GUC_SG_INTR_MASK, ~events);
+ guc->interrupts.enabled = true;
+ }
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen11_disable_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ spin_lock_irq(&gt->irq_lock);
+ guc->interrupts.enabled = false;
+
+ intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0);
+ intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0);
+
+ spin_unlock_irq(&gt->irq_lock);
+ intel_synchronize_irq(gt->i915);
+
+ gen11_reset_guc_interrupts(guc);
+}
+
void intel_guc_init_early(struct intel_guc *guc)
{
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
@@ -82,32 +192,6 @@ void intel_guc_init_early(struct intel_guc *guc)
}
}
-static int guc_shared_data_create(struct intel_guc *guc)
-{
- struct i915_vma *vma;
- void *vaddr;
-
- vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
-
- vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- i915_vma_unpin_and_release(&vma, 0);
- return PTR_ERR(vaddr);
- }
-
- guc->shared_data = vma;
- guc->shared_data_vaddr = vaddr;
-
- return 0;
-}
-
-static void guc_shared_data_destroy(struct intel_guc *guc)
-{
- i915_vma_unpin_and_release(&guc->shared_data, I915_VMA_RELEASE_MAP);
-}
-
static u32 guc_ctl_debug_flags(struct intel_guc *guc)
{
u32 level = intel_guc_log_get_level(&guc->log);
@@ -254,14 +338,9 @@ int intel_guc_init(struct intel_guc *guc)
if (ret)
goto err_fetch;
- ret = guc_shared_data_create(guc);
- if (ret)
- goto err_fw;
- GEM_BUG_ON(!guc->shared_data);
-
ret = intel_guc_log_create(&guc->log);
if (ret)
- goto err_shared;
+ goto err_fw;
ret = intel_guc_ads_create(guc);
if (ret)
@@ -296,8 +375,6 @@ err_ads:
intel_guc_ads_destroy(guc);
err_log:
intel_guc_log_destroy(&guc->log);
-err_shared:
- guc_shared_data_destroy(guc);
err_fw:
intel_uc_fw_fini(&guc->fw);
err_fetch:
@@ -322,7 +399,6 @@ void intel_guc_fini(struct intel_guc *guc)
intel_guc_ads_destroy(guc);
intel_guc_log_destroy(&guc->log);
- guc_shared_data_destroy(guc);
intel_uc_fw_fini(&guc->fw);
intel_uc_fw_cleanup_fetch(&guc->fw);
}
@@ -478,6 +554,13 @@ int intel_guc_suspend(struct intel_guc *guc)
};
/*
+ * If GuC communication is enabled but submission is not supported,
+ * we do not need to suspend the GuC.
+ */
+ if (!intel_guc_submission_is_enabled(guc))
+ return 0;
+
+ /*
* The ENTER_S_STATE action queues the save/restore operation in GuC FW
* and then returns, so waiting on the H2G is not enough to guarantee
* GuC is done. When all the processing is done, GuC writes
@@ -518,19 +601,9 @@ int intel_guc_suspend(struct intel_guc *guc)
int intel_guc_reset_engine(struct intel_guc *guc,
struct intel_engine_cs *engine)
{
- u32 data[7];
-
- GEM_BUG_ON(!guc->execbuf_client);
-
- data[0] = INTEL_GUC_ACTION_REQUEST_ENGINE_RESET;
- data[1] = engine->guc_id;
- data[2] = 0;
- data[3] = 0;
- data[4] = 0;
- data[5] = guc->execbuf_client->stage_id;
- data[6] = intel_guc_ggtt_offset(guc, guc->shared_data);
+ /* XXX: to be implemented with submission interface rework */
- return intel_guc_send(guc, data, ARRAY_SIZE(data));
+ return -ENODEV;
}
/**
@@ -544,13 +617,27 @@ int intel_guc_resume(struct intel_guc *guc)
GUC_POWER_D0,
};
+ /*
+ * If GuC communication is enabled but submission is not supported,
+ * we do not need to resume the GuC but we do need to enable the
+ * GuC communication on resume (above).
+ */
+ if (!intel_guc_submission_is_enabled(guc))
+ return 0;
+
return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
/**
- * DOC: GuC Address Space
+ * DOC: GuC Memory Management
*
- * The layout of GuC address space is shown below:
+ * GuC can't allocate any memory for its own usage, so all the allocations must
+ * be handled by the host driver. GuC accesses the memory via the GGTT, with the
+ * exception of the top and bottom parts of the 4GB address space, which are
+ * instead re-mapped by the GuC HW to the memory location of the FW itself (WOPCM)
+ * or other parts of the HW. The driver must take care not to place objects that
+ * the GuC is going to access in these reserved ranges. The layout of the GuC
+ * address space is shown below:
*
* ::
*
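The constraint described above can be illustrated with a small range check: any GGTT offset handed to the GuC has to fall between the re-mapped regions. A minimal sketch, where the lower bound (top of the WOPCM mapping) and the upper bound (top of the GuC-visible GGTT) are treated as assumed parameters rather than the driver's actual constants:

/* Sketch: is a GGTT range inside the window the GuC can address directly? */
static bool guc_offset_is_addressable(u64 ggtt_offset, u64 size,
				      u64 wopcm_top, u64 guc_ggtt_top)
{
	return ggtt_offset >= wopcm_top &&
	       ggtt_offset + size <= guc_ggtt_top;
}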
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 2b2f046d3cc3..e6400204a2bd 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -47,8 +47,6 @@ struct intel_guc {
struct i915_vma *stage_desc_pool;
void *stage_desc_pool_vaddr;
struct ida stage_ids;
- struct i915_vma *shared_data;
- void *shared_data_vaddr;
struct intel_guc_client *execbuf_client;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 1d3cdd67ca2f..a26a85d50209 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -548,6 +548,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10,
INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20,
INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30,
+ INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40,
INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302,
INTEL_GUC_ACTION_ENTER_S_STATE = 0x501,
INTEL_GUC_ACTION_EXIT_S_STATE = 0x502,
@@ -556,7 +557,6 @@ enum intel_guc_action {
INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
- INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000,
INTEL_GUC_ACTION_LIMIT
};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 36332064de9c..caed0d57e704 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -226,7 +226,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log)
mutex_lock(&log->relay.lock);
- if (WARN_ON(!intel_guc_log_relay_enabled(log)))
+ if (WARN_ON(!intel_guc_log_relay_created(log)))
goto out_unlock;
/* Get the pointer to shared GuC log buffer */
@@ -361,6 +361,7 @@ void intel_guc_log_init_early(struct intel_guc_log *log)
{
mutex_init(&log->relay.lock);
INIT_WORK(&log->relay.flush_work, capture_logs_work);
+ log->relay.started = false;
}
static int guc_log_relay_create(struct intel_guc_log *log)
@@ -546,7 +547,7 @@ out_unlock:
return ret;
}
-bool intel_guc_log_relay_enabled(const struct intel_guc_log *log)
+bool intel_guc_log_relay_created(const struct intel_guc_log *log)
{
return log->relay.buf_addr;
}
@@ -560,7 +561,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log)
mutex_lock(&log->relay.lock);
- if (intel_guc_log_relay_enabled(log)) {
+ if (intel_guc_log_relay_created(log)) {
ret = -EEXIST;
goto out_unlock;
}
@@ -585,6 +586,21 @@ int intel_guc_log_relay_open(struct intel_guc_log *log)
mutex_unlock(&log->relay.lock);
+ return 0;
+
+out_relay:
+ guc_log_relay_destroy(log);
+out_unlock:
+ mutex_unlock(&log->relay.lock);
+
+ return ret;
+}
+
+int intel_guc_log_relay_start(struct intel_guc_log *log)
+{
+ if (log->relay.started)
+ return -EEXIST;
+
guc_log_enable_flush_events(log);
/*
@@ -594,47 +610,59 @@ int intel_guc_log_relay_open(struct intel_guc_log *log)
*/
queue_work(system_highpri_wq, &log->relay.flush_work);
- return 0;
+ log->relay.started = true;
-out_relay:
- guc_log_relay_destroy(log);
-out_unlock:
- mutex_unlock(&log->relay.lock);
-
- return ret;
+ return 0;
}
void intel_guc_log_relay_flush(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
intel_wakeref_t wakeref;
+ if (!log->relay.started)
+ return;
+
/*
* Before initiating the forceful flush, wait for any pending/ongoing
* flush to complete otherwise forceful flush may not actually happen.
*/
flush_work(&log->relay.flush_work);
- with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+ with_intel_runtime_pm(guc_to_gt(guc)->uncore->rpm, wakeref)
guc_action_flush_log(guc);
/* GuC would have updated log buffer by now, so capture it */
guc_log_capture_logs(log);
}
-void intel_guc_log_relay_close(struct intel_guc_log *log)
+/*
+ * Stops the relay log. Called from intel_guc_log_relay_close(), so there is
+ * no possibility of a race with start/flush, since relay_write cannot race
+ * relay_close.
+ */
+static void guc_log_relay_stop(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ if (!log->relay.started)
+ return;
+
guc_log_disable_flush_events(log);
intel_synchronize_irq(i915);
flush_work(&log->relay.flush_work);
+ log->relay.started = false;
+}
+
+void intel_guc_log_relay_close(struct intel_guc_log *log)
+{
+ guc_log_relay_stop(log);
+
mutex_lock(&log->relay.lock);
- GEM_BUG_ON(!intel_guc_log_relay_enabled(log));
+ GEM_BUG_ON(!intel_guc_log_relay_created(log));
guc_log_unmap(log);
guc_log_relay_destroy(log);
mutex_unlock(&log->relay.lock);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
index 6f764879acb1..c252c022c5fc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -47,6 +47,7 @@ struct intel_guc_log {
struct i915_vma *vma;
struct {
void *buf_addr;
+ bool started;
struct work_struct flush_work;
struct rchan *channel;
struct mutex lock;
@@ -65,8 +66,9 @@ int intel_guc_log_create(struct intel_guc_log *log);
void intel_guc_log_destroy(struct intel_guc_log *log);
int intel_guc_log_set_level(struct intel_guc_log *log, u32 level);
-bool intel_guc_log_relay_enabled(const struct intel_guc_log *log);
+bool intel_guc_log_relay_created(const struct intel_guc_log *log);
int intel_guc_log_relay_open(struct intel_guc_log *log);
+int intel_guc_log_relay_start(struct intel_guc_log *log);
void intel_guc_log_relay_flush(struct intel_guc_log *log);
void intel_guc_log_relay_close(struct intel_guc_log *log);
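Taken together, the declarations above split the old relay open into an open/start pair. A sketch of the intended call order for a relay consumer, with error handling reduced to the minimum and the debugfs plumbing that normally drives these calls omitted:

static int guc_log_relay_example(struct intel_guc_log *log)
{
	int ret;

	ret = intel_guc_log_relay_open(log);	/* create and map the buffer */
	if (ret)
		return ret;

	ret = intel_guc_log_relay_start(log);	/* enable flush events */
	if (ret)
		goto out_close;

	intel_guc_log_relay_flush(log);		/* force one capture */

out_close:
	intel_guc_log_relay_close(log);		/* stop, unmap and destroy */
	return ret;
}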
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
index edf194d23c6b..1949346e714e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
@@ -83,6 +83,9 @@
#define GEN8_GTCR _MMIO(0x4274)
#define GEN8_GTCR_INVALIDATE (1<<0)
+#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8)
+#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0)
+
#define GUC_ARAT_C6DIS _MMIO(0xA178)
#define GUC_SHIM_CONTROL _MMIO(0xc064)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index f325d3dd564f..2498c55e0ea5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -6,12 +6,13 @@
#include <linux/circ_buf.h>
#include "gem/i915_gem_context.h"
-
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
+
#include "intel_guc_submission.h"
#include "i915_drv.h"
@@ -29,6 +30,12 @@ enum {
/**
* DOC: GuC-based command submission
*
+ * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC
+ * firmware is moving to an updated submission interface and we plan to
+ * turn submission back on when that lands. The documentation below (and related
+ * code) matches the old submission model and will be updated as part of the
+ * upgrade to the new flow.
+ *
* GuC client:
 * An intel_guc_client refers to a submission path through GuC. Currently, there
* is only one client, which is charged with all submissions to the GuC. This
@@ -1004,7 +1011,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
static void guc_interrupts_capture(struct intel_gt *gt)
{
- struct intel_rps *rps = &gt->i915->gt_pm.rps;
+ struct intel_rps *rps = &gt->rps;
struct intel_uncore *uncore = gt->uncore;
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -1014,7 +1021,7 @@ static void guc_interrupts_capture(struct intel_gt *gt)
* to GuC
*/
irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING);
- for_each_engine(engine, gt->i915, id)
+ for_each_engine(engine, gt, id)
ENGINE_WRITE(engine, RING_MODE_GEN7, irqs);
/* route USER_INTERRUPT to Host, all others are sent to GuC. */
@@ -1050,7 +1057,7 @@ static void guc_interrupts_capture(struct intel_gt *gt)
static void guc_interrupts_release(struct intel_gt *gt)
{
- struct intel_rps *rps = &gt->i915->gt_pm.rps;
+ struct intel_rps *rps = &gt->rps;
struct intel_uncore *uncore = gt->uncore;
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -1062,7 +1069,7 @@ static void guc_interrupts_release(struct intel_gt *gt)
*/
irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER);
irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING);
- for_each_engine(engine, gt->i915, id)
+ for_each_engine(engine, gt, id)
ENGINE_WRITE(engine, RING_MODE_GEN7, irqs);
/* route all GT interrupts to the host */
@@ -1119,7 +1126,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
enum intel_engine_id id;
int err;
- err = i915_inject_load_error(gt->i915, -ENXIO);
+ err = i915_inject_probe_error(gt->i915, -ENXIO);
if (err)
return err;
@@ -1145,7 +1152,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
/* Take over from manual control of ELSP (execlists) */
guc_interrupts_capture(gt);
- for_each_engine(engine, gt->i915, id) {
+ for_each_engine(engine, gt, id) {
engine->set_default_submission = guc_set_default_submission;
engine->set_default_submission(engine);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index d4625c97b4f9..32a069841c14 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -9,6 +9,34 @@
#include "intel_huc.h"
#include "i915_drv.h"
+/**
+ * DOC: HuC
+ *
+ * The HuC is a dedicated microcontroller for usage in media HEVC (High
+ * Efficiency Video Coding) operations. Userspace can directly use the firmware
+ * capabilities by adding HuC specific commands to batch buffers.
+ *
+ * The kernel driver is only responsible for loading the HuC firmware and
+ * triggering its security authentication, which is performed by the GuC. For
+ * the GuC to correctly perform the authentication, the HuC binary must be
+ * loaded before the GuC one. Loading the HuC is optional; however, not using
+ * the HuC might negatively impact power usage and/or performance of media
+ * workloads, depending on the use-cases.
+ *
+ * See https://github.com/intel/media-driver for the latest details on HuC
+ * functionality.
+ */
+
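Since userspace is the consumer of the HuC capabilities, it typically checks that the firmware has been loaded and authenticated before emitting HuC commands. A minimal userspace sketch using the i915 getparam interface; the I915_PARAM_HUC_STATUS parameter is not part of this patch and is assumed here for illustration:

#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Sketch: returns non-zero when the HuC is loaded and authenticated. */
static int huc_is_ready(int drm_fd)
{
	int value = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HUC_STATUS,	/* assumed parameter */
		.value = &value,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return 0;

	return value;
}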
+/**
+ * DOC: HuC Memory Management
+ *
+ * Similarly to the GuC, the HuC can't do any memory allocations on its own,
+ * with the difference being that the allocations for HuC usage are handled by
+ * the userspace driver instead of the kernel one. The HuC accesses the memory
+ * via the PPGTT belonging to the context loaded on the VCS executing the
+ * HuC-specific commands.
+ */
+
void intel_huc_init_early(struct intel_huc *huc)
{
struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
@@ -35,7 +63,7 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc)
void *vaddr;
int err;
- err = i915_inject_load_error(gt->i915, -ENXIO);
+ err = i915_inject_probe_error(gt->i915, -ENXIO);
if (err)
return err;
@@ -118,10 +146,9 @@ void intel_huc_fini(struct intel_huc *huc)
*
* Called after HuC and GuC firmware loading during intel_uc_init_hw().
*
- * This function pins HuC firmware image object into GGTT.
- * Then it invokes GuC action to authenticate passing the offset to RSA
- * signature through intel_guc_auth_huc(). It then waits for 50ms for
- * firmware verification ACK and unpins the object.
+ * This function invokes the GuC action to authenticate the HuC firmware,
+ * passing the offset of the RSA signature to intel_guc_auth_huc(). It then
+ * waits up to 50ms for the firmware verification ACK.
*/
int intel_huc_auth(struct intel_huc *huc)
{
@@ -134,7 +161,7 @@ int intel_huc_auth(struct intel_huc *huc)
if (!intel_uc_fw_is_loaded(&huc->fw))
return -ENOEXEC;
- ret = i915_inject_load_error(gt->i915, -ENXIO);
+ ret = i915_inject_probe_error(gt->i915, -ENXIO);
if (ret)
goto fail;
@@ -185,7 +212,7 @@ int intel_huc_check_status(struct intel_huc *huc)
if (!intel_huc_is_supported(huc))
return -ENODEV;
- with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
status = intel_uncore_read(gt->uncore, huc->status.reg);
return (status & huc->status.mask) == huc->status.value;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
index 74602487ed67..d654340d4d03 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -8,21 +8,6 @@
#include "i915_drv.h"
/**
- * DOC: HuC Firmware
- *
- * Motivation:
- * GEN9 introduces a new dedicated firmware for usage in media HEVC (High
- * Efficiency Video Coding) operations. Userspace can use the firmware
- * capabilities by adding HuC specific commands to batch buffers.
- *
- * Implementation:
- * The same firmware loader is used as the GuC. However, the actual
- * loading to HW is deferred until GEM initialization is done.
- *
- * Note that HuC firmware loading must be done before GuC loading.
- */
-
-/**
* intel_huc_fw_init_early() - initializes HuC firmware struct
* @huc: intel_huc struct
*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 71ee7ab035cc..629b19377a29 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -20,7 +20,7 @@ static int __intel_uc_reset_hw(struct intel_uc *uc)
int ret;
u32 guc_status;
- ret = i915_inject_load_error(gt->i915, -ENXIO);
+ ret = i915_inject_probe_error(gt->i915, -ENXIO);
if (ret)
return ret;
@@ -197,7 +197,7 @@ static int guc_enable_communication(struct intel_guc *guc)
GEM_BUG_ON(guc_communication_enabled(guc));
- ret = i915_inject_load_error(i915, -ENXIO);
+ ret = i915_inject_probe_error(i915, -ENXIO);
if (ret)
return ret;
@@ -224,17 +224,7 @@ static int guc_enable_communication(struct intel_guc *guc)
return 0;
}
-static void guc_stop_communication(struct intel_guc *guc)
-{
- intel_guc_ct_stop(&guc->ct);
-
- guc->send = intel_guc_send_nop;
- guc->handler = intel_guc_to_host_event_handler_nop;
-
- guc_clear_mmio_msg(guc);
-}
-
-static void guc_disable_communication(struct intel_guc *guc)
+static void __guc_stop_communication(struct intel_guc *guc)
{
/*
* Events generated during or after CT disable are logged by guc in
@@ -247,6 +237,20 @@ static void guc_disable_communication(struct intel_guc *guc)
guc->send = intel_guc_send_nop;
guc->handler = intel_guc_to_host_event_handler_nop;
+}
+
+static void guc_stop_communication(struct intel_guc *guc)
+{
+ intel_guc_ct_stop(&guc->ct);
+
+ __guc_stop_communication(guc);
+
+ DRM_INFO("GuC communication stopped\n");
+}
+
+static void guc_disable_communication(struct intel_guc *guc)
+{
+ __guc_stop_communication(guc);
intel_guc_ct_disable(&guc->ct);
@@ -368,7 +372,7 @@ static int uc_init_wopcm(struct intel_uc *uc)
GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK));
GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK);
- err = i915_inject_load_error(gt->i915, -ENXIO);
+ err = i915_inject_probe_error(gt->i915, -ENXIO);
if (err)
return err;
@@ -537,7 +541,9 @@ void intel_uc_fini_hw(struct intel_uc *uc)
if (intel_uc_supports_guc_submission(uc))
intel_guc_submission_disable(guc);
- guc_disable_communication(guc);
+ if (guc_communication_enabled(guc))
+ guc_disable_communication(guc);
+
__uc_sanitize(uc);
}
@@ -581,7 +587,7 @@ void intel_uc_suspend(struct intel_uc *uc)
if (!intel_guc_is_running(guc))
return;
- with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref)
+ with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref)
intel_uc_runtime_suspend(uc);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index bd22bf11adad..66a30ab7044a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -37,27 +37,34 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
/*
* List of required GuC and HuC binaries per-platform.
* Must be ordered based on platform + revid, from newer to older.
+ *
+ * TGL 35.2 is interface-compatible with 33.0 for previous Gens. The deltas
+ * between 33.0 and 35.2 are only related to new additions to support new Gen12
+ * features.
*/
#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
- fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 8, 4, 3238)) \
- fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \
- fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 03, 01, 2893)) \
- fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \
- fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 01, 8, 2893)) \
- fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 01, 07, 1398))
-
-#define __MAKE_UC_FW_PATH(prefix_, name_, separator_, major_, minor_, patch_) \
+ fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 3)) \
+ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \
+ fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \
+ fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \
+ fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \
+ fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 4, 0, 0)) \
+ fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \
+ fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 2, 0, 0)) \
+ fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 2, 0, 0))
+
+#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
"i915/" \
__stringify(prefix_) name_ \
- __stringify(major_) separator_ \
- __stringify(minor_) separator_ \
+ __stringify(major_) "." \
+ __stringify(minor_) "." \
__stringify(patch_) ".bin"
#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \
- __MAKE_UC_FW_PATH(prefix_, "_guc_", ".", major_, minor_, patch_)
+ __MAKE_UC_FW_PATH(prefix_, "_guc_", major_, minor_, patch_)
#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \
- __MAKE_UC_FW_PATH(prefix_, "_huc_ver", "_", major_, minor_, bld_num_)
+ __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_)
/* All blobs need to be declared via MODULE_FIRMWARE() */
#define INTEL_UC_MODULE_FW(platform_, revid_, guc_, huc_) \
@@ -218,29 +225,31 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw,
{
bool user = e == -EINVAL;
- if (i915_inject_load_error(i915, e)) {
+ if (i915_inject_probe_error(i915, e)) {
/* non-existing blob */
uc_fw->path = "<invalid>";
uc_fw->user_overridden = user;
- } else if (i915_inject_load_error(i915, e)) {
+ } else if (i915_inject_probe_error(i915, e)) {
/* require next major version */
uc_fw->major_ver_wanted += 1;
uc_fw->minor_ver_wanted = 0;
uc_fw->user_overridden = user;
- } else if (i915_inject_load_error(i915, e)) {
+ } else if (i915_inject_probe_error(i915, e)) {
/* require next minor version */
uc_fw->minor_ver_wanted += 1;
uc_fw->user_overridden = user;
- } else if (uc_fw->major_ver_wanted && i915_inject_load_error(i915, e)) {
+ } else if (uc_fw->major_ver_wanted &&
+ i915_inject_probe_error(i915, e)) {
/* require prev major version */
uc_fw->major_ver_wanted -= 1;
uc_fw->minor_ver_wanted = 0;
uc_fw->user_overridden = user;
- } else if (uc_fw->minor_ver_wanted && i915_inject_load_error(i915, e)) {
+ } else if (uc_fw->minor_ver_wanted &&
+ i915_inject_probe_error(i915, e)) {
/* require prev minor version - hey, this should work! */
uc_fw->minor_ver_wanted -= 1;
uc_fw->user_overridden = user;
- } else if (user && i915_inject_load_error(i915, e)) {
+ } else if (user && i915_inject_probe_error(i915, e)) {
/* officially unsupported platform */
uc_fw->major_ver_wanted = 0;
uc_fw->minor_ver_wanted = 0;
@@ -269,7 +278,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915)
GEM_BUG_ON(!i915->wopcm.size);
GEM_BUG_ON(!intel_uc_fw_is_enabled(uc_fw));
- err = i915_inject_load_error(i915, -ENXIO);
+ err = i915_inject_probe_error(i915, -ENXIO);
if (err)
return err;
@@ -337,25 +346,10 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915)
}
/* Get version numbers from the CSS header */
- switch (uc_fw->type) {
- case INTEL_UC_FW_TYPE_GUC:
- uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MAJOR,
- css->sw_version);
- uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MINOR,
- css->sw_version);
- break;
-
- case INTEL_UC_FW_TYPE_HUC:
- uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MAJOR,
- css->sw_version);
- uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MINOR,
- css->sw_version);
- break;
-
- default:
- MISSING_CASE(uc_fw->type);
- break;
- }
+ uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
+ css->sw_version);
+ uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
+ css->sw_version);
if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
@@ -400,7 +394,7 @@ static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt)
{
struct drm_mm_node *node = &ggtt->uc_fw;
- GEM_BUG_ON(!node->allocated);
+ GEM_BUG_ON(!drm_mm_node_allocated(node));
GEM_BUG_ON(upper_32_bits(node->start));
GEM_BUG_ON(upper_32_bits(node->start + node->size - 1));
@@ -445,7 +439,7 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
u64 offset;
int ret;
- ret = i915_inject_load_error(gt->i915, -ETIMEDOUT);
+ ret = i915_inject_probe_error(gt->i915, -ETIMEDOUT);
if (ret)
return ret;
@@ -506,7 +500,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
/* make sure the status was cleared the last time we reset the uc */
GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw));
- err = i915_inject_load_error(gt->i915, -ENOEXEC);
+ err = i915_inject_probe_error(gt->i915, -ENOEXEC);
if (err)
return err;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
index ae58e8a8c53b..029214cdedd5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
@@ -39,9 +39,6 @@
 * 3. Length info of each component can be found in the header, in dwords.
 * 4. Modulus and exponent key are not required by the driver. They may not
 *    appear in the fw, so the driver will load a truncated firmware in this case.
- *
- * The only difference between GuC and HuC firmwares is how the version
- * information is saved.
*/
struct uc_css_header {
@@ -69,11 +66,9 @@ struct uc_css_header {
char username[8];
char buildnumber[12];
u32 sw_version;
-#define CSS_SW_VERSION_GUC_MAJOR (0xFF << 16)
-#define CSS_SW_VERSION_GUC_MINOR (0xFF << 8)
-#define CSS_SW_VERSION_GUC_PATCH (0xFF << 0)
-#define CSS_SW_VERSION_HUC_MAJOR (0xFFFF << 16)
-#define CSS_SW_VERSION_HUC_MINOR (0xFFFF << 0)
+#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16)
+#define CSS_SW_VERSION_UC_MINOR (0xFF << 8)
+#define CSS_SW_VERSION_UC_PATCH (0xFF << 0)
u32 reserved[14];
u32 header_info;
} __packed;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index bba0eafe1cdb..d8a80388bd31 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -108,23 +108,15 @@ static bool client_doorbell_in_sync(struct intel_guc_client *client)
 * validating that the doorbell status expected by the driver matches what the
* GuC/HW have.
*/
-static int igt_guc_clients(void *args)
+static int igt_guc_clients(void *arg)
{
- struct drm_i915_private *dev_priv = args;
+ struct intel_gt *gt = arg;
+ struct intel_guc *guc = &gt->uc.guc;
intel_wakeref_t wakeref;
- struct intel_guc *guc;
int err = 0;
- GEM_BUG_ON(!HAS_GT_UC(dev_priv));
- mutex_lock(&dev_priv->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
-
- guc = &dev_priv->gt.uc.guc;
- if (!guc) {
- pr_err("No guc object!\n");
- err = -EINVAL;
- goto unlock;
- }
+ GEM_BUG_ON(!HAS_GT_UC(gt->i915));
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
err = check_all_doorbells(guc);
if (err)
@@ -189,8 +181,7 @@ out:
guc_clients_create(guc);
guc_clients_enable(guc);
unlock:
- intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
- mutex_unlock(&dev_priv->drm.struct_mutex);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
return err;
}
@@ -201,22 +192,14 @@ unlock:
*/
static int igt_guc_doorbells(void *arg)
{
- struct drm_i915_private *dev_priv = arg;
+ struct intel_gt *gt = arg;
+ struct intel_guc *guc = &gt->uc.guc;
intel_wakeref_t wakeref;
- struct intel_guc *guc;
int i, err = 0;
u16 db_id;
- GEM_BUG_ON(!HAS_GT_UC(dev_priv));
- mutex_lock(&dev_priv->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
-
- guc = &dev_priv->gt.uc.guc;
- if (!guc) {
- pr_err("No guc object!\n");
- err = -EINVAL;
- goto unlock;
- }
+ GEM_BUG_ON(!HAS_GT_UC(gt->i915));
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
err = check_all_doorbells(guc);
if (err)
@@ -298,20 +281,19 @@ out:
guc_client_free(clients[i]);
}
unlock:
- intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
- mutex_unlock(&dev_priv->drm.struct_mutex);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
return err;
}
-int intel_guc_live_selftest(struct drm_i915_private *dev_priv)
+int intel_guc_live_selftest(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_guc_clients),
SUBTEST(igt_guc_doorbells),
};
- if (!USES_GUC_SUBMISSION(dev_priv))
+ if (!USES_GUC_SUBMISSION(i915))
return 0;
- return i915_subtests(tests, dev_priv);
+ return intel_gt_live_subtests(tests, &i915->gt);
}