Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--  drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 6
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 17
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 303
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h | 8
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context.c | 15
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context_types.h | 23
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine.h | 55
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_cs.c | 42
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 6
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pm.c | 37
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ggtt.c | 6
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt.c | 11
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.c | 10
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.h | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c | 228
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.h | 4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 3
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_mocs.c | 71
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rc6.c | 22
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_reset.c | 8
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring_submission.c | 1
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps.c | 62
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_sseu.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_timeline.c | 18
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_timeline_types.h | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_workarounds.c | 147
-rw-r--r--  drivers/gpu/drm/i915/gt/mock_engine.c | 29
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c | 13
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_rps.c | 8
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_timeline.c | 378
-rw-r--r--  drivers/gpu/drm/i915/gt/shmem_utils.c | 9
-rw-r--r--  drivers/gpu/drm/i915/gt/sysfs_engines.c | 10
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc.c | 24
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 132
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 1
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 31
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 80
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h | 5
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc.c | 6
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 28
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h | 6
43 files changed, 1386 insertions(+), 487 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index c30adc05fa98..680bd9442eb0 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -131,17 +131,17 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
do {
- GEM_BUG_ON(iter.sg->length < I915_GTT_PAGE_SIZE);
+ GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE);
vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
iter.dma += I915_GTT_PAGE_SIZE;
if (iter.dma == iter.max) {
iter.sg = __sg_next(iter.sg);
- if (!iter.sg)
+ if (!iter.sg || sg_dma_len(iter.sg) == 0)
break;
iter.dma = sg_dma_address(iter.sg);
- iter.max = iter.dma + iter.sg->length;
+ iter.max = iter.dma + sg_dma_len(iter.sg);
}
if (++act_pte == GEN6_PTES) {
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 38c7069b7749..a37c968ef8f7 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -372,19 +372,19 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
do {
- GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
+ GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
iter->dma += I915_GTT_PAGE_SIZE;
if (iter->dma >= iter->max) {
iter->sg = __sg_next(iter->sg);
- if (!iter->sg) {
+ if (!iter->sg || sg_dma_len(iter->sg) == 0) {
idx = 0;
break;
}
iter->dma = sg_dma_address(iter->sg);
- iter->max = iter->dma + iter->sg->length;
+ iter->max = iter->dma + sg_dma_len(iter->sg);
}
if (gen8_pd_index(++idx, 0) == 0) {
@@ -413,8 +413,8 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
u32 flags)
{
const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+ unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma->node.start;
- dma_addr_t rem = iter->sg->length;
GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
@@ -456,7 +456,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
}
do {
- GEM_BUG_ON(iter->sg->length < page_size);
+ GEM_BUG_ON(sg_dma_len(iter->sg) < page_size);
vaddr[index++] = encode | iter->dma;
start += page_size;
@@ -467,7 +467,10 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
if (!iter->sg)
break;
- rem = iter->sg->length;
+ rem = sg_dma_len(iter->sg);
+ if (!rem)
+ break;
+
iter->dma = sg_dma_address(iter->sg);
iter->max = iter->dma + rem;
@@ -525,7 +528,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
}
vma->page_sizes.gtt |= page_size;
- } while (iter->sg);
+ } while (iter->sg && sg_dma_len(iter->sg));
}
static void gen8_ppgtt_insert(struct i915_address_space *vm,
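Both PTE writers above now trust sg_dma_len() (the DMA-mapped length) rather than sg->length (the CPU-side length): with an IOMMU the two can differ, and coalescing can leave trailing scatterlist entries whose dma length is zero. A minimal sketch of the resulting walk, assuming an already-mapped scatterlist; write_pte() is a hypothetical stand-in, not part of the patch:

	struct sgt_dma it = {
		.sg  = sgt->sgl,
		.dma = sg_dma_address(sgt->sgl),
		.max = sg_dma_address(sgt->sgl) + sg_dma_len(sgt->sgl),
	};

	while (it.sg) {
		write_pte(it.dma);		/* hypothetical PTE write */
		it.dma += I915_GTT_PAGE_SIZE;
		if (it.dma == it.max) {
			it.sg = __sg_next(it.sg);
			if (!it.sg || !sg_dma_len(it.sg))
				break;	/* a zero dma_len marks the end of the mapped range */
			it.dma = sg_dma_address(it.sg);
			it.max = it.dma + sg_dma_len(it.sg);
		}
	}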
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index d8b206e53660..a24cc1ff08a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -30,18 +30,21 @@
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
+#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
-static void irq_enable(struct intel_engine_cs *engine)
+static bool irq_enable(struct intel_engine_cs *engine)
{
if (!engine->irq_enable)
- return;
+ return false;
/* Caller disables interrupts */
spin_lock(&engine->gt->irq_lock);
engine->irq_enable(engine);
spin_unlock(&engine->gt->irq_lock);
+
+ return true;
}
static void irq_disable(struct intel_engine_cs *engine)
@@ -57,12 +60,11 @@ static void irq_disable(struct intel_engine_cs *engine)
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
- lockdep_assert_held(&b->irq_lock);
-
- if (!b->irq_engine || b->irq_armed)
- return;
-
- if (!intel_gt_pm_get_if_awake(b->irq_engine->gt))
+ /*
+ * Since we are waiting on a request, the GPU should be busy
+ * and should have its own rpm reference.
+ */
+ if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
return;
/*
@@ -73,25 +75,24 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
*/
WRITE_ONCE(b->irq_armed, true);
- /*
- * Since we are waiting on a request, the GPU should be busy
- * and should have its own rpm reference. This is tracked
- * by i915->gt.awake, we can forgo holding our own wakref
- * for the interrupt as before i915->gt.awake is released (when
- * the driver is idle) we disarm the breadcrumbs.
- */
-
- if (!b->irq_enabled++)
- irq_enable(b->irq_engine);
+ /* Requests may have completed before we could enable the interrupt. */
+ if (!b->irq_enabled++ && irq_enable(b->irq_engine))
+ irq_work_queue(&b->irq_work);
}
-static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
- lockdep_assert_held(&b->irq_lock);
-
- if (!b->irq_engine || !b->irq_armed)
+ if (!b->irq_engine)
return;
+ spin_lock(&b->irq_lock);
+ if (!b->irq_armed)
+ __intel_breadcrumbs_arm_irq(b);
+ spin_unlock(&b->irq_lock);
+}
+
+static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+{
GEM_BUG_ON(!b->irq_enabled);
if (!--b->irq_enabled)
irq_disable(b->irq_engine);
@@ -100,20 +101,37 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
intel_gt_pm_put_async(b->irq_engine->gt);
}
+static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+{
+ spin_lock(&b->irq_lock);
+ if (b->irq_armed)
+ __intel_breadcrumbs_disarm_irq(b);
+ spin_unlock(&b->irq_lock);
+}
+
static void add_signaling_context(struct intel_breadcrumbs *b,
struct intel_context *ce)
{
- intel_context_get(ce);
- list_add_tail(&ce->signal_link, &b->signalers);
- if (list_is_first(&ce->signal_link, &b->signalers))
- __intel_breadcrumbs_arm_irq(b);
+ lockdep_assert_held(&ce->signal_lock);
+
+ spin_lock(&b->signalers_lock);
+ list_add_rcu(&ce->signal_link, &b->signalers);
+ spin_unlock(&b->signalers_lock);
}
-static void remove_signaling_context(struct intel_breadcrumbs *b,
+static bool remove_signaling_context(struct intel_breadcrumbs *b,
struct intel_context *ce)
{
- list_del(&ce->signal_link);
- intel_context_put(ce);
+ lockdep_assert_held(&ce->signal_lock);
+
+ if (!list_empty(&ce->signals))
+ return false;
+
+ spin_lock(&b->signalers_lock);
+ list_del_rcu(&ce->signal_link);
+ spin_unlock(&b->signalers_lock);
+
+ return true;
}
static inline bool __request_completed(const struct i915_request *rq)
@@ -174,73 +192,103 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
intel_engine_add_retire(b->irq_engine, tl);
}
-static bool __signal_request(struct i915_request *rq, struct list_head *signals)
+static bool __signal_request(struct i915_request *rq)
{
- clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+ GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
if (!__dma_fence_signal(&rq->fence)) {
i915_request_put(rq);
return false;
}
- list_add_tail(&rq->signal_link, signals);
return true;
}
+static struct llist_node *
+slist_add(struct llist_node *node, struct llist_node *head)
+{
+ node->next = head;
+ return node;
+}
+
static void signal_irq_work(struct irq_work *work)
{
struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
const ktime_t timestamp = ktime_get();
- struct intel_context *ce, *cn;
- struct list_head *pos, *next;
- LIST_HEAD(signal);
-
- spin_lock(&b->irq_lock);
+ struct llist_node *signal, *sn;
+ struct intel_context *ce;
- if (list_empty(&b->signalers))
- __intel_breadcrumbs_disarm_irq(b);
+ signal = NULL;
+ if (unlikely(!llist_empty(&b->signaled_requests)))
+ signal = llist_del_all(&b->signaled_requests);
- list_splice_init(&b->signaled_requests, &signal);
+ /*
+ * Keep the irq armed until the interrupt after all listeners are gone.
+ *
+ * Enabling/disabling the interrupt is rather costly, roughly a couple
+ * of hundred microseconds. If we are proactive and enable/disable
+ * the interrupt around every request that wants a breadcrumb, we
+ * quickly drown in the extra orders of magnitude of latency imposed
+ * on request submission.
+ *
+ * So we try to be lazy, and keep the interrupts enabled until no
+ * more listeners appear within a breadcrumb interrupt interval (that
+ * is until a request completes that no one cares about). The
+ * observation is that listeners come in batches, and will often
+ * listen to a bunch of requests in succession. Though note on icl+,
+ * interrupts are always enabled due to concerns with rc6 being
+ * dysfunctional with per-engine interrupt masking.
+ *
+ * We also try to avoid raising too many interrupts, as they may
+ * be generated by userspace batches and it is unfortunately rather
+ * too easy to drown the CPU under a flood of GPU interrupts. Thus
+ * whenever no one appears to be listening, we turn off the interrupts.
+ * Fewer interrupts should conserve power -- at the very least, fewer
+ * interrupts draw less ire from other users of the system and tools
+ * like powertop.
+ */
+ if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
+ intel_breadcrumbs_disarm_irq(b);
- list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
- GEM_BUG_ON(list_empty(&ce->signals));
+ rcu_read_lock();
+ list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
+ struct i915_request *rq;
- list_for_each_safe(pos, next, &ce->signals) {
- struct i915_request *rq =
- list_entry(pos, typeof(*rq), signal_link);
+ list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
+ bool release;
- GEM_BUG_ON(!check_signal_order(ce, rq));
if (!__request_completed(rq))
break;
+ if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
+ &rq->fence.flags))
+ break;
+
/*
* Queue for execution after dropping the signaling
* spinlock as the callback chain may end up adding
* more signalers to the same context or engine.
*/
- __signal_request(rq, &signal);
- }
+ spin_lock(&ce->signal_lock);
+ list_del_rcu(&rq->signal_link);
+ release = remove_signaling_context(b, ce);
+ spin_unlock(&ce->signal_lock);
- /*
- * We process the list deletion in bulk, only using a list_add
- * (not list_move) above but keeping the status of
- * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit.
- */
- if (!list_is_first(pos, &ce->signals)) {
- /* Advance the list to the first incomplete request */
- __list_del_many(&ce->signals, pos);
- if (&ce->signals == pos) { /* now empty */
+ if (__signal_request(rq))
+ /* We own signal_node now, xfer to local list */
+ signal = slist_add(&rq->signal_node, signal);
+
+ if (release) {
add_retire(b, ce->timeline);
- remove_signaling_context(b, ce);
+ intel_context_put(ce);
}
}
}
+ rcu_read_unlock();
- spin_unlock(&b->irq_lock);
-
- list_for_each_safe(pos, next, &signal) {
+ llist_for_each_safe(signal, sn, signal) {
struct i915_request *rq =
- list_entry(pos, typeof(*rq), signal_link);
+ llist_entry(signal, typeof(*rq), signal_node);
struct list_head cb_list;
spin_lock(&rq->lock);
@@ -251,6 +299,9 @@ static void signal_irq_work(struct irq_work *work)
i915_request_put(rq);
}
+
+ if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
+ intel_breadcrumbs_arm_irq(b);
}
struct intel_breadcrumbs *
@@ -262,14 +313,15 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
if (!b)
return NULL;
- spin_lock_init(&b->irq_lock);
+ b->irq_engine = irq_engine;
+
+ spin_lock_init(&b->signalers_lock);
INIT_LIST_HEAD(&b->signalers);
- INIT_LIST_HEAD(&b->signaled_requests);
+ init_llist_head(&b->signaled_requests);
+ spin_lock_init(&b->irq_lock);
init_irq_work(&b->irq_work, signal_irq_work);
- b->irq_engine = irq_engine;
-
return b;
}
@@ -292,27 +344,28 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
- unsigned long flags;
-
- if (!READ_ONCE(b->irq_armed))
- return;
-
- spin_lock_irqsave(&b->irq_lock, flags);
- __intel_breadcrumbs_disarm_irq(b);
- spin_unlock_irqrestore(&b->irq_lock, flags);
-
- if (!list_empty(&b->signalers))
- irq_work_queue(&b->irq_work);
+ /* Kick the work once more to drain the signalers */
+ irq_work_sync(&b->irq_work);
+ while (unlikely(READ_ONCE(b->irq_armed))) {
+ local_irq_disable();
+ signal_irq_work(&b->irq_work);
+ local_irq_enable();
+ cond_resched();
+ }
+ GEM_BUG_ON(!list_empty(&b->signalers));
}
void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
{
+ irq_work_sync(&b->irq_work);
+ GEM_BUG_ON(!list_empty(&b->signalers));
+ GEM_BUG_ON(b->irq_armed);
kfree(b);
}
-static void insert_breadcrumb(struct i915_request *rq,
- struct intel_breadcrumbs *b)
+static void insert_breadcrumb(struct i915_request *rq)
{
+ struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
struct intel_context *ce = rq->context;
struct list_head *pos;
@@ -327,12 +380,14 @@ static void insert_breadcrumb(struct i915_request *rq,
* its signal completion.
*/
if (__request_completed(rq)) {
- if (__signal_request(rq, &b->signaled_requests))
+ if (__signal_request(rq) &&
+ llist_add(&rq->signal_node, &b->signaled_requests))
irq_work_queue(&b->irq_work);
return;
}
if (list_empty(&ce->signals)) {
+ intel_context_get(ce);
add_signaling_context(b, ce);
pos = &ce->signals;
} else {
@@ -358,18 +413,22 @@ static void insert_breadcrumb(struct i915_request *rq,
break;
}
}
- list_add(&rq->signal_link, pos);
+ list_add_rcu(&rq->signal_link, pos);
GEM_BUG_ON(!check_signal_order(ce, rq));
+ GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
- /* Check after attaching to irq, interrupt may have already fired. */
- if (__request_completed(rq))
- irq_work_queue(&b->irq_work);
+ /*
+ * Defer enabling the interrupt to after HW submission and recheck
+ * the request as it may have completed and raised the interrupt as
+ * we were attaching it into the lists.
+ */
+ irq_work_queue(&b->irq_work);
}
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
- struct intel_breadcrumbs *b;
+ struct intel_context *ce = rq->context;
/* Serialises with i915_request_retire() using rq->lock */
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
@@ -384,67 +443,30 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
return true;
- /*
- * rq->engine is locked by rq->engine->active.lock. That however
- * is not known until after rq->engine has been dereferenced and
- * the lock acquired. Hence we acquire the lock and then validate
- * that rq->engine still matches the lock we hold for it.
- *
- * Here, we are using the breadcrumb lock as a proxy for the
- * rq->engine->active.lock, and we know that since the breadcrumb
- * will be serialised within i915_request_submit/i915_request_unsubmit,
- * the engine cannot change while active as long as we hold the
- * breadcrumb lock on that engine.
- *
- * From the dma_fence_enable_signaling() path, we are outside of the
- * request submit/unsubmit path, and so we must be more careful to
- * acquire the right lock.
- */
- b = READ_ONCE(rq->engine)->breadcrumbs;
- spin_lock(&b->irq_lock);
- while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) {
- spin_unlock(&b->irq_lock);
- b = READ_ONCE(rq->engine)->breadcrumbs;
- spin_lock(&b->irq_lock);
- }
-
- /*
- * Now that we are finally serialised with request submit/unsubmit,
- * [with b->irq_lock] and with i915_request_retire() [via checking
- * SIGNALED with rq->lock] confirm the request is indeed active. If
- * it is no longer active, the breadcrumb will be attached upon
- * i915_request_submit().
- */
+ spin_lock(&ce->signal_lock);
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
- insert_breadcrumb(rq, b);
-
- spin_unlock(&b->irq_lock);
+ insert_breadcrumb(rq);
+ spin_unlock(&ce->signal_lock);
return true;
}
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
- struct intel_breadcrumbs *b = rq->engine->breadcrumbs;
+ struct intel_context *ce = rq->context;
+ bool release;
- /*
- * We must wait for b->irq_lock so that we know the interrupt handler
- * has released its reference to the intel_context and has completed
- * the DMA_FENCE_FLAG_SIGNALED_BIT/I915_FENCE_FLAG_SIGNAL dance (if
- * required).
- */
- spin_lock(&b->irq_lock);
- if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
- struct intel_context *ce = rq->context;
+ if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
+ return;
- list_del(&rq->signal_link);
- if (list_empty(&ce->signals))
- remove_signaling_context(b, ce);
+ spin_lock(&ce->signal_lock);
+ list_del_rcu(&rq->signal_link);
+ release = remove_signaling_context(rq->engine->breadcrumbs, ce);
+ spin_unlock(&ce->signal_lock);
+ if (release)
+ intel_context_put(ce);
- clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
- i915_request_put(rq);
- }
- spin_unlock(&b->irq_lock);
+ i915_request_put(rq);
}
static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
@@ -454,18 +476,17 @@ static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
drm_printf(p, "Signals:\n");
- spin_lock_irq(&b->irq_lock);
- list_for_each_entry(ce, &b->signalers, signal_link) {
- list_for_each_entry(rq, &ce->signals, signal_link) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
+ list_for_each_entry_rcu(rq, &ce->signals, signal_link)
drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
rq->fence.context, rq->fence.seqno,
i915_request_completed(rq) ? "!" :
i915_request_started(rq) ? "*" :
"",
jiffies_to_msecs(jiffies - rq->emitted_jiffies));
- }
}
- spin_unlock_irq(&b->irq_lock);
+ rcu_read_unlock();
}
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
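The rework above replaces the b->irq_lock protected list of completed requests with a lock-free llist, so producers never contend with the irq worker. A reduced sketch of that handoff, assuming the same single-consumer irq worker as the patch (illustrative only, not the driver code):

	/* producer, any context: push, and kick the worker on the first add */
	if (llist_add(&rq->signal_node, &b->signaled_requests))
		irq_work_queue(&b->irq_work);

	/* consumer, the irq worker: detach the whole list atomically */
	struct llist_node *node = llist_del_all(&b->signaled_requests);
	struct i915_request *rq, *rn;

	llist_for_each_entry_safe(rq, rn, node, signal_node)
		dma_fence_signal(&rq->fence);

llist_add() returns true only when the list was previously empty, which is what lets the producer queue the irq_work exactly once per batch.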
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
index 8e53b9942695..a74bb3062bd8 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -29,18 +29,16 @@
* the overhead of waking that client is much preferred.
*/
struct intel_breadcrumbs {
- spinlock_t irq_lock; /* protects the lists used in hardirq context */
-
/* Not all breadcrumbs are attached to physical HW */
struct intel_engine_cs *irq_engine;
+ spinlock_t signalers_lock; /* protects the list of signalers */
struct list_head signalers;
- struct list_head signaled_requests;
+ struct llist_head signaled_requests;
+ spinlock_t irq_lock; /* protects the interrupt from hardirq context */
struct irq_work irq_work; /* for use from inside irq_lock */
-
unsigned int irq_enabled;
-
bool irq_armed;
};
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 92a3f25c4006..349e7fa1488d 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -25,11 +25,18 @@ static struct intel_context *intel_context_alloc(void)
return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL);
}
-void intel_context_free(struct intel_context *ce)
+static void rcu_context_free(struct rcu_head *rcu)
{
+ struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);
+
kmem_cache_free(global.slab_ce, ce);
}
+void intel_context_free(struct intel_context *ce)
+{
+ call_rcu(&ce->rcu, rcu_context_free);
+}
+
struct intel_context *
intel_context_create(struct intel_engine_cs *engine)
{
@@ -356,8 +363,7 @@ static int __intel_context_active(struct i915_active *active)
}
void
-intel_context_init(struct intel_context *ce,
- struct intel_engine_cs *engine)
+intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
{
GEM_BUG_ON(!engine->cops);
GEM_BUG_ON(!engine->gt->vm);
@@ -373,7 +379,8 @@ intel_context_init(struct intel_context *ce,
ce->vm = i915_vm_get(engine->gt->vm);
- INIT_LIST_HEAD(&ce->signal_link);
+ /* NB ce->signal_link/lock is used under RCU */
+ spin_lock_init(&ce->signal_lock);
INIT_LIST_HEAD(&ce->signals);
mutex_init(&ce->pin_mutex);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 552cb57a2e8c..52fa9c132746 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -25,6 +25,7 @@ DECLARE_EWMA(runtime, 3, 8);
struct i915_gem_context;
struct i915_gem_ww_ctx;
struct i915_vma;
+struct intel_breadcrumbs;
struct intel_context;
struct intel_ring;
@@ -44,7 +45,16 @@ struct intel_context_ops {
};
struct intel_context {
- struct kref ref;
+ /*
+ * Note: Some fields may be accessed under RCU.
+ *
+ * Unless otherwise noted a field can safely be assumed to be protected
+ * by strong reference counting.
+ */
+ union {
+ struct kref ref; /* no kref_get_unless_zero()! */
+ struct rcu_head rcu;
+ };
struct intel_engine_cs *engine;
struct intel_engine_cs *inflight;
@@ -54,8 +64,15 @@ struct intel_context {
struct i915_address_space *vm;
struct i915_gem_context __rcu *gem_context;
- struct list_head signal_link;
- struct list_head signals;
+ /*
+ * @signal_lock protects the list of requests that need signaling,
+ * @signals. While there are any requests that need signaling,
+ * we add the context to the breadcrumbs worker, and remove it
+ * upon completion/cancellation of the last request.
+ */
+ struct list_head signal_link; /* Accessed under RCU */
+ struct list_head signals; /* Guarded by signal_lock */
+ spinlock_t signal_lock; /* protects signals, the list of requests */
struct i915_vma *state;
struct intel_ring *ring;
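The union of ref and rcu above is what enables the call_rcu() free in intel_context_free(): the rcu_head reuses the kref's storage, so once the refcount hits zero the object can never be revived (hence the "no kref_get_unless_zero()!" warning), yet RCU readers may keep dereferencing it until the grace period ends. A generic sketch of the pattern, using a hypothetical struct obj rather than driver code:

	struct obj {
		union {
			struct kref ref;	/* never revived after zero */
			struct rcu_head rcu;
		};
		/* ... payload ... */
	};

	static void obj_free_rcu(struct rcu_head *rcu)
	{
		kfree(container_of(rcu, struct obj, rcu));
	}

	static void obj_release(struct kref *ref)
	{
		/* the kref's storage becomes the rcu_head from here on */
		call_rcu(&container_of(ref, struct obj, ref)->rcu, obj_free_rcu);
	}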
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 7c3a1012e702..760fefdfe392 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -245,22 +245,14 @@ static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u
}
static inline u32 *
-__gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
+__gen8_emit_write_rcs(u32 *cs, u32 value, u32 offset, u32 flags0, u32 flags1)
{
- /* We're using qword write, offset should be aligned to 8 bytes. */
- GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
-
- /* w/a for post sync ops following a GPGPU operation we
- * need a prior CS_STALL, which is emitted by the flush
- * following the batch.
- */
*cs++ = GFX_OP_PIPE_CONTROL(6) | flags0;
- *cs++ = flags1 | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
- *cs++ = gtt_offset;
+ *cs++ = flags1 | PIPE_CONTROL_QW_WRITE;
+ *cs++ = offset;
*cs++ = 0;
*cs++ = value;
- /* We're thrashing one dword of HWS. */
- *cs++ = 0;
+ *cs++ = 0; /* We're thrashing one extra dword. */
return cs;
}
@@ -268,13 +260,38 @@ __gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 f
static inline u32*
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
- return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, 0, flags);
+ /* We're using qword write, offset should be aligned to 8 bytes. */
+ GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+ return __gen8_emit_write_rcs(cs,
+ value,
+ gtt_offset,
+ 0,
+ flags | PIPE_CONTROL_GLOBAL_GTT_IVB);
}
static inline u32*
gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
{
- return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, flags0, flags1);
+ /* We're using qword write, offset should be aligned to 8 bytes. */
+ GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+ return __gen8_emit_write_rcs(cs,
+ value,
+ gtt_offset,
+ flags0,
+ flags1 | PIPE_CONTROL_GLOBAL_GTT_IVB);
+}
+
+static inline u32 *
+__gen8_emit_flush_dw(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
+{
+ *cs++ = (MI_FLUSH_DW + 1) | flags;
+ *cs++ = gtt_offset;
+ *cs++ = 0;
+ *cs++ = value;
+
+ return cs;
}
static inline u32 *
@@ -285,12 +302,10 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
- *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags;
- *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
- *cs++ = 0;
- *cs++ = value;
-
- return cs;
+ return __gen8_emit_flush_dw(cs,
+ value,
+ gtt_offset | MI_FLUSH_DW_USE_GTT,
+ flags | MI_FLUSH_DW_OP_STOREDW);
}
static inline void __intel_engine_reset(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 5bfb5f7ed02c..0b31670343f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -305,8 +305,9 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->i915 = i915;
engine->gt = gt;
engine->uncore = gt->uncore;
- engine->hw_id = engine->guc_id = info->hw_id;
engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases);
+ engine->hw_id = info->hw_id;
+ engine->guc_id = MAKE_GUC_ID(info->class, info->instance);
engine->class = info->class;
engine->instance = info->instance;
@@ -371,7 +372,8 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine)
* instances.
*/
if ((INTEL_GEN(i915) >= 11 &&
- engine->gt->info.vdbox_sfc_access & engine->mask) ||
+ (engine->gt->info.vdbox_sfc_access &
+ BIT(engine->instance))) ||
(INTEL_GEN(i915) >= 9 && engine->instance == 0))
engine->uabi_capabilities |=
I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
@@ -1599,6 +1601,41 @@ static unsigned long list_count(struct list_head *list)
return count;
}
+static unsigned long read_ul(void *p, size_t x)
+{
+ return *(unsigned long *)(p + x);
+}
+
+static void print_properties(struct intel_engine_cs *engine,
+ struct drm_printer *m)
+{
+ static const struct pmap {
+ size_t offset;
+ const char *name;
+ } props[] = {
+#define P(x) { \
+ .offset = offsetof(typeof(engine->props), x), \
+ .name = #x \
+}
+ P(heartbeat_interval_ms),
+ P(max_busywait_duration_ns),
+ P(preempt_timeout_ms),
+ P(stop_timeout_ms),
+ P(timeslice_duration_ms),
+
+ {},
+#undef P
+ };
+ const struct pmap *p;
+
+ drm_printf(m, "\tProperties:\n");
+ for (p = props; p->name; p++)
+ drm_printf(m, "\t\t%s: %lu [default %lu]\n",
+ p->name,
+ read_ul(&engine->props, p->offset),
+ read_ul(&engine->defaults, p->offset));
+}
+
void intel_engine_dump(struct intel_engine_cs *engine,
struct drm_printer *m,
const char *header, ...)
@@ -1641,6 +1678,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
drm_printf(m, "\tReset count: %d (global %d)\n",
i915_reset_engine_count(error, engine),
i915_reset_count(error));
+ print_properties(engine, m);
drm_printf(m, "\tRequests:\n");
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 5067d0524d4b..9060385cd69e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -41,6 +41,8 @@ static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
{
engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
i915_request_add_active_barriers(rq);
+ if (!engine->heartbeat.systole && intel_engine_has_heartbeat(engine))
+ engine->heartbeat.systole = i915_request_get(rq);
}
static void show_heartbeat(const struct i915_request *rq,
@@ -144,8 +146,6 @@ static void heartbeat(struct work_struct *wrk)
goto unlock;
idle_pulse(engine, rq);
- if (engine->i915->params.enable_hangcheck)
- engine->heartbeat.systole = i915_request_get(rq);
__i915_request_commit(rq);
__i915_request_queue(rq, &attr);
@@ -153,7 +153,7 @@ static void heartbeat(struct work_struct *wrk)
unlock:
mutex_unlock(&ce->timeline->mutex);
out:
- if (!next_heartbeat(engine))
+ if (!engine->i915->params.enable_hangcheck || !next_heartbeat(engine))
i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
intel_engine_pm_put(engine);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index f7b2e07e2229..499b09cb4acf 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -17,6 +17,25 @@
#include "intel_ring.h"
#include "shmem_utils.h"
+static void dbg_poison_ce(struct intel_context *ce)
+{
+ if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ return;
+
+ if (ce->state) {
+ struct drm_i915_gem_object *obj = ce->state->obj;
+ int type = i915_coherent_map_type(ce->engine->i915);
+ void *map;
+
+ map = i915_gem_object_pin_map(obj, type);
+ if (!IS_ERR(map)) {
+ memset(map, CONTEXT_REDZONE, obj->base.size);
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+ }
+ }
+}
+
static int __engine_unpark(struct intel_wakeref *wf)
{
struct intel_engine_cs *engine =
@@ -32,20 +51,14 @@ static int __engine_unpark(struct intel_wakeref *wf)
if (ce) {
GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));
+ /* Flush all pending HW writes before we touch the context */
+ while (unlikely(intel_context_inflight(ce)))
+ intel_engine_flush_submission(engine);
+
/* First poison the image to verify we never fully trust it */
- if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
- struct drm_i915_gem_object *obj = ce->state->obj;
- int type = i915_coherent_map_type(engine->i915);
- void *map;
-
- map = i915_gem_object_pin_map(obj, type);
- if (!IS_ERR(map)) {
- memset(map, CONTEXT_REDZONE, obj->base.size);
- i915_gem_object_flush_map(obj);
- i915_gem_object_unpin_map(obj);
- }
- }
+ dbg_poison_ce(ce);
+ /* Scrub the context image after our loss of control */
ce->ops->reset(ce);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 81c05f551b9c..cf94525be2c1 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -835,7 +835,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
u16 snb_gmch_ctl;
/* TODO: We're not aware of mappable constraints on gen8 yet */
- if (!IS_DGFX(i915)) {
+ if (!HAS_LMEM(i915)) {
ggtt->gmadr = pci_resource(pdev, 2);
ggtt->mappable_end = resource_size(&ggtt->gmadr);
}
@@ -1383,7 +1383,7 @@ intel_partial_pages(const struct i915_ggtt_view *view,
if (ret)
goto err_sg_alloc;
- iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
+ iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset);
GEM_BUG_ON(!iter);
sg = st->sgl;
@@ -1391,7 +1391,7 @@ intel_partial_pages(const struct i915_ggtt_view *view,
do {
unsigned int len;
- len = min(iter->length - (offset << PAGE_SHIFT),
+ len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
count << PAGE_SHIFT);
sg_set_page(sg, NULL, len, 0);
sg_dma_address(sg) =
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 39b428c5049c..44f1d51e5ae5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -614,6 +614,8 @@ void intel_gt_driver_remove(struct intel_gt *gt)
void intel_gt_driver_unregister(struct intel_gt *gt)
{
+ intel_wakeref_t wakeref;
+
intel_rps_driver_unregister(&gt->rps);
/*
@@ -622,16 +624,15 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
* resources.
*/
intel_gt_set_wedged(gt);
+
+ /* Scrub all HW state upon release */
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ __intel_gt_reset(gt, ALL_ENGINES);
}
void intel_gt_driver_release(struct intel_gt *gt)
{
struct i915_address_space *vm;
- intel_wakeref_t wakeref;
-
- /* Scrub all HW state upon release */
- with_intel_runtime_pm(gt->uncore->rpm, wakeref)
- __intel_gt_reset(gt, ALL_ENGINES);
vm = fetch_and_zero(&gt->vm);
if (vm) /* FIXME being called twice on error paths :( */
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 3f1114b58b01..7bfe9072be9a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -324,7 +324,7 @@ static void cnl_setup_private_ppat(struct intel_uncore *uncore)
GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
intel_uncore_write(uncore,
GEN10_PAT_INDEX(2),
- GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
+ GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
intel_uncore_write(uncore,
GEN10_PAT_INDEX(3),
GEN8_PPAT_UC);
@@ -349,17 +349,23 @@ static void cnl_setup_private_ppat(struct intel_uncore *uncore)
*/
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
+ struct drm_i915_private *i915 = uncore->i915;
u64 pat;
pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
- GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
+ /* for scanout with eLLC */
+ if (INTEL_GEN(i915) >= 9)
+ pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
+ else
+ pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
+
intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index c13c650ced22..8a33940a71f3 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -580,7 +580,7 @@ static inline struct sgt_dma {
struct scatterlist *sg = vma->pages->sgl;
dma_addr_t addr = sg_dma_address(sg);
- return (struct sgt_dma){ sg, addr, addr + sg->length };
+ return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) };
}
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a32aabce7901..7614a3d24fca 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -182,6 +182,7 @@
struct virtual_engine {
struct intel_engine_cs base;
struct intel_context context;
+ struct rcu_work rcu;
/*
* We allow only a single request through the virtual engine at a time
@@ -1215,7 +1216,8 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
static void
execlists_check_context(const struct intel_context *ce,
- const struct intel_engine_cs *engine)
+ const struct intel_engine_cs *engine,
+ const char *when)
{
const struct intel_ring *ring = ce->ring;
u32 *regs = ce->lrc_reg_state;
@@ -1250,7 +1252,7 @@ execlists_check_context(const struct intel_context *ce,
valid = false;
}
- WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
+ WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}
static void restore_default_state(struct intel_context *ce,
@@ -1346,7 +1348,7 @@ __execlists_schedule_in(struct i915_request *rq)
reset_active(rq, engine);
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
- execlists_check_context(ce, engine);
+ execlists_check_context(ce, engine, "before");
if (ce->tag) {
/* Use a fixed tag for OA and friends */
@@ -1417,6 +1419,9 @@ __execlists_schedule_out(struct i915_request *rq,
* refrain from doing non-trivial work here.
*/
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ execlists_check_context(ce, engine, "after");
+
/*
* If we have just completed this context, the engine may now be
* idle and we want to re-enter powersaving.
@@ -2495,25 +2500,11 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
* bits 47-57: sw context id of the lrc the GT switched away from
* bits 58-63: sw counter of the lrc the GT switched away from
*/
-static inline bool gen12_csb_parse(const u64 *csb)
+static inline bool gen12_csb_parse(const u64 csb)
{
- bool ctx_away_valid;
- bool new_queue;
- u64 entry;
-
- /* HSD#22011248461 */
- entry = READ_ONCE(*csb);
- if (unlikely(entry == -1)) {
- preempt_disable();
- if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
- GEM_WARN_ON("50us CSB timeout");
- preempt_enable();
- }
- WRITE_ONCE(*(u64 *)csb, -1);
-
- ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
- new_queue =
- lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+ bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
+ bool new_queue =
+ lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
/*
* The context switch detail is not guaranteed to be 5 when a preemption
@@ -2523,7 +2514,7 @@ static inline bool gen12_csb_parse(const u64 *csb)
* would require some extra handling, but we don't support that.
*/
if (!ctx_away_valid || new_queue) {
- GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
+ GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
return true;
}
@@ -2532,19 +2523,79 @@ static inline bool gen12_csb_parse(const u64 *csb)
* context switch on an unsuccessful wait instruction since we always
* use polling mode.
*/
- GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
+ GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
return false;
}
-static inline bool gen8_csb_parse(const u64 *csb)
+static inline bool gen8_csb_parse(const u64 csb)
+{
+ return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+}
+
+static noinline u64
+wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb)
+{
+ u64 entry;
+
+ /*
+ * Reading from the HWSP has one particular advantage: we can detect
+ * a stale entry. Since the write into HWSP is broken, we have no reason
+ * to trust the HW at all, the mmio entry may equally be unordered, so
+ * we prefer the path that is self-checking and as a last resort,
+ * return the mmio value.
+ *
+ * tgl,dg1:HSDES#22011327657
+ */
+ preempt_disable();
+ if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 10)) {
+ int idx = csb - engine->execlists.csb_status;
+ int status;
+
+ status = GEN8_EXECLISTS_STATUS_BUF;
+ if (idx >= 6) {
+ status = GEN11_EXECLISTS_STATUS_BUF2;
+ idx -= 6;
+ }
+ status += sizeof(u64) * idx;
+
+ entry = intel_uncore_read64(engine->uncore,
+ _MMIO(engine->mmio_base + status));
+ }
+ preempt_enable();
+
+ return entry;
+}
+
+static inline u64
+csb_read(const struct intel_engine_cs *engine, u64 * const csb)
{
- return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+ u64 entry = READ_ONCE(*csb);
+
+ /*
+ * Unfortunately, the GPU does not always serialise its write
+ * of the CSB entries before its write of the CSB pointer, at least
+ * from the perspective of the CPU, using what is known as a Global
+ * Observation Point. We may read a new CSB tail pointer, but then
+ * read the stale CSB entries, causing us to misinterpret the
+ * context-switch events, and eventually declare the GPU hung.
+ *
+ * icl:HSDES#1806554093
+ * tgl:HSDES#22011248461
+ */
+ if (unlikely(entry == -1))
+ entry = wa_csb_read(engine, csb);
+
+ /* Consume this entry so that we can spot its future reuse. */
+ WRITE_ONCE(*csb, -1);
+
+ /* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */
+ return entry;
}
static void process_csb(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- const u64 * const buf = execlists->csb_status;
+ u64 * const buf = execlists->csb_status;
const u8 num_entries = execlists->csb_size;
u8 head, tail;
@@ -2602,6 +2653,7 @@ static void process_csb(struct intel_engine_cs *engine)
rmb();
do {
bool promote;
+ u64 csb;
if (++head == num_entries)
head = 0;
@@ -2624,15 +2676,14 @@ static void process_csb(struct intel_engine_cs *engine)
* status notifier.
*/
+ csb = csb_read(engine, buf + head);
ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
- head,
- upper_32_bits(buf[head]),
- lower_32_bits(buf[head]));
+ head, upper_32_bits(csb), lower_32_bits(csb));
if (INTEL_GEN(engine->i915) >= 12)
- promote = gen12_csb_parse(buf + head);
+ promote = gen12_csb_parse(csb);
else
- promote = gen8_csb_parse(buf + head);
+ promote = gen8_csb_parse(csb);
if (promote) {
struct i915_request * const *old = execlists->active;
@@ -2787,6 +2838,9 @@ static void __execlists_hold(struct i915_request *rq)
static bool execlists_hold(struct intel_engine_cs *engine,
struct i915_request *rq)
{
+ if (i915_request_on_hold(rq))
+ return false;
+
spin_lock_irq(&engine->active.lock);
if (i915_request_completed(rq)) { /* too late! */
@@ -2987,6 +3041,8 @@ static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
if (!cap->error->gt->engine)
goto err_gt;
+ cap->error->gt->engine->hung = true;
+
return cap;
err_gt:
@@ -3168,8 +3224,10 @@ static void execlists_submission_tasklet(unsigned long data)
spin_unlock_irqrestore(&engine->active.lock, flags);
/* Recheck after serialising with direct-submission */
- if (unlikely(timeout && preempt_timeout(engine)))
+ if (unlikely(timeout && preempt_timeout(engine))) {
+ cancel_timer(&engine->execlists.preempt);
execlists_reset(engine, "preemption time out");
+ }
}
}
@@ -3547,6 +3605,19 @@ static const struct intel_context_ops execlists_context_ops = {
.destroy = execlists_context_destroy,
};
+static u32 hwsp_offset(const struct i915_request *rq)
+{
+ const struct intel_timeline_cacheline *cl;
+
+ /* Before the request is executed, the timeline/cacheline is fixed */
+
+ cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
+ if (cl)
+ return cl->ggtt_offset;
+
+ return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
+}
+
static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;
@@ -3569,7 +3640,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
*cs++ = MI_NOOP;
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *cs++ = i915_request_timeline(rq)->hwsp_offset;
+ *cs++ = hwsp_offset(rq);
*cs++ = 0;
*cs++ = rq->fence.seqno - 1;
@@ -4034,6 +4105,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
static void execlists_sanitize(struct intel_engine_cs *engine)
{
+ GEM_BUG_ON(execlists_active(&engine->execlists));
+
/*
* Poison residual state on resume, in case the suspend didn't!
*
@@ -4363,6 +4436,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
/* Mark all executing requests as skipped. */
list_for_each_entry(rq, &engine->active.requests, sched.link)
mark_eio(rq);
+ intel_engine_signal_breadcrumbs(engine);
/* Flush the queued requests to the timeline list (for retiring). */
while ((rb = rb_first_cached(&execlists->queue))) {
@@ -4886,11 +4960,9 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
return gen8_emit_wa_tail(request, cs);
}
-static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
{
- u32 addr = i915_request_active_timeline(request)->hwsp_offset;
-
- return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
+ return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
}
static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
@@ -4909,7 +4981,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
+ hwsp_offset(request),
PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL);
@@ -4921,7 +4993,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
+ hwsp_offset(request),
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@@ -4983,7 +5055,9 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
{
- return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
+ /* XXX Stalling flush before seqno write; post-sync not */
+ cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
+ return gen12_emit_fini_breadcrumb_tail(rq, cs);
}
static u32 *
@@ -4991,7 +5065,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
cs = gen12_emit_ggtt_write_rcs(cs,
request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
+ hwsp_offset(request),
PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
@@ -5412,44 +5486,90 @@ static struct list_head *virtual_queue(struct virtual_engine *ve)
return &ve->base.execlists.default_priolist.requests[0];
}
-static void virtual_context_destroy(struct kref *kref)
+static void rcu_virtual_context_destroy(struct work_struct *wrk)
{
struct virtual_engine *ve =
- container_of(kref, typeof(*ve), context.ref);
+ container_of(wrk, typeof(*ve), rcu.work);
unsigned int n;
- GEM_BUG_ON(!list_empty(virtual_queue(ve)));
- GEM_BUG_ON(ve->request);
GEM_BUG_ON(ve->context.inflight);
+ /* Preempt-to-busy may leave a stale request behind. */
+ if (unlikely(ve->request)) {
+ struct i915_request *old;
+
+ spin_lock_irq(&ve->base.active.lock);
+
+ old = fetch_and_zero(&ve->request);
+ if (old) {
+ GEM_BUG_ON(!i915_request_completed(old));
+ __i915_request_submit(old);
+ i915_request_put(old);
+ }
+
+ spin_unlock_irq(&ve->base.active.lock);
+ }
+
+ /*
+ * Flush the tasklet in case it is still running on another core.
+ *
+ * This needs to be done before we remove ourselves from the siblings'
+ * rbtrees as in the case it is running in parallel, it may reinsert
+ * the rb_node into a sibling.
+ */
+ tasklet_kill(&ve->base.execlists.tasklet);
+
+ /* Decouple ourselves from the siblings, no more access allowed. */
for (n = 0; n < ve->num_siblings; n++) {
struct intel_engine_cs *sibling = ve->siblings[n];
struct rb_node *node = &ve->nodes[sibling->id].rb;
- unsigned long flags;
if (RB_EMPTY_NODE(node))
continue;
- spin_lock_irqsave(&sibling->active.lock, flags);
+ spin_lock_irq(&sibling->active.lock);
/* Detachment is lazily performed in the execlists tasklet */
if (!RB_EMPTY_NODE(node))
rb_erase_cached(node, &sibling->execlists.virtual);
- spin_unlock_irqrestore(&sibling->active.lock, flags);
+ spin_unlock_irq(&sibling->active.lock);
}
GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+ GEM_BUG_ON(!list_empty(virtual_queue(ve)));
if (ve->context.state)
__execlists_context_fini(&ve->context);
intel_context_fini(&ve->context);
+ intel_breadcrumbs_free(ve->base.breadcrumbs);
intel_engine_free_request_pool(&ve->base);
kfree(ve->bonds);
kfree(ve);
}
+static void virtual_context_destroy(struct kref *kref)
+{
+ struct virtual_engine *ve =
+ container_of(kref, typeof(*ve), context.ref);
+
+ GEM_BUG_ON(!list_empty(&ve->context.signals));
+
+ /*
+ * When destroying the virtual engine, we have to be aware that
+ * it may still be in use from a hardirq/softirq context causing
+ * the resubmission of a completed request (background completion
+ * due to preempt-to-busy). Before we can free the engine, we need
+ * to flush the submission code and tasklets that are still potentially
+ * accessing the engine. Flushing the tasklets requires process context,
+ * and since we can guard the resubmit onto the engine with an RCU read
+ * lock, we can delegate the free of the engine to an RCU worker.
+ */
+ INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
+ queue_rcu_work(system_wq, &ve->rcu);
+}
+
static void virtual_engine_initial_hint(struct virtual_engine *ve)
{
int swp;
@@ -5909,18 +6029,6 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
return 0;
}
-struct intel_engine_cs *
-intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
- unsigned int sibling)
-{
- struct virtual_engine *ve = to_virtual_engine(engine);
-
- if (sibling >= ve->num_siblings)
- return NULL;
-
- return ve->siblings[sibling];
-}
-
void intel_execlists_show_requests(struct intel_engine_cs *engine,
struct drm_printer *m,
void (*show_request)(struct drm_printer *m,
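The deferred destroy above leans on queue_rcu_work() for two guarantees at once: the release runs only after an RCU grace period (so rcu_read_lock() sections that can still resubmit onto the engine drain first) and in process context (so tasklet_kill() is allowed to block). A stripped-down sketch of the same pattern, with a hypothetical wrapper struct in place of virtual_engine:

	struct engine_wrapper {
		struct rcu_work rcu;
		struct tasklet_struct tasklet;
	};

	static void engine_destroy_work(struct work_struct *wrk)
	{
		struct engine_wrapper *e =
			container_of(wrk, struct engine_wrapper, rcu.work);

		tasklet_kill(&e->tasklet);	/* may sleep: needs process context */
		kfree(e);
	}

	/* kref release path: defer past RCU readers, then free in a worker */
	INIT_RCU_WORK(&e->rcu, engine_destroy_work);
	queue_rcu_work(system_wq, &e->rcu);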
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 91fd8e452d9b..c2d287f25497 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -121,10 +121,6 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
const struct intel_engine_cs *master,
const struct intel_engine_cs *sibling);
-struct intel_engine_cs *
-intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
- unsigned int sibling);
-
bool
intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 93cb6c460508..1b51f7b9a5c3 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -49,4 +49,7 @@
#define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x1A
#define GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0xD
+#define GEN8_EXECLISTS_STATUS_BUF 0x370
+#define GEN11_EXECLISTS_STATUS_BUF2 0x3c0
+
#endif /* _INTEL_LRC_REG_H_ */
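These status-buffer offsets back the wa_csb_read() mmio fallback added to intel_lrc.c above. The stale-entry detection relies on re-poisoning each CSB slot after use: a reduced sketch of that idea, where wait_for_real_value() is a hypothetical stand-in for the mmio fallback:

	static u64 consume_csb_slot(u64 *slot)
	{
		u64 entry = READ_ONCE(*slot);

		if (unlikely(entry == -1))	/* poison: the GPU write is not visible yet */
			entry = wait_for_real_value(slot);

		WRITE_ONCE(*slot, -1);		/* re-poison so stale reuse is detectable */
		return entry;
	}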
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index b8f56e62158e..ab6870242e18 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -59,8 +59,7 @@ struct drm_i915_mocs_table {
#define _L3_CACHEABILITY(value) ((value) << 4)
/* Helper defines */
-#define GEN9_NUM_MOCS_ENTRIES 62 /* 62 out of 64 - 63 & 64 are reserved. */
-#define GEN11_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */
+#define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */
/* (e)LLC caching options */
/*
@@ -109,7 +108,7 @@ struct drm_i915_mocs_table {
* they will be initialized to PTE. Gen >= 12 onwards don't have a setting for
* PTE and will be initialized to an invalid value.
*
- * The last two entries are reserved by the hardware. For ICL+ they
+ * The last few entries are reserved by the hardware. For ICL+ they
* should be initialized according to bspec and never used, for older
* platforms they should never be written to.
*
@@ -124,14 +123,26 @@ struct drm_i915_mocs_table {
LE_1_UC | LE_TC_2_LLC_ELLC, \
L3_1_UC), \
MOCS_ENTRY(I915_MOCS_PTE, \
- LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \
+ LE_0_PAGETABLE | LE_TC_0_PAGETABLE | LE_LRUM(3), \
L3_3_WB)
static const struct drm_i915_mocs_entry skl_mocs_table[] = {
GEN9_MOCS_ENTRIES,
MOCS_ENTRY(I915_MOCS_CACHED,
LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
- L3_3_WB)
+ L3_3_WB),
+
+ /*
+ * mocs:63
+ * - used by the L3 for all of its evictions.
+ * Thus it is expected to allow LLC cacheability to enable coherent
+ * flows to be maintained.
+ * - used to force L3 uncachable cycles.
+ * Thus it is expected to make the surface L3 uncacheable.
+ */
+ MOCS_ENTRY(63,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_1_UC)
};
/* NOTE: the LE_TGT_CACHE is not used on Broxton */
@@ -243,8 +254,9 @@ static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
* only, __init_mocs_table() take care to program unused index with
* this entry.
*/
- MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
- L3_3_WB),
+ MOCS_ENTRY(I915_MOCS_PTE,
+ LE_0_PAGETABLE | LE_TC_0_PAGETABLE,
+ L3_1_UC),
GEN11_MOCS_ENTRIES,
/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
@@ -280,12 +292,45 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = {
L3_1_UC),
/* Base - L3 + LeCC:PAT (Deprecated) */
MOCS_ENTRY(I915_MOCS_PTE,
- LE_0_PAGETABLE | LE_TC_1_LLC,
+ LE_0_PAGETABLE | LE_TC_0_PAGETABLE,
L3_3_WB),
GEN11_MOCS_ENTRIES
};
+static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
+ /* Error */
+ MOCS_ENTRY(0, 0, L3_0_DIRECT),
+
+ /* UC */
+ MOCS_ENTRY(1, 0, L3_1_UC),
+
+ /* Reserved */
+ MOCS_ENTRY(2, 0, L3_0_DIRECT),
+ MOCS_ENTRY(3, 0, L3_0_DIRECT),
+ MOCS_ENTRY(4, 0, L3_0_DIRECT),
+
+ /* WB - L3 */
+ MOCS_ENTRY(5, 0, L3_3_WB),
+ /* WB - L3 50% */
+ MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB),
+ /* WB - L3 25% */
+ MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB),
+ /* WB - L3 12.5% */
+ MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB),
+
+ /* HDC:L1 + L3 */
+ MOCS_ENTRY(48, 0, L3_3_WB),
+ /* HDC:L1 */
+ MOCS_ENTRY(49, 0, L3_1_UC),
+
+ /* HW Reserved */
+ MOCS_ENTRY(60, 0, L3_1_UC),
+ MOCS_ENTRY(61, 0, L3_1_UC),
+ MOCS_ENTRY(62, 0, L3_1_UC),
+ MOCS_ENTRY(63, 0, L3_1_UC),
+};
+
enum {
HAS_GLOBAL_MOCS = BIT(0),
HAS_ENGINE_MOCS = BIT(1),
@@ -312,14 +357,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
{
unsigned int flags;
- if (INTEL_GEN(i915) >= 12) {
+ if (IS_DG1(i915)) {
+ table->size = ARRAY_SIZE(dg1_mocs_table);
+ table->table = dg1_mocs_table;
+ table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+ } else if (INTEL_GEN(i915) >= 12) {
table->size = ARRAY_SIZE(tgl_mocs_table);
table->table = tgl_mocs_table;
- table->n_entries = GEN11_NUM_MOCS_ENTRIES;
+ table->n_entries = GEN9_NUM_MOCS_ENTRIES;
} else if (IS_GEN(i915, 11)) {
table->size = ARRAY_SIZE(icl_mocs_table);
table->table = icl_mocs_table;
- table->n_entries = GEN11_NUM_MOCS_ENTRIES;
+ table->n_entries = GEN9_NUM_MOCS_ENTRIES;
} else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) {
table->size = ARRAY_SIZE(skl_mocs_table);
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index ab675d35030d..d7b8e4457fc2 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -56,9 +56,12 @@ static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
static void gen11_rc6_enable(struct intel_rc6 *rc6)
{
- struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_gt *gt = rc6_to_gt(rc6);
+ struct intel_uncore *uncore = gt->uncore;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ u32 pg_enable;
+ int i;
/* 2b: Program RC6 thresholds.*/
set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
@@ -102,10 +105,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
GEN6_RC_CTL_RC6_ENABLE |
GEN6_RC_CTL_EI_MODE(1);
- set(uncore, GEN9_PG_ENABLE,
- GEN9_RENDER_PG_ENABLE |
- GEN9_MEDIA_PG_ENABLE |
- GEN11_MEDIA_SAMPLER_PG_ENABLE);
+ pg_enable =
+ GEN9_RENDER_PG_ENABLE |
+ GEN9_MEDIA_PG_ENABLE |
+ GEN11_MEDIA_SAMPLER_PG_ENABLE;
+
+ if (INTEL_GEN(gt->i915) >= 12) {
+ for (i = 0; i < I915_MAX_VCS; i++)
+ if (HAS_ENGINE(gt, _VCS(i)))
+ pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) |
+ VDN_MFX_POWERGATE_ENABLE(i));
+ }
+
+ set(uncore, GEN9_PG_ENABLE, pg_enable);
}
static void gen9_rc6_enable(struct intel_rc6 *rc6)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index ac36b67fb46b..3654c955e6be 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -19,6 +19,7 @@
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
#include "intel_reset.h"
#include "uc/intel_guc.h"
@@ -1190,14 +1191,14 @@ static void intel_gt_reset_global(struct intel_gt *gt,
/* Use a watchdog to ensure that our reset completes */
intel_wedge_on_timeout(&w, gt, 5 * HZ) {
- intel_prepare_reset(gt->i915);
+ intel_display_prepare_reset(gt->i915);
/* Flush everyone using a resource about to be clobbered */
synchronize_srcu_expedited(&gt->reset.backoff_srcu);
intel_gt_reset(gt, engine_mask, reason);
- intel_finish_reset(gt->i915);
+ intel_display_finish_reset(gt->i915);
}
if (!test_bit(I915_WEDGED, &gt->reset.flags))
@@ -1250,7 +1251,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
engine_mask &= gt->info.engine_mask;
if (flags & I915_ERROR_CAPTURE) {
- i915_capture_error_state(gt->i915);
+ i915_capture_error_state(gt, engine_mask);
intel_gt_clear_error_registers(gt, engine_mask);
}
@@ -1370,6 +1371,7 @@ void intel_gt_set_wedged_on_fini(struct intel_gt *gt)
{
intel_gt_set_wedged(gt);
set_bit(I915_WEDGED_ON_FINI, &gt->reset.flags);
+ intel_gt_retire_requests(gt); /* cleanup any wedged requests */
}
void intel_gt_init_reset(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 16b48e72c369..a41b43f445b8 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -444,6 +444,7 @@ static void reset_cancel(struct intel_engine_cs *engine)
i915_request_set_error_once(request, -EIO);
i915_request_mark_complete(request);
}
+ intel_engine_signal_breadcrumbs(engine);
/* Remaining _unready_ requests will be nop'ed when submitted */
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index e6a00eea0631..b629eeb14002 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -390,6 +390,16 @@ static void gen5_rps_update(struct intel_rps *rps)
spin_unlock_irq(&mchdev_lock);
}
+static unsigned int gen5_invert_freq(struct intel_rps *rps,
+ unsigned int val)
+{
+ /* Invert the frequency bin into an ips delay */
+ val = rps->max_freq - val;
+ val = rps->min_freq + val;
+
+ return val;
+}
+
static bool gen5_rps_set(struct intel_rps *rps, u8 val)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
@@ -404,8 +414,7 @@ static bool gen5_rps_set(struct intel_rps *rps, u8 val)
}
/* Invert the frequency bin into an ips delay */
- val = rps->max_freq - val;
- val = rps->min_freq + val;
+ val = gen5_invert_freq(rps, val);
rgvswctl =
(MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
@@ -500,6 +509,7 @@ static unsigned int init_emon(struct intel_uncore *uncore)
static bool gen5_rps_enable(struct intel_rps *rps)
{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
u8 fstart, vstart;
u32 rgvmodectl;
@@ -557,6 +567,10 @@ static bool gen5_rps_enable(struct intel_rps *rps)
rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
rps->ips.last_time2 = ktime_get_raw_ns();
+ spin_lock(&i915->irq_lock);
+ ilk_enable_display_irq(i915, DE_PCU_EVENT);
+ spin_unlock(&i915->irq_lock);
+
spin_unlock_irq(&mchdev_lock);
rps->ips.corr = init_emon(uncore);
@@ -566,11 +580,16 @@ static bool gen5_rps_enable(struct intel_rps *rps)
static void gen5_rps_disable(struct intel_rps *rps)
{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
u16 rgvswctl;
spin_lock_irq(&mchdev_lock);
+ spin_lock(&i915->irq_lock);
+ ilk_disable_display_irq(i915, DE_PCU_EVENT);
+ spin_unlock(&i915->irq_lock);
+
rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
/* Ack interrupts, disable EFC interrupt */
@@ -578,11 +597,6 @@ static void gen5_rps_disable(struct intel_rps *rps)
intel_uncore_read(uncore, MEMINTREN) &
~MEMINT_EVAL_CHG_EN);
intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
- intel_uncore_write(uncore, DEIER,
- intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT);
- intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT);
- intel_uncore_write(uncore, DEIMR,
- intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT);
/* Go back to the starting frequency */
gen5_rps_set(rps, rps->idle_freq);
@@ -883,6 +897,10 @@ void intel_rps_park(struct intel_rps *rps)
adj = -2;
rps->last_adj = adj;
rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
+ if (rps->cur_freq < rps->efficient_freq) {
+ rps->cur_freq = rps->efficient_freq;
+ rps->last_adj = 0;
+ }
GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
}
@@ -1272,8 +1290,9 @@ static unsigned long __ips_gfx_val(struct intel_ips *ips)
{
struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
struct intel_uncore *uncore = rps_to_uncore(rps);
- unsigned long t, corr, state1, corr2, state2;
+ unsigned int t, state1, state2;
u32 pxvid, ext_v;
+ u64 corr, corr2;
lockdep_assert_held(&mchdev_lock);
@@ -1294,11 +1313,10 @@ static unsigned long __ips_gfx_val(struct intel_ips *ips)
else /* < 50 */
corr = t * 301 + 1004;
- corr = corr * 150142 * state1 / 10000 - 78642;
- corr /= 100000;
- corr2 = corr * ips->corr;
+ corr = div_u64(corr * 150142 * state1, 10000) - 78642;
+ corr2 = div_u64(corr, 100000) * ips->corr;
- state2 = corr2 * state1 / 10000;
+ state2 = div_u64(corr2 * state1, 10000);
state2 /= 100; /* convert to mW */
__gen5_ips_update(ips);
@@ -1432,8 +1450,10 @@ int intel_gpu_freq(struct intel_rps *rps, int val)
return chv_gpu_freq(rps, val);
else if (IS_VALLEYVIEW(i915))
return byt_gpu_freq(rps, val);
- else
+ else if (INTEL_GEN(i915) >= 6)
return val * GT_FREQUENCY_MULTIPLIER;
+ else
+ return val;
}
int intel_freq_opcode(struct intel_rps *rps, int val)
@@ -1447,8 +1467,10 @@ int intel_freq_opcode(struct intel_rps *rps, int val)
return chv_freq_opcode(rps, val);
else if (IS_VALLEYVIEW(i915))
return byt_freq_opcode(rps, val);
- else
+ else if (INTEL_GEN(i915) >= 6)
return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
+ else
+ return val;
}
static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
@@ -1864,8 +1886,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
- else
+ else if (INTEL_GEN(i915) >= 6)
cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
+ else
+ cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >>
+ MEMSTAT_PSTATE_SHIFT);
return cagf;
}
@@ -1873,14 +1898,17 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
static u32 read_cagf(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
u32 freq;
if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
+ } else if (INTEL_GEN(i915) >= 6) {
+ freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
} else {
- freq = intel_uncore_read(rps_to_uncore(rps), GEN6_RPSTAT1);
+ freq = intel_uncore_read(uncore, MEMSTAT_ILK);
}
return intel_rps_get_cagf(rps, freq);
@@ -1949,7 +1977,7 @@ static struct drm_i915_private *mchdev_get(void)
rcu_read_lock();
i915 = rcu_dereference(ips_mchdev);
- if (!kref_get_unless_zero(&i915->drm.ref))
+ if (i915 && !kref_get_unless_zero(&i915->drm.ref))
i915 = NULL;
rcu_read_unlock();
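gen5_invert_freq reflects a value across the [min_freq, max_freq] interval, so it is its own inverse; that is what lets intel_rps_get_cagf reuse the same helper to turn the MEMSTAT pstate field read from MEMSTAT_ILK back into a frequency. A minimal userspace sketch, assuming min_freq <= val <= max_freq:

    #include <assert.h>
    #include <stdio.h>

    struct rps { unsigned int min_freq, max_freq; };

    static unsigned int gen5_invert_freq(const struct rps *rps, unsigned int val)
    {
        /* Invert the frequency bin into an ips delay (and back). */
        val = rps->max_freq - val;
        val = rps->min_freq + val;
        return val;
    }

    int main(void)
    {
        struct rps rps = { .min_freq = 3, .max_freq = 16 };
        unsigned int v;

        /* Applying the reflection twice returns the original bin. */
        for (v = rps.min_freq; v <= rps.max_freq; v++)
            assert(gen5_invert_freq(&rps, gen5_invert_freq(&rps, v)) == v);

        printf("invert(min)=%u invert(max)=%u\n",
               gen5_invert_freq(&rps, rps.min_freq),
               gen5_invert_freq(&rps, rps.max_freq));
        return 0;
    }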
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index f1c039e1b5ad..8a72e0fe34ca 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -169,7 +169,7 @@ static void gen11_sseu_info_init(struct intel_gt *gt)
u8 eu_en;
u8 s_en;
- if (IS_ELKHARTLAKE(gt->i915))
+ if (IS_JSL_EHL(gt->i915))
intel_sseu_set_info(sseu, 1, 4, 8);
else
intel_sseu_set_info(sseu, 1, 8, 8);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index a2f74cefe4c3..7ea94d201fe6 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -188,10 +188,14 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
return cl;
}
-static void cacheline_acquire(struct intel_timeline_cacheline *cl)
+static void cacheline_acquire(struct intel_timeline_cacheline *cl,
+ u32 ggtt_offset)
{
- if (cl)
- i915_active_acquire(&cl->active);
+ if (!cl)
+ return;
+
+ cl->ggtt_offset = ggtt_offset;
+ i915_active_acquire(&cl->active);
}
static void cacheline_release(struct intel_timeline_cacheline *cl)
@@ -340,7 +344,7 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
tl->fence_context, tl->hwsp_offset);
- cacheline_acquire(tl->hwsp_cacheline);
+ cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset);
if (atomic_fetch_inc(&tl->pin_count)) {
cacheline_release(tl->hwsp_cacheline);
__i915_vma_unpin(tl->hwsp_ggtt);
@@ -515,7 +519,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
tl->fence_context, tl->hwsp_offset);
- cacheline_acquire(cl);
+ cacheline_acquire(cl, tl->hwsp_offset);
tl->hwsp_cacheline = cl;
*seqno = timeline_advance(tl);
@@ -573,9 +577,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
if (err)
goto out;
- *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
- ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
-
+ *hwsp = cl->ggtt_offset;
out:
i915_active_release(&cl->active);
return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 02181c5020db..4474f487f589 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -94,6 +94,8 @@ struct intel_timeline_cacheline {
struct intel_timeline_hwsp *hwsp;
void *vaddr;
+ u32 ggtt_offset;
+
struct rcu_head rcu;
};
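The new ggtt_offset field caches what readers previously recomputed from the vma plus a cacheline index stuffed into the low bits of cl->vaddr. A minimal userspace sketch of that low-bit packing trick, assuming CACHELINE_BYTES = 64 and an allocation aligned well past CACHELINE_BYTES so the bottom CACHELINE_BITS of the pointer are free:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define CACHELINE_BYTES 64
    #define CACHELINE_BITS  6 /* log2(CACHELINE_BYTES) */

    /* Pack a cacheline index into the low bits of an aligned pointer. */
    static void *ptr_pack_bits(void *ptr, unsigned long bits)
    {
        return (void *)((uintptr_t)ptr | bits);
    }

    static unsigned long ptr_unmask_bits(void *ptr, unsigned int n)
    {
        return (uintptr_t)ptr & ((1ul << n) - 1);
    }

    int main(void)
    {
        void *page, *vaddr;
        unsigned long idx = 37; /* cacheline 37 within the HWSP page */

        assert(posix_memalign(&page, 4096, 4096) == 0);
        vaddr = ptr_pack_bits(page, idx);

        /* Offset within the page, as the old read-back path computed it. */
        printf("offset = %lu\n",
               ptr_unmask_bits(vaddr, CACHELINE_BITS) * CACHELINE_BYTES);
        free(page);
        return 0;
    }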
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 6c580d0d9ea8..adc9a8ea410a 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -131,8 +131,10 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
return;
}
- if (wal->list)
+ if (wal->list) {
memcpy(list, wal->list, sizeof(*wa) * wal->count);
+ kfree(wal->list);
+ }
wal->list = list;
}
@@ -672,6 +674,20 @@ static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
0);
}
+static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ gen12_ctx_workarounds_init(engine, wal);
+
+ /* Wa_1409044764 */
+ WA_CLR_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
+ DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);
+
+ /* Wa_22010493298 */
+ WA_SET_BIT_MASKED(HIZ_CHICKEN,
+ DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
+}
+
static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
struct i915_wa_list *wal,
@@ -684,7 +700,9 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
wa_init_start(wal, name, engine->name);
- if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915))
+ if (IS_DG1(i915))
+ dg1_ctx_workarounds_init(engine, wal);
+ else if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915))
tgl_ctx_workarounds_init(engine, wal);
else if (IS_GEN(i915, 12))
gen12_ctx_workarounds_init(engine, wal);
@@ -1212,7 +1230,7 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
/* Wa_1607087056:icl,ehl,jsl */
if (IS_ICELAKE(i915) ||
- IS_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) {
+ IS_JSL_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) {
wa_write_or(wal,
SLICE_UNIT_LEVEL_CLKGATE,
L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
@@ -1245,9 +1263,35 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
+dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ gen12_gt_workarounds_init(i915, wal);
+
+ /* Wa_1607087056:dg1 */
+ if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0))
+ wa_write_or(wal,
+ SLICE_UNIT_LEVEL_CLKGATE,
+ L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
+
+ /* Wa_1409420604:dg1 */
+ if (IS_DG1(i915))
+ wa_write_or(wal,
+ SUBSLICE_UNIT_LEVEL_CLKGATE2,
+ CPSSUNIT_CLKGATE_DIS);
+
+ /* Wa_1408615072:dg1 */
+ /* Empirical testing shows this register is unaffected by engine reset. */
+ if (IS_DG1(i915))
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
+ VSUNIT_CLKGATE_DIS_TGL);
+}
+
+static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
- if (IS_TIGERLAKE(i915))
+ if (IS_DG1(i915))
+ dg1_gt_workarounds_init(i915, wal);
+ else if (IS_TIGERLAKE(i915))
tgl_gt_workarounds_init(i915, wal);
else if (IS_GEN(i915, 12))
gen12_gt_workarounds_init(i915, wal);
@@ -1612,6 +1656,20 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
}
}
+static void dg1_whitelist_build(struct intel_engine_cs *engine)
+{
+ struct i915_wa_list *w = &engine->whitelist;
+
+ tgl_whitelist_build(engine);
+
+ /* GEN:BUG:1409280441:dg1 */
+ if (IS_DG1_REVID(engine->i915, DG1_REVID_A0, DG1_REVID_A0) &&
+ (engine->class == RENDER_CLASS ||
+ engine->class == COPY_ENGINE_CLASS))
+ whitelist_reg_ext(w, RING_ID(engine->mmio_base),
+ RING_FORCE_TO_NONPRIV_ACCESS_RD);
+}
+
void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
@@ -1619,7 +1677,9 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
wa_init_start(w, "whitelist", engine->name);
- if (IS_GEN(i915, 12))
+ if (IS_DG1(i915))
+ dg1_whitelist_build(engine);
+ else if (IS_GEN(i915, 12))
tgl_whitelist_build(engine);
else if (IS_GEN(i915, 11))
icl_whitelist_build(engine);
@@ -1673,15 +1733,18 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
- if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
+ if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
+ IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
/*
- * Wa_1607138336:tgl
- * Wa_1607063988:tgl
+ * Wa_1607138336:tgl[a0],dg1[a0]
+ * Wa_1607063988:tgl[a0],dg1[a0]
*/
wa_write_or(wal,
GEN9_CTX_PREEMPT_REG,
GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
+ }
+ if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
/*
* Wa_1606679103:tgl
* (see also Wa_1606682166:icl)
@@ -1695,35 +1758,41 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
VSUNIT_CLKGATE_DIS_TGL);
}
- if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
- /* Wa_1606931601:tgl,rkl */
+ if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
+ /* Wa_1606931601:tgl,rkl,dg1 */
wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
- /* Wa_1409804808:tgl,rkl */
+ /*
+ * Wa_1407928979:tgl A*
+ * Wa_18011464164:tgl[B0+],dg1[B0+]
+ * Wa_22010931296:tgl[B0+],dg1[B0+]
+	 * Wa_14010919138:rkl,dg1

+ */
+ wa_write_or(wal, GEN7_FF_THREAD_MODE,
+ GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
+ }
+
+ if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
+ IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
+ /* Wa_1409804808:tgl,rkl,dg1[a0] */
wa_masked_en(wal, GEN7_ROW_CHICKEN2,
GEN12_PUSH_CONST_DEREF_HOLD_DIS);
/*
* Wa_1409085225:tgl
- * Wa_14010229206:tgl,rkl
+ * Wa_14010229206:tgl,rkl,dg1[a0]
*/
wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
/*
- * Wa_1407928979:tgl A*
- * Wa_18011464164:tgl B0+
- * Wa_22010931296:tgl B0+
- * Wa_14010919138:rkl,tgl
- */
- wa_write_or(wal, GEN7_FF_THREAD_MODE,
- GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
-
- /*
* Wa_1607030317:tgl
* Wa_1607186500:tgl
- * Wa_1607297627:tgl,rkl there are multiple entries for this
- * WA in the BSpec; some indicate this is an A0-only WA,
- * others indicate it applies to all steppings.
+ * Wa_1607297627:tgl,rkl,dg1[a0]
+ *
+ * On TGL and RKL there are multiple entries for this WA in the
+ * BSpec; some indicate this is an A0-only WA, others indicate
+ * it applies to all steppings so we trust the "all steppings."
+ * For DG1 this only applies to A0.
*/
wa_masked_en(wal,
GEN6_RC_SLEEP_PSMI_CONTROL,
@@ -1839,7 +1908,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
/* Wa_22010271021:ehl */
- if (IS_ELKHARTLAKE(i915))
+ if (IS_JSL_EHL(i915))
wa_masked_en(wal,
GEN9_CS_DEBUG_MODE1,
FF_DOP_CLOCK_GATE_DISABLE);
@@ -2031,10 +2100,12 @@ err_obj:
return ERR_PTR(err);
}
-static const struct {
+struct mcr_range {
u32 start;
u32 end;
-} mcr_ranges_gen8[] = {
+};
+
+static const struct mcr_range mcr_ranges_gen8[] = {
{ .start = 0x5500, .end = 0x55ff },
{ .start = 0x7000, .end = 0x7fff },
{ .start = 0x9400, .end = 0x97ff },
@@ -2043,11 +2114,25 @@ static const struct {
{},
};
+static const struct mcr_range mcr_ranges_gen12[] = {
+ { .start = 0x8150, .end = 0x815f },
+ { .start = 0x9520, .end = 0x955f },
+ { .start = 0xb100, .end = 0xb3ff },
+ { .start = 0xde80, .end = 0xe8ff },
+ { .start = 0x24a00, .end = 0x24a7f },
+ {},
+};
+
static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
+ const struct mcr_range *mcr_ranges;
int i;
- if (INTEL_GEN(i915) < 8)
+ if (INTEL_GEN(i915) >= 12)
+ mcr_ranges = mcr_ranges_gen12;
+ else if (INTEL_GEN(i915) >= 8)
+ mcr_ranges = mcr_ranges_gen8;
+ else
return false;
/*
@@ -2055,9 +2140,9 @@ static bool mcr_range(struct drm_i915_private *i915, u32 offset)
* which only controls CPU initiated MMIO. Routing does not
* work for CS access so we cannot verify them on this path.
*/
- for (i = 0; mcr_ranges_gen8[i].start; i++)
- if (offset >= mcr_ranges_gen8[i].start &&
- offset <= mcr_ranges_gen8[i].end)
+ for (i = 0; mcr_ranges[i].start; i++)
+ if (offset >= mcr_ranges[i].start &&
+ offset <= mcr_ranges[i].end)
return true;
return false;
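mcr_range now selects a per-generation table and skips verification for any offset inside a multicast range, since CPU MMIO to those offsets is steered and cannot be checked from the CS path. A minimal userspace sketch of the sentinel-terminated lookup over the gen12 ranges quoted above:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct mcr_range { uint32_t start, end; };

    static const struct mcr_range mcr_ranges_gen12[] = {
        { .start = 0x8150, .end = 0x815f },
        { .start = 0x9520, .end = 0x955f },
        { .start = 0xb100, .end = 0xb3ff },
        { .start = 0xde80, .end = 0xe8ff },
        { .start = 0x24a00, .end = 0x24a7f },
        { 0 }, /* sentinel: start == 0 terminates the scan */
    };

    static bool mcr_range(const struct mcr_range *ranges, uint32_t offset)
    {
        int i;

        for (i = 0; ranges[i].start; i++)
            if (offset >= ranges[i].start && offset <= ranges[i].end)
                return true; /* steered register; skip readback check */

        return false;
    }

    int main(void)
    {
        printf("0xb200 in MCR range: %d\n", mcr_range(mcr_ranges_gen12, 0xb200));
        printf("0x2358 in MCR range: %d\n", mcr_range(mcr_ranges_gen12, 0x2358));
        return 0;
    }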
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index dfd1cfb8a7ec..2f830017c51d 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -245,18 +245,39 @@ static void mock_reset_rewind(struct intel_engine_cs *engine, bool stalled)
GEM_BUG_ON(stalled);
}
+static void mark_eio(struct i915_request *rq)
+{
+ if (i915_request_completed(rq))
+ return;
+
+ GEM_BUG_ON(i915_request_signaled(rq));
+
+ i915_request_set_error_once(rq, -EIO);
+ i915_request_mark_complete(rq);
+}
+
static void mock_reset_cancel(struct intel_engine_cs *engine)
{
- struct i915_request *request;
+ struct mock_engine *mock =
+ container_of(engine, typeof(*mock), base);
+ struct i915_request *rq;
unsigned long flags;
+ del_timer_sync(&mock->hw_delay);
+
spin_lock_irqsave(&engine->active.lock, flags);
/* Mark all submitted requests as skipped. */
- list_for_each_entry(request, &engine->active.requests, sched.link) {
- i915_request_set_error_once(request, -EIO);
- i915_request_mark_complete(request);
+ list_for_each_entry(rq, &engine->active.requests, sched.link)
+ mark_eio(rq);
+ intel_engine_signal_breadcrumbs(engine);
+
+ /* Cancel and submit all pending requests. */
+ list_for_each_entry(rq, &mock->hw_queue, mock.link) {
+ mark_eio(rq);
+ __i915_request_submit(rq);
}
+ INIT_LIST_HEAD(&mock->hw_queue);
spin_unlock_irqrestore(&engine->active.lock, flags);
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index e73854dd2fe0..b88aa35ad75b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -215,16 +215,17 @@ static int __live_heartbeat_fast(struct intel_engine_cs *engine)
goto err_pm;
for (i = 0; i < ARRAY_SIZE(times); i++) {
- /* Manufacture a tick */
do {
- while (READ_ONCE(engine->heartbeat.systole))
- flush_delayed_work(&engine->heartbeat.work);
+ /* Manufacture a tick */
+ intel_engine_park_heartbeat(engine);
+ GEM_BUG_ON(engine->heartbeat.systole);
+ engine->serial++; /* pretend we are not idle! */
+ intel_engine_unpark_heartbeat(engine);
- engine->serial++; /* quick, pretend we are not idle! */
flush_delayed_work(&engine->heartbeat.work);
if (!delayed_work_pending(&engine->heartbeat.work)) {
- pr_err("%s: heartbeat did not start\n",
- engine->name);
+ pr_err("%s: heartbeat %d did not start\n",
+ engine->name, i);
err = -EINVAL;
goto err_pm;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 3540ba9bd459..aa5675ecb5cc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -219,7 +219,7 @@ int live_rps_clock_interval(void *arg)
struct igt_spinner spin;
int err = 0;
- if (!intel_rps_is_enabled(rps))
+ if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6)
return 0;
if (igt_spinner_init(&spin, gt))
@@ -1028,7 +1028,7 @@ int live_rps_interrupt(void *arg)
* First, let's check whether or not we are receiving interrupts.
*/
- if (!intel_rps_has_interrupts(rps))
+ if (!intel_rps_has_interrupts(rps) || INTEL_GEN(gt->i915) < 6)
return 0;
intel_gt_pm_get(gt);
@@ -1133,7 +1133,7 @@ int live_rps_power(void *arg)
* that theory.
*/
- if (!intel_rps_is_enabled(rps))
+ if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6)
return 0;
if (!librapl_energy_uJ())
@@ -1237,7 +1237,7 @@ int live_rps_dynamic(void *arg)
* moving parts into dynamic reclocking based on load.
*/
- if (!intel_rps_is_enabled(rps))
+ if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6)
return 0;
if (igt_spinner_init(&spin, gt))
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 19c2cb166e7c..2edf2b15885f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -17,8 +17,9 @@
#include "../selftests/i915_random.h"
#include "../i915_selftest.h"
-#include "../selftests/igt_flush_test.h"
-#include "../selftests/mock_gem_device.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/lib_sw_fence.h"
+#include "selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"
static struct page *hwsp_page(struct intel_timeline *tl)
@@ -755,6 +756,378 @@ out_free:
return err;
}
+static int emit_read_hwsp(struct i915_request *rq,
+ u32 seqno, u32 hwsp,
+ u32 *addr)
+{
+ const u32 gpr = i915_mmio_reg_offset(GEN8_RING_CS_GPR(rq->engine->mmio_base, 0));
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 12);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = *addr;
+ *cs++ = 0;
+ *cs++ = seqno;
+ *addr += 4;
+
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = gpr;
+ *cs++ = hwsp;
+ *cs++ = 0;
+
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = gpr;
+ *cs++ = *addr;
+ *cs++ = 0;
+ *addr += 4;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+struct hwsp_watcher {
+ struct i915_vma *vma;
+ struct i915_request *rq;
+ u32 addr;
+ u32 *map;
+};
+
+static bool cmp_lt(u32 a, u32 b)
+{
+ return a < b;
+}
+
+static bool cmp_gte(u32 a, u32 b)
+{
+ return a >= b;
+}
+
+static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+
+ obj = i915_gem_object_create_internal(gt->i915, SZ_2M);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ w->map = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(w->map)) {
+ i915_gem_object_put(obj);
+ return PTR_ERR(w->map);
+ }
+
+ vma = i915_gem_object_ggtt_pin_ww(obj, NULL, NULL, 0, 0, 0);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return PTR_ERR(vma);
+ }
+
+ w->vma = vma;
+ w->addr = i915_ggtt_offset(vma);
+ return 0;
+}
+
+static int create_watcher(struct hwsp_watcher *w,
+ struct intel_engine_cs *engine,
+ int ringsz)
+{
+ struct intel_context *ce;
+ struct intel_timeline *tl;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ ce->ring = __intel_context_ring_size(ringsz);
+ w->rq = intel_context_create_request(ce);
+ intel_context_put(ce);
+ if (IS_ERR(w->rq))
+ return PTR_ERR(w->rq);
+
+ w->addr = i915_ggtt_offset(w->vma);
+ tl = w->rq->context->timeline;
+
+ /* some light mutex juggling required; think co-routines */
+ lockdep_unpin_lock(&tl->mutex, w->rq->cookie);
+ mutex_unlock(&tl->mutex);
+
+ return 0;
+}
+
+static int check_watcher(struct hwsp_watcher *w, const char *name,
+ bool (*op)(u32 hwsp, u32 seqno))
+{
+ struct i915_request *rq = fetch_and_zero(&w->rq);
+ struct intel_timeline *tl = rq->context->timeline;
+ u32 offset, end;
+ int err;
+
+ GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size);
+
+ i915_request_get(rq);
+ mutex_lock(&tl->mutex);
+ rq->cookie = lockdep_pin_lock(&tl->mutex);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ) < 0) {
+ err = -ETIME;
+ goto out;
+ }
+
+ err = 0;
+ offset = 0;
+ end = (w->addr - i915_ggtt_offset(w->vma)) / sizeof(*w->map);
+ while (offset < end) {
+ if (!op(w->map[offset + 1], w->map[offset])) {
+ pr_err("Watcher '%s' found HWSP value %x for seqno %x\n",
+ name, w->map[offset + 1], w->map[offset]);
+ err = -EINVAL;
+ }
+
+ offset += 2;
+ }
+
+out:
+ i915_request_put(rq);
+ return err;
+}
+
+static void cleanup_watcher(struct hwsp_watcher *w)
+{
+ if (w->rq) {
+ struct intel_timeline *tl = w->rq->context->timeline;
+
+ mutex_lock(&tl->mutex);
+ w->rq->cookie = lockdep_pin_lock(&tl->mutex);
+
+ i915_request_add(w->rq);
+ }
+
+ i915_vma_unpin_and_release(&w->vma, I915_VMA_RELEASE_MAP);
+}
+
+static bool retire_requests(struct intel_timeline *tl)
+{
+ struct i915_request *rq, *rn;
+
+ mutex_lock(&tl->mutex);
+ list_for_each_entry_safe(rq, rn, &tl->requests, link)
+ if (!i915_request_retire(rq))
+ break;
+ mutex_unlock(&tl->mutex);
+
+ return !i915_active_fence_isset(&tl->last_request);
+}
+
+static struct i915_request *wrap_timeline(struct i915_request *rq)
+{
+ struct intel_context *ce = rq->context;
+ struct intel_timeline *tl = ce->timeline;
+ u32 seqno = rq->fence.seqno;
+
+ while (tl->seqno >= seqno) { /* Cause a wrap */
+ i915_request_put(rq);
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return rq;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ }
+
+ i915_request_put(rq);
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return rq;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ return rq;
+}
+
+static int live_hwsp_read(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct hwsp_watcher watcher[2] = {};
+ struct intel_engine_cs *engine;
+ struct intel_timeline *tl;
+ enum intel_engine_id id;
+ int err = 0;
+ int i;
+
+ /*
+ * If we take a reference to the HWSP for reading on the GPU, that
+ * read may be arbitrarily delayed (either by foreign fence or
+ * priority saturation) and a wrap can happen within 30 minutes.
+ * When the GPU read is finally submitted it should be correct,
+ * even across multiple wraps.
+ */
+
+ if (INTEL_GEN(gt->i915) < 8) /* CS convenience [SRM/LRM] */
+ return 0;
+
+ tl = intel_timeline_create(gt);
+ if (IS_ERR(tl))
+ return PTR_ERR(tl);
+
+ if (!tl->hwsp_cacheline)
+ goto out_free;
+
+ for (i = 0; i < ARRAY_SIZE(watcher); i++) {
+ err = setup_watcher(&watcher[i], gt);
+ if (err)
+ goto out;
+ }
+
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+ unsigned long count = 0;
+ IGT_TIMEOUT(end_time);
+
+ /* Create a request we can use for remote reading of the HWSP */
+ err = create_watcher(&watcher[1], engine, SZ_512K);
+ if (err)
+ goto out;
+
+ do {
+ struct i915_sw_fence *submit;
+ struct i915_request *rq;
+ u32 hwsp;
+
+ submit = heap_fence_create(GFP_KERNEL);
+ if (!submit) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = create_watcher(&watcher[0], engine, SZ_4K);
+ if (err)
+ goto out;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out;
+ }
+
+ /* Skip to the end, saving 30 minutes of nops */
+ tl->seqno = -10u + 2 * (count & 3);
+ WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
+ ce->timeline = intel_timeline_get(tl);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ intel_context_put(ce);
+ goto out;
+ }
+
+ err = i915_sw_fence_await_dma_fence(&rq->submit,
+ &watcher[0].rq->fence, 0,
+ GFP_KERNEL);
+ if (err < 0) {
+ i915_request_add(rq);
+ intel_context_put(ce);
+ goto out;
+ }
+
+ mutex_lock(&watcher[0].rq->context->timeline->mutex);
+ err = intel_timeline_read_hwsp(rq, watcher[0].rq, &hwsp);
+ if (err == 0)
+ err = emit_read_hwsp(watcher[0].rq, /* before */
+ rq->fence.seqno, hwsp,
+ &watcher[0].addr);
+ mutex_unlock(&watcher[0].rq->context->timeline->mutex);
+ if (err) {
+ i915_request_add(rq);
+ intel_context_put(ce);
+ goto out;
+ }
+
+ mutex_lock(&watcher[1].rq->context->timeline->mutex);
+ err = intel_timeline_read_hwsp(rq, watcher[1].rq, &hwsp);
+ if (err == 0)
+ err = emit_read_hwsp(watcher[1].rq, /* after */
+ rq->fence.seqno, hwsp,
+ &watcher[1].addr);
+ mutex_unlock(&watcher[1].rq->context->timeline->mutex);
+ if (err) {
+ i915_request_add(rq);
+ intel_context_put(ce);
+ goto out;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ rq = wrap_timeline(rq);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
+ err = i915_sw_fence_await_dma_fence(&watcher[1].rq->submit,
+ &rq->fence, 0,
+ GFP_KERNEL);
+ if (err < 0) {
+ i915_request_put(rq);
+ goto out;
+ }
+
+ err = check_watcher(&watcher[0], "before", cmp_lt);
+ i915_sw_fence_commit(submit);
+ heap_fence_put(submit);
+ if (err) {
+ i915_request_put(rq);
+ goto out;
+ }
+ count++;
+
+ if (8 * watcher[1].rq->ring->emit >
+ 3 * watcher[1].rq->ring->size) {
+ i915_request_put(rq);
+ break;
+ }
+
+ /* Flush the timeline before manually wrapping again */
+ if (i915_request_wait(rq,
+ I915_WAIT_INTERRUPTIBLE,
+ HZ) < 0) {
+ err = -ETIME;
+ i915_request_put(rq);
+ goto out;
+ }
+
+ retire_requests(tl);
+ i915_request_put(rq);
+ } while (!__igt_timeout(end_time, NULL));
+ WRITE_ONCE(*(u32 *)tl->hwsp_seqno, 0xdeadbeef);
+
+ pr_info("%s: simulated %lu wraps\n", engine->name, count);
+ err = check_watcher(&watcher[1], "after", cmp_gte);
+ if (err)
+ goto out;
+ }
+
+out:
+ for (i = 0; i < ARRAY_SIZE(watcher); i++)
+ cleanup_watcher(&watcher[i]);
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+out_free:
+ intel_timeline_put(tl);
+ return err;
+}
+
static int live_hwsp_rollover_kernel(void *arg)
{
struct intel_gt *gt = arg;
@@ -998,6 +1371,7 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_hwsp_engine),
SUBTEST(live_hwsp_alternate),
SUBTEST(live_hwsp_wrap),
+ SUBTEST(live_hwsp_read),
SUBTEST(live_hwsp_rollover_kernel),
SUBTEST(live_hwsp_rollover_user),
};
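live_hwsp_read fast-forwards the timeline with tl->seqno = -10u + 2 * (count & 3), parking it a few steps short of the 32-bit wrap so each iteration needs only a handful of requests to overflow. A minimal userspace sketch of that arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned long count;

        for (count = 0; count < 4; count++) {
            uint32_t seqno = -10u + 2 * (count & 3);

            /* Requests remaining until the 32-bit seqno wraps past zero. */
            printf("count=%lu seqno=%#x wraps after %u steps\n",
                   count, (unsigned int)seqno, (unsigned int)(0u - seqno));
        }
        return 0;
    }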
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
index f011ea42487e..5982b62f913d 100644
--- a/drivers/gpu/drm/i915/gt/shmem_utils.c
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
@@ -73,7 +73,7 @@ void *shmem_pin_map(struct file *file)
mapping_set_unevictable(file->f_mapping);
return vaddr;
err_page:
- while (--i >= 0)
+ while (i--)
put_page(pages[i]);
kvfree(pages);
return NULL;
@@ -103,10 +103,13 @@ static int __shmem_rw(struct file *file, loff_t off,
return PTR_ERR(page);
vaddr = kmap(page);
- if (write)
+ if (write) {
memcpy(vaddr + offset_in_page(off), ptr, this);
- else
+ set_page_dirty(page);
+ } else {
memcpy(ptr, vaddr + offset_in_page(off), this);
+ }
+ mark_page_accessed(page);
kunmap(page);
put_page(page);
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c
index 535cc1169e54..967031056202 100644
--- a/drivers/gpu/drm/i915/gt/sysfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -79,14 +79,12 @@ static ssize_t repr_trim(char *buf, ssize_t len)
static ssize_t
__caps_show(struct intel_engine_cs *engine,
- u32 caps, char *buf, bool show_unknown)
+ unsigned long caps, char *buf, bool show_unknown)
{
const char * const *repr;
int count, n;
ssize_t len;
- BUILD_BUG_ON(!typecheck(typeof(caps), engine->uabi_capabilities));
-
switch (engine->class) {
case VIDEO_DECODE_CLASS:
repr = vcs_caps;
@@ -103,12 +101,10 @@ __caps_show(struct intel_engine_cs *engine,
count = 0;
break;
}
- GEM_BUG_ON(count > BITS_PER_TYPE(typeof(caps)));
+ GEM_BUG_ON(count > BITS_PER_LONG);
len = 0;
- for_each_set_bit(n,
- (unsigned long *)&caps,
- show_unknown ? BITS_PER_TYPE(typeof(caps)) : count) {
+ for_each_set_bit(n, &caps, show_unknown ? BITS_PER_LONG : count) {
if (n >= count || !repr[n]) {
if (GEM_WARN_ON(show_unknown))
len += snprintf(buf + len, PAGE_SIZE - len,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 942c7c187adb..2a343a977987 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -213,23 +213,6 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc)
return flags;
}
-static u32 guc_ctl_ctxinfo_flags(struct intel_guc *guc)
-{
- u32 flags = 0;
-
- if (intel_guc_submission_is_used(guc)) {
- u32 ctxnum, base;
-
- base = intel_guc_ggtt_offset(guc, guc->stage_desc_pool);
- ctxnum = GUC_MAX_STAGE_DESCRIPTORS / 16;
-
- base >>= PAGE_SHIFT;
- flags |= (base << GUC_CTL_BASE_ADDR_SHIFT) |
- (ctxnum << GUC_CTL_CTXNUM_IN16_SHIFT);
- }
- return flags;
-}
-
static u32 guc_ctl_log_params_flags(struct intel_guc *guc)
{
u32 offset = intel_guc_ggtt_offset(guc, guc->log.vma) >> PAGE_SHIFT;
@@ -291,7 +274,6 @@ static void guc_init_params(struct intel_guc *guc)
BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
- params[GUC_CTL_CTXINFO] = guc_ctl_ctxinfo_flags(guc);
params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
@@ -312,18 +294,18 @@ void intel_guc_write_params(struct intel_guc *guc)
int i;
/*
- * All SOFT_SCRATCH registers are in FORCEWAKE_BLITTER domain and
+ * All SOFT_SCRATCH registers are in FORCEWAKE_GT domain and
* they are power context saved so it's ok to release forcewake
* when we are done here and take it again at xfer time.
*/
- intel_uncore_forcewake_get(uncore, FORCEWAKE_BLITTER);
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_GT);
intel_uncore_write(uncore, SOFT_SCRATCH(0), 0);
for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
intel_uncore_write(uncore, SOFT_SCRATCH(1 + i), guc->params[i]);
- intel_uncore_forcewake_put(uncore, FORCEWAKE_BLITTER);
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_GT);
}
int intel_guc_init(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index d44061033f23..5212ff844292 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -10,11 +10,52 @@
/*
* The Additional Data Struct (ADS) has pointers for different buffers used by
- * the GuC. One single gem object contains the ADS struct itself (guc_ads), the
- * scheduling policies (guc_policies), a structure describing a collection of
- * register sets (guc_mmio_reg_state) and some extra pages for the GuC to save
- * its internal state for sleep.
+ * the GuC. One single gem object contains the ADS struct itself (guc_ads) and
+ * all the extra buffers indirectly linked via the ADS struct's entries.
+ *
+ * Layout of the ADS blob allocated for the GuC:
+ *
+ * +---------------------------------------+ <== base
+ * | guc_ads |
+ * +---------------------------------------+
+ * | guc_policies |
+ * +---------------------------------------+
+ * | guc_gt_system_info |
+ * +---------------------------------------+
+ * | guc_clients_info |
+ * +---------------------------------------+
+ * | guc_ct_pool_entry[size] |
+ * +---------------------------------------+
+ * | padding |
+ * +---------------------------------------+ <== 4K aligned
+ * | private data |
+ * +---------------------------------------+
+ * | padding |
+ * +---------------------------------------+ <== 4K aligned
*/
+struct __guc_ads_blob {
+ struct guc_ads ads;
+ struct guc_policies policies;
+ struct guc_gt_system_info system_info;
+ struct guc_clients_info clients_info;
+ struct guc_ct_pool_entry ct_pool[GUC_CT_POOL_SIZE];
+} __packed;
+
+static u32 guc_ads_private_data_size(struct intel_guc *guc)
+{
+ return PAGE_ALIGN(guc->fw.private_data_size);
+}
+
+static u32 guc_ads_private_data_offset(struct intel_guc *guc)
+{
+ return PAGE_ALIGN(sizeof(struct __guc_ads_blob));
+}
+
+static u32 guc_ads_blob_size(struct intel_guc *guc)
+{
+ return guc_ads_private_data_offset(guc) +
+ guc_ads_private_data_size(guc);
+}
static void guc_policy_init(struct guc_policy *policy)
{
@@ -48,26 +89,37 @@ static void guc_ct_pool_entries_init(struct guc_ct_pool_entry *pool, u32 num)
memset(pool, 0, num * sizeof(*pool));
}
+static void guc_mapping_table_init(struct intel_gt *gt,
+ struct guc_gt_system_info *system_info)
+{
+ unsigned int i, j;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+	/* Unused entries must be set to invalid values */
+ for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
+ for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
+ system_info->mapping_table[i][j] =
+ GUC_MAX_INSTANCES_PER_CLASS;
+
+ for_each_engine(engine, gt, id) {
+ u8 guc_class = engine->class;
+
+ system_info->mapping_table[guc_class][engine->instance] =
+ engine->instance;
+ }
+}
+
/*
* The first 80 dwords of the register state context, containing the
* execlists and ppgtt registers.
*/
#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32))
-/* The ads obj includes the struct itself and buffers passed to GuC */
-struct __guc_ads_blob {
- struct guc_ads ads;
- struct guc_policies policies;
- struct guc_mmio_reg_state reg_state;
- struct guc_gt_system_info system_info;
- struct guc_clients_info clients_info;
- struct guc_ct_pool_entry ct_pool[GUC_CT_POOL_SIZE];
- u8 reg_state_buffer[GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE];
-} __packed;
-
static void __guc_ads_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
+ struct drm_i915_private *i915 = gt->i915;
struct __guc_ads_blob *blob = guc->ads_blob;
const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE;
u32 base;
@@ -99,13 +151,25 @@ static void __guc_ads_init(struct intel_guc *guc)
}
/* System info */
- blob->system_info.slice_enabled = hweight8(gt->info.sseu.slice_mask);
- blob->system_info.rcs_enabled = 1;
- blob->system_info.bcs_enabled = 1;
+ blob->system_info.engine_enabled_masks[RENDER_CLASS] = 1;
+ blob->system_info.engine_enabled_masks[COPY_ENGINE_CLASS] = 1;
+ blob->system_info.engine_enabled_masks[VIDEO_DECODE_CLASS] = VDBOX_MASK(gt);
+ blob->system_info.engine_enabled_masks[VIDEO_ENHANCEMENT_CLASS] = VEBOX_MASK(gt);
+
+ blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED] =
+ hweight8(gt->info.sseu.slice_mask);
+ blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK] =
+ gt->info.vdbox_sfc_access;
+
+ if (INTEL_GEN(i915) >= 12 && !IS_DGFX(i915)) {
+ u32 distdbreg = intel_uncore_read(gt->uncore,
+ GEN12_DIST_DBS_POPULATED);
+ blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI] =
+ ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) &
+ GEN12_DOORBELLS_PER_SQIDI) + 1;
+ }
- blob->system_info.vdbox_enable_mask = VDBOX_MASK(gt);
- blob->system_info.vebox_enable_mask = VEBOX_MASK(gt);
- blob->system_info.vdbox_sfc_support_mask = gt->info.vdbox_sfc_access;
+ guc_mapping_table_init(guc_to_gt(guc), &blob->system_info);
base = intel_guc_ggtt_offset(guc, guc->ads_vma);
@@ -118,11 +182,12 @@ static void __guc_ads_init(struct intel_guc *guc)
/* ADS */
blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
- blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer);
- blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state);
blob->ads.gt_system_info = base + ptr_offset(blob, system_info);
blob->ads.clients_info = base + ptr_offset(blob, clients_info);
+ /* Private Data */
+ blob->ads.private_data = base + guc_ads_private_data_offset(guc);
+
i915_gem_object_flush_map(guc->ads_vma->obj);
}
@@ -135,14 +200,15 @@ static void __guc_ads_init(struct intel_guc *guc)
*/
int intel_guc_ads_create(struct intel_guc *guc)
{
- const u32 size = PAGE_ALIGN(sizeof(struct __guc_ads_blob));
+ u32 size;
int ret;
GEM_BUG_ON(guc->ads_vma);
+ size = guc_ads_blob_size(guc);
+
ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma,
(void **)&guc->ads_blob);
-
if (ret)
return ret;
@@ -154,6 +220,19 @@ int intel_guc_ads_create(struct intel_guc *guc)
void intel_guc_ads_destroy(struct intel_guc *guc)
{
i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP);
+ guc->ads_blob = NULL;
+}
+
+static void guc_ads_private_data_reset(struct intel_guc *guc)
+{
+ u32 size;
+
+ size = guc_ads_private_data_size(guc);
+ if (!size)
+ return;
+
+ memset((void *)guc->ads_blob + guc_ads_private_data_offset(guc), 0,
+ size);
}
/**
@@ -168,5 +247,8 @@ void intel_guc_ads_reset(struct intel_guc *guc)
{
if (!guc->ads_vma)
return;
+
__guc_ads_init(guc);
+
+ guc_ads_private_data_reset(guc);
}
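The resized ADS blob is the page-aligned struct followed by a page-aligned private-data region, so both boundaries land on 4K as the layout diagram shows. A minimal userspace sketch of the sizing helpers, using a stand-in struct and a hypothetical firmware-reported private_data_size:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096u
    #define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    /* Stand-in for the real __guc_ads_blob; only its size matters here. */
    struct __guc_ads_blob { uint8_t bytes[5000]; };

    static uint32_t private_data_offset(void)
    {
        return PAGE_ALIGN(sizeof(struct __guc_ads_blob));
    }

    static uint32_t blob_size(uint32_t private_data_size)
    {
        return private_data_offset() + PAGE_ALIGN(private_data_size);
    }

    int main(void)
    {
        uint32_t pds = 300; /* hypothetical css->private_data_size */

        printf("private data at %#x, blob size %#x\n",
               private_data_offset(), blob_size(pds));
        return 0;
    }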
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 11742fca0e9e..fa9e048cc65f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -210,6 +210,7 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct)
GEM_BUG_ON(ct->enabled);
i915_vma_unpin_and_release(&ct->vma, I915_VMA_RELEASE_MAP);
+ memset(ct, 0, sizeof(*ct));
}
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index d4a87f4c9421..f9d0907ea1a5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -76,6 +76,7 @@ static inline bool guc_ready(struct intel_uncore *uncore, u32 *status)
static int guc_wait_ucode(struct intel_uncore *uncore)
{
+ struct drm_device *drm = &uncore->i915->drm;
u32 status;
int ret;
@@ -90,15 +91,27 @@ static int guc_wait_ucode(struct intel_uncore *uncore)
ret = wait_for(guc_ready(uncore, &status), 100);
DRM_DEBUG_DRIVER("GuC status %#x\n", status);
- if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
- DRM_ERROR("GuC firmware signature verification failed\n");
- ret = -ENOEXEC;
- }
-
- if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) {
- DRM_ERROR("GuC firmware exception. EIP: %#x\n",
- intel_uncore_read(uncore, SOFT_SCRATCH(13)));
- ret = -ENXIO;
+ if (ret) {
+ drm_err(drm, "GuC load failed: status = 0x%08X\n", status);
+ drm_err(drm, "GuC load failed: status: Reset = %d, "
+ "BootROM = 0x%02X, UKernel = 0x%02X, "
+ "MIA = 0x%02X, Auth = 0x%02X\n",
+ REG_FIELD_GET(GS_MIA_IN_RESET, status),
+ REG_FIELD_GET(GS_BOOTROM_MASK, status),
+ REG_FIELD_GET(GS_UKERNEL_MASK, status),
+ REG_FIELD_GET(GS_MIA_MASK, status),
+ REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+
+ if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
+ drm_err(drm, "GuC firmware signature verification failed\n");
+ ret = -ENOEXEC;
+ }
+
+ if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) {
+ drm_err(drm, "GuC firmware exception. EIP: %#x\n",
+ intel_uncore_read(uncore, SOFT_SCRATCH(13)));
+ ret = -ENXIO;
+ }
}
return ret;
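On a failed load the raw GUC_STATUS word is now decoded field by field before the specific bootrom/ukernel diagnoses. A minimal userspace sketch of the REG_FIELD_GET-style extraction; the mask placements below are hypothetical stand-ins, the real ones are in intel_guc_reg.h:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical field placements; the real ones live in intel_guc_reg.h. */
    #define GS_MIA_IN_RESET   (1u << 0)
    #define GS_BOOTROM_MASK   (0x7fu << 1)
    #define GS_UKERNEL_MASK   (0xffu << 8)

    /* Divide by the mask's lowest set bit to shift the field down. */
    #define FIELD_GET(mask, val) (((val) & (mask)) / ((mask) & -(mask)))

    int main(void)
    {
        uint32_t status = 0x0000724au; /* example raw GUC_STATUS value */

        printf("Reset = %u, BootROM = 0x%02x, UKernel = 0x%02x\n",
               (unsigned int)FIELD_GET(GS_MIA_IN_RESET, status),
               (unsigned int)FIELD_GET(GS_BOOTROM_MASK, status),
               (unsigned int)FIELD_GET(GS_UKERNEL_MASK, status));
        return 0;
    }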
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index a6b733c146c9..79c560d9c0b6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -26,8 +26,8 @@
#define GUC_VIDEO_ENGINE2 4
#define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1)
-#define GUC_MAX_ENGINE_CLASSES 5
-#define GUC_MAX_INSTANCES_PER_CLASS 16
+#define GUC_MAX_ENGINE_CLASSES 16
+#define GUC_MAX_INSTANCES_PER_CLASS 32
#define GUC_DOORBELL_INVALID 256
@@ -62,12 +62,7 @@
#define GUC_STAGE_DESC_ATTR_PCH BIT(6)
#define GUC_STAGE_DESC_ATTR_TERMINATED BIT(7)
-/* New GuC control data */
-#define GUC_CTL_CTXINFO 0
-#define GUC_CTL_CTXNUM_IN16_SHIFT 0
-#define GUC_CTL_BASE_ADDR_SHIFT 12
-
-#define GUC_CTL_LOG_PARAMS 1
+#define GUC_CTL_LOG_PARAMS 0
#define GUC_LOG_VALID (1 << 0)
#define GUC_LOG_NOTIFY_ON_HALF_FULL (1 << 1)
#define GUC_LOG_ALLOC_IN_MEGABYTE (1 << 3)
@@ -79,11 +74,11 @@
#define GUC_LOG_ISR_MASK (0x7 << GUC_LOG_ISR_SHIFT)
#define GUC_LOG_BUF_ADDR_SHIFT 12
-#define GUC_CTL_WA 2
-#define GUC_CTL_FEATURE 3
+#define GUC_CTL_WA 1
+#define GUC_CTL_FEATURE 2
#define GUC_CTL_DISABLE_SCHEDULER (1 << 14)
-#define GUC_CTL_DEBUG 4
+#define GUC_CTL_DEBUG 3
#define GUC_LOG_VERBOSITY_SHIFT 0
#define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT)
#define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT)
@@ -97,12 +92,37 @@
#define GUC_LOG_DISABLED (1 << 6)
#define GUC_PROFILE_ENABLED (1 << 7)
-#define GUC_CTL_ADS 5
+#define GUC_CTL_ADS 4
#define GUC_ADS_ADDR_SHIFT 1
#define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT)
#define GUC_CTL_MAX_DWORDS (SOFT_SCRATCH_COUNT - 2) /* [1..14] */
+/* Generic GT SysInfo data types */
+#define GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED 0
+#define GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK 1
+#define GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI 2
+#define GUC_GENERIC_GT_SYSINFO_MAX 16
+
+/*
+ * The class goes in bits [0..2] of the GuC ID, the instance in bits [3..6].
+ * Bit 7 can be used for operations that apply to all engine classes & instances.
+ */
+#define GUC_ENGINE_CLASS_SHIFT 0
+#define GUC_ENGINE_CLASS_MASK (0x7 << GUC_ENGINE_CLASS_SHIFT)
+#define GUC_ENGINE_INSTANCE_SHIFT 3
+#define GUC_ENGINE_INSTANCE_MASK (0xf << GUC_ENGINE_INSTANCE_SHIFT)
+#define GUC_ENGINE_ALL_INSTANCES BIT(7)
+
+#define MAKE_GUC_ID(class, instance) \
+ (((class) << GUC_ENGINE_CLASS_SHIFT) | \
+ ((instance) << GUC_ENGINE_INSTANCE_SHIFT))
+
+#define GUC_ID_TO_ENGINE_CLASS(guc_id) \
+ (((guc_id) & GUC_ENGINE_CLASS_MASK) >> GUC_ENGINE_CLASS_SHIFT)
+#define GUC_ID_TO_ENGINE_INSTANCE(guc_id) \
+ (((guc_id) & GUC_ENGINE_INSTANCE_MASK) >> GUC_ENGINE_INSTANCE_SHIFT)
+
/* Work item for submitting workloads into work queue of GuC. */
struct guc_wq_item {
u32 header;
@@ -336,11 +356,6 @@ struct guc_policies {
} __packed;
/* GuC MMIO reg state struct */
-
-
-#define GUC_REGSET_MAX_REGISTERS 64
-#define GUC_S3_SAVE_SPACE_PAGES 10
-
struct guc_mmio_reg {
u32 offset;
u32 value;
@@ -348,28 +363,18 @@ struct guc_mmio_reg {
#define GUC_REGSET_MASKED (1 << 0)
} __packed;
-struct guc_mmio_regset {
- struct guc_mmio_reg registers[GUC_REGSET_MAX_REGISTERS];
- u32 values_valid;
- u32 number_of_registers;
-} __packed;
-
/* GuC register sets */
-struct guc_mmio_reg_state {
- struct guc_mmio_regset engine_reg[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
- u32 reserved[98];
+struct guc_mmio_reg_set {
+ u32 address;
+ u16 count;
+ u16 reserved;
} __packed;
/* HW info */
struct guc_gt_system_info {
- u32 slice_enabled;
- u32 rcs_enabled;
- u32 reserved0;
- u32 bcs_enabled;
- u32 vdbox_enable_mask;
- u32 vdbox_sfc_support_mask;
- u32 vebox_enable_mask;
- u32 reserved[9];
+ u8 mapping_table[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+ u32 engine_enabled_masks[GUC_MAX_ENGINE_CLASSES];
+ u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX];
} __packed;
/* Clients info */
@@ -390,15 +395,16 @@ struct guc_clients_info {
/* GuC Additional Data Struct */
struct guc_ads {
- u32 reg_state_addr;
- u32 reg_state_buffer;
+ struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+ u32 reserved0;
u32 scheduler_policies;
u32 gt_system_info;
u32 clients_info;
u32 control_data;
u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES];
u32 eng_state_size[GUC_MAX_ENGINE_CLASSES];
- u32 reserved[16];
+ u32 private_data;
+ u32 reserved[15];
} __packed;
/* GuC logging structures */
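The packed GuC engine ID defined above keeps the class in bits [0..2] and the instance in bits [3..6], so MAKE_GUC_ID and the two extractors round-trip by construction. A minimal userspace sketch reusing the macros verbatim:

    #include <assert.h>
    #include <stdio.h>

    #define GUC_ENGINE_CLASS_SHIFT    0
    #define GUC_ENGINE_CLASS_MASK     (0x7 << GUC_ENGINE_CLASS_SHIFT)
    #define GUC_ENGINE_INSTANCE_SHIFT 3
    #define GUC_ENGINE_INSTANCE_MASK  (0xf << GUC_ENGINE_INSTANCE_SHIFT)

    #define MAKE_GUC_ID(class, instance) \
        (((class) << GUC_ENGINE_CLASS_SHIFT) | \
         ((instance) << GUC_ENGINE_INSTANCE_SHIFT))

    #define GUC_ID_TO_ENGINE_CLASS(guc_id) \
        (((guc_id) & GUC_ENGINE_CLASS_MASK) >> GUC_ENGINE_CLASS_SHIFT)
    #define GUC_ID_TO_ENGINE_INSTANCE(guc_id) \
        (((guc_id) & GUC_ENGINE_INSTANCE_MASK) >> GUC_ENGINE_INSTANCE_SHIFT)

    int main(void)
    {
        unsigned int class, instance;

        /* Every 3-bit class / 4-bit instance pair survives the round trip. */
        for (class = 0; class < 8; class++)
            for (instance = 0; instance < 16; instance++) {
                unsigned int id = MAKE_GUC_ID(class, instance);

                assert(GUC_ID_TO_ENGINE_CLASS(id) == class);
                assert(GUC_ID_TO_ENGINE_INSTANCE(id) == instance);
            }

        /* e.g. a video engine (class 1), instance 2 */
        printf("guc_id = %#x\n", MAKE_GUC_ID(1, 2));
        return 0;
    }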
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 9bbe8a795cb8..c92f2c056db4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -134,7 +134,7 @@ static int remove_buf_file_callback(struct dentry *dentry)
}
/* relay channel callbacks */
-static struct rchan_callbacks relay_callbacks = {
+static const struct rchan_callbacks relay_callbacks = {
.subbuf_start = subbuf_start_callback,
.create_buf_file = create_buf_file_callback,
.remove_buf_file = remove_buf_file_callback,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
index 1949346e714e..b37fc2ffaef2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
@@ -118,6 +118,11 @@ struct guc_doorbell_info {
#define GEN8_DRB_VALID (1<<0)
#define GEN8_DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4)
+#define GEN12_DIST_DBS_POPULATED _MMIO(0xd08)
+#define GEN12_DOORBELLS_PER_SQIDI_SHIFT 16
+#define GEN12_DOORBELLS_PER_SQIDI (0xff)
+#define GEN12_SQIDIS_DOORBELL_EXIST (0xffff)
+
#define DE_GUCRMR _MMIO(0x44054)
#define GUC_BCS_RCS_IER _MMIO(0xC550)
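GEN12_DIST_DBS_POPULATED carries the per-SQIDI doorbell count in bits [23:16]; the ADS code above reads it as ((reg >> 16) & 0xff) + 1. A minimal userspace sketch of the decode, with an invented register value:

    #include <stdint.h>
    #include <stdio.h>

    #define GEN12_DOORBELLS_PER_SQIDI_SHIFT 16
    #define GEN12_DOORBELLS_PER_SQIDI       (0xff)

    int main(void)
    {
        uint32_t distdbreg = 0x00070003; /* example raw register value */
        uint32_t count = ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) &
                          GEN12_DOORBELLS_PER_SQIDI) + 1;

        printf("doorbells per SQIDI: %u\n", (unsigned int)count);
        return 0;
    }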
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index d6f55f70889d..4e6070e95fe9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -231,13 +231,15 @@ static int guc_enable_communication(struct intel_guc *guc)
intel_guc_ct_event_handler(&guc->ct);
spin_unlock_irq(&i915->irq_lock);
- DRM_INFO("GuC communication enabled\n");
+ drm_dbg(&i915->drm, "GuC communication enabled\n");
return 0;
}
static void guc_disable_communication(struct intel_guc *guc)
{
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+
/*
* Events generated during or after CT disable are logged by guc in
* via mmio. Make sure the register is clear before disabling CT since
@@ -257,7 +259,7 @@ static void guc_disable_communication(struct intel_guc *guc)
*/
guc_get_mmio_msg(guc);
- DRM_INFO("GuC communication disabled\n");
+ drm_dbg(&i915->drm, "GuC communication disabled\n");
}
static void __uc_fetch_firmwares(struct intel_uc *uc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 80e8b6c3bc8c..180c23e2e25e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -44,23 +44,20 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* List of required GuC and HuC binaries per-platform.
* Must be ordered based on platform + revid, from newer to older.
*
- * TGL 35.2 is interface-compatible with 33.0 for previous Gens. The deltas
- * between 33.0 and 35.2 are only related to new additions to support new Gen12
- * features.
- *
* Note that RKL uses the same firmware as TGL.
*/
#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
- fw_def(ROCKETLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 5, 0)) \
- fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 5, 0)) \
- fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \
- fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \
- fw_def(COMETLAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \
- fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \
- fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 4, 0, 0)) \
- fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \
- fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 2, 0, 0)) \
- fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 2, 0, 0))
+ fw_def(ROCKETLAKE, 0, guc_def(tgl, 49, 0, 1), huc_def(tgl, 7, 5, 0)) \
+ fw_def(TIGERLAKE, 0, guc_def(tgl, 49, 0, 1), huc_def(tgl, 7, 5, 0)) \
+ fw_def(JASPERLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \
+ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \
+ fw_def(ICELAKE, 0, guc_def(icl, 49, 0, 1), huc_def(icl, 9, 0, 0)) \
+ fw_def(COMETLAKE, 5, guc_def(cml, 49, 0, 1), huc_def(cml, 4, 0, 0)) \
+ fw_def(COFFEELAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \
+ fw_def(GEMINILAKE, 0, guc_def(glk, 49, 0, 1), huc_def(glk, 4, 0, 0)) \
+ fw_def(KABYLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \
+ fw_def(BROXTON, 0, guc_def(bxt, 49, 0, 1), huc_def(bxt, 2, 0, 0)) \
+ fw_def(SKYLAKE, 0, guc_def(skl, 49, 0, 1), huc_def(skl, 2, 0, 0))
#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
"i915/" \
@@ -371,6 +368,9 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
}
}
+ if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
+ uc_fw->private_data_size = css->private_data_size;
+
obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 23d3a423ac0f..99bb1fe1af66 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -88,6 +88,8 @@ struct intel_uc_fw {
u32 rsa_size;
u32 ucode_size;
+
+ u32 private_data_size;
};
#ifdef CONFIG_DRM_I915_DEBUG_GUC
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
index 029214cdedd5..e41ffc7a7fbc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
@@ -69,7 +69,11 @@ struct uc_css_header {
#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16)
#define CSS_SW_VERSION_UC_MINOR (0xFF << 8)
#define CSS_SW_VERSION_UC_PATCH (0xFF << 0)
- u32 reserved[14];
+ u32 reserved0[13];
+ union {
+ u32 private_data_size; /* only applies to GuC */
+ u32 reserved1;
+ };
u32 header_info;
} __packed;
static_assert(sizeof(struct uc_css_header) == 128);
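The union carves private_data_size out of one previously reserved dword, so the CSS header keeps its asserted 128-byte size. A minimal sketch checking that the tail of the header is unchanged in size (only the trailing fields are modelled):

    #include <stdint.h>
    #include <stdio.h>

    struct css_tail {
        uint32_t reserved0[13];
        union {
            uint32_t private_data_size; /* only applies to GuC */
            uint32_t reserved1;
        };
        uint32_t header_info;
    } __attribute__((packed));

    int main(void)
    {
        /* 13 + 1 + 1 dwords, same as the original reserved[14] + header_info. */
        _Static_assert(sizeof(struct css_tail) == 15 * sizeof(uint32_t),
                       "union must not change the CSS header size");
        printf("tail size: %zu bytes\n", sizeof(struct css_tail));
        return 0;
    }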