Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context.c          | 40
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_types.h     |  4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm.c            |  3
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c              | 61
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring_submission.c  | 31
5 files changed, 86 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index ef7bc41ffffa..5b7ff3ccfa8e 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -123,6 +123,10 @@ static int __context_pin_state(struct i915_vma *vma)
 	if (err)
 		return err;
 
+	err = i915_active_acquire(&vma->active);
+	if (err)
+		goto err_unpin;
+
 	/*
 	 * And mark it as a globally pinned object to let the shrinker know
 	 * it cannot reclaim the object until we release it.
@@ -131,14 +135,44 @@ static int __context_pin_state(struct i915_vma *vma)
 	vma->obj->mm.dirty = true;
 
 	return 0;
+
+err_unpin:
+	i915_vma_unpin(vma);
+	return err;
 }
 
 static void __context_unpin_state(struct i915_vma *vma)
 {
 	i915_vma_make_shrinkable(vma);
+	i915_active_release(&vma->active);
 	__i915_vma_unpin(vma);
 }
 
+static int __ring_active(struct intel_ring *ring)
+{
+	int err;
+
+	err = i915_active_acquire(&ring->vma->active);
+	if (err)
+		return err;
+
+	err = intel_ring_pin(ring);
+	if (err)
+		goto err_active;
+
+	return 0;
+
+err_active:
+	i915_active_release(&ring->vma->active);
+	return err;
+}
+
+static void __ring_retire(struct intel_ring *ring)
+{
+	intel_ring_unpin(ring);
+	i915_active_release(&ring->vma->active);
+}
+
 __i915_active_call
 static void __intel_context_retire(struct i915_active *active)
 {
@@ -151,7 +185,7 @@ static void __intel_context_retire(struct i915_active *active)
 		__context_unpin_state(ce->state);
 
 	intel_timeline_unpin(ce->timeline);
-	intel_ring_unpin(ce->ring);
+	__ring_retire(ce->ring);
 
 	intel_context_put(ce);
 }
@@ -163,7 +197,7 @@ static int __intel_context_active(struct i915_active *active)
 
 	intel_context_get(ce);
 
-	err = intel_ring_pin(ce->ring);
+	err = __ring_active(ce->ring);
 	if (err)
 		goto err_put;
 
@@ -183,7 +217,7 @@ static int __intel_context_active(struct i915_active *active)
 err_timeline:
 	intel_timeline_unpin(ce->timeline);
 err_ring:
-	intel_ring_unpin(ce->ring);
+	__ring_retire(ce->ring);
 err_put:
 	intel_context_put(ce);
 	return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 17f1f1441efc..2b446474e010 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -274,8 +274,8 @@ struct intel_engine_cs {
 	u8 class;
 	u8 instance;
 
-	u8 uabi_class;
-	u8 uabi_instance;
+	u16 uabi_class;
+	u16 uabi_instance;
 
 	u32 uabi_capabilities;
 	u32 context_size;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index a459a42ad5c2..7e64b7d7d330 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -94,8 +94,9 @@ static int __gt_park(struct intel_wakeref *wf)
 		intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
 	}
 
+	/* Defer dropping the display power well for 100ms, it's slow! */
 	GEM_BUG_ON(!wakeref);
-	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
+	intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref);
 
 	i915_globals_park();
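Taken on its own, the intel_context.c change above is the kernel's standard acquire/unwind idiom: each resource acquired is released in reverse order, both on the error path and at retirement. A minimal self-contained sketch of that idiom follows; the res_* and object_* names are hypothetical stand-ins, not the i915_active/intel_ring API.

/*
 * Sketch of the acquire/unwind idiom used by __ring_active() and
 * __ring_retire() above. Everything here is a hypothetical stand-in;
 * only the ordering rule matters: take A then B, and release B then A
 * on failure or at retirement.
 */
struct res {
	int refcount;
};

static int res_acquire(struct res *r)
{
	r->refcount++;
	return 0;	/* always succeeds in this sketch */
}

static void res_release(struct res *r)
{
	r->refcount--;
}

static int object_activate(struct res *tracker, struct res *backing)
{
	int err;

	err = res_acquire(tracker);	/* A: keep the tracker alive */
	if (err)
		return err;

	err = res_acquire(backing);	/* B: pin the backing store */
	if (err)
		goto err_tracker;

	return 0;

err_tracker:
	res_release(tracker);		/* unwind only what we took */
	return err;
}

static void object_retire(struct res *tracker, struct res *backing)
{
	res_release(backing);		/* B first... */
	res_release(tracker);		/* ...then A */
}

__ring_active() and __ring_retire() are exactly this shape, with i915_active_acquire() playing the tracker and intel_ring_pin() the backing store.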
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9fdefbdc3546..d925a1035c9d 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -845,12 +845,6 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
 	}
 }
 
-static void unwind_wa_tail(struct i915_request *rq)
-{
-	rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
-	assert_ring_tail_valid(rq->ring, rq->tail);
-}
-
 static struct i915_request *
 __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
@@ -863,12 +857,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	list_for_each_entry_safe_reverse(rq, rn,
 					 &engine->active.requests,
 					 sched.link) {
-
 		if (i915_request_completed(rq))
 			continue; /* XXX */
 
 		__i915_request_unsubmit(rq);
-		unwind_wa_tail(rq);
 
 		/*
 		 * Push the request back into the queue for later resubmission.
@@ -1161,13 +1153,29 @@ execlists_schedule_out(struct i915_request *rq)
 	i915_request_put(rq);
 }
 
-static u64 execlists_update_context(const struct i915_request *rq)
+static u64 execlists_update_context(struct i915_request *rq)
 {
 	struct intel_context *ce = rq->hw_context;
-	u64 desc;
+	u64 desc = ce->lrc_desc;
+	u32 tail;
 
-	ce->lrc_reg_state[CTX_RING_TAIL] =
-		intel_ring_set_tail(rq->ring, rq->tail);
+	/*
+	 * WaIdleLiteRestore:bdw,skl
+	 *
+	 * We should never submit the context with the same RING_TAIL twice
+	 * just in case we submit an empty ring, which confuses the HW.
+	 *
+	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
+	 * the normal request to be able to always advance the RING_TAIL on
+	 * subsequent resubmissions (for lite restore). Should that fail us,
+	 * and we try and submit the same tail again, force the context
+	 * reload.
+	 */
+	tail = intel_ring_set_tail(rq->ring, rq->tail);
+	if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
+		desc |= CTX_DESC_FORCE_RESTORE;
+	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
+	rq->tail = rq->wa_tail;
 
 	/*
 	 * Make sure the context image is complete before we submit it to HW.
@@ -1186,13 +1194,11 @@ static u64 execlists_update_context(const struct i915_request *rq)
 	 */
 	mb();
 
-	desc = ce->lrc_desc;
-	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
-
 	/* Wa_1607138340:tgl */
 	if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0))
 		desc |= CTX_DESC_FORCE_RESTORE;
 
+	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
 	return desc;
 }
 
@@ -1703,16 +1709,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				return;
 			}
-
-			/*
-			 * WaIdleLiteRestore:bdw,skl
-			 * Apply the wa NOOPs to prevent
-			 * ring:HEAD == rq:TAIL as we resubmit the
-			 * request. See gen8_emit_fini_breadcrumb() for
-			 * where we prepare the padding after the
-			 * end of the request.
-			 */
-			last->tail = last->wa_tail;
 		}
 	}
 
@@ -2668,6 +2664,14 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
+	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
+	batch = gen8_emit_pipe_control(batch,
+				       PIPE_CONTROL_FLUSH_L3 |
+				       PIPE_CONTROL_STORE_DATA_INDEX |
+				       PIPE_CONTROL_CS_STALL |
+				       PIPE_CONTROL_QW_WRITE,
+				       LRC_PPHWSP_SCRATCH_ADDR);
+
 	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
 
 	/* WaMediaPoolStateCmdInWABB:bxt,glk */
@@ -4120,17 +4124,18 @@ static void virtual_context_destroy(struct kref *kref)
 	for (n = 0; n < ve->num_siblings; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
 		struct rb_node *node = &ve->nodes[sibling->id].rb;
+		unsigned long flags;
 
 		if (RB_EMPTY_NODE(node))
 			continue;
 
-		spin_lock_irq(&sibling->active.lock);
+		spin_lock_irqsave(&sibling->active.lock, flags);
 
 		/* Detachment is lazily performed in the execlists tasklet */
 		if (!RB_EMPTY_NODE(node))
 			rb_erase_cached(node, &sibling->execlists.virtual);
 
-		spin_unlock_irq(&sibling->active.lock);
+		spin_unlock_irqrestore(&sibling->active.lock, flags);
 	}
 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
 
@@ -4419,9 +4424,11 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
 	ve->base.gt = siblings[0]->gt;
 	ve->base.uncore = siblings[0]->uncore;
 	ve->base.id = -1;
+
 	ve->base.class = OTHER_CLASS;
 	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
 	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
 
 	/*
 	 * The decision on whether to submit a request using semaphores
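With unwind_wa_tail() removed, the WaIdleLiteRestore handling moves to submission time: execlists_update_context() now compares the incoming RING_TAIL against the value written on the previous submission and, on a match, sets CTX_DESC_FORCE_RESTORE so the hardware reloads the context instead of seeing an apparently empty ring. A stripped-down sketch of that guard, with simplified types and a hypothetical FORCE_RESTORE bit standing in for the real descriptor layout:

/*
 * Sketch of the duplicate-tail guard in execlists_update_context()
 * above. Types and the FORCE_RESTORE bit are hypothetical; the real
 * code operates on ce->lrc_desc and ce->lrc_reg_state[CTX_RING_TAIL].
 */
#define FORCE_RESTORE	(1ull << 2)	/* hypothetical descriptor bit */

struct ctx_state {
	unsigned long long desc;	/* descriptor submitted to HW */
	unsigned int last_tail;		/* RING_TAIL written last time */
};

static unsigned long long update_context(struct ctx_state *ce,
					 unsigned int tail)
{
	unsigned long long desc = ce->desc;

	/*
	 * Submitting the same RING_TAIL twice can make the HW treat the
	 * ring as empty, so force a full context reload instead.
	 */
	if (ce->last_tail == tail)
		desc |= FORCE_RESTORE;
	ce->last_tail = tail;

	/* Force-restore is one-shot: clear it for the next submission. */
	ce->desc &= ~FORCE_RESTORE;

	return desc;
}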
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index a47d5a7c32c9..93026217c121 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1413,14 +1413,6 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 	int len;
 	u32 *cs;
 
-	flags |= MI_MM_SPACE_GTT;
-	if (IS_HASWELL(i915))
-		/* These flags are for resource streamer on HSW+ */
-		flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
-	else
-		/* We need to save the extended state for powersaving modes */
-		flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
-
 	len = 4;
 	if (IS_GEN(i915, 7))
 		len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
@@ -1589,22 +1581,21 @@ static int switch_context(struct i915_request *rq)
 	}
 
 	if (ce->state) {
-		u32 hw_flags;
+		u32 flags;
 
 		GEM_BUG_ON(rq->engine->id != RCS0);
 
-		/*
-		 * The kernel context(s) is treated as pure scratch and is not
-		 * expected to retain any state (as we sacrifice it during
-		 * suspend and on resume it may be corrupted). This is ok,
-		 * as nothing actually executes using the kernel context; it
-		 * is purely used for flushing user contexts.
-		 */
-		hw_flags = 0;
-		if (i915_gem_context_is_kernel(rq->gem_context))
-			hw_flags = MI_RESTORE_INHIBIT;
+		/* For resource streamer on HSW+ and power context elsewhere */
+		BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
+		BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);
+
+		flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT;
+		if (!i915_gem_context_is_kernel(rq->gem_context))
+			flags |= MI_RESTORE_EXT_STATE_EN;
+		else
+			flags |= MI_RESTORE_INHIBIT;
 
-		ret = mi_set_context(rq, hw_flags);
+		ret = mi_set_context(rq, flags);
 		if (ret)
 			return ret;
 	}
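The switch_context() rework folds the Haswell resource-streamer flags and the pre-HSW extended-state flags into a single path, which is only safe because the corresponding defines occupy the same bits; the two BUILD_BUG_ON()s turn that assumption into a compile-time check. The same pattern in standalone C11, with hypothetical flag values (the kernel's MI_* defines are the real source of truth):

#include <assert.h>

/*
 * Hypothetical flag values; in the kernel these are the MI_* and
 * HSW_MI_RS_* defines, and BUILD_BUG_ON() plays the role of
 * static_assert().
 */
#define SAVE_EXT_EN	(1u << 3)	/* pre-HSW: save extended state */
#define HSW_RS_SAVE_EN	(1u << 3)	/* HSW+: resource streamer save */
#define RESTORE_EXT_EN	(1u << 2)
#define RESTORE_INHIBIT	(1u << 18)
#define MM_SPACE_GTT	(1u << 8)

/* If the aliased defines ever diverge, compilation fails here. */
static_assert(SAVE_EXT_EN == HSW_RS_SAVE_EN,
	      "save flags must share a bit for the unified path");

static unsigned int context_switch_flags(int is_kernel_ctx)
{
	unsigned int flags = SAVE_EXT_EN | MM_SPACE_GTT;

	/*
	 * The kernel context is pure scratch; inhibit the restore rather
	 * than reload state that is never preserved.
	 */
	if (is_kernel_ctx)
		flags |= RESTORE_INHIBIT;
	else
		flags |= RESTORE_EXT_EN;

	return flags;
}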