From 3ef7114982b844f0f31c5b92919fa0f45b662079 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Mar 2019 11:05:44 +0000 Subject: drm/i915: Introduce i915_timeline.mutex A simple mutex used for guarding the flow of requests in and out of the timeline. In the short-term, it will be used only to guard the addition of requests into the timeline, taken on alloc and released on commit so that only one caller can construct a request into the timeline (important as the seqno and ring pointers must be serialised). This will be used by observers to ensure that the seqno/hwsp is stable. Later, when we have reduced retiring to only operate on a single timeline at a time, we can then use the mutex as the sole guard required for retiring. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190301110547.14758-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_timeline.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_timeline.h') diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 7bec7d2e45bf..36c3849f7108 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -44,6 +44,8 @@ struct i915_timeline { #define TIMELINE_CLIENT 0 /* default subclass */ #define TIMELINE_ENGINE 1 + struct mutex mutex; /* protects the flow of requests */ + unsigned int pin_count; const u32 *hwsp_seqno; struct i915_vma *hwsp_ggtt; -- cgit v1.2.3-59-g8ed1b From ebece7539242a9204e5748fb6a6b5031d220b164 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Mar 2019 17:08:59 +0000 Subject: drm/i915: Keep timeline HWSP allocated until idle across the system In preparation for enabling HW semaphores, we need to keep in flight timeline HWSP alive until its use across entire system has completed, as any other timeline active on the GPU may still refer back to the already retired timeline. 
We have to delay both recycling available cachelines and unpinning the old HWSP until the next idle point. An easy option would be to simply keep all used HWSP until the system as a whole was idle, i.e. we could release them all at once on parking. However, on a busy system, we may never see a global idle point, essentially meaning the resource will be leaked until we are forced to do a GC pass. We already employ a fine-grained idle detection mechanism for vma, which we can reuse here so that each cacheline can be freed immediately after the last request using it is retired. v3: Keep track of the activity of each cacheline. v4: cacheline_free() on canceling the seqno tracking v5: Finally with a testcase to exercise wraparound v6: Pack cacheline into empty bits of page-aligned vaddr v7: Use i915_utils to hide the pointer casting around bit manipulation Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190301170901.8340-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 31 +-- drivers/gpu/drm/i915/i915_request.h | 11 + drivers/gpu/drm/i915/i915_timeline.c | 293 +++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_timeline.h | 11 +- drivers/gpu/drm/i915/selftests/i915_timeline.c | 113 ++++++++++ 5 files changed, 420 insertions(+), 39 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_timeline.h') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 719d1a5ab082..d354967d6ae8 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -325,11 +325,6 @@ void i915_request_retire_upto(struct i915_request *rq) } while (tmp != rq); } -static u32 timeline_get_seqno(struct i915_timeline *tl) -{ - return tl->seqno += 1 + tl->has_initial_breadcrumb; -} - static void move_to_timeline(struct i915_request *request, struct i915_timeline *timeline) { @@ -532,8 +527,10 @@ struct i915_request * i915_request_alloc(struct intel_engine_cs
*engine, struct i915_gem_context *ctx) { struct drm_i915_private *i915 = engine->i915; - struct i915_request *rq; struct intel_context *ce; + struct i915_timeline *tl; + struct i915_request *rq; + u32 seqno; int ret; lockdep_assert_held(&i915->drm.struct_mutex); @@ -610,24 +607,27 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) } } - rq->rcustate = get_state_synchronize_rcu(); - INIT_LIST_HEAD(&rq->active_list); + + tl = ce->ring->timeline; + ret = i915_timeline_get_seqno(tl, rq, &seqno); + if (ret) + goto err_free; + rq->i915 = i915; rq->engine = engine; rq->gem_context = ctx; rq->hw_context = ce; rq->ring = ce->ring; - rq->timeline = ce->ring->timeline; + rq->timeline = tl; GEM_BUG_ON(rq->timeline == &engine->timeline); - rq->hwsp_seqno = rq->timeline->hwsp_seqno; + rq->hwsp_seqno = tl->hwsp_seqno; + rq->hwsp_cacheline = tl->hwsp_cacheline; + rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ spin_lock_init(&rq->lock); - dma_fence_init(&rq->fence, - &i915_fence_ops, - &rq->lock, - rq->timeline->fence_context, - timeline_get_seqno(rq->timeline)); + dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, + tl->fence_context, seqno); /* We bump the ref for the fence chain */ i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); @@ -687,6 +687,7 @@ err_unwind: GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); +err_free: kmem_cache_free(global.slab_requests, rq); err_unreserve: mutex_unlock(&ce->ring->timeline->mutex); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index be3ded6bcf56..09eaad06d2c6 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -38,6 +38,7 @@ struct drm_file; struct drm_i915_gem_object; struct i915_request; struct i915_timeline; +struct i915_timeline_cacheline; struct i915_capture_list { struct i915_capture_list *next; @@ -148,6 +149,16 @@ struct 
i915_request { */ const u32 *hwsp_seqno; + /* + * If we need to access the timeline's seqno for this request in + * another request, we need to keep a read reference to this associated + * cacheline, so that we do not free and recycle it before the foreign + * observers have completed. Hence, we keep a pointer to the cacheline + * inside the timeline's HWSP vma, but it is only valid while this + * request has not completed and guarded by the timeline mutex. + */ + struct i915_timeline_cacheline *hwsp_cacheline; + /** Position in the ring of the start of the request */ u32 head; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 87a80558da28..8484ba6e51d1 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -6,19 +6,32 @@ #include "i915_drv.h" -#include "i915_timeline.h" +#include "i915_active.h" #include "i915_syncmap.h" +#include "i915_timeline.h" + +#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) +#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) struct i915_timeline_hwsp { - struct i915_vma *vma; + struct i915_gt_timelines *gt; struct list_head free_link; + struct i915_vma *vma; u64 free_bitmap; }; -static inline struct i915_timeline_hwsp * -i915_timeline_hwsp(const struct i915_timeline *tl) +struct i915_timeline_cacheline { + struct i915_active active; + struct i915_timeline_hwsp *hwsp; + void *vaddr; +#define CACHELINE_BITS 6 +#define CACHELINE_FREE CACHELINE_BITS +}; + +static inline struct drm_i915_private * +hwsp_to_i915(struct i915_timeline_hwsp *hwsp) { - return tl->hwsp_ggtt->private; + return container_of(hwsp->gt, struct drm_i915_private, gt.timelines); } static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915) @@ -71,6 +84,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline) vma->private = hwsp; hwsp->vma = vma; hwsp->free_bitmap = ~0ull; + hwsp->gt = gt; spin_lock(&gt->hwsp_lock);
list_add(&hwsp->free_link, &gt->hwsp_free_list); @@ -88,14 +102,9 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline) return hwsp->vma; } -static void hwsp_free(struct i915_timeline *timeline) +static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline) { - struct i915_gt_timelines *gt = &timeline->i915->gt.timelines; - struct i915_timeline_hwsp *hwsp; - - hwsp = i915_timeline_hwsp(timeline); - if (!hwsp) /* leave global HWSP alone! */ - return; + struct i915_gt_timelines *gt = hwsp->gt; spin_lock(&gt->hwsp_lock); @@ -103,7 +112,8 @@ static void hwsp_free(struct i915_timeline *timeline) if (!hwsp->free_bitmap) list_add_tail(&hwsp->free_link, &gt->hwsp_free_list); - hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES); + GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap)); + hwsp->free_bitmap |= BIT_ULL(cacheline); /* And if no one is left using it, give the page back to the system */ if (hwsp->free_bitmap == ~0ull) { @@ -115,6 +125,76 @@ static void hwsp_free(struct i915_timeline *timeline) spin_unlock(&gt->hwsp_lock); } +static void __idle_cacheline_free(struct i915_timeline_cacheline *cl) +{ + GEM_BUG_ON(!i915_active_is_idle(&cl->active)); + + i915_gem_object_unpin_map(cl->hwsp->vma->obj); + i915_vma_put(cl->hwsp->vma); + __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); + + i915_active_fini(&cl->active); + kfree(cl); +} + +static void __cacheline_retire(struct i915_active *active) +{ + struct i915_timeline_cacheline *cl = + container_of(active, typeof(*cl), active); + + i915_vma_unpin(cl->hwsp->vma); + if (ptr_test_bit(cl->vaddr, CACHELINE_FREE)) + __idle_cacheline_free(cl); +} + +static struct i915_timeline_cacheline * +cacheline_alloc(struct i915_timeline_hwsp *hwsp, unsigned int cacheline) +{ + struct i915_timeline_cacheline *cl; + void *vaddr; + + GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS)); + + cl = kmalloc(sizeof(*cl), GFP_KERNEL); + if (!cl) + return ERR_PTR(-ENOMEM); + + vaddr =
i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + kfree(cl); + return ERR_CAST(vaddr); + } + + i915_vma_get(hwsp->vma); + cl->hwsp = hwsp; + cl->vaddr = page_pack_bits(vaddr, cacheline); + + i915_active_init(hwsp_to_i915(hwsp), &cl->active, __cacheline_retire); + + return cl; +} + +static void cacheline_acquire(struct i915_timeline_cacheline *cl) +{ + if (cl && i915_active_acquire(&cl->active)) + __i915_vma_pin(cl->hwsp->vma); +} + +static void cacheline_release(struct i915_timeline_cacheline *cl) +{ + if (cl) + i915_active_release(&cl->active); +} + +static void cacheline_free(struct i915_timeline_cacheline *cl) +{ + GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE)); + cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE); + + if (i915_active_is_idle(&cl->active)) + __idle_cacheline_free(cl); +} + int i915_timeline_init(struct drm_i915_private *i915, struct i915_timeline *timeline, const char *name, @@ -136,29 +216,40 @@ int i915_timeline_init(struct drm_i915_private *i915, timeline->name = name; timeline->pin_count = 0; timeline->has_initial_breadcrumb = !hwsp; + timeline->hwsp_cacheline = NULL; - timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; if (!hwsp) { + struct i915_timeline_cacheline *cl; unsigned int cacheline; hwsp = hwsp_alloc(timeline, &cacheline); if (IS_ERR(hwsp)) return PTR_ERR(hwsp); + cl = cacheline_alloc(hwsp->private, cacheline); + if (IS_ERR(cl)) { + __idle_hwsp_free(hwsp->private, cacheline); + return PTR_ERR(cl); + } + + timeline->hwsp_cacheline = cl; timeline->hwsp_offset = cacheline * CACHELINE_BYTES; - } - timeline->hwsp_ggtt = i915_vma_get(hwsp); - vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - hwsp_free(timeline); - i915_vma_put(hwsp); - return PTR_ERR(vaddr); + vaddr = page_mask_bits(cl->vaddr); + } else { + timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; + + vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); } 
timeline->hwsp_seqno = memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES); + timeline->hwsp_ggtt = i915_vma_get(hwsp); + GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size); + timeline->fence_context = dma_fence_context_alloc(1); spin_lock_init(&timeline->lock); @@ -240,9 +331,12 @@ void i915_timeline_fini(struct i915_timeline *timeline) GEM_BUG_ON(i915_active_request_isset(&timeline->barrier)); i915_syncmap_free(&timeline->sync); - hwsp_free(timeline); - i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); + if (timeline->hwsp_cacheline) + cacheline_free(timeline->hwsp_cacheline); + else + i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); + i915_vma_put(timeline->hwsp_ggtt); } @@ -285,6 +379,7 @@ int i915_timeline_pin(struct i915_timeline *tl) i915_ggtt_offset(tl->hwsp_ggtt) + offset_in_page(tl->hwsp_offset); + cacheline_acquire(tl->hwsp_cacheline); timeline_add_to_active(tl); return 0; @@ -294,6 +389,157 @@ unpin: return err; } +static u32 timeline_advance(struct i915_timeline *tl) +{ + GEM_BUG_ON(!tl->pin_count); + GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb); + + return tl->seqno += 1 + tl->has_initial_breadcrumb; +} + +static void timeline_rollback(struct i915_timeline *tl) +{ + tl->seqno -= 1 + tl->has_initial_breadcrumb; +} + +static noinline int +__i915_timeline_get_seqno(struct i915_timeline *tl, + struct i915_request *rq, + u32 *seqno) +{ + struct i915_timeline_cacheline *cl; + unsigned int cacheline; + struct i915_vma *vma; + void *vaddr; + int err; + + /* + * If there is an outstanding GPU reference to this cacheline, + * such as it being sampled by a HW semaphore on another timeline, + * we cannot wraparound our seqno value (the HW semaphore does + * a strict greater-than-or-equals compare, not i915_seqno_passed). + * So if the cacheline is still busy, we must detach ourselves + * from it and leave it inflight alongside its users. 
+ * + * However, if nobody is watching and we can guarantee that nobody + * will, we could simply reuse the same cacheline. + * + * if (i915_active_request_is_signaled(&tl->last_request) && + * i915_active_is_signaled(&tl->hwsp_cacheline->active)) + * return 0; + * + * That seems unlikely for a busy timeline that needed to wrap in + * the first place, so just replace the cacheline. + */ + + vma = hwsp_alloc(tl, &cacheline); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_rollback; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) { + __idle_hwsp_free(vma->private, cacheline); + goto err_rollback; + } + + cl = cacheline_alloc(vma->private, cacheline); + if (IS_ERR(cl)) { + err = PTR_ERR(cl); + __idle_hwsp_free(vma->private, cacheline); + goto err_unpin; + } + GEM_BUG_ON(cl->hwsp->vma != vma); + + /* + * Attach the old cacheline to the current request, so that we only + * free it after the current request is retired, which ensures that + * all writes into the cacheline from previous requests are complete. 
+ */ + err = i915_active_ref(&tl->hwsp_cacheline->active, + tl->fence_context, rq); + if (err) + goto err_cacheline; + + cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */ + cacheline_free(tl->hwsp_cacheline); + + i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */ + i915_vma_put(tl->hwsp_ggtt); + + tl->hwsp_ggtt = i915_vma_get(vma); + + vaddr = page_mask_bits(cl->vaddr); + tl->hwsp_offset = cacheline * CACHELINE_BYTES; + tl->hwsp_seqno = + memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES); + + tl->hwsp_offset += i915_ggtt_offset(vma); + + cacheline_acquire(cl); + tl->hwsp_cacheline = cl; + + *seqno = timeline_advance(tl); + GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno)); + return 0; + +err_cacheline: + cacheline_free(cl); +err_unpin: + i915_vma_unpin(vma); +err_rollback: + timeline_rollback(tl); + return err; +} + +int i915_timeline_get_seqno(struct i915_timeline *tl, + struct i915_request *rq, + u32 *seqno) +{ + *seqno = timeline_advance(tl); + + /* Replace the HWSP on wraparound for HW semaphores */ + if (unlikely(!*seqno && tl->hwsp_cacheline)) + return __i915_timeline_get_seqno(tl, rq, seqno); + + return 0; +} + +static int cacheline_ref(struct i915_timeline_cacheline *cl, + struct i915_request *rq) +{ + return i915_active_ref(&cl->active, rq->fence.context, rq); +} + +int i915_timeline_read_hwsp(struct i915_request *from, + struct i915_request *to, + u32 *hwsp) +{ + struct i915_timeline_cacheline *cl = from->hwsp_cacheline; + struct i915_timeline *tl = from->timeline; + int err; + + GEM_BUG_ON(to->timeline == tl); + + mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); + err = i915_request_completed(from); + if (!err) + err = cacheline_ref(cl, to); + if (!err) { + if (likely(cl == tl->hwsp_cacheline)) { + *hwsp = tl->hwsp_offset; + } else { /* across a seqno wrap, recover the original offset */ + *hwsp = i915_ggtt_offset(cl->hwsp->vma) + + ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * + CACHELINE_BYTES; + } 
+ } + mutex_unlock(&tl->mutex); + + return err; +} + void i915_timeline_unpin(struct i915_timeline *tl) { GEM_BUG_ON(!tl->pin_count); @@ -301,6 +547,7 @@ void i915_timeline_unpin(struct i915_timeline *tl) return; timeline_remove_from_active(tl); + cacheline_release(tl->hwsp_cacheline); /* * Since this timeline is idle, all bariers upon which we were waiting diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 36c3849f7108..60b1dfad93ed 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -34,7 +34,7 @@ #include "i915_utils.h" struct i915_vma; -struct i915_timeline_hwsp; +struct i915_timeline_cacheline; struct i915_timeline { u64 fence_context; @@ -51,6 +51,8 @@ struct i915_timeline { struct i915_vma *hwsp_ggtt; u32 hwsp_offset; + struct i915_timeline_cacheline *hwsp_cacheline; + bool has_initial_breadcrumb; /** @@ -162,8 +164,15 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, } int i915_timeline_pin(struct i915_timeline *tl); +int i915_timeline_get_seqno(struct i915_timeline *tl, + struct i915_request *rq, + u32 *seqno); void i915_timeline_unpin(struct i915_timeline *tl); +int i915_timeline_read_hwsp(struct i915_request *from, + struct i915_request *until, + u32 *hwsp_offset); + void i915_timelines_init(struct drm_i915_private *i915); void i915_timelines_park(struct drm_i915_private *i915); void i915_timelines_fini(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index 12ea69b1a1e5..844701759ffc 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -641,6 +641,118 @@ out: #undef NUM_TIMELINES } +static int live_hwsp_wrap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_timeline *tl; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = 0; + 
+ /* + * Across a seqno wrap, we need to keep the old cacheline alive for + * foreign GPU references. + */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + tl = i915_timeline_create(i915, __func__, NULL); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out_rpm; + } + if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) + goto out_free; + + err = i915_timeline_pin(tl); + if (err) + goto out_free; + + for_each_engine(engine, i915, id) { + const u32 *hwsp_seqno[2]; + struct i915_request *rq; + u32 seqno[2]; + + if (!intel_engine_can_store_dword(engine)) + continue; + + rq = i915_request_alloc(engine, i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + tl->seqno = -4u; + + err = i915_timeline_get_seqno(tl, rq, &seqno[0]); + if (err) { + i915_request_add(rq); + goto out; + } + pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n", + seqno[0], tl->hwsp_offset); + + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]); + if (err) { + i915_request_add(rq); + goto out; + } + hwsp_seqno[0] = tl->hwsp_seqno; + + err = i915_timeline_get_seqno(tl, rq, &seqno[1]); + if (err) { + i915_request_add(rq); + goto out; + } + pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n", + seqno[1], tl->hwsp_offset); + + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]); + if (err) { + i915_request_add(rq); + goto out; + } + hwsp_seqno[1] = tl->hwsp_seqno; + + /* With wrap should come a new hwsp */ + GEM_BUG_ON(seqno[1] >= seqno[0]); + GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]); + + i915_request_add(rq); + + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) { + pr_err("Wait for timeline writes timed out!\n"); + err = -EIO; + goto out; + } + + if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) { + pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n", + *hwsp_seqno[0], *hwsp_seqno[1], + seqno[0], seqno[1]); + err = -EINVAL; + goto out; + } + + i915_retire_requests(i915); /* recycle HWSP */ + } + +out: + 
if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + i915_timeline_unpin(tl); +out_free: + i915_timeline_put(tl); +out_rpm: + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + static int live_hwsp_recycle(void *arg) { struct drm_i915_private *i915 = arg; @@ -723,6 +835,7 @@ int i915_timeline_live_selftests(struct drm_i915_private *i915) SUBTEST(live_hwsp_recycle), SUBTEST(live_hwsp_engine), SUBTEST(live_hwsp_alternate), + SUBTEST(live_hwsp_wrap), }; return i915_subtests(tests, i915); -- cgit v1.2.3-59-g8ed1b From 39e2f501c1b431bd9291308e1ef02b9a02fffbee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Mar 2019 13:25:17 +0000 Subject: drm/i915: Split struct intel_context definition to its own header This complex struct pulling in half the driver deserves its own isolation in preparation for intel_context becoming an outright complicated class of its own. In order to split this beast into its own header also requires splitting several of its dependent types and their dependencies into their own headers as well.
v2: Add standalone compilation tests Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190308132522.21573-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 9 + drivers/gpu/drm/i915/i915_gem_context.h | 238 +--------- drivers/gpu/drm/i915/i915_gem_context_types.h | 181 +++++++ drivers/gpu/drm/i915/i915_timeline.h | 70 +-- drivers/gpu/drm/i915/i915_timeline_types.h | 80 ++++ drivers/gpu/drm/i915/intel_context.h | 47 ++ drivers/gpu/drm/i915/intel_context_types.h | 60 +++ drivers/gpu/drm/i915/intel_engine_types.h | 521 +++++++++++++++++++++ drivers/gpu/drm/i915/intel_guc.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 502 +------------------- drivers/gpu/drm/i915/intel_workarounds.h | 13 +- drivers/gpu/drm/i915/intel_workarounds_types.h | 27 ++ .../drm/i915/test_i915_active_types_standalone.c | 7 + .../i915/test_i915_gem_context_types_standalone.c | 7 + .../drm/i915/test_i915_timeline_types_standalone.c | 7 + .../drm/i915/test_intel_context_types_standalone.c | 7 + .../drm/i915/test_intel_engine_types_standalone.c | 7 + .../i915/test_intel_workarounds_types_standalone.c | 7 + 18 files changed, 974 insertions(+), 817 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_gem_context_types.h create mode 100644 drivers/gpu/drm/i915/i915_timeline_types.h create mode 100644 drivers/gpu/drm/i915/intel_context.h create mode 100644 drivers/gpu/drm/i915/intel_context_types.h create mode 100644 drivers/gpu/drm/i915/intel_engine_types.h create mode 100644 drivers/gpu/drm/i915/intel_workarounds_types.h create mode 100644 drivers/gpu/drm/i915/test_i915_active_types_standalone.c create mode 100644 drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c create mode 100644 drivers/gpu/drm/i915/test_i915_timeline_types_standalone.c create mode 100644 drivers/gpu/drm/i915/test_intel_context_types_standalone.c create mode 100644 drivers/gpu/drm/i915/test_intel_engine_types_standalone.c create mode 100644 
drivers/gpu/drm/i915/test_intel_workarounds_types_standalone.c (limited to 'drivers/gpu/drm/i915/i915_timeline.h') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index a1d834068765..a230553367de 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -56,6 +56,15 @@ i915-$(CONFIG_COMPAT) += i915_ioc32.o i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o +# Test the headers are compilable as standalone units +i915-$(CONFIG_DRM_I915_WERROR) += \ + test_i915_active_types_standalone.o \ + test_i915_gem_context_types_standalone.o \ + test_i915_timeline_types_standalone.o \ + test_intel_context_types_standalone.o \ + test_intel_engine_types_standalone.o \ + test_intel_workarounds_types_standalone.o + # GEM code i915-y += \ i915_active.o \ diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 124c2a082b99..00698944a0ee 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -25,218 +25,17 @@ #ifndef __I915_GEM_CONTEXT_H__ #define __I915_GEM_CONTEXT_H__ -#include -#include -#include +#include "i915_gem_context_types.h" #include "i915_gem.h" #include "i915_scheduler.h" +#include "intel_context.h" #include "intel_device_info.h" #include "intel_ringbuffer.h" -struct pid; - struct drm_device; struct drm_file; -struct drm_i915_private; -struct drm_i915_file_private; -struct i915_hw_ppgtt; -struct i915_request; -struct i915_vma; -struct intel_ring; - -#define DEFAULT_CONTEXT_HANDLE 0 - -struct intel_context; - -struct intel_context_ops { - void (*unpin)(struct intel_context *ce); - void (*destroy)(struct intel_context *ce); -}; - -/* - * Powergating configuration for a particular (context,engine). 
- */ -struct intel_sseu { - u8 slice_mask; - u8 subslice_mask; - u8 min_eus_per_subslice; - u8 max_eus_per_subslice; -}; - -/** - * struct i915_gem_context - client state - * - * The struct i915_gem_context represents the combined view of the driver and - * logical hardware state for a particular client. - */ -struct i915_gem_context { - /** i915: i915 device backpointer */ - struct drm_i915_private *i915; - - /** file_priv: owning file descriptor */ - struct drm_i915_file_private *file_priv; - - /** - * @ppgtt: unique address space (GTT) - * - * In full-ppgtt mode, each context has its own address space ensuring - * complete seperation of one client from all others. - * - * In other modes, this is a NULL pointer with the expectation that - * the caller uses the shared global GTT. - */ - struct i915_hw_ppgtt *ppgtt; - - /** - * @pid: process id of creator - * - * Note that who created the context may not be the principle user, - * as the context may be shared across a local socket. However, - * that should only affect the default context, all contexts created - * explicitly by the client are expected to be isolated. - */ - struct pid *pid; - - /** - * @name: arbitrary name - * - * A name is constructed for the context from the creator's process - * name, pid and user handle in order to uniquely identify the - * context in messages. - */ - const char *name; - - /** link: place with &drm_i915_private.context_list */ - struct list_head link; - struct llist_node free_link; - - /** - * @ref: reference count - * - * A reference to a context is held by both the client who created it - * and on each request submitted to the hardware using the request - * (to ensure the hardware has access to the state until it has - * finished all pending writes). See i915_gem_context_get() and - * i915_gem_context_put() for access. - */ - struct kref ref; - - /** - * @rcu: rcu_head for deferred freeing. 
- */ - struct rcu_head rcu; - - /** - * @user_flags: small set of booleans controlled by the user - */ - unsigned long user_flags; -#define UCONTEXT_NO_ZEROMAP 0 -#define UCONTEXT_NO_ERROR_CAPTURE 1 -#define UCONTEXT_BANNABLE 2 -#define UCONTEXT_RECOVERABLE 3 - - /** - * @flags: small set of booleans - */ - unsigned long flags; -#define CONTEXT_BANNED 0 -#define CONTEXT_CLOSED 1 -#define CONTEXT_FORCE_SINGLE_SUBMISSION 2 - - /** - * @hw_id: - unique identifier for the context - * - * The hardware needs to uniquely identify the context for a few - * functions like fault reporting, PASID, scheduling. The - * &drm_i915_private.context_hw_ida is used to assign a unqiue - * id for the lifetime of the context. - * - * @hw_id_pin_count: - number of times this context had been pinned - * for use (should be, at most, once per engine). - * - * @hw_id_link: - all contexts with an assigned id are tracked - * for possible repossession. - */ - unsigned int hw_id; - atomic_t hw_id_pin_count; - struct list_head hw_id_link; - - struct list_head active_engines; - struct mutex mutex; - - /** - * @user_handle: userspace identifier - * - * A unique per-file identifier is generated from - * &drm_i915_file_private.contexts. - */ - u32 user_handle; - - struct i915_sched_attr sched; - - /** engine: per-engine logical HW state */ - struct intel_context { - struct i915_gem_context *gem_context; - struct intel_engine_cs *engine; - struct intel_engine_cs *active; - struct list_head active_link; - struct list_head signal_link; - struct list_head signals; - struct i915_vma *state; - struct intel_ring *ring; - u32 *lrc_reg_state; - u64 lrc_desc; - int pin_count; - - /** - * active_tracker: Active tracker for the external rq activity - * on this intel_context object. 
- */ - struct i915_active_request active_tracker; - - const struct intel_context_ops *ops; - - /** sseu: Control eu/slice partitioning */ - struct intel_sseu sseu; - } __engine[I915_NUM_ENGINES]; - - /** ring_size: size for allocating the per-engine ring buffer */ - u32 ring_size; - /** desc_template: invariant fields for the HW context descriptor */ - u32 desc_template; - - /** guilty_count: How many times this context has caused a GPU hang. */ - atomic_t guilty_count; - /** - * @active_count: How many times this context was active during a GPU - * hang, but did not cause it. - */ - atomic_t active_count; - - /** - * @hang_timestamp: The last time(s) this context caused a GPU hang - */ - unsigned long hang_timestamp[2]; -#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */ - - /** remap_slice: Bitmask of cache lines that need remapping */ - u8 remap_slice; - - /** handles_vma: rbtree to look up our context specific obj/vma for - * the user handle. (user handles are per fd, but the binding is - * per vm, which may be one per context or shared with the global GTT) - */ - struct radix_tree_root handles_vma; - - /** handles_list: reverse list of all the rbtree entries in use for - * this context, which allows us to free all the allocations on - * context close. 
- */ - struct list_head handles_list; -}; - static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx) { return test_bit(CONTEXT_CLOSED, &ctx->flags); @@ -338,35 +137,6 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) return !ctx->file_priv; } -static inline struct intel_context * -to_intel_context(struct i915_gem_context *ctx, - const struct intel_engine_cs *engine) -{ - return &ctx->__engine[engine->id]; -} - -static inline struct intel_context * -intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) -{ - return engine->context_pin(engine, ctx); -} - -static inline void __intel_context_pin(struct intel_context *ce) -{ - GEM_BUG_ON(!ce->pin_count); - ce->pin_count++; -} - -static inline void intel_context_unpin(struct intel_context *ce) -{ - GEM_BUG_ON(!ce->pin_count); - if (--ce->pin_count) - return; - - GEM_BUG_ON(!ce->ops); - ce->ops->unpin(ce); -} - /* i915_gem_context.c */ int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); void i915_gem_contexts_lost(struct drm_i915_private *dev_priv); @@ -410,10 +180,6 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); } -void intel_context_init(struct intel_context *ce, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine); - struct i915_lut_handle *i915_lut_handle_alloc(void); void i915_lut_handle_free(struct i915_lut_handle *lut); diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h new file mode 100644 index 000000000000..59800d749510 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h @@ -0,0 +1,181 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_CONTEXT_TYPES_H__ +#define __I915_GEM_CONTEXT_TYPES_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "i915_gem.h" /* 
I915_NUM_ENGINES */ +#include "i915_scheduler.h" +#include "intel_context_types.h" + +struct pid; + +struct drm_i915_private; +struct drm_i915_file_private; +struct i915_hw_ppgtt; +struct i915_timeline; +struct intel_ring; + +/** + * struct i915_gem_context - client state + * + * The struct i915_gem_context represents the combined view of the driver and + * logical hardware state for a particular client. + */ +struct i915_gem_context { + /** i915: i915 device backpointer */ + struct drm_i915_private *i915; + + /** file_priv: owning file descriptor */ + struct drm_i915_file_private *file_priv; + + /** + * @ppgtt: unique address space (GTT) + * + * In full-ppgtt mode, each context has its own address space ensuring + * complete separation of one client from all others. + * + * In other modes, this is a NULL pointer with the expectation that + * the caller uses the shared global GTT. + */ + struct i915_hw_ppgtt *ppgtt; + + /** + * @pid: process id of creator + * + * Note that who created the context may not be the principal user, + * as the context may be shared across a local socket. However, + * that should only affect the default context, all contexts created + * explicitly by the client are expected to be isolated. + */ + struct pid *pid; + + /** + * @name: arbitrary name + * + * A name is constructed for the context from the creator's process + * name, pid and user handle in order to uniquely identify the + * context in messages. + */ + const char *name; + + /** link: place with &drm_i915_private.context_list */ + struct list_head link; + struct llist_node free_link; + + /** + * @ref: reference count + * + * A reference to a context is held by both the client who created it + * and on each request submitted to the hardware using the request + * (to ensure the hardware has access to the state until it has + * finished all pending writes). See i915_gem_context_get() and + * i915_gem_context_put() for access.
+ */ + struct kref ref; + + /** + * @rcu: rcu_head for deferred freeing. + */ + struct rcu_head rcu; + + /** + * @user_flags: small set of booleans controlled by the user + */ + unsigned long user_flags; +#define UCONTEXT_NO_ZEROMAP 0 +#define UCONTEXT_NO_ERROR_CAPTURE 1 +#define UCONTEXT_BANNABLE 2 +#define UCONTEXT_RECOVERABLE 3 + + /** + * @flags: small set of booleans + */ + unsigned long flags; +#define CONTEXT_BANNED 0 +#define CONTEXT_CLOSED 1 +#define CONTEXT_FORCE_SINGLE_SUBMISSION 2 + + /** + * @hw_id: - unique identifier for the context + * + * The hardware needs to uniquely identify the context for a few + * functions like fault reporting, PASID, scheduling. The + * &drm_i915_private.context_hw_ida is used to assign a unique + * id for the lifetime of the context. + * + * @hw_id_pin_count: - number of times this context had been pinned + * for use (should be, at most, once per engine). + * + * @hw_id_link: - all contexts with an assigned id are tracked + * for possible repossession. + */ + unsigned int hw_id; + atomic_t hw_id_pin_count; + struct list_head hw_id_link; + + struct list_head active_engines; + struct mutex mutex; + + /** + * @user_handle: userspace identifier + * + * A unique per-file identifier is generated from + * &drm_i915_file_private.contexts. + */ + u32 user_handle; +#define DEFAULT_CONTEXT_HANDLE 0 + + struct i915_sched_attr sched; + + /** engine: per-engine logical HW state */ + struct intel_context __engine[I915_NUM_ENGINES]; + + /** ring_size: size for allocating the per-engine ring buffer */ + u32 ring_size; + /** desc_template: invariant fields for the HW context descriptor */ + u32 desc_template; + + /** guilty_count: How many times this context has caused a GPU hang. */ + atomic_t guilty_count; + /** + * @active_count: How many times this context was active during a GPU + * hang, but did not cause it.
+ */ + atomic_t active_count; + + /** + * @hang_timestamp: The last time(s) this context caused a GPU hang + */ + unsigned long hang_timestamp[2]; +#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */ + + /** remap_slice: Bitmask of cache lines that need remapping */ + u8 remap_slice; + + /** handles_vma: rbtree to look up our context specific obj/vma for + * the user handle. (user handles are per fd, but the binding is + * per vm, which may be one per context or shared with the global GTT) + */ + struct radix_tree_root handles_vma; + + /** handles_list: reverse list of all the rbtree entries in use for + * this context, which allows us to free all the allocations on + * context close. + */ + struct list_head handles_list; +}; + +#endif /* __I915_GEM_CONTEXT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 60b1dfad93ed..9126c8206490 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -25,76 +25,10 @@ #ifndef I915_TIMELINE_H #define I915_TIMELINE_H -#include -#include +#include -#include "i915_active.h" -#include "i915_request.h" #include "i915_syncmap.h" -#include "i915_utils.h" - -struct i915_vma; -struct i915_timeline_cacheline; - -struct i915_timeline { - u64 fence_context; - u32 seqno; - - spinlock_t lock; -#define TIMELINE_CLIENT 0 /* default subclass */ -#define TIMELINE_ENGINE 1 - - struct mutex mutex; /* protects the flow of requests */ - - unsigned int pin_count; - const u32 *hwsp_seqno; - struct i915_vma *hwsp_ggtt; - u32 hwsp_offset; - - struct i915_timeline_cacheline *hwsp_cacheline; - - bool has_initial_breadcrumb; - - /** - * List of breadcrumbs associated with GPU requests currently - * outstanding. - */ - struct list_head requests; - - /* Contains an RCU guarded pointer to the last request. 
No reference is - * held to the request, users must carefully acquire a reference to - * the request using i915_active_request_get_request_rcu(), or hold the - * struct_mutex. - */ - struct i915_active_request last_request; - - /** - * We track the most recent seqno that we wait on in every context so - * that we only have to emit a new await and dependency on a more - * recent sync point. As the contexts may be executed out-of-order, we - * have to track each individually and can not rely on an absolute - * global_seqno. When we know that all tracked fences are completed - * (i.e. when the driver is idle), we know that the syncmap is - * redundant and we can discard it without loss of generality. - */ - struct i915_syncmap *sync; - - /** - * Barrier provides the ability to serialize ordering between different - * timelines. - * - * Users can call i915_timeline_set_barrier which will make all - * subsequent submissions to this timeline be executed only after the - * barrier has been completed. 
- */ - struct i915_active_request barrier; - - struct list_head link; - const char *name; - struct drm_i915_private *i915; - - struct kref kref; -}; +#include "i915_timeline_types.h" int i915_timeline_init(struct drm_i915_private *i915, struct i915_timeline *tl, diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h new file mode 100644 index 000000000000..8ff146dc05ba --- /dev/null +++ b/drivers/gpu/drm/i915/i915_timeline_types.h @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __I915_TIMELINE_TYPES_H__ +#define __I915_TIMELINE_TYPES_H__ + +#include +#include +#include + +#include "i915_active.h" + +struct drm_i915_private; +struct i915_vma; +struct i915_timeline_cacheline; +struct i915_syncmap; + +struct i915_timeline { + u64 fence_context; + u32 seqno; + + spinlock_t lock; +#define TIMELINE_CLIENT 0 /* default subclass */ +#define TIMELINE_ENGINE 1 + struct mutex mutex; /* protects the flow of requests */ + + unsigned int pin_count; + const u32 *hwsp_seqno; + struct i915_vma *hwsp_ggtt; + u32 hwsp_offset; + + struct i915_timeline_cacheline *hwsp_cacheline; + + bool has_initial_breadcrumb; + + /** + * List of breadcrumbs associated with GPU requests currently + * outstanding. + */ + struct list_head requests; + + /* Contains an RCU guarded pointer to the last request. No reference is + * held to the request, users must carefully acquire a reference to + * the request using i915_active_request_get_request_rcu(), or hold the + * struct_mutex. + */ + struct i915_active_request last_request; + + /** + * We track the most recent seqno that we wait on in every context so + * that we only have to emit a new await and dependency on a more + * recent sync point. As the contexts may be executed out-of-order, we + * have to track each individually and can not rely on an absolute + * global_seqno. When we know that all tracked fences are completed + * (i.e. 
when the driver is idle), we know that the syncmap is + * redundant and we can discard it without loss of generality. + */ + struct i915_syncmap *sync; + + /** + * Barrier provides the ability to serialize ordering between different + * timelines. + * + * Users can call i915_timeline_set_barrier which will make all + * subsequent submissions to this timeline be executed only after the + * barrier has been completed. + */ + struct i915_active_request barrier; + + struct list_head link; + const char *name; + struct drm_i915_private *i915; + + struct kref kref; +}; + +#endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h new file mode 100644 index 000000000000..dd947692bb0b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_context.h @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_CONTEXT_H__ +#define __INTEL_CONTEXT_H__ + +#include "i915_gem_context_types.h" +#include "intel_context_types.h" +#include "intel_engine_types.h" + +void intel_context_init(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine); + +static inline struct intel_context * +to_intel_context(struct i915_gem_context *ctx, + const struct intel_engine_cs *engine) +{ + return &ctx->__engine[engine->id]; +} + +static inline struct intel_context * +intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + return engine->context_pin(engine, ctx); +} + +static inline void __intel_context_pin(struct intel_context *ce) +{ + GEM_BUG_ON(!ce->pin_count); + ce->pin_count++; +} + +static inline void intel_context_unpin(struct intel_context *ce) +{ + GEM_BUG_ON(!ce->pin_count); + if (--ce->pin_count) + return; + + GEM_BUG_ON(!ce->ops); + ce->ops->unpin(ce); +} + +#endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h new file mode 
100644 index 000000000000..16e1306e9595 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_context_types.h @@ -0,0 +1,60 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_CONTEXT_TYPES__ +#define __INTEL_CONTEXT_TYPES__ + +#include +#include + +#include "i915_active_types.h" + +struct i915_gem_context; +struct i915_vma; +struct intel_context; +struct intel_ring; + +struct intel_context_ops { + void (*unpin)(struct intel_context *ce); + void (*destroy)(struct intel_context *ce); +}; + +/* + * Powergating configuration for a particular (context,engine). + */ +struct intel_sseu { + u8 slice_mask; + u8 subslice_mask; + u8 min_eus_per_subslice; + u8 max_eus_per_subslice; +}; + +struct intel_context { + struct i915_gem_context *gem_context; + struct intel_engine_cs *engine; + struct intel_engine_cs *active; + struct list_head active_link; + struct list_head signal_link; + struct list_head signals; + struct i915_vma *state; + struct intel_ring *ring; + u32 *lrc_reg_state; + u64 lrc_desc; + int pin_count; + + /** + * active_tracker: Active tracker for the external rq activity + * on this intel_context object. 
+ */ + struct i915_active_request active_tracker; + + const struct intel_context_ops *ops; + + /** sseu: Control eu/slice partitioning */ + struct intel_sseu sseu; +}; + +#endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h new file mode 100644 index 000000000000..f7ceda334169 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_engine_types.h @@ -0,0 +1,521 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_ENGINE_TYPES__ +#define __INTEL_ENGINE_TYPES__ + +#include +#include +#include +#include + +#include "i915_timeline_types.h" +#include "intel_device_info.h" +#include "intel_workarounds_types.h" + +#include "i915_gem_batch_pool.h" +#include "i915_pmu.h" + +#define I915_MAX_SLICES 3 +#define I915_MAX_SUBSLICES 8 + +#define I915_CMD_HASH_ORDER 9 + +struct drm_i915_reg_table; +struct i915_gem_context; +struct i915_request; +struct i915_sched_attr; + +struct intel_hw_status_page { + struct i915_vma *vma; + u32 *addr; +}; + +struct intel_instdone { + u32 instdone; + /* The following exist only in the RCS engine */ + u32 slice_common; + u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; + u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; +}; + +struct intel_engine_hangcheck { + u64 acthd; + u32 last_seqno; + u32 next_seqno; + unsigned long action_timestamp; + struct intel_instdone instdone; +}; + +struct intel_ring { + struct i915_vma *vma; + void *vaddr; + + struct i915_timeline *timeline; + struct list_head request_list; + struct list_head active_link; + + u32 head; + u32 tail; + u32 emit; + + u32 space; + u32 size; + u32 effective_size; +}; + +/* + * we use a single page to load ctx workarounds so all of these + * values are referred in terms of dwords + * + * struct i915_wa_ctx_bb: + * offset: specifies batch starting position, also helpful in case + * if we want to have multiple batches at different offsets based on + * some criteria. 
It is not a requirement at the moment but provides + * an option for future use. + * size: size of the batch in DWORDS + */ +struct i915_ctx_workarounds { + struct i915_wa_ctx_bb { + u32 offset; + u32 size; + } indirect_ctx, per_ctx; + struct i915_vma *vma; +}; + +#define I915_MAX_VCS 4 +#define I915_MAX_VECS 2 + +/* + * Engine IDs definitions. + * Keep instances of the same type engine together. + */ +enum intel_engine_id { + RCS0 = 0, + BCS0, + VCS0, + VCS1, + VCS2, + VCS3, +#define _VCS(n) (VCS0 + (n)) + VECS0, + VECS1 +#define _VECS(n) (VECS0 + (n)) +}; + +struct st_preempt_hang { + struct completion completion; + unsigned int count; + bool inject_hang; +}; + +/** + * struct intel_engine_execlists - execlist submission queue and port state + * + * The struct intel_engine_execlists represents the combined logical state of + * driver and the hardware state for execlist mode of submission. + */ +struct intel_engine_execlists { + /** + * @tasklet: softirq tasklet for bottom handler + */ + struct tasklet_struct tasklet; + + /** + * @default_priolist: priority list for I915_PRIORITY_NORMAL + */ + struct i915_priolist default_priolist; + + /** + * @no_priolist: priority lists disabled + */ + bool no_priolist; + + /** + * @submit_reg: gen-specific execlist submission register + * set to the ExecList Submission Port (elsp) register pre-Gen11 and to + * the ExecList Submission Queue Contents register array for Gen11+ + */ + u32 __iomem *submit_reg; + + /** + * @ctrl_reg: the enhanced execlists control register, used to load the + * submit queue on the HW and to request preemptions to idle + */ + u32 __iomem *ctrl_reg; + + /** + * @port: execlist port states + * + * For each hardware ELSP (ExecList Submission Port) we keep + * track of the last request and the number of times we submitted + * that port to hw. We then count the number of times the hw reports + * a context completion or preemption. 
As only one context can + * be active on hw, we limit resubmission of context to port[0]. This + * is called Lite Restore, of the context. + */ + struct execlist_port { + /** + * @request_count: combined request and submission count + */ + struct i915_request *request_count; +#define EXECLIST_COUNT_BITS 2 +#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) +#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) +#define port_set(p, packed) ((p)->request_count = (packed)) +#define port_isset(p) ((p)->request_count) +#define port_index(p, execlists) ((p) - (execlists)->port) + + /** + * @context_id: context ID for port + */ + GEM_DEBUG_DECL(u32 context_id); + +#define EXECLIST_MAX_PORTS 2 + } port[EXECLIST_MAX_PORTS]; + + /** + * @active: is the HW active? We consider the HW as active after + * submitting any context for execution and until we have seen the + * last context completion event. After that, we do not expect any + * more events until we submit, and so can park the HW. + * + * As we have a small number of different sources from which we feed + * the HW, we track the state of each inside a single bitfield. + */ + unsigned int active; +#define EXECLISTS_ACTIVE_USER 0 +#define EXECLISTS_ACTIVE_PREEMPT 1 +#define EXECLISTS_ACTIVE_HWACK 2 + + /** + * @port_mask: number of execlist ports - 1 + */ + unsigned int port_mask; + + /** + * @queue_priority_hint: Highest pending priority. + * + * When we add requests into the queue, or adjust the priority of + * executing requests, we compute the maximum priority of those + * pending requests. We can then use this value to determine if + * we need to preempt the executing requests to service the queue. 
+ * However, since we may have recorded the priority of an inflight + * request we wanted to preempt but since completed, at the time of + * dequeuing the priority hint may no longer match the highest + * available request priority. + */ + int queue_priority_hint; + + /** + * @queue: queue of requests, in priority lists + */ + struct rb_root_cached queue; + + /** + * @csb_write: control register for Context Switch buffer + * + * Note this register may be either mmio or HWSP shadow. + */ + u32 *csb_write; + + /** + * @csb_status: status array for Context Switch buffer + * + * Note these registers may be either mmio or HWSP shadow. + */ + u32 *csb_status; + + /** + * @preempt_complete_status: expected CSB upon completing preemption + */ + u32 preempt_complete_status; + + /** + * @csb_head: context status buffer head + */ + u8 csb_head; + + I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;) +}; + +#define INTEL_ENGINE_CS_MAX_NAME 8 + +struct intel_engine_cs { + struct drm_i915_private *i915; + char name[INTEL_ENGINE_CS_MAX_NAME]; + + enum intel_engine_id id; + unsigned int hw_id; + unsigned int guc_id; + intel_engine_mask_t mask; + + u8 uabi_class; + + u8 class; + u8 instance; + u32 context_size; + u32 mmio_base; + + struct intel_ring *buffer; + + struct i915_timeline timeline; + + struct drm_i915_gem_object *default_state; + void *pinned_default_state; + + /* Rather than have every client wait upon all user interrupts, + * with the herd waking after every interrupt and each doing the + * heavyweight seqno dance, we delegate the task (of being the + * bottom-half of the user interrupt) to the first client. After + * every interrupt, we wake up one client, who does the heavyweight + * coherent seqno read and either goes back to sleep (if incomplete), + * or wakes up all the completed clients in parallel, before then + * transferring the bottom-half status to the next client in the queue.
+ * + * Compared to walking the entire list of waiters in a single dedicated + * bottom-half, we reduce the latency of the first waiter by avoiding + * a context switch, but incur additional coherent seqno reads when + * following the chain of request breadcrumbs. Since it is most likely + * that we have a single client waiting on each seqno, then reducing + * the overhead of waking that client is much preferred. + */ + struct intel_breadcrumbs { + spinlock_t irq_lock; + struct list_head signalers; + + struct irq_work irq_work; /* for use from inside irq_lock */ + + unsigned int irq_enabled; + + bool irq_armed; + } breadcrumbs; + + struct intel_engine_pmu { + /** + * @enable: Bitmask of enable sample events on this engine. + * + * Bits correspond to sample event types, for instance + * I915_SAMPLE_QUEUED is bit 0 etc. + */ + u32 enable; + /** + * @enable_count: Reference count for the enabled samplers. + * + * Index number corresponds to @enum drm_i915_pmu_engine_sample. + */ + unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT]; + /** + * @sample: Counter values for sampling events. + * + * Our internal timer stores the current counters in this field. + * + * Index number corresponds to @enum drm_i915_pmu_engine_sample. + */ + struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT]; + } pmu; + + /* + * A pool of objects to use as shadow copies of client batch buffers + * when the command parser is enabled. Prevents the client from + * modifying the batch contents after software parsing. 
+ */ + struct i915_gem_batch_pool batch_pool; + + struct intel_hw_status_page status_page; + struct i915_ctx_workarounds wa_ctx; + struct i915_wa_list ctx_wa_list; + struct i915_wa_list wa_list; + struct i915_wa_list whitelist; + + u32 irq_keep_mask; /* always keep these interrupts */ + u32 irq_enable_mask; /* bitmask to enable ring interrupt */ + void (*irq_enable)(struct intel_engine_cs *engine); + void (*irq_disable)(struct intel_engine_cs *engine); + + int (*init_hw)(struct intel_engine_cs *engine); + + struct { + void (*prepare)(struct intel_engine_cs *engine); + void (*reset)(struct intel_engine_cs *engine, bool stalled); + void (*finish)(struct intel_engine_cs *engine); + } reset; + + void (*park)(struct intel_engine_cs *engine); + void (*unpark)(struct intel_engine_cs *engine); + + void (*set_default_submission)(struct intel_engine_cs *engine); + + struct intel_context *(*context_pin)(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); + + int (*request_alloc)(struct i915_request *rq); + int (*init_context)(struct i915_request *rq); + + int (*emit_flush)(struct i915_request *request, u32 mode); +#define EMIT_INVALIDATE BIT(0) +#define EMIT_FLUSH BIT(1) +#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) + int (*emit_bb_start)(struct i915_request *rq, + u64 offset, u32 length, + unsigned int dispatch_flags); +#define I915_DISPATCH_SECURE BIT(0) +#define I915_DISPATCH_PINNED BIT(1) + int (*emit_init_breadcrumb)(struct i915_request *rq); + u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, + u32 *cs); + unsigned int emit_fini_breadcrumb_dw; + + /* Pass the request to the hardware queue (e.g. directly into + * the legacy ringbuffer or to the end of an execlist). + * + * This is called from an atomic context with irqs disabled; must + * be irq safe. + */ + void (*submit_request)(struct i915_request *rq); + + /* + * Call when the priority on a request has changed and it and its + * dependencies may need rescheduling. 
Note the request itself may + * not be ready to run! + */ + void (*schedule)(struct i915_request *request, + const struct i915_sched_attr *attr); + + /* + * Cancel all requests on the hardware, or queued for execution. + * This should only cancel the ready requests that have been + * submitted to the engine (via the engine->submit_request callback). + * This is called when marking the device as wedged. + */ + void (*cancel_requests)(struct intel_engine_cs *engine); + + void (*cleanup)(struct intel_engine_cs *engine); + + struct intel_engine_execlists execlists; + + /* Contexts are pinned whilst they are active on the GPU. The last + * context executed remains active whilst the GPU is idle - the + * switch away and write to the context object only occurs on the + * next execution. Contexts are only unpinned on retirement of the + * following request ensuring that we can always write to the object + * on the context switch even after idling. Across suspend, we switch + * to the kernel context and trash it as the save may not happen + * before the hardware is powered down. + */ + struct intel_context *last_retired_context; + + /* status_notifier: list of callbacks for context-switch changes */ + struct atomic_notifier_head context_status_notifier; + + struct intel_engine_hangcheck hangcheck; + +#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) +#define I915_ENGINE_SUPPORTS_STATS BIT(1) +#define I915_ENGINE_HAS_PREEMPTION BIT(2) +#define I915_ENGINE_HAS_SEMAPHORES BIT(3) + unsigned int flags; + + /* + * Table of commands the command parser needs to know about + * for this engine. + */ + DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER); + + /* + * Table of registers allowed in commands that read/write registers. + */ + const struct drm_i915_reg_table *reg_tables; + int reg_table_count; + + /* + * Returns the bitmask for the length field of the specified command. + * Return 0 for an unrecognized/invalid command. 
+ * + * If the command parser finds an entry for a command in the engine's + * cmd_tables, it gets the command's length based on the table entry. + * If not, it calls this function to determine the per-engine length + * field encoding for the command (i.e. different opcode ranges use + * certain bits to encode the command length in the header). + */ + u32 (*get_cmd_length_mask)(u32 cmd_header); + + struct { + /** + * @lock: Lock protecting the below fields. + */ + seqlock_t lock; + /** + * @enabled: Reference count indicating number of listeners. + */ + unsigned int enabled; + /** + * @active: Number of contexts currently scheduled in. + */ + unsigned int active; + /** + * @enabled_at: Timestamp when busy stats were enabled. + */ + ktime_t enabled_at; + /** + * @start: Timestamp of the last idle to active transition. + * + * Idle is defined as active == 0, active is active > 0. + */ + ktime_t start; + /** + * @total: Total time this engine was busy. + * + * Accumulated time not counting the most recent block in cases + * where engine is currently busy (active > 0). + */ + ktime_t total; + } stats; +}; + +static inline bool +intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; +} + +static inline bool +intel_engine_supports_stats(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_SUPPORTS_STATS; +} + +static inline bool +intel_engine_has_preemption(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_HAS_PREEMPTION; +} + +static inline bool +intel_engine_has_semaphores(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_HAS_SEMAPHORES; +} + +#define instdone_slice_mask(dev_priv__) \ + (IS_GEN(dev_priv__, 7) ? \ + 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) + +#define instdone_subslice_mask(dev_priv__) \ + (IS_GEN(dev_priv__, 7) ? 
\ + 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0]) + +#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ + for ((slice__) = 0, (subslice__) = 0; \ + (slice__) < I915_MAX_SLICES; \ + (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \ + (slice__) += ((subslice__) == 0)) \ + for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \ + (BIT(subslice__) & instdone_subslice_mask(dev_priv__))) + +#endif /* __INTEL_ENGINE_TYPES__ */ diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 744220296653..77ec1bd4df5a 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -32,6 +32,7 @@ #include "intel_guc_log.h" #include "intel_guc_reg.h" #include "intel_uc_fw.h" +#include "i915_utils.h" #include "i915_vma.h" struct guc_preempt_work { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 9ccbe63d46e3..e612bdca9fd9 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -15,14 +15,11 @@ #include "i915_request.h" #include "i915_selftest.h" #include "i915_timeline.h" -#include "intel_device_info.h" +#include "intel_engine_types.h" #include "intel_gpu_commands.h" #include "intel_workarounds.h" struct drm_printer; -struct i915_sched_attr; - -#define I915_CMD_HASH_ORDER 9 /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, * but keeps the logic simple.
Indeed, the whole purpose of this macro is just @@ -32,11 +29,6 @@ struct i915_sched_attr; #define CACHELINE_BYTES 64 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) -struct intel_hw_status_page { - struct i915_vma *vma; - u32 *addr; -}; - #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) #define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val) @@ -91,498 +83,6 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) return "unknown"; } -#define I915_MAX_SLICES 3 -#define I915_MAX_SUBSLICES 8 - -#define instdone_slice_mask(dev_priv__) \ - (IS_GEN(dev_priv__, 7) ? \ - 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) - -#define instdone_subslice_mask(dev_priv__) \ - (IS_GEN(dev_priv__, 7) ? \ - 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0]) - -#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ - for ((slice__) = 0, (subslice__) = 0; \ - (slice__) < I915_MAX_SLICES; \ - (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? 
(subslice__) + 1 : 0, \ - (slice__) += ((subslice__) == 0)) \ - for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \ - (BIT(subslice__) & instdone_subslice_mask(dev_priv__))) - -struct intel_instdone { - u32 instdone; - /* The following exist only in the RCS engine */ - u32 slice_common; - u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; - u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; -}; - -struct intel_engine_hangcheck { - u64 acthd; - u32 last_seqno; - u32 next_seqno; - unsigned long action_timestamp; - struct intel_instdone instdone; -}; - -struct intel_ring { - struct i915_vma *vma; - void *vaddr; - - struct i915_timeline *timeline; - struct list_head request_list; - struct list_head active_link; - - u32 head; - u32 tail; - u32 emit; - - u32 space; - u32 size; - u32 effective_size; -}; - -struct i915_gem_context; -struct drm_i915_reg_table; - -/* - * we use a single page to load ctx workarounds so all of these - * values are referred in terms of dwords - * - * struct i915_wa_ctx_bb: - * offset: specifies batch starting position, also helpful in case - * if we want to have multiple batches at different offsets based on - * some criteria. It is not a requirement at the moment but provides - * an option for future use. - * size: size of the batch in DWORDS - */ -struct i915_ctx_workarounds { - struct i915_wa_ctx_bb { - u32 offset; - u32 size; - } indirect_ctx, per_ctx; - struct i915_vma *vma; -}; - -struct i915_request; - -#define I915_MAX_VCS 4 -#define I915_MAX_VECS 2 - -/* - * Engine IDs definitions. - * Keep instances of the same type engine together. 
- */ -enum intel_engine_id { - RCS0 = 0, - BCS0, - VCS0, - VCS1, - VCS2, - VCS3, -#define _VCS(n) (VCS0 + (n)) - VECS0, - VECS1 -#define _VECS(n) (VECS0 + (n)) -}; - -struct st_preempt_hang { - struct completion completion; - unsigned int count; - bool inject_hang; -}; - -/** - * struct intel_engine_execlists - execlist submission queue and port state - * - * The struct intel_engine_execlists represents the combined logical state of - * driver and the hardware state for execlist mode of submission. - */ -struct intel_engine_execlists { - /** - * @tasklet: softirq tasklet for bottom handler - */ - struct tasklet_struct tasklet; - - /** - * @default_priolist: priority list for I915_PRIORITY_NORMAL - */ - struct i915_priolist default_priolist; - - /** - * @no_priolist: priority lists disabled - */ - bool no_priolist; - - /** - * @submit_reg: gen-specific execlist submission register - * set to the ExecList Submission Port (elsp) register pre-Gen11 and to - * the ExecList Submission Queue Contents register array for Gen11+ - */ - u32 __iomem *submit_reg; - - /** - * @ctrl_reg: the enhanced execlists control register, used to load the - * submit queue on the HW and to request preemptions to idle - */ - u32 __iomem *ctrl_reg; - - /** - * @port: execlist port states - * - * For each hardware ELSP (ExecList Submission Port) we keep - * track of the last request and the number of times we submitted - * that port to hw. We then count the number of times the hw reports - * a context completion or preemption. As only one context can - * be active on hw, we limit resubmission of context to port[0]. This - * is called Lite Restore, of the context. 
- */ - struct execlist_port { - /** - * @request_count: combined request and submission count - */ - struct i915_request *request_count; -#define EXECLIST_COUNT_BITS 2 -#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) -#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) -#define port_set(p, packed) ((p)->request_count = (packed)) -#define port_isset(p) ((p)->request_count) -#define port_index(p, execlists) ((p) - (execlists)->port) - - /** - * @context_id: context ID for port - */ - GEM_DEBUG_DECL(u32 context_id); - -#define EXECLIST_MAX_PORTS 2 - } port[EXECLIST_MAX_PORTS]; - - /** - * @active: is the HW active? We consider the HW as active after - * submitting any context for execution and until we have seen the - * last context completion event. After that, we do not expect any - * more events until we submit, and so can park the HW. - * - * As we have a small number of different sources from which we feed - * the HW, we track the state of each inside a single bitfield. - */ - unsigned int active; -#define EXECLISTS_ACTIVE_USER 0 -#define EXECLISTS_ACTIVE_PREEMPT 1 -#define EXECLISTS_ACTIVE_HWACK 2 - - /** - * @port_mask: number of execlist ports - 1 - */ - unsigned int port_mask; - - /** - * @queue_priority_hint: Highest pending priority. - * - * When we add requests into the queue, or adjust the priority of - * executing requests, we compute the maximum priority of those - * pending requests. We can then use this value to determine if - * we need to preempt the executing requests to service the queue. 
- * However, since the we may have recorded the priority of an inflight - * request we wanted to preempt but since completed, at the time of - * dequeuing the priority hint may no longer may match the highest - * available request priority. - */ - int queue_priority_hint; - - /** - * @queue: queue of requests, in priority lists - */ - struct rb_root_cached queue; - - /** - * @csb_write: control register for Context Switch buffer - * - * Note this register may be either mmio or HWSP shadow. - */ - u32 *csb_write; - - /** - * @csb_status: status array for Context Switch buffer - * - * Note these register may be either mmio or HWSP shadow. - */ - u32 *csb_status; - - /** - * @preempt_complete_status: expected CSB upon completing preemption - */ - u32 preempt_complete_status; - - /** - * @csb_head: context status buffer head - */ - u8 csb_head; - - I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;) -}; - -#define INTEL_ENGINE_CS_MAX_NAME 8 - -struct intel_engine_cs { - struct drm_i915_private *i915; - char name[INTEL_ENGINE_CS_MAX_NAME]; - - enum intel_engine_id id; - unsigned int hw_id; - unsigned int guc_id; - intel_engine_mask_t mask; - - u8 uabi_class; - - u8 class; - u8 instance; - u32 context_size; - u32 mmio_base; - - struct intel_ring *buffer; - - struct i915_timeline timeline; - - struct drm_i915_gem_object *default_state; - void *pinned_default_state; - - /* Rather than have every client wait upon all user interrupts, - * with the herd waking after every interrupt and each doing the - * heavyweight seqno dance, we delegate the task (of being the - * bottom-half of the user interrupt) to the first client. After - * every interrupt, we wake up one client, who does the heavyweight - * coherent seqno read and either goes back to sleep (if incomplete), - * or wakes up all the completed clients in parallel, before then - * transferring the bottom-half status to the next client in the queue. 
- * - * Compared to walking the entire list of waiters in a single dedicated - * bottom-half, we reduce the latency of the first waiter by avoiding - * a context switch, but incur additional coherent seqno reads when - * following the chain of request breadcrumbs. Since it is most likely - * that we have a single client waiting on each seqno, then reducing - * the overhead of waking that client is much preferred. - */ - struct intel_breadcrumbs { - spinlock_t irq_lock; - struct list_head signalers; - - struct irq_work irq_work; /* for use from inside irq_lock */ - - unsigned int irq_enabled; - - bool irq_armed; - } breadcrumbs; - - struct intel_engine_pmu { - /** - * @enable: Bitmask of enable sample events on this engine. - * - * Bits correspond to sample event types, for instance - * I915_SAMPLE_QUEUED is bit 0 etc. - */ - u32 enable; - /** - * @enable_count: Reference count for the enabled samplers. - * - * Index number corresponds to @enum drm_i915_pmu_engine_sample. - */ - unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT]; - /** - * @sample: Counter values for sampling events. - * - * Our internal timer stores the current counters in this field. - * - * Index number corresponds to @enum drm_i915_pmu_engine_sample. - */ - struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT]; - } pmu; - - /* - * A pool of objects to use as shadow copies of client batch buffers - * when the command parser is enabled. Prevents the client from - * modifying the batch contents after software parsing. 
- */ - struct i915_gem_batch_pool batch_pool; - - struct intel_hw_status_page status_page; - struct i915_ctx_workarounds wa_ctx; - struct i915_wa_list ctx_wa_list; - struct i915_wa_list wa_list; - struct i915_wa_list whitelist; - - u32 irq_keep_mask; /* always keep these interrupts */ - u32 irq_enable_mask; /* bitmask to enable ring interrupt */ - void (*irq_enable)(struct intel_engine_cs *engine); - void (*irq_disable)(struct intel_engine_cs *engine); - - int (*init_hw)(struct intel_engine_cs *engine); - - struct { - void (*prepare)(struct intel_engine_cs *engine); - void (*reset)(struct intel_engine_cs *engine, bool stalled); - void (*finish)(struct intel_engine_cs *engine); - } reset; - - void (*park)(struct intel_engine_cs *engine); - void (*unpark)(struct intel_engine_cs *engine); - - void (*set_default_submission)(struct intel_engine_cs *engine); - - struct intel_context *(*context_pin)(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); - - int (*request_alloc)(struct i915_request *rq); - int (*init_context)(struct i915_request *rq); - - int (*emit_flush)(struct i915_request *request, u32 mode); -#define EMIT_INVALIDATE BIT(0) -#define EMIT_FLUSH BIT(1) -#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) - int (*emit_bb_start)(struct i915_request *rq, - u64 offset, u32 length, - unsigned int dispatch_flags); -#define I915_DISPATCH_SECURE BIT(0) -#define I915_DISPATCH_PINNED BIT(1) - int (*emit_init_breadcrumb)(struct i915_request *rq); - u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, - u32 *cs); - unsigned int emit_fini_breadcrumb_dw; - - /* Pass the request to the hardware queue (e.g. directly into - * the legacy ringbuffer or to the end of an execlist). - * - * This is called from an atomic context with irqs disabled; must - * be irq safe. - */ - void (*submit_request)(struct i915_request *rq); - - /* - * Call when the priority on a request has changed and it and its - * dependencies may need rescheduling. 
Note the request itself may - * not be ready to run! - */ - void (*schedule)(struct i915_request *request, - const struct i915_sched_attr *attr); - - /* - * Cancel all requests on the hardware, or queued for execution. - * This should only cancel the ready requests that have been - * submitted to the engine (via the engine->submit_request callback). - * This is called when marking the device as wedged. - */ - void (*cancel_requests)(struct intel_engine_cs *engine); - - void (*cleanup)(struct intel_engine_cs *engine); - - struct intel_engine_execlists execlists; - - /* Contexts are pinned whilst they are active on the GPU. The last - * context executed remains active whilst the GPU is idle - the - * switch away and write to the context object only occurs on the - * next execution. Contexts are only unpinned on retirement of the - * following request ensuring that we can always write to the object - * on the context switch even after idling. Across suspend, we switch - * to the kernel context and trash it as the save may not happen - * before the hardware is powered down. - */ - struct intel_context *last_retired_context; - - /* status_notifier: list of callbacks for context-switch changes */ - struct atomic_notifier_head context_status_notifier; - - struct intel_engine_hangcheck hangcheck; - -#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) -#define I915_ENGINE_SUPPORTS_STATS BIT(1) -#define I915_ENGINE_HAS_PREEMPTION BIT(2) -#define I915_ENGINE_HAS_SEMAPHORES BIT(3) - unsigned int flags; - - /* - * Table of commands the command parser needs to know about - * for this engine. - */ - DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER); - - /* - * Table of registers allowed in commands that read/write registers. - */ - const struct drm_i915_reg_table *reg_tables; - int reg_table_count; - - /* - * Returns the bitmask for the length field of the specified command. - * Return 0 for an unrecognized/invalid command. 
- * - * If the command parser finds an entry for a command in the engine's - * cmd_tables, it gets the command's length based on the table entry. - * If not, it calls this function to determine the per-engine length - * field encoding for the command (i.e. different opcode ranges use - * certain bits to encode the command length in the header). - */ - u32 (*get_cmd_length_mask)(u32 cmd_header); - - struct { - /** - * @lock: Lock protecting the below fields. - */ - seqlock_t lock; - /** - * @enabled: Reference count indicating number of listeners. - */ - unsigned int enabled; - /** - * @active: Number of contexts currently scheduled in. - */ - unsigned int active; - /** - * @enabled_at: Timestamp when busy stats were enabled. - */ - ktime_t enabled_at; - /** - * @start: Timestamp of the last idle to active transition. - * - * Idle is defined as active == 0, active is active > 0. - */ - ktime_t start; - /** - * @total: Total time this engine was busy. - * - * Accumulated time not counting the most recent block in cases - * where engine is currently busy (active > 0). 
- */ - ktime_t total; - } stats; -}; - -static inline bool -intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) -{ - return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; -} - -static inline bool -intel_engine_supports_stats(const struct intel_engine_cs *engine) -{ - return engine->flags & I915_ENGINE_SUPPORTS_STATS; -} - -static inline bool -intel_engine_has_preemption(const struct intel_engine_cs *engine) -{ - return engine->flags & I915_ENGINE_HAS_PREEMPTION; -} - -static inline bool -intel_engine_has_semaphores(const struct intel_engine_cs *engine) -{ - return engine->flags & I915_ENGINE_HAS_SEMAPHORES; -} - void intel_engines_set_scheduler_caps(struct drm_i915_private *i915); static inline bool __execlists_need_preempt(int prio, int last) diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h index 7c734714b05e..a1bf51c611a9 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.h +++ b/drivers/gpu/drm/i915/intel_workarounds.h @@ -9,18 +9,7 @@ #include -struct i915_wa { - i915_reg_t reg; - u32 mask; - u32 val; -}; - -struct i915_wa_list { - const char *name; - struct i915_wa *list; - unsigned int count; - unsigned int wa_count; -}; +#include "intel_workarounds_types.h" static inline void intel_wa_list_free(struct i915_wa_list *wal) { diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/intel_workarounds_types.h new file mode 100644 index 000000000000..30918da180ff --- /dev/null +++ b/drivers/gpu/drm/i915/intel_workarounds_types.h @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef __INTEL_WORKAROUNDS_TYPES_H__ +#define __INTEL_WORKAROUNDS_TYPES_H__ + +#include + +#include "i915_reg.h" + +struct i915_wa { + i915_reg_t reg; + u32 mask; + u32 val; +}; + +struct i915_wa_list { + const char *name; + struct i915_wa *list; + unsigned int count; + unsigned int wa_count; +}; + +#endif /* __INTEL_WORKAROUNDS_TYPES_H__ */ 
diff --git a/drivers/gpu/drm/i915/test_i915_active_types_standalone.c b/drivers/gpu/drm/i915/test_i915_active_types_standalone.c new file mode 100644 index 000000000000..144ebd153e57 --- /dev/null +++ b/drivers/gpu/drm/i915/test_i915_active_types_standalone.c @@ -0,0 +1,7 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_active_types.h" diff --git a/drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c b/drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c new file mode 100644 index 000000000000..4e4da4860bc2 --- /dev/null +++ b/drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c @@ -0,0 +1,7 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_gem_context_types.h" diff --git a/drivers/gpu/drm/i915/test_i915_timeline_types_standalone.c b/drivers/gpu/drm/i915/test_i915_timeline_types_standalone.c new file mode 100644 index 000000000000..f58e148e8946 --- /dev/null +++ b/drivers/gpu/drm/i915/test_i915_timeline_types_standalone.c @@ -0,0 +1,7 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_timeline_types.h" diff --git a/drivers/gpu/drm/i915/test_intel_context_types_standalone.c b/drivers/gpu/drm/i915/test_intel_context_types_standalone.c new file mode 100644 index 000000000000..b39e3c4e6551 --- /dev/null +++ b/drivers/gpu/drm/i915/test_intel_context_types_standalone.c @@ -0,0 +1,7 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "intel_context_types.h" diff --git a/drivers/gpu/drm/i915/test_intel_engine_types_standalone.c b/drivers/gpu/drm/i915/test_intel_engine_types_standalone.c new file mode 100644 index 000000000000..d05e4cdcbcf9 --- /dev/null +++ b/drivers/gpu/drm/i915/test_intel_engine_types_standalone.c @@ -0,0 +1,7 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include 
"intel_engine_types.h" diff --git a/drivers/gpu/drm/i915/test_intel_workarounds_types_standalone.c b/drivers/gpu/drm/i915/test_intel_workarounds_types_standalone.c new file mode 100644 index 000000000000..4f658bb00825 --- /dev/null +++ b/drivers/gpu/drm/i915/test_intel_workarounds_types_standalone.c @@ -0,0 +1,7 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "intel_workarounds_types.h" -- cgit v1.2.3-59-g8ed1b From 4daffb664a69532efdfee54f3eac5ce54e8c37dd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 Mar 2019 14:07:11 +0000 Subject: drm/i915: Stop storing the context name as the timeline name The timeline->name is only used for convenience in pretty printing the i915_request.fence->ops->get_timeline_name() and it is just as convenient to pull it from the gem_context directly. The few instances of its use inside GEM_TRACE() has proven more of a nuisance than helpful, so not worth saving imo. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190321140711.11190-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 5 ++--- drivers/gpu/drm/i915/i915_request.c | 7 ++----- drivers/gpu/drm/i915/i915_timeline.c | 5 +---- drivers/gpu/drm/i915/i915_timeline.h | 2 -- drivers/gpu/drm/i915/i915_timeline_types.h | 1 - drivers/gpu/drm/i915/intel_engine_cs.c | 3 +-- drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +--- drivers/gpu/drm/i915/selftests/i915_timeline.c | 6 +++--- drivers/gpu/drm/i915/selftests/mock_engine.c | 9 ++------- 10 files changed, 13 insertions(+), 31 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_timeline.h') diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 9187910391d8..00dec72f6875 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -673,9 +673,8 @@ 
last_request_on_engine(struct i915_timeline *timeline, rq = i915_active_request_raw(&timeline->last_request, &engine->i915->drm.struct_mutex); if (rq && rq->engine == engine) { - GEM_TRACE("last request for %s on engine %s: %llx:%llu\n", - timeline->name, engine->name, - rq->fence.context, rq->fence.seqno); + GEM_TRACE("last request on engine %s: %llx:%llu\n", + engine->name, rq->fence.context, rq->fence.seqno); GEM_BUG_ON(rq->timeline != timeline); return rq; } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 0a3d94517d0a..1529824d7c61 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -66,7 +66,7 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return "signaled"; - return to_request(fence)->timeline->name; + return to_request(fence)->gem_context->name ?: "[i915]"; } static bool i915_fence_signaled(struct dma_fence *fence) @@ -167,7 +167,6 @@ static void advance_ring(struct i915_request *request) * is just about to be. Either works, if we miss the last two * noops - they are safe to be replayed on a reset. 
*/ - GEM_TRACE("marking %s as inactive\n", ring->timeline->name); tail = READ_ONCE(request->tail); list_del(&ring->active_link); } else { @@ -1064,10 +1063,8 @@ void i915_request_add(struct i915_request *request) __i915_active_request_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); - if (list_is_first(&request->ring_link, &ring->request_list)) { - GEM_TRACE("marking %s as active\n", ring->timeline->name); + if (list_is_first(&request->ring_link, &ring->request_list)) list_add(&ring->active_link, &request->i915->gt.active_rings); - } request->i915->gt.active_engines |= request->engine->mask; request->emitted_jiffies = jiffies; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 8484ba6e51d1..2f4907364920 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -197,7 +197,6 @@ static void cacheline_free(struct i915_timeline_cacheline *cl) int i915_timeline_init(struct drm_i915_private *i915, struct i915_timeline *timeline, - const char *name, struct i915_vma *hwsp) { void *vaddr; @@ -213,7 +212,6 @@ int i915_timeline_init(struct drm_i915_private *i915, BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); timeline->i915 = i915; - timeline->name = name; timeline->pin_count = 0; timeline->has_initial_breadcrumb = !hwsp; timeline->hwsp_cacheline = NULL; @@ -342,7 +340,6 @@ void i915_timeline_fini(struct i915_timeline *timeline) struct i915_timeline * i915_timeline_create(struct drm_i915_private *i915, - const char *name, struct i915_vma *global_hwsp) { struct i915_timeline *timeline; @@ -352,7 +349,7 @@ i915_timeline_create(struct drm_i915_private *i915, if (!timeline) return ERR_PTR(-ENOMEM); - err = i915_timeline_init(i915, timeline, name, global_hwsp); + err = i915_timeline_init(i915, timeline, global_hwsp); if (err) { kfree(timeline); return ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 
9126c8206490..c1e47a423d85 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -32,7 +32,6 @@ int i915_timeline_init(struct drm_i915_private *i915, struct i915_timeline *tl, - const char *name, struct i915_vma *hwsp); void i915_timeline_fini(struct i915_timeline *tl); @@ -57,7 +56,6 @@ i915_timeline_set_subclass(struct i915_timeline *timeline, struct i915_timeline * i915_timeline_create(struct drm_i915_private *i915, - const char *name, struct i915_vma *global_hwsp); static inline struct i915_timeline * diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h index 8ff146dc05ba..12ba3c573aa0 100644 --- a/drivers/gpu/drm/i915/i915_timeline_types.h +++ b/drivers/gpu/drm/i915/i915_timeline_types.h @@ -71,7 +71,6 @@ struct i915_timeline { struct i915_active_request barrier; struct list_head link; - const char *name; struct drm_i915_private *i915; struct kref kref; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 588c640b5a57..24de34289d68 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -581,7 +581,6 @@ int intel_engine_setup_common(struct intel_engine_cs *engine) err = i915_timeline_init(engine->i915, &engine->timeline, - engine->name, engine->status_page.vma); if (err) goto err_hwsp; @@ -660,7 +659,7 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) return -ENOMEM; if (i915_timeline_init(engine->i915, - &frame->timeline, "measure", + &frame->timeline, engine->status_page.vma)) goto out_frame; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 82ee3f669564..131b89972a78 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2805,7 +2805,7 @@ err_unpin_ctx: static struct i915_timeline *get_timeline(struct i915_gem_context *ctx) { - return i915_timeline_create(ctx->i915, ctx->name, NULL); + return 
i915_timeline_create(ctx->i915, NULL); } static int execlists_context_deferred_alloc(struct intel_context *ce, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 720d39729ead..03bbdf47e7e4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1553,9 +1553,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (err) return err; - timeline = i915_timeline_create(engine->i915, - engine->name, - engine->status_page.vma); + timeline = i915_timeline_create(engine->i915, engine->status_page.vma); if (IS_ERR(timeline)) { err = PTR_ERR(timeline); goto err; diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index 844701759ffc..8e7bcaa1eb66 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -64,7 +64,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, unsigned long cacheline; int err; - tl = i915_timeline_create(state->i915, "mock", NULL); + tl = i915_timeline_create(state->i915, NULL); if (IS_ERR(tl)) return PTR_ERR(tl); @@ -476,7 +476,7 @@ checked_i915_timeline_create(struct drm_i915_private *i915) { struct i915_timeline *tl; - tl = i915_timeline_create(i915, "live", NULL); + tl = i915_timeline_create(i915, NULL); if (IS_ERR(tl)) return tl; @@ -658,7 +658,7 @@ static int live_hwsp_wrap(void *arg) mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(i915); - tl = i915_timeline_create(i915, __func__, NULL); + tl = i915_timeline_create(i915, NULL); if (IS_ERR(tl)) { err = PTR_ERR(tl); goto out_rpm; diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 61744819172b..61a8206ed677 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -50,9 +50,7 @@ static struct intel_ring *mock_ring(struct 
intel_engine_cs *engine) if (!ring) return NULL; - if (i915_timeline_init(engine->i915, - &ring->timeline, engine->name, - NULL)) { + if (i915_timeline_init(engine->i915, &ring->timeline, NULL)) { kfree(ring); return NULL; } @@ -259,10 +257,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.reset.finish = mock_reset_finish; engine->base.cancel_requests = mock_cancel_requests; - if (i915_timeline_init(i915, - &engine->base.timeline, - engine->base.name, - NULL)) + if (i915_timeline_init(i915, &engine->base.timeline, NULL)) goto err_free; i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE); -- cgit v1.2.3-59-g8ed1b From 3a891a62679424e5625a551b9af9c33af6ea59b3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Apr 2019 17:26:39 +0100 Subject: drm/i915: Move intel_engine_mask_t around for use by i915_request_types.h We want to use intel_engine_mask_t inside i915_request.h, which means extracting it from the general header file mess and placing it inside a types.h. A knock on effect is that the compiler wants to warn about type-contraction of ALL_ENGINES into intel_engine_mask_t, so prepare for the worst. 
v2: Use intel_engine_mask_t consistently v3: Move I915_NUM_ENGINES to its natural home at the end of the enum Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Cc: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20190401162641.10963-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gvt/execlist.c | 11 +-- drivers/gpu/drm/i915/gvt/execlist.h | 2 +- drivers/gpu/drm/i915/gvt/gvt.h | 8 +- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 8 +- drivers/gpu/drm/i915/gvt/scheduler.h | 6 +- drivers/gpu/drm/i915/gvt/vgpu.c | 4 +- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem.h | 2 - drivers/gpu/drm/i915/i915_gem_context.c | 6 +- drivers/gpu/drm/i915/i915_gem_context.h | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 9 +- drivers/gpu/drm/i915/i915_gpu_error.h | 2 +- drivers/gpu/drm/i915/i915_reset.c | 43 +++++----- drivers/gpu/drm/i915/i915_reset.h | 9 +- drivers/gpu/drm/i915/i915_scheduler.h | 86 +------------------ drivers/gpu/drm/i915/i915_scheduler_types.h | 98 ++++++++++++++++++++++ drivers/gpu/drm/i915/i915_timeline.h | 1 + drivers/gpu/drm/i915/i915_timeline_types.h | 3 +- drivers/gpu/drm/i915/intel_device_info.h | 3 +- drivers/gpu/drm/i915/intel_engine_types.h | 11 ++- drivers/gpu/drm/i915/intel_guc_submission.h | 1 + drivers/gpu/drm/i915/intel_hangcheck.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 8 +- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 3 +- .../i915/test_i915_scheduler_types_standalone.c | 7 ++ 29 files changed, 191 insertions(+), 152 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_scheduler_types.h create mode 100644 drivers/gpu/drm/i915/test_i915_scheduler_types_standalone.c (limited to 'drivers/gpu/drm/i915/i915_timeline.h') diff --git a/drivers/gpu/drm/i915/Makefile 
b/drivers/gpu/drm/i915/Makefile index 60de05f3fa60..1f3e8b145fc0 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -61,6 +61,7 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o i915-$(CONFIG_DRM_I915_WERROR) += \ test_i915_active_types_standalone.o \ test_i915_gem_context_types_standalone.o \ + test_i915_scheduler_types_standalone.o \ test_i915_timeline_types_standalone.o \ test_intel_context_types_standalone.o \ test_intel_engine_types_standalone.o \ diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 1a93472cb34e..f21b8fb5b37e 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -526,12 +526,13 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id) vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw; } -static void clean_execlist(struct intel_vgpu *vgpu, unsigned long engine_mask) +static void clean_execlist(struct intel_vgpu *vgpu, + intel_engine_mask_t engine_mask) { - unsigned int tmp; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_engine_cs *engine; struct intel_vgpu_submission *s = &vgpu->submission; + intel_engine_mask_t tmp; for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { kfree(s->ring_scan_buffer[engine->id]); @@ -541,18 +542,18 @@ static void clean_execlist(struct intel_vgpu *vgpu, unsigned long engine_mask) } static void reset_execlist(struct intel_vgpu *vgpu, - unsigned long engine_mask) + intel_engine_mask_t engine_mask) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_engine_cs *engine; - unsigned int tmp; + intel_engine_mask_t tmp; for_each_engine_masked(engine, dev_priv, engine_mask, tmp) init_vgpu_execlist(vgpu, engine->id); } static int init_execlist(struct intel_vgpu *vgpu, - unsigned long engine_mask) + intel_engine_mask_t engine_mask) { reset_execlist(vgpu, engine_mask); return 0; diff --git a/drivers/gpu/drm/i915/gvt/execlist.h 
b/drivers/gpu/drm/i915/gvt/execlist.h index 714d709829a2..5ccc2c695848 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.h +++ b/drivers/gpu/drm/i915/gvt/execlist.h @@ -180,6 +180,6 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu); int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id); void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu, - unsigned long engine_mask); + intel_engine_mask_t engine_mask); #endif /*_GVT_EXECLIST_H_*/ diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 8bce09de4b82..7a4e1a6387e5 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -144,9 +144,9 @@ enum { struct intel_vgpu_submission_ops { const char *name; - int (*init)(struct intel_vgpu *vgpu, unsigned long engine_mask); - void (*clean)(struct intel_vgpu *vgpu, unsigned long engine_mask); - void (*reset)(struct intel_vgpu *vgpu, unsigned long engine_mask); + int (*init)(struct intel_vgpu *vgpu, intel_engine_mask_t engine_mask); + void (*clean)(struct intel_vgpu *vgpu, intel_engine_mask_t engine_mask); + void (*reset)(struct intel_vgpu *vgpu, intel_engine_mask_t engine_mask); }; struct intel_vgpu_submission { @@ -488,7 +488,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu); void intel_gvt_release_vgpu(struct intel_vgpu *vgpu); void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, - unsigned int engine_mask); + intel_engine_mask_t engine_mask); void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu); void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu); void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index dbc749617922..86761b1def1e 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -311,7 +311,7 @@ static int mul_force_wake_write(struct intel_vgpu *vgpu, static int 
gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - unsigned int engine_mask = 0; + intel_engine_mask_t engine_mask = 0; u32 data; write_vreg(vgpu, offset, p_data, bytes); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 3faf2438b9bc..b385edbeaa30 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -838,13 +838,13 @@ static void update_guest_context(struct intel_vgpu_workload *workload) } void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, - unsigned long engine_mask) + intel_engine_mask_t engine_mask) { struct intel_vgpu_submission *s = &vgpu->submission; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_engine_cs *engine; struct intel_vgpu_workload *pos, *n; - unsigned int tmp; + intel_engine_mask_t tmp; /* free the unsubmited workloads in the queues. */ for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { @@ -1137,7 +1137,7 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) * */ void intel_vgpu_reset_submission(struct intel_vgpu *vgpu, - unsigned long engine_mask) + intel_engine_mask_t engine_mask) { struct intel_vgpu_submission *s = &vgpu->submission; @@ -1227,7 +1227,7 @@ out_shadow_ctx: * */ int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu, - unsigned long engine_mask, + intel_engine_mask_t engine_mask, unsigned int interface) { struct intel_vgpu_submission *s = &vgpu->submission; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 0635b2c4bed7..90c6756f5453 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -142,12 +142,12 @@ void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu); int intel_vgpu_setup_submission(struct intel_vgpu *vgpu); void intel_vgpu_reset_submission(struct intel_vgpu *vgpu, - unsigned long engine_mask); + intel_engine_mask_t engine_mask); void 
intel_vgpu_clean_submission(struct intel_vgpu *vgpu); int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu, - unsigned long engine_mask, + intel_engine_mask_t engine_mask, unsigned int interface); extern const struct intel_vgpu_submission_ops @@ -160,6 +160,6 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload); void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, - unsigned long engine_mask); + intel_engine_mask_t engine_mask); #endif diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 314e40121e47..44ce3c2b9ac1 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -526,11 +526,11 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, * GPU engines. For FLR, engine_mask is ignored. */ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, - unsigned int engine_mask) + intel_engine_mask_t engine_mask) { struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; - unsigned int resetting_eng = dmlr ? ALL_ENGINES : engine_mask; + intel_engine_mask_t resetting_eng = dmlr ? 
ALL_ENGINES : engine_mask; gvt_dbg_core("------------------------------------------\n"); gvt_dbg_core("resseting vgpu%d, dmlr %d, engine_mask %08x\n", diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3aef121067e4..4dd2d9ae3202 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2245,7 +2245,7 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data) const struct intel_guc *guc = &dev_priv->guc; struct guc_stage_desc *desc = guc->stage_desc_pool_vaddr; struct intel_guc_client *client = guc->execbuf_client; - unsigned int tmp; + intel_engine_mask_t tmp; int index; if (!USES_GUC_SUBMISSION(dev_priv)) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5a94c7430e62..0ab4826921f7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2505,7 +2505,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_GEN9_LP(dev_priv) (IS_GEN(dev_priv, 9) && IS_LP(dev_priv)) #define IS_GEN9_BC(dev_priv) (IS_GEN(dev_priv, 9) && !IS_LP(dev_priv)) -#define ALL_ENGINES (~0u) #define HAS_ENGINE(dev_priv, id) (INTEL_INFO(dev_priv)->engine_mask & BIT(id)) #define ENGINE_INSTANCES_MASK(dev_priv, first, count) ({ \ diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 5c073fe73664..9074eb1e843f 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -73,8 +73,6 @@ struct drm_i915_private; #define GEM_TRACE_DUMP_ON(expr) BUILD_BUG_ON_INVALID(expr) #endif -#define I915_NUM_ENGINES 8 - #define I915_GEM_IDLE_TIMEOUT (HZ / 5) void i915_gem_park(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 141da4e71e46..fe7ddb1f59e1 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -858,9 +858,9 @@ static void cb_retire(struct i915_active *base) 
kfree(cb); } -I915_SELFTEST_DECLARE(static unsigned long context_barrier_inject_fault); +I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault); static int context_barrier_task(struct i915_gem_context *ctx, - unsigned long engines, + intel_engine_mask_t engines, int (*emit)(struct i915_request *rq, void *data), void (*task)(void *data), void *data) @@ -922,7 +922,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, } int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, - unsigned long mask) + intel_engine_mask_t mask) { struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index edc6ba3f0288..23dcb01bfd82 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -142,7 +142,7 @@ void i915_gem_context_close(struct drm_file *file); int i915_switch_context(struct i915_request *rq); int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, - unsigned long engine_mask); + intel_engine_mask_t engine_mask); void i915_gem_context_release(struct kref *ctx_ref); struct i915_gem_context * diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 83ded9fc761a..f597f35b109b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -390,7 +390,7 @@ struct i915_hw_ppgtt { struct i915_address_space vm; struct kref ref; - unsigned long pd_dirty_engines; + intel_engine_mask_t pd_dirty_engines; union { struct i915_pml4 pml4; /* GEN8+ & 48b PPGTT */ struct i915_page_directory_pointer pdp; /* GEN8+ */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 81a27b808273..c65d45bc63ee 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1096,7 +1096,7 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, * It's only a small step better than a 
random number in its current form. */ static u32 i915_error_generate_code(struct i915_gpu_state *error, - unsigned long engine_mask) + intel_engine_mask_t engine_mask) { /* * IPEHR would be an ideal way to detect errors, as it's the gross @@ -1641,7 +1641,8 @@ static void capture_reg_state(struct i915_gpu_state *error) } static const char * -error_msg(struct i915_gpu_state *error, unsigned long engines, const char *msg) +error_msg(struct i915_gpu_state *error, + intel_engine_mask_t engines, const char *msg) { int len; int i; @@ -1651,7 +1652,7 @@ error_msg(struct i915_gpu_state *error, unsigned long engines, const char *msg) engines &= ~BIT(i); len = scnprintf(error->error_msg, sizeof(error->error_msg), - "GPU HANG: ecode %d:%lx:0x%08x", + "GPU HANG: ecode %d:%x:0x%08x", INTEL_GEN(error->i915), engines, i915_error_generate_code(error, engines)); if (engines) { @@ -1790,7 +1791,7 @@ i915_capture_gpu_state(struct drm_i915_private *i915) * to pick up. */ void i915_capture_error_state(struct drm_i915_private *i915, - unsigned long engine_mask, + intel_engine_mask_t engine_mask, const char *msg) { static bool warned; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 302a14240b45..5dc761e85d9d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -263,7 +263,7 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); void i915_capture_error_state(struct drm_i915_private *dev_priv, - unsigned long engine_mask, + intel_engine_mask_t engine_mask, const char *error_msg); static inline struct i915_gpu_state * diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 2f25ed702ba0..ddc403ee8855 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -144,15 +144,15 @@ static void gen3_stop_engine(struct intel_engine_cs 
*engine) } static void i915_stop_engines(struct drm_i915_private *i915, - unsigned int engine_mask) + intel_engine_mask_t engine_mask) { struct intel_engine_cs *engine; - enum intel_engine_id id; + intel_engine_mask_t tmp; if (INTEL_GEN(i915) < 3) return; - for_each_engine_masked(engine, i915, engine_mask, id) + for_each_engine_masked(engine, i915, engine_mask, tmp) gen3_stop_engine(engine); } @@ -165,7 +165,7 @@ static bool i915_in_reset(struct pci_dev *pdev) } static int i915_do_reset(struct drm_i915_private *i915, - unsigned int engine_mask, + intel_engine_mask_t engine_mask, unsigned int retry) { struct pci_dev *pdev = i915->drm.pdev; @@ -194,7 +194,7 @@ static bool g4x_reset_complete(struct pci_dev *pdev) } static int g33_do_reset(struct drm_i915_private *i915, - unsigned int engine_mask, + intel_engine_mask_t engine_mask, unsigned int retry) { struct pci_dev *pdev = i915->drm.pdev; @@ -204,7 +204,7 @@ static int g33_do_reset(struct drm_i915_private *i915, } static int g4x_do_reset(struct drm_i915_private *dev_priv, - unsigned int engine_mask, + intel_engine_mask_t engine_mask, unsigned int retry) { struct pci_dev *pdev = dev_priv->drm.pdev; @@ -242,7 +242,7 @@ out: } static int ironlake_do_reset(struct drm_i915_private *dev_priv, - unsigned int engine_mask, + intel_engine_mask_t engine_mask, unsigned int retry) { struct intel_uncore *uncore = &dev_priv->uncore; @@ -303,7 +303,7 @@ static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv, } static int gen6_reset_engines(struct drm_i915_private *i915, - unsigned int engine_mask, + intel_engine_mask_t engine_mask, unsigned int retry) { struct intel_engine_cs *engine; @@ -319,7 +319,7 @@ static int gen6_reset_engines(struct drm_i915_private *i915, if (engine_mask == ALL_ENGINES) { hw_mask = GEN6_GRDOM_FULL; } else { - unsigned int tmp; + intel_engine_mask_t tmp; hw_mask = 0; for_each_engine_masked(engine, i915, engine_mask, tmp) { @@ -429,7 +429,7 @@ static void gen11_unlock_sfc(struct drm_i915_private 
*dev_priv, } static int gen11_reset_engines(struct drm_i915_private *i915, - unsigned int engine_mask, + intel_engine_mask_t engine_mask, unsigned int retry) { const u32 hw_engine_mask[] = { @@ -443,7 +443,7 @@ static int gen11_reset_engines(struct drm_i915_private *i915, [VECS1] = GEN11_GRDOM_VECS2, }; struct intel_engine_cs *engine; - unsigned int tmp; + intel_engine_mask_t tmp; u32 hw_mask; int ret; @@ -496,12 +496,12 @@ static void gen8_engine_reset_cancel(struct intel_engine_cs *engine) } static int gen8_reset_engines(struct drm_i915_private *i915, - unsigned int engine_mask, + intel_engine_mask_t engine_mask, unsigned int retry) { struct intel_engine_cs *engine; const bool reset_non_ready = retry >= 1; - unsigned int tmp; + intel_engine_mask_t tmp; int ret; for_each_engine_masked(engine, i915, engine_mask, tmp) { @@ -537,7 +537,7 @@ skip_reset: } typedef int (*reset_func)(struct drm_i915_private *, - unsigned int engine_mask, + intel_engine_mask_t engine_mask, unsigned int retry); static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) @@ -558,7 +558,8 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) return NULL; } -int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask) +int intel_gpu_reset(struct drm_i915_private *i915, + intel_engine_mask_t engine_mask) { const int retries = engine_mask == ALL_ENGINES ? 
RESET_MAX_RETRIES : 1; reset_func reset; @@ -692,7 +693,8 @@ static void gt_revoke(struct drm_i915_private *i915) revoke_mmaps(i915); } -static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask) +static int gt_reset(struct drm_i915_private *i915, + intel_engine_mask_t stalled_mask) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -951,7 +953,8 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) return result; } -static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask) +static int do_reset(struct drm_i915_private *i915, + intel_engine_mask_t stalled_mask) { int err, i; @@ -986,7 +989,7 @@ static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask) * - re-init display */ void i915_reset(struct drm_i915_private *i915, - unsigned int stalled_mask, + intel_engine_mask_t stalled_mask, const char *reason) { struct i915_gpu_error *error = &i915->gpu_error; @@ -1233,14 +1236,14 @@ void i915_clear_error_registers(struct drm_i915_private *dev_priv) * of a ring dump etc.). */ void i915_handle_error(struct drm_i915_private *i915, - u32 engine_mask, + intel_engine_mask_t engine_mask, unsigned long flags, const char *fmt, ...) 
{ struct i915_gpu_error *error = &i915->gpu_error; struct intel_engine_cs *engine; intel_wakeref_t wakeref; - unsigned int tmp; + intel_engine_mask_t tmp; char error_msg[80]; char *msg = NULL; diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h index 16f2389f656f..86b1ac8116ce 100644 --- a/drivers/gpu/drm/i915/i915_reset.h +++ b/drivers/gpu/drm/i915/i915_reset.h @@ -11,13 +11,15 @@ #include #include +#include "intel_engine_types.h" + struct drm_i915_private; struct intel_engine_cs; struct intel_guc; __printf(4, 5) void i915_handle_error(struct drm_i915_private *i915, - u32 engine_mask, + intel_engine_mask_t engine_mask, unsigned long flags, const char *fmt, ...); #define I915_ERROR_CAPTURE BIT(0) @@ -25,7 +27,7 @@ void i915_handle_error(struct drm_i915_private *i915, void i915_clear_error_registers(struct drm_i915_private *i915); void i915_reset(struct drm_i915_private *i915, - unsigned int stalled_mask, + intel_engine_mask_t stalled_mask, const char *reason); int i915_reset_engine(struct intel_engine_cs *engine, const char *reason); @@ -41,7 +43,8 @@ int i915_terminally_wedged(struct drm_i915_private *i915); bool intel_has_gpu_reset(struct drm_i915_private *i915); bool intel_has_reset_engine(struct drm_i915_private *i915); -int intel_gpu_reset(struct drm_i915_private *i915, u32 engine_mask); +int intel_gpu_reset(struct drm_i915_private *i915, + intel_engine_mask_t engine_mask); int intel_reset_guc(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 9a1d257f3d6e..07d243acf553 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -8,92 +8,10 @@ #define _I915_SCHEDULER_H_ #include +#include #include -#include - -struct drm_i915_private; -struct i915_request; -struct intel_engine_cs; - -enum { - I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, - I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, - 
I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, - - I915_PRIORITY_INVALID = INT_MIN -}; - -#define I915_USER_PRIORITY_SHIFT 3 -#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT) - -#define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT) -#define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1) - -#define I915_PRIORITY_WAIT ((u8)BIT(0)) -#define I915_PRIORITY_NEWCLIENT ((u8)BIT(1)) -#define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(2)) - -#define __NO_PREEMPTION (I915_PRIORITY_WAIT) - -struct i915_sched_attr { - /** - * @priority: execution and service priority - * - * All clients are equal, but some are more equal than others! - * - * Requests from a context with a greater (more positive) value of - * @priority will be executed before those with a lower @priority - * value, forming a simple QoS. - * - * The &drm_i915_private.kernel_context is assigned the lowest priority. - */ - int priority; -}; - -/* - * "People assume that time is a strict progression of cause to effect, but - * actually, from a nonlinear, non-subjective viewpoint, it's more like a big - * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015 - * - * Requests exist in a complex web of interdependencies. Each request - * has to wait for some other request to complete before it is ready to be run - * (e.g. we have to wait until the pixels have been rendering into a texture - * before we can copy from it). We track the readiness of a request in terms - * of fences, but we also need to keep the dependency tree for the lifetime - * of the request (beyond the life of an individual fence). We use the tree - * at various points to reorder the requests whilst keeping the requests - * in order with respect to their various dependencies. - * - * There is no active component to the "scheduler". 
As we know the dependency - * DAG of each request, we are able to insert it into a sorted queue when it - * is ready, and are able to reorder its portion of the graph to accommodate - * dynamic priority changes. - */ -struct i915_sched_node { - struct list_head signalers_list; /* those before us, we depend upon */ - struct list_head waiters_list; /* those after us, they depend upon us */ - struct list_head link; - struct i915_sched_attr attr; - unsigned int flags; -#define I915_SCHED_HAS_SEMAPHORE BIT(0) -}; - -struct i915_dependency { - struct i915_sched_node *signaler; - struct list_head signal_link; - struct list_head wait_link; - struct list_head dfs_link; - unsigned long flags; -#define I915_DEPENDENCY_ALLOC BIT(0) -}; - -struct i915_priolist { - struct list_head requests[I915_PRIORITY_COUNT]; - struct rb_node node; - unsigned long used; - int priority; -}; +#include "i915_scheduler_types.h" #define priolist_for_each_request(it, plist, idx) \ for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \ diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h new file mode 100644 index 000000000000..5c94b3eb5c81 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -0,0 +1,98 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#ifndef _I915_SCHEDULER_TYPES_H_ +#define _I915_SCHEDULER_TYPES_H_ + +#include +#include + +#include + +struct drm_i915_private; +struct i915_request; +struct intel_engine_cs; + +enum { + I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, + I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, + I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, + + I915_PRIORITY_INVALID = INT_MIN +}; + +#define I915_USER_PRIORITY_SHIFT 3 +#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT) + +#define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT) +#define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1) + +#define I915_PRIORITY_WAIT 
((u8)BIT(0)) +#define I915_PRIORITY_NEWCLIENT ((u8)BIT(1)) +#define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(2)) + +#define __NO_PREEMPTION (I915_PRIORITY_WAIT) + +struct i915_sched_attr { + /** + * @priority: execution and service priority + * + * All clients are equal, but some are more equal than others! + * + * Requests from a context with a greater (more positive) value of + * @priority will be executed before those with a lower @priority + * value, forming a simple QoS. + * + * The &drm_i915_private.kernel_context is assigned the lowest priority. + */ + int priority; +}; + +/* + * "People assume that time is a strict progression of cause to effect, but + * actually, from a nonlinear, non-subjective viewpoint, it's more like a big + * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015 + * + * Requests exist in a complex web of interdependencies. Each request + * has to wait for some other request to complete before it is ready to be run + * (e.g. we have to wait until the pixels have been rendering into a texture + * before we can copy from it). We track the readiness of a request in terms + * of fences, but we also need to keep the dependency tree for the lifetime + * of the request (beyond the life of an individual fence). We use the tree + * at various points to reorder the requests whilst keeping the requests + * in order with respect to their various dependencies. + * + * There is no active component to the "scheduler". As we know the dependency + * DAG of each request, we are able to insert it into a sorted queue when it + * is ready, and are able to reorder its portion of the graph to accommodate + * dynamic priority changes. 
+ */ +struct i915_sched_node { + struct list_head signalers_list; /* those before us, we depend upon */ + struct list_head waiters_list; /* those after us, they depend upon us */ + struct list_head link; + struct i915_sched_attr attr; + unsigned int flags; +#define I915_SCHED_HAS_SEMAPHORE BIT(0) +}; + +struct i915_dependency { + struct i915_sched_node *signaler; + struct list_head signal_link; + struct list_head wait_link; + struct list_head dfs_link; + unsigned long flags; +#define I915_DEPENDENCY_ALLOC BIT(0) +}; + +struct i915_priolist { + struct list_head requests[I915_PRIORITY_COUNT]; + struct rb_node node; + unsigned long used; + int priority; +}; + +#endif /* _I915_SCHEDULER_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index c1e47a423d85..4ca7f80bdf6d 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -27,6 +27,7 @@ #include +#include "i915_active.h" #include "i915_syncmap.h" #include "i915_timeline_types.h" diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h index 12ba3c573aa0..1f5b55d9ffb5 100644 --- a/drivers/gpu/drm/i915/i915_timeline_types.h +++ b/drivers/gpu/drm/i915/i915_timeline_types.h @@ -9,9 +9,10 @@ #include #include +#include #include -#include "i915_active.h" +#include "i915_active_types.h" struct drm_i915_private; struct i915_vma; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 616e9f707877..0e579f158016 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -27,6 +27,7 @@ #include +#include "intel_engine_types.h" #include "intel_display.h" struct drm_printer; @@ -165,8 +166,6 @@ struct sseu_dev_info { u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; }; -typedef u8 intel_engine_mask_t; - struct intel_device_info { u16 gen_mask; diff --git a/drivers/gpu/drm/i915/intel_engine_types.h 
b/drivers/gpu/drm/i915/intel_engine_types.h index b3249bf6a65f..232e37c1f312 100644 --- a/drivers/gpu/drm/i915/intel_engine_types.h +++ b/drivers/gpu/drm/i915/intel_engine_types.h @@ -13,8 +13,10 @@ #include #include +#include "i915_gem.h" +#include "i915_scheduler_types.h" +#include "i915_selftest.h" #include "i915_timeline_types.h" -#include "intel_device_info.h" #include "intel_workarounds_types.h" #include "i915_gem_batch_pool.h" @@ -25,12 +27,16 @@ #define I915_CMD_HASH_ORDER 9 +struct dma_fence; struct drm_i915_reg_table; struct i915_gem_context; struct i915_request; struct i915_sched_attr; struct intel_uncore; +typedef u8 intel_engine_mask_t; +#define ALL_ENGINES ((intel_engine_mask_t)~0ul) + struct intel_hw_status_page { struct i915_vma *vma; u32 *addr; @@ -105,8 +111,9 @@ enum intel_engine_id { VCS3, #define _VCS(n) (VCS0 + (n)) VECS0, - VECS1 + VECS1, #define _VECS(n) (VECS0 + (n)) + I915_NUM_ENGINES }; struct st_preempt_hang { diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h index 169c54568340..aa5e6749c925 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/intel_guc_submission.h @@ -29,6 +29,7 @@ #include "i915_gem.h" #include "i915_selftest.h" +#include "intel_engine_types.h" struct drm_i915_private; diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 59232df11ada..3d51ed1428d4 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -221,8 +221,8 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915, unsigned int stuck) { struct intel_engine_cs *engine; + intel_engine_mask_t tmp; char msg[80]; - unsigned int tmp; int len; /* If some rings hung but others were still busy, only diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 45f73b8b4e6d..4e1b6efc6b22 100644 --- 
a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -1594,10 +1594,10 @@ out_unlock: } static __maybe_unused const char * -__engine_name(struct drm_i915_private *i915, unsigned int engines) +__engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines) { struct intel_engine_cs *engine; - unsigned int tmp; + intel_engine_mask_t tmp; if (engines == ALL_ENGINES) return "all"; @@ -1610,10 +1610,10 @@ __engine_name(struct drm_i915_private *i915, unsigned int engines) static int __igt_switch_to_kernel_context(struct drm_i915_private *i915, struct i915_gem_context *ctx, - unsigned int engines) + intel_engine_mask_t engines) { struct intel_engine_cs *engine; - unsigned int tmp; + intel_engine_mask_t tmp; int pass; GEM_TRACE("Testing %s\n", __engine_name(i915, engines)); diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 76b4fa150f2e..050bd1e19e02 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -1124,7 +1124,8 @@ static int igt_reset_engines(void *arg) return 0; } -static u32 fake_hangcheck(struct drm_i915_private *i915, u32 mask) +static u32 fake_hangcheck(struct drm_i915_private *i915, + intel_engine_mask_t mask) { u32 count = i915_reset_count(&i915->gpu_error); diff --git a/drivers/gpu/drm/i915/test_i915_scheduler_types_standalone.c b/drivers/gpu/drm/i915/test_i915_scheduler_types_standalone.c new file mode 100644 index 000000000000..8afa2c3719fb --- /dev/null +++ b/drivers/gpu/drm/i915/test_i915_scheduler_types_standalone.c @@ -0,0 +1,7 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_scheduler_types.h" -- cgit v1.2.3-59-g8ed1b From de220cc21967fd745d91fbd3fc23a13372730db8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 8 Apr 2019 10:17:03 +0100 Subject: drm/i915: Consolidate the timeline->barrier 
The timeline is strictly ordered, so by inserting the timeline->barrier request into the timeline->last_request it naturally provides the same barrier. Consolidate the pair of barriers into one as they serve the same purpose. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190408091728.20207-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 13 ++----------- drivers/gpu/drm/i915/i915_request.c | 9 --------- drivers/gpu/drm/i915/i915_timeline.c | 2 -- drivers/gpu/drm/i915/i915_timeline.h | 15 --------------- drivers/gpu/drm/i915/i915_timeline_types.h | 10 ---------- drivers/gpu/drm/i915/selftests/mock_timeline.c | 1 - 6 files changed, 2 insertions(+), 48 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_timeline.h') diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 66b6852cb4d2..7fc34ab6df87 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -1167,7 +1167,7 @@ static int gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) { struct drm_i915_private *i915 = ce->engine->i915; - struct i915_request *rq, *prev; + struct i915_request *rq; intel_wakeref_t wakeref; int ret; @@ -1192,16 +1192,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) } /* Queue this switch after all other activity by this context. */ - prev = i915_active_request_raw(&ce->ring->timeline->last_request, - &i915->drm.struct_mutex); - if (prev && !i915_request_completed(prev)) { - ret = i915_request_await_dma_fence(rq, &prev->fence); - if (ret < 0) - goto out_add; - } - - /* Order all following requests to be after. 
*/ - ret = i915_timeline_set_barrier(ce->ring->timeline, rq); + ret = i915_active_request_set(&ce->ring->timeline->last_request, rq); if (ret) goto out_add; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 2da0d6436a1a..96a9e8bcd805 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -584,11 +584,6 @@ out: return kmem_cache_alloc(global.slab_requests, GFP_KERNEL); } -static int add_timeline_barrier(struct i915_request *rq) -{ - return i915_request_await_active_request(rq, &rq->timeline->barrier); -} - /** * i915_request_alloc - allocate a request structure * @@ -738,10 +733,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) */ rq->head = rq->ring->emit; - ret = add_timeline_barrier(rq); - if (ret) - goto err_unwind; - ret = engine->request_alloc(rq); if (ret) goto err_unwind; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 2f4907364920..5fbea0892f33 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -253,7 +253,6 @@ int i915_timeline_init(struct drm_i915_private *i915, spin_lock_init(&timeline->lock); mutex_init(&timeline->mutex); - INIT_ACTIVE_REQUEST(&timeline->barrier); INIT_ACTIVE_REQUEST(&timeline->last_request); INIT_LIST_HEAD(&timeline->requests); @@ -326,7 +325,6 @@ void i915_timeline_fini(struct i915_timeline *timeline) { GEM_BUG_ON(timeline->pin_count); GEM_BUG_ON(!list_empty(&timeline->requests)); - GEM_BUG_ON(i915_active_request_isset(&timeline->barrier)); i915_syncmap_free(&timeline->sync); diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 4ca7f80bdf6d..27668a1a69a3 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -110,19 +110,4 @@ void i915_timelines_init(struct drm_i915_private *i915); void i915_timelines_park(struct drm_i915_private *i915); void 
i915_timelines_fini(struct drm_i915_private *i915); -/** - * i915_timeline_set_barrier - orders submission between different timelines - * @timeline: timeline to set the barrier on - * @rq: request after which new submissions can proceed - * - * Sets the passed in request as the serialization point for all subsequent - * submissions on @timeline. Subsequent requests will not be submitted to GPU - * until the barrier has been completed. - */ -static inline int -i915_timeline_set_barrier(struct i915_timeline *tl, struct i915_request *rq) -{ - return i915_active_request_set(&tl->barrier, rq); -} - #endif diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h index 1f5b55d9ffb5..5256a0b5c5f7 100644 --- a/drivers/gpu/drm/i915/i915_timeline_types.h +++ b/drivers/gpu/drm/i915/i915_timeline_types.h @@ -61,16 +61,6 @@ struct i915_timeline { */ struct i915_syncmap *sync; - /** - * Barrier provides the ability to serialize ordering between different - * timelines. - * - * Users can call i915_timeline_set_barrier which will make all - * subsequent submissions to this timeline be executed only after the - * barrier has been completed. - */ - struct i915_active_request barrier; - struct list_head link; struct drm_i915_private *i915; diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c index 416d85233263..e084476469ef 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -16,7 +16,6 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) spin_lock_init(&timeline->lock); mutex_init(&timeline->mutex); - INIT_ACTIVE_REQUEST(&timeline->barrier); INIT_ACTIVE_REQUEST(&timeline->last_request); INIT_LIST_HEAD(&timeline->requests); -- cgit v1.2.3-59-g8ed1b