Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_execlists_submission.c')
 -rw-r--r--  drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 145
 1 file changed, 56 insertions(+), 89 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index d7d5a58990bb..ac1be7a632d3 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -115,6 +115,7 @@
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
+#include "intel_engine_stats.h"
#include "intel_execlists_submission.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
@@ -230,8 +231,7 @@ active_request(const struct intel_timeline * const tl, struct i915_request *rq)
return __active_request(tl, rq, 0);
}
-static inline void
-ring_set_paused(const struct intel_engine_cs *engine, int state)
+static void ring_set_paused(const struct intel_engine_cs *engine, int state)
{
/*
* We inspect HWS_PREEMPT with a semaphore inside
@@ -244,12 +244,12 @@ ring_set_paused(const struct intel_engine_cs *engine, int state)
wmb();
}
-static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+static struct i915_priolist *to_priolist(struct rb_node *rb)
{
return rb_entry(rb, struct i915_priolist, node);
}
-static inline int rq_prio(const struct i915_request *rq)
+static int rq_prio(const struct i915_request *rq)
{
return READ_ONCE(rq->sched.attr.priority);
}
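
A note on ring_set_paused, whose tail is visible at the top of the hunk above: the (truncated) comment describes a software semaphore. The fini-breadcrumb emitted after every request busywaits on the HWS_PREEMPT dword of the status page, so storing a non-zero state holds the ring at the end of the current request until the dword is cleared. A minimal sketch of that mechanism, assuming the usual status-page layout; an illustration, not the verbatim body:

    static void ring_set_paused_sketch(const struct intel_engine_cs *engine,
                                       int state)
    {
            /*
             * The semaphore inside engine->emit_fini_breadcrumb spins on
             * this dword; while it is non-zero the ring is held paused.
             */
            engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;

            /* Publish the pause before any ELSP write that relies on it. */
            if (state)
                    wmb();
    }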
@@ -299,8 +299,8 @@ static int virtual_prio(const struct intel_engine_execlists *el)
return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
}
-static inline bool need_preempt(const struct intel_engine_cs *engine,
- const struct i915_request *rq)
+static bool need_preempt(const struct intel_engine_cs *engine,
+ const struct i915_request *rq)
{
int last_prio;
@@ -351,7 +351,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
queue_prio(&engine->execlists)) > last_prio;
}
-__maybe_unused static inline bool
+__maybe_unused static bool
assert_priority_queue(const struct i915_request *prev,
const struct i915_request *next)
{
@@ -418,7 +418,7 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
return __unwind_incomplete_requests(engine);
}
-static inline void
+static void
execlists_context_status_change(struct i915_request *rq, unsigned long status)
{
/*
@@ -432,39 +432,6 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
status, rq);
}
-static void intel_engine_context_in(struct intel_engine_cs *engine)
-{
- unsigned long flags;
-
- if (atomic_add_unless(&engine->stats.active, 1, 0))
- return;
-
- write_seqlock_irqsave(&engine->stats.lock, flags);
- if (!atomic_add_unless(&engine->stats.active, 1, 0)) {
- engine->stats.start = ktime_get();
- atomic_inc(&engine->stats.active);
- }
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
-static void intel_engine_context_out(struct intel_engine_cs *engine)
-{
- unsigned long flags;
-
- GEM_BUG_ON(!atomic_read(&engine->stats.active));
-
- if (atomic_add_unless(&engine->stats.active, -1, 1))
- return;
-
- write_seqlock_irqsave(&engine->stats.lock, flags);
- if (atomic_dec_and_test(&engine->stats.active)) {
- engine->stats.total =
- ktime_add(engine->stats.total,
- ktime_sub(ktime_get(), engine->stats.start));
- }
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
static void reset_active(struct i915_request *rq,
struct intel_engine_cs *engine)
{
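
The intel_engine_context_in/out helpers removed above are not dropped: paired with the new intel_engine_stats.h include at the top of this patch, the busy-stats accounting evidently moves into a shared header. The pattern is an atomic_t fast path for nested schedule-in/out, with only the 0<->1 transitions taking the stats seqlock to update the start timestamp and accumulated total. That lets a reader sample the pair tear-free; a sketch of the reader side, using the same stats fields seen above:

    static ktime_t busy_time_sketch(const struct intel_engine_cs *engine)
    {
            unsigned int seq;
            ktime_t total;

            do {
                    seq = read_seqbegin(&engine->stats.lock);

                    total = engine->stats.total;
                    if (atomic_read(&engine->stats.active))
                            /* A context is on the HW now; include it. */
                            total = ktime_add(total,
                                              ktime_sub(ktime_get(),
                                                        engine->stats.start));
            } while (read_seqretry(&engine->stats.lock, seq));

            return total;
    }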
@@ -503,7 +470,7 @@ static void reset_active(struct i915_request *rq,
ce->lrc.lrca = lrc_update_regs(ce, engine, head);
}
-static inline struct intel_engine_cs *
+static struct intel_engine_cs *
__execlists_schedule_in(struct i915_request *rq)
{
struct intel_engine_cs * const engine = rq->engine;
@@ -539,7 +506,7 @@ __execlists_schedule_in(struct i915_request *rq)
ce->lrc.ccid |= engine->execlists.ccid;
__intel_gt_pm_get(engine->gt);
- if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active))
+ if (engine->fw_domain && !engine->fw_active++)
intel_uncore_forcewake_get(engine->uncore, engine->fw_domain);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(engine);
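
The fw_active hunk above (and its counterpart in __execlists_schedule_out below) demotes the counter from atomic_t to a plain increment/decrement, presumably because schedule-in and schedule-out both run under the engine's submission tasklet and are therefore already serialised. What remains is an ordinary refcount gating the forcewake domain on its edge transitions:

    /* schedule-in, 0 -> 1: the first user acquires the forcewake domain */
    if (engine->fw_domain && !engine->fw_active++)
            intel_uncore_forcewake_get(engine->uncore, engine->fw_domain);

    /* schedule-out, 1 -> 0: the last user releases it */
    if (engine->fw_domain && !--engine->fw_active)
            intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);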
@@ -549,7 +516,7 @@ __execlists_schedule_in(struct i915_request *rq)
return engine;
}
-static inline void execlists_schedule_in(struct i915_request *rq, int idx)
+static void execlists_schedule_in(struct i915_request *rq, int idx)
{
struct intel_context * const ce = rq->context;
struct intel_engine_cs *old;
@@ -608,9 +575,9 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
tasklet_hi_schedule(&ve->base.execlists.tasklet);
}
-static inline void __execlists_schedule_out(struct i915_request *rq)
+static void __execlists_schedule_out(struct i915_request * const rq,
+ struct intel_context * const ce)
{
- struct intel_context * const ce = rq->context;
struct intel_engine_cs * const engine = rq->engine;
unsigned int ccid;
@@ -621,6 +588,7 @@ static inline void __execlists_schedule_out(struct i915_request *rq)
*/
CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid);
+ GEM_BUG_ON(ce->inflight != engine);
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
lrc_check_regs(ce, engine, "after");
@@ -645,7 +613,7 @@ static inline void __execlists_schedule_out(struct i915_request *rq)
lrc_update_runtime(ce);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
- if (engine->fw_domain && !atomic_dec_return(&engine->fw_active))
+ if (engine->fw_domain && !--engine->fw_active)
intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
intel_gt_pm_put_async(engine->gt);
@@ -660,10 +628,12 @@ static inline void __execlists_schedule_out(struct i915_request *rq)
*/
if (ce->engine != engine)
kick_siblings(rq, ce);
+
+ WRITE_ONCE(ce->inflight, NULL);
+ intel_context_put(ce);
}
-static inline void
-execlists_schedule_out(struct i915_request *rq)
+static inline void execlists_schedule_out(struct i915_request *rq)
{
struct intel_context * const ce = rq->context;
@@ -671,12 +641,8 @@ execlists_schedule_out(struct i915_request *rq)
GEM_BUG_ON(!ce->inflight);
ce->inflight = ptr_dec(ce->inflight);
- if (!__intel_context_inflight_count(ce->inflight)) {
- GEM_BUG_ON(ce->inflight != rq->engine);
- __execlists_schedule_out(rq);
- WRITE_ONCE(ce->inflight, NULL);
- intel_context_put(ce);
- }
+ if (!__intel_context_inflight_count(ce->inflight))
+ __execlists_schedule_out(rq, ce);
i915_request_put(rq);
}
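
The split above also makes the lifetime rules easier to see. ce->inflight does double duty: it stores the engine the context is executing on, with a small submission count packed into the low bits of the (sufficiently aligned) pointer. ptr_dec() lowers that count and __intel_context_inflight_count() extracts it; only when it reaches zero is the engine pointer compared (now via the GEM_BUG_ON moved into __execlists_schedule_out) and the final schedule-out performed. A self-contained sketch of the encoding, with illustrative names rather than the i915 helpers:

    #define INFLIGHT_BITS 3 /* engine pointers are at least 8-byte aligned */
    #define INFLIGHT_MASK ((1ul << INFLIGHT_BITS) - 1)

    static inline void *inflight_pack(void *engine, unsigned long count)
    {
            return (void *)((unsigned long)engine | count);
    }

    static inline unsigned long inflight_count(const void *p)
    {
            return (unsigned long)p & INFLIGHT_MASK;
    }

    static inline void *inflight_dec(void *p)
    {
            return (void *)((unsigned long)p - 1); /* count-- in the low bits */
    }

    static inline void *inflight_engine(const void *p)
    {
            return (void *)((unsigned long)p & ~INFLIGHT_MASK);
    }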
@@ -728,7 +694,7 @@ static u64 execlists_update_context(struct i915_request *rq)
return desc;
}
-static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
+static void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
{
if (execlists->ctrl_reg) {
writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
@@ -757,7 +723,7 @@ dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
return buf;
}
-static __maybe_unused void
+static __maybe_unused noinline void
trace_ports(const struct intel_engine_execlists *execlists,
const char *msg,
struct i915_request * const *ports)
@@ -774,13 +740,13 @@ trace_ports(const struct intel_engine_execlists *execlists,
dump_port(p1, sizeof(p1), ", ", ports[1]));
}
-static inline bool
+static bool
reset_in_progress(const struct intel_engine_execlists *execlists)
{
return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
}
-static __maybe_unused bool
+static __maybe_unused noinline bool
assert_pending_valid(const struct intel_engine_execlists *execlists,
const char *msg)
{
@@ -1258,12 +1224,20 @@ static void set_preempt_timeout(struct intel_engine_cs *engine,
active_preempt_timeout(engine, rq));
}
+static bool completed(const struct i915_request *rq)
+{
+ if (i915_request_has_sentinel(rq))
+ return false;
+
+ return __i915_request_is_complete(rq);
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request **port = execlists->pending;
struct i915_request ** const last_port = port + execlists->port_mask;
- struct i915_request *last = *execlists->active;
+ struct i915_request *last, * const *active;
struct virtual_engine *ve;
struct rb_node *rb;
bool submit = false;
@@ -1300,21 +1274,13 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* i.e. we will retrigger preemption following the ack in case
* of trouble.
*
- * In theory we can skip over completed contexts that have not
- * yet been processed by events (as those events are in flight):
- *
- * while ((last = *active) && i915_request_completed(last))
- * active++;
- *
- * However, the GPU cannot handle this as it will ultimately
- * find itself trying to jump back into a context it has just
- * completed and barf.
*/
+ active = execlists->active;
+ while ((last = *active) && completed(last))
+ active++;
if (last) {
- if (__i915_request_is_complete(last)) {
- goto check_secondary;
- } else if (need_preempt(engine, last)) {
+ if (need_preempt(engine, last)) {
ENGINE_TRACE(engine,
"preempting last=%llx:%lld, prio=%d, hint=%d\n",
last->fence.context,
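
This hunk does what the deleted comment once warned against: it steps over requests the GPU has already finished but whose CSB events are still in flight, leaving 'last' pointing at the first request that is genuinely running or pending. The completed() helper added earlier keeps sentinels out of the skip; a sentinel request is a barrier against coalescing further work into the same submission, so reporting it as incomplete forces the dequeue logic to stop and deal with it. Annotated, the scan reads:

    active = execlists->active;     /* NULL-terminated array of ports */

    /*
     * Skip requests the HW has retired even though their CSB events
     * have not been processed yet -- but never skip a sentinel.
     */
    while ((last = *active) && completed(last))
            active++;

Note the knock-on simplification in the next hunk: once the scan has advanced, a non-NULL active[1] directly answers whether ELSP[1] is occupied, replacing the old check_secondary label and its list_is_last() test.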
@@ -1393,9 +1359,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* we hopefully coalesce several updates into a single
* submission.
*/
-check_secondary:
- if (!list_is_last(&last->sched.link,
- &engine->active.requests)) {
+ if (active[1]) {
/*
* Even if ELSP[1] is occupied and not worthy
* of timeslices, our queue might be.
@@ -1596,7 +1560,7 @@ done:
* of ordered contexts.
*/
if (submit &&
- memcmp(execlists->active,
+ memcmp(active,
execlists->pending,
(port - execlists->pending) * sizeof(*port))) {
*port = NULL;
@@ -1604,7 +1568,7 @@ done:
execlists_schedule_in(*port, port - execlists->pending);
WRITE_ONCE(execlists->yield, -1);
- set_preempt_timeout(engine, *execlists->active);
+ set_preempt_timeout(engine, *active);
execlists_submit_ports(engine);
} else {
ring_set_paused(engine, 0);
@@ -1621,12 +1585,12 @@ static void execlists_dequeue_irq(struct intel_engine_cs *engine)
local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
}
-static inline void clear_ports(struct i915_request **ports, int count)
+static void clear_ports(struct i915_request **ports, int count)
{
memset_p((void **)ports, NULL, count);
}
-static inline void
+static void
copy_ports(struct i915_request **dst, struct i915_request **src, int count)
{
/* A memcpy_p() would be very useful here! */
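
copy_ports' body sits outside this context window; in substance it is an element-wise copy, since an array of request pointers cannot simply be memcpy()'d while lockless readers (the CSB processing path) may observe it mid-update. Something like:

    while (count--)
            WRITE_ONCE(*dst++, *src++); /* no store tearing for readers */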
@@ -1660,8 +1624,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists,
return inactive;
}
-static inline void
-invalidate_csb_entries(const u64 *first, const u64 *last)
+static void invalidate_csb_entries(const u64 *first, const u64 *last)
{
clflush((void *)first);
clflush((void *)last);
@@ -1693,7 +1656,7 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
* bits 47-57: sw context id of the lrc the GT switched away from
* bits 58-63: sw counter of the lrc the GT switched away from
*/
-static inline bool gen12_csb_parse(const u64 csb)
+static bool gen12_csb_parse(const u64 csb)
{
bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
bool new_queue =
@@ -1720,7 +1683,7 @@ static inline bool gen12_csb_parse(const u64 csb)
return false;
}
-static inline bool gen8_csb_parse(const u64 csb)
+static bool gen8_csb_parse(const u64 csb)
{
return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
}
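
Given the bit layout documented in the comment above, pulling the "switched away from" fields out of the 64-bit CSB entry is a pair of shift-and-mask operations. The macros below are illustrative only; the driver's real definitions (GEN12_CSB_CTX_VALID and friends) live with the CSB register declarations:

    /* bits 47-57: sw context id of the lrc the GT switched away from */
    #define CSB_AWAY_SW_CTX_ID(csb)  (((csb) >> 47) & GENMASK_ULL(10, 0))

    /* bits 58-63: sw counter of the lrc the GT switched away from */
    #define CSB_AWAY_SW_COUNTER(csb) (((csb) >> 58) & GENMASK_ULL(5, 0))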
@@ -1759,8 +1722,7 @@ wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb)
return entry;
}
-static inline u64
-csb_read(const struct intel_engine_cs *engine, u64 * const csb)
+static u64 csb_read(const struct intel_engine_cs *engine, u64 * const csb)
{
u64 entry = READ_ONCE(*csb);
@@ -2026,6 +1988,9 @@ static void __execlists_hold(struct i915_request *rq)
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
+ if (p->flags & I915_DEPENDENCY_WEAK)
+ continue;
+
/* Leave semaphores spinning on the other engines */
if (w->engine != rq->engine)
continue;
@@ -2124,6 +2089,9 @@ static void __execlists_unhold(struct i915_request *rq)
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
+ if (p->flags & I915_DEPENDENCY_WEAK)
+ continue;
+
/* Propagate any change in error status */
if (rq->fence.error)
i915_request_set_error_once(w, rq->fence.error);
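
__execlists_hold and __execlists_unhold gain the same filter: I915_DEPENDENCY_WEAK edges (used, for example, to couple concurrently submitted bonded requests) impose ordering but no execution dependence, so holding a hung request no longer drags such waiters off the GPU with it, and unholding neither rekicks them nor propagates the error status across the weak edge. In context the walk looks roughly like this, with field names as in i915_scheduler_types.h:

    struct i915_dependency *p;

    list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
            struct i915_request *w =
                    container_of(p->waiter, typeof(*w), sched);

            /* Ordering-only edge: the waiter can run regardless. */
            if (p->flags & I915_DEPENDENCY_WEAK)
                    continue;

            /* ... hold/unhold propagation for hard dependencies ... */
    }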
@@ -3180,8 +3148,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
}
}
-static inline void
-logical_ring_default_irqs(struct intel_engine_cs *engine)
+static void logical_ring_default_irqs(struct intel_engine_cs *engine)
{
unsigned int shift = 0;
@@ -3296,7 +3263,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk)
old = fetch_and_zero(&ve->request);
if (old) {
- GEM_BUG_ON(!i915_request_completed(old));
+ GEM_BUG_ON(!__i915_request_is_complete(old));
__i915_request_submit(old);
i915_request_put(old);
}
@@ -3573,7 +3540,7 @@ static void virtual_submit_request(struct i915_request *rq)
}
if (ve->request) { /* background completion from preempt-to-busy */
- GEM_BUG_ON(!i915_request_completed(ve->request));
+ GEM_BUG_ON(!__i915_request_is_complete(ve->request));
__i915_request_submit(ve->request);
i915_request_put(ve->request);
}
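
Finally, the two GEM_BUG_ON hunks swap i915_request_completed() for the raw __i915_request_is_complete(). The distinction in this era of the driver: the plain helper guards the status-page seqno read behind RCU (the hwsp cacheline can be recycled under an unreferenced request), while the __-prefixed variant assumes the caller already holds the request stable, as these paths do under the virtual engine's lock. An abridged sketch of the relationship, as an illustration rather than the verbatim header:

    static inline bool i915_request_completed(const struct i915_request *rq)
    {
            bool ret;

            if (i915_request_signaled(rq))
                    return true;

            rcu_read_lock();        /* the hwsp may be recycled under us */
            ret = __i915_request_is_complete(rq);
            rcu_read_unlock();

            return ret;
    }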