Diffstat (limited to 'drivers/gpu/drm/i915/i915_request.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_request.c  258
1 file changed, 148 insertions(+), 110 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a18b2a244706..c0df71d7d0ff 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -51,7 +51,6 @@ struct execute_cb {
static struct i915_global_request {
struct i915_global base;
struct kmem_cache *slab_requests;
- struct kmem_cache *slab_dependencies;
struct kmem_cache *slab_execute_cbs;
} global;
@@ -203,6 +202,19 @@ static void free_capture_list(struct i915_request *request)
}
}
+static void __i915_request_fill(struct i915_request *rq, u8 val)
+{
+ void *vaddr = rq->ring->vaddr;
+ u32 head;
+
+ head = rq->infix;
+ if (rq->postfix < head) {
+ memset(vaddr + head, val, rq->ring->size - head);
+ head = 0;
+ }
+ memset(vaddr + head, val, rq->postfix - head);
+}
+
static void remove_from_engine(struct i915_request *rq)
{
struct intel_engine_cs *engine, *locked;
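The new __i915_request_fill() above is the standard two-step fill for a circular buffer: if the end offset (rq->postfix) has wrapped behind the start (rq->infix), write up to the end of the ring first, then restart from offset 0. A minimal userspace sketch of the same pattern, with a hypothetical struct ring standing in for the driver's intel_ring:

    #include <string.h>

    struct ring {
        void *vaddr;        /* start of the backing storage */
        unsigned int size;  /* total size in bytes */
    };

    /* Fill [head, tail) with 'val', where tail may have wrapped past the end. */
    static void ring_fill(struct ring *r, unsigned int head, unsigned int tail,
                          unsigned char val)
    {
        if (tail < head) {  /* wrapped: fill to the end of the ring first */
            memset((char *)r->vaddr + head, val, r->size - head);
            head = 0;
        }
        memset((char *)r->vaddr + head, val, tail - head);
    }

In i915_request_retire() this is used (under CONFIG_DRM_I915_DEBUG_GEM) to poison the space being handed back to the ring, and in __i915_request_skip() below to zero out a doomed request's payload while keeping its breadcrumb.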
@@ -247,6 +259,9 @@ bool i915_request_retire(struct i915_request *rq)
*/
GEM_BUG_ON(!list_is_first(&rq->link,
&i915_request_timeline(rq)->requests));
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ /* Poison before we release our space in the ring */
+ __i915_request_fill(rq, POISON_FREE);
rq->ring->head = rq->postfix;
/*
@@ -275,7 +290,7 @@ bool i915_request_retire(struct i915_request *rq)
spin_unlock_irq(&rq->lock);
remove_from_client(rq);
- list_del_rcu(&rq->link);
+ __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
intel_context_exit(rq->context);
intel_context_unpin(rq->context);
@@ -348,6 +363,50 @@ __await_execution(struct i915_request *rq,
return 0;
}
+static bool fatal_error(int error)
+{
+ switch (error) {
+ case 0: /* not an error! */
+ case -EAGAIN: /* innocent victim of a GT reset (__i915_request_reset) */
+ case -ETIMEDOUT: /* waiting for Godot (timer_i915_sw_fence_wake) */
+ return false;
+ default:
+ return true;
+ }
+}
+
+void __i915_request_skip(struct i915_request *rq)
+{
+ GEM_BUG_ON(!fatal_error(rq->fence.error));
+
+ if (rq->infix == rq->postfix)
+ return;
+
+ /*
+ * As this request likely depends on state from the lost
+ * context, clear out all the user operations leaving the
+ * breadcrumb at the end (so we get the fence notifications).
+ */
+ __i915_request_fill(rq, 0);
+ rq->infix = rq->postfix;
+}
+
+void i915_request_set_error_once(struct i915_request *rq, int error)
+{
+ int old;
+
+ GEM_BUG_ON(!IS_ERR_VALUE((long)error));
+
+ if (i915_request_signaled(rq))
+ return;
+
+ old = READ_ONCE(rq->fence.error);
+ do {
+ if (fatal_error(old))
+ return;
+ } while (!try_cmpxchg(&rq->fence.error, &old, error));
+}
+
bool __i915_request_submit(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
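i915_request_set_error_once() above is a lock-free "record the first error" helper: it rereads the current fence error on every failed exchange and stops as soon as a fatal error is already present, so a benign -EAGAIN or -ETIMEDOUT can still be upgraded but a real failure is never overwritten. A rough userspace equivalent using C11 atomics in place of try_cmpxchg() (hypothetical names, not the driver's types):

    #include <errno.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    static bool is_fatal(int error)
    {
        switch (error) {
        case 0:
        case -EAGAIN:
        case -ETIMEDOUT:
            return false;
        default:
            return true;
        }
    }

    /* Store 'error' unless a fatal error has already been recorded. */
    static void set_error_once(_Atomic int *slot, int error)
    {
        int old = atomic_load_explicit(slot, memory_order_relaxed);

        do {
            if (is_fatal(old))
                return;  /* first fatal error wins */
        } while (!atomic_compare_exchange_weak(slot, &old, error));
    }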
@@ -377,8 +436,10 @@ bool __i915_request_submit(struct i915_request *request)
if (i915_request_completed(request))
goto xfer;
- if (intel_context_is_banned(request->context))
- i915_request_skip(request, -EIO);
+ if (unlikely(intel_context_is_banned(request->context)))
+ i915_request_set_error_once(request, -EIO);
+ if (unlikely(fatal_error(request->fence.error)))
+ __i915_request_skip(request);
/*
* Are we using semaphores when the gpu is already saturated?
@@ -504,7 +565,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
trace_i915_request_submit(request);
if (unlikely(fence->error))
- i915_request_skip(request, fence->error);
+ i915_request_set_error_once(request, fence->error);
/*
* We need to serialize use of the submit_request() callback
@@ -688,6 +749,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
RCU_INIT_POINTER(rq->timeline, tl);
RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline);
rq->hwsp_seqno = tl->hwsp_seqno;
+ GEM_BUG_ON(i915_request_completed(rq));
rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
@@ -857,7 +919,7 @@ already_busywaiting(struct i915_request *rq)
*
* See the are-we-too-late? check in __i915_request_submit().
*/
- return rq->sched.semaphores | rq->engine->saturated;
+ return rq->sched.semaphores | READ_ONCE(rq->engine->saturated);
}
static int
@@ -915,8 +977,16 @@ emit_semaphore_wait(struct i915_request *to,
struct i915_request *from,
gfp_t gfp)
{
+ const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask;
+
+ if (!intel_context_use_semaphores(to->context))
+ goto await_fence;
+
+ if (!rcu_access_pointer(from->hwsp_cacheline))
+ goto await_fence;
+
/* Just emit the first semaphore we see as request space is limited. */
- if (already_busywaiting(to) & from->engine->mask)
+ if (already_busywaiting(to) & mask)
goto await_fence;
if (i915_request_await_start(to, from) < 0)
@@ -929,7 +999,7 @@ emit_semaphore_wait(struct i915_request *to,
if (__emit_semaphore_wait(to, from, from->fence.seqno))
goto await_fence;
- to->sched.semaphores |= from->engine->mask;
+ to->sched.semaphores |= mask;
to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
return 0;
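emit_semaphore_wait() above only spends ring space on the first busy-wait per signalling engine: to->sched.semaphores records which engines we already spin on, and together with engine->saturated it gates any further semaphores, falling back to an ordinary fence await. A compact model of that "claim at most one busy-wait per engine" bookkeeping (hypothetical names, single-bit engine masks as in the driver):

    #include <stdbool.h>
    #include <stdint.h>

    struct waiter {
        uint32_t semaphores;  /* engines we already busy-wait on */
    };

    /*
     * Return true if a semaphore wait may be emitted for 'engine_mask' and
     * record it; later waits on the same engine should use a plain fence.
     */
    static bool claim_busywait(struct waiter *w, uint32_t engine_mask,
                               uint32_t saturated)
    {
        if ((w->semaphores | saturated) & engine_mask)
            return false;  /* already spinning on, or saturated by, this engine */

        w->semaphores |= engine_mask;
        return true;
    }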
@@ -960,12 +1030,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
&from->submit,
I915_FENCE_GFP);
- else if (intel_context_use_semaphores(to->context))
- ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
else
- ret = i915_sw_fence_await_dma_fence(&to->submit,
- &from->fence, 0,
- I915_FENCE_GFP);
+ ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
if (ret < 0)
return ret;
@@ -1064,6 +1130,8 @@ __i915_request_await_execution(struct i915_request *to,
{
int err;
+ GEM_BUG_ON(intel_context_is_barrier(from->context));
+
/* Submit both requests at the same time */
err = __await_execution(to, from, hook, I915_FENCE_GFP);
if (err)
@@ -1074,14 +1142,45 @@ __i915_request_await_execution(struct i915_request *to,
&from->fence))
return 0;
- /* Ensure both start together [after all semaphores in signal] */
- if (intel_engine_has_semaphores(to->engine))
- err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
- else
- err = i915_request_await_start(to, from);
+ /*
+ * Wait until the start of this request.
+ *
+ * The execution cb fires when we submit the request to HW. But in
+ * many cases this may be long before the request itself is ready to
+ * run (consider that we submit 2 requests for the same context, where
+ * the request of interest is behind an indefinite spinner). So we hook
+ * up to both to reduce our queues and keep the execution lag minimised
+ * in the worst case, though we hope that the await_start is elided.
+ */
+ err = i915_request_await_start(to, from);
if (err < 0)
return err;
+ /*
+ * Ensure both start together [after all semaphores in signal]
+ *
+ * Now that we are queued to the HW at roughly the same time (thanks
+ * to the execute cb) and are ready to run at roughly the same time
+ * (thanks to the await start), our signaler may still be indefinitely
+ * delayed by waiting on a semaphore from a remote engine. If our
+ * signaler depends on a semaphore, so indirectly do we, and we do not
+ * want to start our payload until our signaler also starts theirs.
+ * So we wait.
+ *
+ * However, there is also a second condition for which we need to wait
+ * for the precise start of the signaler. Consider that the signaler
+ * was submitted in a chain of requests following another context
+ * (with just an ordinary intra-engine fence dependency between the
+ * two). In this case the signaler is queued to HW, but not for
+ * immediate execution, and so we must wait until it reaches the
+ * active slot.
+ */
+ if (intel_engine_has_semaphores(to->engine)) {
+ err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
+ if (err < 0)
+ return err;
+ }
+
/* Couple the dependency tree for PI on this exposed to->fence */
if (to->engine->schedule) {
err = i915_sched_node_add_dependency(&to->sched, &from->sched);
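The __emit_semaphore_wait(to, from, from->fence.seqno - 1) call above encodes "wait until from has started": seqnos on a timeline complete in order, so once the hardware status page reports seqno - 1, everything ahead of from has finished and from itself is the request now at the head of its timeline. A toy model of that check, using the same wrap-safe comparison as i915_seqno_passed() (hypothetical names):

    #include <stdbool.h>
    #include <stdint.h>

    /* True if seq1 is at or after seq2, tolerating u32 wraparound. */
    static bool seqno_passed(uint32_t seq1, uint32_t seq2)
    {
        return (int32_t)(seq1 - seq2) >= 0;
    }

    /*
     * 'hwsp' is the last seqno the signaller's engine has written back.
     * Seeing seqno - 1 means every earlier request is done, i.e. the
     * request carrying 'seqno' has reached the front of its timeline.
     */
    static bool signaler_has_started(uint32_t hwsp, uint32_t seqno)
    {
        return seqno_passed(hwsp, seqno - 1);
    }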
@@ -1202,31 +1301,6 @@ i915_request_await_object(struct i915_request *to,
return ret;
}
-void i915_request_skip(struct i915_request *rq, int error)
-{
- void *vaddr = rq->ring->vaddr;
- u32 head;
-
- GEM_BUG_ON(!IS_ERR_VALUE((long)error));
- dma_fence_set_error(&rq->fence, error);
-
- if (rq->infix == rq->postfix)
- return;
-
- /*
- * As this request likely depends on state from the lost
- * context, clear out all the user operations leaving the
- * breadcrumb at the end (so we get the fence notifications).
- */
- head = rq->infix;
- if (rq->postfix < head) {
- memset(vaddr + head, 0, rq->ring->size - head);
- head = 0;
- }
- memset(vaddr + head, 0, rq->postfix - head);
- rq->infix = rq->postfix;
-}
-
static struct i915_request *
__i915_request_add_to_timeline(struct i915_request *rq)
{
@@ -1256,7 +1330,17 @@ __i915_request_add_to_timeline(struct i915_request *rq)
prev = to_request(__i915_active_fence_set(&timeline->last_request,
&rq->fence));
if (prev && !i915_request_completed(prev)) {
- if (is_power_of_2(prev->engine->mask | rq->engine->mask))
+ /*
+ * The requests are supposed to be kept in order. However,
+ * we need to be wary in case the timeline->last_request
+ * is used as a barrier for external modification to this
+ * context.
+ */
+ GEM_BUG_ON(prev->context == rq->context &&
+ i915_seqno_passed(prev->fence.seqno,
+ rq->fence.seqno));
+
+ if (is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask))
i915_sw_fence_await_sw_fence(&rq->submit,
&prev->submit,
&rq->submitq);
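The is_power_of_2() test above works because each physical engine owns exactly one bit in its ->mask: OR-ing the two masks stays a power of two only when both requests run on the same single engine, which is when chaining prev's submit fence is enough to keep them in order. A small illustration (bit assignments are hypothetical):

    #include <stdbool.h>
    #include <stdio.h>

    static bool is_power_of_2(unsigned long n)
    {
        return n && (n & (n - 1)) == 0;
    }

    int main(void)
    {
        const unsigned long rcs0 = 1ul << 0;  /* render engine */
        const unsigned long vcs0 = 1ul << 2;  /* a video engine */

        printf("same engine:       %d\n", is_power_of_2(rcs0 | rcs0));  /* 1 */
        printf("different engines: %d\n", is_power_of_2(rcs0 | vcs0));  /* 0 */
        return 0;
    }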
@@ -1340,39 +1424,23 @@ void i915_request_add(struct i915_request *rq)
{
struct intel_timeline * const tl = i915_request_timeline(rq);
struct i915_sched_attr attr = {};
- struct i915_request *prev;
+ struct i915_gem_context *ctx;
lockdep_assert_held(&tl->mutex);
lockdep_unpin_lock(&tl->mutex, rq->cookie);
trace_i915_request_add(rq);
+ __i915_request_commit(rq);
- prev = __i915_request_commit(rq);
-
- if (rcu_access_pointer(rq->context->gem_context))
- attr = i915_request_gem_context(rq)->sched;
+ /* XXX placeholder for selftests */
+ rcu_read_lock();
+ ctx = rcu_dereference(rq->context->gem_context);
+ if (ctx)
+ attr = ctx->sched;
+ rcu_read_unlock();
- /*
- * Boost actual workloads past semaphores!
- *
- * With semaphores we spin on one engine waiting for another,
- * simply to reduce the latency of starting our work when
- * the signaler completes. However, if there is any other
- * work that we could be doing on this engine instead, that
- * is better utilisation and will reduce the overall duration
- * of the current work. To avoid PI boosting a semaphore
- * far in the distance past over useful work, we keep a history
- * of any semaphore use along our dependency chain.
- */
if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
attr.priority |= I915_PRIORITY_NOSEMAPHORE;
-
- /*
- * Boost priorities to new clients (new request flows).
- *
- * Allow interactive/synchronous clients to jump ahead of
- * the bulk clients. (FQ_CODEL)
- */
if (list_empty(&rq->sched.signalers_list))
attr.priority |= I915_PRIORITY_WAIT;
@@ -1380,32 +1448,10 @@ void i915_request_add(struct i915_request *rq)
__i915_request_queue(rq, &attr);
local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
- /*
- * In typical scenarios, we do not expect the previous request on
- * the timeline to be still tracked by timeline->last_request if it
- * has been completed. If the completed request is still here, that
- * implies that request retirement is a long way behind submission,
- * suggesting that we haven't been retiring frequently enough from
- * the combination of retire-before-alloc, waiters and the background
- * retirement worker. So if the last request on this timeline was
- * already completed, do a catch up pass, flushing the retirement queue
- * up to this client. Since we have now moved the heaviest operations
- * during retirement onto secondary workers, such as freeing objects
- * or contexts, retiring a bunch of requests is mostly list management
- * (and cache misses), and so we should not be overly penalizing this
- * client by performing excess work, though we may still be performing
- * work on behalf of others -- but instead we should benefit from
- * improved resource management. (Well, that's the theory at least.)
- */
- if (prev &&
- i915_request_completed(prev) &&
- rcu_access_pointer(prev->timeline) == tl)
- i915_request_retire_upto(prev);
-
mutex_unlock(&tl->mutex);
}
-static unsigned long local_clock_us(unsigned int *cpu)
+static unsigned long local_clock_ns(unsigned int *cpu)
{
unsigned long t;
@@ -1422,7 +1468,7 @@ static unsigned long local_clock_us(unsigned int *cpu)
* stop busywaiting, see busywait_stop().
*/
*cpu = get_cpu();
- t = local_clock() >> 10;
+ t = local_clock();
put_cpu();
return t;
@@ -1432,15 +1478,15 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
{
unsigned int this_cpu;
- if (time_after(local_clock_us(&this_cpu), timeout))
+ if (time_after(local_clock_ns(&this_cpu), timeout))
return true;
return this_cpu != cpu;
}
-static bool __i915_spin_request(const struct i915_request * const rq,
- int state, unsigned long timeout_us)
+static bool __i915_spin_request(const struct i915_request * const rq, int state)
{
+ unsigned long timeout_ns;
unsigned int cpu;
/*
@@ -1468,7 +1514,8 @@ static bool __i915_spin_request(const struct i915_request * const rq,
* takes to sleep on a request, on the order of a microsecond.
*/
- timeout_us += local_clock_us(&cpu);
+ timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns);
+ timeout_ns += local_clock_ns(&cpu);
do {
if (i915_request_completed(rq))
return true;
@@ -1476,7 +1523,7 @@ static bool __i915_spin_request(const struct i915_request * const rq,
if (signal_pending_state(state, current))
break;
- if (busywait_stop(timeout_us, cpu))
+ if (busywait_stop(timeout_ns, cpu))
break;
cpu_relax();
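With the busy-wait budget now kept in nanoseconds (local_clock() is no longer shifted down by 10), the spin loop simply compares raw clock samples against start + max_busywait_duration_ns, and also bails out if the task migrates off the CPU whose clock it sampled. A userspace approximation of such a bounded spin, with a hypothetical check_done() callback in place of i915_request_completed():

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <time.h>

    static uint64_t clock_ns(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
    }

    /* Spin for at most 'timeout_ns', giving up early if we change CPU. */
    static bool busywait(bool (*check_done)(void *), void *arg,
                         uint64_t timeout_ns)
    {
        const int cpu = sched_getcpu();
        const uint64_t deadline = clock_ns() + timeout_ns;

        do {
            if (check_done(arg))
                return true;
            if (sched_getcpu() != cpu)
                break;  /* migrated: comparing local clocks is meaningless */
        } while (clock_ns() < deadline);

        return false;
    }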
@@ -1562,8 +1609,8 @@ long i915_request_wait(struct i915_request *rq,
* completion. That requires having a good predictor for the request
* duration, which we currently lack.
*/
- if (IS_ACTIVE(CONFIG_DRM_I915_SPIN_REQUEST) &&
- __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) {
+ if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) &&
+ __i915_spin_request(rq, state)) {
dma_fence_signal(&rq->fence);
goto out;
}
@@ -1598,6 +1645,8 @@ long i915_request_wait(struct i915_request *rq,
break;
}
+ intel_engine_flush_submission(rq->engine);
+
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
@@ -1608,7 +1657,6 @@ long i915_request_wait(struct i915_request *rq,
break;
}
- intel_engine_flush_submission(rq->engine);
timeout = io_schedule_timeout(timeout);
}
__set_current_state(TASK_RUNNING);
@@ -1628,14 +1676,12 @@ out:
static void i915_global_request_shrink(void)
{
- kmem_cache_shrink(global.slab_dependencies);
kmem_cache_shrink(global.slab_execute_cbs);
kmem_cache_shrink(global.slab_requests);
}
static void i915_global_request_exit(void)
{
- kmem_cache_destroy(global.slab_dependencies);
kmem_cache_destroy(global.slab_execute_cbs);
kmem_cache_destroy(global.slab_requests);
}
@@ -1665,17 +1711,9 @@ int __init i915_global_request_init(void)
if (!global.slab_execute_cbs)
goto err_requests;
- global.slab_dependencies = KMEM_CACHE(i915_dependency,
- SLAB_HWCACHE_ALIGN |
- SLAB_RECLAIM_ACCOUNT);
- if (!global.slab_dependencies)
- goto err_execute_cbs;
-
i915_global_register(&global.base);
return 0;
-err_execute_cbs:
- kmem_cache_destroy(global.slab_execute_cbs);
err_requests:
kmem_cache_destroy(global.slab_requests);
return -ENOMEM;