aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gem_request.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_request.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_request.c766
1 files changed, 500 insertions, 266 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8832f8ec1583..b8f403faadbb 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -23,31 +23,26 @@
*/
#include <linux/prefetch.h>
+#include <linux/dma-fence-array.h>
#include "i915_drv.h"
-static const char *i915_fence_get_driver_name(struct fence *fence)
+static const char *i915_fence_get_driver_name(struct dma_fence *fence)
{
return "i915";
}
-static const char *i915_fence_get_timeline_name(struct fence *fence)
+static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
{
- /* Timelines are bound by eviction to a VM. However, since
- * we only have a global seqno at the moment, we only have
- * a single timeline. Note that each timeline will have
- * multiple execution contexts (fence contexts) as we allow
- * engines within a single timeline to execute in parallel.
- */
- return "global";
+ return to_request(fence)->timeline->common->name;
}
-static bool i915_fence_signaled(struct fence *fence)
+static bool i915_fence_signaled(struct dma_fence *fence)
{
return i915_gem_request_completed(to_request(fence));
}
-static bool i915_fence_enable_signaling(struct fence *fence)
+static bool i915_fence_enable_signaling(struct dma_fence *fence)
{
if (i915_fence_signaled(fence))
return false;
@@ -56,63 +51,27 @@ static bool i915_fence_enable_signaling(struct fence *fence)
return true;
}
-static signed long i915_fence_wait(struct fence *fence,
+static signed long i915_fence_wait(struct dma_fence *fence,
bool interruptible,
- signed long timeout_jiffies)
+ signed long timeout)
{
- s64 timeout_ns, *timeout;
- int ret;
-
- if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
- timeout_ns = jiffies_to_nsecs(timeout_jiffies);
- timeout = &timeout_ns;
- } else {
- timeout = NULL;
- }
-
- ret = i915_wait_request(to_request(fence),
- interruptible, timeout,
- NO_WAITBOOST);
- if (ret == -ETIME)
- return 0;
-
- if (ret < 0)
- return ret;
-
- if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
- timeout_jiffies = nsecs_to_jiffies(timeout_ns);
-
- return timeout_jiffies;
+ return i915_wait_request(to_request(fence), interruptible, timeout);
}
-static void i915_fence_value_str(struct fence *fence, char *str, int size)
-{
- snprintf(str, size, "%u", fence->seqno);
-}
-
-static void i915_fence_timeline_value_str(struct fence *fence, char *str,
- int size)
-{
- snprintf(str, size, "%u",
- intel_engine_get_seqno(to_request(fence)->engine));
-}
-
-static void i915_fence_release(struct fence *fence)
+static void i915_fence_release(struct dma_fence *fence)
{
struct drm_i915_gem_request *req = to_request(fence);
kmem_cache_free(req->i915->requests, req);
}
-const struct fence_ops i915_fence_ops = {
+const struct dma_fence_ops i915_fence_ops = {
.get_driver_name = i915_fence_get_driver_name,
.get_timeline_name = i915_fence_get_timeline_name,
.enable_signaling = i915_fence_enable_signaling,
.signaled = i915_fence_signaled,
.wait = i915_fence_wait,
.release = i915_fence_release,
- .fence_value_str = i915_fence_value_str,
- .timeline_value_str = i915_fence_timeline_value_str,
};
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
@@ -154,6 +113,82 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
spin_unlock(&file_priv->mm.lock);
}
+static struct i915_dependency *
+i915_dependency_alloc(struct drm_i915_private *i915)
+{
+ return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
+}
+
+static void
+i915_dependency_free(struct drm_i915_private *i915,
+ struct i915_dependency *dep)
+{
+ kmem_cache_free(i915->dependencies, dep);
+}
+
+static void
+__i915_priotree_add_dependency(struct i915_priotree *pt,
+ struct i915_priotree *signal,
+ struct i915_dependency *dep,
+ unsigned long flags)
+{
+ INIT_LIST_HEAD(&dep->dfs_link);
+ list_add(&dep->wait_link, &signal->waiters_list);
+ list_add(&dep->signal_link, &pt->signalers_list);
+ dep->signaler = signal;
+ dep->flags = flags;
+}
+
+static int
+i915_priotree_add_dependency(struct drm_i915_private *i915,
+ struct i915_priotree *pt,
+ struct i915_priotree *signal)
+{
+ struct i915_dependency *dep;
+
+ dep = i915_dependency_alloc(i915);
+ if (!dep)
+ return -ENOMEM;
+
+ __i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC);
+ return 0;
+}
+
+static void
+i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
+{
+ struct i915_dependency *dep, *next;
+
+ GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node));
+
+ /* Everyone we depended upon (the fences we wait to be signaled)
+ * should retire before us and remove themselves from our list.
+ * However, retirement is run independently on each timeline and
+ * so we may be called out-of-order.
+ */
+ list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) {
+ list_del(&dep->wait_link);
+ if (dep->flags & I915_DEPENDENCY_ALLOC)
+ i915_dependency_free(i915, dep);
+ }
+
+ /* Remove ourselves from everyone who depends upon us */
+ list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) {
+ list_del(&dep->signal_link);
+ if (dep->flags & I915_DEPENDENCY_ALLOC)
+ i915_dependency_free(i915, dep);
+ }
+}
+
+static void
+i915_priotree_init(struct i915_priotree *pt)
+{
+ INIT_LIST_HEAD(&pt->signalers_list);
+ INIT_LIST_HEAD(&pt->waiters_list);
+ RB_CLEAR_NODE(&pt->node);
+ pt->priority = INT_MIN;
+}
+
void i915_gem_retire_noop(struct i915_gem_active *active,
struct drm_i915_gem_request *request)
{
@@ -164,8 +199,17 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
struct i915_gem_active *active, *next;
+ lockdep_assert_held(&request->i915->drm.struct_mutex);
+ GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
+ GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute));
+ GEM_BUG_ON(!i915_gem_request_completed(request));
+ GEM_BUG_ON(!request->i915->gt.active_requests);
+
trace_i915_gem_request_retire(request);
- list_del(&request->link);
+
+ spin_lock_irq(&request->engine->timeline->lock);
+ list_del_init(&request->link);
+ spin_unlock_irq(&request->engine->timeline->lock);
/* We know the GPU must have read the request to have
* sent us the seqno + interrupt, so use the position
@@ -177,6 +221,12 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
*/
list_del(&request->ring_link);
request->ring->last_retired_head = request->postfix;
+ if (!--request->i915->gt.active_requests) {
+ GEM_BUG_ON(!request->i915->gt.awake);
+ mod_delayed_work(request->i915->wq,
+ &request->i915->gt.idle_work,
+ msecs_to_jiffies(100));
+ }
/* Walk through the active list, calling retire on each. This allows
* objects to track their GPU activity and mark themselves as idle
@@ -214,6 +264,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
}
i915_gem_context_put(request->ctx);
+
+ dma_fence_signal(&request->fence);
+
+ i915_priotree_fini(request->i915, &request->priotree);
i915_gem_request_put(request);
}
@@ -223,10 +277,11 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
struct drm_i915_gem_request *tmp;
lockdep_assert_held(&req->i915->drm.struct_mutex);
- GEM_BUG_ON(list_empty(&req->link));
+ if (list_empty(&req->link))
+ return;
do {
- tmp = list_first_entry(&engine->request_list,
+ tmp = list_first_entry(&engine->timeline->requests,
typeof(*tmp), link);
i915_gem_request_retire(tmp);
@@ -253,39 +308,50 @@ static int i915_gem_check_wedge(struct drm_i915_private *dev_priv)
return 0;
}
-static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
+static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
{
+ struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
struct intel_engine_cs *engine;
+ enum intel_engine_id id;
int ret;
/* Carefully retire all requests without writing to the rings */
- for_each_engine(engine, dev_priv) {
- ret = intel_engine_idle(engine,
- I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_LOCKED);
- if (ret)
- return ret;
- }
- i915_gem_retire_requests(dev_priv);
+ ret = i915_gem_wait_for_idle(i915,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED);
+ if (ret)
+ return ret;
+
+ i915_gem_retire_requests(i915);
+ GEM_BUG_ON(i915->gt.active_requests > 1);
/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
- if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
- while (intel_kick_waiters(dev_priv) ||
- intel_kick_signalers(dev_priv))
- yield();
+ if (!i915_seqno_passed(seqno, atomic_read(&timeline->next_seqno))) {
+ while (intel_breadcrumbs_busy(i915))
+ cond_resched(); /* spin until threads are complete */
}
+ atomic_set(&timeline->next_seqno, seqno);
/* Finally reset hw state */
- for_each_engine(engine, dev_priv)
- intel_engine_init_seqno(engine, seqno);
+ for_each_engine(engine, i915, id)
+ intel_engine_init_global_seqno(engine, seqno);
+
+ list_for_each_entry(timeline, &i915->gt.timelines, link) {
+ for_each_engine(engine, i915, id) {
+ struct intel_timeline *tl = &timeline->engine[id];
+
+ memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno));
+ }
+ }
return 0;
}
-int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
+int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
{
struct drm_i915_private *dev_priv = to_i915(dev);
- int ret;
+
+ lockdep_assert_held(&dev_priv->drm.struct_mutex);
if (seqno == 0)
return -EINVAL;
@@ -293,29 +359,87 @@ int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
/* HWS page needs to be set less than what we
* will inject to ring
*/
- ret = i915_gem_init_seqno(dev_priv, seqno - 1);
- if (ret)
+ return i915_gem_init_global_seqno(dev_priv, seqno - 1);
+}
+
+static int reserve_global_seqno(struct drm_i915_private *i915)
+{
+ u32 active_requests = ++i915->gt.active_requests;
+ u32 next_seqno = atomic_read(&i915->gt.global_timeline.next_seqno);
+ int ret;
+
+ /* Reservation is fine until we need to wrap around */
+ if (likely(next_seqno + active_requests > next_seqno))
+ return 0;
+
+ ret = i915_gem_init_global_seqno(i915, 0);
+ if (ret) {
+ i915->gt.active_requests--;
return ret;
+ }
- dev_priv->next_seqno = seqno;
return 0;
}
-static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
+static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
{
- /* reserve 0 for non-seqno */
- if (unlikely(dev_priv->next_seqno == 0)) {
- int ret;
+ /* next_seqno only incremented under a mutex */
+ return ++tl->next_seqno.counter;
+}
- ret = i915_gem_init_seqno(dev_priv, 0);
- if (ret)
- return ret;
+static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
+{
+ return atomic_inc_return(&tl->next_seqno);
+}
- dev_priv->next_seqno = 1;
- }
+void __i915_gem_request_submit(struct drm_i915_gem_request *request)
+{
+ struct intel_engine_cs *engine = request->engine;
+ struct intel_timeline *timeline;
+ u32 seqno;
- *seqno = dev_priv->next_seqno++;
- return 0;
+ /* Transfer from per-context onto the global per-engine timeline */
+ timeline = engine->timeline;
+ GEM_BUG_ON(timeline == request->timeline);
+ assert_spin_locked(&timeline->lock);
+
+ seqno = timeline_get_seqno(timeline->common);
+ GEM_BUG_ON(!seqno);
+ GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
+
+ GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
+ request->previous_seqno = timeline->last_submitted_seqno;
+ timeline->last_submitted_seqno = seqno;
+
+ /* We may be recursing from the signal callback of another i915 fence */
+ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+ request->global_seqno = seqno;
+ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
+ intel_engine_enable_signaling(request);
+ spin_unlock(&request->lock);
+
+ GEM_BUG_ON(!request->global_seqno);
+ engine->emit_breadcrumb(request,
+ request->ring->vaddr + request->postfix);
+
+ spin_lock(&request->timeline->lock);
+ list_move_tail(&request->link, &timeline->requests);
+ spin_unlock(&request->timeline->lock);
+
+ i915_sw_fence_commit(&request->execute);
+}
+
+void i915_gem_request_submit(struct drm_i915_gem_request *request)
+{
+ struct intel_engine_cs *engine = request->engine;
+ unsigned long flags;
+
+ /* Will be called from irq-context when using foreign fences. */
+ spin_lock_irqsave(&engine->timeline->lock, flags);
+
+ __i915_gem_request_submit(request);
+
+ spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
static int __i915_sw_fence_call
@@ -324,15 +448,31 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
struct drm_i915_gem_request *request =
container_of(fence, typeof(*request), submit);
- /* Will be called from irq-context when using foreign DMA fences */
-
switch (state) {
case FENCE_COMPLETE:
- request->engine->last_submitted_seqno = request->fence.seqno;
request->engine->submit_request(request);
break;
case FENCE_FREE:
+ i915_gem_request_put(request);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int __i915_sw_fence_call
+execute_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+ struct drm_i915_gem_request *request =
+ container_of(fence, typeof(*request), execute);
+
+ switch (state) {
+ case FENCE_COMPLETE:
+ break;
+
+ case FENCE_FREE:
+ i915_gem_request_put(request);
break;
}
@@ -357,9 +497,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
{
struct drm_i915_private *dev_priv = engine->i915;
struct drm_i915_gem_request *req;
- u32 seqno;
int ret;
+ lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
* EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
* and restart.
@@ -368,10 +509,14 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
if (ret)
return ERR_PTR(ret);
+ ret = reserve_global_seqno(dev_priv);
+ if (ret)
+ return ERR_PTR(ret);
+
/* Move the oldest request to the slab-cache (if not in use!) */
- req = list_first_entry_or_null(&engine->request_list,
+ req = list_first_entry_or_null(&engine->timeline->requests,
typeof(*req), link);
- if (req && i915_gem_request_completed(req))
+ if (req && __i915_gem_request_completed(req))
i915_gem_request_retire(req);
/* Beware: Dragons be flying overhead.
@@ -382,13 +527,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
* of being read by __i915_gem_active_get_rcu(). As such,
* we have to be very careful when overwriting the contents. During
* the RCU lookup, we change chase the request->engine pointer,
- * read the request->fence.seqno and increment the reference count.
+ * read the request->global_seqno and increment the reference count.
*
* The reference count is incremented atomically. If it is zero,
* the lookup knows the request is unallocated and complete. Otherwise,
* it is either still in use, or has been reallocated and reset
- * with fence_init(). This increment is safe for release as we check
- * that the request we have a reference to and matches the active
+ * with dma_fence_init(). This increment is safe for release as we
+ * check that the request we have a reference to and matches the active
* request.
*
* Before we increment the refcount, we chase the request->engine
@@ -403,21 +548,32 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
* Do not use kmem_cache_zalloc() here!
*/
req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
- if (!req)
- return ERR_PTR(-ENOMEM);
+ if (!req) {
+ ret = -ENOMEM;
+ goto err_unreserve;
+ }
- ret = i915_gem_get_seqno(dev_priv, &seqno);
- if (ret)
- goto err;
+ req->timeline = i915_gem_context_lookup_timeline(ctx, engine);
+ GEM_BUG_ON(req->timeline == engine->timeline);
spin_lock_init(&req->lock);
- fence_init(&req->fence,
- &i915_fence_ops,
- &req->lock,
- engine->fence_context,
- seqno);
+ dma_fence_init(&req->fence,
+ &i915_fence_ops,
+ &req->lock,
+ req->timeline->fence_context,
+ __timeline_get_seqno(req->timeline->common));
+
+ /* We bump the ref for the fence chain */
+ i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
+ i915_sw_fence_init(&i915_gem_request_get(req)->execute, execute_notify);
+
+ /* Ensure that the execute fence completes after the submit fence -
+ * as we complete the execute fence from within the submit fence
+ * callback, its completion would otherwise be visible first.
+ */
+ i915_sw_fence_await_sw_fence(&req->execute, &req->submit, &req->execq);
- i915_sw_fence_init(&req->submit, submit_notify);
+ i915_priotree_init(&req->priotree);
INIT_LIST_HEAD(&req->active_list);
req->i915 = dev_priv;
@@ -425,6 +581,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
req->ctx = i915_gem_context_get(ctx);
/* No zalloc, must clear what we need by hand */
+ req->global_seqno = 0;
req->previous_context = NULL;
req->file_priv = NULL;
req->batch = NULL;
@@ -437,6 +594,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
* away, e.g. because a GPU scheduler has deferred it.
*/
req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
+ GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz);
if (i915.enable_execlists)
ret = intel_logical_ring_alloc_request_extras(req);
@@ -456,8 +614,9 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
err_ctx:
i915_gem_context_put(ctx);
-err:
kmem_cache_free(dev_priv->requests, req);
+err_unreserve:
+ dev_priv->gt.active_requests--;
return ERR_PTR(ret);
}
@@ -465,15 +624,36 @@ static int
i915_gem_request_await_request(struct drm_i915_gem_request *to,
struct drm_i915_gem_request *from)
{
- int idx, ret;
+ int ret;
GEM_BUG_ON(to == from);
- if (to->engine == from->engine)
+ if (to->engine->schedule) {
+ ret = i915_priotree_add_dependency(to->i915,
+ &to->priotree,
+ &from->priotree);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (to->timeline == from->timeline)
return 0;
- idx = intel_engine_sync_index(from->engine, to->engine);
- if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
+ if (to->engine == from->engine) {
+ ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
+ &from->submit,
+ GFP_KERNEL);
+ return ret < 0 ? ret : 0;
+ }
+
+ if (!from->global_seqno) {
+ ret = i915_sw_fence_await_dma_fence(&to->submit,
+ &from->fence, 0,
+ GFP_KERNEL);
+ return ret < 0 ? ret : 0;
+ }
+
+ if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id])
return 0;
trace_i915_gem_ring_sync_to(to, from);
@@ -491,7 +671,54 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
return ret;
}
- from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
+ to->timeline->sync_seqno[from->engine->id] = from->global_seqno;
+ return 0;
+}
+
+int
+i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
+ struct dma_fence *fence)
+{
+ struct dma_fence_array *array;
+ int ret;
+ int i;
+
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+ return 0;
+
+ if (dma_fence_is_i915(fence))
+ return i915_gem_request_await_request(req, to_request(fence));
+
+ if (!dma_fence_is_array(fence)) {
+ ret = i915_sw_fence_await_dma_fence(&req->submit,
+ fence, I915_FENCE_TIMEOUT,
+ GFP_KERNEL);
+ return ret < 0 ? ret : 0;
+ }
+
+ /* Note that if the fence-array was created in signal-on-any mode,
+ * we should *not* decompose it into its individual fences. However,
+ * we don't currently store which mode the fence-array is operating
+ * in. Fortunately, the only user of signal-on-any is private to
+ * amdgpu and we should not see any incoming fence-array from
+ * sync-file being in signal-on-any mode.
+ */
+
+ array = to_dma_fence_array(fence);
+ for (i = 0; i < array->num_fences; i++) {
+ struct dma_fence *child = array->fences[i];
+
+ if (dma_fence_is_i915(child))
+ ret = i915_gem_request_await_request(req,
+ to_request(child));
+ else
+ ret = i915_sw_fence_await_dma_fence(&req->submit,
+ child, I915_FENCE_TIMEOUT,
+ GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ }
+
return 0;
}
@@ -520,43 +747,52 @@ i915_gem_request_await_object(struct drm_i915_gem_request *to,
struct drm_i915_gem_object *obj,
bool write)
{
- struct i915_gem_active *active;
- unsigned long active_mask;
- int idx;
+ struct dma_fence *excl;
+ int ret = 0;
if (write) {
- active_mask = i915_gem_object_get_active(obj);
- active = obj->last_read;
+ struct dma_fence **shared;
+ unsigned int count, i;
+
+ ret = reservation_object_get_fences_rcu(obj->resv,
+ &excl, &count, &shared);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < count; i++) {
+ ret = i915_gem_request_await_dma_fence(to, shared[i]);
+ if (ret)
+ break;
+
+ dma_fence_put(shared[i]);
+ }
+
+ for (; i < count; i++)
+ dma_fence_put(shared[i]);
+ kfree(shared);
} else {
- active_mask = 1;
- active = &obj->last_write;
+ excl = reservation_object_get_excl_rcu(obj->resv);
}
- for_each_active(active_mask, idx) {
- struct drm_i915_gem_request *request;
- int ret;
-
- request = i915_gem_active_peek(&active[idx],
- &obj->base.dev->struct_mutex);
- if (!request)
- continue;
+ if (excl) {
+ if (ret == 0)
+ ret = i915_gem_request_await_dma_fence(to, excl);
- ret = i915_gem_request_await_request(to, request);
- if (ret)
- return ret;
+ dma_fence_put(excl);
}
- return 0;
+ return ret;
}
static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
- dev_priv->gt.active_engines |= intel_engine_flag(engine);
if (dev_priv->gt.awake)
return;
+ GEM_BUG_ON(!dev_priv->gt.active_requests);
+
intel_runtime_pm_get_noresume(dev_priv);
dev_priv->gt.awake = true;
@@ -579,11 +815,11 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
{
struct intel_engine_cs *engine = request->engine;
struct intel_ring *ring = request->ring;
+ struct intel_timeline *timeline = request->timeline;
struct drm_i915_gem_request *prev;
- u32 request_start;
- u32 reserved_tail;
- int ret;
+ int err;
+ lockdep_assert_held(&request->i915->drm.struct_mutex);
trace_i915_gem_request_add(request);
/*
@@ -591,8 +827,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
* should already have been reserved in the ring buffer. Let the ring
* know that it is time to use that space up.
*/
- request_start = ring->tail;
- reserved_tail = request->reserved_space;
request->reserved_space = 0;
/*
@@ -603,10 +837,10 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
* what.
*/
if (flush_caches) {
- ret = engine->emit_flush(request, EMIT_FLUSH);
+ err = engine->emit_flush(request, EMIT_FLUSH);
/* Not allowed to fail! */
- WARN(ret, "engine->emit_flush() failed: %d!\n", ret);
+ WARN(err, "engine->emit_flush() failed: %d!\n", err);
}
/* Record the position of the start of the breadcrumb so that
@@ -614,20 +848,10 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
* GPU processing the request, we never over-estimate the
* position of the ring's HEAD.
*/
+ err = intel_ring_begin(request, engine->emit_breadcrumb_sz);
+ GEM_BUG_ON(err);
request->postfix = ring->tail;
-
- /* Not allowed to fail! */
- ret = engine->emit_request(request);
- WARN(ret, "(%s)->emit_request failed: %d!\n", engine->name, ret);
-
- /* Sanity check that the reserved size was large enough. */
- ret = ring->tail - request_start;
- if (ret < 0)
- ret += ring->size;
- WARN_ONCE(ret > reserved_tail,
- "Not enough space reserved (%d bytes) "
- "for adding the request (%d bytes)\n",
- reserved_tail, ret);
+ ring->tail += engine->emit_breadcrumb_sz * sizeof(u32);
/* Seal the request and mark it as pending execution. Note that
* we may inspect this state, without holding any locks, during
@@ -635,21 +859,46 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
* see a more recent value in the hws than we are tracking.
*/
- prev = i915_gem_active_raw(&engine->last_request,
+ prev = i915_gem_active_raw(&timeline->last_request,
&request->i915->drm.struct_mutex);
- if (prev)
+ if (prev) {
i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
&request->submitq);
+ if (engine->schedule)
+ __i915_priotree_add_dependency(&request->priotree,
+ &prev->priotree,
+ &request->dep,
+ 0);
+ }
+
+ spin_lock_irq(&timeline->lock);
+ list_add_tail(&request->link, &timeline->requests);
+ spin_unlock_irq(&timeline->lock);
+
+ GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
+ request->fence.seqno));
+
+ timeline->last_submitted_seqno = request->fence.seqno;
+ i915_gem_active_set(&timeline->last_request, request);
- request->emitted_jiffies = jiffies;
- request->previous_seqno = engine->last_pending_seqno;
- engine->last_pending_seqno = request->fence.seqno;
- i915_gem_active_set(&engine->last_request, request);
- list_add_tail(&request->link, &engine->request_list);
list_add_tail(&request->ring_link, &ring->request_list);
+ request->emitted_jiffies = jiffies;
i915_gem_mark_busy(engine);
+ /* Let the backend know a new request has arrived that may need
+ * to adjust the existing execution schedule due to a high priority
+ * request - i.e. we may want to preempt the current request in order
+ * to run a high priority dependency chain *before* we can execute this
+ * request.
+ *
+ * This is called before the request is ready to run so that we can
+ * decide whether to preempt the entire chain so that it is ready to
+ * run at the earliest possible convenience.
+ */
+ if (engine->schedule)
+ engine->schedule(request, request->ctx->priority);
+
local_bh_disable();
i915_sw_fence_commit(&request->submit);
local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
@@ -714,7 +963,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
timeout_us += local_clock_us(&cpu);
do {
- if (i915_gem_request_completed(req))
+ if (__i915_gem_request_completed(req))
return true;
if (signal_pending_state(state, current))
@@ -723,82 +972,108 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
if (busywait_stop(timeout_us, cpu))
break;
- cpu_relax_lowlatency();
+ cpu_relax();
} while (!need_resched());
return false;
}
+static long
+__i915_request_wait_for_execute(struct drm_i915_gem_request *request,
+ unsigned int flags,
+ long timeout)
+{
+ const int state = flags & I915_WAIT_INTERRUPTIBLE ?
+ TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+ wait_queue_head_t *q = &request->i915->gpu_error.wait_queue;
+ DEFINE_WAIT(reset);
+ DEFINE_WAIT(wait);
+
+ if (flags & I915_WAIT_LOCKED)
+ add_wait_queue(q, &reset);
+
+ do {
+ prepare_to_wait(&request->execute.wait, &wait, state);
+
+ if (i915_sw_fence_done(&request->execute))
+ break;
+
+ if (flags & I915_WAIT_LOCKED &&
+ i915_reset_in_progress(&request->i915->gpu_error)) {
+ __set_current_state(TASK_RUNNING);
+ i915_reset(request->i915);
+ reset_wait_queue(q, &reset);
+ continue;
+ }
+
+ if (signal_pending_state(state, current)) {
+ timeout = -ERESTARTSYS;
+ break;
+ }
+
+ timeout = io_schedule_timeout(timeout);
+ } while (timeout);
+ finish_wait(&request->execute.wait, &wait);
+
+ if (flags & I915_WAIT_LOCKED)
+ remove_wait_queue(q, &reset);
+
+ return timeout;
+}
+
/**
* i915_wait_request - wait until execution of request has finished
- * @req: duh!
+ * @req: the request to wait upon
* @flags: how to wait
- * @timeout: in - how long to wait (NULL forever); out - how much time remaining
- * @rps: client to charge for RPS boosting
+ * @timeout: how long to wait in jiffies
+ *
+ * i915_wait_request() waits for the request to be completed, for a
+ * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
+ * unbounded wait).
*
- * Note: It is of utmost importance that the passed in seqno and reset_counter
- * values have been read by the caller in an smp safe manner. Where read-side
- * locks are involved, it is sufficient to read the reset_counter before
- * unlocking the lock that protects the seqno. For lockless tricks, the
- * reset_counter _must_ be read before, and an appropriate smp_rmb must be
- * inserted.
+ * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED
+ * in via the flags, and vice versa if the struct_mutex is not held, the caller
+ * must not specify that the wait is locked.
*
- * Returns 0 if the request was found within the alloted time. Else returns the
- * errno with remaining time filled in timeout argument.
+ * Returns the remaining time (in jiffies) if the request completed, which may
+ * be zero or -ETIME if the request is unfinished after the timeout expires.
+ * May return -EINTR is called with I915_WAIT_INTERRUPTIBLE and a signal is
+ * pending before the request completes.
*/
-int i915_wait_request(struct drm_i915_gem_request *req,
- unsigned int flags,
- s64 *timeout,
- struct intel_rps_client *rps)
+long i915_wait_request(struct drm_i915_gem_request *req,
+ unsigned int flags,
+ long timeout)
{
const int state = flags & I915_WAIT_INTERRUPTIBLE ?
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
DEFINE_WAIT(reset);
struct intel_wait wait;
- unsigned long timeout_remain;
- int ret = 0;
might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
- GEM_BUG_ON(!!lockdep_is_held(&req->i915->drm.struct_mutex) !=
+ GEM_BUG_ON(debug_locks &&
+ !!lockdep_is_held(&req->i915->drm.struct_mutex) !=
!!(flags & I915_WAIT_LOCKED));
#endif
+ GEM_BUG_ON(timeout < 0);
if (i915_gem_request_completed(req))
- return 0;
-
- timeout_remain = MAX_SCHEDULE_TIMEOUT;
- if (timeout) {
- if (WARN_ON(*timeout < 0))
- return -EINVAL;
+ return timeout;
- if (*timeout == 0)
- return -ETIME;
-
- /* Record current time in case interrupted, or wedged */
- timeout_remain = nsecs_to_jiffies_timeout(*timeout);
- *timeout += ktime_get_raw_ns();
- }
+ if (!timeout)
+ return -ETIME;
trace_i915_gem_request_wait_begin(req);
- /* This client is about to stall waiting for the GPU. In many cases
- * this is undesirable and limits the throughput of the system, as
- * many clients cannot continue processing user input/output whilst
- * blocked. RPS autotuning may take tens of milliseconds to respond
- * to the GPU load and thus incurs additional latency for the client.
- * We can circumvent that by promoting the GPU frequency to maximum
- * before we wait. This makes the GPU throttle up much more quickly
- * (good for benchmarks and user experience, e.g. window animations),
- * but at a cost of spending more power processing the workload
- * (bad for battery). Not all clients even want their results
- * immediately and for them we should just let the GPU select its own
- * frequency to maximise efficiency. To prevent a single client from
- * forcing the clocks too high for the whole system, we only allow
- * each client to waitboost once in a busy period.
- */
- if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6)
- gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
+ if (!i915_sw_fence_done(&req->execute)) {
+ timeout = __i915_request_wait_for_execute(req, flags, timeout);
+ if (timeout < 0)
+ goto complete;
+
+ GEM_BUG_ON(!i915_sw_fence_done(&req->execute));
+ }
+ GEM_BUG_ON(!i915_sw_fence_done(&req->submit));
+ GEM_BUG_ON(!req->global_seqno);
/* Optimistic short spin before touching IRQs */
if (i915_spin_request(req, state, 5))
@@ -808,7 +1083,7 @@ int i915_wait_request(struct drm_i915_gem_request *req,
if (flags & I915_WAIT_LOCKED)
add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
- intel_wait_init(&wait, req->fence.seqno);
+ intel_wait_init(&wait, req->global_seqno);
if (intel_engine_add_wait(req->engine, &wait))
/* In order to check that we haven't missed the interrupt
* as we enabled it, we need to kick ourselves to do a
@@ -818,16 +1093,17 @@ int i915_wait_request(struct drm_i915_gem_request *req,
for (;;) {
if (signal_pending_state(state, current)) {
- ret = -ERESTARTSYS;
+ timeout = -ERESTARTSYS;
break;
}
- timeout_remain = io_schedule_timeout(timeout_remain);
- if (timeout_remain == 0) {
- ret = -ETIME;
+ if (!timeout) {
+ timeout = -ETIME;
break;
}
+ timeout = io_schedule_timeout(timeout);
+
if (intel_wait_complete(&wait))
break;
@@ -874,74 +1150,32 @@ wakeup:
complete:
trace_i915_gem_request_wait_end(req);
- if (timeout) {
- *timeout -= ktime_get_raw_ns();
- if (*timeout < 0)
- *timeout = 0;
-
- /*
- * Apparently ktime isn't accurate enough and occasionally has a
- * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
- * things up to make the test happy. We allow up to 1 jiffy.
- *
- * This is a regrssion from the timespec->ktime conversion.
- */
- if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
- *timeout = 0;
- }
-
- if (IS_RPS_USER(rps) &&
- req->fence.seqno == req->engine->last_submitted_seqno) {
- /* The GPU is now idle and this client has stalled.
- * Since no other client has submitted a request in the
- * meantime, assume that this client is the only one
- * supplying work to the GPU but is unable to keep that
- * work supplied because it is waiting. Since the GPU is
- * then never kept fully busy, RPS autoclocking will
- * keep the clocks relatively low, causing further delays.
- * Compensate by giving the synchronous client credit for
- * a waitboost next time.
- */
- spin_lock(&req->i915->rps.client_lock);
- list_del_init(&rps->link);
- spin_unlock(&req->i915->rps.client_lock);
- }
-
- return ret;
+ return timeout;
}
-static bool engine_retire_requests(struct intel_engine_cs *engine)
+static void engine_retire_requests(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *request, *next;
- list_for_each_entry_safe(request, next, &engine->request_list, link) {
- if (!i915_gem_request_completed(request))
- return false;
+ list_for_each_entry_safe(request, next,
+ &engine->timeline->requests, link) {
+ if (!__i915_gem_request_completed(request))
+ return;
i915_gem_request_retire(request);
}
-
- return true;
}
void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
- unsigned int tmp;
+ enum intel_engine_id id;
lockdep_assert_held(&dev_priv->drm.struct_mutex);
- if (dev_priv->gt.active_engines == 0)
+ if (!dev_priv->gt.active_requests)
return;
- GEM_BUG_ON(!dev_priv->gt.awake);
-
- for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines, tmp)
- if (engine_retire_requests(engine))
- dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
-
- if (dev_priv->gt.active_engines == 0)
- queue_delayed_work(dev_priv->wq,
- &dev_priv->gt.idle_work,
- msecs_to_jiffies(100));
+ for_each_engine(engine, dev_priv, id)
+ engine_retire_requests(engine);
}