aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_perf.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2016-12-18 15:37:20 +0000
committerChris Wilson <chris@chris-wilson.co.uk>2016-12-18 16:18:50 +0000
commite8a9c58fcd9a5081f71f57f370af1347ed6a310b (patch)
tree61b8fe4eb3219a120f8e941ac4b0aa4a8f10fa69 /drivers/gpu/drm/i915/i915_perf.c
parentdrm/i915: Move intel_lrc_context_pin() to avoid the forward declaration (diff)
downloadlinux-dev-e8a9c58fcd9a5081f71f57f370af1347ed6a310b.tar.xz
linux-dev-e8a9c58fcd9a5081f71f57f370af1347ed6a310b.zip
drm/i915: Unify active context tracking between legacy/execlists/guc
The requests conversion introduced a nasty bug where we could generate a new request in the middle of constructing a request if we needed to idle the system in order to evict space for a context. The request to idle would be executed (and waited upon) before the current one, creating a minor havoc in the seqno accounting, as we will consider the current request to already be completed (prior to deferred seqno assignment) but ring->last_retired_head would have been updated and still could allow us to overwrite the current request before execution. We also employed two different mechanisms to track the active context until it was switched out. The legacy method allowed for waiting upon an active context (it could forcibly evict any vma, including context's), but the execlists method took a step backwards by pinning the vma for the entire active lifespan of the context (the only way to evict was to idle the entire GPU, not individual contexts). However, to circumvent the tricky issue of locking (i.e. we cannot take struct_mutex at the time of i915_gem_request_submit(), where we would want to move the previous context onto the active tracker and unpin it), we take the execlists approach and keep the contexts pinned until retirement. The benefit of the execlists approach, more important for execlists than legacy, was the reduction in work in pinning the context for each request - as the context was kept pinned until idle, it could short circuit the pinning for all active contexts. We introduce new engine vfuncs to pin and unpin the context respectively. The context is pinned at the start of the request, and only unpinned when the following request is retired (this ensures that the context is idle and coherent in main memory before we unpin it). We move the engine->last_context tracking into the retirement itself (rather than during request submission) in order to allow the submission to be reordered or unwound without undue difficultly. And finally an ulterior motive for unifying context handling was to prepare for mock requests. v2: Rename to last_retired_context, split out legacy_context tracking for MI_SET_CONTEXT. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20161218153724.8439-3-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_perf.c')
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c18
1 files changed, 7 insertions, 11 deletions
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index ae7bd0ed7b1a..da8537cb8136 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -743,7 +743,7 @@ static int i915_oa_read(struct i915_perf_stream *stream,
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- struct i915_vma *vma;
+ struct intel_engine_cs *engine = dev_priv->engine[RCS];
int ret;
ret = i915_mutex_lock_interruptible(&dev_priv->drm);
@@ -755,19 +755,16 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
*
* NB: implied RCS engine...
*/
- vma = i915_gem_context_pin_legacy(stream->ctx, 0);
- if (IS_ERR(vma)) {
- ret = PTR_ERR(vma);
+ ret = engine->context_pin(engine, stream->ctx);
+ if (ret)
goto unlock;
- }
-
- dev_priv->perf.oa.pinned_rcs_vma = vma;
/* Explicitly track the ID (instead of calling i915_ggtt_offset()
* on the fly) considering the difference with gen8+ and
* execlists
*/
- dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(vma);
+ dev_priv->perf.oa.specific_ctx_id =
+ i915_ggtt_offset(stream->ctx->engine[engine->id].state);
unlock:
mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -785,13 +782,12 @@ unlock:
static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct intel_engine_cs *engine = dev_priv->engine[RCS];
mutex_lock(&dev_priv->drm.struct_mutex);
- i915_vma_unpin(dev_priv->perf.oa.pinned_rcs_vma);
- dev_priv->perf.oa.pinned_rcs_vma = NULL;
-
dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
+ engine->context_unpin(engine, stream->ctx);
mutex_unlock(&dev_priv->drm.struct_mutex);
}