-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c           | 214 +-
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c |   3 +
2 files changed, 152 insertions(+), 65 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 2ccc566f9154..d3c47390ef53 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -56,7 +56,8 @@ enum {
 #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
 
 #define __EXEC_HAS_RELOC	BIT(31)
-#define __EXEC_INTERNAL_FLAGS	(~0u << 31)
+#define __EXEC_ENGINE_PINNED	BIT(30)
+#define __EXEC_INTERNAL_FLAGS	(~0u << 30)
 #define UPDATE			PIN_OFFSET_FIXED
 
 #define BATCH_OFFSET_BIAS (256*1024)
@@ -281,6 +282,7 @@ struct i915_execbuffer {
 	} reloc_cache;
 
 	struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */
+	struct intel_context *reloc_context;
 
 	u64 invalid_flags; /** Set of execobj.flags that are invalid */
 	u32 context_flags; /** Set of execobj.flags to insert from the ctx */
@@ -303,6 +305,9 @@ struct i915_execbuffer {
 };
 
 static int eb_parse(struct i915_execbuffer *eb);
+static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
+					  bool throttle);
+static void eb_unpin_engine(struct i915_execbuffer *eb);
 
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 {
@@ -935,7 +940,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 	}
 }
 
-static void eb_release_vmas(const struct i915_execbuffer *eb, bool final)
+static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
 {
 	const unsigned int count = eb->buffer_count;
 	unsigned int i;
@@ -952,6 +957,8 @@ static void eb_release_vmas(const struct i915_execbuffer *eb, bool final)
 		if (final)
 			i915_vma_put(vma);
 	}
+
+	eb_unpin_engine(eb);
 }
 
 static void eb_destroy(const struct i915_execbuffer *eb)
@@ -1292,19 +1299,26 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (engine == eb->context->engine) {
 		rq = i915_request_create(eb->context);
 	} else {
-		struct intel_context *ce;
+		struct intel_context *ce = eb->reloc_context;
 
-		ce = intel_context_create(engine);
-		if (IS_ERR(ce)) {
-			err = PTR_ERR(ce);
-			goto err_unpin;
+		if (!ce) {
+			ce = intel_context_create(engine);
+			if (IS_ERR(ce)) {
+				err = PTR_ERR(ce);
+				goto err_unpin;
+			}
+
+			i915_vm_put(ce->vm);
+			ce->vm = i915_vm_get(eb->context->vm);
+			eb->reloc_context = ce;
 		}
 
-		i915_vm_put(ce->vm);
-		ce->vm = i915_vm_get(eb->context->vm);
+		err = intel_context_pin(ce);
+		if (err)
+			goto err_unpin;
 
-		rq = intel_context_create_request(ce);
-		intel_context_put(ce);
+		rq = i915_request_create(ce);
+		intel_context_unpin(ce);
 	}
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
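The __reloc_gpu_alloc() hunk above turns the relocation context from a create/use/put cycle into a cache: the first pass creates eb->reloc_context and rebinds its vm, and every later pass (for instance after a ww-mutex -EDEADLK backoff restarts the loop) only pins and unpins the cached context, with the final put deferred to the err_vma cleanup further down. A minimal standalone sketch of that pattern; struct ctx, ctx_create() and eb_state are hypothetical stand-ins, not the i915 API:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx { int pin_count; };
struct eb_state { struct ctx *reloc_ctx; };

static struct ctx *ctx_create(void)
{
	return calloc(1, sizeof(struct ctx));	/* plays intel_context_create */
}

static int reloc_work(struct eb_state *eb)
{
	struct ctx *c = eb->reloc_ctx;

	if (!c) {			/* first pass only */
		c = ctx_create();
		if (!c)
			return -ENOMEM;
		eb->reloc_ctx = c;	/* cached; freed at final cleanup */
	}

	c->pin_count++;			/* "pin" for this use only */
	/* ... build and emit the relocation batch ... */
	c->pin_count--;			/* "unpin"; the cache survives */
	return 0;
}

int main(void)
{
	struct eb_state eb = { NULL };

	reloc_work(&eb);	/* allocates */
	reloc_work(&eb);	/* e.g. after a -EDEADLK backoff: reuses */
	printf("one allocation, two uses: %p\n", (void *)eb.reloc_ctx);
	free(eb.reloc_ctx);	/* mirrors the intel_context_put in err_vma */
	return 0;
}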
@@ -1871,7 +1885,8 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
 	return 0;
 }
 
-static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
+static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
+					   struct i915_request *rq)
 {
 	bool have_copy = false;
 	struct eb_vma *ev;
@@ -1887,6 +1902,21 @@ repeat:
 	eb_release_vmas(eb, false);
 	i915_gem_ww_ctx_fini(&eb->ww);
 
+	if (rq) {
+		/* nonblocking is always false */
+		if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
+				      MAX_SCHEDULE_TIMEOUT) < 0) {
+			i915_request_put(rq);
+			rq = NULL;
+
+			err = -EINTR;
+			goto err_relock;
+		}
+
+		i915_request_put(rq);
+		rq = NULL;
+	}
+
 	/*
 	 * We take 3 passes through the slowpatch.
 	 *
@@ -1910,14 +1940,25 @@ repeat:
 		err = 0;
 	}
 
-	flush_workqueue(eb->i915->mm.userptr_wq);
+	if (!err)
+		flush_workqueue(eb->i915->mm.userptr_wq);
 
+err_relock:
 	i915_gem_ww_ctx_init(&eb->ww, true);
 	if (err)
 		goto out;
 
 	/* reacquire the objects */
 repeat_validate:
+	rq = eb_pin_engine(eb, false);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err;
+	}
+
+	/* We didn't throttle, should be NULL */
+	GEM_WARN_ON(rq);
+
 	err = eb_validate_vmas(eb);
 	if (err)
 		goto err;
@@ -1988,14 +2029,49 @@ out:
 		}
 	}
 
+	if (rq)
+		i915_request_put(rq);
+
 	return err;
 }
 
 static int eb_relocate_parse(struct i915_execbuffer *eb)
 {
 	int err;
+	struct i915_request *rq = NULL;
+	bool throttle = true;
 
 retry:
+	rq = eb_pin_engine(eb, throttle);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		rq = NULL;
+		if (err != -EDEADLK)
+			return err;
+
+		goto err;
+	}
+
+	if (rq) {
+		bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
+
+		/* Need to drop all locks now for throttling, take slowpath */
+		err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0);
+		if (err == -ETIME) {
+			if (nonblock) {
+				err = -EWOULDBLOCK;
+				i915_request_put(rq);
+				goto err;
+			}
+			goto slow;
+		}
+		i915_request_put(rq);
+		rq = NULL;
+	}
+
+	/* only throttle once, even if we didn't need to throttle */
+	throttle = false;
+
 	err = eb_validate_vmas(eb);
 	if (err == -EAGAIN)
 		goto slow;
@@ -2032,7 +2108,7 @@ err:
 	return err;
 
 slow:
-	err = eb_relocate_parse_slow(eb);
+	err = eb_relocate_parse_slow(eb, rq);
 	if (err)
 		/*
 		 * If the user expects the execobject.offset and
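Taken together, the eb_relocate_parse() hunks above invert the old throttle placement: eb_pin_engine() now hands back the oldest in-flight request when asked to throttle, a zero-timeout i915_request_wait() probes it, O_NONBLOCK submitters fail fast with -EWOULDBLOCK, and blocking submitters drop every lock and do the real wait at the top of the slow path; `throttle = false` then guarantees at most one throttle pass even across -EDEADLK retries. A compilable sketch of that control flow, with request_wait_probe() and slow_path() as illustrative stubs:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct request { int busy; };	/* toy stand-in for an i915 request */

/* Zero-timeout probe, like i915_request_wait(rq, ..., 0): -ETIME if busy. */
static int request_wait_probe(struct request *rq)
{
	return rq->busy ? -ETIME : 0;
}

static int slow_path(struct request *rq)
{
	/* Blocking wait with all locks dropped, then full revalidation. */
	if (rq)
		rq->busy = 0;
	return 0;
}

static int relocate_parse(struct request *throttle_rq, bool nonblock)
{
	bool throttle = true;	/* only the first pass may throttle */
	struct request *rq;
	int err;

retry:
	rq = throttle ? throttle_rq : NULL;
	if (rq) {
		err = request_wait_probe(rq);
		if (err == -ETIME) {
			if (nonblock)
				return -EWOULDBLOCK;
			goto slow;
		}
	}
	throttle = false;

	/* ... validate vmas, apply relocations; -EDEADLK would goto retry ... */
	return 0;

slow:
	err = slow_path(rq);
	if (err)
		return err;
	throttle = false;
	goto retry;
}

int main(void)
{
	struct request busy = { .busy = 1 };

	printf("nonblock: %d\n", relocate_parse(&busy, true));  /* -EWOULDBLOCK */
	printf("blocking: %d\n", relocate_parse(&busy, false)); /* 0 */
	return 0;
}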
@@ -2486,7 +2562,7 @@ static const enum intel_engine_id user_ring_map[] = {
 	[I915_EXEC_VEBOX]	= VECS0
 };
 
-static struct i915_request *eb_throttle(struct intel_context *ce)
+static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
 {
 	struct intel_ring *ring = ce->ring;
 	struct intel_timeline *tl = ce->timeline;
@@ -2520,22 +2596,17 @@ static struct i915_request *eb_throttle(struct intel_context *ce)
 	return i915_request_get(rq);
 }
 
-static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
+static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle)
 {
+	struct intel_context *ce = eb->context;
 	struct intel_timeline *tl;
-	struct i915_request *rq;
+	struct i915_request *rq = NULL;
 	int err;
 
-	/*
-	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
-	 * EIO if the GPU is already wedged.
-	 */
-	err = intel_gt_terminally_wedged(ce->engine->gt);
-	if (err)
-		return err;
+	GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED);
 
 	if (unlikely(intel_context_is_banned(ce)))
-		return -EIO;
+		return ERR_PTR(-EIO);
 
 	/*
 	 * Pinning the contexts may generate requests in order to acquire
@@ -2544,7 +2615,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
 	 */
 	err = intel_context_pin(ce);
 	if (err)
-		return err;
+		return ERR_PTR(err);
 
 	/*
 	 * Take a local wakeref for preparing to dispatch the execbuf as
@@ -2556,45 +2627,17 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
 	 */
 	tl = intel_context_timeline_lock(ce);
 	if (IS_ERR(tl)) {
-		err = PTR_ERR(tl);
-		goto err_unpin;
+		intel_context_unpin(ce);
+		return ERR_CAST(tl);
 	}
 
 	intel_context_enter(ce);
-	rq = eb_throttle(ce);
-
+	if (throttle)
+		rq = eb_throttle(eb, ce);
 	intel_context_timeline_unlock(tl);
 
-	if (rq) {
-		bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
-		long timeout;
-
-		timeout = MAX_SCHEDULE_TIMEOUT;
-		if (nonblock)
-			timeout = 0;
-
-		timeout = i915_request_wait(rq,
-					    I915_WAIT_INTERRUPTIBLE,
-					    timeout);
-		i915_request_put(rq);
-
-		if (timeout < 0) {
-			err = nonblock ? -EWOULDBLOCK : timeout;
-			goto err_exit;
-		}
-	}
-
-	eb->engine = ce->engine;
-	eb->context = ce;
-	return 0;
-
-err_exit:
-	mutex_lock(&tl->mutex);
-	intel_context_exit(ce);
-	intel_context_timeline_unlock(tl);
-err_unpin:
-	intel_context_unpin(ce);
-	return err;
+	eb->args->flags |= __EXEC_ENGINE_PINNED;
+	return rq;
 }
 
 static void eb_unpin_engine(struct i915_execbuffer *eb)
@@ -2602,6 +2645,11 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
 	struct intel_context *ce = eb->context;
 	struct intel_timeline *tl = ce->timeline;
 
+	if (!(eb->args->flags & __EXEC_ENGINE_PINNED))
+		return;
+
+	eb->args->flags &= ~__EXEC_ENGINE_PINNED;
+
 	mutex_lock(&tl->mutex);
 	intel_context_exit(ce);
 	mutex_unlock(&tl->mutex);
@@ -2653,7 +2701,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb)
 }
 
 static int
-eb_pin_engine(struct i915_execbuffer *eb)
+eb_select_engine(struct i915_execbuffer *eb)
 {
 	struct intel_context *ce;
 	unsigned int idx;
@@ -2668,13 +2716,46 @@ eb_pin_engine(struct i915_execbuffer *eb)
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
-	err = __eb_pin_engine(eb, ce);
-	intel_context_put(ce);
+	intel_gt_pm_get(ce->engine->gt);
+
+	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+		err = intel_context_alloc_state(ce);
+		if (err)
+			goto err;
+	}
+
+	/*
+	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+	 * EIO if the GPU is already wedged.
+	 */
+	err = intel_gt_terminally_wedged(ce->engine->gt);
+	if (err)
+		goto err;
+
+	eb->context = ce;
+	eb->engine = ce->engine;
+
+	/*
+	 * Make sure engine pool stays alive even if we call intel_context_put
+	 * during ww handling. The pool is destroyed when last pm reference
+	 * is dropped, which breaks our -EDEADLK handling.
+	 */
+	return err;
+
+err:
+	intel_gt_pm_put(ce->engine->gt);
+	intel_context_put(ce);
 	return err;
 }
 
 static void
+eb_put_engine(struct i915_execbuffer *eb)
+{
+	intel_gt_pm_put(eb->engine->gt);
+	intel_context_put(eb->context);
+}
+
+static void
 __free_fence_array(struct eb_fence *fences, unsigned int n)
 {
 	while (n--) {
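Because eb_release_vmas() now ends with eb_unpin_engine(), the unpin can be reached from error paths where the engine was never pinned, or reached again after a ww backoff; the new __EXEC_ENGINE_PINNED bit (carved out of the internal-flags space, hence the BIT(31)-to-BIT(30) widening at the top of the patch) makes it idempotent. The guard reduced to a standalone sketch; struct eb here is a toy, only the flag dance mirrors the patch:

#include <stdio.h>

#define STATE_PINNED	(1u << 30)	/* mirrors __EXEC_ENGINE_PINNED */

struct eb { unsigned int flags; int pin_count; };

static void pin_engine(struct eb *eb)
{
	eb->pin_count++;
	eb->flags |= STATE_PINNED;	/* record that unpin now has work */
}

static void unpin_engine(struct eb *eb)
{
	if (!(eb->flags & STATE_PINNED))	/* idempotent on any path */
		return;
	eb->flags &= ~STATE_PINNED;
	eb->pin_count--;
}

int main(void)
{
	struct eb eb = {0};

	pin_engine(&eb);
	unpin_engine(&eb);
	unpin_engine(&eb);	/* second call is a no-op, not an underflow */
	printf("pin_count = %d\n", eb.pin_count);	/* 0 */
	return 0;
}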
@@ -3054,6 +3135,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
 	eb.vma[0].vma = NULL;
 	eb.reloc_pool = eb.batch_pool = NULL;
+	eb.reloc_context = NULL;
 
 	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
 	reloc_cache_init(&eb.reloc_cache, eb.i915);
@@ -3122,7 +3204,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	err = eb_pin_engine(&eb);
+	err = eb_select_engine(&eb);
 	if (unlikely(err))
 		goto err_context;
 
@@ -3259,8 +3341,10 @@ err_vma:
 		intel_gt_buffer_pool_put(eb.batch_pool);
 	if (eb.reloc_pool)
 		intel_gt_buffer_pool_put(eb.reloc_pool);
+	if (eb.reloc_context)
+		intel_context_put(eb.reloc_context);
 err_engine:
-	eb_unpin_engine(&eb);
+	eb_put_engine(&eb);
 err_context:
 	i915_gem_context_put(eb.gem_context);
 err_destroy:
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index bc08c02b5767..563839cbaf1c 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -135,6 +135,7 @@ static int igt_gpu_reloc(void *arg)
 		goto err_pm;
 	}
 	eb.reloc_pool = NULL;
+	eb.reloc_context = NULL;
 
 	i915_gem_ww_ctx_init(&eb.ww, false);
 retry:
@@ -153,6 +154,8 @@ retry:
 
 	if (eb.reloc_pool)
 		intel_gt_buffer_pool_put(eb.reloc_pool);
+	if (eb.reloc_context)
+		intel_context_put(eb.reloc_context);
 
 	intel_context_put(eb.context);
 err_pm:
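One last note on shape: __eb_pin_engine() returned an int, while its replacement returns the throttle request itself, so failures travel as error-encoded pointers -- ERR_PTR(-EIO) for a banned context, ERR_CAST(tl) to forward the timeline error -- and NULL means "pinned, nothing to wait on". For readers outside the kernel, a self-contained re-creation of that idiom (the real helpers live in include/linux/err.h):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Userspace re-creation of the kernel's error-pointer helpers. */
#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}
/* ERR_CAST: forward an error from one pointer type as another. */
static inline void *ERR_CAST(const void *ptr) { return (void *)ptr; }

struct request { int id; };	/* toy stand-in for struct i915_request */

/* Returns a valid pointer, NULL ("nothing to wait on"), or ERR_PTR(-E...). */
static struct request *pin_engine(int banned)
{
	if (banned)
		return ERR_PTR(-EIO);
	return NULL;
}

int main(void)
{
	struct request *rq = pin_engine(1);

	if (IS_ERR(rq))
		printf("pin failed: %ld\n", PTR_ERR(rq));	/* -5 (EIO) */

	rq = pin_engine(0);
	if (!rq)
		printf("pinned, no throttle request to wait on\n");
	return 0;
}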