Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
 -rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 557
 1 file changed, 349 insertions(+), 208 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fe531f904062..b6ac3df18b58 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,12 +29,14 @@ #include <drm/drm_vma_manager.h> #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "i915_vgpu.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" #include "intel_mocs.h" #include <linux/dma-fence-array.h> +#include <linux/kthread.h> #include <linux/reservation.h> #include <linux/shmem_fs.h> #include <linux/slab.h> @@ -47,18 +49,12 @@ static void i915_gem_flush_free_objects(struct drm_i915_private *i915); static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); -static bool cpu_cache_is_coherent(struct drm_device *dev, - enum i915_cache_level level) -{ - return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE; -} - static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) return false; - if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + if (!i915_gem_object_is_coherent(obj)) return true; return obj->pin_display; @@ -107,16 +103,13 @@ i915_gem_wait_for_error(struct i915_gpu_error *error) might_sleep(); - if (!i915_reset_in_progress(error)) - return 0; - /* * Only wait 10 seconds for the gpu reset to complete to avoid hanging * userspace. If it takes that long something really bad is going on and * we should simply try to bail out and fail as gracefully as possible. */ ret = wait_event_interruptible_timeout(error->reset_queue, - !i915_reset_in_progress(error), + !i915_reset_backoff(error), I915_RESET_TIMEOUT); if (ret == 0) { DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); @@ -254,7 +247,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, if (needs_clflush && (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && - !cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + !i915_gem_object_is_coherent(obj)) drm_clflush_sg(pages); obj->base.read_domains = I915_GEM_DOMAIN_CPU; @@ -312,6 +305,8 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .release = i915_gem_object_release_phys, }; +static const struct drm_i915_gem_object_ops i915_gem_object_ops; + int i915_gem_object_unbind(struct drm_i915_gem_object *obj) { struct i915_vma *vma; @@ -399,7 +394,7 @@ out: if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) i915_gem_request_retire_upto(rq); - if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) { + if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) { /* The GPU is now idle and this client has stalled. 
* Since no other client has submitted a request in the * meantime, assume that this client is the only one @@ -424,7 +419,9 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, long timeout, struct intel_rps_client *rps) { + unsigned int seq = __read_seqcount_begin(&resv->seq); struct dma_fence *excl; + bool prune_fences = false; if (flags & I915_WAIT_ALL) { struct dma_fence **shared; @@ -449,15 +446,31 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, for (; i < count; i++) dma_fence_put(shared[i]); kfree(shared); + + prune_fences = count && timeout >= 0; } else { excl = reservation_object_get_excl_rcu(resv); } - if (excl && timeout >= 0) + if (excl && timeout >= 0) { timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); + prune_fences = timeout >= 0; + } dma_fence_put(excl); + /* Oportunistically prune the fences iff we know they have *all* been + * signaled and that the reservation object has not been changed (i.e. + * no new fences have been added). + */ + if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { + if (reservation_object_trylock(resv)) { + if (!__read_seqcount_retry(&resv->seq, seq)) + reservation_object_add_excl_fence(resv, NULL); + reservation_object_unlock(resv); + } + } + return timeout; } @@ -585,9 +598,18 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, if (obj->mm.pages) return -EBUSY; + GEM_BUG_ON(obj->ops != &i915_gem_object_ops); obj->ops = &i915_gem_phys_ops; - return i915_gem_object_pin_pages(obj); + ret = i915_gem_object_pin_pages(obj); + if (ret) + goto err_xfer; + + return 0; + +err_xfer: + obj->ops = &i915_gem_object_ops; + return ret; } static int @@ -608,7 +630,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, drm_clflush_virt_range(vaddr, args->size); i915_gem_chipset_flush(to_i915(obj->base.dev)); - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); return 0; } @@ -763,6 +785,15 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, if (ret) return ret; + if (i915_gem_object_is_coherent(obj) || + !static_cpu_has(X86_FEATURE_CLFLUSH)) { + ret = i915_gem_object_set_to_cpu_domain(obj, false); + if (ret) + goto err_unpin; + else + goto out; + } + i915_gem_object_flush_gtt_write_domain(obj); /* If we're not in the cpu read domain, set ourself into the gtt @@ -771,17 +802,9 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, * anyway again before the next pread happens. */ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) - *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, - obj->cache_level); - - if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { - ret = i915_gem_object_set_to_cpu_domain(obj, false); - if (ret) - goto err_unpin; - - *needs_clflush = 0; - } + *needs_clflush = CLFLUSH_BEFORE; +out: /* return with the pages pinned */ return 0; @@ -814,6 +837,15 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, if (ret) return ret; + if (i915_gem_object_is_coherent(obj) || + !static_cpu_has(X86_FEATURE_CLFLUSH)) { + ret = i915_gem_object_set_to_cpu_domain(obj, true); + if (ret) + goto err_unpin; + else + goto out; + } + i915_gem_object_flush_gtt_write_domain(obj); /* If we're not in the cpu write domain, set ourself into the @@ -822,26 +854,15 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, * right away and we therefore have to clflush anyway. 
*/ if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) - *needs_clflush |= cpu_write_needs_clflush(obj) << 1; + *needs_clflush |= CLFLUSH_AFTER; /* Same trick applies to invalidate partially written cachelines read * before writing. */ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) - *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev, - obj->cache_level); - - if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (ret) - goto err_unpin; - - *needs_clflush = 0; - } - - if ((*needs_clflush & CLFLUSH_AFTER) == 0) - obj->cache_dirty = true; + *needs_clflush |= CLFLUSH_BEFORE; +out: intel_fb_obj_invalidate(obj, ORIGIN_CPU); obj->mm.dirty = true; /* return with the pages pinned */ @@ -1257,7 +1278,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, user_data += page_length; offset += page_length; } - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); mutex_lock(&i915->drm.struct_mutex); out_unpin: @@ -1393,7 +1414,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, offset = 0; } - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); i915_gem_obj_finish_shmem_access(obj); return ret; } @@ -1602,23 +1623,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_sw_finish *args = data; struct drm_i915_gem_object *obj; - int err = 0; obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; /* Pinned buffers may be scanout, so flush the cache */ - if (READ_ONCE(obj->pin_display)) { - err = i915_mutex_lock_interruptible(dev); - if (!err) { - i915_gem_object_flush_cpu_write_domain(obj); - mutex_unlock(&dev->struct_mutex); - } - } - + i915_gem_object_flush_if_display(obj); i915_gem_object_put(obj); - return err; + + return 0; } /** @@ -2232,17 +2246,17 @@ unlock: mutex_unlock(&obj->mm.lock); } -static void i915_sg_trim(struct sg_table *orig_st) +static bool i915_sg_trim(struct sg_table *orig_st) { struct sg_table new_st; struct scatterlist *sg, *new_sg; unsigned int i; if (orig_st->nents == orig_st->orig_nents) - return; + return false; if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) - return; + return false; new_sg = new_st.sgl; for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { @@ -2255,6 +2269,7 @@ static void i915_sg_trim(struct sg_table *orig_st) sg_free_table(orig_st); *orig_st = new_st; + return true; } static struct sg_table * @@ -2306,7 +2321,7 @@ rebuild_st: st->nents = 0; for (i = 0; i < page_count; i++) { page = shmem_read_mapping_page_gfp(mapping, i, gfp); - if (IS_ERR(page)) { + if (unlikely(IS_ERR(page))) { i915_gem_shrink(dev_priv, page_count, I915_SHRINK_BOUND | @@ -2314,12 +2329,21 @@ rebuild_st: I915_SHRINK_PURGEABLE); page = shmem_read_mapping_page_gfp(mapping, i, gfp); } - if (IS_ERR(page)) { + if (unlikely(IS_ERR(page))) { + gfp_t reclaim; + /* We've tried hard to allocate the memory by reaping * our own buffer, now let the real VM do its job and * go down in flames if truly OOM. + * + * However, since graphics tend to be disposable, + * defer the oom here by reporting the ENOMEM back + * to userspace. 
*/ - page = shmem_read_mapping_page(mapping, i); + reclaim = mapping_gfp_mask(mapping); + reclaim |= __GFP_NORETRY; /* reclaim, but no oom */ + + page = shmem_read_mapping_page_gfp(mapping, i, reclaim); if (IS_ERR(page)) { ret = PTR_ERR(page); goto err_sg; @@ -2674,7 +2698,8 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request; + struct drm_i915_gem_request *request, *active = NULL; + unsigned long flags; /* We are called by the error capture and reset at a random * point in time. In particular, note that neither is crucially @@ -2684,15 +2709,22 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ + spin_lock_irqsave(&engine->timeline->lock, flags); list_for_each_entry(request, &engine->timeline->requests, link) { - if (__i915_gem_request_completed(request)) + if (__i915_gem_request_completed(request, + request->global_seqno)) continue; GEM_BUG_ON(request->engine != engine); - return request; + GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &request->fence.flags)); + + active = request; + break; } + spin_unlock_irqrestore(&engine->timeline->lock, flags); - return NULL; + return active; } static bool engine_stalled(struct intel_engine_cs *engine) @@ -2719,6 +2751,17 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct drm_i915_gem_request *request; + /* Prevent the signaler thread from updating the request + * state (by calling dma_fence_signal) as we are processing + * the reset. The write from the GPU of the seqno is + * asynchronous and the signaler thread may see a different + * value to us and declare the request complete, even though + * the reset routine have picked that request as the active + * (incomplete) request. This conflict is not handled + * gracefully! + */ + kthread_park(engine->breadcrumbs.signaler); + /* Prevent request submission to the hardware until we have * completed the reset in i915_gem_reset_finish(). 
If a request * is completed by one engine, it may then queue a request @@ -2730,6 +2773,9 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) tasklet_kill(&engine->irq_tasklet); tasklet_disable(&engine->irq_tasklet); + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); + if (engine_stalled(engine)) { request = i915_gem_find_active_request(engine); if (request && request->fence.error == -EIO) @@ -2826,9 +2872,6 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); - request = i915_gem_find_active_request(engine); if (request && i915_gem_reset_request(request)) { DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", @@ -2852,8 +2895,14 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) i915_gem_retire_requests(dev_priv); - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + struct i915_gem_context *ctx; + i915_gem_reset_engine(engine); + ctx = fetch_and_zero(&engine->last_retired_context); + if (ctx) + engine->context_unpin(engine, ctx); + } i915_gem_restore_fences(dev_priv); @@ -2872,8 +2921,10 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { tasklet_enable(&engine->irq_tasklet); + kthread_unpark(engine->breadcrumbs.signaler); + } } static void nop_submit_request(struct drm_i915_gem_request *request) @@ -2947,14 +2998,78 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); set_bit(I915_WEDGED, &dev_priv->gpu_error.flags); + /* Retire completed requests first so the list of inflight/incomplete + * requests is accurate and we don't try and mark successful requests + * as in error during __i915_gem_set_wedged_BKL(). + */ + i915_gem_retire_requests(dev_priv); + stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL); i915_gem_context_lost(dev_priv); - i915_gem_retire_requests(dev_priv); mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); } +bool i915_gem_unset_wedged(struct drm_i915_private *i915) +{ + struct i915_gem_timeline *tl; + int i; + + lockdep_assert_held(&i915->drm.struct_mutex); + if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) + return true; + + /* Before unwedging, make sure that all pending operations + * are flushed and errored out - we may have requests waiting upon + * third party fences. We marked all inflight requests as EIO, and + * every execbuf since returned EIO, for consistency we want all + * the currently pending requests to also be marked as EIO, which + * is done inside our nop_submit_request - and so we must wait. + * + * No more can be submitted until we reset the wedged bit. + */ + list_for_each_entry(tl, &i915->gt.timelines, link) { + for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { + struct drm_i915_gem_request *rq; + + rq = i915_gem_active_peek(&tl->engine[i].last_request, + &i915->drm.struct_mutex); + if (!rq) + continue; + + /* We can't use our normal waiter as we want to + * avoid recursively trying to handle the current + * reset. 
The basic dma_fence_default_wait() installs + * a callback for dma_fence_signal(), which is + * triggered by our nop handler (indirectly, the + * callback enables the signaler thread which is + * woken by the nop_submit_request() advancing the seqno + * and when the seqno passes the fence, the signaler + * then signals the fence waking us up). + */ + if (dma_fence_default_wait(&rq->fence, true, + MAX_SCHEDULE_TIMEOUT) < 0) + return false; + } + } + + /* Undo nop_submit_request. We prevent all new i915 requests from + * being queued (by disallowing execbuf whilst wedged) so having + * waited for all active requests above, we know the system is idle + * and do not have to worry about a thread being inside + * engine->submit_request() as we swap over. So unlike installing + * the nop_submit_request on reset, we can do this from normal + * context and do not require stop_machine(). + */ + intel_engines_reset_default_submission(i915); + + smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ + clear_bit(I915_WEDGED, &i915->gpu_error.flags); + + return true; +} + static void i915_gem_retire_work_handler(struct work_struct *work) { @@ -2997,9 +3112,7 @@ i915_gem_idle_work_handler(struct work_struct *work) * Wait for last execlists context complete, but bail out in case a * new request is submitted. */ - wait_for(READ_ONCE(dev_priv->gt.active_requests) || - intel_execlists_idle(dev_priv), 10); - + wait_for(intel_engines_are_idle(dev_priv), 10); if (READ_ONCE(dev_priv->gt.active_requests)) return; @@ -3024,11 +3137,13 @@ i915_gem_idle_work_handler(struct work_struct *work) if (dev_priv->gt.active_requests) goto out_unlock; - if (wait_for(intel_execlists_idle(dev_priv), 10)) + if (wait_for(intel_engines_are_idle(dev_priv), 10)) DRM_ERROR("Timeout waiting for engines to idle\n"); - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + intel_engine_disarm_breadcrumbs(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + } GEM_BUG_ON(!dev_priv->gt.awake); dev_priv->gt.awake = false; @@ -3156,6 +3271,29 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) return 0; } +static int wait_for_engine(struct intel_engine_cs *engine, int timeout_ms) +{ + return wait_for(intel_engine_is_idle(engine), timeout_ms); +} + +static int wait_for_engines(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + if (GEM_WARN_ON(wait_for_engine(engine, 50))) { + i915_gem_set_wedged(i915); + return -EIO; + } + + GEM_BUG_ON(intel_engine_get_seqno(engine) != + intel_engine_last_submit(engine)); + } + + return 0; +} + int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) { int ret; @@ -3170,48 +3308,16 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) if (ret) return ret; } - } else { - ret = wait_for_timeline(&i915->gt.global_timeline, flags); - if (ret) - return ret; - } - return 0; -} - -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, - bool force) -{ - /* If we don't have a page list set up, then we're not pinned - * to GPU, and we can ignore the cache flush because it'll happen - * again at bind time. - */ - if (!obj->mm.pages) - return; + i915_gem_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests); - /* - * Stolen memory is always coherent with the GPU as it is explicitly - * marked as wc by the system, or the system is cache-coherent. 
- */ - if (obj->stolen || obj->phys_handle) - return; - - /* If the GPU is snooping the contents of the CPU cache, - * we do not need to manually clear the CPU cache lines. However, - * the caches are only snooped when the render cache is - * flushed/invalidated. As we always have to emit invalidations - * and flushes when moving into and out of the RENDER domain, correct - * snooping behaviour occurs naturally as the result of our domain - * tracking. - */ - if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { - obj->cache_dirty = true; - return; + ret = wait_for_engines(i915); + } else { + ret = wait_for_timeline(&i915->gt.global_timeline, flags); } - trace_i915_gem_object_clflush(obj); - drm_clflush_sg(obj->mm.pages); - obj->cache_dirty = false; + return ret; } /** Flushes the GTT write domain for the object if it's dirty. */ @@ -3239,15 +3345,18 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) * system agents we cannot reproduce this behaviour). */ wmb(); - if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) - POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); + if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { + if (intel_runtime_pm_get_if_in_use(dev_priv)) { + spin_lock_irq(&dev_priv->uncore.lock); + POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); + spin_unlock_irq(&dev_priv->uncore.lock); + intel_runtime_pm_put(dev_priv); + } + } - intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT)); + intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT)); obj->base.write_domain = 0; - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - I915_GEM_DOMAIN_GTT); } /** Flushes the CPU write domain for the object if it's dirty. */ @@ -3257,13 +3366,27 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; - i915_gem_clflush_object(obj, obj->pin_display); - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); + obj->base.write_domain = 0; +} + +static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) +{ + if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty) + return; + i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); obj->base.write_domain = 0; - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - I915_GEM_DOMAIN_CPU); +} + +void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) +{ + if (!READ_ONCE(obj->pin_display)) + return; + + mutex_lock(&obj->base.dev->struct_mutex); + __i915_gem_object_flush_for_display(obj); + mutex_unlock(&obj->base.dev->struct_mutex); } /** @@ -3277,7 +3400,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) int i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { - uint32_t old_write_domain, old_read_domains; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3315,9 +3437,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) mb(); - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - /* It should now be out of any other write domains, and we can update * the domain values for our changes. 
*/ @@ -3329,10 +3448,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) obj->mm.dirty = true; } - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - i915_gem_object_unpin_pages(obj); return 0; } @@ -3457,7 +3572,7 @@ restart: } if (obj->base.write_domain == I915_GEM_DOMAIN_CPU && - cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + i915_gem_object_is_coherent(obj)) obj->cache_dirty = true; list_for_each_entry(vma, &obj->vma_list, obj_link) @@ -3569,7 +3684,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view) { struct i915_vma *vma; - u32 old_read_domains, old_write_domain; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3629,24 +3743,14 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma->display_alignment = max_t(u64, vma->display_alignment, alignment); /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ - if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj, true); - intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); - } - - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; + __i915_gem_object_flush_for_display(obj); + intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ - obj->base.write_domain = 0; obj->base.read_domains |= I915_GEM_DOMAIN_GTT; - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - return vma; err_unpin_display: @@ -3682,7 +3786,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) { - uint32_t old_write_domain, old_read_domains; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3701,13 +3804,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) i915_gem_object_flush_gtt_write_domain(obj); - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { - i915_gem_clflush_object(obj, false); - + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); obj->base.read_domains |= I915_GEM_DOMAIN_CPU; } @@ -3724,10 +3823,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) obj->base.write_domain = I915_GEM_DOMAIN_CPU; } - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - return 0; } @@ -3755,16 +3850,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) return -EIO; spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_list) { + list_for_each_entry(request, &file_priv->mm.request_list, client_link) { if (time_after_eq(request->emitted_jiffies, recent_enough)) break; - /* - * Note that the request might not have been submitted yet. - * In which case emitted_jiffies will be zero. 
- */ - if (!request->emitted_jiffies) - continue; + if (target) { + list_del(&target->client_link); + target->file_priv = NULL; + } target = request; } @@ -4050,7 +4143,7 @@ frontbuffer_retire(struct i915_gem_active *active, struct drm_i915_gem_object *obj = container_of(active, typeof(*obj), frontbuffer_write); - intel_fb_obj_flush(obj, true, ORIGIN_CS); + intel_fb_obj_flush(obj, ORIGIN_CS); } void i915_gem_object_init(struct drm_i915_gem_object *obj, @@ -4314,11 +4407,29 @@ static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv) !i915_gem_context_is_kernel(engine->last_retired_context)); } +void i915_gem_sanitize(struct drm_i915_private *i915) +{ + /* + * If we inherit context state from the BIOS or earlier occupants + * of the GPU, the GPU may be in an inconsistent state when we + * try to take over. The only way to remove the earlier state + * is by resetting. However, resetting on earlier gen is tricky as + * it may impact the display and we are uncertain about the stability + * of the reset, so we only reset recent machines with logical + * context support (that must be reset to remove any stray contexts). + */ + if (HAS_HW_CONTEXTS(i915)) { + int reset = intel_gpu_reset(i915, ALL_ENGINES); + WARN_ON(reset && reset != -ENODEV); + } +} + int i915_gem_suspend(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; int ret; + intel_runtime_pm_get(dev_priv); intel_suspend_gt_powersave(dev_priv); mutex_lock(&dev->struct_mutex); @@ -4333,16 +4444,13 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) */ ret = i915_gem_switch_to_kernel_context(dev_priv); if (ret) - goto err; + goto err_unlock; ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED); if (ret) - goto err; - - i915_gem_retire_requests(dev_priv); - GEM_BUG_ON(dev_priv->gt.active_requests); + goto err_unlock; assert_kernel_context_is_current(dev_priv); i915_gem_context_lost(dev_priv); @@ -4365,7 +4473,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * reset the GPU back to its idle, low power state. */ WARN_ON(dev_priv->gt.awake); - WARN_ON(!intel_execlists_idle(dev_priv)); + WARN_ON(!intel_engines_are_idle(dev_priv)); /* * Neither the BIOS, ourselves or any other kernel @@ -4386,15 +4494,13 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * machines is a good idea, we don't - just in case it leaves the * machine in an unusable condition. 
*/ - if (HAS_HW_CONTEXTS(dev_priv)) { - int reset = intel_gpu_reset(dev_priv, ALL_ENGINES); - WARN_ON(reset && reset != -ENODEV); - } - - return 0; + i915_gem_sanitize(dev_priv); + goto out_rpm_put; -err: +err_unlock: mutex_unlock(&dev->struct_mutex); +out_rpm_put: + intel_runtime_pm_put(dev_priv); return ret; } @@ -4464,11 +4570,24 @@ static void init_unused_rings(struct drm_i915_private *dev_priv) } } -int -i915_gem_init_hw(struct drm_i915_private *dev_priv) +static int __i915_gem_restart_engines(void *data) { + struct drm_i915_private *i915 = data; struct intel_engine_cs *engine; enum intel_engine_id id; + int err; + + for_each_engine(engine, i915, id) { + err = engine->init_hw(engine); + if (err) + return err; + } + + return 0; +} + +int i915_gem_init_hw(struct drm_i915_private *dev_priv) +{ int ret; dev_priv->gt.last_init_time = ktime_get(); @@ -4514,16 +4633,14 @@ i915_gem_init_hw(struct drm_i915_private *dev_priv) } /* Need to do basic initialisation of all rings first: */ - for_each_engine(engine, dev_priv, id) { - ret = engine->init_hw(engine); - if (ret) - goto out; - } + ret = __i915_gem_restart_engines(dev_priv); + if (ret) + goto out; intel_mocs_init_l3cc_table(dev_priv); /* We can't enable contexts until all firmware is loaded */ - ret = intel_guc_setup(dev_priv); + ret = intel_uc_init_hw(dev_priv); if (ret) goto out; @@ -4559,6 +4676,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_clflush_init(dev_priv); + if (!i915.enable_execlists) { dev_priv->gt.resume = intel_legacy_submission_resume; dev_priv->gt.cleanup_engine = intel_engine_cleanup; @@ -4607,6 +4726,11 @@ out_unlock: return ret; } +void i915_gem_init_mmio(struct drm_i915_private *i915) +{ + i915_gem_sanitize(i915); +} + void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) { @@ -4665,7 +4789,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) dev_priv->requests = KMEM_CACHE(drm_i915_gem_request, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | - SLAB_DESTROY_BY_RCU); + SLAB_TYPESAFE_BY_RCU); if (!dev_priv->requests) goto err_vmas; @@ -4720,7 +4844,9 @@ err_out: void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) { + i915_gem_drain_freed_objects(dev_priv); WARN_ON(!llist_empty(&dev_priv->mm.free_list)); + WARN_ON(dev_priv->mm.object_count); mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_timeline_fini(&dev_priv->gt.global_timeline); @@ -4738,14 +4864,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) int i915_gem_freeze(struct drm_i915_private *dev_priv) { - intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_shrink_all(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); - return 0; } @@ -4796,7 +4918,7 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file) * file_priv. 
*/ spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_list) + list_for_each_entry(request, &file_priv->mm.request_list, client_link) request->file_priv = NULL; spin_unlock(&file_priv->mm.lock); @@ -4874,38 +4996,49 @@ i915_gem_object_create_from_data(struct drm_i915_private *dev_priv, const void *data, size_t size) { struct drm_i915_gem_object *obj; - struct sg_table *sg; - size_t bytes; - int ret; + struct file *file; + size_t offset; + int err; obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE)); if (IS_ERR(obj)) return obj; - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (ret) - goto fail; + GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); - ret = i915_gem_object_pin_pages(obj); - if (ret) - goto fail; + file = obj->base.filp; + offset = 0; + do { + unsigned int len = min_t(typeof(size), size, PAGE_SIZE); + struct page *page; + void *pgdata, *vaddr; - sg = obj->mm.pages; - bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size); - obj->mm.dirty = true; /* Backing store is now out of date */ - i915_gem_object_unpin_pages(obj); + err = pagecache_write_begin(file, file->f_mapping, + offset, len, 0, + &page, &pgdata); + if (err < 0) + goto fail; - if (WARN_ON(bytes != size)) { - DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); - ret = -EFAULT; - goto fail; - } + vaddr = kmap(page); + memcpy(vaddr, data, len); + kunmap(page); + + err = pagecache_write_end(file, file->f_mapping, + offset, len, len, + page, pgdata); + if (err < 0) + goto fail; + + size -= len; + data += len; + offset += len; + } while (size); return obj; fail: i915_gem_object_put(obj); - return ERR_PTR(ret); + return ERR_PTR(err); } struct scatterlist * @@ -5060,3 +5193,11 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, sg = i915_gem_object_get_sg(obj, n, &offset); return sg_dma_address(sg) + (offset << PAGE_SHIFT); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/scatterlist.c" +#include "selftests/mock_gem_device.c" +#include "selftests/huge_gem_object.c" +#include "selftests/i915_gem_object.c" +#include "selftests/i915_gem_coherency.c" +#endif |
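Note on the coherency checks: the open-coded cpu_cache_is_coherent(dev, level) helper removed above is replaced throughout by i915_gem_object_is_coherent(obj), which is not defined in this file. As a minimal sketch only, and assuming the new helper simply repackages the removed logic around the object's own device and cache level, it would reduce to roughly:

static inline bool
i915_gem_object_is_coherent(struct drm_i915_gem_object *obj)
{
	/* Sketch only: mirrors the removed cpu_cache_is_coherent() body,
	 * taking the device and cache level from the object itself.
	 */
	return HAS_LLC(to_i915(obj->base.dev)) ||
	       obj->cache_level != I915_CACHE_NONE;
}

Either condition means CPU accesses are already coherent without an explicit clflush (LLC platforms snoop the CPU cache, and a non-NONE cache level is snooped), which is why cpu_write_needs_clflush() and __i915_gem_object_release_shmem() above can skip drm_clflush_sg() in those cases.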