Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c  557
1 file changed, 349 insertions, 208 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fe531f904062..b6ac3df18b58 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -29,12 +29,14 @@
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
+#include "i915_gem_clflush.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include "intel_mocs.h"
#include <linux/dma-fence-array.h>
+#include <linux/kthread.h>
#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
@@ -47,18 +49,12 @@ static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
-static bool cpu_cache_is_coherent(struct drm_device *dev,
- enum i915_cache_level level)
-{
- return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE;
-}
-
static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
return false;
- if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+ if (!i915_gem_object_is_coherent(obj))
return true;
return obj->pin_display;
@@ -107,16 +103,13 @@ i915_gem_wait_for_error(struct i915_gpu_error *error)
might_sleep();
- if (!i915_reset_in_progress(error))
- return 0;
-
/*
* Only wait 10 seconds for the gpu reset to complete to avoid hanging
* userspace. If it takes that long something really bad is going on and
* we should simply try to bail out and fail as gracefully as possible.
*/
ret = wait_event_interruptible_timeout(error->reset_queue,
- !i915_reset_in_progress(error),
+ !i915_reset_backoff(error),
I915_RESET_TIMEOUT);
if (ret == 0) {
DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
@@ -254,7 +247,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
if (needs_clflush &&
(obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
- !cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+ !i915_gem_object_is_coherent(obj))
drm_clflush_sg(pages);
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
@@ -312,6 +305,8 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
.release = i915_gem_object_release_phys,
};
+static const struct drm_i915_gem_object_ops i915_gem_object_ops;
+
int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
struct i915_vma *vma;
@@ -399,7 +394,7 @@ out:
if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
i915_gem_request_retire_upto(rq);
- if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
+ if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) {
/* The GPU is now idle and this client has stalled.
* Since no other client has submitted a request in the
* meantime, assume that this client is the only one
@@ -424,7 +419,9 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
long timeout,
struct intel_rps_client *rps)
{
+ unsigned int seq = __read_seqcount_begin(&resv->seq);
struct dma_fence *excl;
+ bool prune_fences = false;
if (flags & I915_WAIT_ALL) {
struct dma_fence **shared;
@@ -449,15 +446,31 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
for (; i < count; i++)
dma_fence_put(shared[i]);
kfree(shared);
+
+ prune_fences = count && timeout >= 0;
} else {
excl = reservation_object_get_excl_rcu(resv);
}
- if (excl && timeout >= 0)
+ if (excl && timeout >= 0) {
timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps);
+ prune_fences = timeout >= 0;
+ }
dma_fence_put(excl);
+ /* Opportunistically prune the fences iff we know they have *all* been
+ * signaled and that the reservation object has not been changed (i.e.
+ * no new fences have been added).
+ */
+ if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
+ if (reservation_object_trylock(resv)) {
+ if (!__read_seqcount_retry(&resv->seq, seq))
+ reservation_object_add_excl_fence(resv, NULL);
+ reservation_object_unlock(resv);
+ }
+ }
+
return timeout;
}
@@ -585,9 +598,18 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
if (obj->mm.pages)
return -EBUSY;
+ GEM_BUG_ON(obj->ops != &i915_gem_object_ops);
obj->ops = &i915_gem_phys_ops;
- return i915_gem_object_pin_pages(obj);
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ goto err_xfer;
+
+ return 0;
+
+err_xfer:
+ obj->ops = &i915_gem_object_ops;
+ return ret;
}
static int
@@ -608,7 +630,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
drm_clflush_virt_range(vaddr, args->size);
i915_gem_chipset_flush(to_i915(obj->base.dev));
- intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+ intel_fb_obj_flush(obj, ORIGIN_CPU);
return 0;
}
@@ -763,6 +785,15 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
if (ret)
return ret;
+ if (i915_gem_object_is_coherent(obj) ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ ret = i915_gem_object_set_to_cpu_domain(obj, false);
+ if (ret)
+ goto err_unpin;
+ else
+ goto out;
+ }
+
i915_gem_object_flush_gtt_write_domain(obj);
/* If we're not in the cpu read domain, set ourself into the gtt
@@ -771,17 +802,9 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
* anyway again before the next pread happens.
*/
if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
- *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
- obj->cache_level);
-
- if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
- ret = i915_gem_object_set_to_cpu_domain(obj, false);
- if (ret)
- goto err_unpin;
-
- *needs_clflush = 0;
- }
+ *needs_clflush = CLFLUSH_BEFORE;
+out:
/* return with the pages pinned */
return 0;
@@ -814,6 +837,15 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
if (ret)
return ret;
+ if (i915_gem_object_is_coherent(obj) ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ ret = i915_gem_object_set_to_cpu_domain(obj, true);
+ if (ret)
+ goto err_unpin;
+ else
+ goto out;
+ }
+
i915_gem_object_flush_gtt_write_domain(obj);
/* If we're not in the cpu write domain, set ourself into the
@@ -822,26 +854,15 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
* right away and we therefore have to clflush anyway.
*/
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
- *needs_clflush |= cpu_write_needs_clflush(obj) << 1;
+ *needs_clflush |= CLFLUSH_AFTER;
/* Same trick applies to invalidate partially written cachelines read
* before writing.
*/
if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
- *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
- obj->cache_level);
-
- if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
- ret = i915_gem_object_set_to_cpu_domain(obj, true);
- if (ret)
- goto err_unpin;
-
- *needs_clflush = 0;
- }
-
- if ((*needs_clflush & CLFLUSH_AFTER) == 0)
- obj->cache_dirty = true;
+ *needs_clflush |= CLFLUSH_BEFORE;
+out:
intel_fb_obj_invalidate(obj, ORIGIN_CPU);
obj->mm.dirty = true;
/* return with the pages pinned */
@@ -1257,7 +1278,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
user_data += page_length;
offset += page_length;
}
- intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+ intel_fb_obj_flush(obj, ORIGIN_CPU);
mutex_lock(&i915->drm.struct_mutex);
out_unpin:
@@ -1393,7 +1414,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
offset = 0;
}
- intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+ intel_fb_obj_flush(obj, ORIGIN_CPU);
i915_gem_obj_finish_shmem_access(obj);
return ret;
}
@@ -1602,23 +1623,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
{
struct drm_i915_gem_sw_finish *args = data;
struct drm_i915_gem_object *obj;
- int err = 0;
obj = i915_gem_object_lookup(file, args->handle);
if (!obj)
return -ENOENT;
/* Pinned buffers may be scanout, so flush the cache */
- if (READ_ONCE(obj->pin_display)) {
- err = i915_mutex_lock_interruptible(dev);
- if (!err) {
- i915_gem_object_flush_cpu_write_domain(obj);
- mutex_unlock(&dev->struct_mutex);
- }
- }
-
+ i915_gem_object_flush_if_display(obj);
i915_gem_object_put(obj);
- return err;
+
+ return 0;
}
/**
@@ -2232,17 +2246,17 @@ unlock:
mutex_unlock(&obj->mm.lock);
}
-static void i915_sg_trim(struct sg_table *orig_st)
+static bool i915_sg_trim(struct sg_table *orig_st)
{
struct sg_table new_st;
struct scatterlist *sg, *new_sg;
unsigned int i;
if (orig_st->nents == orig_st->orig_nents)
- return;
+ return false;
if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
- return;
+ return false;
new_sg = new_st.sgl;
for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
@@ -2255,6 +2269,7 @@ static void i915_sg_trim(struct sg_table *orig_st)
sg_free_table(orig_st);
*orig_st = new_st;
+ return true;
}
static struct sg_table *
@@ -2306,7 +2321,7 @@ rebuild_st:
st->nents = 0;
for (i = 0; i < page_count; i++) {
page = shmem_read_mapping_page_gfp(mapping, i, gfp);
- if (IS_ERR(page)) {
+ if (unlikely(IS_ERR(page))) {
i915_gem_shrink(dev_priv,
page_count,
I915_SHRINK_BOUND |
@@ -2314,12 +2329,21 @@ rebuild_st:
I915_SHRINK_PURGEABLE);
page = shmem_read_mapping_page_gfp(mapping, i, gfp);
}
- if (IS_ERR(page)) {
+ if (unlikely(IS_ERR(page))) {
+ gfp_t reclaim;
+
/* We've tried hard to allocate the memory by reaping
* our own buffer, now let the real VM do its job and
* go down in flames if truly OOM.
+ *
+ * However, since graphics tend to be disposable,
+ * defer the oom here by reporting the ENOMEM back
+ * to userspace.
*/
- page = shmem_read_mapping_page(mapping, i);
+ reclaim = mapping_gfp_mask(mapping);
+ reclaim |= __GFP_NORETRY; /* reclaim, but no oom */
+
+ page = shmem_read_mapping_page_gfp(mapping, i, reclaim);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
goto err_sg;
@@ -2674,7 +2698,8 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
struct drm_i915_gem_request *
i915_gem_find_active_request(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_request *request;
+ struct drm_i915_gem_request *request, *active = NULL;
+ unsigned long flags;
/* We are called by the error capture and reset at a random
* point in time. In particular, note that neither is crucially
@@ -2684,15 +2709,22 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
* extra delay for a recent interrupt is pointless. Hence, we do
* not need an engine->irq_seqno_barrier() before the seqno reads.
*/
+ spin_lock_irqsave(&engine->timeline->lock, flags);
list_for_each_entry(request, &engine->timeline->requests, link) {
- if (__i915_gem_request_completed(request))
+ if (__i915_gem_request_completed(request,
+ request->global_seqno))
continue;
GEM_BUG_ON(request->engine != engine);
- return request;
+ GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &request->fence.flags));
+
+ active = request;
+ break;
}
+ spin_unlock_irqrestore(&engine->timeline->lock, flags);
- return NULL;
+ return active;
}
static bool engine_stalled(struct intel_engine_cs *engine)
@@ -2719,6 +2751,17 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
for_each_engine(engine, dev_priv, id) {
struct drm_i915_gem_request *request;
+ /* Prevent the signaler thread from updating the request
+ * state (by calling dma_fence_signal) as we are processing
+ * the reset. The write from the GPU of the seqno is
+ * asynchronous and the signaler thread may see a different
+ * value to us and declare the request complete, even though
+ * the reset routine has picked that request as the active
+ * (incomplete) request. This conflict is not handled
+ * gracefully!
+ */
+ kthread_park(engine->breadcrumbs.signaler);
+
/* Prevent request submission to the hardware until we have
* completed the reset in i915_gem_reset_finish(). If a request
* is completed by one engine, it may then queue a request
@@ -2730,6 +2773,9 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
tasklet_kill(&engine->irq_tasklet);
tasklet_disable(&engine->irq_tasklet);
+ if (engine->irq_seqno_barrier)
+ engine->irq_seqno_barrier(engine);
+
if (engine_stalled(engine)) {
request = i915_gem_find_active_request(engine);
if (request && request->fence.error == -EIO)
@@ -2826,9 +2872,6 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *request;
- if (engine->irq_seqno_barrier)
- engine->irq_seqno_barrier(engine);
-
request = i915_gem_find_active_request(engine);
if (request && i915_gem_reset_request(request)) {
DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
@@ -2852,8 +2895,14 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
i915_gem_retire_requests(dev_priv);
- for_each_engine(engine, dev_priv, id)
+ for_each_engine(engine, dev_priv, id) {
+ struct i915_gem_context *ctx;
+
i915_gem_reset_engine(engine);
+ ctx = fetch_and_zero(&engine->last_retired_context);
+ if (ctx)
+ engine->context_unpin(engine, ctx);
+ }
i915_gem_restore_fences(dev_priv);
@@ -2872,8 +2921,10 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
lockdep_assert_held(&dev_priv->drm.struct_mutex);
- for_each_engine(engine, dev_priv, id)
+ for_each_engine(engine, dev_priv, id) {
tasklet_enable(&engine->irq_tasklet);
+ kthread_unpark(engine->breadcrumbs.signaler);
+ }
}
static void nop_submit_request(struct drm_i915_gem_request *request)
@@ -2947,14 +2998,78 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
lockdep_assert_held(&dev_priv->drm.struct_mutex);
set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
+ /* Retire completed requests first so the list of inflight/incomplete
+ * requests is accurate and we don't try and mark successful requests
+ * as in error during __i915_gem_set_wedged_BKL().
+ */
+ i915_gem_retire_requests(dev_priv);
+
stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);
i915_gem_context_lost(dev_priv);
- i915_gem_retire_requests(dev_priv);
mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
}
+bool i915_gem_unset_wedged(struct drm_i915_private *i915)
+{
+ struct i915_gem_timeline *tl;
+ int i;
+
+ lockdep_assert_held(&i915->drm.struct_mutex);
+ if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+ return true;
+
+ /* Before unwedging, make sure that all pending operations
+ * are flushed and errored out - we may have requests waiting upon
+ * third party fences. We marked all inflight requests as EIO, and
+ * every execbuf since returned EIO, for consistency we want all
+ * the currently pending requests to also be marked as EIO, which
+ * is done inside our nop_submit_request - and so we must wait.
+ *
+ * No more can be submitted until we reset the wedged bit.
+ */
+ list_for_each_entry(tl, &i915->gt.timelines, link) {
+ for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
+ struct drm_i915_gem_request *rq;
+
+ rq = i915_gem_active_peek(&tl->engine[i].last_request,
+ &i915->drm.struct_mutex);
+ if (!rq)
+ continue;
+
+ /* We can't use our normal waiter as we want to
+ * avoid recursively trying to handle the current
+ * reset. The basic dma_fence_default_wait() installs
+ * a callback for dma_fence_signal(), which is
+ * triggered by our nop handler (indirectly, the
+ * callback enables the signaler thread which is
+ * woken by the nop_submit_request() advancing the seqno
+ * and when the seqno passes the fence, the signaler
+ * then signals the fence waking us up).
+ */
+ if (dma_fence_default_wait(&rq->fence, true,
+ MAX_SCHEDULE_TIMEOUT) < 0)
+ return false;
+ }
+ }
+
+ /* Undo nop_submit_request. We prevent all new i915 requests from
+ * being queued (by disallowing execbuf whilst wedged) so having
+ * waited for all active requests above, we know the system is idle
+ * and do not have to worry about a thread being inside
+ * engine->submit_request() as we swap over. So unlike installing
+ * the nop_submit_request on reset, we can do this from normal
+ * context and do not require stop_machine().
+ */
+ intel_engines_reset_default_submission(i915);
+
+ smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
+ clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+
+ return true;
+}
+
static void
i915_gem_retire_work_handler(struct work_struct *work)
{
@@ -2997,9 +3112,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
* Wait for last execlists context complete, but bail out in case a
* new request is submitted.
*/
- wait_for(READ_ONCE(dev_priv->gt.active_requests) ||
- intel_execlists_idle(dev_priv), 10);
-
+ wait_for(intel_engines_are_idle(dev_priv), 10);
if (READ_ONCE(dev_priv->gt.active_requests))
return;
@@ -3024,11 +3137,13 @@ i915_gem_idle_work_handler(struct work_struct *work)
if (dev_priv->gt.active_requests)
goto out_unlock;
- if (wait_for(intel_execlists_idle(dev_priv), 10))
+ if (wait_for(intel_engines_are_idle(dev_priv), 10))
DRM_ERROR("Timeout waiting for engines to idle\n");
- for_each_engine(engine, dev_priv, id)
+ for_each_engine(engine, dev_priv, id) {
+ intel_engine_disarm_breadcrumbs(engine);
i915_gem_batch_pool_fini(&engine->batch_pool);
+ }
GEM_BUG_ON(!dev_priv->gt.awake);
dev_priv->gt.awake = false;
@@ -3156,6 +3271,29 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
return 0;
}
+static int wait_for_engine(struct intel_engine_cs *engine, int timeout_ms)
+{
+ return wait_for(intel_engine_is_idle(engine), timeout_ms);
+}
+
+static int wait_for_engines(struct drm_i915_private *i915)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, i915, id) {
+ if (GEM_WARN_ON(wait_for_engine(engine, 50))) {
+ i915_gem_set_wedged(i915);
+ return -EIO;
+ }
+
+ GEM_BUG_ON(intel_engine_get_seqno(engine) !=
+ intel_engine_last_submit(engine));
+ }
+
+ return 0;
+}
+
int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
{
int ret;
@@ -3170,48 +3308,16 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
if (ret)
return ret;
}
- } else {
- ret = wait_for_timeline(&i915->gt.global_timeline, flags);
- if (ret)
- return ret;
- }
- return 0;
-}
-
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
- bool force)
-{
- /* If we don't have a page list set up, then we're not pinned
- * to GPU, and we can ignore the cache flush because it'll happen
- * again at bind time.
- */
- if (!obj->mm.pages)
- return;
+ i915_gem_retire_requests(i915);
+ GEM_BUG_ON(i915->gt.active_requests);
- /*
- * Stolen memory is always coherent with the GPU as it is explicitly
- * marked as wc by the system, or the system is cache-coherent.
- */
- if (obj->stolen || obj->phys_handle)
- return;
-
- /* If the GPU is snooping the contents of the CPU cache,
- * we do not need to manually clear the CPU cache lines. However,
- * the caches are only snooped when the render cache is
- * flushed/invalidated. As we always have to emit invalidations
- * and flushes when moving into and out of the RENDER domain, correct
- * snooping behaviour occurs naturally as the result of our domain
- * tracking.
- */
- if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
- obj->cache_dirty = true;
- return;
+ ret = wait_for_engines(i915);
+ } else {
+ ret = wait_for_timeline(&i915->gt.global_timeline, flags);
}
- trace_i915_gem_object_clflush(obj);
- drm_clflush_sg(obj->mm.pages);
- obj->cache_dirty = false;
+ return ret;
}
/** Flushes the GTT write domain for the object if it's dirty. */
@@ -3239,15 +3345,18 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
* system agents we cannot reproduce this behaviour).
*/
wmb();
- if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
- POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+ if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+ if (intel_runtime_pm_get_if_in_use(dev_priv)) {
+ spin_lock_irq(&dev_priv->uncore.lock);
+ POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+ spin_unlock_irq(&dev_priv->uncore.lock);
+ intel_runtime_pm_put(dev_priv);
+ }
+ }
- intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
+ intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT));
obj->base.write_domain = 0;
- trace_i915_gem_object_change_domain(obj,
- obj->base.read_domains,
- I915_GEM_DOMAIN_GTT);
}
/** Flushes the CPU write domain for the object if it's dirty. */
@@ -3257,13 +3366,27 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
return;
- i915_gem_clflush_object(obj, obj->pin_display);
- intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+ i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+ obj->base.write_domain = 0;
+}
+
+static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
+{
+ if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
+ return;
+ i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
obj->base.write_domain = 0;
- trace_i915_gem_object_change_domain(obj,
- obj->base.read_domains,
- I915_GEM_DOMAIN_CPU);
+}
+
+void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
+{
+ if (!READ_ONCE(obj->pin_display))
+ return;
+
+ mutex_lock(&obj->base.dev->struct_mutex);
+ __i915_gem_object_flush_for_display(obj);
+ mutex_unlock(&obj->base.dev->struct_mutex);
}
/**
@@ -3277,7 +3400,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
- uint32_t old_write_domain, old_read_domains;
int ret;
lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3315,9 +3437,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
mb();
- old_write_domain = obj->base.write_domain;
- old_read_domains = obj->base.read_domains;
-
/* It should now be out of any other write domains, and we can update
* the domain values for our changes.
*/
@@ -3329,10 +3448,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
obj->mm.dirty = true;
}
- trace_i915_gem_object_change_domain(obj,
- old_read_domains,
- old_write_domain);
-
i915_gem_object_unpin_pages(obj);
return 0;
}
@@ -3457,7 +3572,7 @@ restart:
}
if (obj->base.write_domain == I915_GEM_DOMAIN_CPU &&
- cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+ i915_gem_object_is_coherent(obj))
obj->cache_dirty = true;
list_for_each_entry(vma, &obj->vma_list, obj_link)
@@ -3569,7 +3684,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view)
{
struct i915_vma *vma;
- u32 old_read_domains, old_write_domain;
int ret;
lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3629,24 +3743,14 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
- if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
- i915_gem_clflush_object(obj, true);
- intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
- }
-
- old_write_domain = obj->base.write_domain;
- old_read_domains = obj->base.read_domains;
+ __i915_gem_object_flush_for_display(obj);
+ intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
/* It should now be out of any other write domains, and we can update
* the domain values for our changes.
*/
- obj->base.write_domain = 0;
obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
- trace_i915_gem_object_change_domain(obj,
- old_read_domains,
- old_write_domain);
-
return vma;
err_unpin_display:
@@ -3682,7 +3786,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
- uint32_t old_write_domain, old_read_domains;
int ret;
lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3701,13 +3804,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
i915_gem_object_flush_gtt_write_domain(obj);
- old_write_domain = obj->base.write_domain;
- old_read_domains = obj->base.read_domains;
-
/* Flush the CPU cache if it's still invalid. */
if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
- i915_gem_clflush_object(obj, false);
-
+ i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
}
@@ -3724,10 +3823,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
}
- trace_i915_gem_object_change_domain(obj,
- old_read_domains,
- old_write_domain);
-
return 0;
}
@@ -3755,16 +3850,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
return -EIO;
spin_lock(&file_priv->mm.lock);
- list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
+ list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
if (time_after_eq(request->emitted_jiffies, recent_enough))
break;
- /*
- * Note that the request might not have been submitted yet.
- * In which case emitted_jiffies will be zero.
- */
- if (!request->emitted_jiffies)
- continue;
+ if (target) {
+ list_del(&target->client_link);
+ target->file_priv = NULL;
+ }
target = request;
}
@@ -4050,7 +4143,7 @@ frontbuffer_retire(struct i915_gem_active *active,
struct drm_i915_gem_object *obj =
container_of(active, typeof(*obj), frontbuffer_write);
- intel_fb_obj_flush(obj, true, ORIGIN_CS);
+ intel_fb_obj_flush(obj, ORIGIN_CS);
}
void i915_gem_object_init(struct drm_i915_gem_object *obj,
@@ -4314,11 +4407,29 @@ static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv)
!i915_gem_context_is_kernel(engine->last_retired_context));
}
+void i915_gem_sanitize(struct drm_i915_private *i915)
+{
+ /*
+ * If we inherit context state from the BIOS or earlier occupants
+ * of the GPU, the GPU may be in an inconsistent state when we
+ * try to take over. The only way to remove the earlier state
+ * is by resetting. However, resetting on earlier gen is tricky as
+ * it may impact the display and we are uncertain about the stability
+ * of the reset, so we only reset recent machines with logical
+ * context support (that must be reset to remove any stray contexts).
+ */
+ if (HAS_HW_CONTEXTS(i915)) {
+ int reset = intel_gpu_reset(i915, ALL_ENGINES);
+ WARN_ON(reset && reset != -ENODEV);
+ }
+}
+
int i915_gem_suspend(struct drm_i915_private *dev_priv)
{
struct drm_device *dev = &dev_priv->drm;
int ret;
+ intel_runtime_pm_get(dev_priv);
intel_suspend_gt_powersave(dev_priv);
mutex_lock(&dev->struct_mutex);
@@ -4333,16 +4444,13 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
*/
ret = i915_gem_switch_to_kernel_context(dev_priv);
if (ret)
- goto err;
+ goto err_unlock;
ret = i915_gem_wait_for_idle(dev_priv,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED);
if (ret)
- goto err;
-
- i915_gem_retire_requests(dev_priv);
- GEM_BUG_ON(dev_priv->gt.active_requests);
+ goto err_unlock;
assert_kernel_context_is_current(dev_priv);
i915_gem_context_lost(dev_priv);
@@ -4365,7 +4473,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
* reset the GPU back to its idle, low power state.
*/
WARN_ON(dev_priv->gt.awake);
- WARN_ON(!intel_execlists_idle(dev_priv));
+ WARN_ON(!intel_engines_are_idle(dev_priv));
/*
* Neither the BIOS, ourselves or any other kernel
@@ -4386,15 +4494,13 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
* machines is a good idea, we don't - just in case it leaves the
* machine in an unusable condition.
*/
- if (HAS_HW_CONTEXTS(dev_priv)) {
- int reset = intel_gpu_reset(dev_priv, ALL_ENGINES);
- WARN_ON(reset && reset != -ENODEV);
- }
-
- return 0;
+ i915_gem_sanitize(dev_priv);
+ goto out_rpm_put;
-err:
+err_unlock:
mutex_unlock(&dev->struct_mutex);
+out_rpm_put:
+ intel_runtime_pm_put(dev_priv);
return ret;
}
@@ -4464,11 +4570,24 @@ static void init_unused_rings(struct drm_i915_private *dev_priv)
}
}
-int
-i915_gem_init_hw(struct drm_i915_private *dev_priv)
+static int __i915_gem_restart_engines(void *data)
{
+ struct drm_i915_private *i915 = data;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ int err;
+
+ for_each_engine(engine, i915, id) {
+ err = engine->init_hw(engine);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int i915_gem_init_hw(struct drm_i915_private *dev_priv)
+{
int ret;
dev_priv->gt.last_init_time = ktime_get();
@@ -4514,16 +4633,14 @@ i915_gem_init_hw(struct drm_i915_private *dev_priv)
}
/* Need to do basic initialisation of all rings first: */
- for_each_engine(engine, dev_priv, id) {
- ret = engine->init_hw(engine);
- if (ret)
- goto out;
- }
+ ret = __i915_gem_restart_engines(dev_priv);
+ if (ret)
+ goto out;
intel_mocs_init_l3cc_table(dev_priv);
/* We can't enable contexts until all firmware is loaded */
- ret = intel_guc_setup(dev_priv);
+ ret = intel_uc_init_hw(dev_priv);
if (ret)
goto out;
@@ -4559,6 +4676,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
mutex_lock(&dev_priv->drm.struct_mutex);
+ i915_gem_clflush_init(dev_priv);
+
if (!i915.enable_execlists) {
dev_priv->gt.resume = intel_legacy_submission_resume;
dev_priv->gt.cleanup_engine = intel_engine_cleanup;
@@ -4607,6 +4726,11 @@ out_unlock:
return ret;
}
+void i915_gem_init_mmio(struct drm_i915_private *i915)
+{
+ i915_gem_sanitize(i915);
+}
+
void
i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
{
@@ -4665,7 +4789,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
- SLAB_DESTROY_BY_RCU);
+ SLAB_TYPESAFE_BY_RCU);
if (!dev_priv->requests)
goto err_vmas;
@@ -4720,7 +4844,9 @@ err_out:
void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
{
+ i915_gem_drain_freed_objects(dev_priv);
WARN_ON(!llist_empty(&dev_priv->mm.free_list));
+ WARN_ON(dev_priv->mm.object_count);
mutex_lock(&dev_priv->drm.struct_mutex);
i915_gem_timeline_fini(&dev_priv->gt.global_timeline);
@@ -4738,14 +4864,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
- intel_runtime_pm_get(dev_priv);
-
mutex_lock(&dev_priv->drm.struct_mutex);
i915_gem_shrink_all(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
- intel_runtime_pm_put(dev_priv);
-
return 0;
}
@@ -4796,7 +4918,7 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
* file_priv.
*/
spin_lock(&file_priv->mm.lock);
- list_for_each_entry(request, &file_priv->mm.request_list, client_list)
+ list_for_each_entry(request, &file_priv->mm.request_list, client_link)
request->file_priv = NULL;
spin_unlock(&file_priv->mm.lock);
@@ -4874,38 +4996,49 @@ i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
const void *data, size_t size)
{
struct drm_i915_gem_object *obj;
- struct sg_table *sg;
- size_t bytes;
- int ret;
+ struct file *file;
+ size_t offset;
+ int err;
obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
if (IS_ERR(obj))
return obj;
- ret = i915_gem_object_set_to_cpu_domain(obj, true);
- if (ret)
- goto fail;
+ GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
- ret = i915_gem_object_pin_pages(obj);
- if (ret)
- goto fail;
+ file = obj->base.filp;
+ offset = 0;
+ do {
+ unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
+ struct page *page;
+ void *pgdata, *vaddr;
- sg = obj->mm.pages;
- bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
- obj->mm.dirty = true; /* Backing store is now out of date */
- i915_gem_object_unpin_pages(obj);
+ err = pagecache_write_begin(file, file->f_mapping,
+ offset, len, 0,
+ &page, &pgdata);
+ if (err < 0)
+ goto fail;
- if (WARN_ON(bytes != size)) {
- DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
- ret = -EFAULT;
- goto fail;
- }
+ vaddr = kmap(page);
+ memcpy(vaddr, data, len);
+ kunmap(page);
+
+ err = pagecache_write_end(file, file->f_mapping,
+ offset, len, len,
+ page, pgdata);
+ if (err < 0)
+ goto fail;
+
+ size -= len;
+ data += len;
+ offset += len;
+ } while (size);
return obj;
fail:
i915_gem_object_put(obj);
- return ERR_PTR(ret);
+ return ERR_PTR(err);
}
struct scatterlist *
@@ -5060,3 +5193,11 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
sg = i915_gem_object_get_sg(obj, n, &offset);
return sg_dma_address(sg) + (offset << PAGE_SHIFT);
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/scatterlist.c"
+#include "selftests/mock_gem_device.c"
+#include "selftests/huge_gem_object.c"
+#include "selftests/i915_gem_object.c"
+#include "selftests/i915_gem_coherency.c"
+#endif