aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gem.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c621
1 files changed, 362 insertions, 259 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d44ad7bc1e94..fcc73a6ab503 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -65,7 +65,7 @@ insert_mappable_node(struct i915_ggtt *ggtt,
struct drm_mm_node *node, u32 size)
{
memset(node, 0, sizeof(*node));
- return drm_mm_insert_node_in_range(&ggtt->base.mm, node,
+ return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
size, 0, I915_COLOR_UNEVICTABLE,
0, ggtt->mappable_end,
DRM_MM_INSERT_LOW);
@@ -139,6 +139,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)
static u32 __i915_gem_park(struct drm_i915_private *i915)
{
+ GEM_TRACE("\n");
+
lockdep_assert_held(&i915->drm.struct_mutex);
GEM_BUG_ON(i915->gt.active_requests);
GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
@@ -181,6 +183,8 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
void i915_gem_park(struct drm_i915_private *i915)
{
+ GEM_TRACE("\n");
+
lockdep_assert_held(&i915->drm.struct_mutex);
GEM_BUG_ON(i915->gt.active_requests);
@@ -193,6 +197,8 @@ void i915_gem_park(struct drm_i915_private *i915)
void i915_gem_unpark(struct drm_i915_private *i915)
{
+ GEM_TRACE("\n");
+
lockdep_assert_held(&i915->drm.struct_mutex);
GEM_BUG_ON(!i915->gt.active_requests);
@@ -243,17 +249,17 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
struct i915_vma *vma;
u64 pinned;
- pinned = ggtt->base.reserved;
+ pinned = ggtt->vm.reserved;
mutex_lock(&dev->struct_mutex);
- list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
+ list_for_each_entry(vma, &ggtt->vm.active_list, vm_link)
if (i915_vma_is_pinned(vma))
pinned += vma->node.size;
- list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
+ list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link)
if (i915_vma_is_pinned(vma))
pinned += vma->node.size;
mutex_unlock(&dev->struct_mutex);
- args->aper_size = ggtt->base.total;
+ args->aper_size = ggtt->vm.total;
args->aper_available_size = args->aper_size - pinned;
return 0;
@@ -796,7 +802,7 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
* that was!).
*/
- wmb();
+ i915_gem_chipset_flush(dev_priv);
intel_runtime_pm_get(dev_priv);
spin_lock_irq(&dev_priv->uncore.lock);
@@ -831,6 +837,10 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
}
break;
+ case I915_GEM_DOMAIN_WC:
+ wmb();
+ break;
+
case I915_GEM_DOMAIN_CPU:
i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
break;
@@ -1217,9 +1227,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
page_length = remain < page_length ? remain : page_length;
if (node.allocated) {
wmb();
- ggtt->base.insert_page(&ggtt->base,
- i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
- node.start, I915_CACHE_NONE, 0);
+ ggtt->vm.insert_page(&ggtt->vm,
+ i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
+ node.start, I915_CACHE_NONE, 0);
wmb();
} else {
page_base += offset & PAGE_MASK;
@@ -1240,8 +1250,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
out_unpin:
if (node.allocated) {
wmb();
- ggtt->base.clear_range(&ggtt->base,
- node.start, node.size);
+ ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
remove_mappable_node(&node);
} else {
i915_vma_unpin(vma);
@@ -1420,9 +1429,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
page_length = remain < page_length ? remain : page_length;
if (node.allocated) {
wmb(); /* flush the write before we modify the GGTT */
- ggtt->base.insert_page(&ggtt->base,
- i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
- node.start, I915_CACHE_NONE, 0);
+ ggtt->vm.insert_page(&ggtt->vm,
+ i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
+ node.start, I915_CACHE_NONE, 0);
wmb(); /* flush modifications to the GGTT (insert_page) */
} else {
page_base += offset & PAGE_MASK;
@@ -1449,8 +1458,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
out_unpin:
if (node.allocated) {
wmb();
- ggtt->base.clear_range(&ggtt->base,
- node.start, node.size);
+ ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
remove_mappable_node(&node);
} else {
i915_vma_unpin(vma);
@@ -1619,6 +1627,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
goto err;
}
+ /* Writes not allowed into this read-only object */
+ if (i915_gem_object_is_readonly(obj)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
trace_i915_gem_object_pwrite(obj, args->offset, args->size);
ret = -ENODEV;
@@ -1991,9 +2005,9 @@ compute_partial_view(struct drm_i915_gem_object *obj,
* The current feature set supported by i915_gem_fault() and thus GTT mmaps
* is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
*/
-int i915_gem_fault(struct vm_fault *vmf)
+vm_fault_t i915_gem_fault(struct vm_fault *vmf)
{
-#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
+#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
struct vm_area_struct *area = vmf->vma;
struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
struct drm_device *dev = obj->base.dev;
@@ -2002,9 +2016,12 @@ int i915_gem_fault(struct vm_fault *vmf)
bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
struct i915_vma *vma;
pgoff_t page_offset;
- unsigned int flags;
int ret;
+ /* Sanity check that we allow writing into this object */
+ if (i915_gem_object_is_readonly(obj) && write)
+ return VM_FAULT_SIGBUS;
+
/* We don't use vmf->pgoff since that has the fake offset */
page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
@@ -2038,27 +2055,34 @@ int i915_gem_fault(struct vm_fault *vmf)
goto err_unlock;
}
- /* If the object is smaller than a couple of partial vma, it is
- * not worth only creating a single partial vma - we may as well
- * clear enough space for the full object.
- */
- flags = PIN_MAPPABLE;
- if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
- flags |= PIN_NONBLOCK | PIN_NONFAULT;
/* Now pin it into the GTT as needed */
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK |
+ PIN_NONFAULT);
if (IS_ERR(vma)) {
/* Use a partial view if it is bigger than available space */
struct i915_ggtt_view view =
compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
+ unsigned int flags;
+
+ flags = PIN_MAPPABLE;
+ if (view.type == I915_GGTT_VIEW_NORMAL)
+ flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
- /* Userspace is now writing through an untracked VMA, abandon
+ /*
+ * Userspace is now writing through an untracked VMA, abandon
* all hope that the hardware is able to track future writes.
*/
obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
- vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+ vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+ if (IS_ERR(vma) && !view.type) {
+ flags = PIN_MAPPABLE;
+ view.type = I915_GGTT_VIEW_PARTIAL;
+ vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+ }
}
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
@@ -2108,10 +2132,9 @@ err:
* fail). But any other -EIO isn't ours (e.g. swap in failure)
* and so needs to be reported.
*/
- if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
- ret = VM_FAULT_SIGBUS;
- break;
- }
+ if (!i915_terminally_wedged(&dev_priv->gpu_error))
+ return VM_FAULT_SIGBUS;
+ /* else: fall through */
case -EAGAIN:
/*
* EAGAIN means the gpu is hung and we'll wait for the error
@@ -2126,21 +2149,16 @@ err:
* EBUSY is ok: this just means that another thread
* already did the job.
*/
- ret = VM_FAULT_NOPAGE;
- break;
+ return VM_FAULT_NOPAGE;
case -ENOMEM:
- ret = VM_FAULT_OOM;
- break;
+ return VM_FAULT_OOM;
case -ENOSPC:
case -EFAULT:
- ret = VM_FAULT_SIGBUS;
- break;
+ return VM_FAULT_SIGBUS;
default:
WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
- ret = VM_FAULT_SIGBUS;
- break;
+ return VM_FAULT_SIGBUS;
}
- return ret;
}
static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
@@ -2259,7 +2277,9 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
/* Attempt to reap some mmap space from dead objects */
do {
- err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
+ err = i915_gem_wait_for_idle(dev_priv,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
if (err)
break;
@@ -2404,29 +2424,15 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
rcu_read_unlock();
}
-void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
- enum i915_mm_subclass subclass)
+static struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct sg_table *pages;
- if (i915_gem_object_has_pinned_pages(obj))
- return;
-
- GEM_BUG_ON(obj->bind_count);
- if (!i915_gem_object_has_pages(obj))
- return;
-
- /* May be called by shrinker from within get_pages() (on another bo) */
- mutex_lock_nested(&obj->mm.lock, subclass);
- if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
- goto unlock;
-
- /* ->put_pages might need to allocate memory for the bit17 swizzle
- * array, hence protect them from being reaped by removing them from gtt
- * lists early. */
pages = fetch_and_zero(&obj->mm.pages);
- GEM_BUG_ON(!pages);
+ if (!pages)
+ return NULL;
spin_lock(&i915->mm.obj_lock);
list_del(&obj->mm.link);
@@ -2445,12 +2451,37 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
}
__i915_gem_object_reset_page_iter(obj);
+ obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+
+ return pages;
+}
+
+void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+ enum i915_mm_subclass subclass)
+{
+ struct sg_table *pages;
+
+ if (i915_gem_object_has_pinned_pages(obj))
+ return;
+
+ GEM_BUG_ON(obj->bind_count);
+ if (!i915_gem_object_has_pages(obj))
+ return;
+ /* May be called by shrinker from within get_pages() (on another bo) */
+ mutex_lock_nested(&obj->mm.lock, subclass);
+ if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
+ goto unlock;
+
+ /*
+ * ->put_pages might need to allocate memory for the bit17 swizzle
+ * array, hence protect them from being reaped by removing them from gtt
+ * lists early.
+ */
+ pages = __i915_gem_object_unset_pages(obj);
if (!IS_ERR(pages))
obj->ops->put_pages(obj, pages);
- obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
-
unlock:
mutex_unlock(&obj->mm.lock);
}
@@ -2968,16 +2999,16 @@ static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
- DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, ban %s\n",
- ctx->name, atomic_read(&ctx->guilty_count),
- score, yesno(banned && bannable));
-
/* Cool contexts don't accumulate client ban score */
if (!bannable)
return;
- if (banned)
+ if (banned) {
+ DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
+ ctx->name, atomic_read(&ctx->guilty_count),
+ score);
i915_gem_context_set_banned(ctx);
+ }
if (!IS_ERR_OR_NULL(ctx->file_priv))
i915_gem_client_mark_guilty(ctx->file_priv, ctx);
@@ -3025,7 +3056,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
struct i915_request *
i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
{
- struct i915_request *request = NULL;
+ struct i915_request *request;
/*
* During the reset sequence, we must prevent the engine from
@@ -3036,52 +3067,7 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
*/
intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
- /*
- * Prevent the signaler thread from updating the request
- * state (by calling dma_fence_signal) as we are processing
- * the reset. The write from the GPU of the seqno is
- * asynchronous and the signaler thread may see a different
- * value to us and declare the request complete, even though
- * the reset routine have picked that request as the active
- * (incomplete) request. This conflict is not handled
- * gracefully!
- */
- kthread_park(engine->breadcrumbs.signaler);
-
- /*
- * Prevent request submission to the hardware until we have
- * completed the reset in i915_gem_reset_finish(). If a request
- * is completed by one engine, it may then queue a request
- * to a second via its execlists->tasklet *just* as we are
- * calling engine->init_hw() and also writing the ELSP.
- * Turning off the execlists->tasklet until the reset is over
- * prevents the race.
- *
- * Note that this needs to be a single atomic operation on the
- * tasklet (flush existing tasks, prevent new tasks) to prevent
- * a race between reset and set-wedged. It is not, so we do the best
- * we can atm and make sure we don't lock the machine up in the more
- * common case of recursively being called from set-wedged from inside
- * i915_reset.
- */
- if (!atomic_read(&engine->execlists.tasklet.count))
- tasklet_kill(&engine->execlists.tasklet);
- tasklet_disable(&engine->execlists.tasklet);
-
- /*
- * We're using worker to queue preemption requests from the tasklet in
- * GuC submission mode.
- * Even though tasklet was disabled, we may still have a worker queued.
- * Let's make sure that all workers scheduled before disabling the
- * tasklet are completed before continuing with the reset.
- */
- if (engine->i915->guc.preempt_wq)
- flush_workqueue(engine->i915->guc.preempt_wq);
-
- if (engine->irq_seqno_barrier)
- engine->irq_seqno_barrier(engine);
-
- request = i915_gem_find_active_request(engine);
+ request = engine->reset.prepare(engine);
if (request && request->fence.error == -EIO)
request = ERR_PTR(-EIO); /* Previous reset failed! */
@@ -3111,43 +3097,24 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
return err;
}
-static void skip_request(struct i915_request *request)
-{
- void *vaddr = request->ring->vaddr;
- u32 head;
-
- /* As this request likely depends on state from the lost
- * context, clear out all the user operations leaving the
- * breadcrumb at the end (so we get the fence notifications).
- */
- head = request->head;
- if (request->postfix < head) {
- memset(vaddr + head, 0, request->ring->size - head);
- head = 0;
- }
- memset(vaddr + head, 0, request->postfix - head);
-
- dma_fence_set_error(&request->fence, -EIO);
-}
-
static void engine_skip_context(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
- struct i915_gem_context *hung_ctx = request->ctx;
+ struct i915_gem_context *hung_ctx = request->gem_context;
struct i915_timeline *timeline = request->timeline;
unsigned long flags;
GEM_BUG_ON(timeline == &engine->timeline);
spin_lock_irqsave(&engine->timeline.lock, flags);
- spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING);
+ spin_lock(&timeline->lock);
list_for_each_entry_continue(request, &engine->timeline.requests, link)
- if (request->ctx == hung_ctx)
- skip_request(request);
+ if (request->gem_context == hung_ctx)
+ i915_request_skip(request, -EIO);
list_for_each_entry(request, &timeline->requests, link)
- skip_request(request);
+ i915_request_skip(request, -EIO);
spin_unlock(&timeline->lock);
spin_unlock_irqrestore(&engine->timeline.lock, flags);
@@ -3189,11 +3156,11 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
}
if (stalled) {
- i915_gem_context_mark_guilty(request->ctx);
- skip_request(request);
+ i915_gem_context_mark_guilty(request->gem_context);
+ i915_request_skip(request, -EIO);
/* If this context is now banned, skip all pending requests. */
- if (i915_gem_context_is_banned(request->ctx))
+ if (i915_gem_context_is_banned(request->gem_context))
engine_skip_context(request);
} else {
/*
@@ -3203,15 +3170,17 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
*/
request = i915_gem_find_active_request(engine);
if (request) {
- i915_gem_context_mark_innocent(request->ctx);
+ unsigned long flags;
+
+ i915_gem_context_mark_innocent(request->gem_context);
dma_fence_set_error(&request->fence, -EAGAIN);
/* Rewind the engine to replay the incomplete rq */
- spin_lock_irq(&engine->timeline.lock);
+ spin_lock_irqsave(&engine->timeline.lock, flags);
request = list_prev_entry(request, link);
if (&request->link == &engine->timeline.requests)
request = NULL;
- spin_unlock_irq(&engine->timeline.lock);
+ spin_unlock_irqrestore(&engine->timeline.lock, flags);
}
}
@@ -3232,13 +3201,8 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine,
if (request)
request = i915_gem_reset_request(engine, request, stalled);
- if (request) {
- DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
- engine->name, request->global_seqno);
- }
-
/* Setup the CS to resume from the breadcrumb of the hung request */
- engine->reset_hw(engine, request);
+ engine->reset.reset(engine, request);
}
void i915_gem_reset(struct drm_i915_private *dev_priv,
@@ -3252,14 +3216,14 @@ void i915_gem_reset(struct drm_i915_private *dev_priv,
i915_retire_requests(dev_priv);
for_each_engine(engine, dev_priv, id) {
- struct i915_gem_context *ctx;
+ struct intel_context *ce;
i915_gem_reset_engine(engine,
engine->hangcheck.active_request,
stalled_mask & ENGINE_MASK(id));
- ctx = fetch_and_zero(&engine->last_retired_context);
- if (ctx)
- intel_context_unpin(ctx, engine);
+ ce = fetch_and_zero(&engine->last_retired_context);
+ if (ce)
+ intel_context_unpin(ce);
/*
* Ostensibily, we always want a context loaded for powersaving,
@@ -3277,7 +3241,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv,
rq = i915_request_alloc(engine,
dev_priv->kernel_context);
if (!IS_ERR(rq))
- __i915_request_add(rq, false);
+ i915_request_add(rq);
}
}
@@ -3286,8 +3250,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv,
void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
{
- tasklet_enable(&engine->execlists.tasklet);
- kthread_unpark(engine->breadcrumbs.signaler);
+ engine->reset.finish(engine);
intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
}
@@ -3565,6 +3528,22 @@ new_requests_since_last_retire(const struct drm_i915_private *i915)
work_pending(&i915->gt.idle_work.work));
}
+static void assert_kernel_context_is_current(struct drm_i915_private *i915)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ if (i915_terminally_wedged(&i915->gpu_error))
+ return;
+
+ GEM_BUG_ON(i915->gt.active_requests);
+ for_each_engine(engine, i915, id) {
+ GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request));
+ GEM_BUG_ON(engine->last_retired_context !=
+ to_intel_context(i915->kernel_context, engine));
+ }
+}
+
static void
i915_gem_idle_work_handler(struct work_struct *work)
{
@@ -3576,6 +3555,24 @@ i915_gem_idle_work_handler(struct work_struct *work)
if (!READ_ONCE(dev_priv->gt.awake))
return;
+ if (READ_ONCE(dev_priv->gt.active_requests))
+ return;
+
+ /*
+ * Flush out the last user context, leaving only the pinned
+ * kernel context resident. When we are idling on the kernel_context,
+ * no more new requests (with a context switch) are emitted and we
+ * can finally rest. A consequence is that the idle work handler is
+ * always called at least twice before idling (and if the system is
+ * idle that implies a round trip through the retire worker).
+ */
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ i915_gem_switch_to_kernel_context(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
+ GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n",
+ READ_ONCE(dev_priv->gt.active_requests));
+
/*
* Wait for last execlists context complete, but bail out in case a
* new request is submitted. As we don't trust the hardware, we
@@ -3609,6 +3606,8 @@ i915_gem_idle_work_handler(struct work_struct *work)
epoch = __i915_gem_park(dev_priv);
+ assert_kernel_context_is_current(dev_priv);
+
rearm_hangcheck = false;
out_unlock:
mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -3755,9 +3754,31 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
return ret;
}
-static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags)
+static long wait_for_timeline(struct i915_timeline *tl,
+ unsigned int flags, long timeout)
{
- return i915_gem_active_wait(&tl->last_request, flags);
+ struct i915_request *rq;
+
+ rq = i915_gem_active_get_unlocked(&tl->last_request);
+ if (!rq)
+ return timeout;
+
+ /*
+ * "Race-to-idle".
+ *
+ * Switching to the kernel context is often used a synchronous
+ * step prior to idling, e.g. in suspend for flushing all
+ * current operations to memory before sleeping. These we
+ * want to complete as quickly as possible to avoid prolonged
+ * stalls, so allow the gpu to boost to maximum clocks.
+ */
+ if (flags & I915_WAIT_FOR_IDLE_BOOST)
+ gen6_rps_boost(rq, NULL);
+
+ timeout = i915_request_wait(rq, flags, timeout);
+ i915_request_put(rq);
+
+ return timeout;
}
static int wait_for_engines(struct drm_i915_private *i915)
@@ -3773,8 +3794,13 @@ static int wait_for_engines(struct drm_i915_private *i915)
return 0;
}
-int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
+int i915_gem_wait_for_idle(struct drm_i915_private *i915,
+ unsigned int flags, long timeout)
{
+ GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
+ flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
+ timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
+
/* If the device is asleep, we have no requests outstanding */
if (!READ_ONCE(i915->gt.awake))
return 0;
@@ -3786,26 +3812,31 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
lockdep_assert_held(&i915->drm.struct_mutex);
list_for_each_entry(tl, &i915->gt.timelines, link) {
- err = wait_for_timeline(tl, flags);
- if (err)
- return err;
+ timeout = wait_for_timeline(tl, flags, timeout);
+ if (timeout < 0)
+ return timeout;
}
- i915_retire_requests(i915);
- return wait_for_engines(i915);
+ err = wait_for_engines(i915);
+ if (err)
+ return err;
+
+ i915_retire_requests(i915);
+ GEM_BUG_ON(i915->gt.active_requests);
} else {
struct intel_engine_cs *engine;
enum intel_engine_id id;
- int err;
for_each_engine(engine, i915, id) {
- err = wait_for_timeline(&engine->timeline, flags);
- if (err)
- return err;
- }
+ struct i915_timeline *tl = &engine->timeline;
- return 0;
+ timeout = wait_for_timeline(tl, flags, timeout);
+ if (timeout < 0)
+ return timeout;
+ }
}
+
+ return 0;
}
static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
@@ -4379,7 +4410,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
u64 flags)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
- struct i915_address_space *vm = &dev_priv->ggtt.base;
+ struct i915_address_space *vm = &dev_priv->ggtt.vm;
struct i915_vma *vma;
int ret;
@@ -4967,25 +4998,25 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
i915_gem_object_put(obj);
}
-static void assert_kernel_context_is_current(struct drm_i915_private *i915)
+void i915_gem_sanitize(struct drm_i915_private *i915)
{
- struct i915_gem_context *kernel_context = i915->kernel_context;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
+ int err;
- for_each_engine(engine, i915, id) {
- GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request));
- GEM_BUG_ON(engine->last_retired_context != kernel_context);
- }
-}
+ GEM_TRACE("\n");
-void i915_gem_sanitize(struct drm_i915_private *i915)
-{
- if (i915_terminally_wedged(&i915->gpu_error)) {
- mutex_lock(&i915->drm.struct_mutex);
+ mutex_lock(&i915->drm.struct_mutex);
+
+ intel_runtime_pm_get(i915);
+ intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
+
+ /*
+ * As we have just resumed the machine and woken the device up from
+ * deep PCI sleep (presumably D3_cold), assume the HW has been reset
+ * back to defaults, recovering from whatever wedged state we left it
+ * in and so worth trying to use the device once more.
+ */
+ if (i915_terminally_wedged(&i915->gpu_error))
i915_gem_unset_wedged(i915);
- mutex_unlock(&i915->drm.struct_mutex);
- }
/*
* If we inherit context state from the BIOS or earlier occupants
@@ -4995,60 +5026,94 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
* it may impact the display and we are uncertain about the stability
* of the reset, so this could be applied to even earlier gen.
*/
+ err = -ENODEV;
if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
- WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
+ err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
+ if (!err)
+ intel_engines_sanitize(i915);
+
+ intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
+ intel_runtime_pm_put(i915);
+
+ i915_gem_contexts_lost(i915);
+ mutex_unlock(&i915->drm.struct_mutex);
}
-int i915_gem_suspend(struct drm_i915_private *dev_priv)
+int i915_gem_suspend(struct drm_i915_private *i915)
{
- struct drm_device *dev = &dev_priv->drm;
int ret;
- intel_runtime_pm_get(dev_priv);
- intel_suspend_gt_powersave(dev_priv);
+ GEM_TRACE("\n");
- mutex_lock(&dev->struct_mutex);
+ intel_runtime_pm_get(i915);
+ intel_suspend_gt_powersave(i915);
+
+ mutex_lock(&i915->drm.struct_mutex);
- /* We have to flush all the executing contexts to main memory so
+ /*
+ * We have to flush all the executing contexts to main memory so
* that they can saved in the hibernation image. To ensure the last
* context image is coherent, we have to switch away from it. That
- * leaves the dev_priv->kernel_context still active when
+ * leaves the i915->kernel_context still active when
* we actually suspend, and its image in memory may not match the GPU
* state. Fortunately, the kernel_context is disposable and we do
* not rely on its state.
*/
- if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
- ret = i915_gem_switch_to_kernel_context(dev_priv);
+ if (!i915_terminally_wedged(&i915->gpu_error)) {
+ ret = i915_gem_switch_to_kernel_context(i915);
if (ret)
goto err_unlock;
- ret = i915_gem_wait_for_idle(dev_priv,
+ ret = i915_gem_wait_for_idle(i915,
I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_LOCKED);
+ I915_WAIT_LOCKED |
+ I915_WAIT_FOR_IDLE_BOOST,
+ MAX_SCHEDULE_TIMEOUT);
if (ret && ret != -EIO)
goto err_unlock;
- assert_kernel_context_is_current(dev_priv);
+ assert_kernel_context_is_current(i915);
}
- i915_gem_contexts_lost(dev_priv);
- mutex_unlock(&dev->struct_mutex);
+ i915_retire_requests(i915); /* ensure we flush after wedging */
+
+ mutex_unlock(&i915->drm.struct_mutex);
- intel_uc_suspend(dev_priv);
+ intel_uc_suspend(i915);
- cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
- cancel_delayed_work_sync(&dev_priv->gt.retire_work);
+ cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
+ cancel_delayed_work_sync(&i915->gt.retire_work);
- /* As the idle_work is rearming if it detects a race, play safe and
+ /*
+ * As the idle_work is rearming if it detects a race, play safe and
* repeat the flush until it is definitely idle.
*/
- drain_delayed_work(&dev_priv->gt.idle_work);
+ drain_delayed_work(&i915->gt.idle_work);
- /* Assert that we sucessfully flushed all the work and
+ /*
+ * Assert that we successfully flushed all the work and
* reset the GPU back to its idle, low power state.
*/
- WARN_ON(dev_priv->gt.awake);
- if (WARN_ON(!intel_engines_are_idle(dev_priv)))
- i915_gem_set_wedged(dev_priv); /* no hope, discard everything */
+ WARN_ON(i915->gt.awake);
+ if (WARN_ON(!intel_engines_are_idle(i915)))
+ i915_gem_set_wedged(i915); /* no hope, discard everything */
+
+ intel_runtime_pm_put(i915);
+ return 0;
+
+err_unlock:
+ mutex_unlock(&i915->drm.struct_mutex);
+ intel_runtime_pm_put(i915);
+ return ret;
+}
+
+void i915_gem_suspend_late(struct drm_i915_private *i915)
+{
+ struct drm_i915_gem_object *obj;
+ struct list_head *phases[] = {
+ &i915->mm.unbound_list,
+ &i915->mm.bound_list,
+ NULL
+ }, **phase;
/*
* Neither the BIOS, ourselves or any other kernel
@@ -5069,20 +5134,22 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
* machines is a good idea, we don't - just in case it leaves the
* machine in an unusable condition.
*/
- intel_uc_sanitize(dev_priv);
- i915_gem_sanitize(dev_priv);
- intel_runtime_pm_put(dev_priv);
- return 0;
+ mutex_lock(&i915->drm.struct_mutex);
+ for (phase = phases; *phase; phase++) {
+ list_for_each_entry(obj, *phase, mm.link)
+ WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
+ }
+ mutex_unlock(&i915->drm.struct_mutex);
-err_unlock:
- mutex_unlock(&dev->struct_mutex);
- intel_runtime_pm_put(dev_priv);
- return ret;
+ intel_uc_sanitize(i915);
+ i915_gem_sanitize(i915);
}
void i915_gem_resume(struct drm_i915_private *i915)
{
+ GEM_TRACE("\n");
+
WARN_ON(i915->gt.awake);
mutex_lock(&i915->drm.struct_mutex);
@@ -5256,8 +5323,18 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
/* Only when the HW is re-initialised, can we replay the requests */
ret = __i915_gem_restart_engines(dev_priv);
+ if (ret)
+ goto cleanup_uc;
+
+ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+
+ return 0;
+
+cleanup_uc:
+ intel_uc_fini_hw(dev_priv);
out:
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+
return ret;
}
@@ -5294,7 +5371,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
if (engine->init_context)
err = engine->init_context(rq);
- __i915_request_add(rq, true);
+ i915_request_add(rq);
if (err)
goto err_active;
}
@@ -5303,9 +5380,11 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
if (err)
goto err_active;
- err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
- if (err)
+ if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) {
+ i915_gem_set_wedged(i915);
+ err = -EIO; /* Caller will declare us wedged */
goto err_active;
+ }
assert_kernel_context_is_current(i915);
@@ -5368,7 +5447,9 @@ err_active:
if (WARN_ON(i915_gem_switch_to_kernel_context(i915)))
goto out_ctx;
- if (WARN_ON(i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED)))
+ if (WARN_ON(i915_gem_wait_for_idle(i915,
+ I915_WAIT_LOCKED,
+ MAX_SCHEDULE_TIMEOUT)))
goto out_ctx;
i915_gem_contexts_lost(i915);
@@ -5379,12 +5460,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
{
int ret;
- /*
- * We need to fallback to 4K pages since gvt gtt handling doesn't
- * support huge page entries - we will need to check either hypervisor
- * mm can support huge guest page or just do emulation in gvt.
- */
- if (intel_vgpu_active(dev_priv))
+ /* We need to fallback to 4K pages if host doesn't support huge gtt. */
+ if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
mkwrite_device_info(dev_priv)->page_sizes =
I915_GTT_PAGE_SIZE_4K;
@@ -5402,13 +5479,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
if (ret)
return ret;
- ret = intel_wopcm_init(&dev_priv->wopcm);
+ ret = intel_uc_init_misc(dev_priv);
if (ret)
return ret;
- ret = intel_uc_init_misc(dev_priv);
+ ret = intel_wopcm_init(&dev_priv->wopcm);
if (ret)
- return ret;
+ goto err_uc_misc;
/* This is just a security blanket to placate dragons.
* On some systems, we very sporadically observe that the first TLBs
@@ -5484,8 +5561,14 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
* driver doesn't explode during runtime.
*/
err_init_hw:
- i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED);
- i915_gem_contexts_lost(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
+ WARN_ON(i915_gem_suspend(dev_priv));
+ i915_gem_suspend_late(dev_priv);
+
+ i915_gem_drain_workqueue(dev_priv);
+
+ mutex_lock(&dev_priv->drm.struct_mutex);
intel_uc_fini_hw(dev_priv);
err_uc_init:
intel_uc_fini(dev_priv);
@@ -5502,6 +5585,7 @@ err_unlock:
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
mutex_unlock(&dev_priv->drm.struct_mutex);
+err_uc_misc:
intel_uc_fini_misc(dev_priv);
if (ret != -EIO)
@@ -5514,7 +5598,8 @@ err_unlock:
* for all other failure, such as an allocation failure, bail.
*/
if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
- DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
+ i915_load_error(dev_priv,
+ "Failed to initialize GPU, declaring it wedged!\n");
i915_gem_set_wedged(dev_priv);
}
ret = 0;
@@ -5524,6 +5609,28 @@ err_unlock:
return ret;
}
+void i915_gem_fini(struct drm_i915_private *dev_priv)
+{
+ i915_gem_suspend_late(dev_priv);
+
+ /* Flush any outstanding unpin_work. */
+ i915_gem_drain_workqueue(dev_priv);
+
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ intel_uc_fini_hw(dev_priv);
+ intel_uc_fini(dev_priv);
+ i915_gem_cleanup_engines(dev_priv);
+ i915_gem_contexts_fini(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
+ intel_uc_fini_misc(dev_priv);
+ i915_gem_cleanup_userptr(dev_priv);
+
+ i915_gem_drain_freed_objects(dev_priv);
+
+ WARN_ON(!list_empty(&dev_priv->contexts.list));
+}
+
void i915_gem_init_mmio(struct drm_i915_private *i915)
{
i915_gem_sanitize(i915);
@@ -5688,16 +5795,17 @@ int i915_gem_freeze(struct drm_i915_private *dev_priv)
return 0;
}
-int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
+int i915_gem_freeze_late(struct drm_i915_private *i915)
{
struct drm_i915_gem_object *obj;
struct list_head *phases[] = {
- &dev_priv->mm.unbound_list,
- &dev_priv->mm.bound_list,
+ &i915->mm.unbound_list,
+ &i915->mm.bound_list,
NULL
- }, **p;
+ }, **phase;
- /* Called just before we write the hibernation image.
+ /*
+ * Called just before we write the hibernation image.
*
* We need to update the domain tracking to reflect that the CPU
* will be accessing all the pages to create and restore from the
@@ -5711,15 +5819,15 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
* the objects as well, see i915_gem_freeze()
*/
- i915_gem_shrink(dev_priv, -1UL, NULL, I915_SHRINK_UNBOUND);
- i915_gem_drain_freed_objects(dev_priv);
+ i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
+ i915_gem_drain_freed_objects(i915);
- spin_lock(&dev_priv->mm.obj_lock);
- for (p = phases; *p; p++) {
- list_for_each_entry(obj, *p, mm.link)
- __start_cpu_write(obj);
+ mutex_lock(&i915->drm.struct_mutex);
+ for (phase = phases; *phase; phase++) {
+ list_for_each_entry(obj, *phase, mm.link)
+ WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
}
- spin_unlock(&dev_priv->mm.obj_lock);
+ mutex_unlock(&i915->drm.struct_mutex);
return 0;
}
@@ -6039,16 +6147,7 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
goto err_unlock;
}
- pages = fetch_and_zero(&obj->mm.pages);
- if (pages) {
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
-
- __i915_gem_object_reset_page_iter(obj);
-
- spin_lock(&i915->mm.obj_lock);
- list_del(&obj->mm.link);
- spin_unlock(&i915->mm.obj_lock);
- }
+ pages = __i915_gem_object_unset_pages(obj);
obj->ops = &i915_gem_phys_ops;
@@ -6066,7 +6165,11 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
err_xfer:
obj->ops = &i915_gem_object_ops;
- obj->mm.pages = pages;
+ if (!IS_ERR_OR_NULL(pages)) {
+ unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
+
+ __i915_gem_object_set_pages(obj, pages, sg_page_sizes);
+ }
err_unlock:
mutex_unlock(&obj->mm.lock);
return err;