Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c  782
1 file changed, 268 insertions(+), 514 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8558e81fdc2a..ad01c92aaf74 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -42,6 +42,7 @@
#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gemfs.h"
+#include "i915_globals.h"
#include "i915_reset.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
@@ -49,6 +50,7 @@
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include "intel_mocs.h"
+#include "intel_pm.h"
#include "intel_workarounds.h"
static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
@@ -100,48 +102,7 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
spin_unlock(&dev_priv->mm.object_stat_lock);
}
-static int
-i915_gem_wait_for_error(struct i915_gpu_error *error)
-{
- int ret;
-
- might_sleep();
-
- /*
- * Only wait 10 seconds for the gpu reset to complete to avoid hanging
- * userspace. If it takes that long something really bad is going on and
- * we should simply try to bail out and fail as gracefully as possible.
- */
- ret = wait_event_interruptible_timeout(error->reset_queue,
- !i915_reset_backoff(error),
- I915_RESET_TIMEOUT);
- if (ret == 0) {
- DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
- return -EIO;
- } else if (ret < 0) {
- return ret;
- } else {
- return 0;
- }
-}
-
-int i915_mutex_lock_interruptible(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = to_i915(dev);
- int ret;
-
- ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
- if (ret)
- return ret;
-
- ret = mutex_lock_interruptible(&dev->struct_mutex);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static u32 __i915_gem_park(struct drm_i915_private *i915)
+static void __i915_gem_park(struct drm_i915_private *i915)
{
intel_wakeref_t wakeref;
@@ -152,9 +113,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
if (!i915->gt.awake)
- return I915_EPOCH_INVALID;
-
- GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID);
+ return;
/*
* Be paranoid and flush a concurrent interrupt to make sure
@@ -183,7 +142,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
- return i915->gt.epoch;
+ i915_globals_park();
}
void i915_gem_park(struct drm_i915_private *i915)
@@ -225,8 +184,7 @@ void i915_gem_unpark(struct drm_i915_private *i915)
i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
GEM_BUG_ON(!i915->gt.awake);
- if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
- i915->gt.epoch = 1;
+ i915_globals_unpark();
intel_enable_gt_powersave(i915);
i915_update_gfx_val(i915);
@@ -351,7 +309,7 @@ static void __start_cpu_write(struct drm_i915_gem_object *obj)
obj->cache_dirty = true;
}
-static void
+void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
struct sg_table *pages,
bool needs_clflush)
@@ -459,8 +417,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
static long
i915_gem_object_wait_fence(struct dma_fence *fence,
unsigned int flags,
- long timeout,
- struct intel_rps_client *rps_client)
+ long timeout)
{
struct i915_request *rq;
@@ -478,27 +435,6 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
if (i915_request_completed(rq))
goto out;
- /*
- * This client is about to stall waiting for the GPU. In many cases
- * this is undesirable and limits the throughput of the system, as
- * many clients cannot continue processing user input/output whilst
- * blocked. RPS autotuning may take tens of milliseconds to respond
- * to the GPU load and thus incurs additional latency for the client.
- * We can circumvent that by promoting the GPU frequency to maximum
- * before we wait. This makes the GPU throttle up much more quickly
- * (good for benchmarks and user experience, e.g. window animations),
- * but at a cost of spending more power processing the workload
- * (bad for battery). Not all clients even want their results
- * immediately and for them we should just let the GPU select its own
- * frequency to maximise efficiency. To prevent a single client from
- * forcing the clocks too high for the whole system, we only allow
- * each client to waitboost once in a busy period.
- */
- if (rps_client && !i915_request_started(rq)) {
- if (INTEL_GEN(rq->i915) >= 6)
- gen6_rps_boost(rq, rps_client);
- }
-
timeout = i915_request_wait(rq, flags, timeout);
out:
@@ -511,8 +447,7 @@ out:
static long
i915_gem_object_wait_reservation(struct reservation_object *resv,
unsigned int flags,
- long timeout,
- struct intel_rps_client *rps_client)
+ long timeout)
{
unsigned int seq = __read_seqcount_begin(&resv->seq);
struct dma_fence *excl;
@@ -530,8 +465,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
for (i = 0; i < count; i++) {
timeout = i915_gem_object_wait_fence(shared[i],
- flags, timeout,
- rps_client);
+ flags, timeout);
if (timeout < 0)
break;
@@ -557,8 +491,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
}
if (excl && timeout >= 0)
- timeout = i915_gem_object_wait_fence(excl, flags, timeout,
- rps_client);
+ timeout = i915_gem_object_wait_fence(excl, flags, timeout);
dma_fence_put(excl);
@@ -652,30 +585,19 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
* @obj: i915 gem object
* @flags: how to wait (under a lock, for all rendering or just for writes etc)
* @timeout: how long to wait
- * @rps_client: client (user process) to charge for any waitboosting
*/
int
i915_gem_object_wait(struct drm_i915_gem_object *obj,
unsigned int flags,
- long timeout,
- struct intel_rps_client *rps_client)
+ long timeout)
{
might_sleep();
GEM_BUG_ON(timeout < 0);
- timeout = i915_gem_object_wait_reservation(obj->resv,
- flags, timeout,
- rps_client);
+ timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
return timeout < 0 ? timeout : 0;
}
-static struct intel_rps_client *to_rps_client(struct drm_file *file)
-{
- struct drm_i915_file_private *fpriv = file->driver_priv;
-
- return &fpriv->rps_client;
-}
-
static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
struct drm_i915_gem_pwrite *args,
@@ -698,28 +620,18 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
return 0;
}
-void *i915_gem_object_alloc(struct drm_i915_private *dev_priv)
-{
- return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
-}
-
-void i915_gem_object_free(struct drm_i915_gem_object *obj)
-{
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
- kmem_cache_free(dev_priv->objects, obj);
-}
-
static int
i915_gem_create(struct drm_file *file,
struct drm_i915_private *dev_priv,
- u64 size,
+ u64 *size_p,
u32 *handle_p)
{
struct drm_i915_gem_object *obj;
- int ret;
u32 handle;
+ u64 size;
+ int ret;
- size = roundup(size, PAGE_SIZE);
+ size = round_up(*size_p, PAGE_SIZE);
if (size == 0)
return -EINVAL;
@@ -735,6 +647,7 @@ i915_gem_create(struct drm_file *file,
return ret;
*handle_p = handle;
+ *size_p = size;
return 0;
}
@@ -747,7 +660,7 @@ i915_gem_dumb_create(struct drm_file *file,
args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
args->size = args->pitch * args->height;
return i915_gem_create(file, to_i915(dev),
- args->size, &args->handle);
+ &args->size, &args->handle);
}
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
@@ -772,7 +685,7 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
i915_gem_flush_free_objects(dev_priv);
return i915_gem_create(file, dev_priv,
- args->size, &args->handle);
+ &args->size, &args->handle);
}
static inline enum fb_op_origin
@@ -881,8 +794,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED,
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
@@ -934,8 +846,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
@@ -1197,8 +1108,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT,
- to_rps_client(file));
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
goto out;
@@ -1497,8 +1407,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT,
- to_rps_client(file));
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
goto err;
@@ -1578,17 +1487,37 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
return -EINVAL;
- /* Having something in the write domain implies it's in the read
+ /*
+ * Having something in the write domain implies it's in the read
* domain, and only that read domain. Enforce that in the request.
*/
- if (write_domain != 0 && read_domains != write_domain)
+ if (write_domain && read_domains != write_domain)
return -EINVAL;
+ if (!read_domains)
+ return 0;
+
obj = i915_gem_object_lookup(file, args->handle);
if (!obj)
return -ENOENT;
- /* Try to flush the object off the GPU without holding the lock.
+ /*
+ * Already in the desired write domain? Nothing for us to do!
+ *
+ * We apply a little bit of cunning here to catch a broader set of
+ * no-ops. If obj->write_domain is set, we must be in the same
+ * obj->read_domains, and only that domain. Therefore, if that
+ * obj->write_domain matches the request read_domains, we are
+ * already in the same read/write domain and can skip the operation,
+ * without having to further check the requested write_domain.
+ */
+ if (READ_ONCE(obj->write_domain) == read_domains) {
+ err = 0;
+ goto out;
+ }
+
+ /*
+ * Try to flush the object off the GPU without holding the lock.
* We will repeat the flush holding the lock in the normal manner
* to catch cases where we are gazumped.
*/
@@ -1596,8 +1525,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_PRIORITY |
(write_domain ? I915_WAIT_ALL : 0),
- MAX_SCHEDULE_TIMEOUT,
- to_rps_client(file));
+ MAX_SCHEDULE_TIMEOUT);
if (err)
goto out;
@@ -1808,6 +1736,9 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
* 2 - Recognise WC as a separate cache domain so that we can flush the
* delayed writes via GTT before performing direct access via WC.
*
+ * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
+ * pagefault; swapin remains transparent.
+ *
* Restrictions:
*
* * snoopable objects cannot be accessed via the GTT. It can cause machine
@@ -1835,7 +1766,7 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
*/
int i915_gem_mmap_gtt_version(void)
{
- return 2;
+ return 3;
}
static inline struct i915_ggtt_view
@@ -1891,6 +1822,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
intel_wakeref_t wakeref;
struct i915_vma *vma;
pgoff_t page_offset;
+ int srcu;
int ret;
/* Sanity check that we allow writing into this object */
@@ -1902,27 +1834,21 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
trace_i915_gem_object_fault(obj, page_offset, true, write);
- /* Try to flush the object off the GPU first without holding the lock.
- * Upon acquiring the lock, we will perform our sanity checks and then
- * repeat the flush holding the lock in the normal manner to catch cases
- * where we are gazumped.
- */
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT,
- NULL);
- if (ret)
- goto err;
-
ret = i915_gem_object_pin_pages(obj);
if (ret)
goto err;
wakeref = intel_runtime_pm_get(dev_priv);
+ srcu = i915_reset_trylock(dev_priv);
+ if (srcu < 0) {
+ ret = srcu;
+ goto err_rpm;
+ }
+
ret = i915_mutex_lock_interruptible(dev);
if (ret)
- goto err_rpm;
+ goto err_reset;
/* Access to snoopable pages through the GTT is incoherent. */
if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
@@ -1930,7 +1856,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
goto err_unlock;
}
-
/* Now pin it into the GTT as needed */
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
PIN_MAPPABLE |
@@ -1964,10 +1889,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
goto err_unlock;
}
- ret = i915_gem_object_set_to_gtt_domain(obj, write);
- if (ret)
- goto err_unpin;
-
ret = i915_vma_pin_fence(vma);
if (ret)
goto err_unpin;
@@ -1995,6 +1916,8 @@ err_unpin:
__i915_vma_unpin(vma);
err_unlock:
mutex_unlock(&dev->struct_mutex);
+err_reset:
+ i915_reset_unlock(dev_priv, srcu);
err_rpm:
intel_runtime_pm_put(dev_priv, wakeref);
i915_gem_object_unpin_pages(obj);
@@ -2007,7 +1930,7 @@ err:
* fail). But any other -EIO isn't ours (e.g. swap in failure)
* and so needs to be reported.
*/
- if (!i915_terminally_wedged(&dev_priv->gpu_error))
+ if (!i915_terminally_wedged(dev_priv))
return VM_FAULT_SIGBUS;
/* else: fall through */
case -EAGAIN:
@@ -2280,7 +2203,6 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
struct page *page;
__i915_gem_object_release_shmem(obj, pages, true);
-
i915_gem_gtt_finish_pages(obj, pages);
if (i915_gem_object_needs_bit17_swizzle(obj))
@@ -2488,7 +2410,7 @@ rebuild_st:
do {
cond_resched();
page = shmem_read_mapping_page_gfp(mapping, i, gfp);
- if (likely(!IS_ERR(page)))
+ if (!IS_ERR(page))
break;
if (!*s) {
@@ -2622,6 +2544,14 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
lockdep_assert_held(&obj->mm.lock);
+ /* Make the pages coherent with the GPU (flushing any swapin). */
+ if (obj->cache_dirty) {
+ obj->write_domain = 0;
+ if (i915_gem_object_has_struct_page(obj))
+ drm_clflush_sg(pages);
+ obj->cache_dirty = false;
+ }
+
obj->mm.get_page.sg_pos = pages->sgl;
obj->mm.get_page.sg_idx = 0;
@@ -2823,6 +2753,33 @@ err_unlock:
goto out_unlock;
}
+void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
+ unsigned long offset,
+ unsigned long size)
+{
+ enum i915_map_type has_type;
+ void *ptr;
+
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
+ offset, size, obj->base.size));
+
+ obj->mm.dirty = true;
+
+ if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
+ return;
+
+ ptr = page_unpack_bits(obj->mm.mapping, &has_type);
+ if (has_type == I915_MAP_WC)
+ return;
+
+ drm_clflush_virt_range(ptr + offset, size);
+ if (size == obj->base.size) {
+ obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
+ obj->cache_dirty = false;
+ }
+}
+
static int
i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pwrite *arg)
@@ -2832,7 +2789,11 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
u64 remain, offset;
unsigned int pg;
- /* Before we instantiate/pin the backing store for our use, we
+ /* Caller already validated user args */
+ GEM_BUG_ON(!access_ok(user_data, arg->size));
+
+ /*
+ * Before we instantiate/pin the backing store for our use, we
* can prepopulate the shmemfs filp efficiently using a write into
* the pagecache. We avoid the penalty of instantiating all the
* pages, important if the user is just writing to a few and never
@@ -2846,7 +2807,8 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
if (obj->mm.madv != I915_MADV_WILLNEED)
return -EFAULT;
- /* Before the pages are instantiated the object is treated as being
+ /*
+ * Before the pages are instantiated the object is treated as being
* in the CPU domain. The pages will be clflushed as required before
* use, and we can freely write into the pages directly. If userspace
* races pwrite with any other operation; corruption will ensue -
@@ -2862,20 +2824,32 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
struct page *page;
void *data, *vaddr;
int err;
+ char c;
len = PAGE_SIZE - pg;
if (len > remain)
len = remain;
+ /* Prefault the user page to reduce potential recursion */
+ err = __get_user(c, user_data);
+ if (err)
+ return err;
+
+ err = __get_user(c, user_data + len - 1);
+ if (err)
+ return err;
+
err = pagecache_write_begin(obj->base.filp, mapping,
offset, len, 0,
&page, &data);
if (err < 0)
return err;
- vaddr = kmap(page);
- unwritten = copy_from_user(vaddr + pg, user_data, len);
- kunmap(page);
+ vaddr = kmap_atomic(page);
+ unwritten = __copy_from_user_inatomic(vaddr + pg,
+ user_data,
+ len);
+ kunmap_atomic(vaddr);
err = pagecache_write_end(obj->base.filp, mapping,
offset, len, len - unwritten,
@@ -2883,8 +2857,9 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
if (err < 0)
return err;
+ /* We don't handle -EFAULT, leave it to the caller to check */
if (unwritten)
- return -EFAULT;
+ return -ENODEV;
remain -= len;
user_data += len;
@@ -2895,51 +2870,6 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
return 0;
}
-static bool match_ring(struct i915_request *rq)
-{
- struct drm_i915_private *dev_priv = rq->i915;
- u32 ring = I915_READ(RING_START(rq->engine->mmio_base));
-
- return ring == i915_ggtt_offset(rq->ring->vma);
-}
-
-struct i915_request *
-i915_gem_find_active_request(struct intel_engine_cs *engine)
-{
- struct i915_request *request, *active = NULL;
- unsigned long flags;
-
- /*
- * We are called by the error capture, reset and to dump engine
- * state at random points in time. In particular, note that neither is
- * crucially ordered with an interrupt. After a hang, the GPU is dead
- * and we assume that no more writes can happen (we waited long enough
- * for all writes that were in transaction to be flushed) - adding an
- * extra delay for a recent interrupt is pointless. Hence, we do
- * not need an engine->irq_seqno_barrier() before the seqno reads.
- * At all other times, we must assume the GPU is still running, but
- * we only care about the snapshot of this moment.
- */
- spin_lock_irqsave(&engine->timeline.lock, flags);
- list_for_each_entry(request, &engine->timeline.requests, link) {
- if (i915_request_completed(request))
- continue;
-
- if (!i915_request_started(request))
- break;
-
- /* More than one preemptible request may match! */
- if (!match_ring(request))
- break;
-
- active = request;
- break;
- }
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
- return active;
-}
-
static void
i915_gem_retire_work_handler(struct work_struct *work)
{
@@ -2964,180 +2894,105 @@ i915_gem_retire_work_handler(struct work_struct *work)
round_jiffies_up_relative(HZ));
}
-static void shrink_caches(struct drm_i915_private *i915)
+static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
+ unsigned long mask)
{
- /*
- * kmem_cache_shrink() discards empty slabs and reorders partially
- * filled slabs to prioritise allocating from the mostly full slabs,
- * with the aim of reducing fragmentation.
- */
- kmem_cache_shrink(i915->priorities);
- kmem_cache_shrink(i915->dependencies);
- kmem_cache_shrink(i915->requests);
- kmem_cache_shrink(i915->luts);
- kmem_cache_shrink(i915->vmas);
- kmem_cache_shrink(i915->objects);
-}
-
-struct sleep_rcu_work {
- union {
- struct rcu_head rcu;
- struct work_struct work;
- };
- struct drm_i915_private *i915;
- unsigned int epoch;
-};
+ bool result = true;
-static inline bool
-same_epoch(struct drm_i915_private *i915, unsigned int epoch)
-{
/*
- * There is a small chance that the epoch wrapped since we started
- * sleeping. If we assume that epoch is at least a u32, then it will
- * take at least 2^32 * 100ms for it to wrap, or about 326 years.
+ * Even if we fail to switch, give whatever is running a small chance
+ * to save itself before we report the failure. Yes, this may be a
+ * false positive due to e.g. ENOMEM, caveat emptor!
*/
- return epoch == READ_ONCE(i915->gt.epoch);
-}
-
-static void __sleep_work(struct work_struct *work)
-{
- struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
- struct drm_i915_private *i915 = s->i915;
- unsigned int epoch = s->epoch;
-
- kfree(s);
- if (same_epoch(i915, epoch))
- shrink_caches(i915);
-}
+ if (i915_gem_switch_to_kernel_context(i915, mask))
+ result = false;
-static void __sleep_rcu(struct rcu_head *rcu)
-{
- struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
- struct drm_i915_private *i915 = s->i915;
-
- destroy_rcu_head(&s->rcu);
+ if (i915_gem_wait_for_idle(i915,
+ I915_WAIT_LOCKED |
+ I915_WAIT_FOR_IDLE_BOOST,
+ I915_GEM_IDLE_TIMEOUT))
+ result = false;
+
+ if (!result) {
+ if (i915_modparams.reset) { /* XXX hide warning from gem_eio */
+ dev_err(i915->drm.dev,
+ "Failed to idle engines, declaring wedged!\n");
+ GEM_TRACE_DUMP();
+ }
- if (same_epoch(i915, s->epoch)) {
- INIT_WORK(&s->work, __sleep_work);
- queue_work(i915->wq, &s->work);
- } else {
- kfree(s);
+ /* Forcibly cancel outstanding work and leave the gpu quiet. */
+ i915_gem_set_wedged(i915);
}
-}
-static inline bool
-new_requests_since_last_retire(const struct drm_i915_private *i915)
-{
- return (READ_ONCE(i915->gt.active_requests) ||
- work_pending(&i915->gt.idle_work.work));
+ i915_retire_requests(i915); /* ensure we flush after wedging */
+ return result;
}
-static void assert_kernel_context_is_current(struct drm_i915_private *i915)
+static bool load_power_context(struct drm_i915_private *i915)
{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
+ /* Force loading the kernel context on all engines */
+ if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
+ return false;
- if (i915_terminally_wedged(&i915->gpu_error))
- return;
+ /*
+ * Immediately park the GPU so that we enable powersaving and
+ * treat it as idle. The next time we issue a request, we will
+ * unpark and start using the engine->pinned_default_state, otherwise
+ * it is in limbo and an early reset may fail.
+ */
+ __i915_gem_park(i915);
- GEM_BUG_ON(i915->gt.active_requests);
- for_each_engine(engine, i915, id) {
- GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request));
- GEM_BUG_ON(engine->last_retired_context !=
- to_intel_context(i915->kernel_context, engine));
- }
+ return true;
}
static void
i915_gem_idle_work_handler(struct work_struct *work)
{
- struct drm_i915_private *dev_priv =
- container_of(work, typeof(*dev_priv), gt.idle_work.work);
- unsigned int epoch = I915_EPOCH_INVALID;
+ struct drm_i915_private *i915 =
+ container_of(work, typeof(*i915), gt.idle_work.work);
bool rearm_hangcheck;
- if (!READ_ONCE(dev_priv->gt.awake))
+ if (!READ_ONCE(i915->gt.awake))
return;
- if (READ_ONCE(dev_priv->gt.active_requests))
+ if (READ_ONCE(i915->gt.active_requests))
return;
- /*
- * Flush out the last user context, leaving only the pinned
- * kernel context resident. When we are idling on the kernel_context,
- * no more new requests (with a context switch) are emitted and we
- * can finally rest. A consequence is that the idle work handler is
- * always called at least twice before idling (and if the system is
- * idle that implies a round trip through the retire worker).
- */
- mutex_lock(&dev_priv->drm.struct_mutex);
- i915_gem_switch_to_kernel_context(dev_priv);
- mutex_unlock(&dev_priv->drm.struct_mutex);
-
- GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n",
- READ_ONCE(dev_priv->gt.active_requests));
-
- /*
- * Wait for last execlists context complete, but bail out in case a
- * new request is submitted. As we don't trust the hardware, we
- * continue on if the wait times out. This is necessary to allow
- * the machine to suspend even if the hardware dies, and we will
- * try to recover in resume (after depriving the hardware of power,
- * it may be in a better mmod).
- */
- __wait_for(if (new_requests_since_last_retire(dev_priv)) return,
- intel_engines_are_idle(dev_priv),
- I915_IDLE_ENGINES_TIMEOUT * 1000,
- 10, 500);
-
rearm_hangcheck =
- cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
+ cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
- if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
+ if (!mutex_trylock(&i915->drm.struct_mutex)) {
/* Currently busy, come back later */
- mod_delayed_work(dev_priv->wq,
- &dev_priv->gt.idle_work,
+ mod_delayed_work(i915->wq,
+ &i915->gt.idle_work,
msecs_to_jiffies(50));
goto out_rearm;
}
/*
- * New request retired after this work handler started, extend active
- * period until next instance of the work.
+ * Flush out the last user context, leaving only the pinned
+ * kernel context resident. Should anything unfortunate happen
+ * while we are idle (such as the GPU being power cycled), no users
+ * will be harmed.
*/
- if (new_requests_since_last_retire(dev_priv))
- goto out_unlock;
+ if (!work_pending(&i915->gt.idle_work.work) &&
+ !i915->gt.active_requests) {
+ ++i915->gt.active_requests; /* don't requeue idle */
- epoch = __i915_gem_park(dev_priv);
+ switch_to_kernel_context_sync(i915, i915->gt.active_engines);
- assert_kernel_context_is_current(dev_priv);
+ if (!--i915->gt.active_requests) {
+ __i915_gem_park(i915);
+ rearm_hangcheck = false;
+ }
+ }
- rearm_hangcheck = false;
-out_unlock:
- mutex_unlock(&dev_priv->drm.struct_mutex);
+ mutex_unlock(&i915->drm.struct_mutex);
out_rearm:
if (rearm_hangcheck) {
- GEM_BUG_ON(!dev_priv->gt.awake);
- i915_queue_hangcheck(dev_priv);
- }
-
- /*
- * When we are idle, it is an opportune time to reap our caches.
- * However, we have many objects that utilise RCU and the ordered
- * i915->wq that this work is executing on. To try and flush any
- * pending frees now we are idle, we first wait for an RCU grace
- * period, and then queue a task (that will run last on the wq) to
- * shrink and re-optimize the caches.
- */
- if (same_epoch(dev_priv, epoch)) {
- struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL);
- if (s) {
- init_rcu_head(&s->rcu);
- s->i915 = dev_priv;
- s->epoch = epoch;
- call_rcu(&s->rcu, __sleep_rcu);
- }
+ GEM_BUG_ON(!i915->gt.awake);
+ i915_queue_hangcheck(i915);
}
}
@@ -3171,7 +3026,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
list_del(&lut->obj_link);
list_del(&lut->ctx_link);
- kmem_cache_free(i915->luts, lut);
+ i915_lut_handle_free(lut);
__i915_gem_object_release_unless_active(obj);
}
@@ -3234,8 +3089,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_PRIORITY |
I915_WAIT_ALL,
- to_wait_timeout(args->timeout_ns),
- to_rps_client(file));
+ to_wait_timeout(args->timeout_ns));
if (args->timeout_ns > 0) {
args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
@@ -3304,7 +3158,7 @@ wait_for_timelines(struct drm_i915_private *i915,
* stalls, so allow the gpu to boost to maximum clocks.
*/
if (flags & I915_WAIT_FOR_IDLE_BOOST)
- gen6_rps_boost(rq, NULL);
+ gen6_rps_boost(rq);
timeout = i915_request_wait(rq, flags, timeout);
i915_request_put(rq);
@@ -3340,19 +3194,11 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
lockdep_assert_held(&i915->drm.struct_mutex);
- if (GEM_SHOW_DEBUG() && !timeout) {
- /* Presume that timeout was non-zero to begin with! */
- dev_warn(&i915->drm.pdev->dev,
- "Missed idle-completion interrupt!\n");
- GEM_TRACE_DUMP();
- }
-
err = wait_for_engines(i915);
if (err)
return err;
i915_retire_requests(i915);
- GEM_BUG_ON(i915->gt.active_requests);
}
return 0;
@@ -3399,8 +3245,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
(write ? I915_WAIT_ALL : 0),
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
@@ -3462,8 +3307,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
(write ? I915_WAIT_ALL : 0),
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
@@ -3578,8 +3422,7 @@ restart:
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
@@ -3717,8 +3560,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT,
- to_rps_client(file));
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
goto out;
@@ -3844,8 +3686,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
(write ? I915_WAIT_ALL : 0),
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
@@ -3891,8 +3732,9 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
long ret;
/* ABI: return -EIO if already wedged */
- if (i915_terminally_wedged(&dev_priv->gpu_error))
- return -EIO;
+ ret = i915_terminally_wedged(dev_priv);
+ if (ret)
+ return ret;
spin_lock(&file_priv->mm.lock);
list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
@@ -3968,7 +3810,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
}
vma = i915_vma_instance(obj, vm, view);
- if (unlikely(IS_ERR(vma)))
+ if (IS_ERR(vma))
return vma;
if (i915_vma_misplaced(vma, size, alignment, flags)) {
@@ -4000,22 +3842,19 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
return vma;
}
-static __always_inline unsigned int __busy_read_flag(unsigned int id)
+static __always_inline u32 __busy_read_flag(u8 id)
{
- /* Note that we could alias engines in the execbuf API, but
- * that would be very unwise as it prevents userspace from
- * fine control over engine selection. Ahem.
- *
- * This should be something like EXEC_MAX_ENGINE instead of
- * I915_NUM_ENGINES.
- */
- BUILD_BUG_ON(I915_NUM_ENGINES > 16);
- return 0x10000 << id;
+ if (id == (u8)I915_ENGINE_CLASS_INVALID)
+ return 0xffff0000u;
+
+ GEM_BUG_ON(id >= 16);
+ return 0x10000u << id;
}
-static __always_inline unsigned int __busy_write_id(unsigned int id)
+static __always_inline u32 __busy_write_id(u8 id)
{
- /* The uABI guarantees an active writer is also amongst the read
+ /*
+ * The uABI guarantees an active writer is also amongst the read
* engines. This would be true if we accessed the activity tracking
* under the lock, but as we perform the lookup of the object and
* its activity locklessly we can not guarantee that the last_write
@@ -4023,16 +3862,19 @@ static __always_inline unsigned int __busy_write_id(unsigned int id)
* last_read - hence we always set both read and write busy for
* last_write.
*/
- return id | __busy_read_flag(id);
+ if (id == (u8)I915_ENGINE_CLASS_INVALID)
+ return 0xffffffffu;
+
+ return (id + 1) | __busy_read_flag(id);
}
static __always_inline unsigned int
-__busy_set_if_active(const struct dma_fence *fence,
- unsigned int (*flag)(unsigned int id))
+__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
{
- struct i915_request *rq;
+ const struct i915_request *rq;
- /* We have to check the current hw status of the fence as the uABI
+ /*
+ * We have to check the current hw status of the fence as the uABI
* guarantees forward progress. We could rely on the idle worker
* to eventually flush us, but to minimise latency just ask the
* hardware.
@@ -4043,11 +3885,13 @@ __busy_set_if_active(const struct dma_fence *fence,
return 0;
/* opencode to_request() in order to avoid const warnings */
- rq = container_of(fence, struct i915_request, fence);
+ rq = container_of(fence, const struct i915_request, fence);
if (i915_request_completed(rq))
return 0;
- return flag(rq->engine->uabi_id);
+ /* Beware type-expansion follies! */
+ BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class));
+ return flag(rq->engine->uabi_class);
}
static __always_inline unsigned int
@@ -4081,7 +3925,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
if (!obj)
goto out;
- /* A discrepancy here is that we do not report the status of
+ /*
+ * A discrepancy here is that we do not report the status of
* non-i915 fences, i.e. even though we may report the object as idle,
* a call to set-domain may still stall waiting for foreign rendering.
* This also means that wait-ioctl may report an object as busy,
@@ -4281,7 +4126,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
if (overflows_type(size, obj->base.size))
return ERR_PTR(-E2BIG);
- obj = i915_gem_object_alloc(dev_priv);
+ obj = i915_gem_object_alloc();
if (obj == NULL)
return ERR_PTR(-ENOMEM);
@@ -4354,7 +4199,7 @@ static bool discard_backing_storage(struct drm_i915_gem_object *obj)
* acquiring such a reference whilst we are in the middle of
* freeing the object.
*/
- return atomic_long_read(&obj->base.filp->f_count) == 1;
+ return file_count(obj->base.filp) == 1;
}
static void __i915_gem_free_objects(struct drm_i915_private *i915,
@@ -4414,7 +4259,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
drm_gem_object_release(&obj->base);
i915_gem_info_remove_obj(i915, obj->base.size);
- kfree(obj->bit_17);
+ bitmap_free(obj->bit_17);
i915_gem_object_free(obj);
GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
@@ -4537,7 +4382,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
GEM_TRACE("\n");
wakeref = intel_runtime_pm_get(i915);
- intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
+ intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
/*
* As we have just resumed the machine and woken the device up from
@@ -4545,7 +4390,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
* back to defaults, recovering from whatever wedged state we left it
* in and so worth trying to use the device once more.
*/
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_terminally_wedged(i915))
i915_gem_unset_wedged(i915);
/*
@@ -4558,7 +4403,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
*/
intel_engines_sanitize(i915, false);
- intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
+ intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
intel_runtime_pm_put(i915, wakeref);
mutex_lock(&i915->drm.struct_mutex);
@@ -4566,15 +4411,13 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
mutex_unlock(&i915->drm.struct_mutex);
}
-int i915_gem_suspend(struct drm_i915_private *i915)
+void i915_gem_suspend(struct drm_i915_private *i915)
{
intel_wakeref_t wakeref;
- int ret;
GEM_TRACE("\n");
wakeref = intel_runtime_pm_get(i915);
- intel_suspend_gt_powersave(i915);
flush_workqueue(i915->wq);
@@ -4589,22 +4432,7 @@ int i915_gem_suspend(struct drm_i915_private *i915)
* state. Fortunately, the kernel_context is disposable and we do
* not rely on its state.
*/
- if (!i915_terminally_wedged(&i915->gpu_error)) {
- ret = i915_gem_switch_to_kernel_context(i915);
- if (ret)
- goto err_unlock;
-
- ret = i915_gem_wait_for_idle(i915,
- I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_LOCKED |
- I915_WAIT_FOR_IDLE_BOOST,
- MAX_SCHEDULE_TIMEOUT);
- if (ret && ret != -EIO)
- goto err_unlock;
-
- assert_kernel_context_is_current(i915);
- }
- i915_retire_requests(i915); /* ensure we flush after wedging */
+ switch_to_kernel_context_sync(i915, i915->gt.active_engines);
mutex_unlock(&i915->drm.struct_mutex);
i915_reset_flush(i915);
@@ -4617,23 +4445,15 @@ int i915_gem_suspend(struct drm_i915_private *i915)
*/
drain_delayed_work(&i915->gt.idle_work);
- intel_uc_suspend(i915);
-
/*
* Assert that we successfully flushed all the work and
* reset the GPU back to its idle, low power state.
*/
- WARN_ON(i915->gt.awake);
- if (WARN_ON(!intel_engines_are_idle(i915)))
- i915_gem_set_wedged(i915); /* no hope, discard everything */
+ GEM_BUG_ON(i915->gt.awake);
- intel_runtime_pm_put(i915, wakeref);
- return 0;
+ intel_uc_suspend(i915);
-err_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
intel_runtime_pm_put(i915, wakeref);
- return ret;
}
void i915_gem_suspend_late(struct drm_i915_private *i915)
@@ -4683,7 +4503,7 @@ void i915_gem_resume(struct drm_i915_private *i915)
WARN_ON(i915->gt.awake);
mutex_lock(&i915->drm.struct_mutex);
- intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
+ intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
i915_gem_restore_gtt_mappings(i915);
i915_gem_restore_fences(i915);
@@ -4693,7 +4513,7 @@ void i915_gem_resume(struct drm_i915_private *i915)
* guarantee that the context image is complete. So let's just reset
* it and start again.
*/
- i915->gt.resume(i915);
+ intel_gt_resume(i915);
if (i915_gem_init_hw(i915))
goto err_wedged;
@@ -4701,17 +4521,18 @@ void i915_gem_resume(struct drm_i915_private *i915)
intel_uc_resume(i915);
/* Always reload a context for powersaving. */
- if (i915_gem_switch_to_kernel_context(i915))
+ if (!load_power_context(i915))
goto err_wedged;
out_unlock:
- intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
+ intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
mutex_unlock(&i915->drm.struct_mutex);
return;
err_wedged:
- if (!i915_terminally_wedged(&i915->gpu_error)) {
- DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
+ if (!i915_reset_failed(i915)) {
+ dev_err(i915->drm.dev,
+ "Failed to re-initialize GPU, declaring it wedged!\n");
i915_gem_set_wedged(i915);
}
goto out_unlock;
@@ -4781,6 +4602,8 @@ static int __i915_gem_restart_engines(void *data)
}
}
+ intel_engines_set_scheduler_caps(i915);
+
return 0;
}
@@ -4791,7 +4614,7 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
dev_priv->gt.last_init_time = ktime_get();
/* Double layer security blanket, see i915_gem_init() */
- intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+ intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
@@ -4816,10 +4639,9 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
init_unused_rings(dev_priv);
BUG_ON(!dev_priv->kernel_context);
- if (i915_terminally_wedged(&dev_priv->gpu_error)) {
- ret = -EIO;
+ ret = i915_terminally_wedged(dev_priv);
+ if (ret)
goto out;
- }
ret = i915_ppgtt_init_hw(dev_priv);
if (ret) {
@@ -4847,14 +4669,14 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
if (ret)
goto cleanup_uc;
- intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+ intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
return 0;
cleanup_uc:
intel_uc_fini_hw(dev_priv);
out:
- intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+ intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
return ret;
}
@@ -4864,7 +4686,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
struct i915_gem_context *ctx;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- int err;
+ int err = 0;
/*
* As we reset the gpu during very early sanitisation, the current
@@ -4897,36 +4719,27 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
goto err_active;
}
- err = i915_gem_switch_to_kernel_context(i915);
- if (err)
- goto err_active;
-
- if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) {
- i915_gem_set_wedged(i915);
- err = -EIO; /* Caller will declare us wedged */
+ /* Flush the default context image to memory, and enable powersaving. */
+ if (!load_power_context(i915)) {
+ err = -EIO;
goto err_active;
}
- assert_kernel_context_is_current(i915);
-
- /*
- * Immediately park the GPU so that we enable powersaving and
- * treat it as idle. The next time we issue a request, we will
- * unpark and start using the engine->pinned_default_state, otherwise
- * it is in limbo and an early reset may fail.
- */
- __i915_gem_park(i915);
-
for_each_engine(engine, i915, id) {
+ struct intel_context *ce;
struct i915_vma *state;
void *vaddr;
- GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count);
+ ce = intel_context_lookup(ctx, engine);
+ if (!ce)
+ continue;
- state = to_intel_context(ctx, engine)->state;
+ state = ce->state;
if (!state)
continue;
+ GEM_BUG_ON(intel_context_is_pinned(ce));
+
/*
* As we will hold a reference to the logical state, it will
* not be torn down with the context, and importantly the
@@ -4944,6 +4757,8 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
goto err_active;
engine->default_state = i915_gem_object_get(state->obj);
+ i915_gem_object_set_cache_coherency(engine->default_state,
+ I915_CACHE_LLC);
/* Check we can acquire the image of the context state */
vaddr = i915_gem_object_pin_map(engine->default_state,
@@ -4982,19 +4797,10 @@ out_ctx:
err_active:
/*
* If we have to abandon now, we expect the engines to be idle
- * and ready to be torn-down. First try to flush any remaining
- * request, ensure we are pointing at the kernel context and
- * then remove it.
+ * and ready to be torn-down. The quickest way we can accomplish
+ * this is by declaring ourselves wedged.
*/
- if (WARN_ON(i915_gem_switch_to_kernel_context(i915)))
- goto out_ctx;
-
- if (WARN_ON(i915_gem_wait_for_idle(i915,
- I915_WAIT_LOCKED,
- MAX_SCHEDULE_TIMEOUT)))
- goto out_ctx;
-
- i915_gem_contexts_lost(i915);
+ i915_gem_set_wedged(i915);
goto out_ctx;
}
@@ -5047,13 +4853,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
- if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
- dev_priv->gt.resume = intel_lr_context_resume;
+ if (HAS_LOGICAL_RING_CONTEXTS(dev_priv))
dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
- } else {
- dev_priv->gt.resume = intel_legacy_submission_resume;
+ else
dev_priv->gt.cleanup_engine = intel_engine_cleanup;
- }
i915_timelines_init(dev_priv);
@@ -5076,7 +4879,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
* just magically go away.
*/
mutex_lock(&dev_priv->drm.struct_mutex);
- intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+ intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
ret = i915_gem_init_ggtt(dev_priv);
if (ret) {
@@ -5138,7 +4941,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
goto err_init_hw;
}
- intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+ intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
mutex_unlock(&dev_priv->drm.struct_mutex);
return 0;
@@ -5152,7 +4955,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
err_init_hw:
mutex_unlock(&dev_priv->drm.struct_mutex);
- WARN_ON(i915_gem_suspend(dev_priv));
+ i915_gem_suspend(dev_priv);
i915_gem_suspend_late(dev_priv);
i915_gem_drain_workqueue(dev_priv);
@@ -5173,7 +4976,7 @@ err_scratch:
i915_gem_fini_scratch(dev_priv);
err_ggtt:
err_unlock:
- intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+ intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
mutex_unlock(&dev_priv->drm.struct_mutex);
err_uc_misc:
@@ -5192,7 +4995,7 @@ err_uc_misc:
* wedged. But we only want to do this where the GPU is angry,
* for all other failure, such as an allocation failure, bail.
*/
- if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
+ if (!i915_reset_failed(dev_priv)) {
i915_load_error(dev_priv,
"Failed to initialize GPU, declaring it wedged!\n");
i915_gem_set_wedged(dev_priv);
@@ -5305,36 +5108,7 @@ static void i915_gem_init__mm(struct drm_i915_private *i915)
int i915_gem_init_early(struct drm_i915_private *dev_priv)
{
- int err = -ENOMEM;
-
- dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
- if (!dev_priv->objects)
- goto err_out;
-
- dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
- if (!dev_priv->vmas)
- goto err_objects;
-
- dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
- if (!dev_priv->luts)
- goto err_vmas;
-
- dev_priv->requests = KMEM_CACHE(i915_request,
- SLAB_HWCACHE_ALIGN |
- SLAB_RECLAIM_ACCOUNT |
- SLAB_TYPESAFE_BY_RCU);
- if (!dev_priv->requests)
- goto err_luts;
-
- dev_priv->dependencies = KMEM_CACHE(i915_dependency,
- SLAB_HWCACHE_ALIGN |
- SLAB_RECLAIM_ACCOUNT);
- if (!dev_priv->dependencies)
- goto err_requests;
-
- dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
- if (!dev_priv->priorities)
- goto err_dependencies;
+ int err;
INIT_LIST_HEAD(&dev_priv->gt.active_rings);
INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
@@ -5348,6 +5122,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
mutex_init(&dev_priv->gpu_error.wedge_mutex);
+ init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
@@ -5358,19 +5133,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);
return 0;
-
-err_dependencies:
- kmem_cache_destroy(dev_priv->dependencies);
-err_requests:
- kmem_cache_destroy(dev_priv->requests);
-err_luts:
- kmem_cache_destroy(dev_priv->luts);
-err_vmas:
- kmem_cache_destroy(dev_priv->vmas);
-err_objects:
- kmem_cache_destroy(dev_priv->objects);
-err_out:
- return err;
}
void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
@@ -5380,15 +5142,7 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
WARN_ON(dev_priv->mm.object_count);
- kmem_cache_destroy(dev_priv->priorities);
- kmem_cache_destroy(dev_priv->dependencies);
- kmem_cache_destroy(dev_priv->requests);
- kmem_cache_destroy(dev_priv->luts);
- kmem_cache_destroy(dev_priv->vmas);
- kmem_cache_destroy(dev_priv->objects);
-
- /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
- rcu_barrier();
+ cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
i915_gemfs_fini(dev_priv);
}
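
For readers following the busy-ioctl change above (__busy_read_flag()/__busy_write_id()): below is a minimal, self-contained sketch of the flag layout this patch moves to, where the upper 16 bits carry one read flag per engine class (uabi_class) and the lower 16 bits carry the writing class plus one. This is an illustration only; the helper names encode_read()/encode_write() and the CLASS_INVALID stand-in are made up for the example and are not part of the driver.

/*
 * Illustrative sketch: mirrors the read/write busy encoding added by this
 * patch, recast as a standalone userspace program.  Helper names are
 * hypothetical; the driver equivalents are __busy_read_flag() and
 * __busy_write_id().
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define CLASS_INVALID 0xffu	/* stand-in for (u8)I915_ENGINE_CLASS_INVALID */

static uint32_t encode_read(uint8_t uabi_class)
{
	if (uabi_class == CLASS_INVALID)
		return 0xffff0000u;	/* unknown class: report all readers */

	assert(uabi_class < 16);
	return 0x10000u << uabi_class;	/* one read bit per class, bits 16..31 */
}

static uint32_t encode_write(uint8_t uabi_class)
{
	if (uabi_class == CLASS_INVALID)
		return 0xffffffffu;	/* unknown class: busy everywhere */

	/* The uABI guarantees a writer is also a reader, so set both halves. */
	return (uabi_class + 1) | encode_read(uabi_class);
}

int main(void)
{
	/* e.g. class 0 (render) writing while class 2 (video) reads */
	uint32_t busy = encode_write(0) | encode_read(2);

	printf("writer class: %u\n", (unsigned)((busy & 0xffff) - 1)); /* low 16 bits: class + 1 */
	printf("reader mask : 0x%04x\n", (unsigned)(busy >> 16));      /* high 16 bits: class bits */
	return 0;
}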