aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_irq.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_irq.c')
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c91
1 files changed, 71 insertions, 20 deletions
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index a03b445ceb5f..4b91228fd9bd 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1027,8 +1027,13 @@ static inline void intel_hpd_irq_handler(struct drm_device *dev,
dev_priv->display.hpd_irq_setup(dev);
spin_unlock(&dev_priv->irq_lock);
- queue_work(dev_priv->wq,
- &dev_priv->hotplug_work);
+ /*
+ * Our hotplug handler can grab modeset locks (by calling down into the
+ * fb helpers). Hence it must not be run on our own dev-priv->wq work
+ * queue for otherwise the flush_work in the pageflip code will
+ * deadlock.
+ */
+ schedule_work(&dev_priv->hotplug_work);
}
static void gmbus_irq_handler(struct drm_device *dev)
@@ -1464,6 +1469,34 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg)
return ret;
}
+static void i915_error_wake_up(struct drm_i915_private *dev_priv,
+ bool reset_completed)
+{
+ struct intel_ring_buffer *ring;
+ int i;
+
+ /*
+ * Notify all waiters for GPU completion events that reset state has
+ * been changed, and that they need to restart their wait after
+ * checking for potential errors (and bail out to drop locks if there is
+ * a gpu reset pending so that i915_error_work_func can acquire them).
+ */
+
+ /* Wake up __wait_seqno, potentially holding dev->struct_mutex. */
+ for_each_ring(ring, dev_priv, i)
+ wake_up_all(&ring->irq_queue);
+
+ /* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */
+ wake_up_all(&dev_priv->pending_flip_queue);
+
+ /*
+ * Signal tasks blocked in i915_gem_wait_for_error that the pending
+ * reset state is cleared.
+ */
+ if (reset_completed)
+ wake_up_all(&dev_priv->gpu_error.reset_queue);
+}
+
/**
* i915_error_work_func - do process context error handling work
* @work: work struct
@@ -1478,11 +1511,10 @@ static void i915_error_work_func(struct work_struct *work)
drm_i915_private_t *dev_priv = container_of(error, drm_i915_private_t,
gpu_error);
struct drm_device *dev = dev_priv->dev;
- struct intel_ring_buffer *ring;
char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
- int i, ret;
+ int ret;
kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event);
@@ -1501,8 +1533,16 @@ static void i915_error_work_func(struct work_struct *work)
kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE,
reset_event);
+ /*
+ * All state reset _must_ be completed before we update the
+ * reset counter, for otherwise waiters might miss the reset
+ * pending state and not properly drop locks, resulting in
+ * deadlocks with the reset work.
+ */
ret = i915_reset(dev);
+ intel_display_handle_reset(dev);
+
if (ret == 0) {
/*
* After all the gem state is reset, increment the reset
@@ -1523,12 +1563,11 @@ static void i915_error_work_func(struct work_struct *work)
atomic_set(&error->reset_counter, I915_WEDGED);
}
- for_each_ring(ring, dev_priv, i)
- wake_up_all(&ring->irq_queue);
-
- intel_display_handle_reset(dev);
-
- wake_up_all(&dev_priv->gpu_error.reset_queue);
+ /*
+ * Note: The wake_up also serves as a memory barrier so that
+ * waiters see the update value of the reset counter atomic_t.
+ */
+ i915_error_wake_up(dev_priv, true);
}
}
@@ -1637,8 +1676,6 @@ static void i915_report_and_clear_eir(struct drm_device *dev)
void i915_handle_error(struct drm_device *dev, bool wedged)
{
struct drm_i915_private *dev_priv = dev->dev_private;
- struct intel_ring_buffer *ring;
- int i;
i915_capture_error_state(dev);
i915_report_and_clear_eir(dev);
@@ -1648,14 +1685,28 @@ void i915_handle_error(struct drm_device *dev, bool wedged)
&dev_priv->gpu_error.reset_counter);
/*
- * Wakeup waiting processes so that the reset work item
- * doesn't deadlock trying to grab various locks.
+ * Wakeup waiting processes so that the reset work function
+ * i915_error_work_func doesn't deadlock trying to grab various
+ * locks. By bumping the reset counter first, the woken
+ * processes will see a reset in progress and back off,
+ * releasing their locks and then wait for the reset completion.
+ * We must do this for _all_ gpu waiters that might hold locks
+ * that the reset work needs to acquire.
+ *
+ * Note: The wake_up serves as the required memory barrier to
+ * ensure that the waiters see the updated value of the reset
+ * counter atomic_t.
*/
- for_each_ring(ring, dev_priv, i)
- wake_up_all(&ring->irq_queue);
+ i915_error_wake_up(dev_priv, false);
}
- queue_work(dev_priv->wq, &dev_priv->gpu_error.work);
+ /*
+ * Our reset work can grab modeset locks (since it needs to reset the
+ * state of outstanding pagelips). Hence it must not be run on our own
+ * dev-priv->wq work queue for otherwise the flush_work in the pageflip
+ * code will deadlock.
+ */
+ schedule_work(&dev_priv->gpu_error.work);
}
static void __always_unused i915_pageflip_stall_check(struct drm_device *dev, int pipe)
@@ -2027,9 +2078,9 @@ static void i915_hangcheck_elapsed(unsigned long data)
for_each_ring(ring, dev_priv, i) {
if (ring->hangcheck.score > FIRE) {
- DRM_ERROR("%s on %s\n",
- stuck[i] ? "stuck" : "no progress",
- ring->name);
+ DRM_INFO("%s on %s\n",
+ stuck[i] ? "stuck" : "no progress",
+ ring->name);
rings_hung++;
}
}