aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gpu_error.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2019-10-23 14:31:08 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2019-10-23 23:52:10 +0100
commit058179e72e0956a2dfe4927db6cbe5fbfb2406aa (patch)
treec187567f6a2c286a45d28f88189313df6856ce31 /drivers/gpu/drm/i915/i915_gpu_error.c
parentdrm/i915/gem: Cancel contexts when hangchecking is disabled (diff)
downloadlinux-dev-058179e72e0956a2dfe4927db6cbe5fbfb2406aa.tar.xz
linux-dev-058179e72e0956a2dfe4927db6cbe5fbfb2406aa.zip
drm/i915/gt: Replace hangcheck by heartbeats
Replace sampling the engine state every so often with a periodic heartbeat request to measure the health of an engine. This is coupled with the forced-preemption to allow long running requests to survive so long as they do not block other users. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Jon Bloomfield <jon.bloomfield@intel.com> Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20191023133108.21401-5-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c33
1 files changed, 4 insertions, 29 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 5cf4eed5add8..47239df653f2 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -534,10 +534,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
}
err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head);
err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail);
- err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n",
- jiffies_to_msecs(ee->hangcheck_timestamp - epoch),
- ee->hangcheck_timestamp,
- ee->hangcheck_timestamp == epoch ? "; epoch" : "");
err_printf(m, " engine reset count: %u\n", ee->reset_count);
for (n = 0; n < ee->num_ports; n++) {
@@ -679,11 +675,8 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
ts = ktime_to_timespec64(error->uptime);
err_printf(m, "Uptime: %lld s %ld us\n",
(s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
- err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ);
- err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n",
- error->capture,
- jiffies_to_msecs(jiffies - error->capture),
- jiffies_to_msecs(error->capture - error->epoch));
+ err_printf(m, "Capture: %lu jiffies; %d ms ago\n",
+ error->capture, jiffies_to_msecs(jiffies - error->capture));
for (ee = error->engine; ee; ee = ee->next)
err_printf(m, "Active process (on ring %s): %s [%d]\n",
@@ -742,7 +735,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
err_printf(m, "GTT_CACHE_EN: 0x%08x\n", error->gtt_cache);
for (ee = error->engine; ee; ee = ee->next)
- error_print_engine(m, ee, error->epoch);
+ error_print_engine(m, ee, error->capture);
for (ee = error->engine; ee; ee = ee->next) {
const struct drm_i915_error_object *obj;
@@ -770,7 +763,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
for (j = 0; j < ee->num_requests; j++)
error_print_request(m, " ",
&ee->requests[j],
- error->epoch);
+ error->capture);
}
print_error_obj(m, ee->engine, "ringbuffer", ee->ringbuffer);
@@ -1144,8 +1137,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
}
ee->idle = intel_engine_is_idle(engine);
- if (!ee->idle)
- ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
engine);
@@ -1657,20 +1648,6 @@ static void capture_params(struct i915_gpu_state *error)
i915_params_copy(&error->params, &i915_modparams);
}
-static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
-{
- const struct drm_i915_error_engine *ee;
- unsigned long epoch = error->capture;
-
- for (ee = error->engine; ee; ee = ee->next) {
- if (ee->hangcheck_timestamp &&
- time_before(ee->hangcheck_timestamp, epoch))
- epoch = ee->hangcheck_timestamp;
- }
-
- return epoch;
-}
-
static void capture_finish(struct i915_gpu_state *error)
{
struct i915_ggtt *ggtt = &error->i915->ggtt;
@@ -1722,8 +1699,6 @@ i915_capture_gpu_state(struct drm_i915_private *i915)
error->overlay = intel_overlay_capture_error_state(i915);
error->display = intel_display_capture_error_state(i915);
- error->epoch = capture_find_epoch(error);
-
capture_finish(error);
compress_fini(&compress);