aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gpu_error.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c228
1 files changed, 136 insertions, 92 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 0c779671fe2d..944059322daa 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -30,6 +30,8 @@
#include <generated/utsrelease.h>
#include <linux/stop_machine.h>
#include <linux/zlib.h>
+#include <drm/drm_print.h>
+
#include "i915_drv.h"
static const char *engine_str(int engine)
@@ -175,6 +177,21 @@ static void i915_error_puts(struct drm_i915_error_state_buf *e,
#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
#define err_puts(e, s) i915_error_puts(e, s)
+static void __i915_printfn_error(struct drm_printer *p, struct va_format *vaf)
+{
+ i915_error_vprintf(p->arg, vaf->fmt, *vaf->va);
+}
+
+static inline struct drm_printer
+i915_error_printer(struct drm_i915_error_state_buf *e)
+{
+ struct drm_printer p = {
+ .printfn = __i915_printfn_error,
+ .arg = e,
+ };
+ return p;
+}
+
#ifdef CONFIG_DRM_I915_COMPRESS_ERROR
struct compress {
@@ -377,9 +394,9 @@ static void error_print_request(struct drm_i915_error_state_buf *m,
if (!erq->seqno)
return;
- err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, emitted %dms ago, head %08x, tail %08x\n",
+ err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n",
prefix, erq->pid, erq->ban_score,
- erq->context, erq->seqno,
+ erq->context, erq->seqno, erq->priority,
jiffies_to_msecs(jiffies - erq->jiffies),
erq->head, erq->tail);
}
@@ -388,15 +405,18 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
const char *header,
const struct drm_i915_error_context *ctx)
{
- err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n",
+ err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d guilty %d active %d\n",
header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
- ctx->ban_score, ctx->guilty, ctx->active);
+ ctx->priority, ctx->ban_score, ctx->guilty, ctx->active);
}
static void error_print_engine(struct drm_i915_error_state_buf *m,
const struct drm_i915_error_engine *ee)
{
+ int n;
+
err_printf(m, "%s command stream:\n", engine_str(ee->engine_id));
+ err_printf(m, " IDLE?: %s\n", yesno(ee->idle));
err_printf(m, " START: 0x%08x\n", ee->start);
err_printf(m, " HEAD: 0x%08x [0x%08x]\n", ee->head, ee->rq_head);
err_printf(m, " TAIL: 0x%08x [0x%08x, 0x%08x]\n",
@@ -465,8 +485,11 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
jiffies_to_msecs(jiffies - ee->hangcheck_timestamp));
err_printf(m, " engine reset count: %u\n", ee->reset_count);
- error_print_request(m, " ELSP[0]: ", &ee->execlist[0]);
- error_print_request(m, " ELSP[1]: ", &ee->execlist[1]);
+ for (n = 0; n < ee->num_ports; n++) {
+ err_printf(m, " ELSP[%d]:", n);
+ error_print_request(m, " ", &ee->execlist[n]);
+ }
+
error_print_context(m, " Active context: ", &ee->context);
}
@@ -542,34 +565,17 @@ static void print_error_obj(struct drm_i915_error_state_buf *m,
static void err_print_capabilities(struct drm_i915_error_state_buf *m,
const struct intel_device_info *info)
{
-#define PRINT_FLAG(x) err_printf(m, #x ": %s\n", yesno(info->x))
- DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG);
-#undef PRINT_FLAG
-}
+ struct drm_printer p = i915_error_printer(m);
-static __always_inline void err_print_param(struct drm_i915_error_state_buf *m,
- const char *name,
- const char *type,
- const void *x)
-{
- if (!__builtin_strcmp(type, "bool"))
- err_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x));
- else if (!__builtin_strcmp(type, "int"))
- err_printf(m, "i915.%s=%d\n", name, *(const int *)x);
- else if (!__builtin_strcmp(type, "unsigned int"))
- err_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x);
- else if (!__builtin_strcmp(type, "char *"))
- err_printf(m, "i915.%s=%s\n", name, *(const char **)x);
- else
- BUILD_BUG();
+ intel_device_info_dump_flags(info, &p);
}
static void err_print_params(struct drm_i915_error_state_buf *m,
- const struct i915_params *p)
+ const struct i915_params *params)
{
-#define PRINT(T, x) err_print_param(m, #x, #T, &p->x);
- I915_PARAMS_FOR_EACH(PRINT);
-#undef PRINT
+ struct drm_printer p = i915_error_printer(m);
+
+ i915_params_dump(params, &p);
}
static void err_print_pciid(struct drm_i915_error_state_buf *m,
@@ -584,6 +590,21 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m,
pdev->subsystem_device);
}
+static void err_print_uc(struct drm_i915_error_state_buf *m,
+ const struct i915_error_uc *error_uc)
+{
+ struct drm_printer p = i915_error_printer(m);
+ const struct i915_gpu_state *error =
+ container_of(error_uc, typeof(*error), uc);
+
+ if (!error->device_info.has_guc)
+ return;
+
+ intel_uc_fw_dump(&error_uc->guc_fw, &p);
+ intel_uc_fw_dump(&error_uc->huc_fw, &p);
+ print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log);
+}
+
int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
const struct i915_gpu_state *error)
{
@@ -754,11 +775,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
print_error_obj(m, dev_priv->engine[i],
"WA batchbuffer", ee->wa_batchbuffer);
- }
- print_error_obj(m, NULL, "Semaphores", error->semaphore);
-
- print_error_obj(m, NULL, "GuC log buffer", error->guc_log);
+ print_error_obj(m, dev_priv->engine[i],
+ "NULL context", ee->default_state);
+ }
if (error->overlay)
intel_overlay_print_error_state(m, error->overlay);
@@ -768,6 +788,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
err_print_capabilities(m, &error->device_info);
err_print_params(m, &error->params);
+ err_print_uc(m, &error->uc);
if (m->bytes == 0 && m->err)
return m->err;
@@ -826,6 +847,22 @@ static __always_inline void free_param(const char *type, void *x)
kfree(*(void **)x);
}
+static void cleanup_params(struct i915_gpu_state *error)
+{
+#define FREE(T, x, ...) free_param(#T, &error->params.x);
+ I915_PARAMS_FOR_EACH(FREE);
+#undef FREE
+}
+
+static void cleanup_uc_state(struct i915_gpu_state *error)
+{
+ struct i915_error_uc *error_uc = &error->uc;
+
+ kfree(error_uc->guc_fw.path);
+ kfree(error_uc->huc_fw.path);
+ i915_error_object_free(error_uc->guc_log);
+}
+
void __i915_gpu_state_free(struct kref *error_ref)
{
struct i915_gpu_state *error =
@@ -851,9 +888,6 @@ void __i915_gpu_state_free(struct kref *error_ref)
kfree(ee->waiters);
}
- i915_error_object_free(error->semaphore);
- i915_error_object_free(error->guc_log);
-
for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
kfree(error->active_bo[i]);
kfree(error->pinned_bo);
@@ -861,9 +895,8 @@ void __i915_gpu_state_free(struct kref *error_ref)
kfree(error->overlay);
kfree(error->display);
-#define FREE(T, x) free_param(#T, &error->params.x);
- I915_PARAMS_FOR_EACH(FREE);
-#undef FREE
+ cleanup_params(error);
+ cleanup_uc_state(error);
kfree(error);
}
@@ -907,7 +940,7 @@ i915_error_object_create(struct drm_i915_private *i915,
ggtt->base.insert_page(&ggtt->base, dma, slot,
I915_CACHE_NONE, 0);
- s = io_mapping_map_atomic_wc(&ggtt->mappable, slot);
+ s = io_mapping_map_atomic_wc(&ggtt->iomap, slot);
ret = compress_page(&compress, (void __force *)s, dst);
io_mapping_unmap_atomic(s);
@@ -1066,34 +1099,6 @@ gen8_engine_sync_index(struct intel_engine_cs *engine,
return idx;
}
-static void gen8_record_semaphore_state(struct i915_gpu_state *error,
- struct intel_engine_cs *engine,
- struct drm_i915_error_engine *ee)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- struct intel_engine_cs *to;
- enum intel_engine_id id;
-
- if (!error->semaphore)
- return;
-
- for_each_engine(to, dev_priv, id) {
- int idx;
- u16 signal_offset;
- u32 *tmp;
-
- if (engine == to)
- continue;
-
- signal_offset =
- (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4;
- tmp = error->semaphore->pages[0];
- idx = gen8_engine_sync_index(engine, to);
-
- ee->semaphore_mboxes[idx] = tmp[signal_offset];
- }
-}
-
static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
struct drm_i915_error_engine *ee)
{
@@ -1167,11 +1172,12 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
if (INTEL_GEN(dev_priv) >= 6) {
ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base));
- ee->fault_reg = I915_READ(RING_FAULT_REG(engine));
- if (INTEL_GEN(dev_priv) >= 8)
- gen8_record_semaphore_state(error, engine, ee);
- else
+ if (INTEL_GEN(dev_priv) >= 8) {
+ ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG);
+ } else {
gen6_record_semaphore_state(engine, ee);
+ ee->fault_reg = I915_READ(RING_FAULT_REG(engine));
+ }
}
if (INTEL_GEN(dev_priv) >= 4) {
@@ -1234,6 +1240,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
ee->hws = I915_READ(mmio);
}
+ ee->idle = intel_engine_is_idle(engine);
ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
ee->hangcheck_action = engine->hangcheck.action;
ee->hangcheck_stalled = engine->hangcheck.stalled;
@@ -1266,6 +1273,7 @@ static void record_request(struct drm_i915_gem_request *request,
struct drm_i915_error_request *erq)
{
erq->context = request->ctx->hw_id;
+ erq->priority = request->priotree.priority;
erq->ban_score = atomic_read(&request->ctx->ban_score);
erq->seqno = request->global_seqno;
erq->jiffies = request->emitted_jiffies;
@@ -1327,17 +1335,19 @@ static void engine_record_requests(struct intel_engine_cs *engine,
static void error_record_engine_execlists(struct intel_engine_cs *engine,
struct drm_i915_error_engine *ee)
{
- const struct execlist_port *port = engine->execlist_port;
+ const struct intel_engine_execlists * const execlists = &engine->execlists;
unsigned int n;
- for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
- struct drm_i915_gem_request *rq = port_request(&port[n]);
+ for (n = 0; n < execlists_num_ports(execlists); n++) {
+ struct drm_i915_gem_request *rq = port_request(&execlists->port[n]);
if (!rq)
break;
record_request(rq, &ee->execlist[n]);
}
+
+ ee->num_ports = n;
}
static void record_context(struct drm_i915_error_context *e,
@@ -1357,6 +1367,7 @@ static void record_context(struct drm_i915_error_context *e,
e->handle = ctx->user_handle;
e->hw_id = ctx->hw_id;
+ e->priority = ctx->priority;
e->ban_score = atomic_read(&ctx->ban_score);
e->guilty = atomic_read(&ctx->guilty_count);
e->active = atomic_read(&ctx->active_count);
@@ -1391,15 +1402,30 @@ static void request_record_user_bo(struct drm_i915_gem_request *request,
ee->user_bo_count = count;
}
+static struct drm_i915_error_object *
+capture_object(struct drm_i915_private *dev_priv,
+ struct drm_i915_gem_object *obj)
+{
+ if (obj && i915_gem_object_has_pages(obj)) {
+ struct i915_vma fake = {
+ .node = { .start = U64_MAX, .size = obj->base.size },
+ .size = obj->base.size,
+ .pages = obj->mm.pages,
+ .obj = obj,
+ };
+
+ return i915_error_object_create(dev_priv, &fake);
+ } else {
+ return NULL;
+ }
+}
+
static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
struct i915_gpu_state *error)
{
struct i915_ggtt *ggtt = &dev_priv->ggtt;
int i;
- error->semaphore =
- i915_error_object_create(dev_priv, dev_priv->semaphore);
-
for (i = 0; i < I915_NUM_ENGINES; i++) {
struct intel_engine_cs *engine = dev_priv->engine[i];
struct drm_i915_error_engine *ee = &error->engine[i];
@@ -1465,6 +1491,9 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
ee->wa_ctx =
i915_error_object_create(dev_priv, engine->wa_ctx.vma);
+
+ ee->default_state =
+ capture_object(dev_priv, engine->default_state);
}
}
@@ -1550,15 +1579,25 @@ static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
error->pinned_bo = bo;
}
-static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv,
- struct i915_gpu_state *error)
+static void capture_uc_state(struct i915_gpu_state *error)
{
- /* Capturing log buf contents won't be useful if logging was disabled */
- if (!dev_priv->guc.log.vma || (i915.guc_log_level < 0))
+ struct drm_i915_private *i915 = error->i915;
+ struct i915_error_uc *error_uc = &error->uc;
+
+ /* Capturing uC state won't be useful if there is no GuC */
+ if (!error->device_info.has_guc)
return;
- error->guc_log = i915_error_object_create(dev_priv,
- dev_priv->guc.log.vma);
+ error_uc->guc_fw = i915->guc.fw;
+ error_uc->huc_fw = i915->huc.fw;
+
+ /* Non-default firmware paths will be specified by the modparam.
+ * As modparams are generally accesible from the userspace make
+ * explicit copies of the firmware paths.
+ */
+ error_uc->guc_fw.path = kstrdup(i915->guc.fw.path, GFP_ATOMIC);
+ error_uc->huc_fw.path = kstrdup(i915->huc.fw.path, GFP_ATOMIC);
+ error_uc->guc_log = i915_error_object_create(i915, i915->guc.log.vma);
}
/* Capture all registers which don't fit into another category. */
@@ -1665,8 +1704,8 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
struct i915_gpu_state *error)
{
error->awake = dev_priv->gt.awake;
- error->wakelock = atomic_read(&dev_priv->pm.wakeref_count);
- error->suspended = dev_priv->pm.suspended;
+ error->wakelock = atomic_read(&dev_priv->runtime_pm.wakeref_count);
+ error->suspended = dev_priv->runtime_pm.suspended;
error->iommu = -1;
#ifdef CONFIG_INTEL_IOMMU
@@ -1686,6 +1725,14 @@ static __always_inline void dup_param(const char *type, void *x)
*(void **)x = kstrdup(*(void **)x, GFP_ATOMIC);
}
+static void capture_params(struct i915_gpu_state *error)
+{
+ error->params = i915_modparams;
+#define DUP(T, x, ...) dup_param(#T, &error->params.x);
+ I915_PARAMS_FOR_EACH(DUP);
+#undef DUP
+}
+
static int capture(void *data)
{
struct i915_gpu_state *error = data;
@@ -1696,10 +1743,8 @@ static int capture(void *data)
ktime_to_timeval(ktime_sub(ktime_get(),
error->i915->gt.last_init_time));
- error->params = i915;
-#define DUP(T, x) dup_param(#T, &error->params.x);
- I915_PARAMS_FOR_EACH(DUP);
-#undef DUP
+ capture_params(error);
+ capture_uc_state(error);
i915_capture_gen_state(error->i915, error);
i915_capture_reg_state(error->i915, error);
@@ -1707,7 +1752,6 @@ static int capture(void *data)
i915_gem_record_rings(error->i915, error);
i915_capture_active_buffers(error->i915, error);
i915_capture_pinned_buffers(error->i915, error);
- i915_gem_capture_guc_log_buffer(error->i915, error);
error->overlay = intel_overlay_capture_error_state(error->i915);
error->display = intel_display_capture_error_state(error->i915);
@@ -1751,7 +1795,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
struct i915_gpu_state *error;
unsigned long flags;
- if (!i915.error_capture)
+ if (!i915_modparams.error_capture)
return;
if (READ_ONCE(dev_priv->gpu_error.first_error))