diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gpu_error.c | 68 |
1 files changed, 30 insertions, 38 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index cf6e47adfde6..d8cac4c5881f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -570,6 +570,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, ee->vm_info.pp_dir_base); } } + err_printf(m, " hung: %u\n", ee->hung); err_printf(m, " engine reset count: %u\n", ee->reset_count); for (n = 0; n < ee->num_ports; n++) { @@ -1026,6 +1027,7 @@ i915_vma_coredump_create(const struct intel_gt *gt, dma_addr_t dma; for_each_sgt_daddr(dma, iter, vma->pages) { + mutex_lock(&ggtt->error_mutex); ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); mb(); @@ -1035,6 +1037,10 @@ i915_vma_coredump_create(const struct intel_gt *gt, (void __force *)s, dst, true); io_mapping_unmap(s); + + mb(); + ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); + mutex_unlock(&ggtt->error_mutex); if (ret) break; } @@ -1451,6 +1457,7 @@ capture_engine(struct intel_engine_cs *engine, static void gt_record_engines(struct intel_gt_coredump *gt, + intel_engine_mask_t engine_mask, struct i915_vma_compress *compress) { struct intel_engine_cs *engine; @@ -1466,6 +1473,8 @@ gt_record_engines(struct intel_gt_coredump *gt, if (!ee) continue; + ee->hung = engine->mask & engine_mask; + gt->simulated |= ee->simulated; if (ee->simulated) { kfree(ee); @@ -1505,25 +1514,6 @@ gt_record_uc(struct intel_gt_coredump *gt, return error_uc; } -static void gt_capture_prepare(struct intel_gt_coredump *gt) -{ - struct i915_ggtt *ggtt = gt->_gt->ggtt; - - mutex_lock(&ggtt->error_mutex); -} - -static void gt_capture_finish(struct intel_gt_coredump *gt) -{ - struct i915_ggtt *ggtt = gt->_gt->ggtt; - - if (drm_mm_node_allocated(&ggtt->error_capture)) - ggtt->vm.clear_range(&ggtt->vm, - ggtt->error_capture.start, - PAGE_SIZE); - - mutex_unlock(&ggtt->error_mutex); -} - /* Capture all registers which don't fit into another category. 
*/ static void gt_record_regs(struct intel_gt_coredump *gt) { @@ -1669,24 +1659,25 @@ static u32 generate_ecode(const struct intel_engine_coredump *ee) static const char *error_msg(struct i915_gpu_coredump *error) { struct intel_engine_coredump *first = NULL; + unsigned int hung_classes = 0; struct intel_gt_coredump *gt; - intel_engine_mask_t engines; int len; - engines = 0; for (gt = error->gt; gt; gt = gt->next) { struct intel_engine_coredump *cs; - if (gt->engine && !first) - first = gt->engine; - - for (cs = gt->engine; cs; cs = cs->next) - engines |= cs->engine->mask; + for (cs = gt->engine; cs; cs = cs->next) { + if (cs->hung) { + hung_classes |= BIT(cs->engine->uabi_class); + if (!first) + first = cs; + } + } } len = scnprintf(error->error_msg, sizeof(error->error_msg), "GPU HANG: ecode %d:%x:%08x", - INTEL_GEN(error->i915), engines, + INTEL_GEN(error->i915), hung_classes, generate_ecode(first)); if (first && first->context.pid) { /* Just show the first executing process, more is confusing */ @@ -1782,8 +1773,6 @@ i915_vma_capture_prepare(struct intel_gt_coredump *gt) return NULL; } - gt_capture_prepare(gt); - return compress; } @@ -1793,14 +1782,14 @@ void i915_vma_capture_finish(struct intel_gt_coredump *gt, if (!compress) return; - gt_capture_finish(gt); - compress_fini(compress); kfree(compress); } -struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915) +struct i915_gpu_coredump * +i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) { + struct drm_i915_private *i915 = gt->i915; struct i915_gpu_coredump *error; /* Check if GPU capture has been disabled */ @@ -1812,7 +1801,7 @@ struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915) if (!error) return ERR_PTR(-ENOMEM); - error->gt = intel_gt_coredump_alloc(&i915->gt, ALLOW_FAIL); + error->gt = intel_gt_coredump_alloc(gt, ALLOW_FAIL); if (error->gt) { struct i915_vma_compress *compress; @@ -1824,7 +1813,7 @@ struct i915_gpu_coredump 
*i915_gpu_coredump(struct drm_i915_private *i915) } gt_record_info(error->gt); - gt_record_engines(error->gt, compress); + gt_record_engines(error->gt, engine_mask, compress); if (INTEL_INFO(i915)->has_gt_uc) error->gt->uc = gt_record_uc(error->gt, compress); @@ -1871,20 +1860,23 @@ void i915_error_state_store(struct i915_gpu_coredump *error) /** * i915_capture_error_state - capture an error record for later analysis - * @i915: i915 device + * @gt: intel_gt which originated the hang + * @engine_mask: hung engines + * * * Should be called when an error is detected (either a hang or an error * interrupt) to capture error state from the time of the error. Fills * out a structure which becomes available in debugfs for user level tools * to pick up. */ -void i915_capture_error_state(struct drm_i915_private *i915) +void i915_capture_error_state(struct intel_gt *gt, + intel_engine_mask_t engine_mask) { struct i915_gpu_coredump *error; - error = i915_gpu_coredump(i915); + error = i915_gpu_coredump(gt, engine_mask); if (IS_ERR(error)) { - cmpxchg(&i915->gpu_error.first_error, NULL, error); + cmpxchg(&gt->i915->gpu_error.first_error, NULL, error); return; } |