aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915
diff options
context:
space:
mode:
authorJohn Harrison <John.C.Harrison@Intel.com>2021-12-10 22:58:58 -0800
committerJohn Harrison <John.C.Harrison@Intel.com>2021-12-20 15:34:41 -0800
commitfb3965f9ae28b83290e5b5431a77aace66071ca1 (patch)
tree3c168bd38af59d212b80959a22e73d3fc484168d /drivers/gpu/drm/i915
parentdrm/i915/guc: Increase GuC log size for CONFIG_DEBUG_GEM (diff)
downloadlinux-dev-fb3965f9ae28b83290e5b5431a77aace66071ca1.tar.xz
linux-dev-fb3965f9ae28b83290e5b5431a77aace66071ca1.zip
drm/i915/guc: Flag an error if an engine reset fails
If GuC encounters an error during engine reset, the i915 driver promotes to full GT reset. This includes an info message about why the reset is happening. However, that is not treated as a failure by any of the CI systems because resets are an expected occurrence during testing. This kind of failure is a major problem and should never happen. So, complain more loudly and make sure CI notices. Signed-off-by: John Harrison <John.C.Harrison@Intel.com> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20211211065859.2248188-4-John.C.Harrison@Intel.com
Diffstat (limited to 'drivers/gpu/drm/i915')
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c14
1 files changed, 11 insertions, 3 deletions
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 30933d59287c..e7517206af82 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -4033,11 +4033,12 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len)
{
struct intel_engine_cs *engine;
+ struct intel_gt *gt = guc_to_gt(guc);
u8 guc_class, instance;
u32 reason;
if (unlikely(len != 3)) {
- drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
+ drm_err(&gt->i915->drm, "Invalid length %u", len);
return -EPROTO;
}
@@ -4047,12 +4048,19 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
engine = guc_lookup_engine(guc, guc_class, instance);
if (unlikely(!engine)) {
- drm_err(&guc_to_gt(guc)->i915->drm,
+ drm_err(&gt->i915->drm,
"Invalid engine %d:%d", guc_class, instance);
return -EPROTO;
}
- intel_gt_handle_error(guc_to_gt(guc), engine->mask,
+ /*
+ * This is an unexpected failure of a hardware feature. So, log a real
+ * error message not just the informational that comes with the reset.
+ */
+ drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
+ guc_class, instance, engine->name, reason);
+
+ intel_gt_handle_error(gt, engine->mask,
I915_ERROR_CAPTURE,
"GuC failed to reset %s (reason=0x%08x)\n",
engine->name, reason);