aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gpu_error.c
diff options
context:
space:
mode:
authorAlan Previn <alan.previn.teres.alexis@intel.com>2022-03-21 09:45:26 -0700
committerLucas De Marchi <lucas.demarchi@intel.com>2022-03-22 10:33:31 -0700
commita6f0f9cf330a86971f587333762d9a61a218bc30 (patch)
treeae5c5e1cd362ebc35f328fd5e268537bb7e043f4 /drivers/gpu/drm/i915/i915_gpu_error.c
parentdrm/i915/guc: Pre-allocate output nodes for extraction (diff)
downloadlinux-dev-a6f0f9cf330a86971f587333762d9a61a218bc30.tar.xz
linux-dev-a6f0f9cf330a86971f587333762d9a61a218bc30.zip
drm/i915/guc: Plumb GuC-capture into gpu_coredump
Add a flags parameter through all of the coredump creation functions. Add a bitmask flag to indicate if the top level gpu_coredump event is triggered in response to a GuC context reset notification. Using that flag, ensure all coredump functions that read or print mmio-register values related to work submission or command-streamer engines are skipped and replaced with a calls guc-capture module equivalent functions to retrieve or print the register dump. While here, split out display related register reading and printing into its own function that is called agnostic to whether GuC had triggered the reset. For now, introduce an empty printing function that can filled in on a subsequent patch just to handle formatting. Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20220321164527.2500062-13-alan.previn.teres.alexis@intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c266
1 files changed, 183 insertions, 83 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 81f70099525a..c76d8df4c190 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -46,6 +46,7 @@
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
+#include "gt/uc/intel_guc_capture.h"
#include "i915_driver.h"
#include "i915_drv.h"
@@ -593,15 +594,11 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
ee->vm_info.pp_dir_base);
}
}
- err_printf(m, " hung: %u\n", ee->hung);
- err_printf(m, " engine reset count: %u\n", ee->reset_count);
for (n = 0; n < ee->num_ports; n++) {
err_printf(m, " ELSP[%d]:", n);
error_print_request(m, " ", &ee->execlist[n]);
}
-
- error_print_context(m, " Active context: ", &ee->context);
}
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
@@ -713,23 +710,30 @@ static void err_print_gt_info(struct drm_i915_error_state_buf *m,
intel_sseu_print_topology(gt->_gt->i915, &gt->info.sseu, &p);
}
-static void err_print_gt(struct drm_i915_error_state_buf *m,
- struct intel_gt_coredump *gt)
+static void err_print_gt_display(struct drm_i915_error_state_buf *m,
+ struct intel_gt_coredump *gt)
+{
+ err_printf(m, "IER: 0x%08x\n", gt->ier);
+ err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr);
+}
+
+static void err_print_gt_global_nonguc(struct drm_i915_error_state_buf *m,
+ struct intel_gt_coredump *gt)
{
- const struct intel_engine_coredump *ee;
int i;
err_printf(m, "GT awake: %s\n", yesno(gt->awake));
err_printf(m, "EIR: 0x%08x\n", gt->eir);
- err_printf(m, "IER: 0x%08x\n", gt->ier);
+ err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er);
+
for (i = 0; i < gt->ngtier; i++)
err_printf(m, "GTIER[%d]: 0x%08x\n", i, gt->gtier[i]);
- err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er);
- err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake);
- err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr);
+}
- for (i = 0; i < gt->nfence; i++)
- err_printf(m, " fence[%d] = %08llx\n", i, gt->fence[i]);
+static void err_print_gt_global(struct drm_i915_error_state_buf *m,
+ struct intel_gt_coredump *gt)
+{
+ err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake);
if (IS_GRAPHICS_VER(m->i915, 6, 11)) {
err_printf(m, "ERROR: 0x%08x\n", gt->error);
@@ -768,19 +772,38 @@ static void err_print_gt(struct drm_i915_error_state_buf *m,
err_printf(m, " GAM_DONE: 0x%08x\n", gt->gam_done);
}
+}
+
+static void err_print_gt_fences(struct drm_i915_error_state_buf *m,
+ struct intel_gt_coredump *gt)
+{
+ int i;
+
+ for (i = 0; i < gt->nfence; i++)
+ err_printf(m, " fence[%d] = %08llx\n", i, gt->fence[i]);
+}
+
+static void err_print_gt_engines(struct drm_i915_error_state_buf *m,
+ struct intel_gt_coredump *gt)
+{
+ const struct intel_engine_coredump *ee;
for (ee = gt->engine; ee; ee = ee->next) {
const struct i915_vma_coredump *vma;
- error_print_engine(m, ee);
+ if (ee->guc_capture_node)
+ intel_guc_capture_print_engine_node(m, ee);
+ else
+ error_print_engine(m, ee);
+
+ err_printf(m, " hung: %u\n", ee->hung);
+ err_printf(m, " engine reset count: %u\n", ee->reset_count);
+ error_print_context(m, " Active context: ", &ee->context);
+
for (vma = ee->vma; vma; vma = vma->next)
print_error_vma(m, ee->engine, vma);
}
- if (gt->uc)
- err_print_uc(m, gt->uc);
-
- err_print_gt_info(m, gt);
}
static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
@@ -836,8 +859,30 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock));
err_printf(m, "PM suspended: %s\n", yesno(error->suspended));
- if (error->gt)
- err_print_gt(m, error->gt);
+ if (error->gt) {
+ bool print_guc_capture = false;
+
+ if (error->gt->uc && error->gt->uc->is_guc_capture)
+ print_guc_capture = true;
+
+ err_print_gt_display(m, error->gt);
+ err_print_gt_global_nonguc(m, error->gt);
+ err_print_gt_fences(m, error->gt);
+
+ /*
+ * GuC dumped global, eng-class and eng-instance registers together
+ * as part of engine state dump so we print in err_print_gt_engines
+ */
+ if (!print_guc_capture)
+ err_print_gt_global(m, error->gt);
+
+ err_print_gt_engines(m, error->gt);
+
+ if (error->gt->uc)
+ err_print_uc(m, error->gt->uc);
+
+ err_print_gt_info(m, error->gt);
+ }
if (error->overlay)
intel_overlay_print_error_state(m, error->overlay);
@@ -985,6 +1030,7 @@ static void cleanup_gt(struct intel_gt_coredump *gt)
gt->engine = ee->next;
i915_vma_coredump_free(ee->vma);
+ intel_guc_capture_free_node(ee);
kfree(ee);
}
@@ -1436,7 +1482,7 @@ static void add_vma_coredump(struct intel_engine_coredump *ee,
}
struct intel_engine_coredump *
-intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags)
{
struct intel_engine_coredump *ee;
@@ -1446,8 +1492,10 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
ee->engine = engine;
- engine_record_registers(ee);
- engine_record_execlists(ee);
+ if (!(dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)) {
+ engine_record_registers(ee);
+ engine_record_execlists(ee);
+ }
return ee;
}
@@ -1511,7 +1559,8 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
static struct intel_engine_coredump *
capture_engine(struct intel_engine_cs *engine,
- struct i915_vma_compress *compress)
+ struct i915_vma_compress *compress,
+ u32 dump_flags)
{
struct intel_engine_capture_vma *capture = NULL;
struct intel_engine_coredump *ee;
@@ -1519,7 +1568,7 @@ capture_engine(struct intel_engine_cs *engine,
struct i915_request *rq = NULL;
unsigned long flags;
- ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL);
+ ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL, dump_flags);
if (!ee)
return NULL;
@@ -1552,6 +1601,8 @@ capture_engine(struct intel_engine_cs *engine,
i915_request_put(rq);
goto no_request_capture;
}
+ if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
+ intel_guc_capture_get_matching_node(engine->gt, ee, ce);
intel_engine_coredump_add_vma(ee, capture, compress);
i915_request_put(rq);
@@ -1566,7 +1617,8 @@ no_request_capture:
static void
gt_record_engines(struct intel_gt_coredump *gt,
intel_engine_mask_t engine_mask,
- struct i915_vma_compress *compress)
+ struct i915_vma_compress *compress,
+ u32 dump_flags)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -1577,7 +1629,7 @@ gt_record_engines(struct intel_gt_coredump *gt,
/* Refill our page pool before entering atomic section */
pool_refill(&compress->pool, ALLOW_FAIL);
- ee = capture_engine(engine, compress);
+ ee = capture_engine(engine, compress, dump_flags);
if (!ee)
continue;
@@ -1585,6 +1637,8 @@ gt_record_engines(struct intel_gt_coredump *gt,
gt->simulated |= ee->simulated;
if (ee->simulated) {
+ if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
+ intel_guc_capture_free_node(ee);
kfree(ee);
continue;
}
@@ -1620,8 +1674,74 @@ gt_record_uc(struct intel_gt_coredump *gt,
return error_uc;
}
-/* Capture all registers which don't fit into another category. */
-static void gt_record_regs(struct intel_gt_coredump *gt)
+/* Capture display registers. */
+static void gt_record_display_regs(struct intel_gt_coredump *gt)
+{
+ struct intel_uncore *uncore = gt->_gt->uncore;
+ struct drm_i915_private *i915 = uncore->i915;
+
+ if (GRAPHICS_VER(i915) >= 6)
+ gt->derrmr = intel_uncore_read(uncore, DERRMR);
+
+ if (GRAPHICS_VER(i915) >= 8)
+ gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
+ else if (IS_VALLEYVIEW(i915))
+ gt->ier = intel_uncore_read(uncore, VLV_IER);
+ else if (HAS_PCH_SPLIT(i915))
+ gt->ier = intel_uncore_read(uncore, DEIER);
+ else if (GRAPHICS_VER(i915) == 2)
+ gt->ier = intel_uncore_read16(uncore, GEN2_IER);
+ else
+ gt->ier = intel_uncore_read(uncore, GEN2_IER);
+}
+
+/* Capture all other registers that GuC doesn't capture. */
+static void gt_record_global_nonguc_regs(struct intel_gt_coredump *gt)
+{
+ struct intel_uncore *uncore = gt->_gt->uncore;
+ struct drm_i915_private *i915 = uncore->i915;
+ int i;
+
+ if (IS_VALLEYVIEW(i915)) {
+ gt->gtier[0] = intel_uncore_read(uncore, GTIER);
+ gt->ngtier = 1;
+ } else if (GRAPHICS_VER(i915) >= 11) {
+ gt->gtier[0] =
+ intel_uncore_read(uncore,
+ GEN11_RENDER_COPY_INTR_ENABLE);
+ gt->gtier[1] =
+ intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE);
+ gt->gtier[2] =
+ intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE);
+ gt->gtier[3] =
+ intel_uncore_read(uncore,
+ GEN11_GPM_WGBOXPERF_INTR_ENABLE);
+ gt->gtier[4] =
+ intel_uncore_read(uncore,
+ GEN11_CRYPTO_RSVD_INTR_ENABLE);
+ gt->gtier[5] =
+ intel_uncore_read(uncore,
+ GEN11_GUNIT_CSME_INTR_ENABLE);
+ gt->ngtier = 6;
+ } else if (GRAPHICS_VER(i915) >= 8) {
+ for (i = 0; i < 4; i++)
+ gt->gtier[i] =
+ intel_uncore_read(uncore, GEN8_GT_IER(i));
+ gt->ngtier = 4;
+ } else if (HAS_PCH_SPLIT(i915)) {
+ gt->gtier[0] = intel_uncore_read(uncore, GTIER);
+ gt->ngtier = 1;
+ }
+
+ gt->eir = intel_uncore_read(uncore, EIR);
+ gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER);
+}
+
+/*
+ * Capture all registers that relate to workload submission.
+ * NOTE: In GuC submission, when GuC resets an engine, it can dump these for us
+ */
+static void gt_record_global_regs(struct intel_gt_coredump *gt)
{
struct intel_uncore *uncore = gt->_gt->uncore;
struct drm_i915_private *i915 = uncore->i915;
@@ -1637,11 +1757,8 @@ static void gt_record_regs(struct intel_gt_coredump *gt)
*/
/* 1: Registers specific to a single generation */
- if (IS_VALLEYVIEW(i915)) {
- gt->gtier[0] = intel_uncore_read(uncore, GTIER);
- gt->ier = intel_uncore_read(uncore, VLV_IER);
+ if (IS_VALLEYVIEW(i915))
gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV);
- }
if (GRAPHICS_VER(i915) == 7)
gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
@@ -1669,7 +1786,6 @@ static void gt_record_regs(struct intel_gt_coredump *gt)
gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
if (GRAPHICS_VER(i915) >= 6) {
- gt->derrmr = intel_uncore_read(uncore, DERRMR);
if (GRAPHICS_VER(i915) < 12) {
gt->error = intel_uncore_read(uncore, ERROR_GEN6);
gt->done_reg = intel_uncore_read(uncore, DONE_REG);
@@ -1705,44 +1821,6 @@ static void gt_record_regs(struct intel_gt_coredump *gt)
gt->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE);
}
-
- /* 4: Everything else */
- if (GRAPHICS_VER(i915) >= 11) {
- gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
- gt->gtier[0] =
- intel_uncore_read(uncore,
- GEN11_RENDER_COPY_INTR_ENABLE);
- gt->gtier[1] =
- intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE);
- gt->gtier[2] =
- intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE);
- gt->gtier[3] =
- intel_uncore_read(uncore,
- GEN11_GPM_WGBOXPERF_INTR_ENABLE);
- gt->gtier[4] =
- intel_uncore_read(uncore,
- GEN11_CRYPTO_RSVD_INTR_ENABLE);
- gt->gtier[5] =
- intel_uncore_read(uncore,
- GEN11_GUNIT_CSME_INTR_ENABLE);
- gt->ngtier = 6;
- } else if (GRAPHICS_VER(i915) >= 8) {
- gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
- for (i = 0; i < 4; i++)
- gt->gtier[i] =
- intel_uncore_read(uncore, GEN8_GT_IER(i));
- gt->ngtier = 4;
- } else if (HAS_PCH_SPLIT(i915)) {
- gt->ier = intel_uncore_read(uncore, DEIER);
- gt->gtier[0] = intel_uncore_read(uncore, GTIER);
- gt->ngtier = 1;
- } else if (GRAPHICS_VER(i915) == 2) {
- gt->ier = intel_uncore_read16(uncore, GEN2_IER);
- } else if (!IS_VALLEYVIEW(i915)) {
- gt->ier = intel_uncore_read(uncore, GEN2_IER);
- }
- gt->eir = intel_uncore_read(uncore, EIR);
- gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER);
}
static void gt_record_info(struct intel_gt_coredump *gt)
@@ -1854,7 +1932,7 @@ i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
struct intel_gt_coredump *
-intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags)
{
struct intel_gt_coredump *gc;
@@ -1865,7 +1943,21 @@ intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
gc->_gt = gt;
gc->awake = intel_gt_pm_is_awake(gt);
- gt_record_regs(gc);
+ gt_record_display_regs(gc);
+ gt_record_global_nonguc_regs(gc);
+
+ /*
+ * GuC dumps global, eng-class and eng-instance registers
+ * (that can change as part of engine state during execution)
+ * before an engine is reset due to a hung context.
+ * GuC captures and reports all three groups of registers
+ * together as a single set before the engine is reset.
+ * Thus, if GuC triggered the context reset we retrieve
+ * the register values as part of gt_record_engines.
+ */
+ if (!(dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE))
+ gt_record_global_regs(gc);
+
gt_record_fences(gc);
return gc;
@@ -1899,7 +1991,7 @@ void i915_vma_capture_finish(struct intel_gt_coredump *gt,
}
static struct i915_gpu_coredump *
-__i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+__i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
{
struct drm_i915_private *i915 = gt->i915;
struct i915_gpu_coredump *error;
@@ -1913,7 +2005,7 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
if (!error)
return ERR_PTR(-ENOMEM);
- error->gt = intel_gt_coredump_alloc(gt, ALLOW_FAIL);
+ error->gt = intel_gt_coredump_alloc(gt, ALLOW_FAIL, dump_flags);
if (error->gt) {
struct i915_vma_compress *compress;
@@ -1924,11 +2016,19 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
return ERR_PTR(-ENOMEM);
}
+ if (INTEL_INFO(i915)->has_gt_uc) {
+ error->gt->uc = gt_record_uc(error->gt, compress);
+ if (error->gt->uc) {
+ if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
+ error->gt->uc->is_guc_capture = true;
+ else
+ GEM_BUG_ON(error->gt->uc->is_guc_capture);
+ }
+ }
+
gt_record_info(error->gt);
- gt_record_engines(error->gt, engine_mask, compress);
+ gt_record_engines(error->gt, engine_mask, compress, dump_flags);
- if (INTEL_INFO(i915)->has_gt_uc)
- error->gt->uc = gt_record_uc(error->gt, compress);
i915_vma_capture_finish(error->gt, compress);
@@ -1941,7 +2041,7 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
}
struct i915_gpu_coredump *
-i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
{
static DEFINE_MUTEX(capture_mutex);
int ret = mutex_lock_interruptible(&capture_mutex);
@@ -1950,7 +2050,7 @@ i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
if (ret)
return ERR_PTR(ret);
- dump = __i915_gpu_coredump(gt, engine_mask);
+ dump = __i915_gpu_coredump(gt, engine_mask, dump_flags);
mutex_unlock(&capture_mutex);
return dump;
@@ -1997,11 +2097,11 @@ void i915_error_state_store(struct i915_gpu_coredump *error)
* to pick up.
*/
void i915_capture_error_state(struct intel_gt *gt,
- intel_engine_mask_t engine_mask)
+ intel_engine_mask_t engine_mask, u32 dump_flags)
{
struct i915_gpu_coredump *error;
- error = i915_gpu_coredump(gt, engine_mask);
+ error = i915_gpu_coredump(gt, engine_mask, dump_flags);
if (IS_ERR(error)) {
cmpxchg(&gt->i915->gpu_error.first_error, NULL, error);
return;