aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrederic Barrat <fbarrat@linux.ibm.com>2019-05-23 14:28:04 +0200
committerMichael Ellerman <mpe@ellerman.id.au>2019-06-02 19:39:36 +1000
commit89d87bcba2874d824affb7842bb3960cb6f5be05 (patch)
tree84892e8ebd7b46fce9f251778f1da073bb881ab3
parentpowerpc/powernv: Update firmware archaeology around OPAL_HANDLE_HMI (diff)
downloadlinux-dev-89d87bcba2874d824affb7842bb3960cb6f5be05.tar.xz
linux-dev-89d87bcba2874d824affb7842bb3960cb6f5be05.zip
powerpc/powernv: Show checkstop reason for NPU2 HMIs
If the kernel is notified of an HMI caused by the NPU2, it's currently not being recognized and it logs the default message: "Unknown Malfunction Alert of type 3".
The NPU on Power 9 has 3 Fault Isolation Registers (FIRs), so that's a lot of possible causes, but we should at least log that it's an NPU problem and report which FIR and which bit were raised, if opal gave us the information.
Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r--arch/powerpc/include/asm/opal-api.h1
-rw-r--r--arch/powerpc/platforms/powernv/opal-hmi.c40
2 files changed, 41 insertions, 0 deletions
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index e1577cfa7186..2492fe248e1e 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -568,6 +568,7 @@ enum OpalHMI_XstopType {
CHECKSTOP_TYPE_UNKNOWN = 0,
CHECKSTOP_TYPE_CORE = 1,
CHECKSTOP_TYPE_NX = 2,
+ CHECKSTOP_TYPE_NPU = 3
};
enum OpalHMI_CoreXstopReason {
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index 5cae375525d0..3e1f064a18db 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -137,6 +137,43 @@ static void print_nx_checkstop_reason(const char *level,
xstop_reason[i].description);
}
+static void print_npu_checkstop_reason(const char *level,
+ struct OpalHMIEvent *hmi_evt)
+{
+ uint8_t reason, reason_count, i;
+
+ /*
+ * Log an NPU checkstop described by hmi_evt. Every message is
+ * prefixed with the string passed in 'level'.
+ *
+ * We may not have a checkstop reason on some combination of
+ * hardware and/or skiboot version. A zero xstop_reason is treated
+ * as "no reason available": only the chip id is reported and we
+ * return early.
+ */
+ if (!hmi_evt->u.xstop_error.xstop_reason) {
+ printk("%s NPU checkstop on chip %x\n", level,
+ be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id));
+ return;
+ }
+
+ /*
+ * NPU2 has 3 FIRs. Reason encoded on a byte as:
+ * 2 bits for the FIR number
+ * 6 bits for the bit number
+ * It may be possible to find several reasons.
+ *
+ * The loop below walks xstop_reason one byte at a time
+ * (reason_count is the size of the field in bytes, since 'reason'
+ * is a uint8_t). Zero bytes are skipped; each non-zero byte
+ * produces one log line, with the top 2 bits printed as the FIR
+ * number and the low 6 bits as the bit number.
+ *
+ * We don't display a specific message per FIR bit as there
+ * are too many and most are meaningless without the workbook
+ * and/or hw team help anyway.
+ *
+ * NOTE(review): xstop_reason is shifted as-is here, whereas
+ * chip_id goes through be32_to_cpu(). Confirm whether
+ * xstop_reason needs the same byte-order conversion.
+ */
+ reason_count = sizeof(hmi_evt->u.xstop_error.xstop_reason) /
+ sizeof(reason);
+ for (i = 0; i < reason_count; i++) {
+ reason = (hmi_evt->u.xstop_error.xstop_reason >> (8 * i)) & 0xFF;
+ if (reason)
+ printk("%s NPU checkstop on chip %x: FIR%d bit %d is set\n",
+ level,
+ be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id),
+ reason >> 6, reason & 0x3F);
+ }
+}
+
static void print_checkstop_reason(const char *level,
struct OpalHMIEvent *hmi_evt)
{
@@ -148,6 +185,9 @@ static void print_checkstop_reason(const char *level,
case CHECKSTOP_TYPE_NX:
print_nx_checkstop_reason(level, hmi_evt);
break;
+ case CHECKSTOP_TYPE_NPU:
+ print_npu_checkstop_reason(level, hmi_evt);
+ break;
default:
printk("%s Unknown Malfunction Alert of type %d\n",
level, type);