aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel/mce.c
diff options
context:
space:
mode:
authorMahesh Salgaonkar <mahesh@linux.vnet.ibm.com>2019-04-29 23:45:48 +0530
committerMichael Ellerman <mpe@ellerman.id.au>2019-05-01 22:22:24 +1000
commitd6e8a150850601277039a548ffcdddd1bfe3e365 (patch)
treed245f4bdb0c78b2b9184b0171f5933daf8034420 /arch/powerpc/kernel/mce.c
parentpowerpc: Add doorbell tracepoints (diff)
downloadlinux-dev-d6e8a150850601277039a548ffcdddd1bfe3e365.tar.xz
linux-dev-d6e8a150850601277039a548ffcdddd1bfe3e365.zip
powerpc/powernv/mce: Reduce MCE console logs to lesser lines.
Also add cpu number while displaying MCE log. This will help cleaner logs when MCE hits on multiple cpus simultaneously. Before the changes the MCE output was: Severe Machine check interrupt [Recovered] NIP [d00000000ba80280]: insert_slb_entry.constprop.0+0x278/0x2c0 [mcetest_slb] Initiator: CPU Error type: SLB [Multihit] Effective address: d00000000ba80280 After this patch series changes the MCE output will be: MCE: CPU80: machine check (Warning) Host SLB Multihit [Recovered] MCE: CPU80: NIP: [d00000000b550280] insert_slb_entry.constprop.0+0x278/0x2c0 [mcetest_slb] MCE: CPU80: Probable software error (some chance of hardware cause) UE in host application: MCE: CPU48: machine check (Severe) Host UE Load/Store DAR: 00007fffc6079a80 paddr: 0000000f8e260000 [Not recovered] MCE: CPU48: PID: 4584 Comm: find NIP: [0000000010023368] MCE: CPU48: Hardware error and for MCE in Guest: MCE: CPU80: machine check (Warning) Guest SLB Multihit DAR: 000001001b6e0320 [Recovered] MCE: CPU80: PID: 24765 Comm: qemu-system-ppc Guest NIP: [00007fffa309dc60] MCE: CPU80: Probable software error (some chance of hardware cause) Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to '')
-rw-r--r--arch/powerpc/kernel/mce.c89
1 files changed, 48 insertions, 41 deletions
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index b5fec1f9751a..25a8b20cbbdc 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -112,6 +112,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
mce->srr1 = regs->msr;
mce->gpr3 = regs->gpr[3];
mce->in_use = 1;
+ mce->cpu = get_paca()->paca_index;
/* Mark it recovered if we have handled it and MSR(RI=1). */
if (handled && (regs->msr & MSR_RI))
@@ -310,7 +311,11 @@ static void machine_check_process_queued_event(struct irq_work *work)
void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode, bool in_guest)
{
- const char *level, *sevstr, *subtype;
+ const char *level, *sevstr, *subtype, *err_type;
+ uint64_t ea = 0, pa = 0;
+ int n = 0;
+ char dar_str[50];
+ char pa_str[50];
static const char *mc_ue_types[] = {
"Indeterminate",
"Instruction fetch",
@@ -384,101 +389,103 @@ void machine_check_print_event_info(struct machine_check_event *evt,
break;
}
- printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
- evt->disposition == MCE_DISPOSITION_RECOVERED ?
- "Recovered" : "Not recovered");
-
- if (in_guest) {
- printk("%s Guest NIP: %016llx\n", level, evt->srr0);
- } else if (user_mode) {
- printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
- evt->srr0, current->pid, current->comm);
- } else {
- printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
- (void *)evt->srr0);
- }
-
- printk("%s Initiator: %s\n", level,
- evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
switch (evt->error_type) {
case MCE_ERROR_TYPE_UE:
+ err_type = "UE";
subtype = evt->u.ue_error.ue_error_type <
ARRAY_SIZE(mc_ue_types) ?
mc_ue_types[evt->u.ue_error.ue_error_type]
: "Unknown";
- printk("%s Error type: UE [%s]\n", level, subtype);
if (evt->u.ue_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.ue_error.effective_address);
+ ea = evt->u.ue_error.effective_address;
if (evt->u.ue_error.physical_address_provided)
- printk("%s Physical address: %016llx\n",
- level, evt->u.ue_error.physical_address);
+ pa = evt->u.ue_error.physical_address;
break;
case MCE_ERROR_TYPE_SLB:
+ err_type = "SLB";
subtype = evt->u.slb_error.slb_error_type <
ARRAY_SIZE(mc_slb_types) ?
mc_slb_types[evt->u.slb_error.slb_error_type]
: "Unknown";
- printk("%s Error type: SLB [%s]\n", level, subtype);
if (evt->u.slb_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.slb_error.effective_address);
+ ea = evt->u.slb_error.effective_address;
break;
case MCE_ERROR_TYPE_ERAT:
+ err_type = "ERAT";
subtype = evt->u.erat_error.erat_error_type <
ARRAY_SIZE(mc_erat_types) ?
mc_erat_types[evt->u.erat_error.erat_error_type]
: "Unknown";
- printk("%s Error type: ERAT [%s]\n", level, subtype);
if (evt->u.erat_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.erat_error.effective_address);
+ ea = evt->u.erat_error.effective_address;
break;
case MCE_ERROR_TYPE_TLB:
+ err_type = "TLB";
subtype = evt->u.tlb_error.tlb_error_type <
ARRAY_SIZE(mc_tlb_types) ?
mc_tlb_types[evt->u.tlb_error.tlb_error_type]
: "Unknown";
- printk("%s Error type: TLB [%s]\n", level, subtype);
if (evt->u.tlb_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.tlb_error.effective_address);
+ ea = evt->u.tlb_error.effective_address;
break;
case MCE_ERROR_TYPE_USER:
+ err_type = "User";
subtype = evt->u.user_error.user_error_type <
ARRAY_SIZE(mc_user_types) ?
mc_user_types[evt->u.user_error.user_error_type]
: "Unknown";
- printk("%s Error type: User [%s]\n", level, subtype);
if (evt->u.user_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.user_error.effective_address);
+ ea = evt->u.user_error.effective_address;
break;
case MCE_ERROR_TYPE_RA:
+ err_type = "Real address";
subtype = evt->u.ra_error.ra_error_type <
ARRAY_SIZE(mc_ra_types) ?
mc_ra_types[evt->u.ra_error.ra_error_type]
: "Unknown";
- printk("%s Error type: Real address [%s]\n", level, subtype);
if (evt->u.ra_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.ra_error.effective_address);
+ ea = evt->u.ra_error.effective_address;
break;
case MCE_ERROR_TYPE_LINK:
+ err_type = "Link";
subtype = evt->u.link_error.link_error_type <
ARRAY_SIZE(mc_link_types) ?
mc_link_types[evt->u.link_error.link_error_type]
: "Unknown";
- printk("%s Error type: Link [%s]\n", level, subtype);
if (evt->u.link_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.link_error.effective_address);
+ ea = evt->u.link_error.effective_address;
break;
default:
case MCE_ERROR_TYPE_UNKNOWN:
- printk("%s Error type: Unknown\n", level);
+ err_type = "Unknown";
+ subtype = "";
break;
}
+
+ dar_str[0] = pa_str[0] = '\0';
+ if (ea && evt->srr0 != ea) {
+ /* Load/Store address */
+ n = sprintf(dar_str, "DAR: %016llx ", ea);
+ if (pa)
+ sprintf(dar_str + n, "paddr: %016llx ", pa);
+ } else if (pa) {
+ sprintf(pa_str, " paddr: %016llx", pa);
+ }
+
+ printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
+ level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
+ err_type, subtype, dar_str,
+ evt->disposition == MCE_DISPOSITION_RECOVERED ?
+ "Recovered" : "Not recovered");
+
+ if (in_guest || user_mode) {
+ printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
+ level, evt->cpu, current->pid, current->comm,
+ in_guest ? "Guest " : "", evt->srr0, pa_str);
+ } else {
+ printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
+ level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
+ }
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);