From bc39f010200d09dc6d4d5e613e86dfd0b22c63b3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Jun 2018 11:54:22 +0200 Subject: x86/mce: Always use 64-bit timestamps The machine check timestamp uses get_seconds(), which returns an 'unsigned long' number that might overflow on 32-bit architectures (in the distant future) and is therefore deprecated. The normal replacement would be ktime_get_real_seconds(), but that needs to use a sequence lock that might cause a deadlock if the MCE happens at just the wrong moment. The __ktime_get_real_seconds() skips that lock and is safer here, but has a miniscule risk of returning the wrong time when we read it on a 32-bit architecture at the same time as updating the epoch, i.e. from before y2106 overflow time to after, or vice versa. This seems to be an acceptable risk in this particular case, and is the same thing we do in kdb. Signed-off-by: Arnd Bergmann Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Acked-by: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Cc: y2038@lists.linaro.org Link: http://lkml.kernel.org/r/20180618100759.1921750-1-arnd@arndb.de --- arch/x86/kernel/cpu/mcheck/mce.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu/mcheck/mce.c') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e93670d736a6..d62201e40027 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -123,8 +123,8 @@ void mce_setup(struct mce *m) { memset(m, 0, sizeof(struct mce)); m->cpu = m->extcpu = smp_processor_id(); - /* We hope get_seconds stays lockless */ - m->time = get_seconds(); + /* need the internal __ version to avoid deadlocks */ + m->time = __ktime_get_real_seconds(); m->cpuvendor = boot_cpu_data.x86_vendor; m->cpuid = cpuid_eax(1); m->socketid = cpu_data(m->extcpu).phys_proc_id; -- cgit v1.2.3-59-g8ed1b From d3d6923cd1ae61a493a405f2f001293ab62eb092 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 22 Jun 2018 11:54:24 +0200 Subject: x86/mce: Carve out the crashing_cpu check Carve out the rendezvous handler timeout avoidance check into a separate function in order to simplify the #MC handler. No functional changes. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20180622095428.626-4-bp@alien8.de --- arch/x86/kernel/cpu/mcheck/mce.c | 64 +++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 27 deletions(-) (limited to 'arch/x86/kernel/cpu/mcheck/mce.c') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d62201e40027..18804834dbc0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1104,6 +1104,34 @@ static void mce_unmap_kpfn(unsigned long pfn) } #endif + +/* + * Cases where we avoid rendezvous handler timeout: + * 1) If this CPU is offline. + * + * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to + * skip those CPUs which remain looping in the 1st kernel - see + * crash_nmi_callback(). + * + * Note: there still is a small window between kexec-ing and the new, + * kdump kernel establishing a new #MC handler where a broadcasted MCE + * might not get handled properly. + */ +static bool __mc_check_crashing_cpu(int cpu) +{ + if (cpu_is_offline(cpu) || + (crashing_cpu != -1 && crashing_cpu != cpu)) { + u64 mcgstatus; + + mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); + if (mcgstatus & MCG_STATUS_RIPV) { + mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); + return true; + } + } + return false; +} + /* * The actual machine check handler. This only handles real * exceptions when something got corrupted coming in through int 18. @@ -1118,60 +1146,42 @@ static void mce_unmap_kpfn(unsigned long pfn) */ void do_machine_check(struct pt_regs *regs, long error_code) { + DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); + DECLARE_BITMAP(toclear, MAX_NR_BANKS); struct mca_config *cfg = &mca_cfg; + int cpu = smp_processor_id(); + char *msg = "Unknown"; struct mce m, *final; - int i; int worst = 0; int severity; + int i; /* * Establish sequential order between the CPUs entering the machine * check handler. */ int order = -1; + /* * If no_way_out gets set, there is no safe way to recover from this * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway. */ int no_way_out = 0; + /* * If kill_it gets set, there might be a way to recover from this * error. */ int kill_it = 0; - DECLARE_BITMAP(toclear, MAX_NR_BANKS); - DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); - char *msg = "Unknown"; /* * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES * on Intel. */ int lmce = 1; - int cpu = smp_processor_id(); - - /* - * Cases where we avoid rendezvous handler timeout: - * 1) If this CPU is offline. - * - * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to - * skip those CPUs which remain looping in the 1st kernel - see - * crash_nmi_callback(). - * - * Note: there still is a small window between kexec-ing and the new, - * kdump kernel establishing a new #MC handler where a broadcasted MCE - * might not get handled properly. - */ - if (cpu_is_offline(cpu) || - (crashing_cpu != -1 && crashing_cpu != cpu)) { - u64 mcgstatus; - mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); - if (mcgstatus & MCG_STATUS_RIPV) { - mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); - return; - } - } + if (__mc_check_crashing_cpu(cpu)) + return; ist_enter(regs); -- cgit v1.2.3-59-g8ed1b From 45deca7d96e0e33061b7d12932b381114db18f60 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 22 Jun 2018 11:54:25 +0200 Subject: x86/mce: Remove !banks check If we don't have MCA banks, we won't see machine checks anyway. Drop the check. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20180622095428.626-5-bp@alien8.de --- arch/x86/kernel/cpu/mcheck/mce.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86/kernel/cpu/mcheck/mce.c') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 18804834dbc0..8ac2ea2a874a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1187,9 +1187,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) this_cpu_inc(mce_exception_count); - if (!cfg->banks) - goto out; - mce_gather_info(&m, regs); m.tsc = rdtsc(); @@ -1329,7 +1326,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (worst > 0) mce_report_event(regs); mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); -out: + sync_core(); if (worst != MCE_AR_SEVERITY && !kill_it) -- cgit v1.2.3-59-g8ed1b From f35565e3986dcc5dde52649d461543cde1b745cd Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 22 Jun 2018 11:54:26 +0200 Subject: x86/mce: Carve out bank scanning code Carve out the scan loop into a separate function. No functional changes. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20180622095428.626-6-bp@alien8.de --- arch/x86/kernel/cpu/mcheck/mce.c | 134 +++++++++++++++++++++------------------ 1 file changed, 71 insertions(+), 63 deletions(-) (limited to 'arch/x86/kernel/cpu/mcheck/mce.c') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8ac2ea2a874a..d23fcd8d11c9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1132,6 +1132,76 @@ static bool __mc_check_crashing_cpu(int cpu) return false; } +static void __mc_scan_banks(struct mce *m, struct mce *final, + unsigned long *toclear, unsigned long *valid_banks, + int no_way_out, int *worst) +{ + struct mca_config *cfg = &mca_cfg; + int severity, i; + + for (i = 0; i < cfg->banks; i++) { + __clear_bit(i, toclear); + if (!test_bit(i, valid_banks)) + continue; + if (!mce_banks[i].ctl) + continue; + + m->misc = 0; + m->addr = 0; + m->bank = i; + + m->status = mce_rdmsrl(msr_ops.status(i)); + if ((m->status & MCI_STATUS_VAL) == 0) + continue; + + /* + * Non uncorrected or non signaled errors are handled by + * machine_check_poll. Leave them alone, unless this panics. + */ + if (!(m->status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) && + !no_way_out) + continue; + + /* + * Set taint even when machine check was not enabled. + */ + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + + severity = mce_severity(m, cfg->tolerant, NULL, true); + + /* + * When machine check was for corrected/deferred handler don't + * touch, unless we're panicing. + */ + if ((severity == MCE_KEEP_SEVERITY || + severity == MCE_UCNA_SEVERITY) && !no_way_out) + continue; + __set_bit(i, toclear); + if (severity == MCE_NO_SEVERITY) { + /* + * Machine check event was not enabled. Clear, but + * ignore. + */ + continue; + } + + mce_read_aux(m, i); + + /* assuming valid severity level != 0 */ + m->severity = severity; + + mce_log(m); + + if (severity > *worst) { + *final = *m; + *worst = severity; + } + } + + /* mce_clear_state will clear *final, save locally for use later */ + *m = *final; +} + /* * The actual machine check handler. This only handles real * exceptions when something got corrupted coming in through int 18. @@ -1153,8 +1223,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) char *msg = "Unknown"; struct mce m, *final; int worst = 0; - int severity; - int i; /* * Establish sequential order between the CPUs entering the machine @@ -1227,67 +1295,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) order = mce_start(&no_way_out); } - for (i = 0; i < cfg->banks; i++) { - __clear_bit(i, toclear); - if (!test_bit(i, valid_banks)) - continue; - if (!mce_banks[i].ctl) - continue; - - m.misc = 0; - m.addr = 0; - m.bank = i; - - m.status = mce_rdmsrl(msr_ops.status(i)); - if ((m.status & MCI_STATUS_VAL) == 0) - continue; - - /* - * Non uncorrected or non signaled errors are handled by - * machine_check_poll. Leave them alone, unless this panics. - */ - if (!(m.status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) && - !no_way_out) - continue; - - /* - * Set taint even when machine check was not enabled. - */ - add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - - severity = mce_severity(&m, cfg->tolerant, NULL, true); - - /* - * When machine check was for corrected/deferred handler don't - * touch, unless we're panicing. - */ - if ((severity == MCE_KEEP_SEVERITY || - severity == MCE_UCNA_SEVERITY) && !no_way_out) - continue; - __set_bit(i, toclear); - if (severity == MCE_NO_SEVERITY) { - /* - * Machine check event was not enabled. Clear, but - * ignore. - */ - continue; - } - - mce_read_aux(&m, i); - - /* assuming valid severity level != 0 */ - m.severity = severity; - - mce_log(&m); - - if (severity > worst) { - *final = m; - worst = severity; - } - } - - /* mce_clear_state will clear *final, save locally for use later */ - m = *final; + __mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst); if (!no_way_out) mce_clear_state(toclear); -- cgit v1.2.3-59-g8ed1b From d5c84ef202d7db9c838448229f62d66560b951e5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 22 Jun 2018 11:54:27 +0200 Subject: x86/mce: Cleanup __mc_scan_banks() Correct comments, improve readability, simplify. No functional changes. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20180622095428.626-7-bp@alien8.de --- arch/x86/kernel/cpu/mcheck/mce.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel/cpu/mcheck/mce.c') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d23fcd8d11c9..888b51bef13d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1143,6 +1143,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final, __clear_bit(i, toclear); if (!test_bit(i, valid_banks)) continue; + if (!mce_banks[i].ctl) continue; @@ -1151,39 +1152,35 @@ static void __mc_scan_banks(struct mce *m, struct mce *final, m->bank = i; m->status = mce_rdmsrl(msr_ops.status(i)); - if ((m->status & MCI_STATUS_VAL) == 0) + if (!(m->status & MCI_STATUS_VAL)) continue; /* - * Non uncorrected or non signaled errors are handled by - * machine_check_poll. Leave them alone, unless this panics. + * Corrected or non-signaled errors are handled by + * machine_check_poll(). Leave them alone, unless this panics. */ if (!(m->status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) && !no_way_out) continue; - /* - * Set taint even when machine check was not enabled. - */ + /* Set taint even when machine check was not enabled. */ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); severity = mce_severity(m, cfg->tolerant, NULL, true); /* * When machine check was for corrected/deferred handler don't - * touch, unless we're panicing. + * touch, unless we're panicking. */ if ((severity == MCE_KEEP_SEVERITY || severity == MCE_UCNA_SEVERITY) && !no_way_out) continue; + __set_bit(i, toclear); - if (severity == MCE_NO_SEVERITY) { - /* - * Machine check event was not enabled. Clear, but - * ignore. - */ + + /* Machine check event was not enabled. Clear, but ignore. */ + if (severity == MCE_NO_SEVERITY) continue; - } mce_read_aux(m, i); -- cgit v1.2.3-59-g8ed1b