aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/mce/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/mce/core.c')
-rw-r--r--arch/x86/kernel/cpu/mce/core.c188
1 files changed, 133 insertions, 55 deletions
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index f43a78bde670..4102b866e7c0 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -40,7 +40,6 @@
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>
-#include <linux/jump_label.h>
#include <linux/set_memory.h>
#include <linux/sync_core.h>
#include <linux/task_work.h>
@@ -373,42 +372,105 @@ static int msr_to_offset(u32 msr)
return -1;
}
+__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
+{
+ pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
+ (unsigned int)regs->cx, regs->ip, (void *)regs->ip);
+
+ show_stack_regs(regs);
+
+ panic("MCA architectural violation!\n");
+
+ while (true)
+ cpu_relax();
+
+ return true;
+}
+
/* MSR access wrappers used for error injection */
-static u64 mce_rdmsrl(u32 msr)
+static noinstr u64 mce_rdmsrl(u32 msr)
{
- u64 v;
+ DECLARE_ARGS(val, low, high);
if (__this_cpu_read(injectm.finished)) {
- int offset = msr_to_offset(msr);
+ int offset;
+ u64 ret;
+
+ instrumentation_begin();
+ offset = msr_to_offset(msr);
if (offset < 0)
- return 0;
- return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
- }
+ ret = 0;
+ else
+ ret = *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
- if (rdmsrl_safe(msr, &v)) {
- WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr);
- /*
- * Return zero in case the access faulted. This should
- * not happen normally but can happen if the CPU does
- * something weird, or if the code is buggy.
- */
- v = 0;
+ instrumentation_end();
+
+ return ret;
}
- return v;
+ /*
+ * RDMSR on MCA MSRs should not fault. If they do, this is very much an
+ * architectural violation and needs to be reported to hw vendor. Panic
+ * the box to not allow any further progress.
+ */
+ asm volatile("1: rdmsr\n"
+ "2:\n"
+ _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_fault)
+ : EAX_EDX_RET(val, low, high) : "c" (msr));
+
+
+ return EAX_EDX_VAL(val, low, high);
+}
+
+__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
+{
+ pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
+ (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax,
+ regs->ip, (void *)regs->ip);
+
+ show_stack_regs(regs);
+
+ panic("MCA architectural violation!\n");
+
+ while (true)
+ cpu_relax();
+
+ return true;
}
-static void mce_wrmsrl(u32 msr, u64 v)
+static noinstr void mce_wrmsrl(u32 msr, u64 v)
{
+ u32 low, high;
+
if (__this_cpu_read(injectm.finished)) {
- int offset = msr_to_offset(msr);
+ int offset;
+
+ instrumentation_begin();
+ offset = msr_to_offset(msr);
if (offset >= 0)
*(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v;
+
+ instrumentation_end();
+
return;
}
- wrmsrl(msr, v);
+
+ low = (u32)v;
+ high = (u32)(v >> 32);
+
+ /* See comment in mce_rdmsrl() */
+ asm volatile("1: wrmsr\n"
+ "2:\n"
+ _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_fault)
+ : : "c" (msr), "a"(low), "d" (high) : "memory");
}
/*
@@ -745,7 +807,7 @@ log_it:
goto clear_it;
mce_read_aux(&m, i);
- m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
+ m.severity = mce_severity(&m, NULL, mca_cfg.tolerant, NULL, false);
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
@@ -794,7 +856,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
quirk_no_way_out(i, m, regs);
m->bank = i;
- if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+ if (mce_severity(m, regs, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
mce_read_aux(m, i);
*msg = tmp;
return 1;
@@ -872,7 +934,6 @@ static void mce_reign(void)
struct mce *m = NULL;
int global_worst = 0;
char *msg = NULL;
- char *nmsg = NULL;
/*
* This CPU is the Monarch and the other CPUs have run
@@ -880,12 +941,10 @@ static void mce_reign(void)
* Grade the severity of the errors of all the CPUs.
*/
for_each_possible_cpu(cpu) {
- int severity = mce_severity(&per_cpu(mces_seen, cpu),
- mca_cfg.tolerant,
- &nmsg, true);
- if (severity > global_worst) {
- msg = nmsg;
- global_worst = severity;
+ struct mce *mtmp = &per_cpu(mces_seen, cpu);
+
+ if (mtmp->severity > global_worst) {
+ global_worst = mtmp->severity;
m = &per_cpu(mces_seen, cpu);
}
}
@@ -895,8 +954,11 @@ static void mce_reign(void)
* This dumps all the mces in the log buffer and stops the
* other CPUs.
*/
- if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
+ if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
+ /* call mce_severity() to get "msg" for panic */
+ mce_severity(m, NULL, mca_cfg.tolerant, &msg, true);
mce_panic("Fatal machine check", m, msg);
+ }
/*
* For UC somewhere we let the CPU who detects it handle it.
@@ -1105,7 +1167,7 @@ static noinstr bool mce_check_crashing_cpu(void)
return false;
}
-static void __mc_scan_banks(struct mce *m, struct mce *final,
+static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
unsigned long *toclear, unsigned long *valid_banks,
int no_way_out, int *worst)
{
@@ -1140,7 +1202,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
/* Set taint even when machine check was not enabled. */
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
- severity = mce_severity(m, cfg->tolerant, NULL, true);
+ severity = mce_severity(m, regs, cfg->tolerant, NULL, true);
/*
* When machine check was for corrected/deferred handler don't
@@ -1188,13 +1250,34 @@ static void kill_me_maybe(struct callback_head *cb)
if (!p->mce_ripv)
flags |= MF_MUST_KILL;
- if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
+ if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
+ !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
+ sync_core();
return;
}
- pr_err("Memory error not recovered");
- kill_me_now(cb);
+ if (p->mce_vaddr != (void __user *)-1l) {
+ force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
+ } else {
+ pr_err("Memory error not recovered");
+ kill_me_now(cb);
+ }
+}
+
+static void queue_task_work(struct mce *m, int kill_it)
+{
+ current->mce_addr = m->addr;
+ current->mce_kflags = m->kflags;
+ current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
+ current->mce_whole_page = whole_page(m);
+
+ if (kill_it)
+ current->mce_kill_me.func = kill_me_now;
+ else
+ current->mce_kill_me.func = kill_me_maybe;
+
+ task_work_add(current, &current->mce_kill_me, TWA_RESUME);
}
/*
@@ -1291,7 +1374,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
order = mce_start(&no_way_out);
}
- __mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst);
+ __mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
if (!no_way_out)
mce_clear_state(toclear);
@@ -1313,7 +1396,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
* make sure we have the right "msg".
*/
if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
- mce_severity(&m, cfg->tolerant, &msg, true);
+ mce_severity(&m, regs, cfg->tolerant, &msg, true);
mce_panic("Local fatal machine check!", &m, msg);
}
}
@@ -1330,25 +1413,16 @@ noinstr void do_machine_check(struct pt_regs *regs)
if (worst > 0)
irq_work_queue(&mce_irq_work);
- mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
-
- sync_core();
-
if (worst != MCE_AR_SEVERITY && !kill_it)
- return;
+ goto out;
/* Fault was in user mode and we need to take some action */
if ((m.cs & 3) == 3) {
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
- current->mce_addr = m.addr;
- current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
- current->mce_whole_page = whole_page(&m);
- current->mce_kill_me.func = kill_me_maybe;
- if (kill_it)
- current->mce_kill_me.func = kill_me_now;
- task_work_add(current, &current->mce_kill_me, true);
+ queue_task_work(&m, kill_it);
+
} else {
/*
* Handle an MCE which has happened in kernel space but from
@@ -1363,7 +1437,12 @@ noinstr void do_machine_check(struct pt_regs *regs)
if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
mce_panic("Failed kernel mode recovery", &m, msg);
}
+
+ if (m.kflags & MCE_IN_KERNEL_COPYIN)
+ queue_task_work(&m, kill_it);
}
+out:
+ mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
}
EXPORT_SYMBOL_GPL(do_machine_check);
@@ -1904,6 +1983,8 @@ void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check;
static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
{
+ bool irq_state;
+
WARN_ON_ONCE(user_mode(regs));
/*
@@ -1914,7 +1995,7 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
mce_check_crashing_cpu())
return;
- nmi_enter();
+ irq_state = idtentry_enter_nmi(regs);
/*
* The call targets are marked noinstr, but objtool can't figure
* that out because it's an indirect call. Annotate it.
@@ -1925,7 +2006,7 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
if (regs->flags & X86_EFLAGS_IF)
trace_hardirqs_on_prepare();
instrumentation_end();
- nmi_exit();
+ idtentry_exit_nmi(regs, irq_state);
}
static __always_inline void exc_machine_check_user(struct pt_regs *regs)
@@ -2062,7 +2143,7 @@ void mce_disable_bank(int bank)
and older.
* mce=nobootlog Don't log MCEs from before booting.
* mce=bios_cmci_threshold Don't program the CMCI threshold
- * mce=recovery force enable memcpy_mcsafe()
+ * mce=recovery force enable copy_mc_fragile()
*/
static int __init mcheck_enable(char *str)
{
@@ -2670,13 +2751,10 @@ static void __init mcheck_debugfs_init(void)
static void __init mcheck_debugfs_init(void) { }
#endif
-DEFINE_STATIC_KEY_FALSE(mcsafe_key);
-EXPORT_SYMBOL_GPL(mcsafe_key);
-
static int __init mcheck_late_init(void)
{
if (mca_cfg.recovery)
- static_branch_inc(&mcsafe_key);
+ enable_copy_mc_fragile();
mcheck_debugfs_init();