diff options
Diffstat (limited to 'arch/x86/kernel/cpu/mce/core.c')
-rw-r--r-- | arch/x86/kernel/cpu/mce/core.c | 188 |
1 files changed, 133 insertions, 55 deletions
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index f43a78bde670..4102b866e7c0 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -40,7 +40,6 @@ #include <linux/debugfs.h> #include <linux/irq_work.h> #include <linux/export.h> -#include <linux/jump_label.h> #include <linux/set_memory.h> #include <linux/sync_core.h> #include <linux/task_work.h> @@ -373,42 +372,105 @@ static int msr_to_offset(u32 msr) return -1; } +__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr, + unsigned long error_code, + unsigned long fault_addr) +{ + pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", + (unsigned int)regs->cx, regs->ip, (void *)regs->ip); + + show_stack_regs(regs); + + panic("MCA architectural violation!\n"); + + while (true) + cpu_relax(); + + return true; +} + /* MSR access wrappers used for error injection */ -static u64 mce_rdmsrl(u32 msr) +static noinstr u64 mce_rdmsrl(u32 msr) { - u64 v; + DECLARE_ARGS(val, low, high); if (__this_cpu_read(injectm.finished)) { - int offset = msr_to_offset(msr); + int offset; + u64 ret; + + instrumentation_begin(); + offset = msr_to_offset(msr); if (offset < 0) - return 0; - return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset); - } + ret = 0; + else + ret = *(u64 *)((char *)this_cpu_ptr(&injectm) + offset); - if (rdmsrl_safe(msr, &v)) { - WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr); - /* - * Return zero in case the access faulted. This should - * not happen normally but can happen if the CPU does - * something weird, or if the code is buggy. - */ - v = 0; + instrumentation_end(); + + return ret; } - return v; + /* + * RDMSR on MCA MSRs should not fault. If they do, this is very much an + * architectural violation and needs to be reported to hw vendor. Panic + * the box to not allow any further progress. + */ + asm volatile("1: rdmsr\n" + "2:\n" + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_fault) + : EAX_EDX_RET(val, low, high) : "c" (msr)); + + + return EAX_EDX_VAL(val, low, high); +} + +__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr, + unsigned long error_code, + unsigned long fault_addr) +{ + pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", + (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, + regs->ip, (void *)regs->ip); + + show_stack_regs(regs); + + panic("MCA architectural violation!\n"); + + while (true) + cpu_relax(); + + return true; } -static void mce_wrmsrl(u32 msr, u64 v) +static noinstr void mce_wrmsrl(u32 msr, u64 v) { + u32 low, high; + if (__this_cpu_read(injectm.finished)) { - int offset = msr_to_offset(msr); + int offset; + + instrumentation_begin(); + offset = msr_to_offset(msr); if (offset >= 0) *(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v; + + instrumentation_end(); + return; } - wrmsrl(msr, v); + + low = (u32)v; + high = (u32)(v >> 32); + + /* See comment in mce_rdmsrl() */ + asm volatile("1: wrmsr\n" + "2:\n" + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_fault) + : : "c" (msr), "a"(low), "d" (high) : "memory"); } /* @@ -745,7 +807,7 @@ log_it: goto clear_it; mce_read_aux(&m, i); - m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); + m.severity = mce_severity(&m, NULL, mca_cfg.tolerant, NULL, false); /* * Don't get the IP here because it's unlikely to * have anything to do with the actual error location. @@ -794,7 +856,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, quirk_no_way_out(i, m, regs); m->bank = i; - if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { + if (mce_severity(m, regs, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { mce_read_aux(m, i); *msg = tmp; return 1; @@ -872,7 +934,6 @@ static void mce_reign(void) struct mce *m = NULL; int global_worst = 0; char *msg = NULL; - char *nmsg = NULL; /* * This CPU is the Monarch and the other CPUs have run @@ -880,12 +941,10 @@ static void mce_reign(void) * Grade the severity of the errors of all the CPUs. */ for_each_possible_cpu(cpu) { - int severity = mce_severity(&per_cpu(mces_seen, cpu), - mca_cfg.tolerant, - &nmsg, true); - if (severity > global_worst) { - msg = nmsg; - global_worst = severity; + struct mce *mtmp = &per_cpu(mces_seen, cpu); + + if (mtmp->severity > global_worst) { + global_worst = mtmp->severity; m = &per_cpu(mces_seen, cpu); } } @@ -895,8 +954,11 @@ static void mce_reign(void) * This dumps all the mces in the log buffer and stops the * other CPUs. */ - if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) + if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) { + /* call mce_severity() to get "msg" for panic */ + mce_severity(m, NULL, mca_cfg.tolerant, &msg, true); mce_panic("Fatal machine check", m, msg); + } /* * For UC somewhere we let the CPU who detects it handle it. @@ -1105,7 +1167,7 @@ static noinstr bool mce_check_crashing_cpu(void) return false; } -static void __mc_scan_banks(struct mce *m, struct mce *final, +static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final, unsigned long *toclear, unsigned long *valid_banks, int no_way_out, int *worst) { @@ -1140,7 +1202,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final, /* Set taint even when machine check was not enabled. */ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - severity = mce_severity(m, cfg->tolerant, NULL, true); + severity = mce_severity(m, regs, cfg->tolerant, NULL, true); /* * When machine check was for corrected/deferred handler don't @@ -1188,13 +1250,34 @@ static void kill_me_maybe(struct callback_head *cb) if (!p->mce_ripv) flags |= MF_MUST_KILL; - if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) { + if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) && + !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) { set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page); + sync_core(); return; } - pr_err("Memory error not recovered"); - kill_me_now(cb); + if (p->mce_vaddr != (void __user *)-1l) { + force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT); + } else { + pr_err("Memory error not recovered"); + kill_me_now(cb); + } +} + +static void queue_task_work(struct mce *m, int kill_it) +{ + current->mce_addr = m->addr; + current->mce_kflags = m->kflags; + current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV); + current->mce_whole_page = whole_page(m); + + if (kill_it) + current->mce_kill_me.func = kill_me_now; + else + current->mce_kill_me.func = kill_me_maybe; + + task_work_add(current, ¤t->mce_kill_me, TWA_RESUME); } /* @@ -1291,7 +1374,7 @@ noinstr void do_machine_check(struct pt_regs *regs) order = mce_start(&no_way_out); } - __mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst); + __mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst); if (!no_way_out) mce_clear_state(toclear); @@ -1313,7 +1396,7 @@ noinstr void do_machine_check(struct pt_regs *regs) * make sure we have the right "msg". */ if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) { - mce_severity(&m, cfg->tolerant, &msg, true); + mce_severity(&m, regs, cfg->tolerant, &msg, true); mce_panic("Local fatal machine check!", &m, msg); } } @@ -1330,25 +1413,16 @@ noinstr void do_machine_check(struct pt_regs *regs) if (worst > 0) irq_work_queue(&mce_irq_work); - mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); - - sync_core(); - if (worst != MCE_AR_SEVERITY && !kill_it) - return; + goto out; /* Fault was in user mode and we need to take some action */ if ((m.cs & 3) == 3) { /* If this triggers there is no way to recover. Die hard. */ BUG_ON(!on_thread_stack() || !user_mode(regs)); - current->mce_addr = m.addr; - current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV); - current->mce_whole_page = whole_page(&m); - current->mce_kill_me.func = kill_me_maybe; - if (kill_it) - current->mce_kill_me.func = kill_me_now; - task_work_add(current, ¤t->mce_kill_me, true); + queue_task_work(&m, kill_it); + } else { /* * Handle an MCE which has happened in kernel space but from @@ -1363,7 +1437,12 @@ noinstr void do_machine_check(struct pt_regs *regs) if (!fixup_exception(regs, X86_TRAP_MC, 0, 0)) mce_panic("Failed kernel mode recovery", &m, msg); } + + if (m.kflags & MCE_IN_KERNEL_COPYIN) + queue_task_work(&m, kill_it); } +out: + mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); } EXPORT_SYMBOL_GPL(do_machine_check); @@ -1904,6 +1983,8 @@ void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check; static __always_inline void exc_machine_check_kernel(struct pt_regs *regs) { + bool irq_state; + WARN_ON_ONCE(user_mode(regs)); /* @@ -1914,7 +1995,7 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs) mce_check_crashing_cpu()) return; - nmi_enter(); + irq_state = idtentry_enter_nmi(regs); /* * The call targets are marked noinstr, but objtool can't figure * that out because it's an indirect call. Annotate it. @@ -1925,7 +2006,7 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs) if (regs->flags & X86_EFLAGS_IF) trace_hardirqs_on_prepare(); instrumentation_end(); - nmi_exit(); + idtentry_exit_nmi(regs, irq_state); } static __always_inline void exc_machine_check_user(struct pt_regs *regs) @@ -2062,7 +2143,7 @@ void mce_disable_bank(int bank) and older. * mce=nobootlog Don't log MCEs from before booting. * mce=bios_cmci_threshold Don't program the CMCI threshold - * mce=recovery force enable memcpy_mcsafe() + * mce=recovery force enable copy_mc_fragile() */ static int __init mcheck_enable(char *str) { @@ -2670,13 +2751,10 @@ static void __init mcheck_debugfs_init(void) static void __init mcheck_debugfs_init(void) { } #endif -DEFINE_STATIC_KEY_FALSE(mcsafe_key); -EXPORT_SYMBOL_GPL(mcsafe_key); - static int __init mcheck_late_init(void) { if (mca_cfg.recovery) - static_branch_inc(&mcsafe_key); + enable_copy_mc_fragile(); mcheck_debugfs_init(); |