aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/nmi.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/nmi.c')
-rw-r--r--arch/x86/kernel/nmi.c119
1 files changed, 48 insertions, 71 deletions
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 54c21d6abd5a..cec0bfa3bc04 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -25,10 +25,6 @@
#include <linux/atomic.h>
#include <linux/sched/clock.h>
-#if defined(CONFIG_EDAC)
-#include <linux/edac.h>
-#endif
-
#include <asm/cpu_entry_area.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
@@ -37,6 +33,7 @@
#include <asm/reboot.h>
#include <asm/cache.h>
#include <asm/nospec-branch.h>
+#include <asm/sev.h>
#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>
@@ -106,7 +103,6 @@ fs_initcall(nmi_warning_debugfs);
static void nmi_check_duration(struct nmiaction *action, u64 duration)
{
- u64 whole_msecs = READ_ONCE(action->max_duration);
int remainder_ns, decimal_msecs;
if (duration < nmi_longest_ns || duration < action->max_duration)
@@ -114,12 +110,12 @@ static void nmi_check_duration(struct nmiaction *action, u64 duration)
action->max_duration = duration;
- remainder_ns = do_div(whole_msecs, (1000 * 1000));
+ remainder_ns = do_div(duration, (1000 * 1000));
decimal_msecs = remainder_ns / 1000;
printk_ratelimited(KERN_INFO
"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
- action->handler, whole_msecs, decimal_msecs);
+ action->handler, duration, decimal_msecs);
}
static int nmi_handle(unsigned int type, struct pt_regs *regs)
@@ -161,7 +157,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
struct nmi_desc *desc = nmi_to_desc(type);
unsigned long flags;
- if (!action->handler)
+ if (WARN_ON_ONCE(!action->handler || !list_empty(&action->list)))
return -EINVAL;
raw_spin_lock_irqsave(&desc->lock, flags);
@@ -181,7 +177,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
list_add_rcu(&action->list, &desc->head);
else
list_add_tail_rcu(&action->list, &desc->head);
-
+
raw_spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
@@ -190,7 +186,7 @@ EXPORT_SYMBOL(__register_nmi_handler);
void unregister_nmi_handler(unsigned int type, const char *name)
{
struct nmi_desc *desc = nmi_to_desc(type);
- struct nmiaction *n;
+ struct nmiaction *n, *found = NULL;
unsigned long flags;
raw_spin_lock_irqsave(&desc->lock, flags);
@@ -204,12 +200,16 @@ void unregister_nmi_handler(unsigned int type, const char *name)
WARN(in_nmi(),
"Trying to free NMI (%s) from NMI context!\n", n->name);
list_del_rcu(&n->list);
+ found = n;
break;
}
}
raw_spin_unlock_irqrestore(&desc->lock, flags);
- synchronize_rcu();
+ if (found) {
+ synchronize_rcu();
+ INIT_LIST_HEAD(&found->list);
+ }
}
EXPORT_SYMBOL_GPL(unregister_nmi_handler);
@@ -296,7 +296,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
reason, smp_processor_id());
- pr_emerg("Do you have a strange power saving mode enabled?\n");
if (unknown_nmi_panic || panic_on_unrecovered_nmi)
nmi_panic(regs, "NMI: Not continuing");
@@ -307,7 +306,7 @@ NOKPROBE_SYMBOL(unknown_nmi_error);
static DEFINE_PER_CPU(bool, swallow_nmi);
static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
-static void default_do_nmi(struct pt_regs *regs)
+static noinstr void default_do_nmi(struct pt_regs *regs)
{
unsigned char reason = 0;
int handled;
@@ -333,6 +332,8 @@ static void default_do_nmi(struct pt_regs *regs)
__this_cpu_write(last_nmi_rip, regs->ip);
+ instrumentation_begin();
+
handled = nmi_handle(NMI_LOCAL, regs);
__this_cpu_add(nmi_stats.normal, handled);
if (handled) {
@@ -346,7 +347,7 @@ static void default_do_nmi(struct pt_regs *regs)
*/
if (handled > 1)
__this_cpu_write(swallow_nmi, true);
- return;
+ goto out;
}
/*
@@ -378,7 +379,7 @@ static void default_do_nmi(struct pt_regs *regs)
#endif
__this_cpu_add(nmi_stats.external, 1);
raw_spin_unlock(&nmi_reason_lock);
- return;
+ goto out;
}
raw_spin_unlock(&nmi_reason_lock);
@@ -403,9 +404,9 @@ static void default_do_nmi(struct pt_regs *regs)
* a 'real' unknown NMI. For example, while processing
* a perf NMI another perf NMI comes in along with a
* 'real' unknown NMI. These two NMIs get combined into
- * one (as descibed above). When the next NMI gets
+ * one (as described above). When the next NMI gets
* processed, it will be flagged by perf as handled, but
- * noone will know that there was a 'real' unknown NMI sent
+ * no one will know that there was a 'real' unknown NMI sent
* also. As a result it gets swallowed. Or if the first
* perf NMI returns two events handled then the second
* NMI will get eaten by the logic below, again losing a
@@ -416,8 +417,10 @@ static void default_do_nmi(struct pt_regs *regs)
__this_cpu_add(nmi_stats.swallow, 1);
else
unknown_nmi_error(reason, regs);
+
+out:
+ instrumentation_end();
}
-NOKPROBE_SYMBOL(default_do_nmi);
/*
* NMIs can page fault or hit breakpoints which will cause it to lose
@@ -471,46 +474,19 @@ enum nmi_states {
};
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
static DEFINE_PER_CPU(unsigned long, nmi_cr2);
+static DEFINE_PER_CPU(unsigned long, nmi_dr7);
-#ifdef CONFIG_X86_64
-/*
- * In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without
- * some care, the inner breakpoint will clobber the outer breakpoint's
- * stack.
- *
- * If a breakpoint is being processed, and the debug stack is being
- * used, if an NMI comes in and also hits a breakpoint, the stack
- * pointer will be set to the same fixed address as the breakpoint that
- * was interrupted, causing that stack to be corrupted. To handle this
- * case, check if the stack that was interrupted is the debug stack, and
- * if so, change the IDT so that new breakpoints will use the current
- * stack and not switch to the fixed address. On return of the NMI,
- * switch back to the original IDT.
- */
-static DEFINE_PER_CPU(int, update_debug_stack);
-
-static bool notrace is_debug_stack(unsigned long addr)
+DEFINE_IDTENTRY_RAW(exc_nmi)
{
- struct cea_exception_stacks *cs = __this_cpu_read(cea_exception_stacks);
- unsigned long top = CEA_ESTACK_TOP(cs, DB);
- unsigned long bot = CEA_ESTACK_BOT(cs, DB1);
+ irqentry_state_t irq_state;
- if (__this_cpu_read(debug_stack_usage))
- return true;
/*
- * Note, this covers the guard page between DB and DB1 as well to
- * avoid two checks. But by all means @addr can never point into
- * the guard page.
+ * Re-enable NMIs right here when running as an SEV-ES guest. This might
+ * cause nested NMIs, but those can be handled safely.
*/
- return addr >= bot && addr < top;
-}
-NOKPROBE_SYMBOL(is_debug_stack);
-#endif
+ sev_es_nmi_complete();
-dotraplinkage notrace void
-do_nmi(struct pt_regs *regs, long error_code)
-{
- if (IS_ENABLED(CONFIG_SMP) && cpu_is_offline(smp_processor_id()))
+ if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
return;
if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
@@ -521,34 +497,26 @@ do_nmi(struct pt_regs *regs, long error_code)
this_cpu_write(nmi_cr2, read_cr2());
nmi_restart:
-#ifdef CONFIG_X86_64
/*
- * If we interrupted a breakpoint, it is possible that
- * the nmi handler will have breakpoints too. We need to
- * change the IDT such that breakpoints that happen here
- * continue to use the NMI stack.
+ * Needs to happen before DR7 is accessed, because the hypervisor can
+ * intercept DR7 reads/writes, turning those into #VC exceptions.
*/
- if (unlikely(is_debug_stack(regs->sp))) {
- debug_stack_set_zero();
- this_cpu_write(update_debug_stack, 1);
- }
-#endif
+ sev_es_ist_enter(regs);
- nmi_enter();
+ this_cpu_write(nmi_dr7, local_db_save());
+
+ irq_state = irqentry_nmi_enter(regs);
inc_irq_stat(__nmi_count);
if (!ignore_nmis)
default_do_nmi(regs);
- nmi_exit();
+ irqentry_nmi_exit(regs, irq_state);
-#ifdef CONFIG_X86_64
- if (unlikely(this_cpu_read(update_debug_stack))) {
- debug_stack_reset();
- this_cpu_write(update_debug_stack, 0);
- }
-#endif
+ local_db_restore(this_cpu_read(nmi_dr7));
+
+ sev_es_ist_exit();
if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
write_cr2(this_cpu_read(nmi_cr2));
@@ -558,7 +526,16 @@ nmi_restart:
if (user_mode(regs))
mds_user_clear_cpu_buffers();
}
-NOKPROBE_SYMBOL(do_nmi);
+
+#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
+DEFINE_IDTENTRY_RAW(exc_nmi_noist)
+{
+ exc_nmi(regs);
+}
+#endif
+#if IS_MODULE(CONFIG_KVM_INTEL)
+EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
+#endif
void stop_nmi(void)
{