aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/traps.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/traps.c')
-rw-r--r--arch/x86/kernel/traps.c259
1 files changed, 156 insertions, 103 deletions
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1f66d2d1e998..3c70fb34028b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -43,6 +43,7 @@
#include <asm/stacktrace.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
+#include <asm/realmode.h>
#include <asm/text-patching.h>
#include <asm/ftrace.h>
#include <asm/traps.h>
@@ -195,7 +196,7 @@ static __always_inline void __user *error_get_trap_addr(struct pt_regs *regs)
DEFINE_IDTENTRY(exc_divide_error)
{
- do_error_trap(regs, 0, "divide_error", X86_TRAP_DE, SIGFPE,
+ do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE,
FPE_INTDIV, error_get_trap_addr(regs));
}
@@ -673,6 +674,50 @@ asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
return regs;
}
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *regs)
+{
+ unsigned long sp, *stack;
+ struct stack_info info;
+ struct pt_regs *regs_ret;
+
+ /*
+ * In the SYSCALL entry path the RSP value comes from user-space - don't
+ * trust it and switch to the current kernel stack
+ */
+ if (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
+ regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack) {
+ sp = this_cpu_read(cpu_current_top_of_stack);
+ goto sync;
+ }
+
+ /*
+ * From here on the RSP value is trusted. Now check whether entry
+ * happened from a safe stack. Not safe are the entry or unknown stacks,
+ * use the fall-back stack instead in this case.
+ */
+ sp = regs->sp;
+ stack = (unsigned long *)sp;
+
+ if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY ||
+ info.type >= STACK_TYPE_EXCEPTION_LAST)
+ sp = __this_cpu_ist_top_va(VC2);
+
+sync:
+ /*
+ * Found a safe stack - switch to it as if the entry didn't happen via
+ * IST stack. The code below only copies pt_regs, the real switch happens
+ * in assembly code.
+ */
+ sp = ALIGN_DOWN(sp, 8) - sizeof(*regs_ret);
+
+ regs_ret = (struct pt_regs *)sp;
+ *regs_ret = *regs;
+
+ return regs_ret;
+}
+#endif
+
struct bad_iret_stack {
void *error_entry_ret;
struct pt_regs regs;
@@ -729,20 +774,9 @@ static bool is_sysenter_singlestep(struct pt_regs *regs)
#endif
}
-static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
+static __always_inline unsigned long debug_read_clear_dr6(void)
{
- /*
- * Disable breakpoints during exception handling; recursive exceptions
- * are exceedingly 'fun'.
- *
- * Since this function is NOKPROBE, and that also applies to
- * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
- * HW_BREAKPOINT_W on our stack)
- *
- * Entry text is excluded for HW_BP_X and cpu_entry_area, which
- * includes the entry stack is excluded for everything.
- */
- *dr7 = local_db_save();
+ unsigned long dr6;
/*
* The Intel SDM says:
@@ -755,15 +789,24 @@ static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
*
* Keep it simple: clear DR6 immediately.
*/
- get_debugreg(*dr6, 6);
- set_debugreg(0, 6);
- /* Filter out all the reserved bits which are preset to 1 */
- *dr6 &= ~DR6_RESERVED;
-}
+ get_debugreg(dr6, 6);
+ set_debugreg(DR6_RESERVED, 6);
+ dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
-static __always_inline void debug_exit(unsigned long dr7)
-{
- local_db_restore(dr7);
+ /*
+ * Clear the virtual DR6 value, ptrace routines will set bits here for
+ * things we want signals for.
+ */
+ current->thread.virtual_dr6 = 0;
+
+ /*
+ * The SDM says "The processor clears the BTF flag when it
+ * generates a debug exception." Clear TIF_BLOCKSTEP to keep
+ * TIF_BLOCKSTEP in sync with the hardware BTF flag.
+ */
+ clear_thread_flag(TIF_BLOCKSTEP);
+
+ return dr6;
}
/*
@@ -790,79 +833,37 @@ static __always_inline void debug_exit(unsigned long dr7)
*
* May run on IST stack.
*/
-static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user)
-{
- struct task_struct *tsk = current;
- bool user_icebp;
- int si_code;
-
- /*
- * The SDM says "The processor clears the BTF flag when it
- * generates a debug exception." Clear TIF_BLOCKSTEP to keep
- * TIF_BLOCKSTEP in sync with the hardware BTF flag.
- */
- clear_thread_flag(TIF_BLOCKSTEP);
-
- /*
- * If DR6 is zero, no point in trying to handle it. The kernel is
- * not using INT1.
- */
- if (!user && !dr6)
- return;
+static bool notify_debug(struct pt_regs *regs, unsigned long *dr6)
+{
/*
- * If dr6 has no reason to give us about the origin of this trap,
- * then it's very likely the result of an icebp/int01 trap.
- * User wants a sigtrap for that.
+ * Notifiers will clear bits in @dr6 to indicate the event has been
+ * consumed - hw_breakpoint_handler(), single_stop_cont().
+ *
+ * Notifiers will set bits in @virtual_dr6 to indicate the desire
+ * for signals - ptrace_triggered(), kgdb_hw_overflow_handler().
*/
- user_icebp = user && !dr6;
-
- /* Store the virtualized DR6 value */
- tsk->thread.debugreg6 = dr6;
-
-#ifdef CONFIG_KPROBES
- if (kprobe_debug_handler(regs)) {
- return;
- }
-#endif
-
- if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, 0,
- SIGTRAP) == NOTIFY_STOP) {
- return;
- }
-
- /* It's safe to allow irq's after DR6 has been saved */
- cond_local_irq_enable(regs);
-
- if (v8086_mode(regs)) {
- handle_vm86_trap((struct kernel_vm86_regs *) regs, 0,
- X86_TRAP_DB);
- goto out;
- }
-
- if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
- /*
- * Historical junk that used to handle SYSENTER single-stepping.
- * This should be unreachable now. If we survive for a while
- * without anyone hitting this warning, we'll turn this into
- * an oops.
- */
- tsk->thread.debugreg6 &= ~DR_STEP;
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
- regs->flags &= ~X86_EFLAGS_TF;
- }
-
- si_code = get_si_code(tsk->thread.debugreg6);
- if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
- send_sigtrap(regs, 0, si_code);
+ if (notify_die(DIE_DEBUG, "debug", regs, (long)dr6, 0, SIGTRAP) == NOTIFY_STOP)
+ return true;
-out:
- cond_local_irq_disable(regs);
+ return false;
}
static __always_inline void exc_debug_kernel(struct pt_regs *regs,
unsigned long dr6)
{
+ /*
+ * Disable breakpoints during exception handling; recursive exceptions
+ * are exceedingly 'fun'.
+ *
+ * Since this function is NOKPROBE, and that also applies to
+ * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
+ * HW_BREAKPOINT_W on our stack)
+ *
+ * Entry text is excluded for HW_BP_X and cpu_entry_area, which
+ * includes the entry stack is excluded for everything.
+ */
+ unsigned long dr7 = local_db_save();
bool irq_state = idtentry_enter_nmi(regs);
instrumentation_begin();
@@ -879,26 +880,87 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs))
dr6 &= ~DR_STEP;
- handle_debug(regs, dr6, false);
+ if (kprobe_debug_handler(regs))
+ goto out;
+
+ /*
+ * The kernel doesn't use INT1
+ */
+ if (!dr6)
+ goto out;
+
+ if (notify_debug(regs, &dr6))
+ goto out;
+ /*
+ * The kernel doesn't use TF single-step outside of:
+ *
+ * - Kprobes, consumed through kprobe_debug_handler()
+ * - KGDB, consumed through notify_debug()
+ *
+ * So if we get here with DR_STEP set, something is wonky.
+ *
+ * A known way to trigger this is through QEMU's GDB stub,
+ * which leaks #DB into the guest and causes IST recursion.
+ */
+ if (WARN_ON_ONCE(dr6 & DR_STEP))
+ regs->flags &= ~X86_EFLAGS_TF;
+out:
instrumentation_end();
idtentry_exit_nmi(regs, irq_state);
+
+ local_db_restore(dr7);
}
static __always_inline void exc_debug_user(struct pt_regs *regs,
unsigned long dr6)
{
+ bool icebp;
+
/*
* If something gets miswired and we end up here for a kernel mode
* #DB, we will malfunction.
*/
WARN_ON_ONCE(!user_mode(regs));
+ /*
+ * NB: We can't easily clear DR7 here because
+ * idtentry_exit_to_usermode() can invoke ptrace, schedule, access
+ * user memory, etc. This means that a recursive #DB is possible. If
+ * this happens, that #DB will hit exc_debug_kernel() and clear DR7.
+ * Since we're not on the IST stack right now, everything will be
+ * fine.
+ */
+
irqentry_enter_from_user_mode(regs);
instrumentation_begin();
- handle_debug(regs, dr6, true);
+ /*
+ * If dr6 has no reason to give us about the origin of this trap,
+ * then it's very likely the result of an icebp/int01 trap.
+ * User wants a sigtrap for that.
+ */
+ icebp = !dr6;
+
+ if (notify_debug(regs, &dr6))
+ goto out;
+
+ /* It's safe to allow irq's after DR6 has been saved */
+ local_irq_enable();
+
+ if (v8086_mode(regs)) {
+ handle_vm86_trap((struct kernel_vm86_regs *)regs, 0, X86_TRAP_DB);
+ goto out_irq;
+ }
+ /* Add the virtual_dr6 bits for signals. */
+ dr6 |= current->thread.virtual_dr6;
+ if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp)
+ send_sigtrap(regs, 0, get_si_code(dr6));
+
+out_irq:
+ local_irq_disable();
+out:
instrumentation_end();
irqentry_exit_to_user_mode(regs);
}
@@ -907,36 +969,24 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
/* IST stack entry */
DEFINE_IDTENTRY_DEBUG(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
- exc_debug_kernel(regs, dr6);
- debug_exit(dr7);
+ exc_debug_kernel(regs, debug_read_clear_dr6());
}
/* User entry, runs on regular task stack */
DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
- exc_debug_user(regs, dr6);
- debug_exit(dr7);
+ exc_debug_user(regs, debug_read_clear_dr6());
}
#else
/* 32 bit does not have separate entry points. */
DEFINE_IDTENTRY_RAW(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
+ unsigned long dr6 = debug_read_clear_dr6();
if (user_mode(regs))
exc_debug_user(regs, dr6);
else
exc_debug_kernel(regs, dr6);
-
- debug_exit(dr7);
}
#endif
@@ -1077,6 +1127,9 @@ void __init trap_init(void)
/* Init cpu_entry_area before IST entries are set up */
setup_cpu_entry_areas();
+ /* Init GHCB memory pages when running as an SEV-ES guest */
+ sev_es_init_vc_handling();
+
idt_setup_traps();
/*