aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-01-28 12:28:06 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-01-28 12:28:06 -0800
commitf6170f0afbe23ad82b4a1195168949c31e3a2527 (patch)
treea589c1f05cdbc6ab6f8fef8cdcad9b8dc1a8ddf3 /arch
parentMerge branch 'x86-cleanups-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (diff)
parentx86/nmi: Remove irq_work from the long duration NMI handler (diff)
downloadlinux-dev-f6170f0afbe23ad82b4a1195168949c31e3a2527.tar.xz
linux-dev-f6170f0afbe23ad82b4a1195168949c31e3a2527.zip
Merge branch 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull misc x86 updates from Ingo Molnar: "Misc changes: - Enhance #GP fault printouts by distinguishing between canonical and non-canonical address faults, and also add KASAN fault decoding. - Fix/enhance the x86 NMI handler by putting the duration check into a direct function call instead of an irq_work which we know to be broken in some cases. - Clean up do_general_protection() a bit" * 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/nmi: Remove irq_work from the long duration NMI handler x86/traps: Cleanup do_general_protection() x86/kasan: Print original address on #GP x86/dumpstack: Introduce die_addr() for die() with #GP fault address x86/traps: Print address on #GP x86/insn-eval: Add support for 64-bit kernel mode
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/include/asm/kdebug.h1
-rw-r--r--arch/x86/include/asm/nmi.h1
-rw-r--r--arch/x86/include/asm/ptrace.h13
-rw-r--r--arch/x86/kernel/dumpstack.c26
-rw-r--r--arch/x86/kernel/nmi.c20
-rw-r--r--arch/x86/kernel/traps.c108
-rw-r--r--arch/x86/lib/insn-eval.c26
-rw-r--r--arch/x86/mm/kasan_init_64.c21
8 files changed, 152 insertions, 64 deletions
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 75f1e35e7c15..247ab14c6309 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -33,6 +33,7 @@ enum show_regs_mode {
};
extern void die(const char *, struct pt_regs *,long);
+void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr);
extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs);
extern void __show_regs(struct pt_regs *regs, enum show_regs_mode);
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 75ded1d13d98..9d5d949e662e 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -41,7 +41,6 @@ struct nmiaction {
struct list_head list;
nmi_handler_t handler;
u64 max_duration;
- struct irq_work irq_work;
unsigned long flags;
const char *name;
};
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 78897a8da01f..6d6475fdd327 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -159,6 +159,19 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
#endif
}
+/*
+ * Determine whether the register set came from any context that is running in
+ * 64-bit mode.
+ */
+static inline bool any_64bit_mode(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_64
+ return !user_mode(regs) || user_64bit_mode(regs);
+#else
+ return false;
+#endif
+}
+
#ifdef CONFIG_X86_64
#define current_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() current_pt_regs()->sp
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index e07424e19274..ae64ec7f752f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -365,7 +365,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
}
NOKPROBE_SYMBOL(oops_end);
-int __die(const char *str, struct pt_regs *regs, long err)
+static void __die_header(const char *str, struct pt_regs *regs, long err)
{
const char *pr = "";
@@ -384,7 +384,11 @@ int __die(const char *str, struct pt_regs *regs, long err)
IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
(boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
+}
+NOKPROBE_SYMBOL(__die_header);
+static int __die_body(const char *str, struct pt_regs *regs, long err)
+{
show_regs(regs);
print_modules();
@@ -394,6 +398,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
return 0;
}
+NOKPROBE_SYMBOL(__die_body);
+
+int __die(const char *str, struct pt_regs *regs, long err)
+{
+ __die_header(str, regs, err);
+ return __die_body(str, regs, err);
+}
NOKPROBE_SYMBOL(__die);
/*
@@ -410,6 +421,19 @@ void die(const char *str, struct pt_regs *regs, long err)
oops_end(flags, regs, sig);
}
+void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr)
+{
+ unsigned long flags = oops_begin();
+ int sig = SIGSEGV;
+
+ __die_header(str, regs, err);
+ if (gp_addr)
+ kasan_non_canonical_hook(gp_addr);
+ if (__die_body(str, regs, err))
+ sig = 0;
+ oops_end(flags, regs, sig);
+}
+
void show_regs(struct pt_regs *regs)
{
show_regs_print_info(KERN_DEFAULT);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index e676a9916c49..54c21d6abd5a 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -104,18 +104,22 @@ static int __init nmi_warning_debugfs(void)
}
fs_initcall(nmi_warning_debugfs);
-static void nmi_max_handler(struct irq_work *w)
+static void nmi_check_duration(struct nmiaction *action, u64 duration)
{
- struct nmiaction *a = container_of(w, struct nmiaction, irq_work);
+ u64 whole_msecs = READ_ONCE(action->max_duration);
int remainder_ns, decimal_msecs;
- u64 whole_msecs = READ_ONCE(a->max_duration);
+
+ if (duration < nmi_longest_ns || duration < action->max_duration)
+ return;
+
+ action->max_duration = duration;
remainder_ns = do_div(whole_msecs, (1000 * 1000));
decimal_msecs = remainder_ns / 1000;
printk_ratelimited(KERN_INFO
"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
- a->handler, whole_msecs, decimal_msecs);
+ action->handler, whole_msecs, decimal_msecs);
}
static int nmi_handle(unsigned int type, struct pt_regs *regs)
@@ -142,11 +146,7 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs)
delta = sched_clock() - delta;
trace_nmi_handler(a->handler, (int)delta, thishandled);
- if (delta < nmi_longest_ns || delta < a->max_duration)
- continue;
-
- a->max_duration = delta;
- irq_work_queue(&a->irq_work);
+ nmi_check_duration(a, delta);
}
rcu_read_unlock();
@@ -164,8 +164,6 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
if (!action->handler)
return -EINVAL;
- init_irq_work(&action->irq_work, nmi_max_handler);
-
raw_spin_lock_irqsave(&desc->lock, flags);
/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f19de6f45d48..9e6f822922a3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -56,6 +56,8 @@
#include <asm/mpx.h>
#include <asm/vm86.h>
#include <asm/umip.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
@@ -518,11 +520,57 @@ exit_trap:
do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL);
}
-dotraplinkage void
-do_general_protection(struct pt_regs *regs, long error_code)
+enum kernel_gp_hint {
+ GP_NO_HINT,
+ GP_NON_CANONICAL,
+ GP_CANONICAL
+};
+
+/*
+ * When an uncaught #GP occurs, try to determine the memory address accessed by
+ * the instruction and return that address to the caller. Also, try to figure
+ * out whether any part of the access to that address was non-canonical.
+ */
+static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
+ unsigned long *addr)
{
- const char *desc = "general protection fault";
+ u8 insn_buf[MAX_INSN_SIZE];
+ struct insn insn;
+
+ if (probe_kernel_read(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
+ return GP_NO_HINT;
+
+ kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
+ insn_get_modrm(&insn);
+ insn_get_sib(&insn);
+
+ *addr = (unsigned long)insn_get_addr_ref(&insn, regs);
+ if (*addr == -1UL)
+ return GP_NO_HINT;
+
+#ifdef CONFIG_X86_64
+ /*
+ * Check that:
+ * - the operand is not in the kernel half
+ * - the last byte of the operand is not in the user canonical half
+ */
+ if (*addr < ~__VIRTUAL_MASK &&
+ *addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK)
+ return GP_NON_CANONICAL;
+#endif
+
+ return GP_CANONICAL;
+}
+
+#define GPFSTR "general protection fault"
+
+dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code)
+{
+ char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
+ enum kernel_gp_hint hint = GP_NO_HINT;
struct task_struct *tsk;
+ unsigned long gp_addr;
+ int ret;
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
cond_local_irq_enable(regs);
@@ -539,34 +587,56 @@ do_general_protection(struct pt_regs *regs, long error_code)
}
tsk = current;
- if (!user_mode(regs)) {
- if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
- return;
+ if (user_mode(regs)) {
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
- /*
- * To be potentially processing a kprobe fault and to
- * trust the result from kprobe_running(), we have to
- * be non-preemptible.
- */
- if (!preemptible() && kprobe_running() &&
- kprobe_fault_handler(regs, X86_TRAP_GP))
- return;
+ show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
+ force_sig(SIGSEGV);
- if (notify_die(DIE_GPF, desc, regs, error_code,
- X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
- die(desc, regs, error_code);
return;
}
+ if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
+ return;
+
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
- show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
+ /*
+ * To be potentially processing a kprobe fault and to trust the result
+ * from kprobe_running(), we have to be non-preemptible.
+ */
+ if (!preemptible() &&
+ kprobe_running() &&
+ kprobe_fault_handler(regs, X86_TRAP_GP))
+ return;
+
+ ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV);
+ if (ret == NOTIFY_STOP)
+ return;
+
+ if (error_code)
+ snprintf(desc, sizeof(desc), "segment-related " GPFSTR);
+ else
+ hint = get_kernel_gp_address(regs, &gp_addr);
+
+ if (hint != GP_NO_HINT)
+ snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx",
+ (hint == GP_NON_CANONICAL) ? "probably for non-canonical address"
+ : "maybe for address",
+ gp_addr);
+
+ /*
+ * KASAN is interested only in the non-canonical case, clear it
+ * otherwise.
+ */
+ if (hint != GP_NON_CANONICAL)
+ gp_addr = 0;
+
+ die_addr(desc, regs, error_code, gp_addr);
- force_sig(SIGSEGV);
}
NOKPROBE_SYMBOL(do_general_protection);
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 306c3a0902ba..31600d851fd8 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -155,7 +155,7 @@ static bool check_seg_overrides(struct insn *insn, int regoff)
*/
static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off)
{
- if (user_64bit_mode(regs))
+ if (any_64bit_mode(regs))
return INAT_SEG_REG_IGNORE;
/*
* Resolve the default segment register as described in Section 3.7.4
@@ -266,7 +266,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
* which may be invalid at this point.
*/
if (regoff == offsetof(struct pt_regs, ip)) {
- if (user_64bit_mode(regs))
+ if (any_64bit_mode(regs))
return INAT_SEG_REG_IGNORE;
else
return INAT_SEG_REG_CS;
@@ -289,7 +289,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
* In long mode, segment override prefixes are ignored, except for
* overrides for FS and GS.
*/
- if (user_64bit_mode(regs)) {
+ if (any_64bit_mode(regs)) {
if (idx != INAT_SEG_REG_FS &&
idx != INAT_SEG_REG_GS)
idx = INAT_SEG_REG_IGNORE;
@@ -646,23 +646,27 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
*/
return (unsigned long)(sel << 4);
- if (user_64bit_mode(regs)) {
+ if (any_64bit_mode(regs)) {
/*
* Only FS or GS will have a base address, the rest of
* the segments' bases are forced to 0.
*/
unsigned long base;
- if (seg_reg_idx == INAT_SEG_REG_FS)
+ if (seg_reg_idx == INAT_SEG_REG_FS) {
rdmsrl(MSR_FS_BASE, base);
- else if (seg_reg_idx == INAT_SEG_REG_GS)
+ } else if (seg_reg_idx == INAT_SEG_REG_GS) {
/*
* swapgs was called at the kernel entry point. Thus,
* MSR_KERNEL_GS_BASE will have the user-space GS base.
*/
- rdmsrl(MSR_KERNEL_GS_BASE, base);
- else
+ if (user_mode(regs))
+ rdmsrl(MSR_KERNEL_GS_BASE, base);
+ else
+ rdmsrl(MSR_GS_BASE, base);
+ } else {
base = 0;
+ }
return base;
}
@@ -703,7 +707,7 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
if (sel < 0)
return 0;
- if (user_64bit_mode(regs) || v8086_mode(regs))
+ if (any_64bit_mode(regs) || v8086_mode(regs))
return -1L;
if (!sel)
@@ -948,7 +952,7 @@ static int get_eff_addr_modrm(struct insn *insn, struct pt_regs *regs,
* following instruction.
*/
if (*regoff == -EDOM) {
- if (user_64bit_mode(regs))
+ if (any_64bit_mode(regs))
tmp = regs->ip + insn->length;
else
tmp = 0;
@@ -1250,7 +1254,7 @@ static void __user *get_addr_ref_32(struct insn *insn, struct pt_regs *regs)
* After computed, the effective address is treated as an unsigned
* quantity.
*/
- if (!user_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit))
+ if (!any_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit))
goto out;
/*
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index cf5bc37c90ac..763e71abc0fe 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -288,23 +288,6 @@ static void __init kasan_shallow_populate_pgds(void *start, void *end)
} while (pgd++, addr = next, addr != (unsigned long)end);
}
-#ifdef CONFIG_KASAN_INLINE
-static int kasan_die_handler(struct notifier_block *self,
- unsigned long val,
- void *data)
-{
- if (val == DIE_GPF) {
- pr_emerg("CONFIG_KASAN_INLINE enabled\n");
- pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n");
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block kasan_die_notifier = {
- .notifier_call = kasan_die_handler,
-};
-#endif
-
void __init kasan_early_init(void)
{
int i;
@@ -341,10 +324,6 @@ void __init kasan_init(void)
int i;
void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
-#ifdef CONFIG_KASAN_INLINE
- register_die_notifier(&kasan_die_notifier);
-#endif
-
memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
/*