aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-13 12:55:49 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-13 12:55:49 -0700
commit8603596a327c978534f5c45db135e6c36b4b1425 (patch)
tree7270af18e4d1c42986672eb5673d55c7dfa678cf /arch/x86/kernel
parentMerge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (diff)
parentMerge tag 'perf-core-for-mingo-4.19-20180801' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core (diff)
downloadwireguard-linux-8603596a327c978534f5c45db135e6c36b4b1425.tar.xz
wireguard-linux-8603596a327c978534f5c45db135e6c36b4b1425.zip
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf update from Thomas Gleixner: "The perf crowd presents: Kernel updates: - Removal of jprobes - Cleanup and consolidatation the handling of kprobes - Cleanup and consolidation of hardware breakpoints - The usual pile of fixes and updates to PMUs and event descriptors Tooling updates: - Updates and improvements all over the place. Nothing outstanding, just the (good) boring incremental grump work" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (103 commits) perf trace: Do not require --no-syscalls to suppress strace like output perf bpf: Include uapi/linux/bpf.h from the 'perf trace' script's bpf.h perf tools: Allow overriding MAX_NR_CPUS at compile time perf bpf: Show better message when failing to load an object perf list: Unify metric group description format with PMU event description perf vendor events arm64: Update ThunderX2 implementation defined pmu core events perf cs-etm: Generate branch sample for CS_ETM_TRACE_ON packet perf cs-etm: Generate branch sample when receiving a CS_ETM_TRACE_ON packet perf cs-etm: Support dummy address value for CS_ETM_TRACE_ON packet perf cs-etm: Fix start tracing packet handling perf build: Fix installation directory for eBPF perf c2c report: Fix crash for empty browser perf tests: Fix indexing when invoking subtests perf trace: Beautify the AF_INET & AF_INET6 'socket' syscall 'protocol' args perf trace beauty: Add beautifiers for 'socket''s 'protocol' arg perf trace beauty: Do not print NULL strarray entries perf beauty: Add a generator for IPPROTO_ socket's protocol constants tools include uapi: Grab a copy of linux/in.h perf tests: Fix complex event name parsing perf evlist: Fix error out while applying initial delay and LBR ...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/hw_breakpoint.c131
-rw-r--r--arch/x86/kernel/kprobes/common.h10
-rw-r--r--arch/x86/kernel/kprobes/core.c124
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c49
-rw-r--r--arch/x86/kernel/kprobes/opt.c1
5 files changed, 90 insertions, 225 deletions
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 8771766d46b6..34a5c1715148 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -169,28 +169,29 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
set_dr_addr_mask(0, i);
}
-/*
- * Check for virtual address in kernel space.
- */
-int arch_check_bp_in_kernelspace(struct perf_event *bp)
+static int arch_bp_generic_len(int x86_len)
{
- unsigned int len;
- unsigned long va;
- struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-
- va = info->address;
- len = bp->attr.bp_len;
-
- /*
- * We don't need to worry about va + len - 1 overflowing:
- * we already require that va is aligned to a multiple of len.
- */
- return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
+ switch (x86_len) {
+ case X86_BREAKPOINT_LEN_1:
+ return HW_BREAKPOINT_LEN_1;
+ case X86_BREAKPOINT_LEN_2:
+ return HW_BREAKPOINT_LEN_2;
+ case X86_BREAKPOINT_LEN_4:
+ return HW_BREAKPOINT_LEN_4;
+#ifdef CONFIG_X86_64
+ case X86_BREAKPOINT_LEN_8:
+ return HW_BREAKPOINT_LEN_8;
+#endif
+ default:
+ return -EINVAL;
+ }
}
int arch_bp_generic_fields(int x86_len, int x86_type,
int *gen_len, int *gen_type)
{
+ int len;
+
/* Type */
switch (x86_type) {
case X86_BREAKPOINT_EXECUTE:
@@ -211,42 +212,47 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
}
/* Len */
- switch (x86_len) {
- case X86_BREAKPOINT_LEN_1:
- *gen_len = HW_BREAKPOINT_LEN_1;
- break;
- case X86_BREAKPOINT_LEN_2:
- *gen_len = HW_BREAKPOINT_LEN_2;
- break;
- case X86_BREAKPOINT_LEN_4:
- *gen_len = HW_BREAKPOINT_LEN_4;
- break;
-#ifdef CONFIG_X86_64
- case X86_BREAKPOINT_LEN_8:
- *gen_len = HW_BREAKPOINT_LEN_8;
- break;
-#endif
- default:
+ len = arch_bp_generic_len(x86_len);
+ if (len < 0)
return -EINVAL;
- }
+ *gen_len = len;
return 0;
}
-
-static int arch_build_bp_info(struct perf_event *bp)
+/*
+ * Check for virtual address in kernel space.
+ */
+int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
{
- struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ unsigned long va;
+ int len;
- info->address = bp->attr.bp_addr;
+ va = hw->address;
+ len = arch_bp_generic_len(hw->len);
+ WARN_ON_ONCE(len < 0);
+
+ /*
+ * We don't need to worry about va + len - 1 overflowing:
+ * we already require that va is aligned to a multiple of len.
+ */
+ return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
+}
+
+static int arch_build_bp_info(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw)
+{
+ hw->address = attr->bp_addr;
+ hw->mask = 0;
/* Type */
- switch (bp->attr.bp_type) {
+ switch (attr->bp_type) {
case HW_BREAKPOINT_W:
- info->type = X86_BREAKPOINT_WRITE;
+ hw->type = X86_BREAKPOINT_WRITE;
break;
case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
- info->type = X86_BREAKPOINT_RW;
+ hw->type = X86_BREAKPOINT_RW;
break;
case HW_BREAKPOINT_X:
/*
@@ -254,23 +260,23 @@ static int arch_build_bp_info(struct perf_event *bp)
* acceptable for kprobes. On non-kprobes kernels, we don't
* allow kernel breakpoints at all.
*/
- if (bp->attr.bp_addr >= TASK_SIZE_MAX) {
+ if (attr->bp_addr >= TASK_SIZE_MAX) {
#ifdef CONFIG_KPROBES
- if (within_kprobe_blacklist(bp->attr.bp_addr))
+ if (within_kprobe_blacklist(attr->bp_addr))
return -EINVAL;
#else
return -EINVAL;
#endif
}
- info->type = X86_BREAKPOINT_EXECUTE;
+ hw->type = X86_BREAKPOINT_EXECUTE;
/*
* x86 inst breakpoints need to have a specific undefined len.
* But we still need to check userspace is not trying to setup
* an unsupported length, to get a range breakpoint for example.
*/
- if (bp->attr.bp_len == sizeof(long)) {
- info->len = X86_BREAKPOINT_LEN_X;
+ if (attr->bp_len == sizeof(long)) {
+ hw->len = X86_BREAKPOINT_LEN_X;
return 0;
}
default:
@@ -278,28 +284,26 @@ static int arch_build_bp_info(struct perf_event *bp)
}
/* Len */
- info->mask = 0;
-
- switch (bp->attr.bp_len) {
+ switch (attr->bp_len) {
case HW_BREAKPOINT_LEN_1:
- info->len = X86_BREAKPOINT_LEN_1;
+ hw->len = X86_BREAKPOINT_LEN_1;
break;
case HW_BREAKPOINT_LEN_2:
- info->len = X86_BREAKPOINT_LEN_2;
+ hw->len = X86_BREAKPOINT_LEN_2;
break;
case HW_BREAKPOINT_LEN_4:
- info->len = X86_BREAKPOINT_LEN_4;
+ hw->len = X86_BREAKPOINT_LEN_4;
break;
#ifdef CONFIG_X86_64
case HW_BREAKPOINT_LEN_8:
- info->len = X86_BREAKPOINT_LEN_8;
+ hw->len = X86_BREAKPOINT_LEN_8;
break;
#endif
default:
/* AMD range breakpoint */
- if (!is_power_of_2(bp->attr.bp_len))
+ if (!is_power_of_2(attr->bp_len))
return -EINVAL;
- if (bp->attr.bp_addr & (bp->attr.bp_len - 1))
+ if (attr->bp_addr & (attr->bp_len - 1))
return -EINVAL;
if (!boot_cpu_has(X86_FEATURE_BPEXT))
@@ -312,8 +316,8 @@ static int arch_build_bp_info(struct perf_event *bp)
* breakpoints, then we'll have to check for kprobe-blacklisted
* addresses anywhere in the range.
*/
- info->mask = bp->attr.bp_len - 1;
- info->len = X86_BREAKPOINT_LEN_1;
+ hw->mask = attr->bp_len - 1;
+ hw->len = X86_BREAKPOINT_LEN_1;
}
return 0;
@@ -322,22 +326,23 @@ static int arch_build_bp_info(struct perf_event *bp)
/*
* Validate the arch-specific HW Breakpoint register settings
*/
-int arch_validate_hwbkpt_settings(struct perf_event *bp)
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw)
{
- struct arch_hw_breakpoint *info = counter_arch_bp(bp);
unsigned int align;
int ret;
- ret = arch_build_bp_info(bp);
+ ret = arch_build_bp_info(bp, attr, hw);
if (ret)
return ret;
- switch (info->len) {
+ switch (hw->len) {
case X86_BREAKPOINT_LEN_1:
align = 0;
- if (info->mask)
- align = info->mask;
+ if (hw->mask)
+ align = hw->mask;
break;
case X86_BREAKPOINT_LEN_2:
align = 1;
@@ -358,7 +363,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
* Check that the low-order bits of the address are appropriate
* for the alignment implied by len.
*/
- if (info->address & align)
+ if (hw->address & align)
return -EINVAL;
return 0;
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index ae38dccf0c8f..2b949f4fd4d8 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -105,14 +105,4 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
}
#endif
-#ifdef CONFIG_KPROBES_ON_FTRACE
-extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb);
-#else
-static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
-{
- return 0;
-}
-#endif
#endif
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 6f4d42377fe5..b0d1e81c96bb 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -66,8 +66,6 @@
#include "common.h"
-void jprobe_return_end(void);
-
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
@@ -395,8 +393,6 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
- (u8 *) real;
if ((s64) (s32) newdisp != newdisp) {
pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
- pr_err("\tSrc: %p, Dest: %p, old disp: %x\n",
- src, real, insn->displacement.value);
return 0;
}
disp = (u8 *) dest + insn_offset_displacement(insn);
@@ -596,7 +592,6 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
* stepping.
*/
regs->ip = (unsigned long)p->ainsn.insn;
- preempt_enable_no_resched();
return;
}
#endif
@@ -640,8 +635,7 @@ static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
* Raise a BUG or we'll continue in an endless reentering loop
* and eventually a stack overflow.
*/
- printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
- p->addr);
+ pr_err("Unrecoverable kprobe detected.\n");
dump_kprobe(p);
BUG();
default:
@@ -669,12 +663,10 @@ int kprobe_int3_handler(struct pt_regs *regs)
addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
/*
- * We don't want to be preempted for the entire
- * duration of kprobe processing. We conditionally
- * re-enable preemption at the end of this function,
- * and also in reenter_kprobe() and setup_singlestep().
+ * We don't want to be preempted for the entire duration of kprobe
+ * processing. Since int3 and debug trap disables irqs and we clear
+ * IF while singlestepping, it must be no preemptible.
*/
- preempt_disable();
kcb = get_kprobe_ctlblk();
p = get_kprobe(addr);
@@ -690,13 +682,14 @@ int kprobe_int3_handler(struct pt_regs *regs)
/*
* If we have no pre-handler or it returned 0, we
* continue with normal processing. If we have a
- * pre-handler and it returned non-zero, it prepped
- * for calling the break_handler below on re-entry
- * for jprobe processing, so get out doing nothing
- * more here.
+ * pre-handler and it returned non-zero, that means
+ * user handler setup registers to exit to another
+ * instruction, we must skip the single stepping.
*/
if (!p->pre_handler || !p->pre_handler(p, regs))
setup_singlestep(p, regs, kcb, 0);
+ else
+ reset_current_kprobe();
return 1;
}
} else if (*addr != BREAKPOINT_INSTRUCTION) {
@@ -710,18 +703,9 @@ int kprobe_int3_handler(struct pt_regs *regs)
* the original instruction.
*/
regs->ip = (unsigned long)addr;
- preempt_enable_no_resched();
return 1;
- } else if (kprobe_running()) {
- p = __this_cpu_read(current_kprobe);
- if (p->break_handler && p->break_handler(p, regs)) {
- if (!skip_singlestep(p, regs, kcb))
- setup_singlestep(p, regs, kcb, 0);
- return 1;
- }
} /* else: not a kprobe fault; let the kernel handle it */
- preempt_enable_no_resched();
return 0;
}
NOKPROBE_SYMBOL(kprobe_int3_handler);
@@ -972,8 +956,6 @@ int kprobe_debug_handler(struct pt_regs *regs)
}
reset_current_kprobe();
out:
- preempt_enable_no_resched();
-
/*
* if somebody else is singlestepping across a probe point, flags
* will have TF set, in which case, continue the remaining processing
@@ -1020,7 +1002,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
restore_previous_kprobe(kcb);
else
reset_current_kprobe();
- preempt_enable_no_resched();
} else if (kcb->kprobe_status == KPROBE_HIT_ACTIVE ||
kcb->kprobe_status == KPROBE_HIT_SSDONE) {
/*
@@ -1083,93 +1064,6 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
}
NOKPROBE_SYMBOL(kprobe_exceptions_notify);
-int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct jprobe *jp = container_of(p, struct jprobe, kp);
- unsigned long addr;
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- kcb->jprobe_saved_regs = *regs;
- kcb->jprobe_saved_sp = stack_addr(regs);
- addr = (unsigned long)(kcb->jprobe_saved_sp);
-
- /*
- * As Linus pointed out, gcc assumes that the callee
- * owns the argument space and could overwrite it, e.g.
- * tailcall optimization. So, to be absolutely safe
- * we also save and restore enough stack bytes to cover
- * the argument area.
- * Use __memcpy() to avoid KASAN stack out-of-bounds reports as we copy
- * raw stack chunk with redzones:
- */
- __memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr));
- regs->ip = (unsigned long)(jp->entry);
-
- /*
- * jprobes use jprobe_return() which skips the normal return
- * path of the function, and this messes up the accounting of the
- * function graph tracer to get messed up.
- *
- * Pause function graph tracing while performing the jprobe function.
- */
- pause_graph_tracing();
- return 1;
-}
-NOKPROBE_SYMBOL(setjmp_pre_handler);
-
-void jprobe_return(void)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- /* Unpoison stack redzones in the frames we are going to jump over. */
- kasan_unpoison_stack_above_sp_to(kcb->jprobe_saved_sp);
-
- asm volatile (
-#ifdef CONFIG_X86_64
- " xchg %%rbx,%%rsp \n"
-#else
- " xchgl %%ebx,%%esp \n"
-#endif
- " int3 \n"
- " .globl jprobe_return_end\n"
- " jprobe_return_end: \n"
- " nop \n"::"b"
- (kcb->jprobe_saved_sp):"memory");
-}
-NOKPROBE_SYMBOL(jprobe_return);
-NOKPROBE_SYMBOL(jprobe_return_end);
-
-int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- u8 *addr = (u8 *) (regs->ip - 1);
- struct jprobe *jp = container_of(p, struct jprobe, kp);
- void *saved_sp = kcb->jprobe_saved_sp;
-
- if ((addr > (u8 *) jprobe_return) &&
- (addr < (u8 *) jprobe_return_end)) {
- if (stack_addr(regs) != saved_sp) {
- struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
- printk(KERN_ERR
- "current sp %p does not match saved sp %p\n",
- stack_addr(regs), saved_sp);
- printk(KERN_ERR "Saved registers for jprobe %p\n", jp);
- show_regs(saved_regs);
- printk(KERN_ERR "Current registers\n");
- show_regs(regs);
- BUG();
- }
- /* It's OK to start function graph tracing again */
- unpause_graph_tracing();
- *regs = kcb->jprobe_saved_regs;
- __memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp));
- preempt_enable_no_resched();
- return 1;
- }
- return 0;
-}
-NOKPROBE_SYMBOL(longjmp_break_handler);
-
bool arch_within_kprobe_blacklist(unsigned long addr)
{
bool is_in_entry_trampoline_section = false;
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 8dc0161cec8f..ef819e19650b 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -25,36 +25,6 @@
#include "common.h"
-static nokprobe_inline
-void __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb, unsigned long orig_ip)
-{
- /*
- * Emulate singlestep (and also recover regs->ip)
- * as if there is a 5byte nop
- */
- regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
- if (unlikely(p->post_handler)) {
- kcb->kprobe_status = KPROBE_HIT_SSDONE;
- p->post_handler(p, regs, 0);
- }
- __this_cpu_write(current_kprobe, NULL);
- if (orig_ip)
- regs->ip = orig_ip;
-}
-
-int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
-{
- if (kprobe_ftrace(p)) {
- __skip_singlestep(p, regs, kcb, 0);
- preempt_enable_no_resched();
- return 1;
- }
- return 0;
-}
-NOKPROBE_SYMBOL(skip_singlestep);
-
/* Ftrace callback handler for kprobes -- called under preepmt disabed */
void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *regs)
@@ -75,18 +45,25 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
regs->ip = ip + sizeof(kprobe_opcode_t);
- /* To emulate trap based kprobes, preempt_disable here */
- preempt_disable();
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (!p->pre_handler || !p->pre_handler(p, regs)) {
- __skip_singlestep(p, regs, kcb, orig_ip);
- preempt_enable_no_resched();
+ /*
+ * Emulate singlestep (and also recover regs->ip)
+ * as if there is a 5byte nop
+ */
+ regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ regs->ip = orig_ip;
}
/*
- * If pre_handler returns !0, it sets regs->ip and
- * resets current kprobe, and keep preempt count +1.
+ * If pre_handler returns !0, it changes regs->ip. We have to
+ * skip emulating post_handler.
*/
+ __this_cpu_write(current_kprobe, NULL);
}
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 203d398802a3..eaf02f2e7300 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -491,7 +491,6 @@ int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
if (!reenter)
reset_current_kprobe();
- preempt_enable_no_resched();
return 1;
}
return 0;