aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/x86/kernel/kprobes/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/kprobes/core.c')
-rw-r--r--arch/x86/kernel/kprobes/core.c234
1 files changed, 136 insertions, 98 deletions
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 7c4ab8870da4..091b3ab76a18 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -37,12 +37,15 @@
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
+#include <linux/kgdb.h>
#include <linux/ftrace.h>
#include <linux/kasan.h>
#include <linux/moduleloader.h>
#include <linux/objtool.h>
#include <linux/vmalloc.h>
#include <linux/pgtable.h>
+#include <linux/set_memory.h>
+#include <linux/cfi.h>
#include <asm/text-patching.h>
#include <asm/cacheflush.h>
@@ -51,7 +54,6 @@
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
-#include <asm/set_memory.h>
#include <asm/ibt.h>
#include "common.h"
@@ -59,8 +61,6 @@
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
-#define stack_addr(regs) ((unsigned long *)regs->sp)
-
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
(b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
@@ -137,14 +137,14 @@ NOKPROBE_SYMBOL(synthesize_relcall);
* Returns non-zero if INSN is boostable.
* RIP relative instructions are adjusted at copying time in 64 bits mode
*/
-int can_boost(struct insn *insn, void *addr)
+bool can_boost(struct insn *insn, void *addr)
{
kprobe_opcode_t opcode;
insn_byte_t prefix;
int i;
if (search_exception_tables((unsigned long)addr))
- return 0; /* Page fault may occur on this address. */
+ return false; /* Page fault may occur on this address. */
/* 2nd-byte opcode */
if (insn->opcode.nbytes == 2)
@@ -152,7 +152,7 @@ int can_boost(struct insn *insn, void *addr)
(unsigned long *)twobyte_is_boostable);
if (insn->opcode.nbytes != 1)
- return 0;
+ return false;
for_each_insn_prefix(insn, i, prefix) {
insn_attr_t attr;
@@ -160,7 +160,7 @@ int can_boost(struct insn *insn, void *addr)
attr = inat_get_opcode_attribute(prefix);
/* Can't boost Address-size override prefix and CS override prefix */
if (prefix == 0x2e || inat_is_address_size_prefix(attr))
- return 0;
+ return false;
}
opcode = insn->opcode.bytes[0];
@@ -169,24 +169,35 @@ int can_boost(struct insn *insn, void *addr)
case 0x62: /* bound */
case 0x70 ... 0x7f: /* Conditional jumps */
case 0x9a: /* Call far */
- case 0xc0 ... 0xc1: /* Grp2 */
case 0xcc ... 0xce: /* software exceptions */
- case 0xd0 ... 0xd3: /* Grp2 */
case 0xd6: /* (UD) */
case 0xd8 ... 0xdf: /* ESC */
case 0xe0 ... 0xe3: /* LOOP*, JCXZ */
case 0xe8 ... 0xe9: /* near Call, JMP */
case 0xeb: /* Short JMP */
case 0xf0 ... 0xf4: /* LOCK/REP, HLT */
+ /* ... are not boostable */
+ return false;
+ case 0xc0 ... 0xc1: /* Grp2 */
+ case 0xd0 ... 0xd3: /* Grp2 */
+ /*
+ * AMD uses nnn == 110 as SHL/SAL, but Intel makes it reserved.
+ */
+ return X86_MODRM_REG(insn->modrm.bytes[0]) != 0b110;
case 0xf6 ... 0xf7: /* Grp3 */
+ /* AMD uses nnn == 001 as TEST, but Intel makes it reserved. */
+ return X86_MODRM_REG(insn->modrm.bytes[0]) != 0b001;
case 0xfe: /* Grp4 */
- /* ... are not boostable */
- return 0;
+ /* Only INC and DEC are boostable */
+ return X86_MODRM_REG(insn->modrm.bytes[0]) == 0b000 ||
+ X86_MODRM_REG(insn->modrm.bytes[0]) == 0b001;
case 0xff: /* Grp5 */
- /* Only indirect jmp is boostable */
- return X86_MODRM_REG(insn->modrm.bytes[0]) == 4;
+ /* Only INC, DEC, and indirect JMP are boostable */
+ return X86_MODRM_REG(insn->modrm.bytes[0]) == 0b000 ||
+ X86_MODRM_REG(insn->modrm.bytes[0]) == 0b001 ||
+ X86_MODRM_REG(insn->modrm.bytes[0]) == 0b100;
default:
- return 1;
+ return true;
}
}
@@ -252,21 +263,40 @@ unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long add
return __recover_probed_insn(buf, addr);
}
-/* Check if paddr is at an instruction boundary */
-static int can_probe(unsigned long paddr)
+/* Check if insn is INT or UD */
+static inline bool is_exception_insn(struct insn *insn)
+{
+ /* UD uses 0f escape */
+ if (insn->opcode.bytes[0] == 0x0f) {
+ /* UD0 / UD1 / UD2 */
+ return insn->opcode.bytes[1] == 0xff ||
+ insn->opcode.bytes[1] == 0xb9 ||
+ insn->opcode.bytes[1] == 0x0b;
+ }
+
+ /* INT3 / INT n / INTO / INT1 */
+ return insn->opcode.bytes[0] == 0xcc ||
+ insn->opcode.bytes[0] == 0xcd ||
+ insn->opcode.bytes[0] == 0xce ||
+ insn->opcode.bytes[0] == 0xf1;
+}
+
+/*
+ * Check if paddr is at an instruction boundary and that instruction can
+ * be probed
+ */
+static bool can_probe(unsigned long paddr)
{
unsigned long addr, __addr, offset = 0;
struct insn insn;
kprobe_opcode_t buf[MAX_INSN_SIZE];
if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
- return 0;
+ return false;
/* Decode instructions */
addr = paddr - offset;
while (addr < paddr) {
- int ret;
-
/*
* Check if the instruction has been modified by another
* kprobe, in which case we replace the breakpoint by the
@@ -277,22 +307,66 @@ static int can_probe(unsigned long paddr)
*/
__addr = recover_probed_instruction(buf, addr);
if (!__addr)
- return 0;
+ return false;
- ret = insn_decode_kernel(&insn, (void *)__addr);
- if (ret < 0)
- return 0;
+ if (insn_decode_kernel(&insn, (void *)__addr) < 0)
+ return false;
+#ifdef CONFIG_KGDB
/*
- * Another debugging subsystem might insert this breakpoint.
- * In that case, we can't recover it.
+ * If there is a dynamically installed kgdb sw breakpoint,
+ * this function should not be probed.
*/
- if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
- return 0;
+ if (insn.opcode.bytes[0] == INT3_INSN_OPCODE &&
+ kgdb_has_hit_break(addr))
+ return false;
+#endif
addr += insn.length;
}
- return (addr == paddr);
+ /* Check if paddr is at an instruction boundary */
+ if (addr != paddr)
+ return false;
+
+ __addr = recover_probed_instruction(buf, addr);
+ if (!__addr)
+ return false;
+
+ if (insn_decode_kernel(&insn, (void *)__addr) < 0)
+ return false;
+
+ /* INT and UD are special and should not be kprobed */
+ if (is_exception_insn(&insn))
+ return false;
+
+ if (IS_ENABLED(CONFIG_CFI_CLANG)) {
+ /*
+ * The compiler generates the following instruction sequence
+ * for indirect call checks and cfi.c decodes this;
+ *
+ *  movl -<id>, %r10d ; 6 bytes
+ * addl -4(%reg), %r10d ; 4 bytes
+ * je .Ltmp1 ; 2 bytes
+ * ud2 ; <- regs->ip
+ * .Ltmp1:
+ *
+ * Also, these movl and addl are used for showing expected
+ * type. So those must not be touched.
+ */
+ if (insn.opcode.value == 0xBA)
+ offset = 12;
+ else if (insn.opcode.value == 0x3)
+ offset = 6;
+ else
+ goto out;
+
+ /* This movl/addl is used for decoding CFI. */
+ if (is_cfi_trap(addr + offset))
+ return false;
+ }
+
+out:
+ return true;
}
/* If x86 supports IBT (ENDBR) it must be skipped. */
@@ -416,18 +490,11 @@ void *alloc_insn_page(void)
if (!page)
return NULL;
- set_vm_flush_reset_perms(page);
- /*
- * First make the page read-only, and only then make it executable to
- * prevent it from being W+X in between.
- */
- set_memory_ro((unsigned long)page, 1);
-
/*
* TODO: Once additional kernel code protection mechanisms are set, ensure
* that the page was not maliciously altered and it is still zeroed.
*/
- set_memory_x((unsigned long)page, 1);
+ set_memory_rox((unsigned long)page, 1);
return page;
}
@@ -469,50 +536,26 @@ static void kprobe_emulate_call(struct kprobe *p, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(kprobe_emulate_call);
-static nokprobe_inline
-void __kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs, bool cond)
+static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs)
{
unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
- if (cond)
- ip += p->ainsn.rel32;
+ ip += p->ainsn.rel32;
int3_emulate_jmp(regs, ip);
}
-
-static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs)
-{
- __kprobe_emulate_jmp(p, regs, true);
-}
NOKPROBE_SYMBOL(kprobe_emulate_jmp);
-static const unsigned long jcc_mask[6] = {
- [0] = X86_EFLAGS_OF,
- [1] = X86_EFLAGS_CF,
- [2] = X86_EFLAGS_ZF,
- [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF,
- [4] = X86_EFLAGS_SF,
- [5] = X86_EFLAGS_PF,
-};
-
static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs)
{
- bool invert = p->ainsn.jcc.type & 1;
- bool match;
+ unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
- if (p->ainsn.jcc.type < 0xc) {
- match = regs->flags & jcc_mask[p->ainsn.jcc.type >> 1];
- } else {
- match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
- ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
- if (p->ainsn.jcc.type >= 0xe)
- match = match && (regs->flags & X86_EFLAGS_ZF);
- }
- __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert));
+ int3_emulate_jcc(regs, p->ainsn.jcc.type, ip, p->ainsn.rel32);
}
NOKPROBE_SYMBOL(kprobe_emulate_jcc);
static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs)
{
+ unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
bool match;
if (p->ainsn.loop.type != 3) { /* LOOP* */
@@ -540,7 +583,9 @@ static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs)
else if (p->ainsn.loop.type == 1) /* LOOPE */
match = match && (regs->flags & X86_EFLAGS_ZF);
- __kprobe_emulate_jmp(p, regs, match);
+ if (match)
+ ip += p->ainsn.rel32;
+ int3_emulate_jmp(regs, ip);
}
NOKPROBE_SYMBOL(kprobe_emulate_loop);
@@ -569,7 +614,8 @@ static void kprobe_emulate_call_indirect(struct kprobe *p, struct pt_regs *regs)
{
unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg];
- int3_emulate_call(regs, regs_get_register(regs, offs));
+ int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + p->ainsn.size);
+ int3_emulate_jmp(regs, regs_get_register(regs, offs));
}
NOKPROBE_SYMBOL(kprobe_emulate_call_indirect);
@@ -630,7 +676,7 @@ static int prepare_emulation(struct kprobe *p, struct insn *insn)
/* 1 byte conditional jump */
p->ainsn.emulate_op = kprobe_emulate_jcc;
p->ainsn.jcc.type = opcode & 0xf;
- p->ainsn.rel32 = *(char *)insn->immediate.bytes;
+ p->ainsn.rel32 = insn->immediate.value;
break;
case 0x0f:
opcode = insn->opcode.bytes[1];
@@ -664,17 +710,19 @@ static int prepare_emulation(struct kprobe *p, struct insn *insn)
* is determined by the MOD/RM byte.
*/
opcode = insn->modrm.bytes[0];
- if ((opcode & 0x30) == 0x10) {
- if ((opcode & 0x8) == 0x8)
- return -EOPNOTSUPP; /* far call */
- /* call absolute, indirect */
+ switch (X86_MODRM_REG(opcode)) {
+ case 0b010: /* FF /2, call near, absolute indirect */
p->ainsn.emulate_op = kprobe_emulate_call_indirect;
- } else if ((opcode & 0x30) == 0x20) {
- if ((opcode & 0x8) == 0x8)
- return -EOPNOTSUPP; /* far jmp */
- /* jmp near absolute indirect */
+ break;
+ case 0b100: /* FF /4, jmp near, absolute indirect */
p->ainsn.emulate_op = kprobe_emulate_jmp_indirect;
- } else
+ break;
+ case 0b011: /* FF /3, call far, absolute indirect */
+ case 0b101: /* FF /5, jmp far, absolute indirect */
+ return -EOPNOTSUPP;
+ }
+
+ if (!p->ainsn.emulate_op)
break;
if (insn->addr_bytes != sizeof(unsigned long))
@@ -814,16 +862,20 @@ set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
- if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
- kcb->kprobe_status = KPROBE_HIT_SSDONE;
- cur->post_handler(cur, regs, 0);
- }
-
/* Restore back the original saved kprobes variables and continue. */
- if (kcb->kprobe_status == KPROBE_REENTER)
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ /* This will restore both kcb and current_kprobe */
restore_previous_kprobe(kcb);
- else
+ } else {
+ /*
+ * Always update the kcb status because
+ * reset_curent_kprobe() doesn't update kcb.
+ */
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ if (cur->post_handler)
+ cur->post_handler(cur, regs, 0);
reset_current_kprobe();
+ }
}
NOKPROBE_SYMBOL(kprobe_post_process);
@@ -991,20 +1043,6 @@ int kprobe_int3_handler(struct pt_regs *regs)
kprobe_post_process(p, regs, kcb);
return 1;
}
- }
-
- if (*addr != INT3_INSN_OPCODE) {
- /*
- * The breakpoint instruction was removed right
- * after we hit it. Another cpu has removed
- * either a probepoint or a debugger breakpoint
- * at this address. In either case, no further
- * handling of this interrupt is appropriate.
- * Back up over the (now missing) int3 and run
- * the original instruction.
- */
- regs->ip = (unsigned long)addr;
- return 1;
} /* else: not a kprobe fault; let the kernel handle it */
return 0;