From 768ae4406a5cab7e8702550f2446dbeb377b798d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 26 Aug 2019 14:48:23 +0200 Subject: x86/ftrace: Use text_poke() Move ftrace over to using the generic x86 text_poke functions; this avoids having a second/different copy of that code around. This also avoids ftrace violating the (new) W^X rule and avoids fragmenting the kernel text page-tables, due to no longer having to toggle them RW. Tested-by: Alexei Starovoitov Tested-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Acked-by: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Daniel Bristot de Oliveira Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191111132457.761255803@infradead.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 663 +++++++++-------------------------------------- 1 file changed, 120 insertions(+), 543 deletions(-) (limited to 'arch/x86/kernel/ftrace.c') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 024c3053dbba..3d8adeba2651 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -34,6 +34,8 @@ #ifdef CONFIG_DYNAMIC_FTRACE +static int ftrace_poke_late = 0; + int ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) { @@ -43,16 +45,20 @@ int ftrace_arch_code_modify_prepare(void) * ftrace has it set to "read/write". */ mutex_lock(&text_mutex); - set_kernel_text_rw(); - set_all_modules_text_rw(); + ftrace_poke_late = 1; return 0; } int ftrace_arch_code_modify_post_process(void) __releases(&text_mutex) { - set_all_modules_text_ro(); - set_kernel_text_ro(); + /* + * ftrace_make_{call,nop}() may be called during + * module load, and we need to finish the text_poke_queue() + * that they do, here. + */ + text_poke_finish(); + ftrace_poke_late = 0; mutex_unlock(&text_mutex); return 0; } @@ -60,67 +66,34 @@ int ftrace_arch_code_modify_post_process(void) union ftrace_code_union { char code[MCOUNT_INSN_SIZE]; struct { - unsigned char op; + char op; int offset; } __attribute__((packed)); }; -static int ftrace_calc_offset(long ip, long addr) -{ - return (int)(addr - ip); -} - -static unsigned char * -ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr) +static const char *ftrace_text_replace(char op, unsigned long ip, unsigned long addr) { static union ftrace_code_union calc; - calc.op = op; - calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); + calc.op = op; + calc.offset = (int)(addr - (ip + MCOUNT_INSN_SIZE)); return calc.code; } -static unsigned char * -ftrace_call_replace(unsigned long ip, unsigned long addr) -{ - return ftrace_text_replace(0xe8, ip, addr); -} - -static inline int -within(unsigned long addr, unsigned long start, unsigned long end) +static const char *ftrace_nop_replace(void) { - return addr >= start && addr < end; -} - -static unsigned long text_ip_addr(unsigned long ip) -{ - /* - * On x86_64, kernel text mappings are mapped read-only, so we use - * the kernel identity mapping instead of the kernel text mapping - * to modify the kernel text. - * - * For 32bit kernels, these mappings are same and we can use - * kernel identity mapping to modify code. - */ - if (within(ip, (unsigned long)_text, (unsigned long)_etext)) - ip = (unsigned long)__va(__pa_symbol(ip)); - - return ip; + return ideal_nops[NOP_ATOMIC5]; } -static const unsigned char *ftrace_nop_replace(void) +static const char *ftrace_call_replace(unsigned long ip, unsigned long addr) { - return ideal_nops[NOP_ATOMIC5]; + return ftrace_text_replace(CALL_INSN_OPCODE, ip, addr); } -static int -ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, - unsigned const char *new_code) +static int ftrace_verify_code(unsigned long ip, const char *old_code) { - unsigned char replaced[MCOUNT_INSN_SIZE]; - - ftrace_expected = old_code; + char cur_code[MCOUNT_INSN_SIZE]; /* * Note: @@ -129,31 +102,41 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, * Carefully read and modify the code with probe_kernel_*(), and make * sure what we read is what we expected it to be before modifying it. */ - /* read the text we want to modify */ - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (probe_kernel_read(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) { + WARN_ON(1); return -EFAULT; + } /* Make sure it is what we expect it to be */ - if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) + if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) { + WARN_ON(1); return -EINVAL; + } - ip = text_ip_addr(ip); - - /* replace the text with the new text */ - if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) - return -EPERM; + return 0; +} - sync_core(); +static int +ftrace_modify_code_direct(unsigned long ip, const char *old_code, + const char *new_code) +{ + int ret = ftrace_verify_code(ip, old_code); + if (ret) + return ret; + /* replace the text with the new text */ + if (ftrace_poke_late) + text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL); + else + text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE); return 0; } -int ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr) +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned const char *new, *old; unsigned long ip = rec->ip; + const char *new, *old; old = ftrace_call_replace(ip, addr); new = ftrace_nop_replace(); @@ -167,19 +150,20 @@ int ftrace_make_nop(struct module *mod, * just modify the code directly. */ if (addr == MCOUNT_ADDR) - return ftrace_modify_code_direct(rec->ip, old, new); + return ftrace_modify_code_direct(ip, old, new); - ftrace_expected = NULL; - - /* Normal cases use add_brk_on_nop */ + /* + * x86 overrides ftrace_replace_code -- this function will never be used + * in this case. + */ WARN_ONCE(1, "invalid use of ftrace_make_nop"); return -EINVAL; } int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned const char *new, *old; unsigned long ip = rec->ip; + const char *new, *old; old = ftrace_nop_replace(); new = ftrace_call_replace(ip, addr); @@ -188,43 +172,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return ftrace_modify_code_direct(rec->ip, old, new); } -/* - * The modifying_ftrace_code is used to tell the breakpoint - * handler to call ftrace_int3_handler(). If it fails to - * call this handler for a breakpoint added by ftrace, then - * the kernel may crash. - * - * As atomic_writes on x86 do not need a barrier, we do not - * need to add smp_mb()s for this to work. It is also considered - * that we can not read the modifying_ftrace_code before - * executing the breakpoint. That would be quite remarkable if - * it could do that. Here's the flow that is required: - * - * CPU-0 CPU-1 - * - * atomic_inc(mfc); - * write int3s - * // implicit (r)mb - * if (atomic_read(mfc)) - * call ftrace_int3_handler() - * - * Then when we are finished: - * - * atomic_dec(mfc); - * - * If we hit a breakpoint that was not set by ftrace, it does not - * matter if ftrace_int3_handler() is called or not. It will - * simply be ignored. But it is crucial that a ftrace nop/caller - * breakpoint is handled. No other user should ever place a - * breakpoint on an ftrace nop/caller location. It must only - * be done by this code. - */ -atomic_t modifying_ftrace_code __read_mostly; - -static int -ftrace_modify_code(unsigned long ip, unsigned const char *old_code, - unsigned const char *new_code); - /* * Should never be called: * As it is only called by __ftrace_replace_code() which is called by @@ -237,452 +184,84 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { WARN_ON(1); - ftrace_expected = NULL; return -EINVAL; } -static unsigned long ftrace_update_func; -static unsigned long ftrace_update_func_call; - -static int update_ftrace_func(unsigned long ip, void *new) -{ - unsigned char old[MCOUNT_INSN_SIZE]; - int ret; - - memcpy(old, (void *)ip, MCOUNT_INSN_SIZE); - - ftrace_update_func = ip; - /* Make sure the breakpoints see the ftrace_update_func update */ - smp_wmb(); - - /* See comment above by declaration of modifying_ftrace_code */ - atomic_inc(&modifying_ftrace_code); - - ret = ftrace_modify_code(ip, old, new); - - atomic_dec(&modifying_ftrace_code); - - return ret; -} - int ftrace_update_ftrace_func(ftrace_func_t func) -{ - unsigned long ip = (unsigned long)(&ftrace_call); - unsigned char *new; - int ret; - - ftrace_update_func_call = (unsigned long)func; - - new = ftrace_call_replace(ip, (unsigned long)func); - ret = update_ftrace_func(ip, new); - - /* Also update the regs callback function */ - if (!ret) { - ip = (unsigned long)(&ftrace_regs_call); - new = ftrace_call_replace(ip, (unsigned long)func); - ret = update_ftrace_func(ip, new); - } - - return ret; -} - -static nokprobe_inline int is_ftrace_caller(unsigned long ip) -{ - if (ip == ftrace_update_func) - return 1; - - return 0; -} - -/* - * A breakpoint was added to the code address we are about to - * modify, and this is the handle that will just skip over it. - * We are either changing a nop into a trace call, or a trace - * call to a nop. While the change is taking place, we treat - * it just like it was a nop. - */ -int ftrace_int3_handler(struct pt_regs *regs) { unsigned long ip; + const char *new; - if (WARN_ON_ONCE(!regs)) - return 0; - - ip = regs->ip - INT3_INSN_SIZE; - - if (ftrace_location(ip)) { - int3_emulate_call(regs, (unsigned long)ftrace_regs_caller); - return 1; - } else if (is_ftrace_caller(ip)) { - if (!ftrace_update_func_call) { - int3_emulate_jmp(regs, ip + CALL_INSN_SIZE); - return 1; - } - int3_emulate_call(regs, ftrace_update_func_call); - return 1; - } - - return 0; -} -NOKPROBE_SYMBOL(ftrace_int3_handler); - -static int ftrace_write(unsigned long ip, const char *val, int size) -{ - ip = text_ip_addr(ip); - - if (probe_kernel_write((void *)ip, val, size)) - return -EPERM; - - return 0; -} - -static int add_break(unsigned long ip, const char *old) -{ - unsigned char replaced[MCOUNT_INSN_SIZE]; - unsigned char brk = BREAKPOINT_INSTRUCTION; - - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) - return -EFAULT; - - ftrace_expected = old; - - /* Make sure it is what we expect it to be */ - if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) - return -EINVAL; - - return ftrace_write(ip, &brk, 1); -} - -static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned const char *old; - unsigned long ip = rec->ip; - - old = ftrace_call_replace(ip, addr); - - return add_break(rec->ip, old); -} - - -static int add_brk_on_nop(struct dyn_ftrace *rec) -{ - unsigned const char *old; - - old = ftrace_nop_replace(); - - return add_break(rec->ip, old); -} - -static int add_breakpoints(struct dyn_ftrace *rec, bool enable) -{ - unsigned long ftrace_addr; - int ret; - - ftrace_addr = ftrace_get_addr_curr(rec); - - ret = ftrace_test_record(rec, enable); - - switch (ret) { - case FTRACE_UPDATE_IGNORE: - return 0; - - case FTRACE_UPDATE_MAKE_CALL: - /* converting nop to call */ - return add_brk_on_nop(rec); - - case FTRACE_UPDATE_MODIFY_CALL: - case FTRACE_UPDATE_MAKE_NOP: - /* converting a call to a nop */ - return add_brk_on_call(rec, ftrace_addr); - } - return 0; -} - -/* - * On error, we need to remove breakpoints. This needs to - * be done caefully. If the address does not currently have a - * breakpoint, we know we are done. Otherwise, we look at the - * remaining 4 bytes of the instruction. If it matches a nop - * we replace the breakpoint with the nop. Otherwise we replace - * it with the call instruction. - */ -static int remove_breakpoint(struct dyn_ftrace *rec) -{ - unsigned char ins[MCOUNT_INSN_SIZE]; - unsigned char brk = BREAKPOINT_INSTRUCTION; - const unsigned char *nop; - unsigned long ftrace_addr; - unsigned long ip = rec->ip; - - /* If we fail the read, just give up */ - if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE)) - return -EFAULT; - - /* If this does not have a breakpoint, we are done */ - if (ins[0] != brk) - return 0; - - nop = ftrace_nop_replace(); - - /* - * If the last 4 bytes of the instruction do not match - * a nop, then we assume that this is a call to ftrace_addr. - */ - if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) { - /* - * For extra paranoidism, we check if the breakpoint is on - * a call that would actually jump to the ftrace_addr. - * If not, don't touch the breakpoint, we make just create - * a disaster. - */ - ftrace_addr = ftrace_get_addr_new(rec); - nop = ftrace_call_replace(ip, ftrace_addr); - - if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) - goto update; - - /* Check both ftrace_addr and ftrace_old_addr */ - ftrace_addr = ftrace_get_addr_curr(rec); - nop = ftrace_call_replace(ip, ftrace_addr); - - ftrace_expected = nop; - - if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) - return -EINVAL; - } - - update: - return ftrace_write(ip, nop, 1); -} - -static int add_update_code(unsigned long ip, unsigned const char *new) -{ - /* skip breakpoint */ - ip++; - new++; - return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1); -} - -static int add_update_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned long ip = rec->ip; - unsigned const char *new; - - new = ftrace_call_replace(ip, addr); - return add_update_code(ip, new); -} - -static int add_update_nop(struct dyn_ftrace *rec) -{ - unsigned long ip = rec->ip; - unsigned const char *new; - - new = ftrace_nop_replace(); - return add_update_code(ip, new); -} - -static int add_update(struct dyn_ftrace *rec, bool enable) -{ - unsigned long ftrace_addr; - int ret; - - ret = ftrace_test_record(rec, enable); - - ftrace_addr = ftrace_get_addr_new(rec); - - switch (ret) { - case FTRACE_UPDATE_IGNORE: - return 0; - - case FTRACE_UPDATE_MODIFY_CALL: - case FTRACE_UPDATE_MAKE_CALL: - /* converting nop to call */ - return add_update_call(rec, ftrace_addr); - - case FTRACE_UPDATE_MAKE_NOP: - /* converting a call to a nop */ - return add_update_nop(rec); - } - - return 0; -} - -static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned long ip = rec->ip; - unsigned const char *new; - - new = ftrace_call_replace(ip, addr); - - return ftrace_write(ip, new, 1); -} - -static int finish_update_nop(struct dyn_ftrace *rec) -{ - unsigned long ip = rec->ip; - unsigned const char *new; - - new = ftrace_nop_replace(); - - return ftrace_write(ip, new, 1); -} - -static int finish_update(struct dyn_ftrace *rec, bool enable) -{ - unsigned long ftrace_addr; - int ret; - - ret = ftrace_update_record(rec, enable); - - ftrace_addr = ftrace_get_addr_new(rec); - - switch (ret) { - case FTRACE_UPDATE_IGNORE: - return 0; - - case FTRACE_UPDATE_MODIFY_CALL: - case FTRACE_UPDATE_MAKE_CALL: - /* converting nop to call */ - return finish_update_call(rec, ftrace_addr); + ip = (unsigned long)(&ftrace_call); + new = ftrace_call_replace(ip, (unsigned long)func); + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); - case FTRACE_UPDATE_MAKE_NOP: - /* converting a call to a nop */ - return finish_update_nop(rec); - } + ip = (unsigned long)(&ftrace_regs_call); + new = ftrace_call_replace(ip, (unsigned long)func); + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); return 0; } -static void do_sync_core(void *data) -{ - sync_core(); -} - -static void run_sync(void) -{ - int enable_irqs; - - /* No need to sync if there's only one CPU */ - if (num_online_cpus() == 1) - return; - - enable_irqs = irqs_disabled(); - - /* We may be called with interrupts disabled (on bootup). */ - if (enable_irqs) - local_irq_enable(); - on_each_cpu(do_sync_core, NULL, 1); - if (enable_irqs) - local_irq_disable(); -} - void ftrace_replace_code(int enable) { struct ftrace_rec_iter *iter; struct dyn_ftrace *rec; - const char *report = "adding breakpoints"; - int count = 0; + const char *new, *old; int ret; for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); - ret = add_breakpoints(rec, enable); - if (ret) - goto remove_breakpoints; - count++; - } - - run_sync(); + switch (ftrace_test_record(rec, enable)) { + case FTRACE_UPDATE_IGNORE: + default: + continue; - report = "updating code"; - count = 0; + case FTRACE_UPDATE_MAKE_CALL: + old = ftrace_nop_replace(); + break; - for_ftrace_rec_iter(iter) { - rec = ftrace_rec_iter_record(iter); + case FTRACE_UPDATE_MODIFY_CALL: + case FTRACE_UPDATE_MAKE_NOP: + old = ftrace_call_replace(rec->ip, ftrace_get_addr_curr(rec)); + break; + } - ret = add_update(rec, enable); - if (ret) - goto remove_breakpoints; - count++; + ret = ftrace_verify_code(rec->ip, old); + if (ret) { + ftrace_bug(ret, rec); + return; + } } - run_sync(); - - report = "removing breakpoints"; - count = 0; - for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); - ret = finish_update(rec, enable); - if (ret) - goto remove_breakpoints; - count++; - } + switch (ftrace_test_record(rec, enable)) { + case FTRACE_UPDATE_IGNORE: + default: + continue; - run_sync(); + case FTRACE_UPDATE_MAKE_CALL: + case FTRACE_UPDATE_MODIFY_CALL: + new = ftrace_call_replace(rec->ip, ftrace_get_addr_new(rec)); + break; - return; + case FTRACE_UPDATE_MAKE_NOP: + new = ftrace_nop_replace(); + break; + } - remove_breakpoints: - pr_warn("Failed on %s (%d):\n", report, count); - ftrace_bug(ret, rec); - for_ftrace_rec_iter(iter) { - rec = ftrace_rec_iter_record(iter); - /* - * Breakpoints are handled only when this function is in - * progress. The system could not work with them. - */ - if (remove_breakpoint(rec)) - BUG(); + text_poke_queue((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL); + ftrace_update_record(rec, enable); } - run_sync(); -} - -static int -ftrace_modify_code(unsigned long ip, unsigned const char *old_code, - unsigned const char *new_code) -{ - int ret; - - ret = add_break(ip, old_code); - if (ret) - goto out; - - run_sync(); - - ret = add_update_code(ip, new_code); - if (ret) - goto fail_update; - - run_sync(); - - ret = ftrace_write(ip, new_code, 1); - /* - * The breakpoint is handled only when this function is in progress. - * The system could not work if we could not remove it. - */ - BUG_ON(ret); - out: - run_sync(); - return ret; - - fail_update: - /* Also here the system could not work with the breakpoint */ - if (ftrace_write(ip, old_code, 1)) - BUG(); - goto out; + text_poke_finish(); } void arch_ftrace_update_code(int command) { - /* See comment above by declaration of modifying_ftrace_code */ - atomic_inc(&modifying_ftrace_code); - ftrace_modify_all_code(command); - - atomic_dec(&modifying_ftrace_code); } int __init ftrace_dyn_arch_init(void) @@ -747,6 +326,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned long start_offset; unsigned long end_offset; unsigned long op_offset; + unsigned long call_offset; unsigned long offset; unsigned long npages; unsigned long size; @@ -763,10 +343,12 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) start_offset = (unsigned long)ftrace_regs_caller; end_offset = (unsigned long)ftrace_regs_caller_end; op_offset = (unsigned long)ftrace_regs_caller_op_ptr; + call_offset = (unsigned long)ftrace_regs_call; } else { start_offset = (unsigned long)ftrace_caller; end_offset = (unsigned long)ftrace_epilogue; op_offset = (unsigned long)ftrace_caller_op_ptr; + call_offset = (unsigned long)ftrace_call; } size = end_offset - start_offset; @@ -823,16 +405,21 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* put in the new offset to the ftrace_ops */ memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE); + /* put in the call to the function */ + mutex_lock(&text_mutex); + call_offset -= start_offset; + memcpy(trampoline + call_offset, + text_gen_insn(CALL_INSN_OPCODE, + trampoline + call_offset, + ftrace_ops_get_func(ops)), CALL_INSN_SIZE); + mutex_unlock(&text_mutex); + /* ALLOC_TRAMP flags lets us know we created it */ ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP; set_vm_flush_reset_perms(trampoline); - /* - * Module allocation needs to be completed by making the page - * executable. The page is still writable, which is a security hazard, - * but anyhow ftrace breaks W^X completely. - */ + set_memory_ro((unsigned long)trampoline, npages); set_memory_x((unsigned long)trampoline, npages); return (unsigned long)trampoline; fail: @@ -859,43 +446,35 @@ static unsigned long calc_trampoline_call_offset(bool save_regs) void arch_ftrace_update_trampoline(struct ftrace_ops *ops) { ftrace_func_t func; - unsigned char *new; unsigned long offset; unsigned long ip; unsigned int size; - int ret, npages; + const char *new; - if (ops->trampoline) { - /* - * The ftrace_ops caller may set up its own trampoline. - * In such a case, this code must not modify it. - */ - if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) - return; - npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT; - set_memory_rw(ops->trampoline, npages); - } else { + if (!ops->trampoline) { ops->trampoline = create_trampoline(ops, &size); if (!ops->trampoline) return; ops->trampoline_size = size; - npages = PAGE_ALIGN(size) >> PAGE_SHIFT; + return; } + /* + * The ftrace_ops caller may set up its own trampoline. + * In such a case, this code must not modify it. + */ + if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) + return; + offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS); ip = ops->trampoline + offset; - func = ftrace_ops_get_func(ops); - ftrace_update_func_call = (unsigned long)func; - + mutex_lock(&text_mutex); /* Do a safe modify in case the trampoline is executing */ new = ftrace_call_replace(ip, (unsigned long)func); - ret = update_ftrace_func(ip, new); - set_memory_ro(ops->trampoline, npages); - - /* The update should never fail */ - WARN_ON(ret); + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); + mutex_unlock(&text_mutex); } /* Return the address of the function the trampoline calls */ @@ -981,19 +560,18 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops) #ifdef CONFIG_DYNAMIC_FTRACE extern void ftrace_graph_call(void); -static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) +static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) { - return ftrace_text_replace(0xe9, ip, addr); + return ftrace_text_replace(JMP32_INSN_OPCODE, ip, addr); } static int ftrace_mod_jmp(unsigned long ip, void *func) { - unsigned char *new; + const char *new; - ftrace_update_func_call = 0UL; new = ftrace_jmp_replace(ip, (unsigned long)func); - - return update_ftrace_func(ip, new); + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); + return 0; } int ftrace_enable_ftrace_graph_caller(void) @@ -1019,10 +597,9 @@ int ftrace_disable_ftrace_graph_caller(void) void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, unsigned long frame_pointer) { + unsigned long return_hooker = (unsigned long)&return_to_handler; unsigned long old; int faulted; - unsigned long return_hooker = (unsigned long) - &return_to_handler; /* * When resuming from suspend-to-ram, this function can be indirectly -- cgit v1.2.3-59-g8ed1b From 67c1d4a28064f9ec63df03f7798e4a334176a9cd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 9 Oct 2019 12:44:14 +0200 Subject: x86/ftrace: Use text_gen_insn() Replace the ftrace_code_union with the generic text_gen_insn() helper, which does exactly this. Tested-by: Alexei Starovoitov Tested-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191111132457.932808000@infradead.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/text-patching.h | 30 +++++++++++++++++++++++++++++- arch/x86/kernel/alternative.c | 26 -------------------------- arch/x86/kernel/ftrace.c | 32 +++++++------------------------- 3 files changed, 36 insertions(+), 52 deletions(-) (limited to 'arch/x86/kernel/ftrace.c') diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 93e4266cd5a3..ad8f9f433a1b 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -80,7 +80,35 @@ static inline int text_opcode_size(u8 opcode) return size; } -extern void *text_gen_insn(u8 opcode, const void *addr, const void *dest); +union text_poke_insn { + u8 text[POKE_MAX_OPCODE_SIZE]; + struct { + u8 opcode; + s32 disp; + } __attribute__((packed)); +}; + +static __always_inline +void *text_gen_insn(u8 opcode, const void *addr, const void *dest) +{ + static union text_poke_insn insn; /* per instance */ + int size = text_opcode_size(opcode); + + insn.opcode = opcode; + + if (size > 1) { + insn.disp = (long)dest - (long)(addr + size); + if (size == 2) { + /* + * Ensure that for JMP9 the displacement + * actually fits the signed byte. + */ + BUG_ON((insn.disp >> 31) != (insn.disp >> 7)); + } + } + + return &insn.text; +} extern int after_bootmem; extern __ro_after_init struct mm_struct *poking_mm; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f8f34f94d13d..cfcfadf5cc80 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1247,29 +1247,3 @@ void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void * text_poke_loc_init(&tp, addr, opcode, len, emulate); text_poke_bp_batch(&tp, 1); } - -union text_poke_insn { - u8 text[POKE_MAX_OPCODE_SIZE]; - struct { - u8 opcode; - s32 disp; - } __attribute__((packed)); -}; - -void *text_gen_insn(u8 opcode, const void *addr, const void *dest) -{ - static union text_poke_insn insn; /* text_mutex */ - int size = text_opcode_size(opcode); - - lockdep_assert_held(&text_mutex); - - insn.opcode = opcode; - - if (size > 1) { - insn.disp = (long)dest - (long)(addr + size); - if (size == 2) - BUG_ON((insn.disp >> 31) != (insn.disp >> 7)); - } - - return &insn.text; -} diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3d8adeba2651..2a179fb35cd1 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -63,24 +63,6 @@ int ftrace_arch_code_modify_post_process(void) return 0; } -union ftrace_code_union { - char code[MCOUNT_INSN_SIZE]; - struct { - char op; - int offset; - } __attribute__((packed)); -}; - -static const char *ftrace_text_replace(char op, unsigned long ip, unsigned long addr) -{ - static union ftrace_code_union calc; - - calc.op = op; - calc.offset = (int)(addr - (ip + MCOUNT_INSN_SIZE)); - - return calc.code; -} - static const char *ftrace_nop_replace(void) { return ideal_nops[NOP_ATOMIC5]; @@ -88,7 +70,7 @@ static const char *ftrace_nop_replace(void) static const char *ftrace_call_replace(unsigned long ip, unsigned long addr) { - return ftrace_text_replace(CALL_INSN_OPCODE, ip, addr); + return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr); } static int ftrace_verify_code(unsigned long ip, const char *old_code) @@ -480,20 +462,20 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) /* Return the address of the function the trampoline calls */ static void *addr_from_call(void *ptr) { - union ftrace_code_union calc; + union text_poke_insn call; int ret; - ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE); + ret = probe_kernel_read(&call, ptr, CALL_INSN_SIZE); if (WARN_ON_ONCE(ret < 0)) return NULL; /* Make sure this is a call */ - if (WARN_ON_ONCE(calc.op != 0xe8)) { - pr_warn("Expected e8, got %x\n", calc.op); + if (WARN_ON_ONCE(call.opcode != CALL_INSN_OPCODE)) { + pr_warn("Expected E8, got %x\n", call.opcode); return NULL; } - return ptr + MCOUNT_INSN_SIZE + calc.offset; + return ptr + CALL_INSN_SIZE + call.disp; } void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, @@ -562,7 +544,7 @@ extern void ftrace_graph_call(void); static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) { - return ftrace_text_replace(JMP32_INSN_OPCODE, ip, addr); + return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip, (void *)addr); } static int ftrace_mod_jmp(unsigned long ip, void *func) -- cgit v1.2.3-59-g8ed1b From 38ebd8d119245eecb99fe00b0f57e269baf22767 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 18 Nov 2019 18:20:12 +0100 Subject: x86/ftrace: Mark ftrace_modify_code_direct() __ref ... because it calls the .init.text function text_poke_early(). That is ok because it does call that function early, during boot. Fixes: 9706f7c3531f ("x86/ftrace: Use text_poke()") Suggested-by: Peter Zijlstra Signed-off-by: Borislav Petkov Acked-by: Steven Rostedt (VMware) Cc: Alexei Starovoitov Cc: Andy Lutomirski Cc: Daniel Bristot de Oliveira Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191116204607.GC23231@zn.tnic --- arch/x86/kernel/ftrace.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/ftrace.c') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 2a179fb35cd1..108ee96f8b66 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -99,7 +99,12 @@ static int ftrace_verify_code(unsigned long ip, const char *old_code) return 0; } -static int +/* + * Marked __ref because it calls text_poke_early() which is .init.text. That is + * ok because that call will happen early, during boot, when .init sections are + * still present. + */ +static int __ref ftrace_modify_code_direct(unsigned long ip, const char *old_code, const char *new_code) { -- cgit v1.2.3-59-g8ed1b