From 00619f7c650e4e46c650cb2e2fd5f438b32dc64b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 21 Sep 2021 21:54:32 +0200 Subject: sched,livepatch: Use task_call_func() Instead of frobbing around with scheduler internals, use the shiny new task_call_func() interface. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Petr Mladek Acked-by: Miroslav Benes Acked-by: Vasily Gorbik Tested-by: Petr Mladek Tested-by: Vasily Gorbik # on s390 Link: https://lkml.kernel.org/r/20210929152428.709906138@infradead.org --- kernel/livepatch/transition.c | 90 +++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 46 deletions(-) (limited to 'kernel/livepatch') diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index 291b857a6e20..75251e9dbc3c 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -13,7 +13,6 @@ #include "core.h" #include "patch.h" #include "transition.h" -#include "../sched/sched.h" #define MAX_STACK_ENTRIES 100 #define STACK_ERR_BUF_SIZE 128 @@ -240,7 +239,7 @@ static int klp_check_stack_func(struct klp_func *func, unsigned long *entries, * Determine whether it's safe to transition the task to the target patch state * by looking for any to-be-patched or to-be-unpatched functions on its stack. */ -static int klp_check_stack(struct task_struct *task, char *err_buf) +static int klp_check_stack(struct task_struct *task, const char **oldname) { static unsigned long entries[MAX_STACK_ENTRIES]; struct klp_object *obj; @@ -248,12 +247,8 @@ static int klp_check_stack(struct task_struct *task, char *err_buf) int ret, nr_entries; ret = stack_trace_save_tsk_reliable(task, entries, ARRAY_SIZE(entries)); - if (ret < 0) { - snprintf(err_buf, STACK_ERR_BUF_SIZE, - "%s: %s:%d has an unreliable stack\n", - __func__, task->comm, task->pid); - return ret; - } + if (ret < 0) + return -EINVAL; nr_entries = ret; klp_for_each_object(klp_transition_patch, obj) { @@ -262,11 +257,8 @@ static int klp_check_stack(struct task_struct *task, char *err_buf) klp_for_each_func(obj, func) { ret = klp_check_stack_func(func, entries, nr_entries); if (ret) { - snprintf(err_buf, STACK_ERR_BUF_SIZE, - "%s: %s:%d is sleeping on function %s\n", - __func__, task->comm, task->pid, - func->old_name); - return ret; + *oldname = func->old_name; + return -EADDRINUSE; } } } @@ -274,6 +266,22 @@ static int klp_check_stack(struct task_struct *task, char *err_buf) return 0; } +static int klp_check_and_switch_task(struct task_struct *task, void *arg) +{ + int ret; + + if (task_curr(task) && task != current) + return -EBUSY; + + ret = klp_check_stack(task, arg); + if (ret) + return ret; + + clear_tsk_thread_flag(task, TIF_PATCH_PENDING); + task->patch_state = klp_target_state; + return 0; +} + /* * Try to safely switch a task to the target patch state. If it's currently * running, or it's sleeping on a to-be-patched or to-be-unpatched function, or @@ -281,13 +289,8 @@ static int klp_check_stack(struct task_struct *task, char *err_buf) */ static bool klp_try_switch_task(struct task_struct *task) { - static char err_buf[STACK_ERR_BUF_SIZE]; - struct rq *rq; - struct rq_flags flags; + const char *old_name; int ret; - bool success = false; - - err_buf[0] = '\0'; /* check if this task has already switched over */ if (task->patch_state == klp_target_state) @@ -305,36 +308,31 @@ static bool klp_try_switch_task(struct task_struct *task) * functions. If all goes well, switch the task to the target patch * state. */ - rq = task_rq_lock(task, &flags); + ret = task_call_func(task, klp_check_and_switch_task, &old_name); + switch (ret) { + case 0: /* success */ + break; - if (task_running(rq, task) && task != current) { - snprintf(err_buf, STACK_ERR_BUF_SIZE, - "%s: %s:%d is running\n", __func__, task->comm, - task->pid); - goto done; + case -EBUSY: /* klp_check_and_switch_task() */ + pr_debug("%s: %s:%d is running\n", + __func__, task->comm, task->pid); + break; + case -EINVAL: /* klp_check_and_switch_task() */ + pr_debug("%s: %s:%d has an unreliable stack\n", + __func__, task->comm, task->pid); + break; + case -EADDRINUSE: /* klp_check_and_switch_task() */ + pr_debug("%s: %s:%d is sleeping on function %s\n", + __func__, task->comm, task->pid, old_name); + break; + + default: + pr_debug("%s: Unknown error code (%d) when trying to switch %s:%d\n", + __func__, ret, task->comm, task->pid); + break; } - ret = klp_check_stack(task, err_buf); - if (ret) - goto done; - - success = true; - - clear_tsk_thread_flag(task, TIF_PATCH_PENDING); - task->patch_state = klp_target_state; - -done: - task_rq_unlock(rq, task, &flags); - - /* - * Due to console deadlock issues, pr_debug() can't be used while - * holding the task rq lock. Instead we have to use a temporary buffer - * and print the debug message after releasing the lock. - */ - if (err_buf[0] != '\0') - pr_debug("%s", err_buf); - - return success; + return !ret; } /* -- cgit v1.3-8-gc7d7 From 5de62ea84abd732ded7c5569426fd71c0420f83e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 21 Sep 2021 22:16:02 +0200 Subject: sched,livepatch: Use wake_up_if_idle() Make sure to prod idle CPUs so they call klp_update_patch_state(). Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Petr Mladek Acked-by: Miroslav Benes Acked-by: Vasily Gorbik Tested-by: Petr Mladek Tested-by: Vasily Gorbik # on s390 Link: https://lkml.kernel.org/r/20210929151723.162004989@infradead.org --- include/linux/sched/idle.h | 4 ++++ kernel/livepatch/transition.c | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel/livepatch') diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index 22873d276be6..d73d314d59c6 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -11,7 +11,11 @@ enum cpu_idle_type { CPU_MAX_IDLE_TYPES }; +#ifdef CONFIG_SMP extern void wake_up_if_idle(int cpu); +#else +static inline void wake_up_if_idle(int cpu) { } +#endif /* * Idle thread specific functions to determine the need_resched diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index 75251e9dbc3c..5683ac0d2566 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -413,8 +413,11 @@ void klp_try_complete_transition(void) for_each_possible_cpu(cpu) { task = idle_task(cpu); if (cpu_online(cpu)) { - if (!klp_try_switch_task(task)) + if (!klp_try_switch_task(task)) { complete = false; + /* Make idle task go through the main loop. */ + wake_up_if_idle(cpu); + } } else if (task->patch_state != klp_target_state) { /* offline idle tasks can be switched immediately */ clear_tsk_thread_flag(task, TIF_PATCH_PENDING); -- cgit v1.3-8-gc7d7 From ce5e48036c9e76a2a5bd4d9079eac273087a533a Mon Sep 17 00:00:00 2001 From: 王贇 Date: Wed, 27 Oct 2021 11:14:44 +0800 Subject: ftrace: disable preemption when recursion locked As the documentation explained, ftrace_test_recursion_trylock() and ftrace_test_recursion_unlock() were supposed to disable and enable preemption properly, however currently this work is done outside of the function, which could be missing by mistake. And since the internal using of trace_test_and_set_recursion() and trace_clear_recursion() also require preemption disabled, we can just merge the logical. This patch will make sure the preemption has been disabled when trace_test_and_set_recursion() return bit >= 0, and trace_clear_recursion() will enable the preemption if previously enabled. Link: https://lkml.kernel.org/r/13bde807-779c-aa4c-0672-20515ae365ea@linux.alibaba.com CC: Petr Mladek Cc: Guo Ren Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Joe Lawrence Cc: Masami Hiramatsu Cc: Nicholas Piggin Cc: Jisheng Zhang CC: Steven Rostedt CC: Miroslav Benes Reported-by: Abaci Suggested-by: Peter Zijlstra Signed-off-by: Michael Wang [ Removed extra line in comment - SDR ] Signed-off-by: Steven Rostedt (VMware) --- arch/csky/kernel/probes/ftrace.c | 2 -- arch/parisc/kernel/ftrace.c | 2 -- arch/powerpc/kernel/kprobes-ftrace.c | 2 -- arch/riscv/kernel/probes/ftrace.c | 2 -- arch/x86/kernel/kprobes/ftrace.c | 2 -- include/linux/trace_recursion.h | 11 ++++++++++- kernel/livepatch/patch.c | 12 ++++++------ kernel/trace/ftrace.c | 15 +++++---------- kernel/trace/trace_functions.c | 5 ----- 9 files changed, 21 insertions(+), 32 deletions(-) (limited to 'kernel/livepatch') diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c index b388228abbf2..834cffcfbce3 100644 --- a/arch/csky/kernel/probes/ftrace.c +++ b/arch/csky/kernel/probes/ftrace.c @@ -17,7 +17,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (!p) { p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE)); @@ -57,7 +56,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 01581f715737..b14011d3c2f1 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -211,7 +211,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -240,7 +239,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, } __this_cpu_write(current_kprobe, NULL); out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 7154d58338cc..072ebe7f290b 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -26,7 +26,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)nip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -61,7 +60,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c index aab85a82f419..7142ec42e889 100644 --- a/arch/riscv/kernel/probes/ftrace.c +++ b/arch/riscv/kernel/probes/ftrace.c @@ -15,7 +15,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -52,7 +51,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 596de2f6d3a5..dd2ec14adb77 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -25,7 +25,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -59,7 +58,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index 24f284eb55a7..a13f23b04d73 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -155,6 +155,9 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip); # define do_ftrace_record_recursion(ip, pip) do { } while (0) #endif +/* + * Preemption is promised to be disabled when return bit >= 0. + */ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip, int start, int max) { @@ -189,14 +192,20 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign current->trace_recursion = val; barrier(); + preempt_disable_notrace(); + return bit + 1; } +/* + * Preemption will be enabled (if it was previously enabled). + */ static __always_inline void trace_clear_recursion(int bit) { if (!bit) return; + preempt_enable_notrace(); barrier(); bit--; trace_recursion_clear(bit); @@ -209,7 +218,7 @@ static __always_inline void trace_clear_recursion(int bit) * tracing recursed in the same context (normal vs interrupt), * * Returns: -1 if a recursion happened. - * >= 0 if no recursion + * >= 0 if no recursion. */ static __always_inline int ftrace_test_recursion_trylock(unsigned long ip, unsigned long parent_ip) diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index e8029aea67f1..fe316c021d73 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -49,14 +49,15 @@ static void notrace klp_ftrace_handler(unsigned long ip, ops = container_of(fops, struct klp_ops, fops); + /* + * The ftrace_test_recursion_trylock() will disable preemption, + * which is required for the variant of synchronize_rcu() that is + * used to allow patching functions where RCU is not watching. + * See klp_synchronize_transition() for more details. + */ bit = ftrace_test_recursion_trylock(ip, parent_ip); if (WARN_ON_ONCE(bit < 0)) return; - /* - * A variant of synchronize_rcu() is used to allow patching functions - * where RCU is not watching, see klp_synchronize_transition(). - */ - preempt_disable_notrace(); func = list_first_or_null_rcu(&ops->func_stack, struct klp_func, stack_node); @@ -120,7 +121,6 @@ static void notrace klp_ftrace_handler(unsigned long ip, klp_arch_set_pc(fregs, (unsigned long)func->new_func); unlock: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2057ad363772..b4ed1a301232 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7198,16 +7198,15 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op; int bit; + /* + * The ftrace_test_and_set_recursion() will disable preemption, + * which is required since some of the ops may be dynamically + * allocated, they must be freed after a synchronize_rcu(). + */ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX); if (bit < 0) return; - /* - * Some of the ops may be dynamically allocated, - * they must be freed after a synchronize_rcu(). - */ - preempt_disable_notrace(); - do_for_each_ftrace_op(op, ftrace_ops_list) { /* Stub functions don't need to be called nor tested */ if (op->flags & FTRACE_OPS_FL_STUB) @@ -7231,7 +7230,6 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, } } while_for_each_ftrace_op(op); out: - preempt_enable_notrace(); trace_clear_recursion(bit); } @@ -7279,12 +7277,9 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); - if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) op->func(ip, parent_ip, op, fregs); - preempt_enable_notrace(); trace_clear_recursion(bit); } NOKPROBE_SYMBOL(ftrace_ops_assist_func); diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 1f0e63f5d1f9..9f1bfbe105e8 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -186,7 +186,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, return; trace_ctx = tracing_gen_ctx(); - preempt_disable_notrace(); cpu = smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); @@ -194,7 +193,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, trace_function(tr, ip, parent_ip, trace_ctx); ftrace_test_recursion_unlock(bit); - preempt_enable_notrace(); } #ifdef CONFIG_UNWINDER_ORC @@ -298,8 +296,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); - cpu = smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); if (atomic_read(&data->disabled)) @@ -324,7 +320,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, out: ftrace_test_recursion_unlock(bit); - preempt_enable_notrace(); } static void -- cgit v1.3-8-gc7d7