From 4fdd88877e5259dcc5e504dca449acc0324d71b9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 26 Mar 2020 23:49:36 +0900 Subject: kprobes: Lock kprobe_mutex while showing kprobe_blacklist Lock kprobe_mutex while showing kprobe_blacklist to prevent updating the kprobe_blacklist. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134059.571125195@linutronix.de --- kernel/kprobes.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 2625c241ac00..570d60827656 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2420,6 +2420,7 @@ static const struct file_operations debugfs_kprobes_operations = { /* kprobes/blacklist -- shows which functions can not be probed */ static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos) { + mutex_lock(&kprobe_mutex); return seq_list_start(&kprobe_blacklist, *pos); } @@ -2446,10 +2447,15 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v) return 0; } +static void kprobe_blacklist_seq_stop(struct seq_file *f, void *v) +{ + mutex_unlock(&kprobe_mutex); +} + static const struct seq_operations kprobe_blacklist_seq_ops = { .start = kprobe_blacklist_seq_start, .next = kprobe_blacklist_seq_next, - .stop = kprobe_seq_stop, /* Reuse void function */ + .stop = kprobe_blacklist_seq_stop, .show = kprobe_blacklist_seq_show, }; -- cgit v1.2.3-59-g8ed1b From 1e6769b0aece51ea7a3dc3117c37d4a5669e4a21 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 26 Mar 2020 23:49:48 +0900 Subject: kprobes: Support __kprobes blacklist in modules Support __kprobes attribute for blacklist functions in modules. The __kprobes attribute functions are stored in .kprobes.text section. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134059.678201813@linutronix.de --- include/linux/module.h | 4 ++++ kernel/kprobes.c | 42 ++++++++++++++++++++++++++++++++++++++++++ kernel/module.c | 4 ++++ 3 files changed, 50 insertions(+) diff --git a/include/linux/module.h b/include/linux/module.h index 1ad393e62bef..369c354f9207 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -489,6 +489,10 @@ struct module { unsigned int num_ftrace_callsites; unsigned long *ftrace_callsites; #endif +#ifdef CONFIG_KPROBES + void *kprobes_text_start; + unsigned int kprobes_text_size; +#endif #ifdef CONFIG_LIVEPATCH bool klp; /* Is this a livepatch module? */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 570d60827656..b7549992b9bd 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2179,6 +2179,19 @@ int kprobe_add_area_blacklist(unsigned long start, unsigned long end) return 0; } +/* Remove all symbols in given area from kprobe blacklist */ +static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end) +{ + struct kprobe_blacklist_entry *ent, *n; + + list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) { + if (ent->start_addr < start || ent->start_addr >= end) + continue; + list_del(&ent->list); + kfree(ent); + } +} + int __init __weak arch_populate_kprobe_blacklist(void) { return 0; @@ -2215,6 +2228,28 @@ static int __init populate_kprobe_blacklist(unsigned long *start, return ret ? : arch_populate_kprobe_blacklist(); } +static void add_module_kprobe_blacklist(struct module *mod) +{ + unsigned long start, end; + + start = (unsigned long)mod->kprobes_text_start; + if (start) { + end = start + mod->kprobes_text_size; + kprobe_add_area_blacklist(start, end); + } +} + +static void remove_module_kprobe_blacklist(struct module *mod) +{ + unsigned long start, end; + + start = (unsigned long)mod->kprobes_text_start; + if (start) { + end = start + mod->kprobes_text_size; + kprobe_remove_area_blacklist(start, end); + } +} + /* Module notifier call back, checking kprobes on the module */ static int kprobes_module_callback(struct notifier_block *nb, unsigned long val, void *data) @@ -2225,6 +2260,11 @@ static int kprobes_module_callback(struct notifier_block *nb, unsigned int i; int checkcore = (val == MODULE_STATE_GOING); + if (val == MODULE_STATE_COMING) { + mutex_lock(&kprobe_mutex); + add_module_kprobe_blacklist(mod); + mutex_unlock(&kprobe_mutex); + } if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE) return NOTIFY_DONE; @@ -2255,6 +2295,8 @@ static int kprobes_module_callback(struct notifier_block *nb, kill_kprobe(p); } } + if (val == MODULE_STATE_GOING) + remove_module_kprobe_blacklist(mod); mutex_unlock(&kprobe_mutex); return NOTIFY_DONE; } diff --git a/kernel/module.c b/kernel/module.c index 646f1e2330d2..978f3fa40850 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3193,6 +3193,10 @@ static int find_module_sections(struct module *mod, struct load_info *info) mod->ei_funcs = section_objs(info, "_error_injection_whitelist", sizeof(*mod->ei_funcs), &mod->num_ei_funcs); +#endif +#ifdef CONFIG_KPROBES + mod->kprobes_text_start = section_objs(info, ".kprobes.text", 1, + &mod->kprobes_text_size); #endif mod->extable = section_objs(info, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); -- cgit v1.2.3-59-g8ed1b From 16db6264c93d2d7df9eb8be5d9eb717ab30105fe Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 26 Mar 2020 23:50:00 +0900 Subject: kprobes: Support NOKPROBE_SYMBOL() in modules Support NOKPROBE_SYMBOL() in modules. NOKPROBE_SYMBOL() records only symbol address in "_kprobe_blacklist" section in the module. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134059.771170126@linutronix.de --- include/linux/module.h | 2 ++ kernel/kprobes.c | 17 +++++++++++++++++ kernel/module.c | 3 +++ 3 files changed, 22 insertions(+) diff --git a/include/linux/module.h b/include/linux/module.h index 369c354f9207..1192097c9a69 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -492,6 +492,8 @@ struct module { #ifdef CONFIG_KPROBES void *kprobes_text_start; unsigned int kprobes_text_size; + unsigned long *kprobe_blacklist; + unsigned int num_kprobe_blacklist; #endif #ifdef CONFIG_LIVEPATCH diff --git a/kernel/kprobes.c b/kernel/kprobes.c index b7549992b9bd..9eb5acf0a9f3 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2192,6 +2192,11 @@ static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end) } } +static void kprobe_remove_ksym_blacklist(unsigned long entry) +{ + kprobe_remove_area_blacklist(entry, entry + 1); +} + int __init __weak arch_populate_kprobe_blacklist(void) { return 0; @@ -2231,6 +2236,12 @@ static int __init populate_kprobe_blacklist(unsigned long *start, static void add_module_kprobe_blacklist(struct module *mod) { unsigned long start, end; + int i; + + if (mod->kprobe_blacklist) { + for (i = 0; i < mod->num_kprobe_blacklist; i++) + kprobe_add_ksym_blacklist(mod->kprobe_blacklist[i]); + } start = (unsigned long)mod->kprobes_text_start; if (start) { @@ -2242,6 +2253,12 @@ static void add_module_kprobe_blacklist(struct module *mod) static void remove_module_kprobe_blacklist(struct module *mod) { unsigned long start, end; + int i; + + if (mod->kprobe_blacklist) { + for (i = 0; i < mod->num_kprobe_blacklist; i++) + kprobe_remove_ksym_blacklist(mod->kprobe_blacklist[i]); + } start = (unsigned long)mod->kprobes_text_start; if (start) { diff --git a/kernel/module.c b/kernel/module.c index 978f3fa40850..faf733789560 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3197,6 +3197,9 @@ static int find_module_sections(struct module *mod, struct load_info *info) #ifdef CONFIG_KPROBES mod->kprobes_text_start = section_objs(info, ".kprobes.text", 1, &mod->kprobes_text_size); + mod->kprobe_blacklist = section_objs(info, "_kprobe_blacklist", + sizeof(unsigned long), + &mod->num_kprobe_blacklist); #endif mod->extable = section_objs(info, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); -- cgit v1.2.3-59-g8ed1b From d85eaa9411472a99de4b5732cb59c8bae629d5f1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 26 Mar 2020 23:50:11 +0900 Subject: samples/kprobes: Add __kprobes and NOKPROBE_SYMBOL() for handlers. Add __kprobes and NOKPROBE_SYMBOL() for sample kprobe handlers. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134059.878578033@linutronix.de --- samples/kprobes/kprobe_example.c | 6 ++++-- samples/kprobes/kretprobe_example.c | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c index d693c23a85e8..501911d1b327 100644 --- a/samples/kprobes/kprobe_example.c +++ b/samples/kprobes/kprobe_example.c @@ -25,7 +25,7 @@ static struct kprobe kp = { }; /* kprobe pre_handler: called just before the probed instruction is executed */ -static int handler_pre(struct kprobe *p, struct pt_regs *regs) +static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs) { #ifdef CONFIG_X86 pr_info("<%s> pre_handler: p->addr = 0x%p, ip = %lx, flags = 0x%lx\n", @@ -54,7 +54,7 @@ static int handler_pre(struct kprobe *p, struct pt_regs *regs) } /* kprobe post_handler: called after the probed instruction is executed */ -static void handler_post(struct kprobe *p, struct pt_regs *regs, +static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs, unsigned long flags) { #ifdef CONFIG_X86 @@ -90,6 +90,8 @@ static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr) /* Return 0 because we don't handle the fault. */ return 0; } +/* NOKPROBE_SYMBOL() is also available */ +NOKPROBE_SYMBOL(handler_fault); static int __init kprobe_init(void) { diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c index 186315ca88b3..013e8e6ebae9 100644 --- a/samples/kprobes/kretprobe_example.c +++ b/samples/kprobes/kretprobe_example.c @@ -48,6 +48,7 @@ static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) data->entry_stamp = ktime_get(); return 0; } +NOKPROBE_SYMBOL(entry_handler); /* * Return-probe handler: Log the return value and duration. Duration may turn @@ -67,6 +68,7 @@ static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs) func_name, retval, (long long)delta); return 0; } +NOKPROBE_SYMBOL(ret_handler); static struct kretprobe my_kretprobe = { .handler = ret_handler, -- cgit v1.2.3-59-g8ed1b From 6553896666433e7efec589838b400a2a652b3ffa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Mar 2020 22:47:17 +0100 Subject: vmlinux.lds.h: Create section for protection against instrumentation Some code pathes, especially the low level entry code, must be protected against instrumentation for various reasons: - Low level entry code can be a fragile beast, especially on x86. - With NO_HZ_FULL RCU state needs to be established before using it. Having a dedicated section for such code allows to validate with tooling that no unsafe functions are invoked. Add the .noinstr.text section and the noinstr attribute to mark functions. noinstr implies notrace. Kprobes will gain a section check later. Provide also a set of markers: instrumentation_begin()/end() These are used to mark code inside a noinstr function which calls into regular instrumentable text section as safe. The instrumentation markers are only active when CONFIG_DEBUG_ENTRY is enabled as the end marker emits a NOP to prevent the compiler from merging the annotation points. This means the objtool verification requires a kernel compiled with this option. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134100.075416272@linutronix.de --- arch/powerpc/kernel/vmlinux.lds.S | 1 + include/asm-generic/sections.h | 3 +++ include/asm-generic/vmlinux.lds.h | 10 ++++++++ include/linux/compiler.h | 53 +++++++++++++++++++++++++++++++++++++++ include/linux/compiler_types.h | 4 +++ scripts/mod/modpost.c | 2 +- 6 files changed, 72 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 31a0f201fb6f..a1706b63b82d 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -90,6 +90,7 @@ SECTIONS #ifdef CONFIG_PPC64 *(.tramp.ftrace.text); #endif + NOINSTR_TEXT SCHED_TEXT CPUIDLE_TEXT LOCK_TEXT diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index d1779d442aa5..66397ed10acb 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -53,6 +53,9 @@ extern char __ctors_start[], __ctors_end[]; /* Start and end of .opd section - used for function descriptors. */ extern char __start_opd[], __end_opd[]; +/* Start and end of instrumentation protected text section */ +extern char __noinstr_text_start[], __noinstr_text_end[]; + extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 71e387a5fe90..db600ef218d7 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -540,6 +540,15 @@ . = ALIGN((align)); \ __end_rodata = .; +/* + * Non-instrumentable text section + */ +#define NOINSTR_TEXT \ + ALIGN_FUNCTION(); \ + __noinstr_text_start = .; \ + *(.noinstr.text) \ + __noinstr_text_end = .; + /* * .text section. Map to function alignment to avoid address changes * during second ld run in second ld pass when generating System.map @@ -551,6 +560,7 @@ #define TEXT_TEXT \ ALIGN_FUNCTION(); \ *(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \ + NOINSTR_TEXT \ *(.text..refcount) \ *(.ref.text) \ MEM_KEEP(init.text*) \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 034b0a644efc..e9ead0505671 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -120,12 +120,65 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(.rodata..c_jump_table) +#ifdef CONFIG_DEBUG_ENTRY +/* Begin/end of an instrumentation safe region */ +#define instrumentation_begin() ({ \ + asm volatile("%c0:\n\t" \ + ".pushsection .discard.instr_begin\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) + +/* + * Because instrumentation_{begin,end}() can nest, objtool validation considers + * _begin() a +1 and _end() a -1 and computes a sum over the instructions. + * When the value is greater than 0, we consider instrumentation allowed. + * + * There is a problem with code like: + * + * noinstr void foo() + * { + * instrumentation_begin(); + * ... + * if (cond) { + * instrumentation_begin(); + * ... + * instrumentation_end(); + * } + * bar(); + * instrumentation_end(); + * } + * + * If instrumentation_end() would be an empty label, like all the other + * annotations, the inner _end(), which is at the end of a conditional block, + * would land on the instruction after the block. + * + * If we then consider the sum of the !cond path, we'll see that the call to + * bar() is with a 0-value, even though, we meant it to happen with a positive + * value. + * + * To avoid this, have _end() be a NOP instruction, this ensures it will be + * part of the condition block and does not escape. + */ +#define instrumentation_end() ({ \ + asm volatile("%c0: nop\n\t" \ + ".pushsection .discard.instr_end\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) +#endif /* CONFIG_DEBUG_ENTRY */ + #else #define annotate_reachable() #define annotate_unreachable() #define __annotate_jump_table #endif +#ifndef instrumentation_begin +#define instrumentation_begin() do { } while(0) +#define instrumentation_end() do { } while(0) +#endif + #ifndef ASM_UNREACHABLE # define ASM_UNREACHABLE #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index e970f97a7fcb..5da257cbebf1 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -118,6 +118,10 @@ struct ftrace_likely_data { #define notrace __attribute__((__no_instrument_function__)) #endif +/* Section for code which can't be instrumented at all */ +#define noinstr \ + noinline notrace __attribute((__section__(".noinstr.text"))) + /* * it doesn't make sense on ARM (currently the only user of __naked) * to trace naked functions because then mcount is called without diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 5c3c50c5ec52..0053d4fea847 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -948,7 +948,7 @@ static void check_section(const char *modname, struct elf_info *elf, #define DATA_SECTIONS ".data", ".data.rel" #define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ - ".kprobes.text", ".cpuidle.text" + ".kprobes.text", ".cpuidle.text", ".noinstr.text" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ ".fixup", ".entry.text", ".exception.text", ".text.*", \ ".coldtext" -- cgit v1.2.3-59-g8ed1b From 66e9b0717102507e64f638790eaece88765cc9e5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 10 Mar 2020 14:04:34 +0100 Subject: kprobes: Prevent probes in .noinstr.text section Instrumentation is forbidden in the .noinstr.text section. Make kprobes respect this. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Masami Hiramatsu Link: https://lkml.kernel.org/r/20200505134100.179862032@linutronix.de --- include/linux/module.h | 2 ++ kernel/kprobes.c | 18 ++++++++++++++++++ kernel/module.c | 3 +++ 3 files changed, 23 insertions(+) diff --git a/include/linux/module.h b/include/linux/module.h index 1192097c9a69..d849d06e4d44 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -458,6 +458,8 @@ struct module { void __percpu *percpu; unsigned int percpu_size; #endif + void *noinstr_text_start; + unsigned int noinstr_text_size; #ifdef CONFIG_TRACEPOINTS unsigned int num_tracepoints; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9eb5acf0a9f3..3f310df4a693 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2229,6 +2229,12 @@ static int __init populate_kprobe_blacklist(unsigned long *start, /* Symbols in __kprobes_text are blacklisted */ ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start, (unsigned long)__kprobes_text_end); + if (ret) + return ret; + + /* Symbols in noinstr section are blacklisted */ + ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start, + (unsigned long)__noinstr_text_end); return ret ? : arch_populate_kprobe_blacklist(); } @@ -2248,6 +2254,12 @@ static void add_module_kprobe_blacklist(struct module *mod) end = start + mod->kprobes_text_size; kprobe_add_area_blacklist(start, end); } + + start = (unsigned long)mod->noinstr_text_start; + if (start) { + end = start + mod->noinstr_text_size; + kprobe_add_area_blacklist(start, end); + } } static void remove_module_kprobe_blacklist(struct module *mod) @@ -2265,6 +2277,12 @@ static void remove_module_kprobe_blacklist(struct module *mod) end = start + mod->kprobes_text_size; kprobe_remove_area_blacklist(start, end); } + + start = (unsigned long)mod->noinstr_text_start; + if (start) { + end = start + mod->noinstr_text_size; + kprobe_remove_area_blacklist(start, end); + } } /* Module notifier call back, checking kprobes on the module */ diff --git a/kernel/module.c b/kernel/module.c index faf733789560..72ed2b3a6ee2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3150,6 +3150,9 @@ static int find_module_sections(struct module *mod, struct load_info *info) } #endif + mod->noinstr_text_start = section_objs(info, ".noinstr.text", 1, + &mod->noinstr_text_size); + #ifdef CONFIG_TRACEPOINTS mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs", sizeof(*mod->tracepoints_ptrs), -- cgit v1.2.3-59-g8ed1b