From 7375dca1647fa978310f2d706ddbff537f72110b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 20 May 2019 09:26:24 -0400 Subject: ftrace: Make enable and update parameters bool when applicable The code modification functions have "enable" and "update" variables that are sometimes "int" but used as "bool". Remove the ambiguity and make them "bool" when they are only used for true or false values. Link: http://lkml.kernel.org/r/e1429923d9eda92a3cf5ee9e33c7eacce539781d.1558115654.git.naveen.n.rao@linux.vnet.ibm.com Reported-by: "Naveen N. Rao" Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 4 ++-- kernel/trace/ftrace.c | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 25e2995d4a4c..8a8cb3c401b2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -427,8 +427,8 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter); iter = ftrace_rec_iter_next(iter)) -int ftrace_update_record(struct dyn_ftrace *rec, int enable); -int ftrace_test_record(struct dyn_ftrace *rec, int enable); +int ftrace_update_record(struct dyn_ftrace *rec, bool enable); +int ftrace_test_record(struct dyn_ftrace *rec, bool enable); void ftrace_run_stop_machine(int command); unsigned long ftrace_location(unsigned long ip); unsigned long ftrace_location_range(unsigned long start, unsigned long end); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a12aff849c04..4f2c26bebe2a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1768,7 +1768,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, count++; /* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */ - update |= ftrace_test_record(rec, 1) != FTRACE_UPDATE_IGNORE; + update |= ftrace_test_record(rec, true) != FTRACE_UPDATE_IGNORE; /* Shortcut, if we handled all records, we are done. */ if (!all && count == hash->count) @@ -2047,7 +2047,7 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) } } -static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) +static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) { unsigned long flag = 0UL; @@ -2146,28 +2146,28 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) /** * ftrace_update_record, set a record that now is tracing or not * @rec: the record to update - * @enable: set to 1 if the record is tracing, zero to force disable + * @enable: set to true if the record is tracing, false to force disable * * The records that represent all functions that can be traced need * to be updated when tracing has been enabled. */ -int ftrace_update_record(struct dyn_ftrace *rec, int enable) +int ftrace_update_record(struct dyn_ftrace *rec, bool enable) { - return ftrace_check_record(rec, enable, 1); + return ftrace_check_record(rec, enable, true); } /** * ftrace_test_record, check if the record has been enabled or not * @rec: the record to test - * @enable: set to 1 to check if enabled, 0 if it is disabled + * @enable: set to true to check if enabled, false if it is disabled * * The arch code may need to test if a record is already set to * tracing to determine how to modify the function code that it * represents. */ -int ftrace_test_record(struct dyn_ftrace *rec, int enable) +int ftrace_test_record(struct dyn_ftrace *rec, bool enable) { - return ftrace_check_record(rec, enable, 0); + return ftrace_check_record(rec, enable, false); } static struct ftrace_ops * @@ -2356,7 +2356,7 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec) } static int -__ftrace_replace_code(struct dyn_ftrace *rec, int enable) +__ftrace_replace_code(struct dyn_ftrace *rec, bool enable) { unsigned long ftrace_old_addr; unsigned long ftrace_addr; @@ -2395,7 +2395,7 @@ void __weak ftrace_replace_code(int mod_flags) { struct dyn_ftrace *rec; struct ftrace_page *pg; - int enable = mod_flags & FTRACE_MODIFY_ENABLE_FL; + bool enable = mod_flags & FTRACE_MODIFY_ENABLE_FL; int schedulable = mod_flags & FTRACE_MODIFY_MAY_SLEEP_FL; int failed; -- cgit v1.2.3-59-g8ed1b From 0c9f23797925069f9ce267c97e488e293f647c69 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 20 May 2019 09:38:11 -0400 Subject: x86/ftrace: Make enable parameter bool where applicable The code modification functions have an "enable" parameter that is an "int" but used as a boolean. Switch its type to "bool" to remove the ambiguity that "int" causes. Link: http://lkml.kernel.org/r/e1429923d9eda92a3cf5ee9e33c7eacce539781d.1558115654.git.naveen.n.rao@linux.vnet.ibm.com Reported-by: "Naveen N. Rao" Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/ftrace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0927bb158ffc..ba37bcb7f92b 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -370,7 +370,7 @@ static int add_brk_on_nop(struct dyn_ftrace *rec) return add_break(rec->ip, old); } -static int add_breakpoints(struct dyn_ftrace *rec, int enable) +static int add_breakpoints(struct dyn_ftrace *rec, bool enable) { unsigned long ftrace_addr; int ret; @@ -478,7 +478,7 @@ static int add_update_nop(struct dyn_ftrace *rec) return add_update_code(ip, new); } -static int add_update(struct dyn_ftrace *rec, int enable) +static int add_update(struct dyn_ftrace *rec, bool enable) { unsigned long ftrace_addr; int ret; @@ -524,7 +524,7 @@ static int finish_update_nop(struct dyn_ftrace *rec) return ftrace_write(ip, new, 1); } -static int finish_update(struct dyn_ftrace *rec, int enable) +static int finish_update(struct dyn_ftrace *rec, bool enable) { unsigned long ftrace_addr; int ret; -- cgit v1.2.3-59-g8ed1b From 2d8d8fac3b4eab035dcd0068e1f5a746a697fbb3 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 15 May 2019 14:38:06 +0900 Subject: x86/uaccess: Allow access_ok() in irq context if pagefault_disabled WARN_ON_IN_IRQ() assumes that the access_ok() and following user memory access can sleep. But this assumption is not always correct; when the pagefault is disabled, following memory access will just returns -EFAULT and never sleep. Add pagefault_disabled() check in WARN_ON_ONCE() so that it can ignore the case we call it with disabling pagefault. For this purpose, this modified pagefault_disabled() as an inline function. Link: http://lkml.kernel.org/r/155789868664.26965.7932665824135793317.stgit@devnote2 Acked-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/x86/include/asm/uaccess.h | 4 +++- include/linux/uaccess.h | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index c82abd6e4ca3..9c4435307ff8 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -66,7 +66,9 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un }) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP -# define WARN_ON_IN_IRQ() WARN_ON_ONCE(!in_task()) +static inline bool pagefault_disabled(void); +# define WARN_ON_IN_IRQ() \ + WARN_ON_ONCE(!in_task() && !pagefault_disabled()) #else # define WARN_ON_IN_IRQ() #endif diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 2b70130af585..5a43ef7db492 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -203,7 +203,10 @@ static inline void pagefault_enable(void) /* * Is the pagefault handler disabled? If so, user access methods will not sleep. */ -#define pagefault_disabled() (current->pagefault_disabled != 0) +static inline bool pagefault_disabled(void) +{ + return current->pagefault_disabled != 0; +} /* * The pagefault handler is in general disabled by pagefault_disable() or -- cgit v1.2.3-59-g8ed1b From 3d7081822f7f9eab867d9bcc8fd635208ec438e0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 15 May 2019 14:38:18 +0900 Subject: uaccess: Add non-pagefault user-space read functions Add probe_user_read(), strncpy_from_unsafe_user() and strnlen_unsafe_user() which allows caller to access user-space in IRQ context. Current probe_kernel_read() and strncpy_from_unsafe() are not available for user-space memory, because it sets KERNEL_DS while accessing data. On some arch, user address space and kernel address space can be co-exist, but others can not. In that case, setting KERNEL_DS means given address is treated as a kernel address space. Also strnlen_user() is only available from user context since it can sleep if pagefault is enabled. To access user-space memory without pagefault, we need these new functions which sets USER_DS while accessing the data. Link: http://lkml.kernel.org/r/155789869802.26965.4940338412595759063.stgit@devnote2 Acked-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/uaccess.h | 14 ++++++ mm/maccess.c | 122 +++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 130 insertions(+), 6 deletions(-) diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 5a43ef7db492..9c435c3f2105 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -242,6 +242,17 @@ static inline unsigned long __copy_from_user_inatomic_nocache(void *to, extern long probe_kernel_read(void *dst, const void *src, size_t size); extern long __probe_kernel_read(void *dst, const void *src, size_t size); +/* + * probe_user_read(): safely attempt to read from a location in user space + * @dst: pointer to the buffer that shall take the data + * @src: address to read from + * @size: size of the data chunk + * + * Safely read from address @src to the buffer at @dst. If a kernel fault + * happens, handle that and return -EFAULT. + */ +extern long probe_user_read(void *dst, const void __user *src, size_t size); + /* * probe_kernel_write(): safely attempt to write to a location * @dst: address to write to @@ -255,6 +266,9 @@ extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size); extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); +extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, + long count); +extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count); /** * probe_kernel_address(): safely attempt to read from a location diff --git a/mm/maccess.c b/mm/maccess.c index ec00be51a24f..19c8c3dc14df 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -5,8 +5,20 @@ #include #include +static __always_inline long +probe_read_common(void *dst, const void __user *src, size_t size) +{ + long ret; + + pagefault_disable(); + ret = __copy_from_user_inatomic(dst, src, size); + pagefault_enable(); + + return ret ? -EFAULT : 0; +} + /** - * probe_kernel_read(): safely attempt to read from a location + * probe_kernel_read(): safely attempt to read from a kernel-space location * @dst: pointer to the buffer that shall take the data * @src: address to read from * @size: size of the data chunk @@ -29,16 +41,40 @@ long __probe_kernel_read(void *dst, const void *src, size_t size) mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); - pagefault_disable(); - ret = __copy_from_user_inatomic(dst, - (__force const void __user *)src, size); - pagefault_enable(); + ret = probe_read_common(dst, (__force const void __user *)src, size); set_fs(old_fs); - return ret ? -EFAULT : 0; + return ret; } EXPORT_SYMBOL_GPL(probe_kernel_read); +/** + * probe_user_read(): safely attempt to read from a user-space location + * @dst: pointer to the buffer that shall take the data + * @src: address to read from. This must be a user address. + * @size: size of the data chunk + * + * Safely read from user address @src to the buffer at @dst. If a kernel fault + * happens, handle that and return -EFAULT. + */ + +long __weak probe_user_read(void *dst, const void __user *src, size_t size) + __attribute__((alias("__probe_user_read"))); + +long __probe_user_read(void *dst, const void __user *src, size_t size) +{ + long ret = -EFAULT; + mm_segment_t old_fs = get_fs(); + + set_fs(USER_DS); + if (access_ok(src, size)) + ret = probe_read_common(dst, src, size); + set_fs(old_fs); + + return ret; +} +EXPORT_SYMBOL_GPL(probe_user_read); + /** * probe_kernel_write(): safely attempt to write to a location * @dst: address to write to @@ -66,6 +102,7 @@ long __probe_kernel_write(void *dst, const void *src, size_t size) } EXPORT_SYMBOL_GPL(probe_kernel_write); + /** * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address. * @dst: Destination address, in kernel space. This buffer must be at @@ -105,3 +142,76 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count) return ret ? -EFAULT : src - unsafe_addr; } + +/** + * strncpy_from_unsafe_user: - Copy a NUL terminated string from unsafe user + * address. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @unsafe_addr: Unsafe user address. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from unsafe user address to kernel buffer. + * + * On success, returns the length of the string INCLUDING the trailing NUL. + * + * If access fails, returns -EFAULT (some data may have been copied + * and the trailing NUL added). + * + * If @count is smaller than the length of the string, copies @count-1 bytes, + * sets the last byte of @dst buffer to NUL and returns @count. + */ +long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, + long count) +{ + mm_segment_t old_fs = get_fs(); + long ret; + + if (unlikely(count <= 0)) + return 0; + + set_fs(USER_DS); + pagefault_disable(); + ret = strncpy_from_user(dst, unsafe_addr, count); + pagefault_enable(); + set_fs(old_fs); + + if (ret >= count) { + ret = count; + dst[ret - 1] = '\0'; + } else if (ret > 0) { + ret++; + } + + return ret; +} + +/** + * strnlen_unsafe_user: - Get the size of a user string INCLUDING final NUL. + * @unsafe_addr: The string to measure. + * @count: Maximum count (including NUL) + * + * Get the size of a NUL-terminated string in user space without pagefault. + * + * Returns the size of the string INCLUDING the terminating NUL. + * + * If the string is too long, returns a number larger than @count. User + * has to check the return value against "> count". + * On exception (or invalid count), returns 0. + * + * Unlike strnlen_user, this can be used from IRQ handler etc. because + * it disables pagefaults. + */ +long strnlen_unsafe_user(const void __user *unsafe_addr, long count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + + set_fs(USER_DS); + pagefault_disable(); + ret = strnlen_user(unsafe_addr, count); + pagefault_enable(); + set_fs(old_fs); + + return ret; +} -- cgit v1.2.3-59-g8ed1b From 88903c464321cdbc2d473c24cbf311f576cf05bc Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 15 May 2019 14:38:30 +0900 Subject: tracing/probe: Add ustring type for user-space string Add "ustring" type for fetching user-space string from kprobe event. User can specify ustring type at uprobe event, and it is same as "string" for uprobe. Note that probe-event provides this option but it doesn't choose the correct type automatically since we have not way to decide the address is in user-space or not on some arch (and on some other arch, you can fetch the string by "string" type). So user must carefully check the target code (e.g. if you see __user on the target variable) and use this new type. Link: http://lkml.kernel.org/r/155789871009.26965.14167558859557329331.stgit@devnote2 Acked-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/kprobetrace.rst | 9 ++++++-- kernel/trace/trace.c | 2 +- kernel/trace/trace_kprobe.c | 42 ++++++++++++++++++++++++++++++++++--- kernel/trace/trace_probe.c | 14 ++++++++++--- kernel/trace/trace_probe.h | 1 + kernel/trace/trace_probe_tmpl.h | 14 ++++++++++++- kernel/trace/trace_uprobe.c | 12 +++++++++++ 7 files changed, 84 insertions(+), 10 deletions(-) diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index 235ce2ab131a..a3ac7c9ac242 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -55,7 +55,8 @@ Synopsis of kprobe_events NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types - (x8/x16/x32/x64), "string" and bitfield are supported. + (x8/x16/x32/x64), "string", "ustring" and bitfield + are supported. (\*1) only for the probe on function entry (offs == 0). (\*2) only for return probe. @@ -77,7 +78,11 @@ apply it to registers/stack-entries etc. (for example, '$stack1:x8[8]' is wrong, but '+8($stack):x8[8]' is OK.) String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container -has been paged out. +has been paged out. "ustring" type is an alternative of string for user-space. +Note that kprobe-event provides string/ustring types, but doesn't change it +automatically. So user has to decide if the targe string in kernel or in user +space carefully. On some arch, if you choose wrong one, it always fails to +record string data. The string array type is a bit different from other types. For other base types, [1] is equal to (e.g. +0(%di):x32[1] is same as +0(%di):x32.) But string[1] is not equal to string. The string type itself diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1c80521fd436..d3a477a16e70 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4847,7 +4847,7 @@ static const char readme_msg[] = "\t $stack, $stack, $retval, $comm\n" #endif "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n" - "\t b@/,\n" + "\t b@/, ustring,\n" "\t \\[\\]\n" #ifdef CONFIG_HIST_TRIGGERS "\t field: ;\n" diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7d736248a070..439bf04d14ce 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -886,6 +886,15 @@ fetch_store_strlen(unsigned long addr) return (ret < 0) ? ret : len; } +/* Return the length of string -- including null terminal byte */ +static nokprobe_inline int +fetch_store_strlen_user(unsigned long addr) +{ + const void __user *uaddr = (__force const void __user *)addr; + + return strnlen_unsafe_user(uaddr, MAX_STRING_SIZE); +} + /* * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max * length and relative data location. @@ -894,19 +903,46 @@ static nokprobe_inline int fetch_store_string(unsigned long addr, void *dest, void *base) { int maxlen = get_loc_len(*(u32 *)dest); - u8 *dst = get_loc_data(dest, base); + void *__dest; long ret; if (unlikely(!maxlen)) return -ENOMEM; + + __dest = get_loc_data(dest, base); + /* * Try to get string again, since the string can be changed while * probing. */ - ret = strncpy_from_unsafe(dst, (void *)addr, maxlen); + ret = strncpy_from_unsafe(__dest, (void *)addr, maxlen); + if (ret >= 0) + *(u32 *)dest = make_data_loc(ret, __dest - base); + + return ret; +} +/* + * Fetch a null-terminated string from user. Caller MUST set *(u32 *)buf + * with max length and relative data location. + */ +static nokprobe_inline int +fetch_store_string_user(unsigned long addr, void *dest, void *base) +{ + const void __user *uaddr = (__force const void __user *)addr; + int maxlen = get_loc_len(*(u32 *)dest); + void *__dest; + long ret; + + if (unlikely(!maxlen)) + return -ENOMEM; + + __dest = get_loc_data(dest, base); + + ret = strncpy_from_unsafe_user(__dest, uaddr, maxlen); if (ret >= 0) - *(u32 *)dest = make_data_loc(ret, (void *)dst - base); + *(u32 *)dest = make_data_loc(ret, __dest - base); + return ret; } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index a347faced959..5a0470f7b9de 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -78,6 +78,8 @@ static const struct fetch_type probe_fetch_types[] = { /* Special types */ __ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1, "__data_loc char[]"), + __ASSIGN_FETCH_TYPE("ustring", string, string, sizeof(u32), 1, + "__data_loc char[]"), /* Basic types */ ASSIGN_FETCH_TYPE(u8, u8, 0), ASSIGN_FETCH_TYPE(u16, u16, 0), @@ -569,7 +571,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, goto fail; /* Store operation */ - if (!strcmp(parg->type->name, "string")) { + if (!strcmp(parg->type->name, "string") || + !strcmp(parg->type->name, "ustring")) { if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM) { trace_probe_log_err(offset + (t ? (t - arg) : 0), @@ -590,7 +593,11 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, goto fail; } } - code->op = FETCH_OP_ST_STRING; /* In DEREF case, replace it */ + /* If op == DEREF, replace it with STRING */ + if (!strcmp(parg->type->name, "ustring")) + code->op = FETCH_OP_ST_USTRING; + else + code->op = FETCH_OP_ST_STRING; code->size = parg->type->size; parg->dynamic = true; } else if (code->op == FETCH_OP_DEREF) { @@ -618,7 +625,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, /* Loop(Array) operation */ if (parg->count) { if (scode->op != FETCH_OP_ST_MEM && - scode->op != FETCH_OP_ST_STRING) { + scode->op != FETCH_OP_ST_STRING && + scode->op != FETCH_OP_ST_USTRING) { trace_probe_log_err(offset + (t ? (t - arg) : 0), BAD_STRING); ret = -EINVAL; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index f9a8c632188b..c7546e7ff8e2 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -96,6 +96,7 @@ enum fetch_op { FETCH_OP_ST_RAW, /* Raw: .size */ FETCH_OP_ST_MEM, /* Mem: .offset, .size */ FETCH_OP_ST_STRING, /* String: .offset, .size */ + FETCH_OP_ST_USTRING, /* User String: .offset, .size */ // Stage 4 (modify) op FETCH_OP_MOD_BF, /* Bitfield: .basesize, .lshift, .rshift */ // Stage 5 (loop) op diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index c30c61f12ddd..2e9e4dae8839 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -59,6 +59,9 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, static nokprobe_inline int fetch_store_strlen(unsigned long addr); static nokprobe_inline int fetch_store_string(unsigned long addr, void *dest, void *base); +static nokprobe_inline int fetch_store_strlen_user(unsigned long addr); +static nokprobe_inline int +fetch_store_string_user(unsigned long addr, void *dest, void *base); static nokprobe_inline int probe_mem_read(void *dest, void *src, size_t size); @@ -91,6 +94,10 @@ stage3: ret = fetch_store_strlen(val + code->offset); code++; goto array; + } else if (code->op == FETCH_OP_ST_USTRING) { + ret += fetch_store_strlen_user(val + code->offset); + code++; + goto array; } else return -EILSEQ; } @@ -106,6 +113,10 @@ stage3: loc = *(u32 *)dest; ret = fetch_store_string(val + code->offset, dest, base); break; + case FETCH_OP_ST_USTRING: + loc = *(u32 *)dest; + ret = fetch_store_string_user(val + code->offset, dest, base); + break; default: return -EILSEQ; } @@ -123,7 +134,8 @@ array: total += ret; if (++i < code->param) { code = s3; - if (s3->op != FETCH_OP_ST_STRING) { + if (s3->op != FETCH_OP_ST_STRING && + s3->op != FETCH_OP_ST_USTRING) { dest += s3->size; val += s3->size; goto stage3; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index eb7e06b54741..852e998051f6 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -176,6 +176,12 @@ fetch_store_string(unsigned long addr, void *dest, void *base) return ret; } +static nokprobe_inline int +fetch_store_string_user(unsigned long addr, void *dest, void *base) +{ + return fetch_store_string(addr, dest, base); +} + /* Return the length of string -- including null terminal byte */ static nokprobe_inline int fetch_store_strlen(unsigned long addr) @@ -191,6 +197,12 @@ fetch_store_strlen(unsigned long addr) return (len > MAX_STRING_SIZE) ? 0 : len; } +static nokprobe_inline int +fetch_store_strlen_user(unsigned long addr) +{ + return fetch_store_strlen(addr); +} + static unsigned long translate_user_vaddr(unsigned long file_offset) { unsigned long base_addr; -- cgit v1.2.3-59-g8ed1b From e65f7ae7f4da56622ecf8f1eaed333b9a13f9435 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 15 May 2019 14:38:42 +0900 Subject: tracing/probe: Support user-space dereference Support user-space dereference syntax for probe event arguments to dereference the data-structure or array in user-space. The syntax is just adding 'u' before an offset value. +|-u() e.g. +u8(%ax), +u0(+0(%si)) For example, if you probe do_sched_setscheduler(pid, policy, param) and record param->sched_priority, you can add new probe as below; p do_sched_setscheduler priority=+u0($arg3) Note that kprobe event provides this and it doesn't change the dereference method automatically because we do not know whether the given address is in userspace or kernel on some archs. So as same as "ustring", this is an option for user, who has to carefully choose the dereference method. Link: http://lkml.kernel.org/r/155789872187.26965.4468456816590888687.stgit@devnote2 Acked-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/kprobetrace.rst | 27 ++++++++++++++++++++++----- Documentation/trace/uprobetracer.rst | 10 ++++++---- kernel/trace/trace.c | 5 +++-- kernel/trace/trace_kprobe.c | 6 ++++++ kernel/trace/trace_probe.c | 25 ++++++++++++++++++------- kernel/trace/trace_probe.h | 2 ++ kernel/trace/trace_probe_tmpl.h | 22 +++++++++++++++++----- kernel/trace/trace_uprobe.c | 7 +++++++ 8 files changed, 81 insertions(+), 23 deletions(-) diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index a3ac7c9ac242..09ff474493e1 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -51,7 +51,7 @@ Synopsis of kprobe_events $argN : Fetch the Nth function argument. (N >= 1) (\*1) $retval : Fetch return value.(\*2) $comm : Fetch current task comm. - +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(\*3) + +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4) NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types @@ -61,6 +61,7 @@ Synopsis of kprobe_events (\*1) only for the probe on function entry (offs == 0). (\*2) only for return probe. (\*3) this is useful for fetching a field of data structures. + (\*4) "u" means user-space dereference. See :ref:`user_mem_access`. Types ----- @@ -79,10 +80,7 @@ wrong, but '+8($stack):x8[8]' is OK.) String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. "ustring" type is an alternative of string for user-space. -Note that kprobe-event provides string/ustring types, but doesn't change it -automatically. So user has to decide if the targe string in kernel or in user -space carefully. On some arch, if you choose wrong one, it always fails to -record string data. +See :ref:`user_mem_access` for more info.. The string array type is a bit different from other types. For other base types, [1] is equal to (e.g. +0(%di):x32[1] is same as +0(%di):x32.) But string[1] is not equal to string. The string type itself @@ -97,6 +95,25 @@ Symbol type('symbol') is an alias of u32 or u64 type (depends on BITS_PER_LONG) which shows given pointer in "symbol+offset" style. For $comm, the default type is "string"; any other type is invalid. +.. _user_mem_access: +User Memory Access +------------------ +Kprobe events supports user-space memory access. For that purpose, you can use +either user-space dereference syntax or 'ustring' type. + +The user-space dereference syntax allows you to access a field of a data +structure in user-space. This is done by adding the "u" prefix to the +dereference syntax. For example, +u4(%si) means it will read memory from the +address in the register %si offset by 4, and the memory is expected to be in +user-space. You can use this for strings too, e.g. +u0(%si):string will read +a string from the address in the register %si that is expected to be in user- +space. 'ustring' is a shortcut way of performing the same task. That is, ++0(%si):ustring is equivalent to +u0(%si):string. + +Note that kprobe-event provides the user-memory access syntax but it doesn't +use it transparently. This means if you use normal dereference or string type +for user memory, it might fail, and may always fail on some archs. The user +has to carefully check if the target data is in kernel or user space. Per-Probe Event Filtering ------------------------- diff --git a/Documentation/trace/uprobetracer.rst b/Documentation/trace/uprobetracer.rst index 4346e23e3ae7..ab13319c66ac 100644 --- a/Documentation/trace/uprobetracer.rst +++ b/Documentation/trace/uprobetracer.rst @@ -42,16 +42,18 @@ Synopsis of uprobe_tracer @+OFFSET : Fetch memory at OFFSET (OFFSET from same file as PATH) $stackN : Fetch Nth entry of stack (N >= 0) $stack : Fetch stack address. - $retval : Fetch return value.(*) + $retval : Fetch return value.(\*1) $comm : Fetch current task comm. - +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) + +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*2)(\*3) NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types (x8/x16/x32/x64), "string" and bitfield are supported. - (*) only for return probe. - (**) this is useful for fetching a field of data structures. + (\*1) only for return probe. + (\*2) this is useful for fetching a field of data structures. + (\*3) Unlike kprobe event, "u" prefix will just be ignored, becuse uprobe + events can access only user-space memory. Types ----- diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d3a477a16e70..6b3b5b0495a8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4842,10 +4842,11 @@ static const char readme_msg[] = "\t args: =fetcharg[:type]\n" "\t fetcharg: %, @
, @[+|-],\n" #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API - "\t $stack, $stack, $retval, $comm, $arg\n" + "\t $stack, $stack, $retval, $comm, $arg,\n" #else - "\t $stack, $stack, $retval, $comm\n" + "\t $stack, $stack, $retval, $comm,\n" #endif + "\t +|-[u]()\n" "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n" "\t b@/, ustring,\n" "\t \\[\\]\n" diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 439bf04d14ce..ff14eb011c1c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -952,6 +952,12 @@ probe_mem_read(void *dest, void *src, size_t size) return probe_kernel_read(dest, src, size); } +static nokprobe_inline int +probe_mem_read_user(void *dest, void *src, size_t size) +{ + return probe_user_read(dest, src, size); +} + /* Note that we don't verify it, since the code does not come from user space */ static int process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 5a0470f7b9de..b6b0593844cd 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -324,6 +324,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type, { struct fetch_insn *code = *pcode; unsigned long param; + int deref = FETCH_OP_DEREF; long offset = 0; char *tmp; int ret = 0; @@ -396,9 +397,14 @@ parse_probe_arg(char *arg, const struct fetch_type *type, break; case '+': /* deref memory */ - arg++; /* Skip '+', because kstrtol() rejects it. */ - /* fall through */ case '-': + if (arg[1] == 'u') { + deref = FETCH_OP_UDEREF; + arg[1] = arg[0]; + arg++; + } + if (arg[0] == '+') + arg++; /* Skip '+', because kstrtol() rejects it. */ tmp = strchr(arg, '('); if (!tmp) { trace_probe_log_err(offs, DEREF_NEED_BRACE); @@ -434,7 +440,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type, } *pcode = code; - code->op = FETCH_OP_DEREF; + code->op = deref; code->offset = offset; } break; @@ -573,14 +579,15 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, /* Store operation */ if (!strcmp(parg->type->name, "string") || !strcmp(parg->type->name, "ustring")) { - if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_IMM && - code->op != FETCH_OP_COMM) { + if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF && + code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM) { trace_probe_log_err(offset + (t ? (t - arg) : 0), BAD_STRING); ret = -EINVAL; goto fail; } - if (code->op != FETCH_OP_DEREF || parg->count) { + if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) || + parg->count) { /* * IMM and COMM is pointing actual address, those must * be kept, and if parg->count != 0, this is an array @@ -594,7 +601,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, } } /* If op == DEREF, replace it with STRING */ - if (!strcmp(parg->type->name, "ustring")) + if (!strcmp(parg->type->name, "ustring") || + code->op == FETCH_OP_UDEREF) code->op = FETCH_OP_ST_USTRING; else code->op = FETCH_OP_ST_STRING; @@ -603,6 +611,9 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, } else if (code->op == FETCH_OP_DEREF) { code->op = FETCH_OP_ST_MEM; code->size = parg->type->size; + } else if (code->op == FETCH_OP_UDEREF) { + code->op = FETCH_OP_ST_UMEM; + code->size = parg->type->size; } else { code++; if (code->op != FETCH_OP_NOP) { diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index c7546e7ff8e2..42816358dd48 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -92,9 +92,11 @@ enum fetch_op { FETCH_OP_FOFFS, /* File offset: .immediate */ // Stage 2 (dereference) op FETCH_OP_DEREF, /* Dereference: .offset */ + FETCH_OP_UDEREF, /* User-space Dereference: .offset */ // Stage 3 (store) ops FETCH_OP_ST_RAW, /* Raw: .size */ FETCH_OP_ST_MEM, /* Mem: .offset, .size */ + FETCH_OP_ST_UMEM, /* Mem: .offset, .size */ FETCH_OP_ST_STRING, /* String: .offset, .size */ FETCH_OP_ST_USTRING, /* User String: .offset, .size */ // Stage 4 (modify) op diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index 2e9e4dae8839..e5282828f4a6 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -64,6 +64,8 @@ static nokprobe_inline int fetch_store_string_user(unsigned long addr, void *dest, void *base); static nokprobe_inline int probe_mem_read(void *dest, void *src, size_t size); +static nokprobe_inline int +probe_mem_read_user(void *dest, void *src, size_t size); /* From the 2nd stage, routine is same */ static nokprobe_inline int @@ -77,14 +79,21 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, stage2: /* 2nd stage: dereference memory if needed */ - while (code->op == FETCH_OP_DEREF) { - lval = val; - ret = probe_mem_read(&val, (void *)val + code->offset, - sizeof(val)); + do { + if (code->op == FETCH_OP_DEREF) { + lval = val; + ret = probe_mem_read(&val, (void *)val + code->offset, + sizeof(val)); + } else if (code->op == FETCH_OP_UDEREF) { + lval = val; + ret = probe_mem_read_user(&val, + (void *)val + code->offset, sizeof(val)); + } else + break; if (ret) return ret; code++; - } + } while (1); s3 = code; stage3: @@ -109,6 +118,9 @@ stage3: case FETCH_OP_ST_MEM: probe_mem_read(dest, (void *)val + code->offset, code->size); break; + case FETCH_OP_ST_UMEM: + probe_mem_read_user(dest, (void *)val + code->offset, code->size); + break; case FETCH_OP_ST_STRING: loc = *(u32 *)dest; ret = fetch_store_string(val + code->offset, dest, base); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 852e998051f6..3d6b868830f3 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -140,6 +140,13 @@ probe_mem_read(void *dest, void *src, size_t size) return copy_from_user(dest, vaddr, size) ? -EFAULT : 0; } + +static nokprobe_inline int +probe_mem_read_user(void *dest, void *src, size_t size) +{ + return probe_mem_read(dest, src, size); +} + /* * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max * length and relative data location. -- cgit v1.2.3-59-g8ed1b From bdf2b8cbf076568f7d17dc62467348d409142298 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 15 May 2019 14:38:54 +0900 Subject: selftests/ftrace: Add user-memory access syntax testcase Add a user-memory access syntax testcase which checks new user-memory access syntax and ustring type. Link: http://lkml.kernel.org/r/155789873385.26965.9557271156179140676.stgit@devnote2 Acked-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- .../ftrace/test.d/kprobe/kprobe_args_user.tc | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc new file mode 100644 index 000000000000..0f60087583d8 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc @@ -0,0 +1,32 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe event user-memory access + +[ -f kprobe_events ] || exit_unsupported # this is configurable + +grep -q '\$arg' README || exit_unresolved # depends on arch +grep -A10 "fetcharg:" README | grep -q 'ustring' || exit_unsupported +grep -A10 "fetcharg:" README | grep -q '\[u\]' || exit_unsupported + +:;: "user-memory access syntax and ustring working on user memory";: +echo 'p:myevent do_sys_open path=+0($arg2):ustring path2=+u0($arg2):string' \ + > kprobe_events + +grep myevent kprobe_events | \ + grep -q 'path=+0($arg2):ustring path2=+u0($arg2):string' +echo 1 > events/kprobes/myevent/enable +echo > /dev/null +echo 0 > events/kprobes/myevent/enable + +grep myevent trace | grep -q 'path="/dev/null" path2="/dev/null"' + +:;: "user-memory access syntax and ustring not working with kernel memory";: +echo 'p:myevent vfs_symlink path=+0($arg3):ustring path2=+u0($arg3):string' \ + > kprobe_events +echo 1 > events/kprobes/myevent/enable +ln -s foo $TMPDIR/bar +echo 0 > events/kprobes/myevent/enable + +grep myevent trace | grep -q 'path=(fault) path2=(fault)' + +exit 0 -- cgit v1.2.3-59-g8ed1b From 1e032f7cfa141b4424827b0ecb0ea899f84e182e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 15 May 2019 14:39:05 +0900 Subject: perf-probe: Add user memory access attribute support Add user memory access attribute for kprobe event arguments. If a given 'local variable' is in user-space, User can specify memory access method by '@user' suffix. This is not only for string but also for data structure. If we access a field of data structure in user memory from kernel on some arch, it will fail. e.g. perf probe -a "sched_setscheduler param->sched_priority" This will fail to access the "param->sched_priority" because the param is __user pointer. Instead, we can now specify @user suffix for such argument. perf probe -a "sched_setscheduler param->sched_priority@user" Note that kernel memory access with "@user" must always fail on any arch. Link: http://lkml.kernel.org/r/155789874562.26965.10836126971405890891.stgit@devnote2 Acked-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/perf/Documentation/perf-probe.txt | 3 ++- tools/perf/util/probe-event.c | 11 +++++++++++ tools/perf/util/probe-event.h | 2 ++ tools/perf/util/probe-file.c | 7 +++++++ tools/perf/util/probe-file.h | 1 + tools/perf/util/probe-finder.c | 19 ++++++++++++------- 6 files changed, 35 insertions(+), 8 deletions(-) diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index b6866a05edd2..ed3ecfa422e1 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -194,12 +194,13 @@ PROBE ARGUMENT -------------- Each probe argument follows below syntax. - [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE] + [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE][@user] 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) '$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters. 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo (*). Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal integers (x/x8/x16/x32/x64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail) On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid. +"@user" is a special attribute which means the LOCALVAR will be treated as a user-space memory. This is only valid for kprobe event. TYPES ----- diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 198e09ff611e..a7ca17be5fc5 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1577,6 +1577,17 @@ static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg) str = tmp + 1; } + tmp = strchr(str, '@'); + if (tmp && tmp != str && strcmp(tmp + 1, "user")) { /* user attr */ + if (!user_access_is_supported()) { + semantic_error("ftrace does not support user access\n"); + return -EINVAL; + } + *tmp = '\0'; + arg->user_access = true; + pr_debug("user_access "); + } + tmp = strchr(str, ':'); if (tmp) { /* Type setting */ *tmp = '\0'; diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 05c8d571a901..96a319cd2378 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -37,6 +37,7 @@ struct probe_trace_point { struct probe_trace_arg_ref { struct probe_trace_arg_ref *next; /* Next reference */ long offset; /* Offset value */ + bool user_access; /* User-memory access */ }; /* kprobe-tracer and uprobe-tracer tracing argument */ @@ -82,6 +83,7 @@ struct perf_probe_arg { char *var; /* Variable name */ char *type; /* Type name */ struct perf_probe_arg_field *field; /* Structure fields */ + bool user_access; /* User-memory access */ }; /* Perf probe probing event (point + arg) */ diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 4062bc4412a9..89ce1a9c3798 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -1015,6 +1015,7 @@ enum ftrace_readme { FTRACE_README_PROBE_TYPE_X = 0, FTRACE_README_KRETPROBE_OFFSET, FTRACE_README_UPROBE_REF_CTR, + FTRACE_README_USER_ACCESS, FTRACE_README_END, }; @@ -1027,6 +1028,7 @@ static struct { DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"), DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"), DEFINE_TYPE(FTRACE_README_UPROBE_REF_CTR, "*ref_ctr_offset*"), + DEFINE_TYPE(FTRACE_README_USER_ACCESS, "*[u]*"), }; static bool scan_ftrace_readme(enum ftrace_readme type) @@ -1087,3 +1089,8 @@ bool uprobe_ref_ctr_is_supported(void) { return scan_ftrace_readme(FTRACE_README_UPROBE_REF_CTR); } + +bool user_access_is_supported(void) +{ + return scan_ftrace_readme(FTRACE_README_USER_ACCESS); +} diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index 2a249182f2a6..986c1c94f64f 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -70,6 +70,7 @@ int probe_cache__show_all_caches(struct strfilter *filter); bool probe_type_is_available(enum probe_type type); bool kretprobe_offset_is_supported(void); bool uprobe_ref_ctr_is_supported(void); +bool user_access_is_supported(void); #else /* ! HAVE_LIBELF_SUPPORT */ static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused, struct nsinfo *nsi __maybe_unused) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index c37fbef1711d..c202027716d0 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -294,7 +294,7 @@ static_var: static int convert_variable_type(Dwarf_Die *vr_die, struct probe_trace_arg *tvar, - const char *cast) + const char *cast, bool user_access) { struct probe_trace_arg_ref **ref_ptr = &tvar->ref; Dwarf_Die type; @@ -334,7 +334,8 @@ static int convert_variable_type(Dwarf_Die *vr_die, pr_debug("%s type is %s.\n", dwarf_diename(vr_die), dwarf_diename(&type)); - if (cast && strcmp(cast, "string") == 0) { /* String type */ + if (cast && (!strcmp(cast, "string") || !strcmp(cast, "ustring"))) { + /* String type */ ret = dwarf_tag(&type); if (ret != DW_TAG_pointer_type && ret != DW_TAG_array_type) { @@ -357,6 +358,7 @@ static int convert_variable_type(Dwarf_Die *vr_die, pr_warning("Out of memory error\n"); return -ENOMEM; } + (*ref_ptr)->user_access = user_access; } if (!die_compare_name(&type, "char") && !die_compare_name(&type, "unsigned char")) { @@ -411,7 +413,7 @@ formatted: static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, struct perf_probe_arg_field *field, struct probe_trace_arg_ref **ref_ptr, - Dwarf_Die *die_mem) + Dwarf_Die *die_mem, bool user_access) { struct probe_trace_arg_ref *ref = *ref_ptr; Dwarf_Die type; @@ -448,6 +450,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, *ref_ptr = ref; } ref->offset += dwarf_bytesize(&type) * field->index; + ref->user_access = user_access; goto next; } else if (tag == DW_TAG_pointer_type) { /* Check the pointer and dereference */ @@ -519,17 +522,18 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, } } ref->offset += (long)offs; + ref->user_access = user_access; /* If this member is unnamed, we need to reuse this field */ if (!dwarf_diename(die_mem)) return convert_variable_fields(die_mem, varname, field, - &ref, die_mem); + &ref, die_mem, user_access); next: /* Converting next field */ if (field->next) return convert_variable_fields(die_mem, field->name, - field->next, &ref, die_mem); + field->next, &ref, die_mem, user_access); else return 0; } @@ -555,11 +559,12 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) else if (ret == 0 && pf->pvar->field) { ret = convert_variable_fields(vr_die, pf->pvar->var, pf->pvar->field, &pf->tvar->ref, - &die_mem); + &die_mem, pf->pvar->user_access); vr_die = &die_mem; } if (ret == 0) - ret = convert_variable_type(vr_die, pf->tvar, pf->pvar->type); + ret = convert_variable_type(vr_die, pf->tvar, pf->pvar->type, + pf->pvar->user_access); /* *expr will be cached in libdw. Don't free it. */ return ret; } -- cgit v1.2.3-59-g8ed1b From f08367b3643b5340f9d9ea07808ddd72b74beb30 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Thu, 23 May 2019 12:26:28 -0700 Subject: tracing: Use correct function name in trace_filter_add_remove_task() comment The comment of trace_filter_add_remove_task() refers to the function as 'trace_pid_filter_add_remove_task', use the correct name. Link: http://lkml.kernel.org/r/20190523192628.134406-1-mka@chromium.org Signed-off-by: Matthias Kaehlcke Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6b3b5b0495a8..77b9c4ca5faa 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -366,7 +366,7 @@ trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct } /** - * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list + * trace_filter_add_remove_task - Add or remove a task from a pid_list * @pid_list: The list to modify * @self: The current task for fork or NULL for exit * @task: The task to add or remove -- cgit v1.2.3-59-g8ed1b From 87a90956eeab260a469a51897bfda27b28adf67d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 22 May 2019 17:27:44 +0900 Subject: uaccess: Add a prototype of non-static __probe_user_read() Declare a prototype of non-static __probe_user_read() as same as __probe_kernel_read() at uaccess.h. Reported-by: kbuild test robot Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/uaccess.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 9c435c3f2105..34a038563d97 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -252,6 +252,7 @@ extern long __probe_kernel_read(void *dst, const void *src, size_t size); * happens, handle that and return -EFAULT. */ extern long probe_user_read(void *dst, const void __user *src, size_t size); +extern long __probe_user_read(void *dst, const void __user *src, size_t size); /* * probe_kernel_write(): safely attempt to write to a location -- cgit v1.2.3-59-g8ed1b From 539b75b2b9eece3fc6da5672d4b67440d5e454a9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 22 May 2019 17:27:52 +0900 Subject: tracing/kprobe: Cast user-space address correctly Cast user-space address correctly to pass to probe_user_read(). Reported-by: kbuild test robot Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index ff14eb011c1c..2c5357dddb92 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -955,7 +955,9 @@ probe_mem_read(void *dest, void *src, size_t size) static nokprobe_inline int probe_mem_read_user(void *dest, void *src, size_t size) { - return probe_user_read(dest, src, size); + const void __user *uaddr = (__force const void __user *)src; + + return probe_user_read(dest, uaddr, size); } /* Note that we don't verify it, since the code does not come from user space */ -- cgit v1.2.3-59-g8ed1b From b5f8b32c93b21c457a958d2a6bf938dab41bac4e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 22 May 2019 17:32:27 +0900 Subject: kprobes: Initialize kprobes at postcore_initcall Initialize kprobes at postcore_initcall level instead of module_init since kprobes is not a module, and it depends on only subsystems initialized in core_initcall. This will allow ftrace kprobe event to add new events when it is initializing because ftrace kprobe event is initialized at later initcall level. Link: http://lkml.kernel.org/r/155851394736.15728.13626739508905120098.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index b1ea30a5540e..54aaaad00a47 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2289,6 +2289,7 @@ static int __init init_kprobes(void) init_test_probes(); return err; } +postcore_initcall(init_kprobes); #ifdef CONFIG_DEBUG_FS static void report_probe(struct seq_file *pi, struct kprobe *p, @@ -2614,5 +2615,3 @@ error: late_initcall(debugfs_kprobe_init); #endif /* CONFIG_DEBUG_FS */ - -module_init(init_kprobes); -- cgit v1.2.3-59-g8ed1b From 970988e19eb0a0dc24fe14bf91972019e48336e2 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 22 May 2019 17:32:35 +0900 Subject: tracing/kprobe: Add kprobe_event= boot parameter Add kprobe_event= boot parameter to define kprobe events at boot time. The definition syntax is similar to tracefs/kprobe_events interface, but use ',' and ';' instead of ' ' and '\n' respectively. e.g. kprobe_event=p,vfs_read,$arg1,$arg2 This puts a probe on vfs_read with argument1 and 2, and enable the new event. Link: http://lkml.kernel.org/r/155851395498.15728.830529496248543583.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/admin-guide/kernel-parameters.txt | 13 ++++++ Documentation/trace/kprobetrace.rst | 14 +++++++ kernel/trace/trace_kprobe.c | 54 +++++++++++++++++++++++++ 3 files changed, 81 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 138f6664b2e2..11b9ffb265eb 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2007,6 +2007,19 @@ Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y, the default is off. + kprobe_event=[probe-list] + [FTRACE] Add kprobe events and enable at boot time. + The probe-list is a semicolon delimited list of probe + definitions. Each definition is same as kprobe_events + interface, but the parameters are comma delimited. + For example, to add a kprobe event on vfs_read with + arg1 and arg2, add to the command line; + + kprobe_event=p,vfs_read,$arg1,$arg2 + + See also Documentation/trace/kprobetrace.rst "Kernel + Boot Parameter" section. + kpti= [ARM64] Control page table isolation of user and kernel address spaces. Default: enabled on cores which need mitigation. diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index 09ff474493e1..af776989caca 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -146,6 +146,20 @@ You can check the total number of probe hits and probe miss-hits via The first column is event name, the second is the number of probe hits, the third is the number of probe miss-hits. +Kernel Boot Parameter +--------------------- +You can add and enable new kprobe events when booting up the kernel by +"kprobe_event=" parameter. The parameter accepts a semicolon-delimited +kprobe events, which format is similar to the kprobe_events. +The difference is that the probe definition parameters are comma-delimited +instead of space. For example, adding myprobe event on do_sys_open like below + + p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack) + +should be below for kernel boot parameter (just replace spaces with comma) + + p:myprobe,do_sys_open,dfd=%ax,filename=%dx,flags=%cx,mode=+4($stack) + Usage examples -------------- diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 2c5357dddb92..004fffd24ec1 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -12,6 +12,8 @@ #include #include +#include /* for COMMAND_LINE_SIZE */ + #include "trace_dynevent.h" #include "trace_kprobe_selftest.h" #include "trace_probe.h" @@ -19,6 +21,17 @@ #define KPROBE_EVENT_SYSTEM "kprobes" #define KRETPROBE_MAXACTIVE_MAX 4096 +#define MAX_KPROBE_CMDLINE_SIZE 1024 + +/* Kprobe early definition from command line */ +static char kprobe_boot_events_buf[COMMAND_LINE_SIZE] __initdata; + +static int __init set_kprobe_boot_events(char *str) +{ + strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE); + return 0; +} +__setup("kprobe_event=", set_kprobe_boot_events); static int trace_kprobe_create(int argc, const char **argv); static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev); @@ -1494,6 +1507,44 @@ void destroy_local_trace_kprobe(struct trace_event_call *event_call) } #endif /* CONFIG_PERF_EVENTS */ +static __init void enable_boot_kprobe_events(void) +{ + struct trace_array *tr = top_trace_array(); + struct trace_event_file *file; + struct trace_kprobe *tk; + struct dyn_event *pos; + + mutex_lock(&event_mutex); + for_each_trace_kprobe(tk, pos) { + list_for_each_entry(file, &tr->events, list) + if (file->event_call == &tk->tp.call) + trace_event_enable_disable(file, 1, 0); + } + mutex_unlock(&event_mutex); +} + +static __init void setup_boot_kprobe_events(void) +{ + char *p, *cmd = kprobe_boot_events_buf; + int ret; + + strreplace(kprobe_boot_events_buf, ',', ' '); + + while (cmd && *cmd != '\0') { + p = strchr(cmd, ';'); + if (p) + *p++ = '\0'; + + ret = trace_run_command(cmd, create_or_delete_trace_kprobe); + if (ret) + pr_warn("Failed to add event(%d): %s\n", ret, cmd); + + cmd = p; + } + + enable_boot_kprobe_events(); +} + /* Make a tracefs interface for controlling probe points */ static __init int init_kprobe_trace(void) { @@ -1525,6 +1576,9 @@ static __init int init_kprobe_trace(void) if (!entry) pr_warn("Could not create tracefs 'kprobe_profile' entry\n"); + + setup_boot_kprobe_events(); + return 0; } fs_initcall(init_kprobe_trace); -- cgit v1.2.3-59-g8ed1b From b3015fe41d9af7515a7b7b6f7f8f172d193fb3a6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 23 May 2019 19:40:17 -0400 Subject: tracing: Make a separate config for trace event self tests The trace event self tests enable loop through *all* events, enables each one, one at a time, runs some code to trigger various events (not necessarily the same events), and checks if anything went wrong. The issue is that trace events are usually the least likely start up test to cause a problem, but they take the longest to run (because there are so many events). When one of the other tests trigger a bug, the trace event start up tests causes the bisect to take much longer, because it takes 10s of seconds to get through the trace event tests. By making them a separate config (even though they are enabled by default if start up tests are set), it is possible to turn them off and still run the other tracing start up tests much quicker. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 12 +++++++++++- kernel/trace/trace_events.c | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 5d965cef6c77..07d22c61b634 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -596,9 +596,19 @@ config FTRACE_STARTUP_TEST functioning properly. It will do tests on all the configured tracers of ftrace. +config EVENT_TRACE_STARTUP_TEST + bool "Run selftest on trace events" + depends on FTRACE_STARTUP_TEST + default y + help + This option performs a test on all trace events in the system. + It basically just enables each event and runs some code that + will trigger events (not necessarily the event it enables) + This may take some time run as there are a lot of events. + config EVENT_TRACE_TEST_SYSCALLS bool "Run selftest on syscall events" - depends on FTRACE_STARTUP_TEST + depends on EVENT_TRACE_STARTUP_TEST help This option will also enable testing every syscall event. It only enables the event and disables it and runs various loads diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0ce3db67f556..edc72f3b080c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3190,7 +3190,7 @@ void __init trace_event_init(void) event_trace_enable(); } -#ifdef CONFIG_FTRACE_STARTUP_TEST +#ifdef CONFIG_EVENT_TRACE_STARTUP_TEST static DEFINE_SPINLOCK(test_spinlock); static DEFINE_SPINLOCK(test_spinlock_irq); -- cgit v1.2.3-59-g8ed1b From b6399cc789341fc49a0603fb7d388a4e50aca212 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 23 May 2019 19:50:34 -0400 Subject: tracing/kprobe: Do not run kprobe boot tests if kprobe_event is on cmdline When having kprobe trace event start up tests enabled and adding a kprobe_event on the kernel command line, it produced the following: trace_kprobe: Testing kprobe tracing: WARNING: CPU: 5 PID: 1 at kernel/trace/trace_kprobe.c:1724 kprobe_trace_self_tests_init+0x32d/0x36b Modules linked in: CPU: 5 PID: 1 Comm: swapper/0 Not tainted 5.2.0-rc1-test+ #249 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 RIP: 0010:kprobe_trace_self_tests_init+0x32d/0x36b Code: b7 e8 4f 8d a2 fe 85 c0 74 10 0f 0b 48 c7 c7 c8 1b 0d b7 ff c3 e8 19 af 99 fe 48 c7 c7 40 93 27 b7 e8 7f 1a a5 fe 85 c0 74 10 <0f> 0b 48 c7 c7 f8 1b 0d b7 ff c3 e8 f9 ae 9 a0 fe 85 RSP: 0018:ffffb36e40653e08 EFLAGS: 00010286 RAX: 00000000fffffff0 RBX: 0000000000000000 RCX: ffffb36e40653d5c RDX: 0000000000000000 RSI: ffffffffb72776e0 RDI: 0000000000000246 RBP: ffff98414fe58ff8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: ffff98415d8aa940 R12: 0000000000000000 R13: ffffffffb737c1b0 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff98415ea80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f959ce741b8 CR3: 000000011a210002 CR4: 00000000001606e0 Call Trace: ? init_kprobe_trace+0x19e/0x19e ? do_early_param+0x8e/0x8e do_one_initcall+0x6f/0x2b4 ? do_early_param+0x8e/0x8e kernel_init_freeable+0x21d/0x2c6 ? rest_init+0x146/0x146 kernel_init+0xa/0x10a ret_from_fork+0x3a/0x50 ---[ end trace 488430c083a4c956 ]--- As with the trace events, if a trace event is set on the kernel command line, the trace events start up tests are suspended. The kprobe start up tests should do the same when a kprobe is enabled on the kernel command line. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 004fffd24ec1..7958da2fd922 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -25,6 +25,7 @@ /* Kprobe early definition from command line */ static char kprobe_boot_events_buf[COMMAND_LINE_SIZE] __initdata; +static bool kprobe_boot_events_enabled __initdata; static int __init set_kprobe_boot_events(char *str) { @@ -1538,6 +1539,8 @@ static __init void setup_boot_kprobe_events(void) ret = trace_run_command(cmd, create_or_delete_trace_kprobe); if (ret) pr_warn("Failed to add event(%d): %s\n", ret, cmd); + else + kprobe_boot_events_enabled = true; cmd = p; } @@ -1611,6 +1614,11 @@ static __init int kprobe_trace_self_tests_init(void) if (tracing_is_disabled()) return -ENODEV; + if (kprobe_boot_events_enabled) { + pr_info("Skipping kprobe tests due to kprobe_event on cmdline\n"); + return 0; + } + target = kprobe_trace_selftest_target; pr_info("Testing kprobe tracing: "); -- cgit v1.2.3-59-g8ed1b From a124692b698b00026a58d89831ceda2331b2e1d0 Mon Sep 17 00:00:00 2001 From: Cheng Jian Date: Sat, 4 May 2019 19:39:39 +0800 Subject: ftrace: Enable trampoline when rec count returns back to one Custom trampolines can only be enabled if there is only a single ops attached to it. If there's only a single callback registered to a function, and the ops has a trampoline registered for it, then we can call the trampoline directly. This is very useful for improving the performance of ftrace and livepatch. If more than one callback is registered to a function, the general trampoline is used, and the custom trampoline is not restored back to the direct call even if all the other callbacks were unregistered and we are back to one callback for the function. To fix this, set FTRACE_FL_TRAMP flag if rec count is decremented to one, and the ops that left has a trampoline. Testing After this patch : insmod livepatch_unshare_files.ko cat /sys/kernel/debug/tracing/enabled_functions unshare_files (1) R I tramp: 0xffffffffc0000000(klp_ftrace_handler+0x0/0xa0) ->ftrace_ops_assist_func+0x0/0xf0 echo unshare_files > /sys/kernel/debug/tracing/set_ftrace_filter echo function > /sys/kernel/debug/tracing/current_tracer cat /sys/kernel/debug/tracing/enabled_functions unshare_files (2) R I ->ftrace_ops_list_func+0x0/0x150 echo nop > /sys/kernel/debug/tracing/current_tracer cat /sys/kernel/debug/tracing/enabled_functions unshare_files (1) R I tramp: 0xffffffffc0000000(klp_ftrace_handler+0x0/0xa0) ->ftrace_ops_assist_func+0x0/0xf0 Link: http://lkml.kernel.org/r/1556969979-111047-1-git-send-email-cj.chengjian@huawei.com Signed-off-by: Cheng Jian Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4f2c26bebe2a..5c3eadb143ed 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1622,6 +1622,11 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) return keep_regs; } +static struct ftrace_ops * +ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); +static struct ftrace_ops * +ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops); + static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, int filter_hash, bool inc) @@ -1750,15 +1755,17 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, } /* - * If the rec had TRAMP enabled, then it needs to - * be cleared. As TRAMP can only be enabled iff - * there is only a single ops attached to it. - * In otherwords, always disable it on decrementing. - * In the future, we may set it if rec count is - * decremented to one, and the ops that is left - * has a trampoline. + * The TRAMP needs to be set only if rec count + * is decremented to one, and the ops that is + * left has a trampoline. As TRAMP can only be + * enabled if there is only a single ops attached + * to it. */ - rec->flags &= ~FTRACE_FL_TRAMP; + if (ftrace_rec_count(rec) == 1 && + ftrace_find_tramp_ops_any(rec)) + rec->flags |= FTRACE_FL_TRAMP; + else + rec->flags &= ~FTRACE_FL_TRAMP; /* * flags will be cleared in ftrace_check_record() @@ -1951,11 +1958,6 @@ static void print_ip_ins(const char *fmt, const unsigned char *p) printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); } -static struct ftrace_ops * -ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); -static struct ftrace_ops * -ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops); - enum ftrace_bug_type ftrace_bug_type; const void *ftrace_expected; -- cgit v1.2.3-59-g8ed1b From 86b3de60a0b634cdcef82d0a2091bc5444a00020 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 28 May 2019 09:36:19 -0400 Subject: ring-buffer: Remove HAVE_64BIT_ALIGNED_ACCESS Commit c19fa94a8fed ("Add HAVE_64BIT_ALIGNED_ACCESS") added the config for architectures that required 64bit aligned access for all 64bit words. As the ftrace ring buffer stores data on 4 byte alignment, this config option was used to force it to store data on 8 byte alignment to make sure the data being stored and written directly into the ring buffer was 8 byte aligned as it would cause issues trying to write an 8 byte word on a 4 not 8 byte aligned memory location. But with the removal of the metag architecture, which was the only architecture to use this, there is no architecture supported by Linux that requires 8 byte aligne access for all 8 byte words (4 byte alignment is good enough). Removing this config can simplify the code a bit. Signed-off-by: Steven Rostedt (VMware) --- arch/Kconfig | 16 ---------------- kernel/trace/ring_buffer.c | 17 ++++------------- 2 files changed, 4 insertions(+), 29 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index c47b328eada0..665a7557b15c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -128,22 +128,6 @@ config UPROBES managed by the kernel and kept transparent to the probed application. ) -config HAVE_64BIT_ALIGNED_ACCESS - def_bool 64BIT && !HAVE_EFFICIENT_UNALIGNED_ACCESS - help - Some architectures require 64 bit accesses to be 64 bit - aligned, which also requires structs containing 64 bit values - to be 64 bit aligned too. This includes some 32 bit - architectures which can do 64 bit accesses, as well as 64 bit - architectures without unaligned access. - - This symbol should be selected by an architecture if 64 bit - accesses are required to be 64 bit aligned in this way even - though it is not a 64 bit architecture. - - See Documentation/unaligned-memory-access.txt for more - information on the topic of unaligned memory accesses. - config HAVE_EFFICIENT_UNALIGNED_ACCESS bool help diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 05b0b3139ebc..66358d66c933 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -128,16 +128,7 @@ int ring_buffer_print_entry_header(struct trace_seq *s) #define RB_ALIGNMENT 4U #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ - -#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS -# define RB_FORCE_8BYTE_ALIGNMENT 0 -# define RB_ARCH_ALIGNMENT RB_ALIGNMENT -#else -# define RB_FORCE_8BYTE_ALIGNMENT 1 -# define RB_ARCH_ALIGNMENT 8U -#endif - -#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT) +#define RB_ALIGN_DATA __aligned(RB_ALIGNMENT) /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX @@ -2373,7 +2364,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, event->time_delta = delta; length -= RB_EVNT_HDR_SIZE; - if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { + if (length > RB_MAX_SMALL_DATA) { event->type_len = 0; event->array[0] = length; } else @@ -2388,11 +2379,11 @@ static unsigned rb_calculate_event_length(unsigned length) if (!length) length++; - if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) + if (length > RB_MAX_SMALL_DATA) length += sizeof(event.array[0]); length += RB_EVNT_HDR_SIZE; - length = ALIGN(length, RB_ARCH_ALIGNMENT); + length = ALIGN(length, RB_ALIGNMENT); /* * In case the time delta is larger than the 27 bits for it -- cgit v1.2.3-59-g8ed1b From f0553dcb9778c343641d3a41f1db01be02e7551b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 10 Jun 2019 16:22:19 -0500 Subject: tracepoint: Use struct_size() in kmalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct tp_probes { ... struct tracepoint_func probes[0]; }; instance = kmalloc(sizeof(sizeof(struct tp_probes) + sizeof(struct tracepoint_func) * count, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kmalloc(struct_size(instance, probes, count) GFP_KERNEL); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Steven Rostedt (VMware) --- kernel/tracepoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 46f2ab1e08a9..fb9353ed901b 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -68,8 +68,8 @@ struct tp_probes { static inline void *allocate_probes(int count) { - struct tp_probes *p = kmalloc(count * sizeof(struct tracepoint_func) - + sizeof(struct tp_probes), GFP_KERNEL); + struct tp_probes *p = kmalloc(struct_size(p, probes, count), + GFP_KERNEL); return p == NULL ? NULL : p->probes; } -- cgit v1.2.3-59-g8ed1b From 65fc965c708c90b8c8b2cea980db0618333dd7fe Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 3 Jun 2019 22:04:42 +0900 Subject: kprobes: Fix to init kprobes in subsys_initcall Since arm64 kernel initializes breakpoint trap vector in arch_initcall(), initializing kprobe (and run smoke test) in postcore_initcall() causes a kernel panic. To fix this issue, move the kprobe initialization in subsys_initcall() (which is called right afer the arch_initcall). In-kernel kprobe users (ftrace and bpf) are using fs_initcall() which is called after subsys_initcall(), so this shouldn't cause more problem. Link: http://lkml.kernel.org/r/155956708268.12228.10363800793132214198.stgit@devnote2 Link: http://lkml.kernel.org/r/20190709153755.GB10123@lakrids.cambridge.arm.com Reported-by: Anders Roxell Fixes: b5f8b32c93b2 ("kprobes: Initialize kprobes at postcore_initcall") Tested-by: Anders Roxell Tested-by: Mark Rutland Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 54aaaad00a47..5471efbeb937 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2289,7 +2289,7 @@ static int __init init_kprobes(void) init_test_probes(); return err; } -postcore_initcall(init_kprobes); +subsys_initcall(init_kprobes); #ifdef CONFIG_DEBUG_FS static void report_probe(struct seq_file *pi, struct kprobe *p, -- cgit v1.2.3-59-g8ed1b From f730e0f2da4d0035775ab3c85757fee37bb9cbbe Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 1 Jun 2019 00:16:46 +0900 Subject: tracing/kprobe: Set print format right after parsed command Set event call's print format right after parsed command for simplifying (un)register_kprobe_event(). Link: http://lkml.kernel.org/r/155931580625.28323.5158822928646225903.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7958da2fd922..01fc49f08b70 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -272,6 +272,7 @@ static void free_trace_kprobe(struct trace_kprobe *tk) kfree(tk->tp.call.class->system); kfree(tk->tp.call.name); + kfree(tk->tp.call.print_fmt); kfree(tk->symbol); free_percpu(tk->nhit); kfree(tk); @@ -730,6 +731,10 @@ static int trace_kprobe_create(int argc, const char *argv[]) goto error; /* This can be -ENOMEM */ } + ret = traceprobe_set_print_fmt(&tk->tp, is_return); + if (ret < 0) + goto error; + ret = register_trace_kprobe(tk); if (ret) { trace_probe_log_set_index(1); @@ -1416,18 +1421,14 @@ static int register_kprobe_event(struct trace_kprobe *tk) init_trace_event_call(tk, call); - if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) - return -ENOMEM; ret = register_trace_event(&call->event); - if (!ret) { - kfree(call->print_fmt); + if (!ret) return -ENODEV; - } + ret = trace_add_event_call(call); if (ret) { pr_info("Failed to register kprobe event: %s\n", trace_event_name(call)); - kfree(call->print_fmt); unregister_trace_event(&call->event); } return ret; @@ -1435,13 +1436,8 @@ static int register_kprobe_event(struct trace_kprobe *tk) static int unregister_kprobe_event(struct trace_kprobe *tk) { - int ret; - /* tp->event is unregistered in trace_remove_event_call() */ - ret = trace_remove_event_call(&tk->tp.call); - if (!ret) - kfree(tk->tp.call.print_fmt); - return ret; + return trace_remove_event_call(&tk->tp.call); } #ifdef CONFIG_PERF_EVENTS @@ -1479,10 +1475,8 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs, } ret = __register_trace_kprobe(tk); - if (ret < 0) { - kfree(tk->tp.call.print_fmt); + if (ret < 0) goto error; - } return &tk->tp.call; error: @@ -1503,7 +1497,6 @@ void destroy_local_trace_kprobe(struct trace_event_call *event_call) __unregister_trace_kprobe(tk); - kfree(tk->tp.call.print_fmt); free_trace_kprobe(tk); } #endif /* CONFIG_PERF_EVENTS */ -- cgit v1.2.3-59-g8ed1b From b4d4b96be89466049a0d383d019edc1403bf0ba9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 1 Jun 2019 00:16:56 +0900 Subject: tracing/uprobe: Set print format when parsing command Set event call's print format right after parsed command for simplifying (un)register_uprobe_event(). Link: http://lkml.kernel.org/r/155931581659.28323.5404667166417404076.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_uprobe.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 3d6b868830f3..34ce671b6080 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -345,6 +345,7 @@ static void free_trace_uprobe(struct trace_uprobe *tu) path_put(&tu->path); kfree(tu->tp.call.class->system); kfree(tu->tp.call.name); + kfree(tu->tp.call.print_fmt); kfree(tu->filename); kfree(tu); } @@ -592,6 +593,10 @@ static int trace_uprobe_create(int argc, const char **argv) goto error; } + ret = traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)); + if (ret < 0) + goto error; + ret = register_trace_uprobe(tu); if (!ret) goto out; @@ -1362,21 +1367,15 @@ static int register_uprobe_event(struct trace_uprobe *tu) init_trace_event_call(tu, call); - if (traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) - return -ENOMEM; - ret = register_trace_event(&call->event); - if (!ret) { - kfree(call->print_fmt); + if (!ret) return -ENODEV; - } ret = trace_add_event_call(call); if (ret) { pr_info("Failed to register uprobe event: %s\n", trace_event_name(call)); - kfree(call->print_fmt); unregister_trace_event(&call->event); } @@ -1385,15 +1384,8 @@ static int register_uprobe_event(struct trace_uprobe *tu) static int unregister_uprobe_event(struct trace_uprobe *tu) { - int ret; - /* tu->event is unregistered in trace_remove_event_call() */ - ret = trace_remove_event_call(&tu->tp.call); - if (ret) - return ret; - kfree(tu->tp.call.print_fmt); - tu->tp.call.print_fmt = NULL; - return 0; + return trace_remove_event_call(&tu->tp.call); } #ifdef CONFIG_PERF_EVENTS @@ -1452,9 +1444,6 @@ void destroy_local_trace_uprobe(struct trace_event_call *event_call) tu = container_of(event_call, struct trace_uprobe, tp.call); - kfree(tu->tp.call.print_fmt); - tu->tp.call.print_fmt = NULL; - free_trace_uprobe(tu); } #endif /* CONFIG_PERF_EVENTS */ -- cgit v1.2.3-59-g8ed1b From 455b289973f7df350ea179c7eb8bfed0c766ec40 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 1 Jun 2019 00:17:06 +0900 Subject: tracing/probe: Add trace_probe init and free functions Add common trace_probe init and cleanup function in trace_probe.c, and use it from trace_kprobe.c and trace_uprobe.c Link: http://lkml.kernel.org/r/155931582664.28323.5934870189034740822.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 49 ++++++++++++--------------------------------- kernel/trace/trace_probe.c | 36 +++++++++++++++++++++++++++++++++ kernel/trace/trace_probe.h | 4 ++++ kernel/trace/trace_uprobe.c | 27 +++++-------------------- 4 files changed, 58 insertions(+), 58 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 01fc49f08b70..c43c2d419ded 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -197,6 +197,16 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs); static int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs); +static void free_trace_kprobe(struct trace_kprobe *tk) +{ + if (tk) { + trace_probe_cleanup(&tk->tp); + kfree(tk->symbol); + free_percpu(tk->nhit); + kfree(tk); + } +} + /* * Allocate new trace_probe and initialize it (including kprobes). */ @@ -235,49 +245,17 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group, tk->rp.maxactive = maxactive; - if (!event || !group) { - ret = -EINVAL; - goto error; - } - - tk->tp.call.class = &tk->tp.class; - tk->tp.call.name = kstrdup(event, GFP_KERNEL); - if (!tk->tp.call.name) - goto error; - - tk->tp.class.system = kstrdup(group, GFP_KERNEL); - if (!tk->tp.class.system) + ret = trace_probe_init(&tk->tp, event, group); + if (ret < 0) goto error; dyn_event_init(&tk->devent, &trace_kprobe_ops); - INIT_LIST_HEAD(&tk->tp.files); return tk; error: - kfree(tk->tp.call.name); - kfree(tk->symbol); - free_percpu(tk->nhit); - kfree(tk); + free_trace_kprobe(tk); return ERR_PTR(ret); } -static void free_trace_kprobe(struct trace_kprobe *tk) -{ - int i; - - if (!tk) - return; - - for (i = 0; i < tk->tp.nr_args; i++) - traceprobe_free_probe_arg(&tk->tp.args[i]); - - kfree(tk->tp.call.class->system); - kfree(tk->tp.call.name); - kfree(tk->tp.call.print_fmt); - kfree(tk->symbol); - free_percpu(tk->nhit); - kfree(tk); -} - static struct trace_kprobe *find_trace_kprobe(const char *event, const char *group) { @@ -1400,7 +1378,6 @@ static struct trace_event_functions kprobe_funcs = { static inline void init_trace_event_call(struct trace_kprobe *tk, struct trace_event_call *call) { - INIT_LIST_HEAD(&call->class->fields); if (trace_kprobe_is_return(tk)) { call->event.funcs = &kretprobe_funcs; call->class->define_fields = kretprobe_event_define_fields; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index b6b0593844cd..fe4ee2e73d92 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -884,3 +884,39 @@ int traceprobe_define_arg_fields(struct trace_event_call *event_call, } return 0; } + + +void trace_probe_cleanup(struct trace_probe *tp) +{ + int i; + + for (i = 0; i < tp->nr_args; i++) + traceprobe_free_probe_arg(&tp->args[i]); + + kfree(tp->call.class->system); + kfree(tp->call.name); + kfree(tp->call.print_fmt); +} + +int trace_probe_init(struct trace_probe *tp, const char *event, + const char *group) +{ + if (!event || !group) + return -EINVAL; + + tp->call.class = &tp->class; + tp->call.name = kstrdup(event, GFP_KERNEL); + if (!tp->call.name) + return -ENOMEM; + + tp->class.system = kstrdup(group, GFP_KERNEL); + if (!tp->class.system) { + kfree(tp->call.name); + tp->call.name = NULL; + return -ENOMEM; + } + INIT_LIST_HEAD(&tp->files); + INIT_LIST_HEAD(&tp->class.fields); + + return 0; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 42816358dd48..818b1d7693ba 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -248,6 +248,10 @@ static inline bool trace_probe_is_registered(struct trace_probe *tp) return !!(tp->flags & TP_FLAG_REGISTERED); } +int trace_probe_init(struct trace_probe *tp, const char *event, + const char *group); +void trace_probe_cleanup(struct trace_probe *tp); + /* Check the name is good for event/group/fields */ static inline bool is_good_name(const char *name) { diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 34ce671b6080..b18b7eb1a76f 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -300,25 +300,17 @@ static struct trace_uprobe * alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret) { struct trace_uprobe *tu; - - if (!event || !group) - return ERR_PTR(-EINVAL); + int ret; tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL); if (!tu) return ERR_PTR(-ENOMEM); - tu->tp.call.class = &tu->tp.class; - tu->tp.call.name = kstrdup(event, GFP_KERNEL); - if (!tu->tp.call.name) - goto error; - - tu->tp.class.system = kstrdup(group, GFP_KERNEL); - if (!tu->tp.class.system) + ret = trace_probe_init(&tu->tp, event, group); + if (ret < 0) goto error; dyn_event_init(&tu->devent, &trace_uprobe_ops); - INIT_LIST_HEAD(&tu->tp.files); tu->consumer.handler = uprobe_dispatcher; if (is_ret) tu->consumer.ret_handler = uretprobe_dispatcher; @@ -326,26 +318,18 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret) return tu; error: - kfree(tu->tp.call.name); kfree(tu); - return ERR_PTR(-ENOMEM); + return ERR_PTR(ret); } static void free_trace_uprobe(struct trace_uprobe *tu) { - int i; - if (!tu) return; - for (i = 0; i < tu->tp.nr_args; i++) - traceprobe_free_probe_arg(&tu->tp.args[i]); - path_put(&tu->path); - kfree(tu->tp.call.class->system); - kfree(tu->tp.call.name); - kfree(tu->tp.call.print_fmt); + trace_probe_cleanup(&tu->tp); kfree(tu->filename); kfree(tu); } @@ -1351,7 +1335,6 @@ static struct trace_event_functions uprobe_funcs = { static inline void init_trace_event_call(struct trace_uprobe *tu, struct trace_event_call *call) { - INIT_LIST_HEAD(&call->class->fields); call->event.funcs = &uprobe_funcs; call->class->define_fields = uprobe_event_define_fields; -- cgit v1.2.3-59-g8ed1b From 46e5376d404d14cb321f5d4e446fe3fb6d8a93ab Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 1 Jun 2019 00:17:16 +0900 Subject: tracing/probe: Add trace_event_call register API for trace_probe Since trace_event_call is a field of trace_probe, these operations should be done in trace_probe.c. trace_kprobe and trace_uprobe use new functions to register/unregister trace_event_call. Link: http://lkml.kernel.org/r/155931583643.28323.14828411185591538876.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 20 +++----------------- kernel/trace/trace_probe.c | 16 ++++++++++++++++ kernel/trace/trace_probe.h | 6 ++++++ kernel/trace/trace_uprobe.c | 22 +++------------------- 4 files changed, 28 insertions(+), 36 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c43c2d419ded..7f802ee27266 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1393,28 +1393,14 @@ static inline void init_trace_event_call(struct trace_kprobe *tk, static int register_kprobe_event(struct trace_kprobe *tk) { - struct trace_event_call *call = &tk->tp.call; - int ret = 0; - - init_trace_event_call(tk, call); - - ret = register_trace_event(&call->event); - if (!ret) - return -ENODEV; + init_trace_event_call(tk, &tk->tp.call); - ret = trace_add_event_call(call); - if (ret) { - pr_info("Failed to register kprobe event: %s\n", - trace_event_name(call)); - unregister_trace_event(&call->event); - } - return ret; + return trace_probe_register_event_call(&tk->tp); } static int unregister_kprobe_event(struct trace_kprobe *tk) { - /* tp->event is unregistered in trace_remove_event_call() */ - return trace_remove_event_call(&tk->tp.call); + return trace_probe_unregister_event_call(&tk->tp); } #ifdef CONFIG_PERF_EVENTS diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index fe4ee2e73d92..509a26024b4f 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -920,3 +920,19 @@ int trace_probe_init(struct trace_probe *tp, const char *event, return 0; } + +int trace_probe_register_event_call(struct trace_probe *tp) +{ + struct trace_event_call *call = &tp->call; + int ret; + + ret = register_trace_event(&call->event); + if (!ret) + return -ENODEV; + + ret = trace_add_event_call(call); + if (ret) + unregister_trace_event(&call->event); + + return ret; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 818b1d7693ba..01d7b222e004 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -251,6 +251,12 @@ static inline bool trace_probe_is_registered(struct trace_probe *tp) int trace_probe_init(struct trace_probe *tp, const char *event, const char *group); void trace_probe_cleanup(struct trace_probe *tp); +int trace_probe_register_event_call(struct trace_probe *tp); +static inline int trace_probe_unregister_event_call(struct trace_probe *tp) +{ + /* tp->event is unregistered in trace_remove_event_call() */ + return trace_remove_event_call(&tp->call); +} /* Check the name is good for event/group/fields */ static inline bool is_good_name(const char *name) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index b18b7eb1a76f..c262494fa793 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1345,30 +1345,14 @@ static inline void init_trace_event_call(struct trace_uprobe *tu, static int register_uprobe_event(struct trace_uprobe *tu) { - struct trace_event_call *call = &tu->tp.call; - int ret = 0; - - init_trace_event_call(tu, call); - - ret = register_trace_event(&call->event); - if (!ret) - return -ENODEV; - - ret = trace_add_event_call(call); - - if (ret) { - pr_info("Failed to register uprobe event: %s\n", - trace_event_name(call)); - unregister_trace_event(&call->event); - } + init_trace_event_call(tu, &tu->tp.call); - return ret; + return trace_probe_register_event_call(&tu->tp); } static int unregister_uprobe_event(struct trace_uprobe *tu) { - /* tu->event is unregistered in trace_remove_event_call() */ - return trace_remove_event_call(&tu->tp.call); + return trace_probe_unregister_event_call(&tu->tp); } #ifdef CONFIG_PERF_EVENTS -- cgit v1.2.3-59-g8ed1b From b5f935ee133911b3ed2d4429dd86d2bd5385519d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 1 Jun 2019 00:17:26 +0900 Subject: tracing/probe: Add trace_event_file access APIs for trace_probe Add trace_event_file access APIs for trace_probe data structure. This simplifies enabling/disabling operations in uprobe and kprobe events so that those don't touch deep inside the trace_probe. This also removing a redundant synchronization when the kprobe event is used from perf, since the perf itself uses tracepoint_synchronize_unregister() after disabling (ftrace- defined) event, thus we don't have to synchronize in that path. Also we don't need to identify local trace_kprobe too anymore. Link: http://lkml.kernel.org/r/155931584587.28323.372301976283354629.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 85 ++++++++++++++++----------------------------- kernel/trace/trace_probe.c | 47 +++++++++++++++++++++++++ kernel/trace/trace_probe.h | 36 ++++++++++--------- kernel/trace/trace_uprobe.c | 42 +++++++--------------- 4 files changed, 109 insertions(+), 101 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7f802ee27266..87a52094378c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -290,34 +290,27 @@ static inline int __enable_trace_kprobe(struct trace_kprobe *tk) static int enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) { - struct event_file_link *link; + bool enabled = trace_probe_is_enabled(&tk->tp); int ret = 0; if (file) { - link = kmalloc(sizeof(*link), GFP_KERNEL); - if (!link) { - ret = -ENOMEM; - goto out; - } - - link->file = file; - list_add_tail_rcu(&link->list, &tk->tp.files); + ret = trace_probe_add_file(&tk->tp, file); + if (ret) + return ret; + } else + tk->tp.flags |= TP_FLAG_PROFILE; - tk->tp.flags |= TP_FLAG_TRACE; - ret = __enable_trace_kprobe(tk); - if (ret) { - list_del_rcu(&link->list); - kfree(link); - tk->tp.flags &= ~TP_FLAG_TRACE; - } + if (enabled) + return 0; - } else { - tk->tp.flags |= TP_FLAG_PROFILE; - ret = __enable_trace_kprobe(tk); - if (ret) + ret = __enable_trace_kprobe(tk); + if (ret) { + if (file) + trace_probe_remove_file(&tk->tp, file); + else tk->tp.flags &= ~TP_FLAG_PROFILE; } - out: + return ret; } @@ -328,54 +321,34 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) static int disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) { - struct event_file_link *link = NULL; - int wait = 0; + struct trace_probe *tp = &tk->tp; int ret = 0; if (file) { - link = find_event_file_link(&tk->tp, file); - if (!link) { - ret = -EINVAL; - goto out; - } - - list_del_rcu(&link->list); - wait = 1; - if (!list_empty(&tk->tp.files)) + if (!trace_probe_get_file_link(tp, file)) + return -ENOENT; + if (!trace_probe_has_single_file(tp)) goto out; - - tk->tp.flags &= ~TP_FLAG_TRACE; + tp->flags &= ~TP_FLAG_TRACE; } else - tk->tp.flags &= ~TP_FLAG_PROFILE; + tp->flags &= ~TP_FLAG_PROFILE; - if (!trace_probe_is_enabled(&tk->tp) && trace_probe_is_registered(&tk->tp)) { + if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) { if (trace_kprobe_is_return(tk)) disable_kretprobe(&tk->rp); else disable_kprobe(&tk->rp.kp); - wait = 1; } - /* - * if tk is not added to any list, it must be a local trace_kprobe - * created with perf_event_open. We don't need to wait for these - * trace_kprobes - */ - if (list_empty(&tk->devent.list)) - wait = 0; out: - if (wait) { + if (file) /* - * Synchronize with kprobe_trace_func/kretprobe_trace_func - * to ensure disabled (all running handlers are finished). - * This is not only for kfree(), but also the caller, - * trace_remove_event_call() supposes it for releasing - * event_call related objects, which will be accessed in - * the kprobe_trace_func/kretprobe_trace_func. + * Synchronization is done in below function. For perf event, + * file == NULL and perf_trace_event_unreg() calls + * tracepoint_synchronize_unregister() to ensure synchronize + * event. We don't need to care about it. */ - synchronize_rcu(); - kfree(link); /* Ignored if link == NULL */ - } + trace_probe_remove_file(tp, file); return ret; } @@ -1044,7 +1017,7 @@ kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct event_file_link *link; - list_for_each_entry_rcu(link, &tk->tp.files, list) + trace_probe_for_each_link_rcu(link, &tk->tp) __kprobe_trace_func(tk, regs, link->file); } NOKPROBE_SYMBOL(kprobe_trace_func); @@ -1094,7 +1067,7 @@ kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, { struct event_file_link *link; - list_for_each_entry_rcu(link, &tk->tp.files, list) + trace_probe_for_each_link_rcu(link, &tk->tp) __kretprobe_trace_func(tk, ri, regs, link->file); } NOKPROBE_SYMBOL(kretprobe_trace_func); diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 509a26024b4f..abb05608a09d 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -936,3 +936,50 @@ int trace_probe_register_event_call(struct trace_probe *tp) return ret; } + +int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file) +{ + struct event_file_link *link; + + link = kmalloc(sizeof(*link), GFP_KERNEL); + if (!link) + return -ENOMEM; + + link->file = file; + INIT_LIST_HEAD(&link->list); + list_add_tail_rcu(&link->list, &tp->files); + tp->flags |= TP_FLAG_TRACE; + return 0; +} + +struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp, + struct trace_event_file *file) +{ + struct event_file_link *link; + + trace_probe_for_each_link(link, tp) { + if (link->file == file) + return link; + } + + return NULL; +} + +int trace_probe_remove_file(struct trace_probe *tp, + struct trace_event_file *file) +{ + struct event_file_link *link; + + link = trace_probe_get_file_link(tp, file); + if (!link) + return -ENOENT; + + list_del_rcu(&link->list); + synchronize_rcu(); + kfree(link); + + if (list_empty(&tp->files)) + tp->flags &= ~TP_FLAG_TRACE; + + return 0; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 01d7b222e004..ab02007e131d 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -248,16 +248,32 @@ static inline bool trace_probe_is_registered(struct trace_probe *tp) return !!(tp->flags & TP_FLAG_REGISTERED); } -int trace_probe_init(struct trace_probe *tp, const char *event, - const char *group); -void trace_probe_cleanup(struct trace_probe *tp); -int trace_probe_register_event_call(struct trace_probe *tp); static inline int trace_probe_unregister_event_call(struct trace_probe *tp) { /* tp->event is unregistered in trace_remove_event_call() */ return trace_remove_event_call(&tp->call); } +static inline bool trace_probe_has_single_file(struct trace_probe *tp) +{ + return !!list_is_singular(&tp->files); +} + +int trace_probe_init(struct trace_probe *tp, const char *event, + const char *group); +void trace_probe_cleanup(struct trace_probe *tp); +int trace_probe_register_event_call(struct trace_probe *tp); +int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file); +int trace_probe_remove_file(struct trace_probe *tp, + struct trace_event_file *file); +struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp, + struct trace_event_file *file); + +#define trace_probe_for_each_link(pos, tp) \ + list_for_each_entry(pos, &(tp)->files, list) +#define trace_probe_for_each_link_rcu(pos, tp) \ + list_for_each_entry_rcu(pos, &(tp)->files, list) + /* Check the name is good for event/group/fields */ static inline bool is_good_name(const char *name) { @@ -270,18 +286,6 @@ static inline bool is_good_name(const char *name) return true; } -static inline struct event_file_link * -find_event_file_link(struct trace_probe *tp, struct trace_event_file *file) -{ - struct event_file_link *link; - - list_for_each_entry(link, &tp->files, list) - if (link->file == file) - return link; - - return NULL; -} - #define TPARG_FL_RETURN BIT(0) #define TPARG_FL_KERNEL BIT(1) #define TPARG_FL_FENTRY BIT(2) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index c262494fa793..a9f8045b6695 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -863,7 +863,7 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, return 0; rcu_read_lock(); - list_for_each_entry_rcu(link, &tu->tp.files, list) + trace_probe_for_each_link_rcu(link, &tu->tp) __uprobe_trace_func(tu, 0, regs, ucb, dsize, link->file); rcu_read_unlock(); @@ -877,7 +877,7 @@ static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct event_file_link *link; rcu_read_lock(); - list_for_each_entry_rcu(link, &tu->tp.files, list) + trace_probe_for_each_link_rcu(link, &tu->tp) __uprobe_trace_func(tu, func, regs, ucb, dsize, link->file); rcu_read_unlock(); } @@ -924,21 +924,15 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, filter_func_t filter) { bool enabled = trace_probe_is_enabled(&tu->tp); - struct event_file_link *link = NULL; int ret; if (file) { if (tu->tp.flags & TP_FLAG_PROFILE) return -EINTR; - link = kmalloc(sizeof(*link), GFP_KERNEL); - if (!link) - return -ENOMEM; - - link->file = file; - list_add_tail_rcu(&link->list, &tu->tp.files); - - tu->tp.flags |= TP_FLAG_TRACE; + ret = trace_probe_add_file(&tu->tp, file); + if (ret < 0) + return ret; } else { if (tu->tp.flags & TP_FLAG_TRACE) return -EINTR; @@ -973,13 +967,11 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, uprobe_buffer_disable(); err_flags: - if (file) { - list_del(&link->list); - kfree(link); - tu->tp.flags &= ~TP_FLAG_TRACE; - } else { + if (file) + trace_probe_remove_file(&tu->tp, file); + else tu->tp.flags &= ~TP_FLAG_PROFILE; - } + return ret; } @@ -990,26 +982,18 @@ probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file) return; if (file) { - struct event_file_link *link; - - link = find_event_file_link(&tu->tp, file); - if (!link) + if (trace_probe_remove_file(&tu->tp, file) < 0) return; - list_del_rcu(&link->list); - /* synchronize with u{,ret}probe_trace_func */ - synchronize_rcu(); - kfree(link); - - if (!list_empty(&tu->tp.files)) + if (trace_probe_is_enabled(&tu->tp)) return; - } + } else + tu->tp.flags &= ~TP_FLAG_PROFILE; WARN_ON(!uprobe_filter_is_empty(&tu->filter)); uprobe_unregister(tu->inode, tu->offset, &tu->consumer); tu->inode = NULL; - tu->tp.flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE; uprobe_buffer_disable(); } -- cgit v1.2.3-59-g8ed1b From 747774d6b018ca02493fd3f321624dfce749da61 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 1 Jun 2019 00:17:37 +0900 Subject: tracing/probe: Add trace flag access APIs for trace_probe Add trace_probe_test/set/clear_flag() functions for accessing trace_probe.flag field. This flags field should not be accessed directly. Link: http://lkml.kernel.org/r/155931585683.28323.314290023236905988.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 20 ++++++++++---------- kernel/trace/trace_probe.c | 4 ++-- kernel/trace/trace_probe.h | 22 ++++++++++++++++++++-- kernel/trace/trace_uprobe.c | 18 +++++++++--------- 4 files changed, 41 insertions(+), 23 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 87a52094378c..c3ab84cb25c8 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -298,7 +298,7 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) if (ret) return ret; } else - tk->tp.flags |= TP_FLAG_PROFILE; + trace_probe_set_flag(&tk->tp, TP_FLAG_PROFILE); if (enabled) return 0; @@ -308,7 +308,7 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) if (file) trace_probe_remove_file(&tk->tp, file); else - tk->tp.flags &= ~TP_FLAG_PROFILE; + trace_probe_clear_flag(&tk->tp, TP_FLAG_PROFILE); } return ret; @@ -329,9 +329,9 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) return -ENOENT; if (!trace_probe_has_single_file(tp)) goto out; - tp->flags &= ~TP_FLAG_TRACE; + trace_probe_clear_flag(tp, TP_FLAG_TRACE); } else - tp->flags &= ~TP_FLAG_PROFILE; + trace_probe_clear_flag(tp, TP_FLAG_PROFILE); if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) { if (trace_kprobe_is_return(tk)) @@ -408,7 +408,7 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) ret = register_kprobe(&tk->rp.kp); if (ret == 0) - tk->tp.flags |= TP_FLAG_REGISTERED; + trace_probe_set_flag(&tk->tp, TP_FLAG_REGISTERED); return ret; } @@ -420,7 +420,7 @@ static void __unregister_trace_kprobe(struct trace_kprobe *tk) unregister_kretprobe(&tk->rp); else unregister_kprobe(&tk->rp.kp); - tk->tp.flags &= ~TP_FLAG_REGISTERED; + trace_probe_clear_flag(&tk->tp, TP_FLAG_REGISTERED); /* Cleanup kprobe for reuse */ if (tk->rp.kp.symbol_name) tk->rp.kp.addr = NULL; @@ -1313,10 +1313,10 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) raw_cpu_inc(*tk->nhit); - if (tk->tp.flags & TP_FLAG_TRACE) + if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE)) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS - if (tk->tp.flags & TP_FLAG_PROFILE) + if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE)) ret = kprobe_perf_func(tk, regs); #endif return ret; @@ -1330,10 +1330,10 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) raw_cpu_inc(*tk->nhit); - if (tk->tp.flags & TP_FLAG_TRACE) + if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE)) kretprobe_trace_func(tk, ri, regs); #ifdef CONFIG_PERF_EVENTS - if (tk->tp.flags & TP_FLAG_PROFILE) + if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE)) kretprobe_perf_func(tk, ri, regs); #endif return 0; /* We don't tweek kernel, so just return 0 */ diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index abb05608a09d..323a11ad1dad 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -948,7 +948,7 @@ int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file) link->file = file; INIT_LIST_HEAD(&link->list); list_add_tail_rcu(&link->list, &tp->files); - tp->flags |= TP_FLAG_TRACE; + trace_probe_set_flag(tp, TP_FLAG_TRACE); return 0; } @@ -979,7 +979,7 @@ int trace_probe_remove_file(struct trace_probe *tp, kfree(link); if (list_empty(&tp->files)) - tp->flags &= ~TP_FLAG_TRACE; + trace_probe_clear_flag(tp, TP_FLAG_TRACE); return 0; } diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index ab02007e131d..87d48d850b63 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -238,14 +238,32 @@ struct event_file_link { struct list_head list; }; +static inline bool trace_probe_test_flag(struct trace_probe *tp, + unsigned int flag) +{ + return !!(tp->flags & flag); +} + +static inline void trace_probe_set_flag(struct trace_probe *tp, + unsigned int flag) +{ + tp->flags |= flag; +} + +static inline void trace_probe_clear_flag(struct trace_probe *tp, + unsigned int flag) +{ + tp->flags &= ~flag; +} + static inline bool trace_probe_is_enabled(struct trace_probe *tp) { - return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE)); + return trace_probe_test_flag(tp, TP_FLAG_TRACE | TP_FLAG_PROFILE); } static inline bool trace_probe_is_registered(struct trace_probe *tp) { - return !!(tp->flags & TP_FLAG_REGISTERED); + return trace_probe_test_flag(tp, TP_FLAG_REGISTERED); } static inline int trace_probe_unregister_event_call(struct trace_probe *tp) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index a9f8045b6695..f8e23ed47823 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -927,17 +927,17 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, int ret; if (file) { - if (tu->tp.flags & TP_FLAG_PROFILE) + if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) return -EINTR; ret = trace_probe_add_file(&tu->tp, file); if (ret < 0) return ret; } else { - if (tu->tp.flags & TP_FLAG_TRACE) + if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) return -EINTR; - tu->tp.flags |= TP_FLAG_PROFILE; + trace_probe_set_flag(&tu->tp, TP_FLAG_PROFILE); } WARN_ON(!uprobe_filter_is_empty(&tu->filter)); @@ -970,7 +970,7 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, if (file) trace_probe_remove_file(&tu->tp, file); else - tu->tp.flags &= ~TP_FLAG_PROFILE; + trace_probe_clear_flag(&tu->tp, TP_FLAG_PROFILE); return ret; } @@ -988,7 +988,7 @@ probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file) if (trace_probe_is_enabled(&tu->tp)) return; } else - tu->tp.flags &= ~TP_FLAG_PROFILE; + trace_probe_clear_flag(&tu->tp, TP_FLAG_PROFILE); WARN_ON(!uprobe_filter_is_empty(&tu->filter)); @@ -1266,11 +1266,11 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) ucb = uprobe_buffer_get(); store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize); - if (tu->tp.flags & TP_FLAG_TRACE) + if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) ret |= uprobe_trace_func(tu, regs, ucb, dsize); #ifdef CONFIG_PERF_EVENTS - if (tu->tp.flags & TP_FLAG_PROFILE) + if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) ret |= uprobe_perf_func(tu, regs, ucb, dsize); #endif uprobe_buffer_put(ucb); @@ -1301,11 +1301,11 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, ucb = uprobe_buffer_get(); store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize); - if (tu->tp.flags & TP_FLAG_TRACE) + if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) uretprobe_trace_func(tu, func, regs, ucb, dsize); #ifdef CONFIG_PERF_EVENTS - if (tu->tp.flags & TP_FLAG_PROFILE) + if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) uretprobe_perf_func(tu, func, regs, ucb, dsize); #endif uprobe_buffer_put(ucb); -- cgit v1.2.3-59-g8ed1b From b55ce203a8f327b623688c8fb551ac3f9781edea Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 1 Jun 2019 00:17:47 +0900 Subject: tracing/probe: Add probe event name and group name accesses APIs Add trace_probe_name() and trace_probe_group_name() functions for accessing probe name and group name of trace_probe. Link: http://lkml.kernel.org/r/155931586717.28323.8738615064952254761.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 24 ++++++++++++------------ kernel/trace/trace_probe.h | 10 ++++++++++ kernel/trace/trace_uprobe.c | 22 +++++++++++----------- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c3ab84cb25c8..3cf8cee4f276 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -142,8 +142,8 @@ static bool trace_kprobe_match(const char *system, const char *event, { struct trace_kprobe *tk = to_trace_kprobe(ev); - return strcmp(trace_event_name(&tk->tp.call), event) == 0 && - (!system || strcmp(tk->tp.call.class->system, system) == 0); + return strcmp(trace_probe_name(&tk->tp), event) == 0 && + (!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0); } static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) @@ -263,8 +263,8 @@ static struct trace_kprobe *find_trace_kprobe(const char *event, struct trace_kprobe *tk; for_each_trace_kprobe(tk, pos) - if (strcmp(trace_event_name(&tk->tp.call), event) == 0 && - strcmp(tk->tp.call.class->system, group) == 0) + if (strcmp(trace_probe_name(&tk->tp), event) == 0 && + strcmp(trace_probe_group_name(&tk->tp), group) == 0) return tk; return NULL; } @@ -453,8 +453,8 @@ static int register_trace_kprobe(struct trace_kprobe *tk) mutex_lock(&event_mutex); /* Delete old (same name) event if exist */ - old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call), - tk->tp.call.class->system); + old_tk = find_trace_kprobe(trace_probe_name(&tk->tp), + trace_probe_group_name(&tk->tp)); if (old_tk) { ret = unregister_trace_kprobe(old_tk); if (ret < 0) @@ -507,7 +507,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb, ret = __register_trace_kprobe(tk); if (ret) pr_warn("Failed to re-register probe %s on %s: %d\n", - trace_event_name(&tk->tp.call), + trace_probe_name(&tk->tp), mod->name, ret); } } @@ -737,8 +737,8 @@ static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev) int i; seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p'); - seq_printf(m, ":%s/%s", tk->tp.call.class->system, - trace_event_name(&tk->tp.call)); + seq_printf(m, ":%s/%s", trace_probe_group_name(&tk->tp), + trace_probe_name(&tk->tp)); if (!tk->symbol) seq_printf(m, " 0x%p", tk->rp.kp.addr); @@ -812,7 +812,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) tk = to_trace_kprobe(ev); seq_printf(m, " %-44s %15lu %15lu\n", - trace_event_name(&tk->tp.call), + trace_probe_name(&tk->tp), trace_kprobe_nhit(tk), tk->rp.kp.nmissed); @@ -1084,7 +1084,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags, field = (struct kprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); - trace_seq_printf(s, "%s: (", trace_event_name(&tp->call)); + trace_seq_printf(s, "%s: (", trace_probe_name(tp)); if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) goto out; @@ -1111,7 +1111,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, field = (struct kretprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); - trace_seq_printf(s, "%s: (", trace_event_name(&tp->call)); + trace_seq_printf(s, "%s: (", trace_probe_name(tp)); if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) goto out; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 87d48d850b63..67424cb5d5d6 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -266,6 +266,16 @@ static inline bool trace_probe_is_registered(struct trace_probe *tp) return trace_probe_test_flag(tp, TP_FLAG_REGISTERED); } +static inline const char *trace_probe_name(struct trace_probe *tp) +{ + return trace_event_name(&tp->call); +} + +static inline const char *trace_probe_group_name(struct trace_probe *tp) +{ + return tp->call.class->system; +} + static inline int trace_probe_unregister_event_call(struct trace_probe *tp) { /* tp->event is unregistered in trace_remove_event_call() */ diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index f8e23ed47823..09fdba3ee9d9 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -289,8 +289,8 @@ static bool trace_uprobe_match(const char *system, const char *event, { struct trace_uprobe *tu = to_trace_uprobe(ev); - return strcmp(trace_event_name(&tu->tp.call), event) == 0 && - (!system || strcmp(tu->tp.call.class->system, system) == 0); + return strcmp(trace_probe_name(&tu->tp), event) == 0 && + (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0); } /* @@ -340,8 +340,8 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou struct trace_uprobe *tu; for_each_trace_uprobe(tu, pos) - if (strcmp(trace_event_name(&tu->tp.call), event) == 0 && - strcmp(tu->tp.call.class->system, group) == 0) + if (strcmp(trace_probe_name(&tu->tp), event) == 0 && + strcmp(trace_probe_group_name(&tu->tp), group) == 0) return tu; return NULL; @@ -376,8 +376,8 @@ static struct trace_uprobe *find_old_trace_uprobe(struct trace_uprobe *new) struct trace_uprobe *tmp, *old = NULL; struct inode *new_inode = d_real_inode(new->path.dentry); - old = find_probe_event(trace_event_name(&new->tp.call), - new->tp.call.class->system); + old = find_probe_event(trace_probe_name(&new->tp), + trace_probe_group_name(&new->tp)); for_each_trace_uprobe(tmp, pos) { if ((old ? old != tmp : true) && @@ -624,8 +624,8 @@ static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev) char c = is_ret_probe(tu) ? 'r' : 'p'; int i; - seq_printf(m, "%c:%s/%s %s:0x%0*lx", c, tu->tp.call.class->system, - trace_event_name(&tu->tp.call), tu->filename, + seq_printf(m, "%c:%s/%s %s:0x%0*lx", c, trace_probe_group_name(&tu->tp), + trace_probe_name(&tu->tp), tu->filename, (int)(sizeof(void *) * 2), tu->offset); if (tu->ref_ctr_offset) @@ -695,7 +695,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) tu = to_trace_uprobe(ev); seq_printf(m, " %s %-44s %15lu\n", tu->filename, - trace_event_name(&tu->tp.call), tu->nhit); + trace_probe_name(&tu->tp), tu->nhit); return 0; } @@ -896,12 +896,12 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e if (is_ret_probe(tu)) { trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", - trace_event_name(&tu->tp.call), + trace_probe_name(&tu->tp), entry->vaddr[1], entry->vaddr[0]); data = DATAOF_TRACE_ENTRY(entry, true); } else { trace_seq_printf(s, "%s: (0x%lx)", - trace_event_name(&tu->tp.call), + trace_probe_name(&tu->tp), entry->vaddr[0]); data = DATAOF_TRACE_ENTRY(entry, false); } -- cgit v1.2.3-59-g8ed1b From e3dc9f898ef9c6a1a96378517573ee2d04d0abcc Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 1 Jun 2019 00:17:57 +0900 Subject: tracing/probe: Add trace_event_call accesses APIs Add trace_event_call access APIs for trace_probe. Instead of accessing trace_probe.call directly, use those accesses by trace_probe_event_call() method. This hides the relationship of trace_event_call and trace_probe from trace_kprobe and trace_uprobe. Link: http://lkml.kernel.org/r/155931587711.28323.8335129014686133120.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 23 ++++++++++++----------- kernel/trace/trace_probe.c | 24 ++++++++++++++---------- kernel/trace/trace_probe.h | 6 ++++++ kernel/trace/trace_uprobe.c | 15 ++++++++------- 4 files changed, 40 insertions(+), 28 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3cf8cee4f276..62362ad1ad98 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -985,7 +985,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, struct ring_buffer *buffer; int size, dsize, pc; unsigned long irq_flags; - struct trace_event_call *call = &tk->tp.call; + struct trace_event_call *call = trace_probe_event_call(&tk->tp); WARN_ON(call != trace_file->event_call); @@ -1033,7 +1033,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct ring_buffer *buffer; int size, pc, dsize; unsigned long irq_flags; - struct trace_event_call *call = &tk->tp.call; + struct trace_event_call *call = trace_probe_event_call(&tk->tp); WARN_ON(call != trace_file->event_call); @@ -1163,7 +1163,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call) static int kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { - struct trace_event_call *call = &tk->tp.call; + struct trace_event_call *call = trace_probe_event_call(&tk->tp); struct kprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; @@ -1213,7 +1213,7 @@ static void kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs) { - struct trace_event_call *call = &tk->tp.call; + struct trace_event_call *call = trace_probe_event_call(&tk->tp); struct kretprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; @@ -1348,9 +1348,10 @@ static struct trace_event_functions kprobe_funcs = { .trace = print_kprobe_event }; -static inline void init_trace_event_call(struct trace_kprobe *tk, - struct trace_event_call *call) +static inline void init_trace_event_call(struct trace_kprobe *tk) { + struct trace_event_call *call = trace_probe_event_call(&tk->tp); + if (trace_kprobe_is_return(tk)) { call->event.funcs = &kretprobe_funcs; call->class->define_fields = kretprobe_event_define_fields; @@ -1366,7 +1367,7 @@ static inline void init_trace_event_call(struct trace_kprobe *tk, static int register_kprobe_event(struct trace_kprobe *tk) { - init_trace_event_call(tk, &tk->tp.call); + init_trace_event_call(tk); return trace_probe_register_event_call(&tk->tp); } @@ -1403,7 +1404,7 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs, return ERR_CAST(tk); } - init_trace_event_call(tk, &tk->tp.call); + init_trace_event_call(tk); if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) { ret = -ENOMEM; @@ -1414,7 +1415,7 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs, if (ret < 0) goto error; - return &tk->tp.call; + return trace_probe_event_call(&tk->tp); error: free_trace_kprobe(tk); return ERR_PTR(ret); @@ -1447,7 +1448,7 @@ static __init void enable_boot_kprobe_events(void) mutex_lock(&event_mutex); for_each_trace_kprobe(tk, pos) { list_for_each_entry(file, &tr->events, list) - if (file->event_call == &tk->tp.call) + if (file->event_call == trace_probe_event_call(&tk->tp)) trace_event_enable_disable(file, 1, 0); } mutex_unlock(&event_mutex); @@ -1523,7 +1524,7 @@ find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr) struct trace_event_file *file; list_for_each_entry(file, &tr->events, list) - if (file->event_call == &tk->tp.call) + if (file->event_call == trace_probe_event_call(&tk->tp)) return file; return NULL; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 323a11ad1dad..dbef0d135075 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -844,6 +844,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return) { + struct trace_event_call *call = trace_probe_event_call(tp); int len; char *print_fmt; @@ -855,7 +856,7 @@ int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return) /* Second: actually write the @print_fmt */ __set_print_fmt(tp, print_fmt, len + 1, is_return); - tp->call.print_fmt = print_fmt; + call->print_fmt = print_fmt; return 0; } @@ -888,31 +889,34 @@ int traceprobe_define_arg_fields(struct trace_event_call *event_call, void trace_probe_cleanup(struct trace_probe *tp) { + struct trace_event_call *call = trace_probe_event_call(tp); int i; for (i = 0; i < tp->nr_args; i++) traceprobe_free_probe_arg(&tp->args[i]); - kfree(tp->call.class->system); - kfree(tp->call.name); - kfree(tp->call.print_fmt); + kfree(call->class->system); + kfree(call->name); + kfree(call->print_fmt); } int trace_probe_init(struct trace_probe *tp, const char *event, const char *group) { + struct trace_event_call *call = trace_probe_event_call(tp); + if (!event || !group) return -EINVAL; - tp->call.class = &tp->class; - tp->call.name = kstrdup(event, GFP_KERNEL); - if (!tp->call.name) + call->class = &tp->class; + call->name = kstrdup(event, GFP_KERNEL); + if (!call->name) return -ENOMEM; tp->class.system = kstrdup(group, GFP_KERNEL); if (!tp->class.system) { - kfree(tp->call.name); - tp->call.name = NULL; + kfree(call->name); + call->name = NULL; return -ENOMEM; } INIT_LIST_HEAD(&tp->files); @@ -923,7 +927,7 @@ int trace_probe_init(struct trace_probe *tp, const char *event, int trace_probe_register_event_call(struct trace_probe *tp) { - struct trace_event_call *call = &tp->call; + struct trace_event_call *call = trace_probe_event_call(tp); int ret; ret = register_trace_event(&call->event); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 67424cb5d5d6..6c33d4aa36c3 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -276,6 +276,12 @@ static inline const char *trace_probe_group_name(struct trace_probe *tp) return tp->call.class->system; } +static inline struct trace_event_call * + trace_probe_event_call(struct trace_probe *tp) +{ + return &tp->call; +} + static inline int trace_probe_unregister_event_call(struct trace_probe *tp) { /* tp->event is unregistered in trace_remove_event_call() */ diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 09fdba3ee9d9..7fb9353b47d9 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -821,7 +821,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, struct ring_buffer *buffer; void *data; int size, esize; - struct trace_event_call *call = &tu->tp.call; + struct trace_event_call *call = trace_probe_event_call(&tu->tp); WARN_ON(call != trace_file->event_call); @@ -1113,7 +1113,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, struct uprobe_cpu_buffer *ucb, int dsize) { - struct trace_event_call *call = &tu->tp.call; + struct trace_event_call *call = trace_probe_event_call(&tu->tp); struct uprobe_trace_entry_head *entry; struct hlist_head *head; void *data; @@ -1316,9 +1316,10 @@ static struct trace_event_functions uprobe_funcs = { .trace = print_uprobe_event }; -static inline void init_trace_event_call(struct trace_uprobe *tu, - struct trace_event_call *call) +static inline void init_trace_event_call(struct trace_uprobe *tu) { + struct trace_event_call *call = trace_probe_event_call(&tu->tp); + call->event.funcs = &uprobe_funcs; call->class->define_fields = uprobe_event_define_fields; @@ -1329,7 +1330,7 @@ static inline void init_trace_event_call(struct trace_uprobe *tu, static int register_uprobe_event(struct trace_uprobe *tu) { - init_trace_event_call(tu, &tu->tp.call); + init_trace_event_call(tu); return trace_probe_register_event_call(&tu->tp); } @@ -1376,14 +1377,14 @@ create_local_trace_uprobe(char *name, unsigned long offs, tu->path = path; tu->ref_ctr_offset = ref_ctr_offset; tu->filename = kstrdup(name, GFP_KERNEL); - init_trace_event_call(tu, &tu->tp.call); + init_trace_event_call(tu); if (traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) { ret = -ENOMEM; goto error; } - return &tu->tp.call; + return trace_probe_event_call(&tu->tp); error: free_trace_uprobe(tu); return ERR_PTR(ret); -- cgit v1.2.3-59-g8ed1b From 715fa2fd4c6c3e9165659ac26a582b8a2e607b93 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 1 Jun 2019 00:18:07 +0900 Subject: tracing/kprobe: Check registered state using kprobe Change registered check only by trace_kprobe and remove TP_FLAG_REGISTERED from trace_probe, since this feature is only used for trace_kprobe. Link: http://lkml.kernel.org/r/155931588704.28323.4952266828256245833.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 23 +++++++++++++++-------- kernel/trace/trace_probe.h | 6 ------ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 62362ad1ad98..9d483ad9bb6c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -157,6 +157,12 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) return nhit; } +static nokprobe_inline bool trace_kprobe_is_registered(struct trace_kprobe *tk) +{ + return !(list_empty(&tk->rp.kp.list) && + hlist_unhashed(&tk->rp.kp.hlist)); +} + /* Return 0 if it fails to find the symbol address */ static nokprobe_inline unsigned long trace_kprobe_address(struct trace_kprobe *tk) @@ -244,6 +250,8 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group, tk->rp.kp.pre_handler = kprobe_dispatcher; tk->rp.maxactive = maxactive; + INIT_HLIST_NODE(&tk->rp.kp.hlist); + INIT_LIST_HEAD(&tk->rp.kp.list); ret = trace_probe_init(&tk->tp, event, group); if (ret < 0) @@ -273,7 +281,7 @@ static inline int __enable_trace_kprobe(struct trace_kprobe *tk) { int ret = 0; - if (trace_probe_is_registered(&tk->tp) && !trace_kprobe_has_gone(tk)) { + if (trace_kprobe_is_registered(tk) && !trace_kprobe_has_gone(tk)) { if (trace_kprobe_is_return(tk)) ret = enable_kretprobe(&tk->rp); else @@ -333,7 +341,7 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) } else trace_probe_clear_flag(tp, TP_FLAG_PROFILE); - if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) { + if (!trace_probe_is_enabled(tp) && trace_kprobe_is_registered(tk)) { if (trace_kprobe_is_return(tk)) disable_kretprobe(&tk->rp); else @@ -381,7 +389,7 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) { int i, ret; - if (trace_probe_is_registered(&tk->tp)) + if (trace_kprobe_is_registered(tk)) return -EINVAL; if (within_notrace_func(tk)) { @@ -407,21 +415,20 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) else ret = register_kprobe(&tk->rp.kp); - if (ret == 0) - trace_probe_set_flag(&tk->tp, TP_FLAG_REGISTERED); return ret; } /* Internal unregister function - just handle k*probes and flags */ static void __unregister_trace_kprobe(struct trace_kprobe *tk) { - if (trace_probe_is_registered(&tk->tp)) { + if (trace_kprobe_is_registered(tk)) { if (trace_kprobe_is_return(tk)) unregister_kretprobe(&tk->rp); else unregister_kprobe(&tk->rp.kp); - trace_probe_clear_flag(&tk->tp, TP_FLAG_REGISTERED); - /* Cleanup kprobe for reuse */ + /* Cleanup kprobe for reuse and mark it unregistered */ + INIT_HLIST_NODE(&tk->rp.kp.hlist); + INIT_LIST_HEAD(&tk->rp.kp.list); if (tk->rp.kp.symbol_name) tk->rp.kp.addr = NULL; } diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 6c33d4aa36c3..d1714820efe1 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -55,7 +55,6 @@ /* Flags for trace_probe */ #define TP_FLAG_TRACE 1 #define TP_FLAG_PROFILE 2 -#define TP_FLAG_REGISTERED 4 /* data_loc: data location, compatible with u32 */ #define make_data_loc(len, offs) \ @@ -261,11 +260,6 @@ static inline bool trace_probe_is_enabled(struct trace_probe *tp) return trace_probe_test_flag(tp, TP_FLAG_TRACE | TP_FLAG_PROFILE); } -static inline bool trace_probe_is_registered(struct trace_probe *tp) -{ - return trace_probe_test_flag(tp, TP_FLAG_REGISTERED); -} - static inline const char *trace_probe_name(struct trace_probe *tp) { return trace_event_name(&tp->call); -- cgit v1.2.3-59-g8ed1b From cbd965bde74c3d0e53798404fd4b9829a68ccec8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 3 Jul 2019 14:40:42 -0400 Subject: ftrace/selftests: Return the skip code when tracing directory not configured in kernel If the kernel is not configured with ftrace enabled, the ftracetest selftests should return the error code of "4" as that is the kselftests "skip" code, and not "1" which means an error. To determine if ftrace is enabled, first the newer "tracefs" is searched for in /proc/mounts. If it is not found, then "debugfs" is searched for (as old kernels do not have tracefs). If that is not found, an attempt to mount the tracefs or debugfs is performed. This is done by seeing first if the /sys/kernel/tracing directory exists. If it does than tracefs is configured in the kernel and an attempt to mount it is performed. If /sys/kernel/tracing does not exist, then /sys/kernel/debug is tested to see if that directory exists. If it does, then an attempt to mount debugfs on that directory is performed. If it does not exist, then debugfs is not configured in the running kernel and the test exits with the skip code. If either mount fails, then a normal error is returned as they do exist in the kernel but something went wrong to mount them. This changes the test to always try the tracefs file system first as it has been in the kernel for some time now and it is better to test it if it is available instead of always testing debugfs. Link: http://lkml.kernel.org/r/20190702062358.7330-1-po-hsu.lin@canonical.com Reported-by: Po-Hsu Lin Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/testing/selftests/ftrace/ftracetest | 38 ++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 136387422b00..edf5ea35d2a7 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -23,9 +23,15 @@ echo " If is -, all logs output in console only" exit $1 } +# default error +err_ret=1 + +# kselftest skip code is 4 +err_skip=4 + errexit() { # message echo "Error: $1" 1>&2 - exit 1 + exit $err_ret } # Ensuring user privilege @@ -116,11 +122,31 @@ parse_opts() { # opts } # Parameters -DEBUGFS_DIR=`grep debugfs /proc/mounts | cut -f2 -d' ' | head -1` -if [ -z "$DEBUGFS_DIR" ]; then - TRACING_DIR=`grep tracefs /proc/mounts | cut -f2 -d' ' | head -1` -else - TRACING_DIR=$DEBUGFS_DIR/tracing +TRACING_DIR=`grep tracefs /proc/mounts | cut -f2 -d' ' | head -1` +if [ -z "$TRACING_DIR" ]; then + DEBUGFS_DIR=`grep debugfs /proc/mounts | cut -f2 -d' ' | head -1` + if [ -z "$DEBUGFS_DIR" ]; then + # If tracefs exists, then so does /sys/kernel/tracing + if [ -d "/sys/kernel/tracing" ]; then + mount -t tracefs nodev /sys/kernel/tracing || + errexit "Failed to mount /sys/kernel/tracing" + TRACING_DIR="/sys/kernel/tracing" + # If debugfs exists, then so does /sys/kernel/debug + elif [ -d "/sys/kernel/debug" ]; then + mount -t debugfs nodev /sys/kernel/debug || + errexit "Failed to mount /sys/kernel/debug" + TRACING_DIR="/sys/kernel/debug/tracing" + else + err_ret=$err_skip + errexit "debugfs and tracefs are not configured in this kernel" + fi + else + TRACING_DIR="$DEBUGFS_DIR/tracing" + fi +fi +if [ ! -d "$TRACING_DIR" ]; then + err_ret=$err_skip + errexit "ftrace is not configured in this kernel" fi TOP_DIR=`absdir $0` -- cgit v1.2.3-59-g8ed1b From 6e55f320f00e4db7144a4906dd8ac53701891e31 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 3 Jul 2019 15:46:08 -0400 Subject: ftrace/selftest: Test if set_event/ftrace_pid exists before writing While testing on a very old kernel (3.5), the tests failed because the write to set_event_pid in the setup code, did not exist. The tests themselves could pass, but the setup failed causing an error. Other files test for existance before writing to them. Do the same for set_event_pid and set_ftrace_pid. Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/testing/selftests/ftrace/test.d/functions | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 779ec11f61bd..1d96c5f7e402 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -91,8 +91,8 @@ initialize_ftrace() { # Reset ftrace to initial-state reset_events_filter reset_ftrace_filter disable_events - echo > set_event_pid # event tracer is always on - echo > set_ftrace_pid + [ -f set_event_pid ] && echo > set_event_pid + [ -f set_ftrace_pid ] && echo > set_ftrace_pid [ -f set_ftrace_filter ] && echo | tee set_ftrace_* [ -f set_graph_function ] && echo | tee set_graph_* [ -f stack_trace_filter ] && echo > stack_trace_filter -- cgit v1.2.3-59-g8ed1b From 46710f3a34b592ac5c51a95f696b2d2a2a0d9419 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 25 May 2019 09:57:59 -0700 Subject: tracing: Pass type into tracing_generic_entry_update() All callers of tracing_generic_entry_update() have to initialize entry->type, so let's just simply move it inside. Link: http://lkml.kernel.org/r/20190525165802.25944-2-xiyou.wangcong@gmail.com Cc: Ingo Molnar Signed-off-by: Cong Wang Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 1 + kernel/trace/trace.c | 8 ++++---- kernel/trace/trace_event_perf.c | 3 +-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 8a62731673f7..5c6f2a6c8cd2 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -142,6 +142,7 @@ enum print_line_t { enum print_line_t trace_handle_return(struct trace_seq *s); void tracing_generic_entry_update(struct trace_entry *entry, + unsigned short type, unsigned long flags, int pc); struct trace_event_file; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 77b9c4ca5faa..6b62e1718548 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -743,8 +743,7 @@ trace_event_setup(struct ring_buffer_event *event, { struct trace_entry *ent = ring_buffer_event_data(event); - tracing_generic_entry_update(ent, flags, pc); - ent->type = type; + tracing_generic_entry_update(ent, type, flags, pc); } static __always_inline struct ring_buffer_event * @@ -2312,13 +2311,14 @@ enum print_line_t trace_handle_return(struct trace_seq *s) EXPORT_SYMBOL_GPL(trace_handle_return); void -tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, - int pc) +tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, + unsigned long flags, int pc) { struct task_struct *tsk = current; entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; + entry->type = type; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 4629a6104474..0892e38ed6fb 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -416,8 +416,7 @@ void perf_trace_buf_update(void *record, u16 type) unsigned long flags; local_save_flags(flags); - tracing_generic_entry_update(entry, flags, pc); - entry->type = type; + tracing_generic_entry_update(entry, type, flags, pc); } NOKPROBE_SYMBOL(perf_trace_buf_update); -- cgit v1.2.3-59-g8ed1b From 5967bd5c4239be449744a1471daf60c866486c24 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 25 May 2019 09:58:00 -0700 Subject: tracing: Let filter_assign_type() detect FILTER_PTR_STRING filter_assign_type() could detect dynamic string and static string, but not string pointers. Teach filter_assign_type() to detect string pointers, and this will be needed by trace event injection code. BTW, trace event hist uses FILTER_PTR_STRING too. Link: http://lkml.kernel.org/r/20190525165802.25944-3-xiyou.wangcong@gmail.com Cc: Ingo Molnar Signed-off-by: Cong Wang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_filter.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index d3e59312ef40..550e8a0d048a 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1080,6 +1080,9 @@ int filter_assign_type(const char *type) if (strchr(type, '[') && strstr(type, "char")) return FILTER_STATIC_STRING; + if (strcmp(type, "char *") == 0 || strcmp(type, "const char *") == 0) + return FILTER_PTR_STRING; + return FILTER_OTHER; } -- cgit v1.2.3-59-g8ed1b From 0aeb1def44169cbe7119f26cf10b974a2046142e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 25 May 2019 09:58:01 -0700 Subject: tracing: Make trace_get_fields() global trace_get_fields() is the only way to read tracepoint fields at run time, as their fields are defined at compile-time with macros. Make this function visible to all users and it will be used by trace event injection code to calculate the size of a tracepoint entry. Link: http://lkml.kernel.org/r/20190525165802.25944-4-xiyou.wangcong@gmail.com Cc: Ingo Molnar Signed-off-by: Cong Wang Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 8 ++++++++ kernel/trace/trace_events.c | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 5c6f2a6c8cd2..5150436783e8 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -318,6 +318,14 @@ trace_event_name(struct trace_event_call *call) return call->name; } +static inline struct list_head * +trace_get_fields(struct trace_event_call *event_call) +{ + if (!event_call->class->get_fields) + return &event_call->class->fields; + return event_call->class->get_fields(event_call); +} + struct trace_array; struct trace_subsystem_dir; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index edc72f3b080c..c7506bc81b75 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -70,14 +70,6 @@ static int system_refcount_dec(struct event_subsystem *system) #define while_for_each_event_file() \ } -static struct list_head * -trace_get_fields(struct trace_event_call *event_call) -{ - if (!event_call->class->get_fields) - return &event_call->class->fields; - return event_call->class->get_fields(event_call); -} - static struct ftrace_event_field * __find_event_field(struct list_head *head, char *name) { -- cgit v1.2.3-59-g8ed1b