From f8b8be8a310a55856fd2c369dade08088d85df3b Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu
Date: Fri, 21 Nov 2014 05:25:16 -0500
Subject: ftrace, kprobes: Support IPMODIFY flag to find IP modify conflict

Introduce FTRACE_OPS_FL_IPMODIFY to avoid conflicts among ftrace users who
may modify regs->ip to change the execution path. If two or more users
modify regs->ip on the same function entry, one of them will be broken.
So they must set the IPMODIFY flag and make sure that ftrace_set_filter_ip()
succeeds.

Note that ftrace doesn't allow an ftrace_ops that has the IPMODIFY flag to
have a notrace hash, and the ftrace_ops must have a filter hash (so that
the ftrace_ops can hook only specific entries), because it strongly depends
on the address and must be allowed for only a few selected functions.

Link: http://lkml.kernel.org/r/20141121102516.11844.27829.stgit@localhost.localdomain

Cc: Jiri Kosina
Cc: Seth Jennings
Cc: Petr Mladek
Cc: Vojtech Pavlik
Cc: Miroslav Benes
Cc: Ingo Molnar
Cc: Ananth N Mavinakayanahalli
Cc: Josh Poimboeuf
Cc: Namhyung Kim
Signed-off-by: Masami Hiramatsu
[ fixed up some of the comments ]
Signed-off-by: Steven Rostedt
---
 Documentation/trace/ftrace.txt | 5 +++++
 1 file changed, 5 insertions(+)
(limited to 'Documentation')

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 4da42616939f..f10f5f5d260d 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -234,6 +234,11 @@ of ftrace. Here is a list of some of the key files:
 	  will be displayed on the same line as the function that
 	  is returning registers.
 
+	  If the callback registered to be traced by a function with
+	  the "ip modify" attribute (thus the regs->ip can be changed),
+	  an 'I' will be displayed on the same line as the function that
+	  can be overridden.
+
 	function_profile_enabled:
 
 	  When set it will enable all functions with either the function
-- cgit v1.2.3-59-g8ed1b

From 3e6fb8e94329270c70ea00867112a2c6f348207f Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Wed, 3 Dec 2014 10:39:20 +0900
Subject: Documentation: describe trace_buf_size parameter more accurately

I ran into a panic because too little free memory was left when I booted
with the trace_buf_size parameter. After digging into the problem, I found
that trace_buf_size is the size of the trace buffer on each CPU rather than
the total size of the trace buffer. To keep others from falling into the
same trap, describe the trace_buf_size parameter more accurately.

Link: http://lkml.kernel.org/r/1417570760-10620-1-git-send-email-iamjoonsoo.kim@lge.com

Signed-off-by: Joonsoo Kim
Signed-off-by: Steven Rostedt
---
 Documentation/kernel-parameters.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'Documentation')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 7dbe5ec9d9cd..1d09eb37c562 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3477,7 +3477,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			are saved.
 
 	trace_buf_size=nn[KMG]
-			[FTRACE] will set tracing buffer size.
+			[FTRACE] will set tracing buffer size on each cpu.
 
 	trace_event=[event-list]
 			[FTRACE] Set and start specified trace events in order
-- cgit v1.2.3-59-g8ed1b
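Looking back at the IPMODIFY patch at the top of this series, the usage
pattern it implies for a regs->ip-modifying user is: install a filter on
exactly the address to be hooked, then register the ftrace_ops, and treat a
registration failure as a conflict with another IPMODIFY user. The module
below is only a rough sketch of that pattern, not code from the patches; the
target symbol do_nothing_target and the replacement function are invented
for illustration, the callback signature is the one ftrace callbacks with
saved regs used around this kernel version, and error handling is minimal.

#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/kallsyms.h>

/* Replacement that execution is redirected to (illustrative only). */
static void my_replacement(void)
{
        pr_info("redirected away from the original function\n");
}

/* ftrace callback: point the saved ip at the replacement. */
static void notrace my_ipmodify_func(unsigned long ip, unsigned long parent_ip,
                                     struct ftrace_ops *op, struct pt_regs *regs)
{
        regs->ip = (unsigned long)my_replacement;
}

static struct ftrace_ops my_ops = {
        .func   = my_ipmodify_func,
        /* SAVE_REGS is needed to touch regs->ip; IPMODIFY declares that intent. */
        .flags  = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
};

static unsigned long target_ip;

static int __init ipmodify_demo_init(void)
{
        int ret;

        /* Hypothetical target function; a real user picks a specific symbol. */
        target_ip = kallsyms_lookup_name("do_nothing_target");
        if (!target_ip)
                return -ENOENT;

        /* IPMODIFY requires a filter hash: hook only this one address. */
        ret = ftrace_set_filter_ip(&my_ops, target_ip, 0, 0);
        if (ret)
                return ret;

        /* Fails if another IPMODIFY user already claims this entry. */
        return register_ftrace_function(&my_ops);
}

static void __exit ipmodify_demo_exit(void)
{
        unregister_ftrace_function(&my_ops);
        ftrace_set_filter_ip(&my_ops, target_ip, 1, 0);
}

module_init(ipmodify_demo_init);
module_exit(ipmodify_demo_exit);
MODULE_LICENSE("GPL");

If two such modules set a filter on the same entry, the second
register_ftrace_function() call is expected to fail instead of both silently
rewriting regs->ip, which is exactly the conflict the flag is meant to expose.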
From 8e1e1df29d837c589c8b4d7b49864481ff7795b8 Mon Sep 17 00:00:00 2001
From: Byungchul Park
Date: Mon, 24 Nov 2014 09:34:19 +0900
Subject: tracing: Add additional marks to signal very large time deltas

Currently, the function graph tracer prints "!" or "+" just before the
function execution time to signal function overhead, depending on the time.
And some tracers that trace latency also print "!" or "+" just after the
time to signal the overhead, depending on the interval between events. Even
though this is usually enough, we sometimes need to signal execution times
bigger than 100 microseconds.

For example, I used the function graph tracer to detect whether there is
any case where exit_mm() takes too much time. I did the following steps in
/sys/kernel/debug/tracing. It was easier to detect very large execution
times with the patched kernel than with the original kernel.

  $ echo exit_mm > set_graph_function
  $ echo function_graph > current_tracer
  $ echo > trace
  $ cat trace_pipe > $LOGFILE
  ... (do something and terminate logging)
  $ grep "\\$" $LOGFILE
  3) $ 22082032 us | } /* kernel_map_pages */
  3) $ 22082040 us | } /* free_pages_prepare */
  3) $ 22082113 us | } /* free_hot_cold_page */
  3) $ 22083455 us | } /* free_hot_cold_page_list */
  3) $ 22083895 us | } /* release_pages */
  3) $ 22177873 us | } /* free_pages_and_swap_cache */
  3) $ 22178929 us | } /* unmap_single_vma */
  3) $ 22198885 us | } /* unmap_vmas */
  3) $ 22206949 us | } /* exit_mmap */
  3) $ 22207659 us | } /* mmput */
  3) $ 22207793 us | } /* exit_mm */

And then, it was easy to find out that a schedule-out occurred via
sub_preempt_count() within kernel_map_pages(). This patch can be useful for
detecting very large function execution times caused by either a problematic
function implementation or scheduling issues.

Link: http://lkml.kernel.org/r/1416789259-24038-1-git-send-email-byungchul.park@lge.com

Signed-off-by: Byungchul Park
Signed-off-by: Steven Rostedt
---
 Documentation/trace/ftrace.txt       | 10 +++++++---
 kernel/trace/trace.h                 |  2 ++
 kernel/trace/trace_functions_graph.c | 23 +++--------------------
 kernel/trace/trace_output.c          | 34 +++++++++++++++++++++++++++++-----
 4 files changed, 41 insertions(+), 28 deletions(-)
(limited to 'Documentation')

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index f10f5f5d260d..8408e040f06f 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -685,9 +685,11 @@ The above is mostly meaningful for kernel developers.
   needs to be fixed to be only relative to the same CPU.
   The marks are determined by the difference between this
   current trace and the next trace.
-  '!' - greater than preempt_mark_thresh (default 100)
-  '+' - greater than 1 microsecond
-  ' ' - less than or equal to 1 microsecond.
+  '$' - greater than 1 second
+  '#' - greater than 1000 microsecond
+  '!' - greater than 100 microsecond
+  '+' - greater than 10 microsecond
+  ' ' - less than or equal to 10 microsecond.
 
   The rest is the same as the 'trace' file.
 
@@ -1956,6 +1958,8 @@ want, depending on your needs.
 
  + means that the function exceeded 10 usecs.
  ! means that the function exceeded 100 usecs.
+ # means that the function exceeded 1000 usecs.
+ $ means that the function exceeded 1 sec.
 
 - The task/pid field displays the thread cmdline and pid which
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c3a37e55ec8b..3255dfb054a0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -708,6 +708,8 @@ enum print_line_t print_trace_line(struct trace_iterator *iter);
 
 extern unsigned long trace_flags;
 
+extern char trace_find_mark(unsigned long long duration);
+
 /* Standard output formatting function used for function return traces */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 100288d10e1f..6c2ab955018c 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -730,8 +730,6 @@ static void
 print_graph_duration(unsigned long long duration, struct trace_seq *s,
                      u32 flags)
 {
-        bool duration_printed = false;
-
         if (!(flags & TRACE_GRAPH_PRINT_DURATION) ||
             !(trace_flags & TRACE_ITER_CONTEXT_INFO))
                 return;
@@ -750,24 +748,9 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
         }
 
         /* Signal a overhead of time execution to the output */
-        if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
-                /* Duration exceeded 100 usecs */
-                if (duration > 100000ULL) {
-                        trace_seq_puts(s, "! ");
-                        duration_printed = true;
-
-                /* Duration exceeded 10 usecs */
-                } else if (duration > 10000ULL) {
-                        trace_seq_puts(s, "+ ");
-                        duration_printed = true;
-                }
-        }
-
-        /*
-         * If we did not exceed the duration tresholds or we dont want
-         * to print out the overhead. Either way we need to fill out the space.
-         */
-        if (!duration_printed)
+        if (flags & TRACE_GRAPH_PRINT_OVERHEAD)
+                trace_seq_printf(s, "%c ", trace_find_mark(duration));
+        else
                 trace_seq_puts(s, " ");
 
         trace_print_graph_duration(duration, s);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 723818bc83b4..b77b9a697619 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -115,7 +115,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
 
         if (ret == (const char *)(trace_seq_buffer_ptr(p)))
                 trace_seq_printf(p, "0x%lx", val);
-        
+
         trace_seq_putc(p, 0);
 
         return ret;
@@ -443,7 +443,32 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
         return trace_print_lat_fmt(s, entry);
 }
 
-static unsigned long preempt_mark_thresh_us = 100;
+#undef MARK
+#define MARK(v, s) {.val = v, .sym = s}
+/* trace overhead mark */
+static const struct trace_mark {
+        unsigned long long      val; /* unit: nsec */
+        char                    sym;
+} mark[] = {
+        MARK(1000000000ULL      , '$'), /* 1 sec */
+        MARK(1000000ULL         , '#'), /* 1000 usecs */
+        MARK(100000ULL          , '!'), /* 100 usecs */
+        MARK(10000ULL           , '+'), /* 10 usecs */
+};
+#undef MARK
+
+char trace_find_mark(unsigned long long d)
+{
+        int i;
+        int size = ARRAY_SIZE(mark);
+
+        for (i = 0; i < size; i++) {
+                if (d >= mark[i].val)
+                        break;
+        }
+
+        return (i == size) ? ' ' : mark[i].sym;
+}
 
 static int
 lat_print_timestamp(struct trace_iterator *iter, u64 next_ts)
@@ -480,8 +505,7 @@ lat_print_timestamp(struct trace_iterator *iter, u64 next_ts)
                 trace_seq_printf(
                         s, " %4lldus%c: ",
                         abs_ts,
-                        rel_ts > preempt_mark_thresh_us ? '!' :
-                          rel_ts > 1 ? '+' : ' ');
+                        trace_find_mark(rel_ts * NSEC_PER_USEC));
 
         } else { /* !verbose && !in_ns */
                 trace_seq_printf(s, " %4lld: ", abs_ts);
@@ -663,7 +687,7 @@ int register_ftrace_event(struct trace_event *event)
                 goto out;
 
         } else {
-                
+
                 event->type = next_event_type++;
                 list = &ftrace_event_list;
         }
-- cgit v1.2.3-59-g8ed1b
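
As a final illustration of the mark selection the last patch introduces:
trace_find_mark() is simply a first-match walk down a table ordered from the
largest threshold to the smallest, falling back to a space when no threshold
is reached. The small userspace program below re-implements that walk with a
few made-up sample durations; it is only a demonstration of the idea, not
the kernel code itself.

#include <stdio.h>

/* Threshold table in nanoseconds, largest first, mirroring the marks above. */
struct mark { unsigned long long val; char sym; };

static const struct mark marks[] = {
        { 1000000000ULL, '$' }, /* 1 sec */
        {    1000000ULL, '#' }, /* 1000 usecs */
        {     100000ULL, '!' }, /* 100 usecs */
        {      10000ULL, '+' }, /* 10 usecs */
};

/* Return the mark for the first threshold the duration reaches, else ' '. */
static char find_mark(unsigned long long d)
{
        size_t i;

        for (i = 0; i < sizeof(marks) / sizeof(marks[0]); i++)
                if (d >= marks[i].val)
                        return marks[i].sym;
        return ' ';
}

int main(void)
{
        /* Arbitrary sample durations; 22082032 us is taken from the trace above. */
        unsigned long long samples_ns[] = {
                5000ULL,                /* 5 usecs   -> ' ' */
                15000ULL,               /* 15 usecs  -> '+' */
                150000ULL,              /* 150 usecs -> '!' */
                1500000ULL,             /* 1.5 msecs -> '#' */
                22082032000ULL,         /* ~22 secs  -> '$' */
        };
        size_t i;

        for (i = 0; i < sizeof(samples_ns) / sizeof(samples_ns[0]); i++)
                printf("%12llu ns -> '%c'\n", samples_ns[i],
                       find_mark(samples_ns[i]));
        return 0;
}

Because the table runs from the largest threshold down, the first hit is the
most severe mark that applies; the space fall-through preserves the
two-character column that the old "! " and "+ " strings occupied.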