From d6d55f0b9d900673548515614b56ab55aa2c51f8 Mon Sep 17 00:00:00 2001 From: Jacob Shin Date: Thu, 29 May 2014 17:26:50 +0200 Subject: perf/x86/amd: AMD support for bp_len > HW_BREAKPOINT_LEN_8 Implement hardware breakpoint address mask for AMD Family 16h and above processors. CPUID feature bit indicates hardware support for DRn_ADDR_MASK MSRs. These masks further qualify DRn/DR7 hardware breakpoint addresses to allow matching of larger addresses ranges. Valuable advice and pseudo code from Oleg Nesterov Signed-off-by: Jacob Shin Signed-off-by: Suravee Suthikulpanit Acked-by: Jiri Olsa Reviewed-by: Oleg Nesterov Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Cc: xiakaixu Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/cpufeature.h | 2 ++ arch/x86/include/asm/debugreg.h | 5 +++++ arch/x86/include/asm/hw_breakpoint.h | 1 + arch/x86/include/uapi/asm/msr-index.h | 4 ++++ arch/x86/kernel/cpu/amd.c | 19 +++++++++++++++++++ arch/x86/kernel/hw_breakpoint.c | 20 ++++++++++++++++---- 6 files changed, 47 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 0bb1335313b2..53966d65591e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -174,6 +174,7 @@ #define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ #define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ +#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ #define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ /* @@ -383,6 +384,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) #define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) #define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT) +#define cpu_has_bpext boot_cpu_has(X86_FEATURE_BPEXT) #if __GNUC__ >= 4 extern void warn_pre_alternatives(void); diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 61fd18b83b6c..12cb66f6d3a5 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -114,5 +114,10 @@ static inline void debug_stack_usage_inc(void) { } static inline void debug_stack_usage_dec(void) { } #endif /* X86_64 */ +#ifdef CONFIG_CPU_SUP_AMD +extern void set_dr_addr_mask(unsigned long mask, int dr); +#else +static inline void set_dr_addr_mask(unsigned long mask, int dr) { } +#endif #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index ef1c4d2d41ec..6c98be864a75 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -12,6 +12,7 @@ */ struct arch_hw_breakpoint { unsigned long address; + unsigned long mask; u8 len; u8 type; }; diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 8f02f6990759..b1fb4fae03d3 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -212,6 +212,10 @@ /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 +#define MSR_F16H_DR1_ADDR_MASK 0xc0011019 +#define MSR_F16H_DR2_ADDR_MASK 0xc001101a +#define MSR_F16H_DR3_ADDR_MASK 0xc001101b +#define MSR_F16H_DR0_ADDR_MASK 0xc0011027 /* Fam 15h MSRs */ #define MSR_F15H_PERF_CTL 0xc0010200 diff --git a/arch/x86/kernel/cpu/amd.c 
b/arch/x86/kernel/cpu/amd.c index 813d29d00a17..abe4ec760db3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -870,3 +870,22 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) return false; } + +void set_dr_addr_mask(unsigned long mask, int dr) +{ + if (!cpu_has_bpext) + return; + + switch (dr) { + case 0: + wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0); + break; + case 1: + case 2: + case 3: + wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0); + break; + default: + break; + } +} diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 3d5fb509bdeb..b5cb0c59ea87 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -126,6 +126,8 @@ int arch_install_hw_breakpoint(struct perf_event *bp) *dr7 |= encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); + if (info->mask) + set_dr_addr_mask(info->mask, i); return 0; } @@ -161,6 +163,8 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) *dr7 &= ~__encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); + if (info->mask) + set_dr_addr_mask(0, i); } static int get_hbp_len(u8 hbp_len) @@ -277,6 +281,8 @@ static int arch_build_bp_info(struct perf_event *bp) } /* Len */ + info->mask = 0; + switch (bp->attr.bp_len) { case HW_BREAKPOINT_LEN_1: info->len = X86_BREAKPOINT_LEN_1; @@ -293,11 +299,17 @@ static int arch_build_bp_info(struct perf_event *bp) break; #endif default: - return -EINVAL; + if (!is_power_of_2(bp->attr.bp_len)) + return -EINVAL; + if (!cpu_has_bpext) + return -EOPNOTSUPP; + info->mask = bp->attr.bp_len - 1; + info->len = X86_BREAKPOINT_LEN_1; } return 0; } + /* * Validate the arch-specific HW Breakpoint register settings */ @@ -312,11 +324,11 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) if (ret) return ret; - ret = -EINVAL; - switch (info->len) { case X86_BREAKPOINT_LEN_1: align = 0; + if (info->mask) + align = info->mask; break; case X86_BREAKPOINT_LEN_2: align = 1; @@ -330,7 +342,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) break; #endif default: - return ret; + WARN_ON_ONCE(1); } /* -- cgit v1.2.3-59-g8ed1b From 3741eb9f8c3be3ec59583881c1f49980dad844e0 Mon Sep 17 00:00:00 2001 From: Jacob Shin Date: Thu, 29 May 2014 17:26:51 +0200 Subject: perf tools: allow user to specify hardware breakpoint bp_len Currently bp_len is given a default value of 4. Allow user to override it: $ perf stat -e mem:0x1000/8 ^ bp_len If no value is given, it will default to 4 as it did before. Signed-off-by: Jacob Shin Signed-off-by: Suravee Suthikulpanit Acked-by: Jiri Olsa Reviewed-by: Oleg Nesterov Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Cc: xiakaixu Signed-off-by: Frederic Weisbecker --- tools/perf/Documentation/perf-record.txt | 7 +++++-- tools/perf/util/parse-events.c | 21 +++++++++++---------- tools/perf/util/parse-events.h | 2 +- tools/perf/util/parse-events.l | 1 + tools/perf/util/parse-events.y | 26 ++++++++++++++++++++++++-- 5 files changed, 42 insertions(+), 15 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index af9a54ece024..81a20f21a3e6 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -33,12 +33,15 @@ OPTIONS - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a hexadecimal event descriptor. 
- - a hardware breakpoint event in the form of '\mem:addr[:access]' + - a hardware breakpoint event in the form of '\mem:addr[/len][:access]' where addr is the address in memory you want to break in. Access is the memory access type (read, write, execute) it can - be passed as follows: '\mem:addr[:[r][w][x]]'. + be passed as follows: '\mem:addr[:[r][w][x]]'. len is the range, + number of bytes from specified addr, which the breakpoint will cover. If you want to profile read-write accesses in 0x1000, just set 'mem:0x1000:rw'. + If you want to profile write accesses in [0x1000~1008), just set + 'mem:0x1000/8:w'. --filter=:: Event filter. diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index c659a3ca1283..efa1ff4cca63 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -526,7 +526,7 @@ do { \ } int parse_events_add_breakpoint(struct list_head *list, int *idx, - void *ptr, char *type) + void *ptr, char *type, u64 len) { struct perf_event_attr attr; @@ -536,14 +536,15 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, if (parse_breakpoint_type(type, &attr)) return -EINVAL; - /* - * We should find a nice way to override the access length - * Provide some defaults for now - */ - if (attr.bp_type == HW_BREAKPOINT_X) - attr.bp_len = sizeof(long); - else - attr.bp_len = HW_BREAKPOINT_LEN_4; + /* Provide some defaults if len is not specified */ + if (!len) { + if (attr.bp_type == HW_BREAKPOINT_X) + len = sizeof(long); + else + len = HW_BREAKPOINT_LEN_4; + } + + attr.bp_len = len; attr.type = PERF_TYPE_BREAKPOINT; attr.sample_period = 1; @@ -1364,7 +1365,7 @@ void print_events(const char *event_glob, bool name_only) printf("\n"); printf(" %-50s [%s]\n", - "mem:[:access]", + "mem:[/len][:access]", event_type_descriptors[PERF_TYPE_BREAKPOINT]); printf("\n"); } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index db2cf78ff0f3..a19fbeb80943 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -104,7 +104,7 @@ int parse_events_add_numeric(struct list_head *list, int *idx, int parse_events_add_cache(struct list_head *list, int *idx, char *type, char *op_result1, char *op_result2); int parse_events_add_breakpoint(struct list_head *list, int *idx, - void *ptr, char *type); + void *ptr, char *type, u64 len); int parse_events_add_pmu(struct list_head *list, int *idx, char *pmu , struct list_head *head_config); enum perf_pmu_event_symbol_type diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 906630bbf8eb..94eacb6c1ef7 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -159,6 +159,7 @@ branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE { {modifier_bp} { return str(yyscanner, PE_MODIFIER_BP); } : { return ':'; } +"/" { return '/'; } {num_dec} { return value(yyscanner, 10); } {num_hex} { return value(yyscanner, 16); } /* diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 93c4c9fbc922..72def077dbbf 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -326,6 +326,28 @@ PE_NAME_CACHE_TYPE } event_legacy_mem: +PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc +{ + struct parse_events_evlist *data = _data; + struct list_head *list; + + ALLOC_LIST(list); + ABORT_ON(parse_events_add_breakpoint(list, &data->idx, + (void *) $2, $6, $4)); + $$ = list; +} +| +PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc +{ + struct 
parse_events_evlist *data = _data; + struct list_head *list; + + ALLOC_LIST(list); + ABORT_ON(parse_events_add_breakpoint(list, &data->idx, + (void *) $2, NULL, $4)); + $$ = list; +} +| PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc { struct parse_events_evlist *data = _data; @@ -333,7 +355,7 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc ALLOC_LIST(list); ABORT_ON(parse_events_add_breakpoint(list, &data->idx, - (void *) $2, $4)); + (void *) $2, $4, 0)); $$ = list; } | @@ -344,7 +366,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc ALLOC_LIST(list); ABORT_ON(parse_events_add_breakpoint(list, &data->idx, - (void *) $2, NULL)); + (void *) $2, NULL, 0)); $$ = list; } -- cgit v1.2.3-59-g8ed1b From ec32398c231a019d39017afee490728ab3b60b92 Mon Sep 17 00:00:00 2001 From: Jacob Shin Date: Thu, 29 May 2014 17:26:52 +0200 Subject: perf tools: add hardware breakpoint bp_len test cases Signed-off-by: Jacob Shin Signed-off-by: Suravee Suthikulpanit Acked-by: Jiri Olsa Reviewed-by: Oleg Nesterov Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Cc: xiakaixu Signed-off-by: Frederic Weisbecker --- tools/perf/tests/parse-events.c | 58 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 7f2f51f93619..4169f460efa1 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1145,6 +1145,49 @@ static int test__pinned_group(struct perf_evlist *evlist) return 0; } +static int test__checkevent_breakpoint_len(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); + TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) == + evsel->attr.bp_type); + TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_1 == + evsel->attr.bp_len); + + return 0; +} + +static int test__checkevent_breakpoint_len_w(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); + TEST_ASSERT_VAL("wrong bp_type", HW_BREAKPOINT_W == + evsel->attr.bp_type); + TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_2 == + evsel->attr.bp_len); + + return 0; +} + +static int +test__checkevent_breakpoint_len_rw_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + + return test__checkevent_breakpoint_rw(evlist); +} + static int count_tracepoints(void) { char events_path[PATH_MAX]; @@ -1420,6 +1463,21 @@ static struct evlist_test test__events[] = { .check = test__pinned_group, .id = 41, }, + { + .name = "mem:0/1", + .check = test__checkevent_breakpoint_len, + .id = 42, + }, + { + .name = "mem:0/2:w", + .check = test__checkevent_breakpoint_len_w, + .id = 43, + }, + { + .name = "mem:0/4:rw:u", + .check = test__checkevent_breakpoint_len_rw_modifier, + .id = 44 + }, #if defined(__s390x__) { .name = 
"kvm-s390:kvm_s390_create_vm", -- cgit v1.2.3-59-g8ed1b From 36748b9518a2437beffe861b47dff6d12b736b3f Mon Sep 17 00:00:00 2001 From: Jacob Shin Date: Thu, 29 May 2014 17:26:53 +0200 Subject: perf/x86: Remove get_hbp_len and replace with bp_len Clean up the logic for determining the breakpoint length Signed-off-by: Jacob Shin Signed-off-by: Suravee Suthikulpanit Acked-by: Jiri Olsa Reviewed-by: Oleg Nesterov Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Cc: xiakaixu Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/hw_breakpoint.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index b5cb0c59ea87..7114ba220fd4 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -167,29 +167,6 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) set_dr_addr_mask(0, i); } -static int get_hbp_len(u8 hbp_len) -{ - unsigned int len_in_bytes = 0; - - switch (hbp_len) { - case X86_BREAKPOINT_LEN_1: - len_in_bytes = 1; - break; - case X86_BREAKPOINT_LEN_2: - len_in_bytes = 2; - break; - case X86_BREAKPOINT_LEN_4: - len_in_bytes = 4; - break; -#ifdef CONFIG_X86_64 - case X86_BREAKPOINT_LEN_8: - len_in_bytes = 8; - break; -#endif - } - return len_in_bytes; -} - /* * Check for virtual address in kernel space. */ @@ -200,7 +177,7 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) struct arch_hw_breakpoint *info = counter_arch_bp(bp); va = info->address; - len = get_hbp_len(info->len); + len = bp->attr.bp_len; return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); } -- cgit v1.2.3-59-g8ed1b From 86038c5ea81b519a8a1fcfcd5e4599aab0cdd119 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Tue, 16 Dec 2014 12:47:34 +0100 Subject: perf: Avoid horrible stack usage Both Linus (most recent) and Steve (a while ago) reported that perf related callbacks have massive stack bloat. The problem is that software events need a pt_regs in order to properly report the event location and unwind stack. And because we could not assume one was present we allocated one on stack and filled it with minimal bits required for operation. Now, pt_regs is quite large, so this is undesirable. Furthermore it turns out that most sites actually have a pt_regs pointer available, making this even more onerous, as the stack space is pointless waste. This patch addresses the problem by observing that software events have well defined nesting semantics, therefore we can use static per-cpu storage instead of on-stack. Linus made the further observation that all but the scheduler callers of perf_sw_event() have a pt_regs available, so we change the regular perf_sw_event() to require a valid pt_regs (where it used to be optional) and add perf_sw_event_sched() for the scheduler. We have a scheduler specific call instead of a more generic _noregs() like construct because we can assume non-recursion from the scheduler and thereby simplify the code further (_noregs would have to put the recursion context call inline in order to assertain which __perf_regs element to use). One last note on the implementation of perf_trace_buf_prepare(); we allow .regs = NULL for those cases where we already have a pt_regs pointer available and do not need another. 
Reported-by: Linus Torvalds Reported-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Javi Merino Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Oleg Nesterov Cc: Paul Mackerras Cc: Petr Mladek Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vaibhav Nagarnaik Link: http://lkml.kernel.org/r/20141216115041.GW3337@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 2 +- include/linux/perf_event.h | 28 +++++++++++++++++++++------- include/trace/ftrace.h | 7 ++++--- kernel/events/core.c | 23 +++++++++++++++++------ kernel/sched/core.c | 2 +- kernel/trace/trace_event_perf.c | 4 +++- kernel/trace/trace_kprobe.c | 4 ++-- kernel/trace/trace_syscalls.c | 4 ++-- kernel/trace/trace_uprobe.c | 2 +- 9 files changed, 52 insertions(+), 24 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 0bebb5c348b8..d36f68b08acc 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -595,7 +595,7 @@ extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); extern void *perf_trace_buf_prepare(int size, unsigned short type, - struct pt_regs *regs, int *rctxp); + struct pt_regs **regs, int *rctxp); static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4f7a61ca4b39..3a7bd80b4db8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -665,6 +665,7 @@ static inline int is_software_event(struct perf_event *event) extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; +extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64); extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); #ifndef perf_arch_fetch_caller_regs @@ -689,14 +690,25 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs) static __always_inline void perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { - struct pt_regs hot_regs; + if (static_key_false(&perf_swevent_enabled[event_id])) + __perf_sw_event(event_id, nr, regs, addr); +} + +DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]); +/* + * 'Special' version for the scheduler, it hard assumes no recursion, + * which is guaranteed by us not actually scheduling inside other swevents + * because those disable preemption. 
+ */ +static __always_inline void +perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) +{ if (static_key_false(&perf_swevent_enabled[event_id])) { - if (!regs) { - perf_fetch_caller_regs(&hot_regs); - regs = &hot_regs; - } - __perf_sw_event(event_id, nr, regs, addr); + struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]); + + perf_fetch_caller_regs(regs); + ___perf_sw_event(event_id, nr, regs, addr); } } @@ -712,7 +724,7 @@ static inline void perf_event_task_sched_in(struct task_struct *prev, static inline void perf_event_task_sched_out(struct task_struct *prev, struct task_struct *next) { - perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); + perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0); if (static_key_false(&perf_sched_events.key)) __perf_event_task_sched_out(prev, next); @@ -823,6 +835,8 @@ static inline int perf_event_refresh(struct perf_event *event, int refresh) static inline void perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } static inline void +perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) { } +static inline void perf_bp_event(struct perf_event *event, void *data) { } static inline int perf_register_guest_info_callbacks diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 139b5067345b..27609dfcce25 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -763,7 +763,7 @@ perf_trace_##call(void *__data, proto) \ struct ftrace_event_call *event_call = __data; \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_raw_##call *entry; \ - struct pt_regs __regs; \ + struct pt_regs *__regs; \ u64 __addr = 0, __count = 1; \ struct task_struct *__task = NULL; \ struct hlist_head *head; \ @@ -782,18 +782,19 @@ perf_trace_##call(void *__data, proto) \ sizeof(u64)); \ __entry_size -= sizeof(u32); \ \ - perf_fetch_caller_regs(&__regs); \ entry = perf_trace_buf_prepare(__entry_size, \ event_call->event.type, &__regs, &rctx); \ if (!entry) \ return; \ \ + perf_fetch_caller_regs(__regs); \ + \ tstruct \ \ { assign; } \ \ perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ - __count, &__regs, head, __task); \ + __count, __regs, head, __task); \ } /* diff --git a/kernel/events/core.c b/kernel/events/core.c index 882f835a0d85..c10124b772c4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5889,6 +5889,8 @@ end: rcu_read_unlock(); } +DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]); + int perf_swevent_get_recursion_context(void) { struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); @@ -5904,21 +5906,30 @@ inline void perf_swevent_put_recursion_context(int rctx) put_recursion_context(swhash->recursion, rctx); } -void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) +void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { struct perf_sample_data data; - int rctx; - preempt_disable_notrace(); - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) + if (WARN_ON_ONCE(!regs)) return; perf_sample_data_init(&data, addr, 0); - do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); +} + +void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) +{ + int rctx; + + preempt_disable_notrace(); + rctx = perf_swevent_get_recursion_context(); + if (unlikely(rctx < 0)) + goto fail; + + ___perf_sw_event(event_id, nr, regs, addr); perf_swevent_put_recursion_context(rctx); +fail: preempt_enable_notrace(); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c0accc00566e..d22fb16a7153 100644 --- 
a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1082,7 +1082,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) if (p->sched_class->migrate_task_rq) p->sched_class->migrate_task_rq(p, new_cpu); p->se.nr_migrations++; - perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0); + perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); } __set_task_cpu(p, new_cpu); diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 4b9c114ee9de..6fa484de2ba1 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -261,7 +261,7 @@ void perf_trace_del(struct perf_event *p_event, int flags) } void *perf_trace_buf_prepare(int size, unsigned short type, - struct pt_regs *regs, int *rctxp) + struct pt_regs **regs, int *rctxp) { struct trace_entry *entry; unsigned long flags; @@ -280,6 +280,8 @@ void *perf_trace_buf_prepare(int size, unsigned short type, if (*rctxp < 0) return NULL; + if (regs) + *regs = this_cpu_ptr(&__perf_regs[*rctxp]); raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]); /* zero the dead bytes from align to not leak stack to user */ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5edb518be345..296079ae6583 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1148,7 +1148,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); + entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); if (!entry) return; @@ -1179,7 +1179,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); + entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); if (!entry) return; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index c6ee36fcbf90..f97f6e3a676c 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -574,7 +574,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) size -= sizeof(u32); rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, - sys_data->enter_event->event.type, regs, &rctx); + sys_data->enter_event->event.type, NULL, &rctx); if (!rec) return; @@ -647,7 +647,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) size -= sizeof(u32); rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, - sys_data->exit_event->event.type, regs, &rctx); + sys_data->exit_event->event.type, NULL, &rctx); if (!rec) return; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 8520acc34b18..b11441321e7a 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1111,7 +1111,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, if (hlist_empty(head)) goto out; - entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); + entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); if (!entry) goto out; -- cgit v1.2.3-59-g8ed1b From 67121f85e464d66596f99afd8d188c1ae892f8fb Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 17 Dec 2014 16:23:55 +0100 Subject: perf mem: Enable sampling loads and stores simultaneously This patch modifies perf mem to default to sampling loads and stores simultaneously. 
It could only do one or the other before yet there was no hardware restriction preventing simultaneous collection. With this patch, one run is sufficient to collect both. It is still possible to sample only loads or stores by using the -t option: $ perf mem -t load rec $ perf mem -t load rep Or $ perf mem -t store rec $ perf mem -t store rep The perf report TUI will show one event at a time. The store output will contain a Weight column which will be empty. In V2, we updated the man pages to reflect the change and also simplify the initialization of the argv vector passed to the cmd_*() functions as per LKML feedback. In V3, we fixed typos in the changelog. Signed-off-by: Stephane Eranian Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Ingo Molnar Cc: Jiri Olsa Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Richard Fowles Link: http://lkml.kernel.org/r/20141217152355.GA10053@thinkpad Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-mem.txt | 9 +-- tools/perf/builtin-mem.c | 123 +++++++++++++++++++++++++++------- 2 files changed, 105 insertions(+), 27 deletions(-) diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 1d78a4064da4..43310d8661fe 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -12,11 +12,12 @@ SYNOPSIS DESCRIPTION ----------- -"perf mem -t record" runs a command and gathers memory operation data +"perf mem record" runs a command and gathers memory operation data from it, into perf.data. Perf record options are accepted and are passed through. -"perf mem -t report" displays the result. It invokes perf report with the -right set of options to display a memory access profile. +"perf mem report" displays the result. It invokes perf report with the +right set of options to display a memory access profile. By default, loads +and stores are sampled. Use the -t option to limit to loads or stores. Note that on Intel systems the memory latency reported is the use-latency, not the pure load (or store latency). 
Use latency includes any pipeline @@ -29,7 +30,7 @@ OPTIONS -t:: --type=:: - Select the memory operation type: load or store (default: load) + Select the memory operation type: load or store (default: load,store) -D:: --dump-raw-samples=:: diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 24db6ffe2957..1eded0a3a509 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -7,10 +7,13 @@ #include "util/session.h" #include "util/data.h" -#define MEM_OPERATION_LOAD "load" -#define MEM_OPERATION_STORE "store" +#define MEM_OPERATION_LOAD 0x1 +#define MEM_OPERATION_STORE 0x2 -static const char *mem_operation = MEM_OPERATION_LOAD; +/* + * default to both load an store sampling + */ +static int mem_operation = MEM_OPERATION_LOAD | MEM_OPERATION_STORE; struct perf_mem { struct perf_tool tool; @@ -25,26 +28,30 @@ static int __cmd_record(int argc, const char **argv) { int rec_argc, i = 0, j; const char **rec_argv; - char event[64]; int ret; - rec_argc = argc + 4; + rec_argc = argc + 7; /* max number of arguments */ rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (!rec_argv) return -1; - rec_argv[i++] = strdup("record"); - if (!strcmp(mem_operation, MEM_OPERATION_LOAD)) - rec_argv[i++] = strdup("-W"); - rec_argv[i++] = strdup("-d"); - rec_argv[i++] = strdup("-e"); + rec_argv[i++] = "record"; - if (strcmp(mem_operation, MEM_OPERATION_LOAD)) - sprintf(event, "cpu/mem-stores/pp"); - else - sprintf(event, "cpu/mem-loads/pp"); + if (mem_operation & MEM_OPERATION_LOAD) + rec_argv[i++] = "-W"; + + rec_argv[i++] = "-d"; + + if (mem_operation & MEM_OPERATION_LOAD) { + rec_argv[i++] = "-e"; + rec_argv[i++] = "cpu/mem-loads/pp"; + } + + if (mem_operation & MEM_OPERATION_STORE) { + rec_argv[i++] = "-e"; + rec_argv[i++] = "cpu/mem-stores/pp"; + } - rec_argv[i++] = strdup(event); for (j = 1; j < argc; j++, i++) rec_argv[i] = argv[j]; @@ -162,17 +169,17 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) if (!rep_argv) return -1; - rep_argv[i++] = strdup("report"); - rep_argv[i++] = strdup("--mem-mode"); - rep_argv[i++] = strdup("-n"); /* display number of samples */ + rep_argv[i++] = "report"; + rep_argv[i++] = "--mem-mode"; + rep_argv[i++] = "-n"; /* display number of samples */ /* * there is no weight (cost) associated with stores, so don't print * the column */ - if (strcmp(mem_operation, MEM_OPERATION_LOAD)) - rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr," - "dso_daddr,tlb,locked"); + if (!(mem_operation & MEM_OPERATION_LOAD)) + rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," + "dso_daddr,tlb,locked"; for (j = 1; j < argc; j++, i++) rep_argv[i] = argv[j]; @@ -182,6 +189,75 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) return ret; } +struct mem_mode { + const char *name; + int mode; +}; + +#define MEM_OPT(n, m) \ + { .name = n, .mode = (m) } + +#define MEM_END { .name = NULL } + +static const struct mem_mode mem_modes[]={ + MEM_OPT("load", MEM_OPERATION_LOAD), + MEM_OPT("store", MEM_OPERATION_STORE), + MEM_END +}; + +static int +parse_mem_ops(const struct option *opt, const char *str, int unset) +{ + int *mode = (int *)opt->value; + const struct mem_mode *m; + char *s, *os = NULL, *p; + int ret = -1; + + if (unset) + return 0; + + /* str may be NULL in case no arg is passed to -t */ + if (str) { + /* because str is read-only */ + s = os = strdup(str); + if (!s) + return -1; + + /* reset mode */ + *mode = 0; + + for (;;) { + p = strchr(s, ','); + if (p) + *p = '\0'; + + for (m = mem_modes; m->name; 
m++) { + if (!strcasecmp(s, m->name)) + break; + } + if (!m->name) { + fprintf(stderr, "unknown sampling op %s," + " check man page\n", s); + goto error; + } + + *mode |= m->mode; + + if (!p) + break; + + s = p + 1; + } + } + ret = 0; + + if (*mode == 0) + *mode = MEM_OPERATION_LOAD; +error: + free(os); + return ret; +} + int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) { struct stat st; @@ -199,8 +275,9 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) .input_name = "perf.data", }; const struct option mem_options[] = { - OPT_STRING('t', "type", &mem_operation, - "type", "memory operations(load/store)"), + OPT_CALLBACK('t', "type", &mem_operation, + "type", "memory operations(load,store) Default load,store", + parse_mem_ops), OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw, "dump raw samples in ASCII"), OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved, -- cgit v1.2.3-59-g8ed1b From 6602412215c26cd94af1b78fef56b78a5027b19b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 17 Dec 2014 13:53:27 -0300 Subject: perf mem: Move the mem_operations global to struct perf_mem Just like the other parameters, grouping it on the builtin-mem specific config area: struct perf_mem. Acked-by: Stephane Eranian Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Ingo Molnar Cc: Jiri Olsa Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Richard Fowles Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ad8ns5l51ongemfsir3zy09x@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 1eded0a3a509..9b5663950a4d 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -10,21 +10,17 @@ #define MEM_OPERATION_LOAD 0x1 #define MEM_OPERATION_STORE 0x2 -/* - * default to both load an store sampling - */ -static int mem_operation = MEM_OPERATION_LOAD | MEM_OPERATION_STORE; - struct perf_mem { struct perf_tool tool; char const *input_name; bool hide_unresolved; bool dump_raw; + int operation; const char *cpu_list; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; -static int __cmd_record(int argc, const char **argv) +static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) { int rec_argc, i = 0, j; const char **rec_argv; @@ -37,17 +33,17 @@ static int __cmd_record(int argc, const char **argv) rec_argv[i++] = "record"; - if (mem_operation & MEM_OPERATION_LOAD) + if (mem->operation & MEM_OPERATION_LOAD) rec_argv[i++] = "-W"; rec_argv[i++] = "-d"; - if (mem_operation & MEM_OPERATION_LOAD) { + if (mem->operation & MEM_OPERATION_LOAD) { rec_argv[i++] = "-e"; rec_argv[i++] = "cpu/mem-loads/pp"; } - if (mem_operation & MEM_OPERATION_STORE) { + if (mem->operation & MEM_OPERATION_STORE) { rec_argv[i++] = "-e"; rec_argv[i++] = "cpu/mem-stores/pp"; } @@ -177,7 +173,7 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) * there is no weight (cost) associated with stores, so don't print * the column */ - if (!(mem_operation & MEM_OPERATION_LOAD)) + if (!(mem->operation & MEM_OPERATION_LOAD)) rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," "dso_daddr,tlb,locked"; @@ -273,9 +269,13 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) .ordered_events = true, }, .input_name = "perf.data", + /* + * default to both load an store sampling + */ + .operation = MEM_OPERATION_LOAD | 
MEM_OPERATION_STORE, }; const struct option mem_options[] = { - OPT_CALLBACK('t', "type", &mem_operation, + OPT_CALLBACK('t', "type", &mem.operation, "type", "memory operations(load,store) Default load,store", parse_mem_ops), OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw, @@ -302,7 +302,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation)) + if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation)) usage_with_options(mem_usage, mem_options); if (!mem.input_name || !strlen(mem.input_name)) { @@ -313,7 +313,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) } if (!strncmp(argv[0], "rec", 3)) - return __cmd_record(argc, argv); + return __cmd_record(argc, argv, &mem); else if (!strncmp(argv[0], "rep", 3)) return report_events(argc, argv, &mem); else -- cgit v1.2.3-59-g8ed1b From 48000a1aed7422a833220245da43114012c355d7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 17 Dec 2014 17:24:45 -0300 Subject: perf tools: Remove EOL whitespaces Janitorial stuff: boredom moment. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-u70i7shys3kths4hzru72bha@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-buildid-cache.txt | 2 +- tools/perf/Documentation/perf-script.txt | 28 ++++---- tools/perf/builtin-buildid-cache.c | 4 +- tools/perf/builtin-stat.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 80 +++++++++++----------- tools/perf/tests/attr.py | 1 - tools/perf/tests/make | 1 - tools/perf/tests/parse-events.c | 2 +- tools/perf/ui/browsers/annotate.c | 3 +- tools/perf/ui/progress.h | 4 +- tools/perf/util/annotate.c | 2 +- tools/perf/util/parse-events.c | 6 +- tools/perf/util/python.c | 2 +- .../util/scripting-engines/trace-event-python.c | 2 +- tools/perf/util/session.c | 2 +- tools/perf/util/symbol.c | 2 +- 17 files changed, 71 insertions(+), 74 deletions(-) diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index fd77d81ea748..0294c57b1f5e 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -38,7 +38,7 @@ OPTIONS --remove=:: Remove specified file from the cache. -M:: ---missing=:: +--missing=:: List missing build ids in the cache for the specified file. -u:: --update:: diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 21494806c0ab..a21eec05bc42 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -125,46 +125,46 @@ OPTIONS is equivalent to: perf script -f trace: -f sw: -f hw: - + i.e., the specified fields apply to all event types if the type string is not given. - + The arguments are processed in the order received. A later usage can reset a prior request. e.g.: - + -f trace: -f comm,tid,time,ip,sym - + The first -f suppresses trace events (field list is ""), but then the second invocation sets the fields to comm,tid,time,ip,sym. In this case a warning is given to the user: - + "Overriding previous field request for all events." 
- + Alternatively, consider the order: - + -f comm,tid,time,ip,sym -f trace: - + The first -f sets the fields for all events and the second -f suppresses trace events. The user is given a warning message about the override, and the result of the above is that only S/W and H/W events are displayed with the given fields. - + For the 'wildcard' option if a user selected field is invalid for an event type, a message is displayed to the user that the option is ignored for that type. For example: - + $ perf script -f comm,tid,trace 'trace' not valid for hardware events. Ignoring. 'trace' not valid for software events. Ignoring. - + Alternatively, if the type is given an invalid field is specified it is an error. For example: - + perf script -v -f sw:comm,tid,trace 'trace' not valid for software events. - + At this point usage is displayed, and perf-script exits. - + Finally, a user may not set fields to none for all event types. i.e., -f "" is not allowed. diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 77d5cae54c6a..50e6b66aea1f 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -236,10 +236,10 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) if (errno == ENOENT) return false; - pr_warning("Problems with %s file, consider removing it from the cache\n", + pr_warning("Problems with %s file, consider removing it from the cache\n", filename); } else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) { - pr_warning("Problems with %s file, consider removing it from the cache\n", + pr_warning("Problems with %s file, consider removing it from the cache\n", filename); } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 891086376381..e598e4e98170 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1730,7 +1730,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) "detailed run - start a lot of events"), OPT_BOOLEAN('S', "sync", &sync_run, "call sync() before starting a run"), - OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, + OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, "print large numbers with thousands\' separators", stat__set_big_num), OPT_STRING('C', "cpu", &target.cpu_list, "cpu", diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 616f0fcb4701..c4c7eac69de4 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -165,7 +165,7 @@ static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip) err ? 
"[unknown]" : uts.release, perf_version_string); if (use_browser <= 0) sleep(5); - + map->erange_warned = true; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index badfabc6a01f..258f6550c736 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -929,66 +929,66 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, { .name = "close", .errmsg = true, - .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, { .name = "connect", .errmsg = true, }, { .name = "dup", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "dup2", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "dup3", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, { .name = "eventfd2", .errmsg = true, .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, { .name = "faccessat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "fadvise64", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fallocate", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchdir", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchmod", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchmodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "fchown", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchownat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "fcntl", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [1] = SCA_STRARRAY, /* cmd */ }, .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, { .name = "fdatasync", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "flock", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [1] = SCA_FLOCK, /* cmd */ }, }, { .name = "fsetxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fstat", .errmsg = true, .alias = "newfstat", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fstatat", .errmsg = true, .alias = "newfstatat", - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "fstatfs", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fsync", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "ftruncate", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = 
SCA_FD, /* fd */ }, }, { .name = "futex", .errmsg = true, .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, { .name = "futimesat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "getdents", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "getdents64", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "ioctl", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ + .arg_scnprintf = { [0] = SCA_FD, /* fd */ #if defined(__i386__) || defined(__x86_64__) /* * FIXME: Make this available to all arches. @@ -1002,7 +1002,7 @@ static struct syscall_fmt { { .name = "kill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, { .name = "linkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "lseek", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [2] = SCA_STRARRAY, /* whence */ }, @@ -1012,9 +1012,9 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_HEX, /* start */ [2] = SCA_MADV_BHV, /* behavior */ }, }, { .name = "mkdirat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "mknodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "mlock", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "mlockall", .errmsg = true, @@ -1036,9 +1036,9 @@ static struct syscall_fmt { { .name = "munmap", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "name_to_handle_at", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "newfstatat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "open", .errmsg = true, .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "open_by_handle_at", .errmsg = true, @@ -1052,20 +1052,20 @@ static struct syscall_fmt { { .name = "poll", .errmsg = true, .timeout = true, }, { .name = "ppoll", .errmsg = true, .timeout = true, }, { .name = "pread", .errmsg = true, .alias = "pread64", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "preadv", .errmsg = true, .alias = "pread", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, { .name = "pwrite", .errmsg = true, .alias = "pwrite64", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "pwritev", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "read", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "readlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = 
"readv", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "recvfrom", .errmsg = true, .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "recvmmsg", .errmsg = true, @@ -1073,7 +1073,7 @@ static struct syscall_fmt { { .name = "recvmsg", .errmsg = true, .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "renameat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "rt_sigaction", .errmsg = true, .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, @@ -1091,7 +1091,7 @@ static struct syscall_fmt { { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "shutdown", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "socket", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ [1] = SCA_SK_TYPE, /* type */ }, @@ -1102,7 +1102,7 @@ static struct syscall_fmt { .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, { .name = "stat", .errmsg = true, .alias = "newstat", }, { .name = "symlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "tgkill", .errmsg = true, .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, { .name = "tkill", .errmsg = true, @@ -1113,9 +1113,9 @@ static struct syscall_fmt { { .name = "utimensat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, { .name = "write", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "writev", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, }; static int syscall_fmt__cmp(const void *name, const void *fmtp) @@ -1191,7 +1191,7 @@ static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) if (thread__priv(thread) == NULL) thread__set_priv(thread, thread_trace__new()); - + if (thread__priv(thread) == NULL) goto fail; diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index c9b4b6269b51..1091bd47adfd 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -104,7 +104,6 @@ class Event(dict): continue if not self.compare_data(self[t], other[t]): log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) - # Test file description needs to have following sections: # [config] diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 69a71ff84e01..75709d2b17b4 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -222,7 +222,6 @@ tarpkg: @cmd="$(PERF)/tests/perf-targz-src-pkg $(PERF)"; \ echo "- $@: $$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1 - all: $(run) $(run_O) tarpkg @echo OK diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 7f2f51f93619..d188e20d958f 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1471,7 +1471,7 @@ static int test_event(struct evlist_test *e) } else { ret = e->check(evlist); } - + perf_evlist__delete(evlist); return ret; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 1e0a2fd80115..9d32e3c0cfee 100644 --- 
a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -517,7 +517,7 @@ static bool annotate_browser__jump(struct annotate_browser *browser) } annotate_browser__set_top(browser, dl, idx); - + return true; } @@ -867,7 +867,6 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser ++browser->nr_jumps; } - } static inline int width_jumps(int n) diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h index f34f89eb607c..717d39d3052b 100644 --- a/tools/perf/ui/progress.h +++ b/tools/perf/ui/progress.h @@ -4,12 +4,12 @@ #include void ui_progress__finish(void); - + struct ui_progress { const char *title; u64 curr, next, step, total; }; - + void ui_progress__init(struct ui_progress *p, u64 total, const char *title); void ui_progress__update(struct ui_progress *p, u64 adv); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 01bc4e23a2cf..61bf9128e1f2 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -239,7 +239,7 @@ static int mov__parse(struct ins_operands *ops) *s = '\0'; ops->source.raw = strdup(ops->raw); *s = ','; - + if (ops->source.raw == NULL) return -1; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 77b43fe43d55..f36b80ecaf52 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1121,7 +1121,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, return; for_each_subsystem(sys_dir, sys_dirent, sys_next) { - if (subsys_glob != NULL && + if (subsys_glob != NULL && !strglobmatch(sys_dirent.d_name, subsys_glob)) continue; @@ -1132,7 +1132,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, continue; for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { - if (event_glob != NULL && + if (event_glob != NULL && !strglobmatch(evt_dirent.d_name, event_glob)) continue; @@ -1305,7 +1305,7 @@ static void print_symbol_events(const char *event_glob, unsigned type, for (i = 0; i < max; i++, syms++) { - if (event_glob != NULL && + if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) || (syms->alias && strglobmatch(syms->alias, event_glob)))) continue; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 3dda85ca50c1..d906d0ad5d40 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -768,7 +768,7 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, Py_DECREF(file); goto free_list; } - + Py_DECREF(file); } diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index d808a328f4dc..0c815a40a6e8 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -89,7 +89,7 @@ static void handler_call_die(const char *handler_name) /* * Insert val into into the dictionary and decrement the reference counter. - * This is necessary for dictionaries since PyDict_SetItemString() does not + * This is necessary for dictionaries since PyDict_SetItemString() does not * steal a reference, as opposed to PyTuple_SetItem(). 
*/ static void pydict_set_item_string_decref(PyObject *dict, const char *key, PyObject *val) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5f0e05a76c05..b0ce3d6e6231 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -274,7 +274,7 @@ void perf_tool__fill_defaults(struct perf_tool *tool) if (tool->id_index == NULL) tool->id_index = process_id_index_stub; } - + static void swap_sample_id_all(union perf_event *event, void *data) { void *end = (void *) event + event->header.size; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a194702a0a2f..a69066865a55 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -685,7 +685,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, struct machine *machine = kmaps->machine; struct map *curr_map = map; struct symbol *pos; - int count = 0, moved = 0; + int count = 0, moved = 0; struct rb_root *root = &dso->symbols[map->type]; struct rb_node *next = rb_first(root); int kernel_range = 0; -- cgit v1.2.3-59-g8ed1b From 6733d1bf7f77967747a5f85b832eaf4dba5999df Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 19 Dec 2014 12:31:40 -0300 Subject: perf hists: Rename hist_entry__free to __delete No logic changes, just to be consistent. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-f7n5y0mvk6gew5185h6fg316@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 2 +- tools/perf/tests/hists_cumulate.c | 2 +- tools/perf/tests/hists_output.c | 2 +- tools/perf/util/hist.c | 8 ++++---- tools/perf/util/hist.h | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 1fd96c13f199..318ab9c3f0ba 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -430,7 +430,7 @@ static void hists__baseline_only(struct hists *hists) next = rb_next(&he->rb_node_in); if (!hist_entry__next_pair(he)) { rb_erase(&he->rb_node_in, root); - hist_entry__free(he); + hist_entry__delete(he); } } } diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 8d110dec393e..18619966454c 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -140,7 +140,7 @@ static void del_hist_entries(struct hists *hists) he = rb_entry(node, struct hist_entry, rb_node); rb_erase(node, root_out); rb_erase(&he->rb_node_in, root_in); - hist_entry__free(he); + hist_entry__delete(he); } } diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index f5547610da02..b52c9faea224 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -106,7 +106,7 @@ static void del_hist_entries(struct hists *hists) he = rb_entry(node, struct hist_entry, rb_node); rb_erase(node, root_out); rb_erase(&he->rb_node_in, root_in); - hist_entry__free(he); + hist_entry__delete(he); } } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 182395546ddc..b4492de326ef 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -267,7 +267,7 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) if (!n->filtered) --hists->nr_non_filtered_entries; - hist_entry__free(n); + hist_entry__delete(n); } } } @@ -290,7 +290,7 @@ void hists__delete_entries(struct hists *hists) if 
(!n->filtered) --hists->nr_non_filtered_entries; - hist_entry__free(n); + hist_entry__delete(n); } } @@ -941,7 +941,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) return cmp; } -void hist_entry__free(struct hist_entry *he) +void hist_entry__delete(struct hist_entry *he) { zfree(&he->branch_info); zfree(&he->mem_info); @@ -981,7 +981,7 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, iter->callchain, he->callchain); } - hist_entry__free(he); + hist_entry__delete(he); return false; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 46bd50344f85..9305580cd53e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -119,7 +119,7 @@ int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__transaction_len(void); int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, struct hists *hists); -void hist_entry__free(struct hist_entry *); +void hist_entry__delete(struct hist_entry *he); void hists__output_resort(struct hists *hists, struct ui_progress *prog); void hists__collapse_resort(struct hists *hists, struct ui_progress *prog); -- cgit v1.2.3-59-g8ed1b From 956b65e1a7a6f10e630ee7f2f2f9de3aab001527 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 19 Dec 2014 12:41:28 -0300 Subject: perf hists: Introduce function for deleting/removing hist_entry The code being used when decaying and deleting entries from a hists instance was the same, provide a function to avoid code dup. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-j6ideab7lkakavfvfguw858z@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b4492de326ef..038483a24a54 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -241,6 +241,20 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) return he->stat.period == 0; } +static void hists__delete_entry(struct hists *hists, struct hist_entry *he) +{ + rb_erase(&he->rb_node, &hists->entries); + + if (sort__need_collapse) + rb_erase(&he->rb_node_in, &hists->entries_collapsed); + + --hists->nr_entries; + if (!he->filtered) + --hists->nr_non_filtered_entries; + + hist_entry__delete(he); +} + void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) { struct rb_node *next = rb_first(&hists->entries); @@ -258,16 +272,7 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) (zap_kernel && n->level != '.') || hists__decay_entry(hists, n)) && !n->used) { - rb_erase(&n->rb_node, &hists->entries); - - if (sort__need_collapse) - rb_erase(&n->rb_node_in, &hists->entries_collapsed); - - --hists->nr_entries; - if (!n->filtered) - --hists->nr_non_filtered_entries; - - hist_entry__delete(n); + hists__delete_entry(hists, n); } } } @@ -281,16 +286,7 @@ void hists__delete_entries(struct hists *hists) n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - rb_erase(&n->rb_node, &hists->entries); - - if (sort__need_collapse) - rb_erase(&n->rb_node_in, &hists->entries_collapsed); - - --hists->nr_entries; - if (!n->filtered) - --hists->nr_non_filtered_entries; - - hist_entry__delete(n); + 
hists__delete_entry(hists, n); } } -- cgit v1.2.3-59-g8ed1b From 590cd344e2099c7b040b29d3a711b4c26358def5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 22 Dec 2014 13:44:09 +0900 Subject: perf report: Get rid of report__inc_stat() The report__inc_stat() function collects the number of hist entries in the session in order to calculate the max size of the progess bar. It'd be better if it does it during the addition of hist entries so that it can be used by other places too. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1419223455-4362-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 16 +++------------- tools/perf/util/hist.c | 2 ++ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 072ae8ad67fc..2f91094e228b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -86,17 +86,6 @@ static int report__config(const char *var, const char *value, void *cb) return perf_default_config(var, value, cb); } -static void report__inc_stats(struct report *rep, struct hist_entry *he) -{ - /* - * The @he is either of a newly created one or an existing one - * merging current sample. We only want to count a new one so - * checking ->nr_events being 1. - */ - if (he->stat.nr_events == 1) - rep->nr_entries++; -} - static int hist_iter__report_callback(struct hist_entry_iter *iter, struct addr_location *al, bool single, void *arg) @@ -108,8 +97,6 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, struct mem_info *mi; struct branch_info *bi; - report__inc_stats(rep, he); - if (!ui__has_annotation()) return 0; @@ -499,6 +486,9 @@ static int __cmd_report(struct report *rep) report__warn_kptr_restrict(rep); + evlist__for_each(session->evlist, pos) + rep->nr_entries += evsel__hists(pos)->nr_entries; + if (use_browser == 0) { if (verbose > 3) perf_session__fprintf(session, stdout); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 038483a24a54..e17163fcb702 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -429,6 +429,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists, if (!he) return NULL; + hists->nr_entries++; + rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, hists->entries_in); out: -- cgit v1.2.3-59-g8ed1b From c8defe249460b6fa3103797273f590372cc62efc Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Sat, 20 Dec 2014 13:51:05 +0100 Subject: perf tools: Remove some unused functions from color.c Removes some functions that are not used anywhere: color_parse_mem() color_parse() This was partially found by using a static code analysis program called cppcheck. 
Signed-off-by: Rickard Strandqvist Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Ramkumar Ramachandra Link: http://lkml.kernel.org/r/1419079865-354-1-git-send-email-rickard_strandqvist@spectrumdigital.se [ Remove now unused parse_{attr,color} routines too ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/color.c | 126 ------------------------------------------------ tools/perf/util/color.h | 2 - 2 files changed, 128 deletions(-) diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index f4654183d391..55355b3d4f85 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -5,132 +5,6 @@ int perf_use_color_default = -1; -static int parse_color(const char *name, int len) -{ - static const char * const color_names[] = { - "normal", "black", "red", "green", "yellow", - "blue", "magenta", "cyan", "white" - }; - char *end; - int i; - - for (i = 0; i < (int)ARRAY_SIZE(color_names); i++) { - const char *str = color_names[i]; - if (!strncasecmp(name, str, len) && !str[len]) - return i - 1; - } - i = strtol(name, &end, 10); - if (end - name == len && i >= -1 && i <= 255) - return i; - return -2; -} - -static int parse_attr(const char *name, int len) -{ - static const int attr_values[] = { 1, 2, 4, 5, 7 }; - static const char * const attr_names[] = { - "bold", "dim", "ul", "blink", "reverse" - }; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(attr_names); i++) { - const char *str = attr_names[i]; - if (!strncasecmp(name, str, len) && !str[len]) - return attr_values[i]; - } - return -1; -} - -void color_parse(const char *value, const char *var, char *dst) -{ - color_parse_mem(value, strlen(value), var, dst); -} - -void color_parse_mem(const char *value, int value_len, const char *var, - char *dst) -{ - const char *ptr = value; - int len = value_len; - int attr = -1; - int fg = -2; - int bg = -2; - - if (!strncasecmp(value, "reset", len)) { - strcpy(dst, PERF_COLOR_RESET); - return; - } - - /* [fg [bg]] [attr] */ - while (len > 0) { - const char *word = ptr; - int val, wordlen = 0; - - while (len > 0 && !isspace(word[wordlen])) { - wordlen++; - len--; - } - - ptr = word + wordlen; - while (len > 0 && isspace(*ptr)) { - ptr++; - len--; - } - - val = parse_color(word, wordlen); - if (val >= -1) { - if (fg == -2) { - fg = val; - continue; - } - if (bg == -2) { - bg = val; - continue; - } - goto bad; - } - val = parse_attr(word, wordlen); - if (val < 0 || attr != -1) - goto bad; - attr = val; - } - - if (attr >= 0 || fg >= 0 || bg >= 0) { - int sep = 0; - - *dst++ = '\033'; - *dst++ = '['; - if (attr >= 0) { - *dst++ = '0' + attr; - sep++; - } - if (fg >= 0) { - if (sep++) - *dst++ = ';'; - if (fg < 8) { - *dst++ = '3'; - *dst++ = '0' + fg; - } else { - dst += sprintf(dst, "38;5;%d", fg); - } - } - if (bg >= 0) { - if (sep++) - *dst++ = ';'; - if (bg < 8) { - *dst++ = '4'; - *dst++ = '0' + bg; - } else { - dst += sprintf(dst, "48;5;%d", bg); - } - } - *dst++ = 'm'; - } - *dst = 0; - return; -bad: - die("bad color value '%.*s' for variable '%s'", value_len, value, var); -} - int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) { if (value) { diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 0a594b8a0c26..38146f922c54 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -30,8 +30,6 @@ extern int perf_use_color_default; int perf_color_default_config(const char *var, const char *value, void *cb); int perf_config_colorbool(const char *var, const char *value, int 
stdout_is_tty); -void color_parse(const char *value, const char *var, char *dst); -void color_parse_mem(const char *value, int len, const char *var, char *dst); int color_vsnprintf(char *bf, size_t size, const char *color, const char *fmt, va_list args); int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); -- cgit v1.2.3-59-g8ed1b From 688d4dfcdd624192cbf03c08402e444d1d11f294 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 7 Jan 2015 17:13:50 -0800 Subject: perf tools: Support parsing parameterized events Enable event specification like: pmu/event_name,param1=0x1,param2=0x4/ Assuming that /sys/bus/event_source/devices/pmu/events/event_name Contains something like param2=?,bar=1,param1=? Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa Cc: Cody P Schafer Cc: Haren Myneni Cc: Jiri Olsa Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1420679633-28856-2-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.h | 1 + tools/perf/util/pmu.c | 74 +++++++++++++++++++++++++++++++++++------- 2 files changed, 64 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index db2cf78ff0f3..ca226cef8460 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -71,6 +71,7 @@ struct parse_events_term { int type_val; int type_term; struct list_head list; + bool used; }; struct parse_events_evlist { diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 5c9c4947cfb4..bfbecf7abd38 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -550,6 +550,35 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, } } +/* + * Term is a string term, and might be a param-term. Try to look up it's value + * in the remaining terms. + * - We have a term like "base-or-format-term=param-term", + * - We need to find the value supplied for "param-term" (with param-term named + * in a config string) later on in the term list. + */ +static int pmu_resolve_param_term(struct parse_events_term *term, + struct list_head *head_terms, + __u64 *value) +{ + struct parse_events_term *t; + + list_for_each_entry(t, head_terms, list) { + if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { + if (!strcmp(t->config, term->config)) { + t->used = true; + *value = t->val.num; + return 0; + } + } + } + + if (verbose) + printf("Required parameter '%s' not specified\n", term->config); + + return -1; +} + /* * Setup one of config[12] attr members based on the * user input data - term parameter. @@ -557,25 +586,33 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, static int pmu_config_term(struct list_head *formats, struct perf_event_attr *attr, struct parse_events_term *term, + struct list_head *head_terms, bool zero) { struct perf_pmu_format *format; __u64 *vp; + __u64 val; + + /* + * If this is a parameter we've already used for parameterized-eval, + * skip it in normal eval. + */ + if (term->used) + return 0; /* - * Support only for hardcoded and numnerial terms. * Hardcoded terms should be already in, so nothing * to be done for them. 
*/ if (parse_events__is_hardcoded_term(term)) return 0; - if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) - return -EINVAL; - format = pmu_find_format(formats, term->config); - if (!format) + if (!format) { + if (verbose) + printf("Invalid event/parameter '%s'\n", term->config); return -EINVAL; + } switch (format->value) { case PERF_PMU_FORMAT_VALUE_CONFIG: @@ -592,11 +629,25 @@ static int pmu_config_term(struct list_head *formats, } /* - * XXX If we ever decide to go with string values for - * non-hardcoded terms, here's the place to translate - * them into value. + * Either directly use a numeric term, or try to translate string terms + * using event parameters. */ - pmu_format_value(format->bits, term->val.num, vp, zero); + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) + val = term->val.num; + else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) { + if (strcmp(term->val.str, "?")) { + if (verbose) + pr_info("Invalid sysfs entry %s=%s\n", + term->config, term->val.str); + return -EINVAL; + } + + if (pmu_resolve_param_term(term, head_terms, &val)) + return -EINVAL; + } else + return -EINVAL; + + pmu_format_value(format->bits, val, vp, zero); return 0; } @@ -607,9 +658,10 @@ int perf_pmu__config_terms(struct list_head *formats, { struct parse_events_term *term; - list_for_each_entry(term, head_terms, list) - if (pmu_config_term(formats, attr, term, zero)) + list_for_each_entry(term, head_terms, list) { + if (pmu_config_term(formats, attr, term, head_terms, zero)) return -EINVAL; + } return 0; } -- cgit v1.2.3-59-g8ed1b From aaea36174991ff39c7a18044660db86527100c55 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 7 Jan 2015 17:13:51 -0800 Subject: perf tools: Extend format_alias() to include event parameters This causes `perf list pmu` to show parameters for parameterized events like: pmu/event_name,param1=?,param2=?/ [Kernel PMU event] An example: hv_24x7/HPM_TLBIE__PHYS_CORE,core=?/ [Kernel PMU event] Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa Cc: Cody P Schafer Cc: Haren Myneni Cc: Jiri Olsa Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1420679633-28856-3-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index bfbecf7abd38..48411674da0f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -819,10 +819,36 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to) set_bit(b, bits); } +static int sub_non_neg(int a, int b) +{ + if (b > a) + return 0; + return a - b; +} + static char *format_alias(char *buf, int len, struct perf_pmu *pmu, struct perf_pmu_alias *alias) { - snprintf(buf, len, "%s/%s/", pmu->name, alias->name); + struct parse_events_term *term; + int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name); + + list_for_each_entry(term, &alias->terms, list) { + if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) + used += snprintf(buf + used, sub_non_neg(len, used), + ",%s=%s", term->config, + term->val.str); + } + + if (sub_non_neg(len, used) > 0) { + buf[used] = '/'; + used++; + } + if (sub_non_neg(len, used) > 0) { + buf[used] = '\0'; + used++; + } else + buf[len - 1] = '\0'; + return buf; } -- cgit v1.2.3-59-g8ed1b From 98a43e0e9917059da32db89829b0eb95453a11ee Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 7 
Jan 2015 17:13:52 -0800 Subject: perf Documentation: Add event parameters Event parameters are a basic way for partial events to be specified in sysfs with per-event names given to the fields that need to be filled in when using a particular event. It is intended for supporting cases where the single 'cpu' parameter is insufficient. For example, POWER 8 has events for physical sockets/cores/cpus that are accessible from with virtual machines. To keep using the single 'cpu' parameter we'd need to perform a mapping between Linux's cpus and the physical machine's cpus (in this case Linux is running under a hypervisor). This isn't possible because bindings between our cpus and physical cpus may not be fixed, and we probably won't have a "cpu" on each physical cpu. Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa Cc: Cody P Schafer Cc: Haren Myneni Cc: Jiri Olsa Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1420679633-28856-4-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- Documentation/ABI/testing/sysfs-bus-event_source-devices-events | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events index 20979f8b3edb..505f080d20a1 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events @@ -52,12 +52,18 @@ Description: Per-pmu performance monitoring events specific to the running syste event=0x2abc event=0x423,inv,cmask=0x3 domain=0x1,offset=0x8,starting_index=0xffff + domain=0x1,offset=0x8,core=? Each of the assignments indicates a value to be assigned to a particular set of bits (as defined by the format file corresponding to the ) in the perf_event structure passed to the perf_open syscall. + In the case of the last example, a value replacing "?" would + need to be provided by the user selecting the particular event. + This is referred to as "event parameterization". Event + parameters have the format 'param=?'. + What: /sys/bus/event_source/devices//events/.unit Date: 2014/02/24 Contact: Linux kernel mailing list -- cgit v1.2.3-59-g8ed1b From f9ab9c196d015f3bd8f6bd1c30785c5a49542323 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Wed, 7 Jan 2015 17:13:53 -0800 Subject: perf tools: Document parameterized and symbolic events Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa Cc: Cody P Schafer Cc: Haren Myneni Cc: Jiri Olsa Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1420679633-28856-5-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 13 +++++++++++++ tools/perf/Documentation/perf-record.txt | 12 ++++++++++++ tools/perf/Documentation/perf-stat.txt | 20 ++++++++++++++++---- 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index cbb4f743d921..3e2aec94f806 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -89,6 +89,19 @@ raw encoding of 0x1A8 can be used: You should refer to the processor specific documentation for getting these details. 
Some of them are referenced in the SEE ALSO section below. +PARAMETERIZED EVENTS +-------------------- + +Some pmu events listed by 'perf-list' will be displayed with '?' in them. For +example: + + hv_gpci/dtbp_ptitc,phys_processor_idx=?/ + +This means that when provided as an event, a value for '?' must +also be supplied. For example: + + perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ... + OPTIONS ------- diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index af9a54ece024..7d8df2e5edd8 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -33,6 +33,18 @@ OPTIONS - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a hexadecimal event descriptor. + - a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where + 'param1', 'param2', etc are defined as formats for the PMU in + /sys/bus/event_sources/devices//format/*. + + - a symbolically formed event like 'pmu/config=M,config1=N,config3=K/' + + where M, N, K are numbers (in decimal, hex, octal format). Acceptable + values for each of 'config', 'config1' and 'config2' are defined by + corresponding entries in /sys/bus/event_sources/devices//format/* + param1 and param2 are defined as formats for the PMU in: + /sys/bus/event_sources/devices//format/* + - a hardware breakpoint event in the form of '\mem:addr[:access]' where addr is the address in memory you want to break in. Access is the memory access type (read, write, execute) it can diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 29ee857c09c6..04e150d83e7d 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -25,10 +25,22 @@ OPTIONS -e:: --event=:: - Select the PMU event. Selection can be a symbolic event name - (use 'perf list' to list all events) or a raw PMU - event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + Select the PMU event. Selection can be: + + - a symbolic event name (use 'perf list' to list all events) + + - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. + + - a symbolically formed event like 'pmu/param1=0x3,param2/' where + param1 and param2 are defined as formats for the PMU in + /sys/bus/event_sources/devices//format/* + + - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/' + where M, N, K are numbers (in decimal, hex, octal format). + Acceptable values for each of 'config', 'config1' and 'config2' + parameters are defined by corresponding entries in + /sys/bus/event_sources/devices//format/* -i:: --no-inherit:: -- cgit v1.2.3-59-g8ed1b From 5594b557aacaafbba7ad8e5ed29005df883bfe3a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 10 Jan 2015 19:33:45 +0900 Subject: perf tools: Allow use of an exclusive option more than once The exclusive options are to prohibit use of conflicting options at the same time. But it had a side effect that it also limits a such option can be used at most once. Currently the only user of the flag is perf probe and it allows to use such options more than once, but when one tries to use it, perf will fail like below: $ sudo perf probe -x /lib/libc-2.20.so --add malloc --add free Error: option `add' cannot be used with add ... 
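The intent of the fix, shown in the diff below, is to report a conflict only when a different exclusive option has already been seen, so repeating the same option (as in the --add example above) is accepted. A minimal sketch of the check, with the error-reporting and bookkeeping details elided and assumed:

    if (opt->flags & PARSE_OPT_EXCLUSIVE) {
            if (p->excl_opt && p->excl_opt != opt) {
                    /* a different exclusive option came first: report the conflict */
            }
            /* otherwise remember this option and keep parsing (assumed) */
    }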
Signed-off-by: Namhyung Kim Reviewed-by: Masami Hiramatsu Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1420886028-15135-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index f62dee7bd924..4a015f77e2b5 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -46,7 +46,7 @@ static int get_value(struct parse_opt_ctx_t *p, return opterror(opt, "is not usable", flags); if (opt->flags & PARSE_OPT_EXCLUSIVE) { - if (p->excl_opt) { + if (p->excl_opt && p->excl_opt != opt) { char msg[128]; if (((flags & OPT_SHORT) && p->excl_opt->short_name) || -- cgit v1.2.3-59-g8ed1b From 38259a170dfab4865968b592ae90b373e9f7d5b5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 27 Dec 2014 14:06:30 +0900 Subject: perf diff: Get rid of hists__compute_resort() The hists__compute_resort() is to sort output fields based on the given field/criteria. This was done without the sort list but as we added the field to the sort list, we can do it with normal hists__output_resort() using the ->sort callback. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1419656793-32756-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 59 +++-------------------------------------------- 1 file changed, 3 insertions(+), 56 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 318ab9c3f0ba..72c718e6549c 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -581,68 +581,15 @@ hist_entry__cmp_wdiff(struct hist_entry *left, struct hist_entry *right) return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF); } -static void insert_hist_entry_by_compute(struct rb_root *root, - struct hist_entry *he, - int c) -{ - struct rb_node **p = &root->rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - if (hist_entry__cmp_compute(he, iter, c) < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, root); -} - -static void hists__compute_resort(struct hists *hists) -{ - struct rb_root *root; - struct rb_node *next; - - if (sort__need_collapse) - root = &hists->entries_collapsed; - else - root = hists->entries_in; - - hists->entries = RB_ROOT; - next = rb_first(root); - - hists__reset_stats(hists); - hists__reset_col_len(hists); - - while (next != NULL) { - struct hist_entry *he; - - he = rb_entry(next, struct hist_entry, rb_node_in); - next = rb_next(&he->rb_node_in); - - insert_hist_entry_by_compute(&hists->entries, he, compute); - hists__inc_stats(hists, he); - - if (!he->filtered) - hists__calc_col_len(hists, he); - } -} - static void hists__process(struct hists *hists) { if (show_baseline_only) hists__baseline_only(hists); - if (sort_compute) { + if (sort_compute) hists__precompute(hists); - hists__compute_resort(hists); - } else { - hists__output_resort(hists, NULL); - } + + hists__output_resort(hists, NULL); hists__fprintf(hists, true, 0, 0, 0, stdout); } -- cgit v1.2.3-59-g8ed1b From ec3d07cb630da5da3ccfdf2b2f5472cadedb9470 Mon Sep 17 00:00:00 2001 From: Namhyung Kim 
Date: Sat, 27 Dec 2014 14:06:31 +0900 Subject: perf diff: Print diff result more precisely Current perf diff result is somewhat confusing since it sometimes hide small result and sometimes there's no result. So do not hide small result (less than 0.01%) and print "N/A" if baseline is not recorded (for ratio and wdiff only). Blank means the baseline is available but its pairs are not. Before: # Baseline Delta Shared Object Symbol # ........ ....... ................. ......................... # ... 0.01% -0.01% [kernel.kallsyms] [k] native_write_msr_safe 0.01% [kernel.kallsyms] [k] scheduler_tick 0.01% [kernel.kallsyms] [k] native_read_msr_safe 0.00% [kernel.kallsyms] [k] __rcu_read_unlock [kernel.kallsyms] [k] _raw_spin_lock +0.01% [kernel.kallsyms] [k] apic_timer_interrupt [kernel.kallsyms] [k] read_tsc After: # Baseline Delta Shared Object Symbol # ........ ....... ................. ......................... # ... 0.01% -0.01% [kernel.kallsyms] [k] native_write_msr_safe 0.01% [kernel.kallsyms] [k] scheduler_tick 0.01% [kernel.kallsyms] [k] native_read_msr_safe 0.00% [kernel.kallsyms] [k] __rcu_read_unlock +0.01% [kernel.kallsyms] [k] _raw_spin_lock +0.01% [kernel.kallsyms] [k] apic_timer_interrupt +0.01% [kernel.kallsyms] [k] read_tsc Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1419656793-32756-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 72c718e6549c..3f86737da2c4 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -788,7 +788,7 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, char pfmt[20] = " "; if (!pair) - goto dummy_print; + goto no_print; switch (comparison_method) { case COMPUTE_DELTA: @@ -797,8 +797,6 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, else diff = compute_delta(he, pair); - if (fabs(diff) < 0.01) - goto dummy_print; scnprintf(pfmt, 20, "%%%+d.2f%%%%", dfmt->header_width - 1); return percent_color_snprintf(hpp->buf, hpp->size, pfmt, diff); @@ -829,6 +827,9 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, BUG_ON(1); } dummy_print: + return scnprintf(hpp->buf, hpp->size, "%*s", + dfmt->header_width, "N/A"); +no_print: return scnprintf(hpp->buf, hpp->size, "%*s", dfmt->header_width, pfmt); } @@ -879,14 +880,15 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair, else diff = compute_delta(he, pair); - if (fabs(diff) >= 0.01) - scnprintf(buf, size, "%+4.2F%%", diff); + scnprintf(buf, size, "%+4.2F%%", diff); break; case PERF_HPP_DIFF__RATIO: /* No point for ratio number if we are dummy.. */ - if (he->dummy) + if (he->dummy) { + scnprintf(buf, size, "N/A"); break; + } if (pair->diff.computed) ratio = pair->diff.period_ratio; @@ -899,8 +901,10 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair, case PERF_HPP_DIFF__WEIGHTED_DIFF: /* No point for wdiff number if we are dummy.. 
*/ - if (he->dummy) + if (he->dummy) { + scnprintf(buf, size, "N/A"); break; + } if (pair->diff.computed) wdiff = pair->diff.wdiff; -- cgit v1.2.3-59-g8ed1b From ff21cef67e85ae77682560973b8c80ef64125221 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Jan 2015 09:45:45 +0900 Subject: perf diff: Introduce fmt_to_data_file() helper The fmt_to_data_file() is to retrieve struct data__file from perf_hpp_fmt which is embedded in diff_hpp_fmt. It'll be used by sort callback functions later. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1420677949-6719-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 3f86737da2c4..ae8f62151b34 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -390,6 +390,15 @@ static void perf_evlist__collapse_resort(struct perf_evlist *evlist) } } +static struct data__file *fmt_to_data_file(struct perf_hpp_fmt *fmt) +{ + struct diff_hpp_fmt *dfmt = container_of(fmt, struct diff_hpp_fmt, fmt); + void *ptr = dfmt - dfmt->idx; + struct data__file *d = container_of(ptr, struct data__file, fmt); + + return d; +} + static struct hist_entry* get_pair_data(struct hist_entry *he, struct data__file *d) { @@ -407,8 +416,7 @@ get_pair_data(struct hist_entry *he, struct data__file *d) static struct hist_entry* get_pair_fmt(struct hist_entry *he, struct diff_hpp_fmt *dfmt) { - void *ptr = dfmt - dfmt->idx; - struct data__file *d = container_of(ptr, struct data__file, fmt); + struct data__file *d = fmt_to_data_file(&dfmt->fmt); return get_pair_data(he, d); } -- cgit v1.2.3-59-g8ed1b From 87bbdf768ff962f1c04d3b8f6db1e179279132d1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Jan 2015 09:45:46 +0900 Subject: perf tools: Pass struct perf_hpp_fmt to its callbacks Currently ->cmp, ->collapse and ->sort callbacks doesn't pass corresponding fmt. But it'll be needed by upcoming changes in perf diff command. 
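A brief sketch of the new callback shape, taken from the sort.c change later in this patch: each callback now receives the perf_hpp_fmt pointer, so a wrapper can recover its enclosing sort entry with container_of() and dispatch to the existing compare routine:

    static int64_t __sort__hpp_cmp(struct perf_hpp_fmt *fmt,
                                   struct hist_entry *a, struct hist_entry *b)
    {
            struct hpp_sort_entry *hse;

            hse = container_of(fmt, struct hpp_sort_entry, hpp);
            return hse->se->se_cmp(a, b);
    }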
Suggested-by: Jiri Olsa Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1420677949-6719-6-git-send-email-namhyung@kernel.org [ fix build by passing perf_hpp_fmt pointer to hist_entry__cmp_ methods ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 15 ++++++++++----- tools/perf/ui/hist.c | 12 ++++++++---- tools/perf/util/hist.c | 6 +++--- tools/perf/util/hist.h | 9 ++++++--- tools/perf/util/sort.c | 37 ++++++++++++++++++++++++++++++++++--- 5 files changed, 61 insertions(+), 18 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index ae8f62151b34..4816989a84b0 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -554,14 +554,16 @@ hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, } static int64_t -hist_entry__cmp_nop(struct hist_entry *left __maybe_unused, +hist_entry__cmp_nop(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left __maybe_unused, struct hist_entry *right __maybe_unused) { return 0; } static int64_t -hist_entry__cmp_baseline(struct hist_entry *left, struct hist_entry *right) +hist_entry__cmp_baseline(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) { if (sort_compute) return 0; @@ -572,19 +574,22 @@ hist_entry__cmp_baseline(struct hist_entry *left, struct hist_entry *right) } static int64_t -hist_entry__cmp_delta(struct hist_entry *left, struct hist_entry *right) +hist_entry__cmp_delta(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) { return hist_entry__cmp_compute(right, left, COMPUTE_DELTA); } static int64_t -hist_entry__cmp_ratio(struct hist_entry *left, struct hist_entry *right) +hist_entry__cmp_ratio(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) { return hist_entry__cmp_compute(right, left, COMPUTE_RATIO); } static int64_t -hist_entry__cmp_wdiff(struct hist_entry *left, struct hist_entry *right) +hist_entry__cmp_wdiff(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) { return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF); } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 482adae3cc44..25d608394d74 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -285,7 +285,8 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \ } #define __HPP_SORT_FN(_type, _field) \ -static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ +static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct hist_entry *a, struct hist_entry *b) \ { \ return __hpp__sort(a, b, he_get_##_field); \ } @@ -312,7 +313,8 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \ } #define __HPP_SORT_ACC_FN(_type, _field) \ -static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ +static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct hist_entry *a, struct hist_entry *b) \ { \ return __hpp__sort_acc(a, b, he_get_acc_##_field); \ } @@ -331,7 +333,8 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \ } #define __HPP_SORT_RAW_FN(_type, _field) \ -static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ +static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct hist_entry *a, struct hist_entry *b) \ { \ return __hpp__sort(a, b, 
he_get_raw_##_field); \ } @@ -361,7 +364,8 @@ HPP_PERCENT_ACC_FNS(overhead_acc, period) HPP_RAW_FNS(samples, nr_events) HPP_RAW_FNS(period, period) -static int64_t hpp__nop_cmp(struct hist_entry *a __maybe_unused, +static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *a __maybe_unused, struct hist_entry *b __maybe_unused) { return 0; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index e17163fcb702..70b48a65064c 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -913,7 +913,7 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) if (perf_hpp__should_skip(fmt)) continue; - cmp = fmt->cmp(left, right); + cmp = fmt->cmp(fmt, left, right); if (cmp) break; } @@ -931,7 +931,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) if (perf_hpp__should_skip(fmt)) continue; - cmp = fmt->collapse(left, right); + cmp = fmt->collapse(fmt, left, right); if (cmp) break; } @@ -1061,7 +1061,7 @@ static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b) if (perf_hpp__should_skip(fmt)) continue; - cmp = fmt->sort(a, b); + cmp = fmt->sort(fmt, a, b); if (cmp) break; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 9305580cd53e..2b690d028907 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -195,9 +195,12 @@ struct perf_hpp_fmt { struct hist_entry *he); int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he); - int64_t (*cmp)(struct hist_entry *a, struct hist_entry *b); - int64_t (*collapse)(struct hist_entry *a, struct hist_entry *b); - int64_t (*sort)(struct hist_entry *a, struct hist_entry *b); + int64_t (*cmp)(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b); + int64_t (*collapse)(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b); + int64_t (*sort)(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b); struct list_head list; struct list_head sort_list; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 9139dda9f9a3..7a39c1ed8d37 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1304,6 +1304,37 @@ static int __sort__hpp_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, return hse->se->se_snprintf(he, hpp->buf, hpp->size, len); } +static int64_t __sort__hpp_cmp(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b) +{ + struct hpp_sort_entry *hse; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + return hse->se->se_cmp(a, b); +} + +static int64_t __sort__hpp_collapse(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b) +{ + struct hpp_sort_entry *hse; + int64_t (*collapse_fn)(struct hist_entry *, struct hist_entry *); + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + collapse_fn = hse->se->se_collapse ?: hse->se->se_cmp; + return collapse_fn(a, b); +} + +static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b) +{ + struct hpp_sort_entry *hse; + int64_t (*sort_fn)(struct hist_entry *, struct hist_entry *); + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + sort_fn = hse->se->se_sort ?: hse->se->se_cmp; + return sort_fn(a, b); +} + static struct hpp_sort_entry * __sort_dimension__alloc_hpp(struct sort_dimension *sd) { @@ -1322,9 +1353,9 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) hse->hpp.entry = __sort__hpp_entry; hse->hpp.color = NULL; - hse->hpp.cmp = sd->entry->se_cmp; - hse->hpp.collapse = 
sd->entry->se_collapse ? : sd->entry->se_cmp; - hse->hpp.sort = sd->entry->se_sort ? : hse->hpp.collapse; + hse->hpp.cmp = __sort__hpp_cmp; + hse->hpp.collapse = __sort__hpp_collapse; + hse->hpp.sort = __sort__hpp_sort; INIT_LIST_HEAD(&hse->hpp.list); INIT_LIST_HEAD(&hse->hpp.sort_list); -- cgit v1.2.3-59-g8ed1b From 56495a8affabe35aa0d94aae050d3e0e60d0455f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Jan 2015 09:45:47 +0900 Subject: perf diff: Fix output ordering to honor next column When perf diff prints output, it sorts the entries using baseline field by default, but entries which don't have baseline are not sorted properly. This patch makes it sorted by values of next column. Before: # Baseline/0 Delta/1 Delta/2 Shared Object Symbol # .......... ....... ....... ................. .......................................... # 32.75% +0.28% -0.83% libc-2.20.so [.] malloc 31.50% -0.74% -0.23% libc-2.20.so [.] _int_free 22.98% +0.51% +0.52% libc-2.20.so [.] _int_malloc 5.70% +0.28% +0.30% libc-2.20.so [.] free 4.38% -0.21% +0.25% a.out [.] main 1.32% -0.15% +0.05% a.out [.] free@plt 1.31% +0.03% -0.06% a.out [.] malloc@plt 0.01% -0.01% -0.01% [kernel.kallsyms] [k] native_write_msr_safe 0.01% [kernel.kallsyms] [k] scheduler_tick 0.01% -0.00% [kernel.kallsyms] [k] native_read_msr_safe +0.01% [kernel.kallsyms] [k] _raw_spin_lock_irqsave +0.01% +0.01% [kernel.kallsyms] [k] apic_timer_interrupt +0.01% [kernel.kallsyms] [k] intel_pstate_timer_func +0.01% [kernel.kallsyms] [k] perf_adjust_freq_unthr_context.part.82 +0.01% [kernel.kallsyms] [k] read_tsc +0.01% [kernel.kallsyms] [k] timekeeping_update.constprop.8 After: # Baseline/0 Delta/1 Delta/2 Shared Object Symbol # .......... ....... ....... ................. .......................................... # 32.75% +0.28% -0.83% libc-2.20.so [.] malloc 31.50% -0.74% -0.23% libc-2.20.so [.] _int_free 22.98% +0.51% +0.52% libc-2.20.so [.] _int_malloc 5.70% +0.28% +0.30% libc-2.20.so [.] free 4.38% -0.21% +0.25% a.out [.] main 1.32% -0.15% +0.05% a.out [.] free@plt 1.31% +0.03% -0.06% a.out [.] 
malloc@plt 0.01% -0.01% -0.01% [kernel.kallsyms] [k] native_write_msr_safe 0.01% [kernel.kallsyms] [k] scheduler_tick 0.01% -0.00% [kernel.kallsyms] [k] native_read_msr_safe +0.01% +0.01% [kernel.kallsyms] [k] apic_timer_interrupt +0.01% [kernel.kallsyms] [k] read_tsc +0.01% [kernel.kallsyms] [k] perf_adjust_freq_unthr_context.part.82 +0.01% [kernel.kallsyms] [k] intel_pstate_timer_func +0.01% [kernel.kallsyms] [k] _raw_spin_lock_irqsave +0.01% [kernel.kallsyms] [k] timekeeping_update.constprop.8 Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1420677949-6719-7-git-send-email-namhyung@kernel.org [ Fixed up hist_entry__cmp_ method signatures, fallout from making previous cset buildable ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 65 +++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 4816989a84b0..98444561d9b4 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -456,26 +456,30 @@ static void hists__precompute(struct hists *hists) next = rb_first(root); while (next != NULL) { struct hist_entry *he, *pair; + struct data__file *d; + int i; he = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&he->rb_node_in); - pair = get_pair_data(he, &data__files[sort_compute]); - if (!pair) - continue; + data__for_each_file_new(i, d) { + pair = get_pair_data(he, d); + if (!pair) + continue; - switch (compute) { - case COMPUTE_DELTA: - compute_delta(he, pair); - break; - case COMPUTE_RATIO: - compute_ratio(he, pair); - break; - case COMPUTE_WEIGHTED_DIFF: - compute_wdiff(he, pair); - break; - default: - BUG_ON(1); + switch (compute) { + case COMPUTE_DELTA: + compute_delta(he, pair); + break; + case COMPUTE_RATIO: + compute_ratio(he, pair); + break; + case COMPUTE_WEIGHTED_DIFF: + compute_wdiff(he, pair); + break; + default: + BUG_ON(1); + } } } } @@ -525,7 +529,7 @@ __hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, static int64_t hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, - int c) + int c, int sort_idx) { bool pairs_left = hist_entry__has_pairs(left); bool pairs_right = hist_entry__has_pairs(right); @@ -537,8 +541,8 @@ hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, if (!pairs_left || !pairs_right) return pairs_left ? -1 : 1; - p_left = get_pair_data(left, &data__files[sort_compute]); - p_right = get_pair_data(right, &data__files[sort_compute]); + p_left = get_pair_data(left, &data__files[sort_idx]); + p_right = get_pair_data(right, &data__files[sort_idx]); if (!p_left && !p_right) return 0; @@ -565,33 +569,36 @@ static int64_t hist_entry__cmp_baseline(struct perf_hpp_fmt *fmt __maybe_unused, struct hist_entry *left, struct hist_entry *right) { - if (sort_compute) - return 0; - if (left->stat.period == right->stat.period) return 0; return left->stat.period > right->stat.period ? 
1 : -1; } static int64_t -hist_entry__cmp_delta(struct perf_hpp_fmt *fmt __maybe_unused, +hist_entry__cmp_delta(struct perf_hpp_fmt *fmt, struct hist_entry *left, struct hist_entry *right) { - return hist_entry__cmp_compute(right, left, COMPUTE_DELTA); + struct data__file *d = fmt_to_data_file(fmt); + + return hist_entry__cmp_compute(right, left, COMPUTE_DELTA, d->idx); } static int64_t -hist_entry__cmp_ratio(struct perf_hpp_fmt *fmt __maybe_unused, +hist_entry__cmp_ratio(struct perf_hpp_fmt *fmt, struct hist_entry *left, struct hist_entry *right) { - return hist_entry__cmp_compute(right, left, COMPUTE_RATIO); + struct data__file *d = fmt_to_data_file(fmt); + + return hist_entry__cmp_compute(right, left, COMPUTE_RATIO, d->idx); } static int64_t -hist_entry__cmp_wdiff(struct perf_hpp_fmt *fmt __maybe_unused, +hist_entry__cmp_wdiff(struct perf_hpp_fmt *fmt, struct hist_entry *left, struct hist_entry *right) { - return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF); + struct data__file *d = fmt_to_data_file(fmt); + + return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF, d->idx); } static void hists__process(struct hists *hists) @@ -599,9 +606,7 @@ static void hists__process(struct hists *hists) if (show_baseline_only) hists__baseline_only(hists); - if (sort_compute) - hists__precompute(hists); - + hists__precompute(hists); hists__output_resort(hists, NULL); hists__fprintf(hists, true, 0, 0, 0, stdout); -- cgit v1.2.3-59-g8ed1b From 566b5cfb035fb496280be61f976b5281563bfa27 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Jan 2015 09:45:48 +0900 Subject: perf diff: Fix -o/--order option behavior The prior change fixes default output ordering with each column but it breaks -o/--order option. This patch prepends a new hpp fmt struct to sort list but not to output field list so that it can affect ordering without adding a new output column. The new hpp fmt uses its own compare functions which treats dummy entries (which have no baseline) little differently - the delta field can be computed without baseline but others (ratio and wdiff) are not. The new output will look like below: $ perf diff -o 2 perf.data.{old,cur,new} ... # Baseline/0 Delta/1 Delta/2 Shared Object Symbol # .......... ....... ....... ................. .......................................... 22.98% +0.51% +0.52% libc-2.20.so [.] _int_malloc 5.70% +0.28% +0.30% libc-2.20.so [.] free 4.38% -0.21% +0.25% a.out [.] main 1.32% -0.15% +0.05% a.out [.] free@plt +0.01% [kernel.kallsyms] [k] intel_pstate_timer_func +0.01% [kernel.kallsyms] [k] _raw_spin_lock_irqsave +0.01% [kernel.kallsyms] [k] timekeeping_update.constprop.8 +0.01% +0.01% [kernel.kallsyms] [k] apic_timer_interrupt 0.01% -0.00% [kernel.kallsyms] [k] native_read_msr_safe 0.01% -0.01% -0.01% [kernel.kallsyms] [k] native_write_msr_safe 1.31% +0.03% -0.06% a.out [.] malloc@plt 31.50% -0.74% -0.23% libc-2.20.so [.] _int_free 32.75% +0.28% -0.83% libc-2.20.so [.] malloc 0.01% [kernel.kallsyms] [k] scheduler_tick +0.01% [kernel.kallsyms] [k] read_tsc +0.01% [kernel.kallsyms] [k] perf_adjust_freq_unthr_context.part.82 In above example, the output was sorted by 'Delta/2' column first, and then 'Baseline/0' and finally 'Delta/1'. 
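The mechanism, mirrored from the diff below: a perf_hpp_fmt is allocated and added only to the sort list, so it influences ordering without producing an extra output column. A condensed sketch, with error handling and the ratio/wdiff cases omitted:

    struct perf_hpp_fmt *fmt = zalloc(sizeof(*fmt));

    fmt->cmp      = hist_entry__cmp_nop;
    fmt->collapse = hist_entry__cmp_nop;
    fmt->sort     = hist_entry__cmp_delta_idx;   /* COMPUTE_DELTA case */

    /* sort list only, not the output field list */
    list_add(&fmt->sort_list, &perf_hpp__sort_list);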
Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1420677949-6719-8-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 101 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 99 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 98444561d9b4..74aada554b12 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -557,6 +557,37 @@ hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, return __hist_entry__cmp_compute(p_left, p_right, c); } +static int64_t +hist_entry__cmp_compute_idx(struct hist_entry *left, struct hist_entry *right, + int c, int sort_idx) +{ + struct hist_entry *p_right, *p_left; + + p_left = get_pair_data(left, &data__files[sort_idx]); + p_right = get_pair_data(right, &data__files[sort_idx]); + + if (!p_left && !p_right) + return 0; + + if (!p_left || !p_right) + return p_left ? -1 : 1; + + if (c != COMPUTE_DELTA) { + /* + * The delta can be computed without the baseline, but + * others are not. Put those entries which have no + * values below. + */ + if (left->dummy && right->dummy) + return 0; + + if (left->dummy || right->dummy) + return left->dummy ? 1 : -1; + } + + return __hist_entry__cmp_compute(p_left, p_right, c); +} + static int64_t hist_entry__cmp_nop(struct perf_hpp_fmt *fmt __maybe_unused, struct hist_entry *left __maybe_unused, @@ -601,6 +632,30 @@ hist_entry__cmp_wdiff(struct perf_hpp_fmt *fmt, return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF, d->idx); } +static int64_t +hist_entry__cmp_delta_idx(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute_idx(right, left, COMPUTE_DELTA, + sort_compute); +} + +static int64_t +hist_entry__cmp_ratio_idx(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute_idx(right, left, COMPUTE_RATIO, + sort_compute); +} + +static int64_t +hist_entry__cmp_wdiff_idx(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute_idx(right, left, COMPUTE_WEIGHTED_DIFF, + sort_compute); +} + static void hists__process(struct hists *hists) { if (show_baseline_only) @@ -1074,9 +1129,10 @@ static void data__hpp_register(struct data__file *d, int idx) perf_hpp__register_sort_field(fmt); } -static void ui_init(void) +static int ui_init(void) { struct data__file *d; + struct perf_hpp_fmt *fmt; int i; data__for_each_file(i, d) { @@ -1106,6 +1162,46 @@ static void ui_init(void) data__hpp_register(d, i ? PERF_HPP_DIFF__PERIOD : PERF_HPP_DIFF__PERIOD_BASELINE); } + + if (!sort_compute) + return 0; + + /* + * Prepend an fmt to sort on columns at 'sort_compute' first. + * This fmt is added only to the sort list but not to the + * output fields list. + * + * Note that this column (data) can be compared twice - one + * for this 'sort_compute' fmt and another for the normal + * diff_hpp_fmt. But it shouldn't a problem as most entries + * will be sorted out by first try or baseline and comparing + * is not a costly operation. 
+ */ + fmt = zalloc(sizeof(*fmt)); + if (fmt == NULL) { + pr_err("Memory allocation failed\n"); + return -1; + } + + fmt->cmp = hist_entry__cmp_nop; + fmt->collapse = hist_entry__cmp_nop; + + switch (compute) { + case COMPUTE_DELTA: + fmt->sort = hist_entry__cmp_delta_idx; + break; + case COMPUTE_RATIO: + fmt->sort = hist_entry__cmp_ratio_idx; + break; + case COMPUTE_WEIGHTED_DIFF: + fmt->sort = hist_entry__cmp_wdiff_idx; + break; + default: + BUG_ON(1); + } + + list_add(&fmt->sort_list, &perf_hpp__sort_list); + return 0; } static int data_init(int argc, const char **argv) @@ -1171,7 +1267,8 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) if (data_init(argc, argv) < 0) return -1; - ui_init(); + if (ui_init() < 0) + return -1; sort__mode = SORT_MODE__DIFF; -- cgit v1.2.3-59-g8ed1b From e2726d99645c5fa1fd9abd6353270fde624696f8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Jan 2015 10:34:22 -0300 Subject: tools lib fs: Adopt debugfs open strerrno method As this is not specific to an evlist and may be used with other tools. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-a9up9mivx1pzdf5tqrqsx62d@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo tools/perf/util/include/asm/hash.h --- tools/lib/api/fs/debugfs.c | 27 +++++++++++++++++++++++++++ tools/lib/api/fs/debugfs.h | 2 ++ tools/perf/builtin-trace.c | 11 +++++++++-- tools/perf/util/evlist.c | 27 --------------------------- tools/perf/util/evlist.h | 1 - 5 files changed, 38 insertions(+), 30 deletions(-) diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c index 86ea2d7b8845..fb700eed61c2 100644 --- a/tools/lib/api/fs/debugfs.c +++ b/tools/lib/api/fs/debugfs.c @@ -1,3 +1,4 @@ +#define _GNU_SOURCE #include #include #include @@ -98,3 +99,29 @@ char *debugfs_mount(const char *mountpoint) out: return debugfs_mountpoint; } + +int debugfs__strerror_open(int err, char *buf, size_t size) +{ + char sbuf[128]; + + switch (err) { + case ENOENT: + snprintf(buf, size, "%s", + "Error:\tUnable to find debugfs\n" + "Hint:\tWas your kernel compiled with debugfs support?\n" + "Hint:\tIs the debugfs filesystem mounted?\n" + "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); + break; + case EACCES: + snprintf(buf, size, + "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n" + "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", + debugfs_mountpoint, debugfs_mountpoint); + break; + default: + snprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf))); + break; + } + + return 0; +} diff --git a/tools/lib/api/fs/debugfs.h b/tools/lib/api/fs/debugfs.h index f19d3df9609d..afa5043fec61 100644 --- a/tools/lib/api/fs/debugfs.h +++ b/tools/lib/api/fs/debugfs.h @@ -26,4 +26,6 @@ char *debugfs_mount(const char *mountpoint); extern char debugfs_mountpoint[]; +int debugfs__strerror_open(int err, char *buf, size_t size); + #endif /* __API_DEBUGFS_H__ */ diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 258f6550c736..2f82dd78b086 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2062,8 +2062,15 @@ static int trace__run(struct trace *trace, int argc, const char **argv) perf_evlist__add_vfs_getname(evlist); if ((trace->trace_pgfaults & TRACE_PFMAJ) && - perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) + perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) { + /* + * 
FIXME: This one needs better error handling, as by now we + * already checked that debugfs is mounted and that we have access to it, + * so probably the case is that something is busted wrt this specific + * software event, ditto for the next gotos to out_error_tp... + */ goto out_error_tp; + } if ((trace->trace_pgfaults & TRACE_PFMIN) && perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN)) @@ -2203,7 +2210,7 @@ out: char errbuf[BUFSIZ]; out_error_tp: - perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf)); + debugfs__strerror_open(errno, errbuf, sizeof(errbuf)); goto out_error; out_error_mmap: diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 2e507b5025a3..28b8ce86bf12 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1436,33 +1436,6 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) return printed + fprintf(fp, "\n"); } -int perf_evlist__strerror_tp(struct perf_evlist *evlist __maybe_unused, - int err, char *buf, size_t size) -{ - char sbuf[128]; - - switch (err) { - case ENOENT: - scnprintf(buf, size, "%s", - "Error:\tUnable to find debugfs\n" - "Hint:\tWas your kernel compiled with debugfs support?\n" - "Hint:\tIs the debugfs filesystem mounted?\n" - "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); - break; - case EACCES: - scnprintf(buf, size, - "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n" - "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", - debugfs_mountpoint, debugfs_mountpoint); - break; - default: - scnprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf))); - break; - } - - return 0; -} - int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, int err, char *buf, size_t size) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 0ba93f67ab94..c94a9e03ecf1 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -183,7 +183,6 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); -int perf_evlist__strerror_tp(struct perf_evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size); -- cgit v1.2.3-59-g8ed1b From 801c67b05f55d0cdafcda9fdcbb3da375b03c192 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Jan 2015 10:52:55 -0300 Subject: tools lib fs: Pass filename to debugfs__strerror_open It was hardcoded for one specific tracepoint, leftover from its initial user: 'perf trace'. 
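A usage sketch of the updated interface, matching the perf trace call site in the diff below; the caller now supplies the path that failed to open:

    char errbuf[BUFSIZ];

    debugfs__strerror_open(errno, errbuf, sizeof(errbuf),
                           "tracing/events/raw_syscalls");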
Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-j1jicvwljy5qx1nah4mkmyke@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/debugfs.c | 6 +++--- tools/lib/api/fs/debugfs.h | 2 +- tools/perf/builtin-trace.c | 5 +++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c index fb700eed61c2..5e8f3913de43 100644 --- a/tools/lib/api/fs/debugfs.c +++ b/tools/lib/api/fs/debugfs.c @@ -100,7 +100,7 @@ out: return debugfs_mountpoint; } -int debugfs__strerror_open(int err, char *buf, size_t size) +int debugfs__strerror_open(int err, char *buf, size_t size, const char *filename) { char sbuf[128]; @@ -114,9 +114,9 @@ int debugfs__strerror_open(int err, char *buf, size_t size) break; case EACCES: snprintf(buf, size, - "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n" + "Error:\tNo permissions to read %s/%s\n" "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", - debugfs_mountpoint, debugfs_mountpoint); + debugfs_mountpoint, filename, debugfs_mountpoint); break; default: snprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf))); diff --git a/tools/lib/api/fs/debugfs.h b/tools/lib/api/fs/debugfs.h index afa5043fec61..a1799aecd4d5 100644 --- a/tools/lib/api/fs/debugfs.h +++ b/tools/lib/api/fs/debugfs.h @@ -26,6 +26,6 @@ char *debugfs_mount(const char *mountpoint); extern char debugfs_mountpoint[]; -int debugfs__strerror_open(int err, char *buf, size_t size); +int debugfs__strerror_open(int err, char *buf, size_t size, const char *filename); #endif /* __API_DEBUGFS_H__ */ diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2f82dd78b086..684609d7a83d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2056,7 +2056,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (trace->trace_syscalls && perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit)) - goto out_error_tp; + goto out_error_raw_syscalls; if (trace->trace_syscalls) perf_evlist__add_vfs_getname(evlist); @@ -2210,7 +2210,8 @@ out: char errbuf[BUFSIZ]; out_error_tp: - debugfs__strerror_open(errno, errbuf, sizeof(errbuf)); +out_error_raw_syscalls: + debugfs__strerror_open(errno, errbuf, sizeof(errbuf), "tracing/events/raw_syscalls"); goto out_error; out_error_mmap: -- cgit v1.2.3-59-g8ed1b From 5ed08dae9d2f2307c103e2dce94d801e74aefae4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Jan 2015 11:08:04 -0300 Subject: perf trace: Fix error reporting for evsel pgfault constructor In that case the only failure possible is not to have enough memory, as we are just creating the evsels, not trying to access any system facility such as debugfs files or syscalls. 
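The pattern being applied, routing allocation failures to their own label instead of the generic tracepoint error path, can be sketched in isolation as follows; all names here are illustrative stand-ins, not taken from builtin-trace.c:

#include <stdio.h>
#include <stdlib.h>

struct counters { int *vals; };

/*
 * Illustrative error-path layout: allocation failures jump to a dedicated
 * label, so the user sees an out-of-memory message instead of a debugfs
 * hint that cannot apply here.
 */
static int run(void)
{
	struct counters *c = calloc(1, sizeof(*c));

	if (c == NULL)
		goto out_error_mem;

	c->vals = calloc(16, sizeof(*c->vals));
	if (c->vals == NULL)
		goto out_error_mem;

	/* ... normal processing ... */
	free(c->vals);
	free(c);
	return 0;

out_error_mem:
	fprintf(stderr, "Not enough memory to run!\n");
	free(c);		/* free(NULL) is a no-op */
	return -1;
}
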
Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-7k6asvfhiwiu2zs6o2oknchk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 684609d7a83d..eaaa540bf1f3 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2063,18 +2063,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if ((trace->trace_pgfaults & TRACE_PFMAJ) && perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) { - /* - * FIXME: This one needs better error handling, as by now we - * already checked that debugfs is mounted and that we have access to it, - * so probably the case is that something is busted wrt this specific - * software event, ditto for the next gotos to out_error_tp... - */ - goto out_error_tp; + goto out_error_mem; } if ((trace->trace_pgfaults & TRACE_PFMIN) && perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN)) - goto out_error_tp; + goto out_error_mem; if (trace->sched && perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", @@ -2225,6 +2219,9 @@ out_error: fprintf(trace->output, "%s\n", errbuf); goto out_delete_evlist; } +out_error_mem: + fprintf(trace->output, "Not enough memory to run!\n"); + goto out_delete_evlist; } static int trace__replay(struct trace *trace) -- cgit v1.2.3-59-g8ed1b From 2cc990ba3af4f9511f0d016c73b8163591f890e7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Jan 2015 11:13:43 -0300 Subject: tools lib fs debugfs: Introduce debugfs__strerror_open_tp There will be other cases where not just a tracepoint event is being opened below the debugfs mountpoint, but it is rather common, so provide one helper for that. 
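A hedged usage sketch of the new tracepoint-specific helper; its name and argument order come from the patch below, while the wrapper and include path are illustrative assumptions:

#include <stdio.h>
#include <api/fs/debugfs.h>	/* assumed include path within tools/ */

/*
 * Illustrative wrapper: explain why a tracepoint could not be opened.
 * Passing NULL as the event name makes the helper build the path as
 * tracing/events/<sys>/* rather than naming a specific event.
 */
static void report_tp_open_error(int err, const char *sys, const char *name)
{
	char errbuf[BUFSIZ];

	debugfs__strerror_open_tp(err, errbuf, sizeof(errbuf), sys, name);
	fprintf(stderr, "%s\n", errbuf);
}
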
Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-q6e6zct49ql6nbcw8kkg0lbj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/debugfs.c | 9 +++++++++ tools/lib/api/fs/debugfs.h | 1 + tools/perf/builtin-trace.c | 13 ++++++++----- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c index 5e8f3913de43..bf9e21648894 100644 --- a/tools/lib/api/fs/debugfs.c +++ b/tools/lib/api/fs/debugfs.c @@ -125,3 +125,12 @@ int debugfs__strerror_open(int err, char *buf, size_t size, const char *filename return 0; } + +int debugfs__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name) +{ + char path[PATH_MAX]; + + snprintf(path, PATH_MAX, "tracing/events/%s/%s", sys, name ?: "*"); + + return debugfs__strerror_open(err, buf, size, path); +} diff --git a/tools/lib/api/fs/debugfs.h b/tools/lib/api/fs/debugfs.h index a1799aecd4d5..0739881a9897 100644 --- a/tools/lib/api/fs/debugfs.h +++ b/tools/lib/api/fs/debugfs.h @@ -27,5 +27,6 @@ char *debugfs_mount(const char *mountpoint); extern char debugfs_mountpoint[]; int debugfs__strerror_open(int err, char *buf, size_t size, const char *filename); +int debugfs__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name); #endif /* __API_DEBUGFS_H__ */ diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index eaaa540bf1f3..7e935f1083ec 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2071,9 +2071,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out_error_mem; if (trace->sched && - perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", - trace__sched_stat_runtime)) - goto out_error_tp; + perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", + trace__sched_stat_runtime)) + goto out_error_sched_stat_runtime; err = perf_evlist__create_maps(evlist, &trace->opts.target); if (err < 0) { @@ -2203,9 +2203,12 @@ out: { char errbuf[BUFSIZ]; -out_error_tp: +out_error_sched_stat_runtime: + debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime"); + goto out_error; + out_error_raw_syscalls: - debugfs__strerror_open(errno, errbuf, sizeof(errbuf), "tracing/events/raw_syscalls"); + debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); goto out_error; out_error_mmap: -- cgit v1.2.3-59-g8ed1b From f816b3cc99804a9f771315331deb36abec47f5b4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Jan 2015 16:35:42 -0300 Subject: tools lib fs debugfs: Check if debugfs is mounted when handling ENOENT If debugfs was already mounted, then its a matter of not finding the tracepoint, tell the user that perhaps a CONFIG_ setting is not enabled. 
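The decision the patch encodes can be shown in isolation; in the real code the flag is the library's internal debugfs_found state, and the stand-in variable, messages and function below are illustrative only:

#include <stdbool.h>
#include <stdio.h>

/*
 * Illustrative two-way ENOENT diagnosis: once debugfs itself has been
 * located, a missing file can only mean the feature is absent, so point
 * at kernel configuration instead of at mounting debugfs.
 */
static void explain_enoent(bool debugfs_was_found, const char *mountpoint,
			   const char *filename)
{
	if (debugfs_was_found)
		fprintf(stderr, "File %s/%s not found; is the matching CONFIG_ option enabled?\n",
			mountpoint, filename);
	else
		fprintf(stderr, "debugfs not found; try 'sudo mount -t debugfs nodev /sys/kernel/debug'\n");
}
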
Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-6chfytoflyx3jwfqm7ebltu0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/debugfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c index bf9e21648894..d2b18e887071 100644 --- a/tools/lib/api/fs/debugfs.c +++ b/tools/lib/api/fs/debugfs.c @@ -106,6 +106,13 @@ int debugfs__strerror_open(int err, char *buf, size_t size, const char *filename switch (err) { case ENOENT: + if (debugfs_found) { + snprintf(buf, size, + "Error:\tFile %s/%s not found.\n" + "Hint:\tPerhaps this kernel misses some CONFIG_ setting to enable this feature?.\n", + debugfs_mountpoint, filename); + break; + } snprintf(buf, size, "%s", "Error:\tUnable to find debugfs\n" "Hint:\tWas your kernel compiled with debugfs support?\n" -- cgit v1.2.3-59-g8ed1b From 605a3069161dd966d6cea795133c673fb6706e52 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 22 Jan 2015 18:01:23 +0100 Subject: perf tests: Fix typo in sample-parsing.c It was testing the same buffer for differences: memcmp(s1->user_stack.data, s1->user_stack.data, s1->user_stack.size) I'm pretty sure this wasn't supposed to be dead code. Signed-off-by: Rasmus Villemoes Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1421946083-29863-1-git-send-email-linux@rasmusvillemoes.dk Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sample-parsing.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 4908c648a597..30c02181e78b 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -110,7 +110,7 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_STACK_USER) { COMP(user_stack.size); - if (memcmp(s1->user_stack.data, s1->user_stack.data, + if (memcmp(s1->user_stack.data, s2->user_stack.data, s1->user_stack.size)) { pr_debug("Samples differ at 'user_stack'\n"); return false; -- cgit v1.2.3-59-g8ed1b From 4397bd2f90459d550deca7f6ba32c12e382d8b57 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 20 Jan 2015 15:40:50 +0900 Subject: perf ui/tui: Show fatal error message only if exists When perf exits with some error it shows the error message with ui__error() or ui__warning() and then calls ui__exit() during exit_browser(). On TUI, it then shows a window titled "Fatal Error" to inform user a last message which might be related with this condition. However it sometimes contains no message and just annoyes users. The usual case for this is running perf top as normal user. (And /proc/sys/kernel/perf_event_paranoid being 1). 
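The mechanism reduces to a "message was actually written" flag checked at exit; a stand-alone sketch under that assumption, with all names illustrative rather than the TUI code itself:

#include <stdbool.h>
#include <stdio.h>

static char last_msg[1024];
static bool msg_was_set;	/* plays the role of tui_helpline__set */

static void helpline_show(const char *msg)
{
	snprintf(last_msg, sizeof(last_msg), "%s", msg);
	msg_was_set = true;
}

/*
 * Illustrative exit path: only raise the "Fatal Error" dialog when a
 * message was actually recorded, instead of popping up an empty window.
 */
static void ui_exit(bool wait_for_ok)
{
	if (wait_for_ok && msg_was_set)
		fprintf(stderr, "Fatal Error: %s\n", last_msg);
}
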
Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1421736050-5283-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/tui/helpline.c | 3 +++ tools/perf/ui/tui/setup.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/tui/helpline.c b/tools/perf/ui/tui/helpline.c index 1c8b9afd5d6e..88f5143a5981 100644 --- a/tools/perf/ui/tui/helpline.c +++ b/tools/perf/ui/tui/helpline.c @@ -9,6 +9,7 @@ #include "../libslang.h" char ui_helpline__last_msg[1024]; +bool tui_helpline__set; static void tui_helpline__pop(void) { @@ -35,6 +36,8 @@ static int tui_helpline__show(const char *format, va_list ap) sizeof(ui_helpline__last_msg) - backlog, format, ap); backlog += ret; + tui_helpline__set = true; + if (ui_helpline__last_msg[backlog - 1] == '\n') { ui_helpline__puts(ui_helpline__last_msg); SLsmg_refresh(); diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 3c38f25b1695..b77e1d771363 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -17,6 +17,7 @@ static volatile int ui__need_resize; extern struct perf_error_ops perf_tui_eops; +extern bool tui_helpline__set; extern void hist_browser__init_hpp(void); @@ -159,7 +160,7 @@ out: void ui__exit(bool wait_for_ok) { - if (wait_for_ok) + if (wait_for_ok && tui_helpline__set) ui__question_window("Fatal Error", ui_helpline__last_msg, "Press any key...", 0); -- cgit v1.2.3-59-g8ed1b From 3d199b5be53348bef84883013c484b414adf0a2e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 18 Dec 2014 19:11:11 -0700 Subject: tools lib traceevent: Add support for IP address formats Add helpers for the following kernel formats: %pi4 print an IPv4 address with leading zeros %pI4 print an IPv4 address without leading zeros %pi6 print an IPv6 address without colons %pI6 print an IPv6 address with colons %pI6c print an IPv6 address in compressed form with colons %pISpc print an IP address from a sockaddr Allows these formats to be used in tracepoints. Quite a bit of this is adapted from code in lib/vsprintf.c. 
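To make the %pi4 versus %pI4 distinction concrete, a small stand-alone snippet that mimics only the IPv4 leading-zero behaviour of the patch (it is not the parser itself; the octets are hard-coded purely for illustration):

#include <stdio.h>

int main(void)
{
	unsigned char buf[4] = { 192, 168, 0, 7 };

	/* %pi4 style: leading zeros -> 192.168.000.007 */
	printf("%03d.%03d.%03d.%03d\n", buf[0], buf[1], buf[2], buf[3]);
	/* %pI4 style: no leading zeros -> 192.168.0.7 */
	printf("%d.%d.%d.%d\n", buf[0], buf[1], buf[2], buf[3]);
	return 0;
}
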
v4: - fixed pI6c description in git commit message per Valdis' comment v3: - use of 'c' and 'p' requires 'I' v2: - pass ptr+1 to print_ip_arg per Namhyung's comments - added field length checks to sockaddr function Signed-off-by: David Ahern Acked-by: Steven Rostedt Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1418955071-36241-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 328 +++++++++++++++++++++++++++++++++++++ 1 file changed, 328 insertions(+) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index cf3a44bf1ec3..afe20ed9fac8 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -32,6 +32,7 @@ #include #include +#include #include "event-parse.h" #include "event-utils.h" @@ -4149,6 +4150,324 @@ static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size, trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); } +static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf) +{ + const char *fmt; + + if (i == 'i') + fmt = "%03d.%03d.%03d.%03d"; + else + fmt = "%d.%d.%d.%d"; + + trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]); +} + +static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) +{ + return ((unsigned long)(a->s6_addr32[0] | a->s6_addr32[1]) | + (unsigned long)(a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0UL; +} + +static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr) +{ + return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); +} + +static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr) +{ + int i, j, range; + unsigned char zerolength[8]; + int longest = 1; + int colonpos = -1; + uint16_t word; + uint8_t hi, lo; + bool needcolon = false; + bool useIPv4; + struct in6_addr in6; + + memcpy(&in6, addr, sizeof(struct in6_addr)); + + useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6); + + memset(zerolength, 0, sizeof(zerolength)); + + if (useIPv4) + range = 6; + else + range = 8; + + /* find position of longest 0 run */ + for (i = 0; i < range; i++) { + for (j = i; j < range; j++) { + if (in6.s6_addr16[j] != 0) + break; + zerolength[i]++; + } + } + for (i = 0; i < range; i++) { + if (zerolength[i] > longest) { + longest = zerolength[i]; + colonpos = i; + } + } + if (longest == 1) /* don't compress a single 0 */ + colonpos = -1; + + /* emit address */ + for (i = 0; i < range; i++) { + if (i == colonpos) { + if (needcolon || i == 0) + trace_seq_printf(s, ":"); + trace_seq_printf(s, ":"); + needcolon = false; + i += longest - 1; + continue; + } + if (needcolon) { + trace_seq_printf(s, ":"); + needcolon = false; + } + /* hex u16 without leading 0s */ + word = ntohs(in6.s6_addr16[i]); + hi = word >> 8; + lo = word & 0xff; + if (hi) + trace_seq_printf(s, "%x%02x", hi, lo); + else + trace_seq_printf(s, "%x", lo); + + needcolon = true; + } + + if (useIPv4) { + if (needcolon) + trace_seq_printf(s, ":"); + print_ip4_addr(s, 'I', &in6.s6_addr[12]); + } + + return; +} + +static void print_ip6_addr(struct trace_seq *s, char i, unsigned char *buf) +{ + int j; + + for (j = 0; j < 16; j += 2) { + trace_seq_printf(s, "%02x%02x", buf[j], buf[j+1]); + if (i == 'I' && j < 14) + trace_seq_printf(s, ":"); + } +} + +/* + * %pi4 print an IPv4 address with leading zeros + * %pI4 print an IPv4 address without leading zeros + * %pi6 print an IPv6 address without colons + * %pI6 print an IPv6 address with colons + * %pI6c print an IPv6 
address in compressed form with colons + * %pISpc print an IP address based on sockaddr; p adds port. + */ +static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + unsigned char *buf; + + if (arg->type == PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return 0; + } + + if (arg->type != PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return 0; + } + + if (!arg->field.field) { + arg->field.field = + pevent_find_any_field(event, arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return 0; + } + } + + buf = data + arg->field.field->offset; + + if (arg->field.field->size != 4) { + trace_seq_printf(s, "INVALIDIPv4"); + return 0; + } + print_ip4_addr(s, i, buf); + + return 0; +} + +static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + char have_c = 0; + unsigned char *buf; + int rc = 0; + + /* pI6c */ + if (i == 'I' && *ptr == 'c') { + have_c = 1; + ptr++; + rc++; + } + + if (arg->type == PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return rc; + } + + if (arg->type != PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return rc; + } + + if (!arg->field.field) { + arg->field.field = + pevent_find_any_field(event, arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return rc; + } + } + + buf = data + arg->field.field->offset; + + if (arg->field.field->size != 16) { + trace_seq_printf(s, "INVALIDIPv6"); + return rc; + } + + if (have_c) + print_ip6c_addr(s, buf); + else + print_ip6_addr(s, i, buf); + + return rc; +} + +static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + char have_c = 0, have_p = 0; + unsigned char *buf; + struct sockaddr_storage *sa; + int rc = 0; + + /* pISpc */ + if (i == 'I') { + if (*ptr == 'p') { + have_p = 1; + ptr++; + rc++; + } + if (*ptr == 'c') { + have_c = 1; + ptr++; + rc++; + } + } + + if (arg->type == PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return rc; + } + + if (arg->type != PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return rc; + } + + if (!arg->field.field) { + arg->field.field = + pevent_find_any_field(event, arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return rc; + } + } + + sa = (struct sockaddr_storage *) (data + arg->field.field->offset); + + if (sa->ss_family == AF_INET) { + struct sockaddr_in *sa4 = (struct sockaddr_in *) sa; + + if (arg->field.field->size < sizeof(struct sockaddr_in)) { + trace_seq_printf(s, "INVALIDIPv4"); + return rc; + } + + print_ip4_addr(s, i, (unsigned char *) &sa4->sin_addr); + if (have_p) + trace_seq_printf(s, ":%d", ntohs(sa4->sin_port)); + + + } else if (sa->ss_family == AF_INET6) { + struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) sa; + + if (arg->field.field->size < sizeof(struct sockaddr_in6)) { + trace_seq_printf(s, "INVALIDIPv6"); + return rc; + } + + if (have_p) + trace_seq_printf(s, "["); + + buf = (unsigned char *) &sa6->sin6_addr; + if (have_c) + print_ip6c_addr(s, buf); + else + print_ip6_addr(s, i, buf); + + if (have_p) + trace_seq_printf(s, "]:%d", 
ntohs(sa6->sin6_port)); + } + + return rc; +} + +static int print_ip_arg(struct trace_seq *s, const char *ptr, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + char i = *ptr; /* 'i' or 'I' */ + char ver; + int rc = 0; + + ptr++; + rc++; + + ver = *ptr; + ptr++; + rc++; + + switch (ver) { + case '4': + rc += print_ipv4_arg(s, ptr, i, data, size, event, arg); + break; + case '6': + rc += print_ipv6_arg(s, ptr, i, data, size, event, arg); + break; + case 'S': + rc += print_ipsa_arg(s, ptr, i, data, size, event, arg); + break; + default: + return 0; + } + + return rc; +} + static int is_printable_array(char *p, unsigned int len) { unsigned int i; @@ -4337,6 +4656,15 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event ptr++; arg = arg->next; break; + } else if (*(ptr+1) == 'I' || *(ptr+1) == 'i') { + int n; + + n = print_ip_arg(s, ptr+1, data, size, event, arg); + if (n > 0) { + ptr += n; + arg = arg->next; + break; + } } /* fall through */ -- cgit v1.2.3-59-g8ed1b From 8d9cbd8f870e1aab00fb0f8a465887877a1d6f82 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 13 Jan 2015 19:13:23 +0530 Subject: perf evsel: Don't rely on malloc working for sz 0 When running perf on ARC (uClibc based userspace), ran into this issue ------------->8---------------- [ARCLinux]$ ./perf record ls bin etc perf sys debug init perf.data tmp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (~24 samples) ] [ARCLinux]$ ./perf report incompatible file format (rerun with -v to learn more) ------------->8---------------- The problem happens in the following call stack when zalloc is called with size zero glibc default / uClibc with MALLOC_GLIBC_COMPAT are OK, but not if that config option is not enabled. cmd_report perf_session__new perf_session__open perf_session__read_header read_attr(fd, header, &f_attr) nr_ids = f_attr.ids.size / sizeof(u64); <-- 0 perf_evsel__alloc_id(vsel, 1, nr_ids) zalloc(ncpus * nthreads * sizeof(u64)) <-- 0 header.c: read_attr() (gdb) p *f_attr $17 = { attr = { type = 0, size = 96, config = 0, { sample_period = 4000, sample_freq = 4000 }, ... 
ids = { offset = 104, size = 0 <------ } } Signed-off-by: Vineet Gupta Suggested-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexey Brodkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1421156604-30603-5-git-send-email-vgupta@synopsys.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1e90c8557ede..1d826d63bc20 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -797,6 +797,9 @@ int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads) int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) { + if (ncpus == 0 || nthreads == 0) + return 0; + if (evsel->system_wide) nthreads = 1; -- cgit v1.2.3-59-g8ed1b From 459a3df76c99124fd222586be7f10f862547e7a9 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 13 Jan 2015 19:13:24 +0530 Subject: perf tools: Provide stub for missing pthread_attr_setaffinity_np uClibc Linuxthreads.old doesn't support the pthread_attr_setaffinity_np() functioo: ----------------->8----------------------- CC bench/futex-hash.o CC bench/futex-wake.o bench/futex-hash.c: In function 'bench_futex_hash': bench/futex-hash.c:161:3: error: implicit declaration of function 'pthread_attr_setaffinity_np' [-Werror=implicit-function-declaration] ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu); ^ bench/futex-hash.c:161:3: error: nested extern declaration of 'pthread_attr_setaffinity_np' [-Werror=nested-externs] ----------------->8----------------------- So introduce a test to check that and if not available provide a stub. Signed-off-by: Vineet Gupta Acked-by: Jiri Olsa Cc: Alexey Brodkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1421156604-30603-6-git-send-email-vgupta@synopsys.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex.h | 13 +++++++++++++ tools/perf/config/Makefile | 6 ++++++ tools/perf/config/feature-checks/Makefile | 4 ++++ tools/perf/config/feature-checks/test-all.c | 5 +++++ .../feature-checks/test-pthread-attr-setaffinity-np.c | 14 ++++++++++++++ 5 files changed, 42 insertions(+) create mode 100644 tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index 71f2844cf97f..7ed22ff1e1ac 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -68,4 +68,17 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak val, opflags); } +#ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP +#include +static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr, + size_t cpusetsize, + cpu_set_t *cpuset) +{ + attr = attr; + cpusetsize = cpusetsize; + cpuset = cpuset; + return 0; +} +#endif + #endif /* _FUTEX_H */ diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 648e31ff4021..cc224080b525 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -198,6 +198,7 @@ CORE_FEATURE_TESTS = \ libpython-version \ libslang \ libunwind \ + pthread-attr-setaffinity-np \ stackprotector-all \ timerfd \ libdw-dwarf-unwind \ @@ -226,6 +227,7 @@ VF_FEATURE_TESTS = \ libelf-getphdrnum \ libelf-mmap \ libpython-version \ + pthread-attr-setaffinity-np \ stackprotector-all \ timerfd \ libunwind-debug-frame \ @@ -301,6 +303,10 @@ ifeq ($(feature-sync-compare-and-swap), 1) CFLAGS += -DHAVE_SYNC_COMPARE_AND_SWAP_SUPPORT endif +ifeq ($(feature-pthread-attr-setaffinity-np), 1) + CFLAGS += 
-DHAVE_PTHREAD_ATTR_SETAFFINITY_NP +endif + ifndef NO_BIONIC $(call feature_check,bionic) ifeq ($(feature-bionic), 1) diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 53f19b5dbc37..42ac05aaf8ac 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -25,6 +25,7 @@ FILES= \ test-libslang.bin \ test-libunwind.bin \ test-libunwind-debug-frame.bin \ + test-pthread-attr-setaffinity-np.bin \ test-stackprotector-all.bin \ test-timerfd.bin \ test-libdw-dwarf-unwind.bin \ @@ -47,6 +48,9 @@ test-all.bin: test-hello.bin: $(BUILD) +test-pthread-attr-setaffinity-np.bin: + $(BUILD) -Werror -lpthread + test-stackprotector-all.bin: $(BUILD) -Werror -fstack-protector-all diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c index 652e0098eba6..6d4d09323922 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/perf/config/feature-checks/test-all.c @@ -97,6 +97,10 @@ # include "test-zlib.c" #undef main +#define main main_test_pthread_attr_setaffinity_np +# include "test-pthread_attr_setaffinity_np.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -121,6 +125,7 @@ int main(int argc, char *argv[]) main_test_libdw_dwarf_unwind(); main_test_sync_compare_and_swap(argc, argv); main_test_zlib(); + main_test_pthread_attr_setaffinity_np(); return 0; } diff --git a/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c b/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c new file mode 100644 index 000000000000..0a0d3ecb4e8a --- /dev/null +++ b/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c @@ -0,0 +1,14 @@ +#include +#include + +int main(void) +{ + int ret = 0; + pthread_attr_t thread_attr; + + pthread_attr_init(&thread_attr); + /* don't care abt exact args, just the API itself in libpthread */ + ret = pthread_attr_setaffinity_np(&thread_attr, 0, NULL); + + return ret; +} -- cgit v1.2.3-59-g8ed1b From f1f13af99a903ae873f5373e965508e0486c1c29 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 29 Jan 2015 17:07:21 +0900 Subject: perf callchain: Cache eh/debug frame offset for dwarf unwind When libunwind tries to resolve callchains it needs to know the offset of .eh_frame_hdr or .debug_frame to access the dso. Since it will always return the same result for a given DSO, just cache the result as an optimization. 
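The optimization is a plain compute-once cache keyed off a zero sentinel; a generic sketch follows, in which the struct and the lookup function are illustrative stand-ins rather than the dso/libelf code:

#include <stdint.h>

struct dso_stub {
	uint64_t frame_offset;	/* 0 means "not looked up yet" */
};

/* Stand-in for elf_section_offset(); the real code walks ELF headers. */
static uint64_t lookup_section_offset(void)
{
	return 0x1a40;
}

/*
 * Illustrative memoization: the ELF walk happens at most once per DSO;
 * later unwind requests reuse the cached offset.
 */
static uint64_t frame_offset(struct dso_stub *dso)
{
	if (dso->frame_offset == 0)
		dso->frame_offset = lookup_section_offset();

	return dso->frame_offset;
}

As in the patch, a zero offset doubles as the "not cached yet" sentinel, so a DSO that genuinely lacks the section is simply looked up again on the next call.
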
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1422518843-25818-41-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.h | 1 + tools/perf/util/unwind-libunwind.c | 31 ++++++++++++++++++++----------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 3782c82c6e44..ced92841ff97 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -139,6 +139,7 @@ struct dso { u32 status_seen; size_t file_size; struct list_head open_entry; + u64 frame_offset; } data; union { /* Tool specific area */ diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 6edf535f65c2..e3c40a520a25 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -266,14 +266,17 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, u64 *fde_count) { int ret = -EINVAL, fd; - u64 offset; + u64 offset = dso->data.frame_offset; - fd = dso__data_fd(dso, machine); - if (fd < 0) - return -EINVAL; + if (offset == 0) { + fd = dso__data_fd(dso, machine); + if (fd < 0) + return -EINVAL; - /* Check the .eh_frame section for unwinding info */ - offset = elf_section_offset(fd, ".eh_frame_hdr"); + /* Check the .eh_frame section for unwinding info */ + offset = elf_section_offset(fd, ".eh_frame_hdr"); + dso->data.frame_offset = offset; + } if (offset) ret = unwind_spec_ehframe(dso, machine, offset, @@ -287,14 +290,20 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, static int read_unwind_spec_debug_frame(struct dso *dso, struct machine *machine, u64 *offset) { - int fd = dso__data_fd(dso, machine); + int fd; + u64 ofs = dso->data.frame_offset; - if (fd < 0) - return -EINVAL; + if (ofs == 0) { + fd = dso__data_fd(dso, machine); + if (fd < 0) + return -EINVAL; - /* Check the .debug_frame section for unwinding info */ - *offset = elf_section_offset(fd, ".debug_frame"); + /* Check the .debug_frame section for unwinding info */ + ofs = elf_section_offset(fd, ".debug_frame"); + dso->data.frame_offset = ofs; + } + *offset = ofs; if (*offset) return 0; -- cgit v1.2.3-59-g8ed1b From 4ac30cf74b308fb01338e660d3471cd490a7958a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 29 Jan 2015 17:06:43 +0900 Subject: perf tools: Do not use __perf_session__process_events() directly It's only used for perf record to process build-id because its file size it's not fixed at this time due to remaining header features. However data offset and size is available so that we can use the perf_session__process_events() once we set the file size as the current offset like for now. 
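The idea, fixing up the file size to the current write offset so the public event iterator can be used, in a minimal sketch; the struct and function are stand-ins, not the perf_session API:

#include <unistd.h>

struct data_file_stub {
	int   fd;
	off_t size;
};

/*
 * Illustrative: the remaining header features are still unwritten, so the
 * recorded size is not final; take the current write offset as the size
 * the generic event-processing loop should stop at.
 */
static int finalize_for_processing(struct data_file_stub *file)
{
	off_t pos = lseek(file->fd, 0, SEEK_CUR);

	if (pos <= 0)
		return 0;	/* nothing recorded, nothing to process */

	file->size = pos;
	return 1;
}
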
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1422518843-25818-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 7 +++---- tools/perf/util/session.c | 6 +++--- tools/perf/util/session.h | 3 --- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 8648c6d3003d..1134de22979e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -194,12 +194,13 @@ static int process_buildids(struct record *rec) { struct perf_data_file *file = &rec->file; struct perf_session *session = rec->session; - u64 start = session->header.data_offset; u64 size = lseek(file->fd, 0, SEEK_CUR); if (size == 0) return 0; + file->size = size; + /* * During this process, it'll load kernel map and replace the * dso->long_name to a real pathname it found. In this case @@ -211,9 +212,7 @@ static int process_buildids(struct record *rec) */ symbol_conf.ignore_vmlinux_buildid = true; - return __perf_session__process_events(session, start, - size - start, - size, &build_id__mark_dso_hit_ops); + return perf_session__process_events(session, &build_id__mark_dso_hit_ops); } static void perf_event__synthesize_guest_os(struct machine *machine, void *data) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index b0ce3d6e6231..0baf75f12b7c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1251,9 +1251,9 @@ fetch_mmaped_event(struct perf_session *session, #define NUM_MMAPS 128 #endif -int __perf_session__process_events(struct perf_session *session, - u64 data_offset, u64 data_size, - u64 file_size, struct perf_tool *tool) +static int __perf_session__process_events(struct perf_session *session, + u64 data_offset, u64 data_size, + u64 file_size, struct perf_tool *tool) { int fd = perf_data_file__fd(session->file); u64 head, page_offset, file_offset, file_pos, size; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index dc26ebf60fe4..6d663dc76404 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -49,9 +49,6 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, union perf_event **event_ptr, struct perf_sample *sample); -int __perf_session__process_events(struct perf_session *session, - u64 data_offset, u64 data_size, u64 size, - struct perf_tool *tool); int perf_session__process_events(struct perf_session *session, struct perf_tool *tool); -- cgit v1.2.3-59-g8ed1b From e3d5911221f5cf71e1f0306256d4e42d34a365d2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 29 Jan 2015 17:06:44 +0900 Subject: perf record: Show precise number of samples After perf record finishes, it prints file size and number of samples in the file but this info is wrong since it assumes typical sample size of 24 bytes and divides file size by the value. However as we post-process recorded samples for build-id, it can show correct number like below. If build-id post-processing is not requested just omit the wrong number of samples. $ perf record noploop 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.159 MB perf.data (3989 samples) ] $ perf report --stdio -n # To display the perf.data header info, please use --header/--header-only options. 
# # Samples: 3K of event 'cycles' # Event count (approx.): 3771330663 # # Overhead Samples Command Shared Object Symbol # ........ ............ ....... ................ .......................... # 99.90% 3982 noploop noploop [.] main 0.09% 1 noploop ld-2.17.so [.] _dl_check_map_versions 0.01% 1 noploop [kernel.vmlinux] [k] setup_arg_pages 0.00% 5 noploop [kernel.vmlinux] [k] intel_pmu_enable_all Reported-by: Milian Wolff Signed-off-by: Namhyung Kim Reviewed-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1422518843-25818-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 51 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1134de22979e..9900b433e861 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -190,6 +190,19 @@ out: return rc; } +static int process_sample_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct record *rec = container_of(tool, struct record, tool); + + rec->samples++; + + return build_id__mark_dso_hit(tool, event, sample, evsel, machine); +} + static int process_buildids(struct record *rec) { struct perf_data_file *file = &rec->file; @@ -212,7 +225,7 @@ static int process_buildids(struct record *rec) */ symbol_conf.ignore_vmlinux_buildid = true; - return perf_session__process_events(session, &build_id__mark_dso_hit_ops); + return perf_session__process_events(session, &rec->tool); } static void perf_event__synthesize_guest_os(struct machine *machine, void *data) @@ -503,19 +516,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } - if (!quiet) { + if (!quiet) fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); - /* - * Approximate RIP event size: 24 bytes. 
- */ - fprintf(stderr, - "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n", - (double)rec->bytes_written / 1024.0 / 1024.0, - file->path, - rec->bytes_written / 24); - } - out_child: if (forks) { int exit_status; @@ -534,6 +537,9 @@ out_child: } else status = err; + /* this will be recalculated during process_buildids() */ + rec->samples = 0; + if (!err && !file->is_pipe) { rec->session->header.data_size += rec->bytes_written; @@ -543,6 +549,20 @@ out_child: file->fd, true); } + if (!err && !quiet) { + char samples[128]; + + if (rec->samples) + scnprintf(samples, sizeof(samples), + " (%" PRIu64 " samples)", rec->samples); + else + samples[0] = '\0'; + + fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n", + perf_data_file__size(file) / 1024.0 / 1024.0, + file->path, samples); + } + out_delete_session: perf_session__delete(session); return status; @@ -719,6 +739,13 @@ static struct record record = { .default_per_cpu = true, }, }, + .tool = { + .sample = process_sample_event, + .fork = perf_event__process_fork, + .comm = perf_event__process_comm, + .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, + }, }; #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: " -- cgit v1.2.3-59-g8ed1b From f7913971bdad1a72c6158074786babed477d61e2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 29 Jan 2015 17:06:45 +0900 Subject: perf header: Set header version correctly When check_magic_endian() is called, it checks the magic number in the perf data file to determine version and endianness. But if it uses a same endian the verison number wasn't updated and makes confusion. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1422518843-25818-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b20e40c74468..1f407f7352a7 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2237,6 +2237,7 @@ static int check_magic_endian(u64 magic, uint64_t hdr_sz, * - unique number to identify actual perf.data files * - encode endianness of file */ + ph->version = PERF_HEADER_VERSION_2; /* check magic number with one endianness */ if (magic == __perf_magic2) @@ -2247,7 +2248,6 @@ static int check_magic_endian(u64 magic, uint64_t hdr_sz, return -1; ph->needs_swap = true; - ph->version = PERF_HEADER_VERSION_2; return 0; } -- cgit v1.2.3-59-g8ed1b From 62e503b7ed98fcdf16308cda0b5378e7840f4339 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 29 Jan 2015 17:06:46 +0900 Subject: perf evsel: Set attr.task bit for a tracking event The perf_event_attr.task bit is to track task (fork and exit) events but it missed to be set by perf_evsel__config(). While it was not a problem in practice since setting other bits (comm/mmap) ended up being in same result, it'd be good to set it explicitly anyway. The attr->task is to track task related events (fork/exit) only but other meta events like comm and mmap[2] also needs the task events. So setting attr->comm and/or attr->mmap causes the kernel emits the task events anyway. So the attr->task is only meaningful when other bits are off but I'd like to set it for completeness. 
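For context, a hedged raw-syscall sketch of what a pure "tracking" event looks like: a dummy software event whose only job is to make the kernel emit side-band task/comm/mmap records. It assumes a kernel and libc new enough to provide PERF_COUNT_SW_DUMMY and SYS_perf_event_open, and it is not the evsel config code itself:

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_tracking_event(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_DUMMY;
	attr.task   = 1;	/* fork/exit records */
	attr.comm   = 1;	/* comm records */
	attr.mmap   = 1;	/* mmap records */

	return (int) syscall(SYS_perf_event_open, &attr, pid, -1, -1, 0);
}
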
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1422518843-25818-6-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1d826d63bc20..ea51a90e20a0 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -709,6 +709,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) if (opts->sample_weight) perf_evsel__set_sample_bit(evsel, WEIGHT); + attr->task = track; attr->mmap = track; attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; -- cgit v1.2.3-59-g8ed1b From 0b064f43001fc2627ff8c3020647b85db040235f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 29 Jan 2015 17:06:42 +0900 Subject: perf symbols: Support to read compressed module from build-id cache The commit c00c48fc6e6e ("perf symbols: Preparation for compressed kernel module support") added support for compressed kernel modules but it only supports system path DSOs. When a dso is read from build-id cache, its filename doesn't end with ".gz" but has build-id. In this case, we should fallback to the original dso->name. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1422518843-25818-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 06fcd1bf98b6..b24f9d8727a8 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -574,13 +574,16 @@ static int decompress_kmodule(struct dso *dso, const char *name, const char *ext = strrchr(name, '.'); char tmpbuf[] = "/tmp/perf-kmod-XXXXXX"; - if ((type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP && - type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP) || - type != dso->symtab_type) + if (type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP && + type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP && + type != DSO_BINARY_TYPE__BUILD_ID_CACHE) return -1; - if (!ext || !is_supported_compression(ext + 1)) - return -1; + if (!ext || !is_supported_compression(ext + 1)) { + ext = strrchr(dso->name, '.'); + if (!ext || !is_supported_compression(ext + 1)) + return -1; + } fd = mkstemp(tmpbuf); if (fd < 0) -- cgit v1.2.3-59-g8ed1b From 42aa276f40730211383e9a9923416f1fb9841d68 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 29 Jan 2015 17:06:48 +0900 Subject: perf tools: Use perf_data_file__fd() consistently Do not reference file->fd directly since we want hide the implementation details from outside for possible future changes. 
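The accessor itself is trivial; the point of the change is a single seam for future refactoring. A generic sketch of the pattern, with the struct and function names as illustrative stand-ins for the real perf_data_file API:

struct file_handle_stub {
	int fd;		/* callers should not reach in here directly */
};

/*
 * Single accessor: if the descriptor later moves (for example behind
 * lazy opening), only this function has to change, not every caller.
 */
static inline int file_handle_stub__fd(struct file_handle_stub *file)
{
	return file->fd;
}
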
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1422518843-25818-8-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 5 +++-- tools/perf/builtin-record.c | 14 +++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 84df2deed988..a13641e066f5 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -343,6 +343,7 @@ static int __cmd_inject(struct perf_inject *inject) int ret = -EINVAL; struct perf_session *session = inject->session; struct perf_data_file *file_out = &inject->output; + int fd = perf_data_file__fd(file_out); signal(SIGINT, sig_handler); @@ -376,7 +377,7 @@ static int __cmd_inject(struct perf_inject *inject) } if (!file_out->is_pipe) - lseek(file_out->fd, session->header.data_offset, SEEK_SET); + lseek(fd, session->header.data_offset, SEEK_SET); ret = perf_session__process_events(session, &inject->tool); @@ -385,7 +386,7 @@ static int __cmd_inject(struct perf_inject *inject) perf_header__set_feat(&session->header, HEADER_BUILD_ID); session->header.data_size = inject->bytes_written; - perf_session__write_header(session, session->evlist, file_out->fd, true); + perf_session__write_header(session, session->evlist, fd, true); } return ret; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 9900b433e861..404ab3434052 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -208,7 +208,7 @@ static int process_buildids(struct record *rec) struct perf_data_file *file = &rec->file; struct perf_session *session = rec->session; - u64 size = lseek(file->fd, 0, SEEK_CUR); + u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); if (size == 0) return 0; @@ -334,6 +334,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) struct perf_data_file *file = &rec->file; struct perf_session *session; bool disabled = false, draining = false; + int fd; rec->progname = argv[0]; @@ -348,6 +349,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) return -1; } + fd = perf_data_file__fd(file); rec->session = session; record__init_features(rec); @@ -372,12 +374,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); if (file->is_pipe) { - err = perf_header__write_pipe(file->fd); + err = perf_header__write_pipe(fd); if (err < 0) goto out_child; } else { - err = perf_session__write_header(session, rec->evlist, - file->fd, false); + err = perf_session__write_header(session, rec->evlist, fd, false); if (err < 0) goto out_child; } @@ -409,7 +410,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) * return this more properly and also * propagate errors that now are calling die() */ - err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist, + err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, process_synthesized_event); if (err <= 0) { pr_err("Couldn't record tracing data.\n"); @@ -545,8 +546,7 @@ out_child: if (!rec->no_buildid) process_buildids(rec); - perf_session__write_header(rec->session, rec->evlist, - file->fd, true); + perf_session__write_header(rec->session, rec->evlist, fd, true); } if (!err && !quiet) { -- cgit v1.2.3-59-g8ed1b From c52686f9f888d23ca72f1309e86af8e91d075697 
Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 29 Jan 2015 17:02:01 -0300 Subject: perf symbols: Convert lseek + read to pread When dso_cache__read() is called, it reads data from the given offset using lseek + normal read syscall. It can be combined to a single pread syscall. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1422518843-25818-40-git-send-email-namhyung@kernel.org [ Fixed it up when cherry picking it from the multi threaded patchkit ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 45be944d450a..c2f7d3b90966 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -532,12 +532,8 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size) break; cache_offset = offset & DSO__DATA_CACHE_MASK; - ret = -EINVAL; - if (-1 == lseek(dso->data.fd, cache_offset, SEEK_SET)) - break; - - ret = read(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE); + ret = pread(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE, cache_offset); if (ret <= 0) break; -- cgit v1.2.3-59-g8ed1b From 652884fe0c7bd57f534c5fe68d6def0dc8c4b7ed Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 23 Jan 2015 11:20:10 +0100 Subject: perf: Add a bit of paranoia Add a few WARN()s to catch things that should never happen. Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20150123125834.150481799@infradead.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index b4a696c4dc76..b358cb38e4a5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1275,6 +1275,8 @@ static void perf_group_attach(struct perf_event *event) if (group_leader == event) return; + WARN_ON_ONCE(group_leader->ctx != event->ctx); + if (group_leader->group_flags & PERF_GROUP_SOFTWARE && !is_software_event(event)) group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; @@ -1296,6 +1298,10 @@ static void list_del_event(struct perf_event *event, struct perf_event_context *ctx) { struct perf_cpu_context *cpuctx; + + WARN_ON_ONCE(event->ctx != ctx); + lockdep_assert_held(&ctx->lock); + /* * We can have double detach due to exit/hot-unplug + close. */ @@ -1380,6 +1386,8 @@ static void perf_group_detach(struct perf_event *event) /* Inherit group flags from the previous leader */ sibling->group_flags = event->group_flags; + + WARN_ON_ONCE(sibling->ctx != event->ctx); } out: @@ -1442,6 +1450,10 @@ event_sched_out(struct perf_event *event, { u64 tstamp = perf_event_time(event); u64 delta; + + WARN_ON_ONCE(event->ctx != ctx); + lockdep_assert_held(&ctx->lock); + /* * An event which could not be activated because of * filter mismatch still needs to have its timings @@ -7822,14 +7834,19 @@ static void perf_free_event(struct perf_event *event, put_event(parent); + raw_spin_lock_irq(&ctx->lock); perf_group_detach(event); list_del_event(event, ctx); + raw_spin_unlock_irq(&ctx->lock); free_event(event); } /* - * free an unexposed, unused context as created by inheritance by + * Free an unexposed, unused context as created by inheritance by * perf_event_init_task below, used by fork() in case of fail. 
+ * + * Not all locks are strictly required, but take them anyway to be nice and + * help out with the lockdep assertions. */ void perf_event_free_task(struct task_struct *task) { -- cgit v1.2.3-59-g8ed1b From f63a8daa5812afef4f06c962351687e1ff9ccb2b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 23 Jan 2015 12:24:14 +0100 Subject: perf: Fix event->ctx locking There have been a few reported issues wrt. the lack of locking around changing event->ctx. This patch tries to address those. It avoids the whole rwsem thing; and while it appears to work, please give it some thought in review. What I did fail at is sensible runtime checks on the use of event->ctx, the RCU use makes it very hard. Signed-off-by: Peter Zijlstra (Intel) Cc: Paul E. McKenney Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20150123125834.209535886@infradead.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 244 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 207 insertions(+), 37 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index b358cb38e4a5..417a96bf3d41 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -906,6 +906,77 @@ static void put_ctx(struct perf_event_context *ctx) } } +/* + * Because of perf_event::ctx migration in sys_perf_event_open::move_group and + * perf_pmu_migrate_context() we need some magic. + * + * Those places that change perf_event::ctx will hold both + * perf_event_ctx::mutex of the 'old' and 'new' ctx value. + * + * Lock ordering is by mutex address. There is one other site where + * perf_event_context::mutex nests and that is put_event(). But remember that + * that is a parent<->child context relation, and migration does not affect + * children, therefore these two orderings should not interact. + * + * The change in perf_event::ctx does not affect children (as claimed above) + * because the sys_perf_event_open() case will install a new event and break + * the ctx parent<->child relation, and perf_pmu_migrate_context() is only + * concerned with cpuctx and that doesn't have children. + * + * The places that change perf_event::ctx will issue: + * + * perf_remove_from_context(); + * synchronize_rcu(); + * perf_install_in_context(); + * + * to affect the change. The remove_from_context() + synchronize_rcu() should + * quiesce the event, after which we can install it in the new location. This + * means that only external vectors (perf_fops, prctl) can perturb the event + * while in transit. Therefore all such accessors should also acquire + * perf_event_context::mutex to serialize against this. + * + * However; because event->ctx can change while we're waiting to acquire + * ctx->mutex we must be careful and use the below perf_event_ctx_lock() + * function. 
+ * + * Lock order: + * task_struct::perf_event_mutex + * perf_event_context::mutex + * perf_event_context::lock + * perf_event::child_mutex; + * perf_event::mmap_mutex + * mmap_sem + */ +static struct perf_event_context *perf_event_ctx_lock(struct perf_event *event) +{ + struct perf_event_context *ctx; + +again: + rcu_read_lock(); + ctx = ACCESS_ONCE(event->ctx); + if (!atomic_inc_not_zero(&ctx->refcount)) { + rcu_read_unlock(); + goto again; + } + rcu_read_unlock(); + + mutex_lock(&ctx->mutex); + if (event->ctx != ctx) { + mutex_unlock(&ctx->mutex); + put_ctx(ctx); + goto again; + } + + return ctx; +} + +static void perf_event_ctx_unlock(struct perf_event *event, + struct perf_event_context *ctx) +{ + mutex_unlock(&ctx->mutex); + put_ctx(ctx); +} + /* * This must be done under the ctx->lock, such as to serialize against * context_equiv(), therefore we cannot call put_ctx() since that might end up @@ -1666,7 +1737,7 @@ int __perf_event_disable(void *info) * is the current context on this CPU and preemption is disabled, * hence we can't get into perf_event_task_sched_out for this context. */ -void perf_event_disable(struct perf_event *event) +static void _perf_event_disable(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; struct task_struct *task = ctx->task; @@ -1707,6 +1778,19 @@ retry: } raw_spin_unlock_irq(&ctx->lock); } + +/* + * Strictly speaking kernel users cannot create groups and therefore this + * interface does not need the perf_event_ctx_lock() magic. + */ +void perf_event_disable(struct perf_event *event) +{ + struct perf_event_context *ctx; + + ctx = perf_event_ctx_lock(event); + _perf_event_disable(event); + perf_event_ctx_unlock(event, ctx); +} EXPORT_SYMBOL_GPL(perf_event_disable); static void perf_set_shadow_time(struct perf_event *event, @@ -2170,7 +2254,7 @@ unlock: * perf_event_for_each_child or perf_event_for_each as described * for perf_event_disable. */ -void perf_event_enable(struct perf_event *event) +static void _perf_event_enable(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; struct task_struct *task = ctx->task; @@ -2226,9 +2310,21 @@ retry: out: raw_spin_unlock_irq(&ctx->lock); } + +/* + * See perf_event_disable(); + */ +void perf_event_enable(struct perf_event *event) +{ + struct perf_event_context *ctx; + + ctx = perf_event_ctx_lock(event); + _perf_event_enable(event); + perf_event_ctx_unlock(event, ctx); +} EXPORT_SYMBOL_GPL(perf_event_enable); -int perf_event_refresh(struct perf_event *event, int refresh) +static int _perf_event_refresh(struct perf_event *event, int refresh) { /* * not supported on inherited events @@ -2237,10 +2333,25 @@ int perf_event_refresh(struct perf_event *event, int refresh) return -EINVAL; atomic_add(refresh, &event->event_limit); - perf_event_enable(event); + _perf_event_enable(event); return 0; } + +/* + * See perf_event_disable() + */ +int perf_event_refresh(struct perf_event *event, int refresh) +{ + struct perf_event_context *ctx; + int ret; + + ctx = perf_event_ctx_lock(event); + ret = _perf_event_refresh(event, refresh); + perf_event_ctx_unlock(event, ctx); + + return ret; +} EXPORT_SYMBOL_GPL(perf_event_refresh); static void ctx_sched_out(struct perf_event_context *ctx, @@ -3433,7 +3544,16 @@ static void perf_remove_from_owner(struct perf_event *event) rcu_read_unlock(); if (owner) { - mutex_lock(&owner->perf_event_mutex); + /* + * If we're here through perf_event_exit_task() we're already + * holding ctx->mutex which would be an inversion wrt. the + * normal lock order. 
+ * + * However we can safely take this lock because its the child + * ctx->mutex. + */ + mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING); + /* * We have to re-check the event->owner field, if it is cleared * we raced with perf_event_exit_task(), acquiring the mutex @@ -3559,12 +3679,13 @@ static int perf_event_read_group(struct perf_event *event, u64 read_format, char __user *buf) { struct perf_event *leader = event->group_leader, *sub; - int n = 0, size = 0, ret = -EFAULT; struct perf_event_context *ctx = leader->ctx; - u64 values[5]; + int n = 0, size = 0, ret; u64 count, enabled, running; + u64 values[5]; + + lockdep_assert_held(&ctx->mutex); - mutex_lock(&ctx->mutex); count = perf_event_read_value(leader, &enabled, &running); values[n++] = 1 + leader->nr_siblings; @@ -3579,7 +3700,7 @@ static int perf_event_read_group(struct perf_event *event, size = n * sizeof(u64); if (copy_to_user(buf, values, size)) - goto unlock; + return -EFAULT; ret = size; @@ -3593,14 +3714,11 @@ static int perf_event_read_group(struct perf_event *event, size = n * sizeof(u64); if (copy_to_user(buf + ret, values, size)) { - ret = -EFAULT; - goto unlock; + return -EFAULT; } ret += size; } -unlock: - mutex_unlock(&ctx->mutex); return ret; } @@ -3672,8 +3790,14 @@ static ssize_t perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct perf_event *event = file->private_data; + struct perf_event_context *ctx; + int ret; - return perf_read_hw(event, buf, count); + ctx = perf_event_ctx_lock(event); + ret = perf_read_hw(event, buf, count); + perf_event_ctx_unlock(event, ctx); + + return ret; } static unsigned int perf_poll(struct file *file, poll_table *wait) @@ -3699,7 +3823,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) return events; } -static void perf_event_reset(struct perf_event *event) +static void _perf_event_reset(struct perf_event *event) { (void)perf_event_read(event); local64_set(&event->count, 0); @@ -3718,6 +3842,7 @@ static void perf_event_for_each_child(struct perf_event *event, struct perf_event *child; WARN_ON_ONCE(event->ctx->parent_ctx); + mutex_lock(&event->child_mutex); func(event); list_for_each_entry(child, &event->child_list, child_list) @@ -3731,14 +3856,13 @@ static void perf_event_for_each(struct perf_event *event, struct perf_event_context *ctx = event->ctx; struct perf_event *sibling; - WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); + lockdep_assert_held(&ctx->mutex); + event = event->group_leader; perf_event_for_each_child(event, func); list_for_each_entry(sibling, &event->sibling_list, group_entry) perf_event_for_each_child(sibling, func); - mutex_unlock(&ctx->mutex); } static int perf_event_period(struct perf_event *event, u64 __user *arg) @@ -3808,25 +3932,24 @@ static int perf_event_set_output(struct perf_event *event, struct perf_event *output_event); static int perf_event_set_filter(struct perf_event *event, void __user *arg); -static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg) { - struct perf_event *event = file->private_data; void (*func)(struct perf_event *); u32 flags = arg; switch (cmd) { case PERF_EVENT_IOC_ENABLE: - func = perf_event_enable; + func = _perf_event_enable; break; case PERF_EVENT_IOC_DISABLE: - func = perf_event_disable; + func = _perf_event_disable; break; case PERF_EVENT_IOC_RESET: - func = perf_event_reset; + func = _perf_event_reset; break; case 
PERF_EVENT_IOC_REFRESH: - return perf_event_refresh(event, arg); + return _perf_event_refresh(event, arg); case PERF_EVENT_IOC_PERIOD: return perf_event_period(event, (u64 __user *)arg); @@ -3873,6 +3996,19 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return 0; } +static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct perf_event *event = file->private_data; + struct perf_event_context *ctx; + long ret; + + ctx = perf_event_ctx_lock(event); + ret = _perf_ioctl(event, cmd, arg); + perf_event_ctx_unlock(event, ctx); + + return ret; +} + #ifdef CONFIG_COMPAT static long perf_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -3895,11 +4031,15 @@ static long perf_compat_ioctl(struct file *file, unsigned int cmd, int perf_event_task_enable(void) { + struct perf_event_context *ctx; struct perf_event *event; mutex_lock(¤t->perf_event_mutex); - list_for_each_entry(event, ¤t->perf_event_list, owner_entry) - perf_event_for_each_child(event, perf_event_enable); + list_for_each_entry(event, ¤t->perf_event_list, owner_entry) { + ctx = perf_event_ctx_lock(event); + perf_event_for_each_child(event, _perf_event_enable); + perf_event_ctx_unlock(event, ctx); + } mutex_unlock(¤t->perf_event_mutex); return 0; @@ -3907,11 +4047,15 @@ int perf_event_task_enable(void) int perf_event_task_disable(void) { + struct perf_event_context *ctx; struct perf_event *event; mutex_lock(¤t->perf_event_mutex); - list_for_each_entry(event, ¤t->perf_event_list, owner_entry) - perf_event_for_each_child(event, perf_event_disable); + list_for_each_entry(event, ¤t->perf_event_list, owner_entry) { + ctx = perf_event_ctx_lock(event); + perf_event_for_each_child(event, _perf_event_disable); + perf_event_ctx_unlock(event, ctx); + } mutex_unlock(¤t->perf_event_mutex); return 0; @@ -7269,6 +7413,15 @@ out: return ret; } +static void mutex_lock_double(struct mutex *a, struct mutex *b) +{ + if (b < a) + swap(a, b); + + mutex_lock(a); + mutex_lock_nested(b, SINGLE_DEPTH_NESTING); +} + /** * sys_perf_event_open - open a performance event, associate it to a task/cpu * @@ -7284,7 +7437,7 @@ SYSCALL_DEFINE5(perf_event_open, struct perf_event *group_leader = NULL, *output_event = NULL; struct perf_event *event, *sibling; struct perf_event_attr attr; - struct perf_event_context *ctx; + struct perf_event_context *ctx, *uninitialized_var(gctx); struct file *event_file = NULL; struct fd group = {NULL, 0}; struct task_struct *task = NULL; @@ -7482,9 +7635,14 @@ SYSCALL_DEFINE5(perf_event_open, } if (move_group) { - struct perf_event_context *gctx = group_leader->ctx; + gctx = group_leader->ctx; + + /* + * See perf_event_ctx_lock() for comments on the details + * of swizzling perf_event::ctx. + */ + mutex_lock_double(&gctx->mutex, &ctx->mutex); - mutex_lock(&gctx->mutex); perf_remove_from_context(group_leader, false); /* @@ -7499,15 +7657,19 @@ SYSCALL_DEFINE5(perf_event_open, perf_event__state_init(sibling); put_ctx(gctx); } - mutex_unlock(&gctx->mutex); - put_ctx(gctx); + } else { + mutex_lock(&ctx->mutex); } WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); if (move_group) { + /* + * Wait for everybody to stop referencing the events through + * the old lists, before installing it on new lists. 
+ */ synchronize_rcu(); + perf_install_in_context(ctx, group_leader, group_leader->cpu); get_ctx(ctx); list_for_each_entry(sibling, &group_leader->sibling_list, @@ -7519,6 +7681,11 @@ SYSCALL_DEFINE5(perf_event_open, perf_install_in_context(ctx, event, event->cpu); perf_unpin_context(ctx); + + if (move_group) { + mutex_unlock(&gctx->mutex); + put_ctx(gctx); + } mutex_unlock(&ctx->mutex); put_online_cpus(); @@ -7626,7 +7793,11 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx; dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx; - mutex_lock(&src_ctx->mutex); + /* + * See perf_event_ctx_lock() for comments on the details + * of swizzling perf_event::ctx. + */ + mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex); list_for_each_entry_safe(event, tmp, &src_ctx->event_list, event_entry) { perf_remove_from_context(event, false); @@ -7634,11 +7805,9 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) put_ctx(src_ctx); list_add(&event->migrate_entry, &events); } - mutex_unlock(&src_ctx->mutex); synchronize_rcu(); - mutex_lock(&dst_ctx->mutex); list_for_each_entry_safe(event, tmp, &events, migrate_entry) { list_del(&event->migrate_entry); if (event->state >= PERF_EVENT_STATE_OFF) @@ -7648,6 +7817,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) get_ctx(dst_ctx); } mutex_unlock(&dst_ctx->mutex); + mutex_unlock(&src_ctx->mutex); } EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); -- cgit v1.2.3-59-g8ed1b From 8f95b435b62522aed3381aaea920de8d09ccabf3 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Tue, 27 Jan 2015 11:53:12 +0100 Subject: perf: Fix move_group() order Jiri reported triggering the new WARN_ON_ONCE in event_sched_out over the weekend: event_sched_out.isra.79+0x2b9/0x2d0 group_sched_out+0x69/0xc0 ctx_sched_out+0x106/0x130 task_ctx_sched_out+0x37/0x70 __perf_install_in_context+0x70/0x1a0 remote_function+0x48/0x60 generic_exec_single+0x15b/0x1d0 smp_call_function_single+0x67/0xa0 task_function_call+0x53/0x80 perf_install_in_context+0x8b/0x110 I think the below should cure this; if we install a group leader it will iterate the (still intact) group list and find its siblings and try and install those too -- even though those still have the old event->ctx -- in the new ctx. Upon installing the first group sibling we'd try and schedule out the group and trigger the above warn. Fix this by installing the group leader last, installing siblings would have no effect, they're not reachable through the group lists and therefore we don't schedule them. Also delay resetting the state until we're absolutely sure the events are quiescent. Reported-by: Jiri Olsa Reported-by: vincent.weaver@maine.edu Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20150126162639.GA21418@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 56 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 417a96bf3d41..142dbabc1615 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7645,16 +7645,9 @@ SYSCALL_DEFINE5(perf_event_open, perf_remove_from_context(group_leader, false); - /* - * Removing from the context ends up with disabled - * event. What we want here is event in the initial - * startup state, ready to be add into new context. 
- */ - perf_event__state_init(group_leader); list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { perf_remove_from_context(sibling, false); - perf_event__state_init(sibling); put_ctx(gctx); } } else { @@ -7670,13 +7663,31 @@ SYSCALL_DEFINE5(perf_event_open, */ synchronize_rcu(); - perf_install_in_context(ctx, group_leader, group_leader->cpu); - get_ctx(ctx); + /* + * Install the group siblings before the group leader. + * + * Because a group leader will try and install the entire group + * (through the sibling list, which is still intact), we can + * end up with siblings installed in the wrong context. + * + * By installing siblings first we NO-OP because they're not + * reachable through the group lists. + */ list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { + perf_event__state_init(sibling); perf_install_in_context(ctx, sibling, sibling->cpu); get_ctx(ctx); } + + /* + * Removing from the context ends up with a disabled + * event. What we want here is the event in the initial + * startup state, ready to be added into the new context. + */ + perf_event__state_init(group_leader); + perf_install_in_context(ctx, group_leader, group_leader->cpu); + get_ctx(ctx); } perf_install_in_context(ctx, event, event->cpu); @@ -7806,8 +7817,35 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) list_add(&event->migrate_entry, &events); } + /* + * Wait for the events to quiesce before re-instating them. + */ synchronize_rcu(); + /* + * Re-instate events in 2 passes. + * + * Skip over group leaders and only install siblings on this first + * pass; siblings will not get enabled without a leader, however a + * leader will enable its siblings, even if those are still on the old + * context. + */ + list_for_each_entry_safe(event, tmp, &events, migrate_entry) { + if (event->group_leader == event) + continue; + + list_del(&event->migrate_entry); + if (event->state >= PERF_EVENT_STATE_OFF) + event->state = PERF_EVENT_STATE_INACTIVE; + account_event_cpu(event, dst_cpu); + perf_install_in_context(dst_ctx, event, dst_cpu); + get_ctx(dst_ctx); + } + + /* + * Once all the siblings are set up properly, install the group leaders + * to make it go. + */ list_for_each_entry_safe(event, tmp, &events, migrate_entry) { list_del(&event->migrate_entry); if (event->state >= PERF_EVENT_STATE_OFF) -- cgit v1.2.3-59-g8ed1b From a83fe28e2e45392464858a96745db26ac73670c8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 Jan 2015 14:44:34 +0100 Subject: perf: Fix put_event() ctx lock What I suspect (I'm in zombie mode today, it seems) is that while I initially thought that it was impossible for ctx to change when the refcount dropped to 0, I now suspect it's possible. Note that until perf_remove_from_context() the event is still active and visible on the lists. So a concurrent sys_perf_event_open() from another task into this task can race.
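The pattern this fix depends on is easy to state outside the kernel: reload the object's context pointer, take a reference, lock the context, and retry if the pointer changed while we slept on the mutex. Below is a minimal user-space sketch of that idiom, not kernel code; the names (object, context, get_ref_not_zero) are invented, pthread mutexes and C11 atomics stand in for the kernel primitives, and the RCU protection that keeps the context memory valid while the reference is taken is assumed rather than shown.

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct context {
    pthread_mutex_t mutex;
    atomic_int refcount;
};

struct object {
    _Atomic(struct context *) ctx;   /* may be re-pointed concurrently */
};

/* Take a reference unless the count has already dropped to zero. */
int get_ref_not_zero(struct context *ctx)
{
    int old = atomic_load(&ctx->refcount);

    do {
        if (old == 0)
            return 0;
    } while (!atomic_compare_exchange_weak(&ctx->refcount, &old, old + 1));

    return 1;
}

void put_ref(struct context *ctx)
{
    if (atomic_fetch_sub(&ctx->refcount, 1) == 1)
        free(ctx);   /* last reference */
}

/*
 * Lock the object's current context.  obj->ctx may be re-pointed at a
 * different context while we sleep on the mutex, so re-check it once the
 * lock is held and retry if it changed.  Assumes obj->ctx is updated
 * before the old context's refcount can reach zero.
 */
struct context *object_ctx_lock(struct object *obj)
{
    struct context *ctx;

again:
    ctx = atomic_load(&obj->ctx);
    if (!get_ref_not_zero(ctx))
        goto again;                  /* context is going away, reload */

    pthread_mutex_lock(&ctx->mutex);
    if (atomic_load(&obj->ctx) != ctx) {
        pthread_mutex_unlock(&ctx->mutex);
        put_ref(ctx);
        goto again;                  /* ctx was swizzled under us */
    }

    return ctx;
}

void object_ctx_unlock(struct context *ctx)
{
    pthread_mutex_unlock(&ctx->mutex);
    put_ref(ctx);
}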
Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Cc: Stephane Eranian Cc: mark.rutland@arm.com Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20150129134434.GB26304@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 142dbabc1615..f773fa13d7c2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -947,7 +947,8 @@ static void put_ctx(struct perf_event_context *ctx) * perf_event::mmap_mutex * mmap_sem */ -static struct perf_event_context *perf_event_ctx_lock(struct perf_event *event) +static struct perf_event_context * +perf_event_ctx_lock_nested(struct perf_event *event, int nesting) { struct perf_event_context *ctx; @@ -960,7 +961,7 @@ again: } rcu_read_unlock(); - mutex_lock(&ctx->mutex); + mutex_lock_nested(&ctx->mutex, nesting); if (event->ctx != ctx) { mutex_unlock(&ctx->mutex); put_ctx(ctx); @@ -970,6 +971,12 @@ again: return ctx; } +static inline struct perf_event_context * +perf_event_ctx_lock(struct perf_event *event) +{ + return perf_event_ctx_lock_nested(event, 0); +} + static void perf_event_ctx_unlock(struct perf_event *event, struct perf_event_context *ctx) { @@ -3572,7 +3579,7 @@ static void perf_remove_from_owner(struct perf_event *event) */ static void put_event(struct perf_event *event) { - struct perf_event_context *ctx = event->ctx; + struct perf_event_context *ctx; if (!atomic_long_dec_and_test(&event->refcount)) return; @@ -3580,7 +3587,6 @@ static void put_event(struct perf_event *event) if (!is_kernel_event(event)) perf_remove_from_owner(event); - WARN_ON_ONCE(ctx->parent_ctx); /* * There are two ways this annotation is useful: * @@ -3593,7 +3599,8 @@ static void put_event(struct perf_event *event) * the last filedesc died, so there is no possibility * to trigger the AB-BA case. */ - mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); + ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING); + WARN_ON_ONCE(ctx->parent_ctx); perf_remove_from_context(event, true); mutex_unlock(&ctx->mutex); -- cgit v1.2.3-59-g8ed1b From 7c60fc0e022858e19c66289ec65cc3effc8c0b1f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 28 Jan 2015 18:54:38 +0100 Subject: perf: Use POLLIN instead of POLL_IN for perf poll data in flag Currently we flag available data (via poll syscall) on perf fd with POLL_IN macro, which is normally used for SIGIO interface. We've been lucky, because POLLIN (0x1) is subset of POLL_IN (0x20001) and sys_poll (do_pollfd function) cut the extra bit out (0x20000). 
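The two macros come from different namespaces: POLLIN is a poll(2) event bit, while POLL_IN is a SIGIO si_code. The following self-contained sketch shows why the mix-up went unnoticed; the DEMO_ constants are local stand-ins for the values quoted above rather than includes of the real headers.

#include <stdio.h>

#define DEMO_POLLIN   0x0001u    /* poll(2) event bit */
#define DEMO_POLL_IN  0x20001u   /* SIGIO si_code: (2 << 16) | 1 on the kernel above */

int main(void)
{
    unsigned int requested = DEMO_POLLIN;    /* what the poller asked for */
    unsigned int flagged   = DEMO_POLL_IN;   /* what the old code stored */

    /* do_pollfd()-style masking hides the stray high bit... */
    printf("masked: %#x\n", flagged & requested);   /* prints 0x1 */

    /* ...but the raw value is not a valid poll event mask. */
    printf("raw:    %#x\n", flagged);               /* prints 0x20001 */

    return 0;
}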
Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Stephane Eranian Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1422467678-22341-1-git-send-email-jolsa@kernel.org Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 146a5792b1d2..eadb95ce7aac 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -13,12 +13,13 @@ #include #include #include +#include #include "internal.h" static void perf_output_wakeup(struct perf_output_handle *handle) { - atomic_set(&handle->rb->poll, POLL_IN); + atomic_set(&handle->rb->poll, POLLIN); handle->event->pending_wakeup = 1; irq_work_queue(&handle->event->pending); -- cgit v1.2.3-59-g8ed1b From cc34b98bacb0e102fb720d95a25fed5c6090a70d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 7 Jan 2015 14:56:51 +0000 Subject: perf: Drop module reference on event init failure When initialising an event, perf_init_event will call try_module_get() to ensure that the PMU's module cannot be removed for the lifetime of the event, with __free_event() dropping the reference when the event is finally destroyed. If something fails after the event has been initialised, but before the event is installed, perf_event_alloc will drop the reference on the module. However, if we fail to initialise an event for some reason (e.g. we ask an uncore PMU to perform sampling, and it refuses to initialise the event), we do not drop the refcount. If we try to open such a bogus event without a precise IDR type, we will loop over each PMU in the pmus list, incrementing each of their refcounts without decrementing them. This patch adds a module_put when pmu->event_init(event) fails, ensuring that the refcounts are balanced in failure cases. As the innards of the precise and search based initialisation look very similar, this logic is hoisted out into a new helper function. While the early return for the failed try_module_get is removed from the search case, this is handled by the remaining return when ret is not -ENOENT. 
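The idiom being introduced here is a general one: pair the reference acquisition with the init call so that every failure path drops the reference it took. A minimal sketch under invented names (provider, get/put/init); the real code uses try_module_get()/module_put() and pmu->event_init().

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>

struct provider {
    bool (*get)(struct provider *);          /* e.g. try_module_get() */
    void (*put)(struct provider *);          /* e.g. module_put() */
    int  (*init)(struct provider *, void *); /* e.g. pmu->event_init() */
};

/* Take a reference, try to init, and drop the reference on any failure. */
int provider_try_init(struct provider *p, void *obj)
{
    int ret;

    if (!p->get(p))
        return -ENODEV;

    ret = p->init(p, obj);
    if (ret)
        p->put(p);   /* balance the reference taken above */

    return ret;
}

/*
 * A search loop can now probe providers in turn without leaking
 * references: only the provider that accepted the object keeps one.
 */
struct provider *provider_find(struct provider **tbl, int n, void *obj)
{
    for (int i = 0; i < n; i++) {
        int ret = provider_try_init(tbl[i], obj);

        if (!ret)
            return tbl[i];
        if (ret != -ENOENT)
            break;   /* hard error: stop probing */
    }

    return NULL;
}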
Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Cc: Will Deacon Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1420642611-22667-1-git-send-email-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f773fa13d7c2..37cc20e8aa3b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7027,6 +7027,20 @@ void perf_pmu_unregister(struct pmu *pmu) } EXPORT_SYMBOL_GPL(perf_pmu_unregister); +static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) +{ + int ret; + + if (!try_module_get(pmu->module)) + return -ENODEV; + event->pmu = pmu; + ret = pmu->event_init(event); + if (ret) + module_put(pmu->module); + + return ret; +} + struct pmu *perf_init_event(struct perf_event *event) { struct pmu *pmu = NULL; @@ -7039,24 +7053,14 @@ struct pmu *perf_init_event(struct perf_event *event) pmu = idr_find(&pmu_idr, event->attr.type); rcu_read_unlock(); if (pmu) { - if (!try_module_get(pmu->module)) { - pmu = ERR_PTR(-ENODEV); - goto unlock; - } - event->pmu = pmu; - ret = pmu->event_init(event); + ret = perf_try_init_event(pmu, event); if (ret) pmu = ERR_PTR(ret); goto unlock; } list_for_each_entry_rcu(pmu, &pmus, entry) { - if (!try_module_get(pmu->module)) { - pmu = ERR_PTR(-ENODEV); - goto unlock; - } - event->pmu = pmu; - ret = pmu->event_init(event); + ret = perf_try_init_event(pmu, event); if (!ret) goto unlock; -- cgit v1.2.3-59-g8ed1b From 2fde4f94e0a9531251e706fa57131b51b0df042e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 7 Jan 2015 15:01:54 +0000 Subject: perf: Decouple unthrottling and rotating Currently the adjustments made as part of perf_event_task_tick() use the percpu rotation lists to iterate over any active PMU contexts, but these are not used by the context rotation code, having been replaced by separate (per-context) hrtimer callbacks. However, some manipulation of the rotation lists (i.e. removal of contexts) has remained in perf_rotate_context(). This leads to the following issues: * Contexts are not always removed from the rotation lists. Removal of PMUs which have been placed in rotation lists, but have not been removed by a hrtimer callback can result in corruption of the rotation lists (when memory backing the context is freed). This has been observed to result in hangs when PMU drivers built as modules are inserted and removed around the creation of events for said PMUs. * Contexts which do not require rotation may be removed from the rotation lists as a result of a hrtimer, and will not be considered by the unthrottling code in perf_event_task_tick. This patch fixes the issue by updating the rotation list when events are scheduled in/out, ensuring that each rotation list stays in sync with the HW state. As each event holds a refcount on the module of its PMU, this ensures that when a PMU module is unloaded none of its CPU contexts can be in a rotation list. By maintaining a list of perf_event_contexts rather than perf_event_cpu_contexts, we don't need separate paths to handle the cpu and task contexts, which also makes the code a little simpler. As the rotation_list variables are not used for rotation, these are renamed to active_ctx_list, which better matches their current function. perf_pmu_rotate_{start,stop} are renamed to perf_pmu_ctx_{activate,deactivate}.
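The bookkeeping rule this patch adopts is small enough to sketch in isolation: list membership is driven only by the 0 <-> 1 transitions of nr_active, so the active list always mirrors what the hardware is counting. The sketch below uses an invented minimal list type and demo_ names; the kernel's list_head helpers play this role in the real code.

struct list_node {
    struct list_node *prev, *next;
};

struct demo_ctx {
    struct list_node active_node;   /* on the per-CPU active list while counting */
    int nr_active;                  /* number of events currently scheduled in */
};

void list_init(struct list_node *n)
{
    n->prev = n;
    n->next = n;
}

void list_add(struct list_node *n, struct list_node *head)
{
    n->next = head->next;
    n->prev = head;
    head->next->prev = n;
    head->next = n;
}

void list_del_init(struct list_node *n)
{
    n->prev->next = n->next;
    n->next->prev = n->prev;
    list_init(n);
}

/* An event started counting in @ctx. */
void demo_event_sched_in(struct demo_ctx *ctx, struct list_node *active_list)
{
    if (!ctx->nr_active++)            /* 0 -> 1: the context becomes active */
        list_add(&ctx->active_node, active_list);
}

/* An event stopped counting in @ctx. */
void demo_event_sched_out(struct demo_ctx *ctx)
{
    if (!--ctx->nr_active)            /* 1 -> 0: the context goes idle */
        list_del_init(&ctx->active_node);
}

Because membership changes only at schedule in/out time, nothing has to prune the list from the rotation path, and the tick handler can walk the same list for unthrottling without worrying about entries that a rotation callback removed behind its back.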
Reported-by: Johannes Jensen Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Cc: Will Deacon Cc: Arnaldo Carvalho de Melo Cc: Fengguang Wu Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20150129134511.GR17721@leverpostej Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- kernel/events/core.c | 81 +++++++++++++++++----------------------------- 2 files changed, 30 insertions(+), 53 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 216653466a67..5cad0e6f3552 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -469,6 +469,7 @@ struct perf_event_context { */ struct mutex mutex; + struct list_head active_ctx_list; struct list_head pinned_groups; struct list_head flexible_groups; struct list_head event_list; @@ -519,7 +520,6 @@ struct perf_cpu_context { int exclusive; struct hrtimer hrtimer; ktime_t hrtimer_interval; - struct list_head rotation_list; struct pmu *unique_pmu; struct perf_cgroup *cgrp; }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 37cc20e8aa3b..7f2fbb8b5069 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -872,22 +872,32 @@ void perf_pmu_enable(struct pmu *pmu) pmu->pmu_enable(pmu); } -static DEFINE_PER_CPU(struct list_head, rotation_list); +static DEFINE_PER_CPU(struct list_head, active_ctx_list); /* - * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized - * because they're strictly cpu affine and rotate_start is called with IRQs - * disabled, while rotate_context is called from IRQ context. + * perf_event_ctx_activate(), perf_event_ctx_deactivate(), and + * perf_event_task_tick() are fully serialized because they're strictly cpu + * affine and perf_event_ctx{activate,deactivate} are called with IRQs + * disabled, while perf_event_task_tick is called from IRQ context. 
*/ -static void perf_pmu_rotate_start(struct pmu *pmu) +static void perf_event_ctx_activate(struct perf_event_context *ctx) { - struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - struct list_head *head = this_cpu_ptr(&rotation_list); + struct list_head *head = this_cpu_ptr(&active_ctx_list); WARN_ON(!irqs_disabled()); - if (list_empty(&cpuctx->rotation_list)) - list_add(&cpuctx->rotation_list, head); + WARN_ON(!list_empty(&ctx->active_ctx_list)); + + list_add(&ctx->active_ctx_list, head); +} + +static void perf_event_ctx_deactivate(struct perf_event_context *ctx) +{ + WARN_ON(!irqs_disabled()); + + WARN_ON(list_empty(&ctx->active_ctx_list)); + + list_del_init(&ctx->active_ctx_list); } static void get_ctx(struct perf_event_context *ctx) @@ -1233,8 +1243,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) ctx->nr_branch_stack++; list_add_rcu(&event->event_entry, &ctx->event_list); - if (!ctx->nr_events) - perf_pmu_rotate_start(ctx->pmu); ctx->nr_events++; if (event->attr.inherit_stat) ctx->nr_stat++; @@ -1561,7 +1569,8 @@ event_sched_out(struct perf_event *event, if (!is_software_event(event)) cpuctx->active_oncpu--; - ctx->nr_active--; + if (!--ctx->nr_active) + perf_event_ctx_deactivate(ctx); if (event->attr.freq && event->attr.sample_freq) ctx->nr_freq--; if (event->attr.exclusive || !cpuctx->active_oncpu) @@ -1885,7 +1894,8 @@ event_sched_in(struct perf_event *event, if (!is_software_event(event)) cpuctx->active_oncpu++; - ctx->nr_active++; + if (!ctx->nr_active++) + perf_event_ctx_activate(ctx); if (event->attr.freq && event->attr.sample_freq) ctx->nr_freq++; @@ -2742,12 +2752,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, perf_pmu_enable(ctx->pmu); perf_ctx_unlock(cpuctx, ctx); - - /* - * Since these rotations are per-cpu, we need to ensure the - * cpu-context we got scheduled on is actually rotating. - */ - perf_pmu_rotate_start(ctx->pmu); } /* @@ -3035,25 +3039,18 @@ static void rotate_ctx(struct perf_event_context *ctx) list_rotate_left(&ctx->flexible_groups); } -/* - * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized - * because they're strictly cpu affine and rotate_start is called with IRQs - * disabled, while rotate_context is called from IRQ context. 
- */ static int perf_rotate_context(struct perf_cpu_context *cpuctx) { struct perf_event_context *ctx = NULL; - int rotate = 0, remove = 1; + int rotate = 0; if (cpuctx->ctx.nr_events) { - remove = 0; if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) rotate = 1; } ctx = cpuctx->task_ctx; if (ctx && ctx->nr_events) { - remove = 0; if (ctx->nr_events != ctx->nr_active) rotate = 1; } @@ -3077,8 +3074,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx) perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); done: - if (remove) - list_del_init(&cpuctx->rotation_list); return rotate; } @@ -3096,9 +3091,8 @@ bool perf_event_can_stop_tick(void) void perf_event_task_tick(void) { - struct list_head *head = this_cpu_ptr(&rotation_list); - struct perf_cpu_context *cpuctx, *tmp; - struct perf_event_context *ctx; + struct list_head *head = this_cpu_ptr(&active_ctx_list); + struct perf_event_context *ctx, *tmp; int throttled; WARN_ON(!irqs_disabled()); @@ -3106,14 +3100,8 @@ void perf_event_task_tick(void) __this_cpu_inc(perf_throttled_seq); throttled = __this_cpu_xchg(perf_throttled_count, 0); - list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) { - ctx = &cpuctx->ctx; + list_for_each_entry_safe(ctx, tmp, head, active_ctx_list) perf_adjust_freq_unthr_context(ctx, throttled); - - ctx = cpuctx->task_ctx; - if (ctx) - perf_adjust_freq_unthr_context(ctx, throttled); - } } static int event_enable_on_exec(struct perf_event *event, @@ -3272,6 +3260,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx) { raw_spin_lock_init(&ctx->lock); mutex_init(&ctx->mutex); + INIT_LIST_HEAD(&ctx->active_ctx_list); INIT_LIST_HEAD(&ctx->pinned_groups); INIT_LIST_HEAD(&ctx->flexible_groups); INIT_LIST_HEAD(&ctx->event_list); @@ -6954,7 +6943,6 @@ skip_type: __perf_cpu_hrtimer_init(cpuctx, cpu); - INIT_LIST_HEAD(&cpuctx->rotation_list); cpuctx->unique_pmu = pmu; } @@ -8384,7 +8372,7 @@ static void __init perf_event_init_all_cpus(void) for_each_possible_cpu(cpu) { swhash = &per_cpu(swevent_htable, cpu); mutex_init(&swhash->hlist_mutex); - INIT_LIST_HEAD(&per_cpu(rotation_list, cpu)); + INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu)); } } @@ -8405,22 +8393,11 @@ static void perf_event_init_cpu(int cpu) } #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC -static void perf_pmu_rotate_stop(struct pmu *pmu) -{ - struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - - WARN_ON(!irqs_disabled()); - - list_del_init(&cpuctx->rotation_list); -} - static void __perf_event_exit_context(void *__info) { struct remove_event re = { .detach_group = true }; struct perf_event_context *ctx = __info; - perf_pmu_rotate_stop(ctx->pmu); - rcu_read_lock(); list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry) __perf_remove_from_context(&re); -- cgit v1.2.3-59-g8ed1b
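Stepping back to the locking changes earlier in this section: both sys_perf_event_open() and perf_pmu_migrate_context() now take two context mutexes through mutex_lock_double(), which orders the acquisitions by address so that two tasks locking the same pair in opposite order cannot deadlock. A user-space sketch of that idiom, with invented demo_ names and pthread mutexes; the callers are assumed to pass two distinct mutexes.

#include <pthread.h>
#include <stdint.h>

/* Lock two distinct mutexes in a globally consistent (address) order. */
void demo_lock_double(pthread_mutex_t *a, pthread_mutex_t *b)
{
    if ((uintptr_t)b < (uintptr_t)a) {   /* always take the lower address first */
        pthread_mutex_t *tmp = a;

        a = b;
        b = tmp;
    }

    pthread_mutex_lock(a);
    pthread_mutex_lock(b);
}

void demo_unlock_double(pthread_mutex_t *a, pthread_mutex_t *b)
{
    /* Unlock order does not affect correctness. */
    pthread_mutex_unlock(a);
    pthread_mutex_unlock(b);
}

The swap is local to the helper, so callers keep their own notion of which mutex belongs to which context; only the acquisition order is normalized.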