From 4b04e0decd2518e54e3f371abf3d883b3198663d Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Mon, 17 Aug 2020 09:27:54 +0200 Subject: perf test: Fix basic bpf filtering test BPF basic filtering test fails on s390x (when vmlinux debuginfo is utilized instead of /proc/kallsyms) Info: - bpf_probe_load installs the bpf code at do_epoll_wait. - For s390x, do_epoll_wait resolves to 3 functions including inlines. found inline addr: 0x43769e Probe point found: __s390_sys_epoll_wait+6 found inline addr: 0x437290 Probe point found: do_epoll_wait+0 found inline addr: 0x4375d6 Probe point found: __se_sys_epoll_wait+6 - add_bpf_event creates evsel for every probe in a BPF object. This results in 3 evsels. Solution: - Expected result = 50% of the samples to be collected from epoll_wait * number of entries present in the evlist. Committer testing: # perf test 42 42: BPF filter : 42.1: Basic BPF filtering : Ok 42.2: BPF pinning : Ok 42.3: BPF prologue generation : Ok 42.4: BPF relocation checker : Ok # Signed-off-by: Sumanth Korikkar Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: bpf@vger.kernel.org Cc: Heiko Carstens Cc: Jiri Olsa Cc: Sven Schnelle LPU-Reference: 20200817072754.58344-1-sumanthk@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 5d20bf8397f0..cd77e334e577 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -197,7 +197,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), perf_mmap__read_done(&md->core); } - if (count != expect) { + if (count != expect * evlist->core.nr_entries) { pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count); goto out_delete_evlist; } -- cgit v1.2.3-59-g8ed1b From a74eaf1605d42391c2357a70e94e5a2c7780fea9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 17 Aug 2020 11:09:42 -0600 Subject: perf sched timehist: Fix use of CPU list with summary option Do not update thread stats or show idle summary unless CPU is in the list of interest. Fixes: c30d630d1bcfad8d ("perf sched timehist: Add support for filtering on CPU") Signed-off-by: David Ahern Acked-by: Namhyung Kim Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20200817170943.1486-1-dsahern@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 0c7d599fa555..e6fc297cee91 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2584,7 +2584,8 @@ static int timehist_sched_change_event(struct perf_tool *tool, } if (!sched->idle_hist || thread->tid == 0) { - timehist_update_runtime_stats(tr, t, tprev); + if (!cpu_list || test_bit(sample->cpu, cpu_bitmap)) + timehist_update_runtime_stats(tr, t, tprev); if (sched->idle_hist) { struct idle_thread_runtime *itr = (void *)tr; @@ -2857,6 +2858,9 @@ static void timehist_print_summary(struct perf_sched *sched, printf("\nIdle stats:\n"); for (i = 0; i < idle_max_cpu; ++i) { + if (cpu_list && !test_bit(i, cpu_bitmap)) + continue; + t = idle_threads[i]; if (!t) continue; -- cgit v1.2.3-59-g8ed1b From 0c5f1acc2a14416bf30023f373558d369afdbfc8 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 18 Aug 2020 20:30:37 +0800 Subject: perf top: Skip side-band event setup if HAVE_LIBBPF_SUPPORT is not set When I execute 'perf top' without HAVE_LIBBPF_SUPPORT, there exists the following segmentation fault, skip the side-band event setup to fix it, this is similar with commit 1101c872c8c7 ("perf record: Skip side-band event setup if HAVE_LIBBPF_SUPPORT is not set"). [yangtiezhu@linux perf]$ ./perf top perf: Segmentation fault Obtained 6 stack frames. ./perf(sighandler_dump_stack+0x5c) [0x12011b604] [0xffffffc010] ./perf(perf_mmap__read_init+0x3e) [0x1201feeae] ./perf() [0x1200d715c] /lib64/libpthread.so.0(+0xab9c) [0xffee10ab9c] /lib64/libc.so.6(+0x128f4c) [0xffedc08f4c] Segmentation fault [yangtiezhu@linux perf]$ I use git bisect to find commit b38d85ef49cf ("perf bpf: Decouple creating the evlist from adding the SB event") is the first bad commit, so also add the Fixes tag. Committer testing: First build perf explicitely disabling libbpf: $ make NO_LIBBPF=1 O=/tmp/build/perf -C tools/perf install-bin && perf test python Now make sure it isn't linked: $ perf -vv | grep -w bpf bpf: [ OFF ] # HAVE_LIBBPF_SUPPORT $ $ nm ~/bin/perf | grep libbpf $ And now try to run 'perf top': # perf top perf: Segmentation fault -------- backtrace -------- perf[0x5bcd6d] /lib64/libc.so.6(+0x3ca6f)[0x7fd0f5a66a6f] perf(perf_mmap__read_init+0x1e)[0x5e1afe] perf[0x4cc468] /lib64/libpthread.so.0(+0x9431)[0x7fd0f645a431] /lib64/libc.so.6(clone+0x42)[0x7fd0f5b2b912] # Applying this patch fixes the issue. Fixes: b38d85ef49cf ("perf bpf: Decouple creating the evlist from adding the SB event") Signed-off-by: Tiezhu Yang Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Xuefeng Li Link: http://lore.kernel.org/lkml/1597753837-16222-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 994c230027bb..7c64134472c7 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1746,6 +1746,7 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; } +#ifdef HAVE_LIBBPF_SUPPORT if (!top.record_opts.no_bpf_event) { top.sb_evlist = evlist__new(); @@ -1759,6 +1760,7 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; } } +#endif if (perf_evlist__start_sb_thread(top.sb_evlist, target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); -- cgit v1.2.3-59-g8ed1b From 783abbd444926f0d94039c4cf5e5ca1d07496bce Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Aug 2020 10:00:13 -0300 Subject: perf symbols: Add mwait_idle_with_hints.constprop.0 to the list of idle symbols The "mwait_idle_with_hints" one was already there, some compiler artifact now adds this ".constprop.0" suffix, cover that one too. At some point we need to put these in a special bucket and show it somewhere on the screen. Noticed building the kernel on a fedora:32 system using: gcc version 10.2.1 20200723 (Red Hat 10.2.1-1) (GCC) Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1f5fcb828a21..5151a8c0b791 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -663,6 +663,7 @@ static bool symbol__is_idle(const char *name) "exit_idle", "mwait_idle", "mwait_idle_with_hints", + "mwait_idle_with_hints.constprop.0", "poll_idle", "ppc64_runlatch_off", "pseries_dedicated_idle_sleep", -- cgit v1.2.3-59-g8ed1b From 19684e969debc912b3ec5eda72b4f90aa73f1d30 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Fri, 24 Jul 2020 15:26:28 +0800 Subject: perf: arm-spe: Fix check error when synthesizing events In arm_spe_read_record(), when we are processing an events packet, 'decoder->packet.index' is the length of payload, which has been transformed in payloadlen(). So correct the check of 'idx'. Signed-off-by: Wei Li Reviewed-by: Leo Yan Cc: Alexander Shishkin Cc: Hanjun Guo Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200724072628.35904-1-liwei391@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 302a14d0aca9..93e063f22be5 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -182,15 +182,15 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) if (payload & BIT(EV_TLB_ACCESS)) decoder->record.type |= ARM_SPE_TLB_ACCESS; - if ((idx == 1 || idx == 2 || idx == 3) && + if ((idx == 2 || idx == 4 || idx == 8) && (payload & BIT(EV_LLC_MISS))) decoder->record.type |= ARM_SPE_LLC_MISS; - if ((idx == 1 || idx == 2 || idx == 3) && + if ((idx == 2 || idx == 4 || idx == 8) && (payload & BIT(EV_LLC_ACCESS))) decoder->record.type |= ARM_SPE_LLC_ACCESS; - if ((idx == 1 || idx == 2 || idx == 3) && + if ((idx == 2 || idx == 4 || idx == 8) && (payload & BIT(EV_REMOTE_ACCESS))) decoder->record.type |= ARM_SPE_REMOTE_ACCESS; -- cgit v1.2.3-59-g8ed1b From 20befbb1080307e70c7893ef9840d32e3ef8ac45 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 20 Aug 2020 22:25:01 +0100 Subject: perf tools: Use %zd for size_t printf formats on 32-bit A couple of trivial fixes for using %zd for size_t in the code supporting the ZSTD compression library. Signed-off-by: Chris Wilson Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexey Budankov Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200820212501.24421-1-chris@chris-wilson.co.uk Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 2 +- tools/perf/util/zstd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ffbc9d35a383..7a5f03764702 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -87,7 +87,7 @@ static int perf_session__process_compressed_event(struct perf_session *session, session->decomp_last = decomp; } - pr_debug("decomp (B): %ld to %ld\n", src_size, decomp_size); + pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size); return 0; } diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c index d2202392ffdb..48dd2b018c47 100644 --- a/tools/perf/util/zstd.c +++ b/tools/perf/util/zstd.c @@ -99,7 +99,7 @@ size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size while (input.pos < input.size) { ret = ZSTD_decompressStream(data->dstream, &output, &input); if (ZSTD_isError(ret)) { - pr_err("failed to decompress (B): %ld -> %ld, dst_size %ld : %s\n", + pr_err("failed to decompress (B): %zd -> %zd, dst_size %zd : %s\n", src_size, output.size, dst_size, ZSTD_getErrorName(ret)); break; } -- cgit v1.2.3-59-g8ed1b From a060c1f12b525ba828f871eff3127dabf8daa1e6 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Wed, 19 Aug 2020 11:19:47 +0800 Subject: perf record: Correct the help info of option "--no-bpf-event" The help info of option "--no-bpf-event" is wrongly described as "record bpf events", correct it. Committer testing: $ perf record -h bpf Usage: perf record [] [] or: perf record [] -- [] --clang-opt options passed to clang when compiling BPF scriptlets --clang-path clang binary to use for compiling BPF scriptlets --no-bpf-event do not record bpf events $ Fixes: 71184c6ab7e6 ("perf record: Replace option --bpf-event with --no-bpf-event") Signed-off-by: Wei Li Acked-by: Song Liu Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Hanjun Guo Cc: Jiri Olsa Cc: Li Bin Cc: Mark Rutland Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20200819031947.12115-1-liwei391@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f91352f847c0..772f1057647f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2452,7 +2452,7 @@ static struct option __record_options[] = { OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, "synthesize non-sample events at the end of output"), OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), - OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"), + OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, "Fail if the specified frequency can't be used"), OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", -- cgit v1.2.3-59-g8ed1b From 943b69ac1884d8e0260ee653e696456810d7c6e3 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 14 Aug 2020 09:21:20 +0800 Subject: perf parse-events: Set exclude_guest=1 for user-space counting Currently if we run 'perf record -e cycles:u', exclude_guest=0. But it doesn't make sense in most cases that we request for user-space counting but we also get the guest report. Of course, we also need to consider 'perf kvm' usage case that authorized perf users on the host may only want to count guest user space events. For example, # perf kvm --guest record -e cycles:u When we have 'exclude_guest=1' for 'perf kvm' usage, we may get nothing from guest events. To keep perf semantics consistent and clear, this patch sets exclude_guest=1 for user-space counting but except for 'perf kvm' usage. Before: perf record -e cycles:u ./div perf evlist -v cycles:u: ..., exclude_kernel: 1, exclude_hv: 1, ... After: perf record -e cycles:u ./div perf evlist -v cycles:u: ..., exclude_kernel: 1, exclude_hv: 1, exclude_guest: 1, ... Before: perf kvm --guest record -e cycles:u -vvv perf_event_attr: size 120 { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|ID|CPU|PERIOD read_format ID disabled 1 inherit 1 exclude_kernel 1 exclude_hv 1 freq 1 sample_id_all 1 After: perf kvm --guest record -e cycles:u -vvv perf_event_attr: size 120 { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|ID|CPU|PERIOD read_format ID disabled 1 inherit 1 exclude_kernel 1 exclude_hv 1 freq 1 sample_id_all 1 For Before/After, exclude_guest are both 0 for perf kvm usage. perf test 6 6: Parse event definition strings : Ok Signed-off-by: Jin Yao Tested-by: Like Xu Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200814012120.16647-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 4 ++-- tools/perf/util/parse-events.c | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 7f9f87a470c3..aae0fd9045c1 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -719,7 +719,7 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); @@ -842,7 +842,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 9f7260e69113..ff4c23d2a0f3 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -37,6 +37,7 @@ #include "util/evsel_config.h" #include "util/event.h" #include "util/pfm.h" +#include "perf.h" #define MAX_NAME_LEN 100 @@ -1794,6 +1795,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str, if (*str == 'u') { if (!exclude) exclude = eu = ek = eh = 1; + if (!exclude_GH && !perf_guest) + eG = 1; eu = 0; } else if (*str == 'k') { if (!exclude) -- cgit v1.2.3-59-g8ed1b From 492d4d876c293e64266222ecec0573103dfc2625 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 25 Aug 2020 09:12:11 +0200 Subject: perf test: Set NULL sentinel in pmu_events table in "Parse and process metrics" test Linux 5.9 introduced perf test case "Parse and process metrics" and on s390 this test case always dumps core: [root@t35lp67 perf]# ./perf test -vvvv -F 67 67: Parse and process metrics : --- start --- metric expr inst_retired.any / cpu_clk_unhalted.thread for IPC parsing metric: inst_retired.any / cpu_clk_unhalted.thread Segmentation fault (core dumped) [root@t35lp67 perf]# I debugged this core dump and gdb shows this call chain: (gdb) where #0 0x000003ffabc3192a in __strnlen_c_1 () from /lib64/libc.so.6 #1 0x000003ffabc293de in strcasestr () from /lib64/libc.so.6 #2 0x0000000001102ba2 in match_metric(list=0x1e6ea20 "inst_retired.any", n=) at util/metricgroup.c:368 #3 find_metric (map=, map=, metric=0x1e6ea20 "inst_retired.any") at util/metricgroup.c:765 #4 __resolve_metric (ids=0x0, map=, metric_list=0x0, metric_no_group=, m=) at util/metricgroup.c:844 #5 resolve_metric (ids=0x0, map=0x0, metric_list=0x0, metric_no_group=) at util/metricgroup.c:881 #6 metricgroup__add_metric (metric=, metric_no_group=metric_no_group@entry=false, events=, events@entry=0x3ffd84fb878, metric_list=0x0, metric_list@entry=0x3ffd84fb868, map=0x0) at util/metricgroup.c:943 #7 0x00000000011034ae in metricgroup__add_metric_list (map=0x13f9828 , metric_list=0x3ffd84fb868, events=0x3ffd84fb878, metric_no_group=, list=) at util/metricgroup.c:988 #8 parse_groups (perf_evlist=perf_evlist@entry=0x1e70260, str=str@entry=0x12f34b2 "IPC", metric_no_group=, metric_no_merge=, fake_pmu=fake_pmu@entry=0x1462f18 , metric_events=0x3ffd84fba58, map=0x1) at util/metricgroup.c:1040 #9 0x0000000001103eb2 in metricgroup__parse_groups_test( evlist=evlist@entry=0x1e70260, map=map@entry=0x13f9828 , str=str@entry=0x12f34b2 "IPC", metric_no_group=metric_no_group@entry=false, metric_no_merge=metric_no_merge@entry=false, metric_events=0x3ffd84fba58) at util/metricgroup.c:1082 #10 0x00000000010c84d8 in __compute_metric (ratio2=0x0, name2=0x0, ratio1=, name1=0x12f34b2 "IPC", vals=0x3ffd84fbad8, name=0x12f34b2 "IPC") at tests/parse-metric.c:159 #11 compute_metric (ratio=, vals=0x3ffd84fbad8, name=0x12f34b2 "IPC") at tests/parse-metric.c:189 #12 test_ipc () at tests/parse-metric.c:208 ..... ..... omitted many more lines This test case was added with commit 218ca91df477 ("perf tests: Add parse metric test for frontend metric"). When I compile with make DEBUG=y it works fine and I do not get a core dump. It turned out that the above listed function call chain worked on a struct pmu_event array which requires a trailing element with zeroes which was missing. The marco map_for_each_event() loops over that array tests for members metric_expr/metric_name/metric_group being non-NULL. Adding this element fixes the issue. Output after: [root@t35lp46 perf]# ./perf test 67 67: Parse and process metrics : Ok [root@t35lp46 perf]# Committer notes: As Ian remarks, this is not s390 specific: This also shows up with address sanitizer on all architectures (perhaps change the patch title) and perhaps add a "Fixes: " tag. ================================================================= ==4718==ERROR: AddressSanitizer: global-buffer-overflow on address 0x55c93b4d59e8 at pc 0x55c93a1541e2 bp 0x7ffd24327c60 sp 0x7ffd24327c58 READ of size 8 at 0x55c93b4d59e8 thread T0 #0 0x55c93a1541e1 in find_metric tools/perf/util/metricgroup.c:764:2 #1 0x55c93a153e6c in __resolve_metric tools/perf/util/metricgroup.c:844:9 #2 0x55c93a152f18 in resolve_metric tools/perf/util/metricgroup.c:881:9 #3 0x55c93a1528db in metricgroup__add_metric tools/perf/util/metricgroup.c:943:9 #4 0x55c93a151996 in metricgroup__add_metric_list tools/perf/util/metricgroup.c:988:9 #5 0x55c93a1511b9 in parse_groups tools/perf/util/metricgroup.c:1040:8 #6 0x55c93a1513e1 in metricgroup__parse_groups_test tools/perf/util/metricgroup.c:1082:9 #7 0x55c93a0108ae in __compute_metric tools/perf/tests/parse-metric.c:159:8 #8 0x55c93a010744 in compute_metric tools/perf/tests/parse-metric.c:189:9 #9 0x55c93a00f5ee in test_ipc tools/perf/tests/parse-metric.c:208:2 #10 0x55c93a00f1e8 in test__parse_metric tools/perf/tests/parse-metric.c:345:2 #11 0x55c939fd7202 in run_test tools/perf/tests/builtin-test.c:410:9 #12 0x55c939fd6736 in test_and_print tools/perf/tests/builtin-test.c:440:9 #13 0x55c939fd58c3 in __cmd_test tools/perf/tests/builtin-test.c:661:4 #14 0x55c939fd4e02 in cmd_test tools/perf/tests/builtin-test.c:807:9 #15 0x55c939e4763d in run_builtin tools/perf/perf.c:313:11 #16 0x55c939e46475 in handle_internal_command tools/perf/perf.c:365:8 #17 0x55c939e4737e in run_argv tools/perf/perf.c:409:2 #18 0x55c939e45f7e in main tools/perf/perf.c:539:3 0x55c93b4d59e8 is located 0 bytes to the right of global variable 'pme_test' defined in 'tools/perf/tests/parse-metric.c:17:25' (0x55c93b4d54a0) of size 1352 SUMMARY: AddressSanitizer: global-buffer-overflow tools/perf/util/metricgroup.c:764:2 in find_metric Shadow bytes around the buggy address: 0x0ab9a7692ae0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ab9a7692af0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ab9a7692b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ab9a7692b10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ab9a7692b20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 =>0x0ab9a7692b30: 00 00 00 00 00 00 00 00 00 00 00 00 00[f9]f9 f9 0x0ab9a7692b40: f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 0x0ab9a7692b50: f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 0x0ab9a7692b60: f9 f9 f9 f9 f9 f9 f9 f9 00 00 00 00 00 00 00 00 0x0ab9a7692b70: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ab9a7692b80: f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Partially addressable: 01 02 03 04 05 06 07 Heap left redzone: fa Freed heap region: fd Stack left redzone: f1 Stack mid redzone: f2 Stack right redzone: f3 Stack after return: f5 Stack use after scope: f8 Global redzone: f9 Global init order: f6 Poisoned by user: f7 Container overflow: fc Array cookie: ac Intra object redzone: bb ASan internal: fe Left alloca redzone: ca Right alloca redzone: cb Shadow gap: cc I'm also adding the missing "Fixes" tag and setting just .name to NULL, as doing it that way is more compact (the compiler will zero out everything else) and the table iterators look for .name being NULL as the sentinel marking the end of the table. Fixes: 0a507af9c681ac2a ("perf tests: Add parse metric test for ipc metric") Signed-off-by: Thomas Richter Reviewed-by: Sumanth Korikkar Acked-by: Ian Rogers Cc: Heiko Carstens Cc: Jiri Olsa Cc: Namhyung Kim Cc: Sven Schnelle Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/20200825071211.16959-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-metric.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index fc0838a7abc2..23db8acc492d 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -70,6 +70,9 @@ static struct pmu_event pme_test[] = { { .metric_expr = "1/m3", .metric_name = "M3", +}, +{ + .name = NULL, } }; -- cgit v1.2.3-59-g8ed1b From 313146a844182c4829acd5e1d60246367212088f Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 25 Aug 2020 08:33:04 +0200 Subject: perf stat: Fix out of bounds array access in the print_counters() evlist method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a compile error on F32 and gcc version 10.1 on s390 in file utils/stat-display.c. The error does not show up with make DEBUG=y. In fact the issue shows up when using both compiler options -O6 and -D_FORTIFY_SOURCE=2 (which are omitted with DEBUG=Y). This is the offending call chain: print_counter_aggr() printout(config, -1, 0, ...) with 2nd parm id set to -1 aggr_printout(config, x, id --> -1, ...) which leads to this code: case AGGR_NONE: if (evsel->percore && !config->percore_show_thread) { .... } else { fprintf(config->output, "CPU%*d%s", config->csv_output ? 0 : -7, evsel__cpus(evsel)->map[id], ^^ id is -1 !!!! config->csv_sep); } This is a compiler inlining issue which is detected on s390 but not on other plattforms. Output before: # make util/stat-display.o ..... util/stat-display.c: In function ‘perf_evlist__print_counters’: util/stat-display.c:121:4: error: array subscript -1 is below array bounds of ‘int[]’ [-Werror=array-bounds] 121 | fprintf(config->output, "CPU%*d%s", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 122 | config->csv_output ? 0 : -7, | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 123 | evsel__cpus(evsel)->map[id], | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 124 | config->csv_sep); | ~~~~~~~~~~~~~~~~ In file included from util/evsel.h:13, from util/evlist.h:13, from util/stat-display.c:9: /root/linux/tools/lib/perf/include/internal/cpumap.h:10:7: note: while referencing ‘map’ 10 | int map[]; | ^~~ cc1: all warnings being treated as errors mv: cannot stat 'util/.stat-display.o.tmp': No such file or directory make[3]: *** [/root/linux/tools/build/Makefile.build:97: util/stat-display.o] Error 1 make[2]: *** [Makefile.perf:716: util/stat-display.o] Error 2 make[1]: *** [Makefile.perf:231: sub-make] Error 2 make: *** [Makefile:110: util/stat-display.o] Error 2 [root@t35lp46 perf]# Output after: # make util/stat-display.o ..... CC util/stat-display.o [root@t35lp46 perf]# Committer notes: Removed the removal of {} enclosing the multiline else block, as pointed out by Jiri Olsa. Suggested-by: Jiri Olsa Signed-off-by: Thomas Richter Acked-by: Jiri Olsa Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/20200825063304.77733-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 57d0706e1330..493ec372fdec 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -117,7 +117,7 @@ static void aggr_printout(struct perf_stat_config *config, cpu_map__id_to_die(id), config->csv_output ? 0 : -3, cpu_map__id_to_cpu(id), config->csv_sep); - } else { + } else if (id > -1) { fprintf(config->output, "CPU%*d%s", config->csv_output ? 0 : -7, evsel__cpus(evsel)->map[id], -- cgit v1.2.3-59-g8ed1b From 33321a06c70b44dd391b4cc01568a20d53fb3a6e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 25 Aug 2020 21:29:10 -0700 Subject: perf parse-events: Avoid an uninitialized read when using fake PMUs With a fake_pmu the pmu_info isn't populated by perf_pmu__check_alias. In this case, don't try to copy the uninitialized values to the evsel. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200826042910.1902374-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index ff4c23d2a0f3..c4d2394e2b2d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1534,19 +1534,23 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel = __add_event(list, &parse_state->idx, &attr, true, get_config_name(head_config), pmu, &config_terms, auto_merge_stats, NULL); - if (evsel) { - evsel->unit = info.unit; - evsel->scale = info.scale; - evsel->per_pkg = info.per_pkg; - evsel->snapshot = info.snapshot; - evsel->metric_expr = info.metric_expr; - evsel->metric_name = info.metric_name; - evsel->pmu_name = name ? strdup(name) : NULL; - evsel->use_uncore_alias = use_uncore_alias; - evsel->percore = config_term_percore(&evsel->config_terms); - } - - return evsel ? 0 : -ENOMEM; + if (!evsel) + return -ENOMEM; + + evsel->pmu_name = name ? strdup(name) : NULL; + evsel->use_uncore_alias = use_uncore_alias; + evsel->percore = config_term_percore(&evsel->config_terms); + + if (parse_state->fake_pmu) + return 0; + + evsel->unit = info.unit; + evsel->scale = info.scale; + evsel->per_pkg = info.per_pkg; + evsel->snapshot = info.snapshot; + evsel->metric_expr = info.metric_expr; + evsel->metric_name = info.metric_name; + return 0; } int parse_events_multi_pmu_add(struct parse_events_state *parse_state, -- cgit v1.2.3-59-g8ed1b From d4ccbacb9c217fefb4332a9af81b785690cf1053 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Aug 2020 16:17:00 -0300 Subject: perf top/report: Fix infinite loop in the TUI for grouped events For a while we need to have a dummy event for doing things like receiving PERF_RECORD_COMM, PERF_RECORD_EXEC, etc for threads being created and dying while we synthesize the pre-existing ones at tool start. This 'dummy' event is needed for keeping track of thread lifetime events early in the session but are uninteresting otherwise, i.e. no need to have it in a initial events menu for the non-grouped case, i.e. for: # perf top -e cycles,instructions or even for plain: # perf top When 'cycles' and that 'dummy' event are in place. The code to remove that 'dummy' event ended up creating an endless loop for the grouped case, i.e.: # perf top -e '{cycles,instructions}' Fix it. Fixes: bee9ca1c8a237ca1 ("perf report TUI: Remove needless 'dummy' event from menu") Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index be9c4c0549bc..a07626f07208 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3629,8 +3629,8 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help, { int nr_entries = evlist->core.nr_entries; -single_entry: if (perf_evlist__single_entry(evlist)) { +single_entry: { struct evsel *first = evlist__first(evlist); return perf_evsel__hists_browse(first, nr_entries, help, @@ -3638,6 +3638,7 @@ single_entry: env, warn_lost_event, annotation_opts); } + } if (symbol_conf.event_group) { struct evsel *pos; -- cgit v1.2.3-59-g8ed1b From f5f8e7e55fbdb4fdddec73518e23c48083108fbb Mon Sep 17 00:00:00 2001 From: Al Grant Date: Wed, 19 Aug 2020 16:47:50 +0800 Subject: perf cs-etm: Fix corrupt data after perf inject from Commit 42bbabed09ce6208 ("perf tools: Add hw_idx in struct branch_stack") changed the format of branch stacks in perf samples. When samples use this new format, a flag must be set in the corresponding event. Synthesized branch stacks generated from CoreSight ETM trace were using the new format, but not setting the event attribute, leading to consumers seeing corrupt data. This patch fixes the issue by setting the event attribute to indicate use of the new format. Fixes: 42bbabed09ce6208 ("perf tools: Add hw_idx in struct branch_stack") Signed-off-by: Al Grant Reviewed-by: Andrea Brunato Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Leo Yan Link: http://lore.kernel.org/lkml/20200819084751.17686-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index c283223fb31f..a2a369e2fbb6 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1344,8 +1344,15 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; } - if (etm->synth_opts.last_branch) + if (etm->synth_opts.last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + /* + * We don't use the hardware index, but the sample generation + * code uses the new format branch_stack with this field, + * so the event attributes must indicate that it's present. + */ + attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; + } if (etm->synth_opts.instructions) { attr.config = PERF_COUNT_HW_INSTRUCTIONS; -- cgit v1.2.3-59-g8ed1b From a347306fbec5dcaf7c276777b11d530eab6a4526 Mon Sep 17 00:00:00 2001 From: Al Grant Date: Wed, 19 Aug 2020 16:47:51 +0800 Subject: perf intel-pt: Fix corrupt data after perf inject from Commit 42bbabed09ce6208 ("perf tools: Add hw_idx in struct branch_stack") changed the format of branch stacks in perf samples. When samples use this new format, a flag must be set in the corresponding event. Synthesized branch stacks generated from Intel PT were using the new format, but not setting the event attribute, leading to consumers seeing corrupt data. This patch fixes the issue by setting the event attribute to indicate use of the new format. Fixes: 42bbabed09ce6208 ("perf tools: Add hw_idx in struct branch_stack") Signed-off-by: Al Grant Acked-by: Adrian Hunter Reviewed-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20200819084751.17686-2-leo.yan@linaro.org Signed-off-by: Leo Yan Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 2a8d245351e7..0af4e81c46e2 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -3017,8 +3017,15 @@ static int intel_pt_synth_events(struct intel_pt *pt, if (pt->synth_opts.callchain) attr.sample_type |= PERF_SAMPLE_CALLCHAIN; - if (pt->synth_opts.last_branch) + if (pt->synth_opts.last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + /* + * We don't use the hardware index, but the sample generation + * code uses the new format branch_stack with this field, + * so the event attributes must indicate that it's present. + */ + attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; + } if (pt->synth_opts.instructions) { attr.config = PERF_COUNT_HW_INSTRUCTIONS; -- cgit v1.2.3-59-g8ed1b From 39c0a53b114d0317e5c4e76b631f41d133af5cb0 Mon Sep 17 00:00:00 2001 From: Al Grant Date: Tue, 1 Sep 2020 12:10:14 -0300 Subject: perf tools: Correct SNOOPX field offset perf_event.h has macros that define the field offsets in the data_src bitmask in perf records. The SNOOPX and REMOTE offsets were both 37. These are distinct fields, and the bitfield layout in perf_mem_data_src confirms that SNOOPX should be at offset 38. Committer notes: This was extracted from a larger patch that also contained kernel changes. Fixes: 52839e653b5629bd ("perf tools: Add support for printing new mem_info encodings") Signed-off-by: Al Grant Reviewed-by: Andi Kleen Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/9974f2d0-bf7f-518e-d9f7-4520e5ff1bb0@foss.arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 077e7ee69e3d..3e5dcdd48a49 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1196,7 +1196,7 @@ union perf_mem_data_src { #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ /* 1 free */ -#define PERF_MEM_SNOOPX_SHIFT 37 +#define PERF_MEM_SNOOPX_SHIFT 38 /* locked instruction */ #define PERF_MEM_LOCK_NA 0x01 /* not available */ -- cgit v1.2.3-59-g8ed1b From 977f739b7126bf98b5202e243f60cbc0a1ec2c3b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 27 Aug 2020 15:48:29 +0200 Subject: perf report: Disable ordered_events for raw dump Disable ordered_events for report raw dump, because for raw dump we want to see events as they are stored in the perf.data file, not sorted by time. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200827134830.126721-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ece1cddfcd7c..3c74c9c0f3c3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1332,6 +1332,9 @@ int cmd_report(int argc, const char **argv) if (report.mmaps_mode) report.tasks_mode = true; + if (dump_trace) + report.tool.ordered_events = false; + if (quiet) perf_quiet_option(); -- cgit v1.2.3-59-g8ed1b