From 182bb594e0678b3ceac99f4ec3daa5d22ea3d0ce Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 3 Oct 2022 13:46:46 -0700 Subject: perf tools: Add evlist__add_sched_switch() Add a help to create a system-wide sched_switch event. One merit is that it sets the system-wide bit before adding it to evlist so that the libperf can handle the cpu and thread maps correctly. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221003204647.1481128-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'tools/perf/arch') diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 13933020a79e..793b35f2221a 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "../../../util/session.h" @@ -426,20 +427,14 @@ static int intel_pt_track_switches(struct evlist *evlist) if (!evlist__can_select_event(evlist, sched_switch)) return -EPERM; - err = parse_event(evlist, sched_switch); - if (err) { - pr_debug2("%s: failed to parse %s, error %d\n", + evsel = evlist__add_sched_switch(evlist, true); + if (IS_ERR(evsel)) { + err = PTR_ERR(evsel); + pr_debug2("%s: failed to create %s, error = %d\n", __func__, sched_switch, err); return err; } - evsel = evlist__last(evlist); - - evsel__set_sample_bit(evsel, CPU); - evsel__set_sample_bit(evsel, TIME); - - evsel->core.system_wide = true; - evsel->no_aux_samples = true; evsel->immediate = true; return 0; -- cgit v1.2.3-59-g8ed1b From f7b58cbdb3ff36eba8622e67eee66c10dd1c9995 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 6 Oct 2022 21:09:43 +0530 Subject: perf mem/c2c: Add load store event mappings for AMD The 'perf mem' and 'perf c2c' tools are wrappers around 'perf record' with mem load/ store events. IBS tagged load/store sample provides most of the information needed for these tools. Wire in the "ibs_op//" event as mem-ldst event for AMD. There are some limitations though: Only load/store micro-ops provide mem/c2c information. Whereas, IBS does not have a way to choose a particular type of micro-op to tag. This results in many non-LS micro-ops being tagged which appear as N/A in the perf report. IBS, being an uncore pmu from kernel point of view[1], does not support per process monitoring. Thus, perf mem/c2c on AMD are currently supported in per-cpu mode only. Example: $ sudo perf mem record -- -c 10000 ^C[ perf record: Woken up 227 times to write data ] [ perf record: Captured and wrote 58.760 MB perf.data (836978 samples) ] $ sudo perf mem report -F mem,sample,snoop Samples: 836K of event 'ibs_op//', Event count (approx.): 8418762 Memory access Samples Snoop N/A 700620 N/A L1 hit 126675 N/A L2 hit 424 N/A L3 hit 664 HitM L3 hit 10 N/A Local RAM hit 2 N/A Remote RAM (1 hop) hit 8558 N/A Remote Cache (1 hop) hit 3 N/A Remote Cache (1 hop) hit 2 HitM Remote Cache (2 hops) hit 10 HitM Remote Cache (2 hops) hit 6 N/A Uncached hit 4 N/A $ [1]: https://lore.kernel.org/lkml/20220829113347.295-1-ravi.bangoria@amd.com Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ali Saidi Cc: Ananth Narayan Cc: Andi Kleen Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Ian Rogers Cc: Ingo Molnar Cc: Joe Mario Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Cc: Stephane Eranian Cc: Thomas Gleixner Cc: x86@kernel.org Link: https://lore.kernel.org/r/20221006153946.7816-6-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-c2c.txt | 14 ++++++++++---- tools/perf/Documentation/perf-mem.txt | 3 ++- tools/perf/arch/x86/util/mem-events.c | 31 +++++++++++++++++++++++++++++-- 3 files changed, 41 insertions(+), 7 deletions(-) (limited to 'tools/perf/arch') diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index f1f7ae6b08d1..5c5eb2def83e 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -19,9 +19,10 @@ C2C stands for Cache To Cache. The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows you to track down the cacheline contentions. -On x86, the tool is based on load latency and precise store facility events +On Intel, the tool is based on load latency and precise store facility events provided by Intel CPUs. On PowerPC, the tool uses random instruction sampling -with thresholding feature. +with thresholding feature. On AMD, the tool uses IBS op pmu (due to hardware +limitations, perf c2c is not supported on Zen3 cpus). These events provide: - memory address of the access @@ -49,7 +50,8 @@ RECORD OPTIONS -l:: --ldlat:: - Configure mem-loads latency. (x86 only) + Configure mem-loads latency. Supported on Intel and Arm64 processors + only. Ignored on other archs. -k:: --all-kernel:: @@ -135,11 +137,15 @@ Following perf record options are configured by default: -W,-d,--phys-data,--sample-cpu Unless specified otherwise with '-e' option, following events are monitored by -default on x86: +default on Intel: cpu/mem-loads,ldlat=30/P cpu/mem-stores/P +following on AMD: + + ibs_op// + and following on PowerPC: cpu/mem-loads/ diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 66177511c5c4..005c95580b1e 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -85,7 +85,8 @@ RECORD OPTIONS Be more verbose (show counter open errors, etc) --ldlat :: - Specify desired latency for loads event. (x86 only) + Specify desired latency for loads event. Supported on Intel and Arm64 + processors only. Ignored on other archs. In addition, for report all perf report options are valid, and for record all perf record options. diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c index 5214370ca4e4..f683ac702247 100644 --- a/tools/perf/arch/x86/util/mem-events.c +++ b/tools/perf/arch/x86/util/mem-events.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include "util/pmu.h" +#include "util/env.h" #include "map_symbol.h" #include "mem-events.h" +#include "linux/string.h" static char mem_loads_name[100]; static bool mem_loads_name__init; @@ -12,18 +14,43 @@ static char mem_stores_name[100]; #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } -static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { +static struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX] = { E("ldlat-loads", "%s/mem-loads,ldlat=%u/P", "%s/events/mem-loads"), E("ldlat-stores", "%s/mem-stores/P", "%s/events/mem-stores"), E(NULL, NULL, NULL), }; +static struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = { + E(NULL, NULL, NULL), + E(NULL, NULL, NULL), + E("mem-ldst", "ibs_op//", "ibs_op"), +}; + +static int perf_mem_is_amd_cpu(void) +{ + struct perf_env env = { .total_mem = 0, }; + + perf_env__cpuid(&env); + if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD")) + return 1; + return -1; +} + struct perf_mem_event *perf_mem_events__ptr(int i) { + /* 0: Uninitialized, 1: Yes, -1: No */ + static int is_amd; + if (i >= PERF_MEM_EVENTS__MAX) return NULL; - return &perf_mem_events[i]; + if (!is_amd) + is_amd = perf_mem_is_amd_cpu(); + + if (is_amd == 1) + return &perf_mem_events_amd[i]; + + return &perf_mem_events_intel[i]; } bool is_mem_loads_aux_event(struct evsel *leader) -- cgit v1.2.3-59-g8ed1b