aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-03-27 13:42:32 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-03-27 13:42:32 -0700
commit7b58b82b86c8b65a2b57a4c6cb96a460654f9e09 (patch)
treea13e19f216389f16f1cb6641d54751f167482515 /tools/perf/arch
parentMerge tag 'memblock-v5.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock (diff)
parentperf evsel: Improve AMD IBS (Instruction-Based Sampling) error handling messages (diff)
downloadlinux-dev-7b58b82b86c8b65a2b57a4c6cb96a460654f9e09.tar.xz
linux-dev-7b58b82b86c8b65a2b57a4c6cb96a460654f9e09.zip
Merge tag 'perf-tools-for-v5.18-2022-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools updates from Arnaldo Carvalho de Melo: "New features: perf ftrace: - Add -n/--use-nsec option to the 'latency' subcommand. Default: usecs: $ sudo perf ftrace latency -T dput -a sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 2098375 | ############################# | 1 - 2 us | 61 | | 2 - 4 us | 33 | | 4 - 8 us | 13 | | 8 - 16 us | 124 | | 16 - 32 us | 123 | | 32 - 64 us | 1 | | 64 - 128 us | 0 | | 128 - 256 us | 1 | | 256 - 512 us | 0 | | Better granularity with nsec: $ sudo perf ftrace latency -T dput -a -n sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 0 | | 1 - 2 ns | 0 | | 2 - 4 ns | 0 | | 4 - 8 ns | 0 | | 8 - 16 ns | 0 | | 16 - 32 ns | 0 | | 32 - 64 ns | 0 | | 64 - 128 ns | 1163434 | ############## | 128 - 256 ns | 914102 | ############# | 256 - 512 ns | 884 | | 512 - 1024 ns | 613 | | 1 - 2 us | 31 | | 2 - 4 us | 17 | | 4 - 8 us | 7 | | 8 - 16 us | 123 | | 16 - 32 us | 83 | | perf lock: - Add -c/--combine-locks option to merge lock instances in the same class into a single entry. # perf lock report -c Name acquired contended avg wait(ns) total wait(ns) max wait(ns) min wait(ns) rcu_read_lock 251225 0 0 0 0 0 hrtimer_bases.lock 39450 0 0 0 0 0 &sb->s_type->i_l... 10301 1 662 662 662 662 ptlock_ptr(page) 10173 2 701 1402 760 642 &(ei->i_block_re... 8732 0 0 0 0 0 &xa->xa_lock 8088 0 0 0 0 0 &base->lock 6705 0 0 0 0 0 &p->pi_lock 5549 0 0 0 0 0 &dentry->d_lockr... 5010 4 1274 5097 1844 789 &ep->lock 3958 0 0 0 0 0 - Add -F/--field option to customize the list of fields to output: $ perf lock report -F contended,wait_max -k avg_wait Name contended max wait(ns) avg wait(ns) slock-AF_INET6 1 23543 23543 &lruvec->lru_lock 5 18317 11254 slock-AF_INET6 1 10379 10379 rcu_node_1 1 2104 2104 &dentry->d_lockr... 1 1844 1844 &dentry->d_lockr... 1 1672 1672 &newf->file_lock 15 2279 1025 &dentry->d_lockr... 1 792 792 - Add --synth=no option for record, as there is no need to symbolize, lock names comes from the tracepoints. perf record: - Threaded recording, opt-in, via the new --threads command line option. - Improve AMD IBS (Instruction-Based Sampling) error handling messages. perf script: - Add 'brstackinsnlen' field (use it with -F) for branch stacks. - Output branch sample type in 'perf script'. perf report: - Add "addr_from" and "addr_to" sort dimensions. - Print branch stack entry type in 'perf report --dump-raw-trace' - Fix symbolization for chrooted workloads. Hardware tracing: Intel PT: - Add CFE (Control Flow Event) and EVD (Event Data) packets support. - Add MODE.Exec IFLAG bit support. Explanation about these features from the "Intel® 64 and IA-32 architectures software developer’s manual combined volumes: 1, 2A, 2B, 2C, 2D, 3A, 3B, 3C, 3D, and 4" PDF at: https://cdrdv2.intel.com/v1/dl/getContent/671200 At page 3951: "32.2.4 Event Trace is a capability that exposes details about the asynchronous events, when they are generated, and when their corresponding software event handler completes execution. These include: o Interrupts, including NMI and SMI, including the interrupt vector when defined. o Faults, exceptions including the fault vector. - Page faults additionally include the page fault address, when in context. o Event handler returns, including IRET and RSM. o VM exits and VM entries.¹ - VM exits include the values written to the “exit reason” and “exit qualification” VMCS fields. INIT and SIPI events. o TSX aborts, including the abort status returned for the RTM instructions. o Shutdown. Additionally, it provides indication of the status of the Interrupt Flag (IF), to indicate when interrupts are masked" ARM CoreSight: - Use advertised caps/min_interval as default sample_period on ARM spe. - Update deduction of TRCCONFIGR register for branch broadcast on ARM's CoreSight ETM. Vendor Events (JSON): Intel: - Update events and metrics for: Alderlake, Broadwell, Broadwell DE, BroadwellX, CascadelakeX, Elkhartlake, Bonnell, Goldmont, GoldmontPlus, Westmere EP-DP, Haswell, HaswellX, Icelake, IcelakeX, Ivybridge, Ivytown, Jaketown, Knights Landing, Nehalem EP, Sandybridge, Silvermont, Skylake, Skylake Server, SkylakeX, Tigerlake, TremontX, Westmere EP-SP, and Westmere EX. ARM: - Add support for HiSilicon CPA PMU aliasing. perf stat: - Fix forked applications enablement of counters. - The 'slots' should only be printed on a different order than the one specified on the command line when 'topdown' events are present, fix it. Miscellaneous: - Sync msr-index, cpufeatures header files with the kernel sources. - Stop using some deprecated libbpf APIs in 'perf trace'. - Fix some spelling mistakes. - Refactor the maps pointers usage to pave the way for using refcount debugging. - Only offer the --tui option on perf top, report and annotate when perf was built with libslang. - Don't mention --to-ctf in 'perf data --help' when not linking with the required library, libbabeltrace. - Use ARRAY_SIZE() instead of ad hoc equivalent, spotted by array_size.cocci. - Enhance the matching of sub-commands abbreviations: 'perf c2c rec' -> 'perf c2c record' 'perf c2c recport -> error - Set build-id using build-id header on new mmap records. - Fix generation of 'perf --version' string. perf test: - Add test for the arm_spe event. - Add test to check unwinding using fame-pointer (fp) mode on arm64. - Make metric testing more robust in 'perf test'. - Add error message for unsupported branch stack cases. libperf: - Add API for allocating new thread map array. - Fix typo in perf_evlist__open() failure error messages in libperf tests. perf c2c: - Replace bitmap_weight() with bitmap_empty() where appropriate" * tag 'perf-tools-for-v5.18-2022-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (143 commits) perf evsel: Improve AMD IBS (Instruction-Based Sampling) error handling messages perf python: Add perf_env stubs that will be needed in evsel__open_strerror() perf tools: Enhance the matching of sub-commands abbreviations libperf tests: Fix typo in perf_evlist__open() failure error messages tools arm64: Import cputype.h perf lock: Add -F/--field option to control output perf lock: Extend struct lock_key to have print function perf lock: Add --synth=no option for record tools headers cpufeatures: Sync with the kernel sources tools headers cpufeatures: Sync with the kernel sources perf stat: Fix forked applications enablement of counters tools arch x86: Sync the msr-index.h copy with the kernel sources perf evsel: Make evsel__env() always return a valid env perf build-id: Fix spelling mistake "Cant" -> "Can't" perf header: Fix spelling mistake "could't" -> "couldn't" perf script: Add 'brstackinsnlen' for branch stacks perf parse-events: Move slots only with topdown perf ftrace latency: Update documentation perf ftrace latency: Add -n/--use-nsec option perf tools: Fix version kernel tag ...
Diffstat (limited to 'tools/perf/arch')
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c3
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c2
-rw-r--r--tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c17
-rw-r--r--tools/perf/arch/x86/util/event.c2
-rw-r--r--tools/perf/arch/x86/util/evlist.c18
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c7
6 files changed, 40 insertions, 9 deletions
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 2e8b2c4365a0..cbc555245959 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -510,6 +510,9 @@ static u64 cs_etmv4_get_config(struct auxtrace_record *itr)
if (config_opts & BIT(ETM_OPT_CTXTID2))
config |= BIT(ETM4_CFG_BIT_VMID) |
BIT(ETM4_CFG_BIT_VMID_OPT);
+ if (config_opts & BIT(ETM_OPT_BRANCH_BROADCAST))
+ config |= BIT(ETM4_CFG_BIT_BB);
+
return config;
}
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 2100d46ccf5e..5860bbaea95a 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -158,7 +158,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
return -EINVAL;
}
evsel->core.attr.freq = 0;
- evsel->core.attr.sample_period = 1;
+ evsel->core.attr.sample_period = arm_spe_pmu->default_config->sample_period;
arm_spe_evsel = evsel;
opts->full_auxtrace = true;
}
diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
index 2fc882ab24c1..42237656f453 100644
--- a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
+++ b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
@@ -17,7 +17,7 @@
* @new_ctx: expected new packet context
* @ctx_unchanged: the packet context must not change
*/
-struct test_data {
+static struct test_data {
int len;
u8 bytes[INTEL_PT_PKT_MAX_SZ];
enum intel_pt_pkt_ctx ctx;
@@ -70,8 +70,11 @@ struct test_data {
{8, {0x02, 0x43, 3, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0xC0A08060403}, 0, 0 },
/* Mode Exec Packet */
{2, {0x99, 0x00}, 0, {INTEL_PT_MODE_EXEC, 0, 16}, 0, 0 },
- {2, {0x99, 0x01}, 0, {INTEL_PT_MODE_EXEC, 0, 64}, 0, 0 },
- {2, {0x99, 0x02}, 0, {INTEL_PT_MODE_EXEC, 0, 32}, 0, 0 },
+ {2, {0x99, 0x01}, 0, {INTEL_PT_MODE_EXEC, 1, 64}, 0, 0 },
+ {2, {0x99, 0x02}, 0, {INTEL_PT_MODE_EXEC, 2, 32}, 0, 0 },
+ {2, {0x99, 0x04}, 0, {INTEL_PT_MODE_EXEC, 4, 16}, 0, 0 },
+ {2, {0x99, 0x05}, 0, {INTEL_PT_MODE_EXEC, 5, 64}, 0, 0 },
+ {2, {0x99, 0x06}, 0, {INTEL_PT_MODE_EXEC, 6, 32}, 0, 0 },
/* Mode TSX Packet */
{2, {0x99, 0x20}, 0, {INTEL_PT_MODE_TSX, 0, 0}, 0, 0 },
{2, {0x99, 0x21}, 0, {INTEL_PT_MODE_TSX, 0, 1}, 0, 0 },
@@ -166,6 +169,14 @@ struct test_data {
{2, {0x02, 0xb3}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 },
{2, {0x02, 0x33}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP, 0, 0}, 0, 0 },
{2, {0x02, 0xb3}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 },
+ /* Control Flow Event Packet */
+ {4, {0x02, 0x13, 0x01, 0x03}, 0, {INTEL_PT_CFE, 1, 3}, 0, 0 },
+ {4, {0x02, 0x13, 0x81, 0x03}, 0, {INTEL_PT_CFE_IP, 1, 3}, 0, 0 },
+ {4, {0x02, 0x13, 0x1f, 0x00}, 0, {INTEL_PT_CFE, 0x1f, 0}, 0, 0 },
+ {4, {0x02, 0x13, 0x9f, 0xff}, 0, {INTEL_PT_CFE_IP, 0x1f, 0xff}, 0, 0 },
+ /* */
+ {11, {0x02, 0x53, 0x09, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_EVD, 0x09, 0x7060504030201}, 0, 0 },
+ {11, {0x02, 0x53, 0x3f, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_EVD, 0x3f, 0x8070605040302}, 0, 0 },
/* Terminator */
{0, {0}, 0, {0, 0, 0}, 0, 0 },
};
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index 9b31734ee968..e670f3547581 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -18,7 +18,7 @@ int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
{
int rc = 0;
struct map *pos;
- struct maps *kmaps = &machine->kmaps;
+ struct maps *kmaps = machine__kernel_maps(machine);
union perf_event *event = zalloc(sizeof(event->mmap) +
machine->id_hdr_size);
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index 8d9b55959256..cfc208d71f00 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -20,17 +20,27 @@ int arch_evlist__add_default_attrs(struct evlist *evlist)
struct evsel *arch_evlist__leader(struct list_head *list)
{
- struct evsel *evsel, *first;
+ struct evsel *evsel, *first, *slots = NULL;
+ bool has_topdown = false;
first = list_first_entry(list, struct evsel, core.node);
if (!pmu_have_event("cpu", "slots"))
return first;
+ /* If there is a slots event and a topdown event then the slots event comes first. */
__evlist__for_each_entry(list, evsel) {
- if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") &&
- evsel->name && strcasestr(evsel->name, "slots"))
- return evsel;
+ if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") && evsel->name) {
+ if (strcasestr(evsel->name, "slots")) {
+ slots = evsel;
+ if (slots == first)
+ return first;
+ }
+ if (!strncasecmp(evsel->name, "topdown", 7))
+ has_topdown = true;
+ if (slots && has_topdown)
+ return slots;
+ }
}
return first;
}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 6df0dc00d73a..8c31578d6f4a 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -306,6 +306,7 @@ intel_pt_info_priv_size(struct auxtrace_record *itr, struct evlist *evlist)
ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) +
intel_pt_filter_bytes(filter);
+ ptr->priv_size += sizeof(u64); /* Cap Event Trace */
return ptr->priv_size;
}
@@ -335,6 +336,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
unsigned long max_non_turbo_ratio;
size_t filter_str_len;
const char *filter;
+ int event_trace;
__u64 *info;
int err;
@@ -357,6 +359,9 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
"%lu", &max_non_turbo_ratio) != 1)
max_non_turbo_ratio = 0;
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/event_trace",
+ "%d", &event_trace) != 1)
+ event_trace = 0;
filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
filter_str_len = filter ? strlen(filter) : 0;
@@ -407,6 +412,8 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
info += len >> 3;
}
+ *info++ = event_trace;
+
return 0;
}