aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/build/Build.include2
-rw-r--r--tools/include/uapi/linux/perf_event.h5
-rw-r--r--tools/lib/perf/evlist.c15
-rw-r--r--tools/lib/perf/evsel.c19
-rw-r--r--tools/lib/perf/include/internal/evlist.h2
-rw-r--r--tools/lib/perf/include/perf/evsel.h4
-rw-r--r--tools/lib/perf/libperf.map1
-rw-r--r--tools/lib/perf/mmap.c2
-rw-r--r--tools/lib/perf/tests/test-evlist.c157
-rw-r--r--tools/perf/Documentation/perf-list.txt48
-rw-r--r--tools/perf/Documentation/perf-record.txt6
-rw-r--r--tools/perf/Documentation/perf-stat.txt10
-rw-r--r--tools/perf/Documentation/perf-top.txt7
-rw-r--r--tools/perf/Makefile.config10
-rw-r--r--tools/perf/Makefile.perf4
-rw-r--r--tools/perf/arch/arm/include/perf_regs.h42
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c19
-rw-r--r--tools/perf/arch/arm64/include/perf_regs.h78
-rw-r--r--tools/perf/arch/arm64/util/machine.c7
-rw-r--r--tools/perf/arch/csky/include/perf_regs.h82
-rw-r--r--tools/perf/arch/mips/include/perf_regs.h69
-rw-r--r--tools/perf/arch/powerpc/include/perf_regs.h66
-rw-r--r--tools/perf/arch/riscv/include/perf_regs.h74
-rw-r--r--tools/perf/arch/s390/include/perf_regs.h78
-rw-r--r--tools/perf/arch/x86/include/perf_regs.h82
-rw-r--r--tools/perf/arch/x86/util/evlist.c17
-rw-r--r--tools/perf/builtin-bench.c5
-rw-r--r--tools/perf/builtin-ftrace.c445
-rw-r--r--tools/perf/builtin-record.c8
-rw-r--r--tools/perf/builtin-report.c4
-rw-r--r--tools/perf/builtin-script.c31
-rw-r--r--tools/perf/builtin-stat.c24
-rw-r--r--tools/perf/builtin-trace.c3
-rw-r--r--tools/perf/dlfilters/dlfilter-test-api-v0.c2
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json8
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json20
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json155
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json47
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json143
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json38
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json5
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json23
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json14
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json29
-rw-r--r--tools/perf/pmu-events/arch/arm64/common-and-microarch.json (renamed from tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json)198
-rw-r--r--tools/perf/pmu-events/arch/arm64/mapfile.csv1
-rw-r--r--tools/perf/pmu-events/arch/arm64/recommended.json (renamed from tools/perf/pmu-events/arch/arm64/armv8-recommended.json)202
-rw-r--r--tools/perf/tests/Build1
-rw-r--r--tools/perf/tests/builtin-test.c16
-rw-r--r--tools/perf/tests/sigtrap.c177
-rw-r--r--tools/perf/tests/tests.h1
-rw-r--r--tools/perf/util/Build2
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.c2
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.h1
-rw-r--r--tools/perf/util/arm-spe.c67
-rw-r--r--tools/perf/util/arm64-frame-pointer-unwind-support.c63
-rw-r--r--tools/perf/util/arm64-frame-pointer-unwind-support.h10
-rw-r--r--tools/perf/util/bpf-loader.c15
-rw-r--r--tools/perf/util/bpf_ftrace.c152
-rw-r--r--tools/perf/util/bpf_skel/func_latency.bpf.c114
-rw-r--r--tools/perf/util/callchain.c14
-rw-r--r--tools/perf/util/callchain.h4
-rw-r--r--tools/perf/util/evlist.h2
-rw-r--r--tools/perf/util/evsel.c19
-rw-r--r--tools/perf/util/evsel.h3
-rw-r--r--tools/perf/util/ftrace.h81
-rw-r--r--tools/perf/util/libunwind/arm64.c2
-rw-r--r--tools/perf/util/machine.c50
-rw-r--r--tools/perf/util/machine.h1
-rw-r--r--tools/perf/util/mem-events.c29
-rw-r--r--tools/perf/util/metricgroup.c42
-rw-r--r--tools/perf/util/namespaces.c76
-rw-r--r--tools/perf/util/namespaces.h2
-rw-r--r--tools/perf/util/parse-events-hybrid.c9
-rw-r--r--tools/perf/util/parse-events.c10
-rw-r--r--tools/perf/util/perf_regs.c666
-rw-r--r--tools/perf/util/perf_regs.h17
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c10
-rw-r--r--tools/perf/util/session.c25
79 files changed, 3064 insertions, 930 deletions
diff --git a/tools/build/Build.include b/tools/build/Build.include
index 2cf3b1bde86e..c2a95ab47379 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -99,7 +99,7 @@ cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXX
###
## HOSTCC C flags
-host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(KBUILD_HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj))
+host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj))
# output directory for tests below
TMPOUT = .tmp_$$$$
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index bd8860eeb291..4cd39aaccbe7 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -1332,7 +1332,10 @@ union perf_mem_data_src {
/* hop level */
#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
-/* 2-7 available */
+#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */
+#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */
+#define PERF_MEM_HOPS_3 0x04 /* remote board */
+/* 5-7 available */
#define PERF_MEM_HOPS_SHIFT 43
#define PERF_MEM_S(a, s) \
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index e37dfad31383..245acbc53bd3 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -643,14 +643,14 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,
return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first;
}
-void __perf_evlist__set_leader(struct list_head *list)
+void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader)
{
- struct perf_evsel *evsel, *leader;
+ struct perf_evsel *first, *last, *evsel;
- leader = list_entry(list->next, struct perf_evsel, node);
- evsel = list_entry(list->prev, struct perf_evsel, node);
+ first = list_first_entry(list, struct perf_evsel, node);
+ last = list_last_entry(list, struct perf_evsel, node);
- leader->nr_members = evsel->idx - leader->idx + 1;
+ leader->nr_members = last->idx - first->idx + 1;
__perf_evlist__for_each_entry(list, evsel)
evsel->leader = leader;
@@ -659,7 +659,10 @@ void __perf_evlist__set_leader(struct list_head *list)
void perf_evlist__set_leader(struct perf_evlist *evlist)
{
if (evlist->nr_entries) {
+ struct perf_evsel *first = list_entry(evlist->entries.next,
+ struct perf_evsel, node);
+
evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
- __perf_evlist__set_leader(&evlist->entries);
+ __perf_evlist__set_leader(&evlist->entries, first);
}
}
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index 8441e3e1aaac..68f83d2c27c1 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -431,3 +431,22 @@ void perf_evsel__free_id(struct perf_evsel *evsel)
zfree(&evsel->id);
evsel->ids = 0;
}
+
+void perf_counts_values__scale(struct perf_counts_values *count,
+ bool scale, __s8 *pscaled)
+{
+ s8 scaled = 0;
+
+ if (scale) {
+ if (count->run == 0) {
+ scaled = -1;
+ count->val = 0;
+ } else if (count->run < count->ena) {
+ scaled = 1;
+ count->val = (u64)((double)count->val * count->ena / count->run);
+ }
+ }
+
+ if (pscaled)
+ *pscaled = scaled;
+}
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
index f366dbad6a88..6f74269a3ad4 100644
--- a/tools/lib/perf/include/internal/evlist.h
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -127,5 +127,5 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,
void perf_evlist__reset_id_hash(struct perf_evlist *evlist);
-void __perf_evlist__set_leader(struct list_head *list);
+void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader);
#endif /* __LIBPERF_INTERNAL_EVLIST_H */
diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h
index 60eae25076d3..f401c7484bec 100644
--- a/tools/lib/perf/include/perf/evsel.h
+++ b/tools/lib/perf/include/perf/evsel.h
@@ -4,6 +4,8 @@
#include <stdint.h>
#include <perf/core.h>
+#include <stdbool.h>
+#include <linux/types.h>
struct perf_evsel;
struct perf_event_attr;
@@ -39,5 +41,7 @@ LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
+LIBPERF_API void perf_counts_values__scale(struct perf_counts_values *count,
+ bool scale, __s8 *pscaled);
#endif /* __LIBPERF_EVSEL_H */
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 71468606e8a7..5979bf92d98f 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -50,6 +50,7 @@ LIBPERF_0.0.1 {
perf_mmap__read_init;
perf_mmap__read_done;
perf_mmap__read_event;
+ perf_counts_values__scale;
local:
*;
};
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index c89dfa5f67b3..aaa457904008 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -353,8 +353,6 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count
count->ena += delta;
if (idx)
count->run += delta;
-
- cnt = mul_u64_u64_div64(cnt, count->ena, count->run);
}
count->val = cnt;
diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
index ce91a582f0e4..e7afff12c35a 100644
--- a/tools/lib/perf/tests/test-evlist.c
+++ b/tools/lib/perf/tests/test-evlist.c
@@ -21,6 +21,9 @@
#include "tests.h"
#include <internal/evsel.h>
+#define EVENT_NUM 15
+#define WAIT_COUNT 100000000UL
+
static int libperf_print(enum libperf_print_level level,
const char *fmt, va_list ap)
{
@@ -413,6 +416,159 @@ static int test_mmap_cpus(void)
return 0;
}
+static double display_error(long long average,
+ long long high,
+ long long low,
+ long long expected)
+{
+ double error;
+
+ error = (((double)average - expected) / expected) * 100.0;
+
+ __T_VERBOSE(" Expected: %lld\n", expected);
+ __T_VERBOSE(" High: %lld Low: %lld Average: %lld\n",
+ high, low, average);
+
+ __T_VERBOSE(" Average Error = %.2f%%\n", error);
+
+ return error;
+}
+
+static int test_stat_multiplexing(void)
+{
+ struct perf_counts_values expected_counts = { .val = 0 };
+ struct perf_counts_values counts[EVENT_NUM] = {{ .val = 0 },};
+ struct perf_thread_map *threads;
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_INSTRUCTIONS,
+ .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING,
+ .disabled = 1,
+ };
+ int err, i, nonzero = 0;
+ unsigned long count;
+ long long max = 0, min = 0, avg = 0;
+ double error = 0.0;
+ s8 scaled = 0;
+
+ /* read for non-multiplexing event count */
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ evsel = perf_evsel__new(&attr);
+ __T("failed to create evsel", evsel);
+
+ err = perf_evsel__open(evsel, NULL, threads);
+ __T("failed to open evsel", err == 0);
+
+ err = perf_evsel__enable(evsel);
+ __T("failed to enable evsel", err == 0);
+
+ /* wait loop */
+ count = WAIT_COUNT;
+ while (count--)
+ ;
+
+ perf_evsel__read(evsel, 0, 0, &expected_counts);
+ __T("failed to read value for evsel", expected_counts.val != 0);
+ __T("failed to read non-multiplexing event count",
+ expected_counts.ena == expected_counts.run);
+
+ err = perf_evsel__disable(evsel);
+ __T("failed to enable evsel", err == 0);
+
+ perf_evsel__close(evsel);
+ perf_evsel__delete(evsel);
+
+ perf_thread_map__put(threads);
+
+ /* read for multiplexing event count */
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ evlist = perf_evlist__new();
+ __T("failed to create evlist", evlist);
+
+ for (i = 0; i < EVENT_NUM; i++) {
+ evsel = perf_evsel__new(&attr);
+ __T("failed to create evsel", evsel);
+
+ perf_evlist__add(evlist, evsel);
+ }
+ perf_evlist__set_maps(evlist, NULL, threads);
+
+ err = perf_evlist__open(evlist);
+ __T("failed to open evsel", err == 0);
+
+ perf_evlist__enable(evlist);
+
+ /* wait loop */
+ count = WAIT_COUNT;
+ while (count--)
+ ;
+
+ i = 0;
+ perf_evlist__for_each_evsel(evlist, evsel) {
+ perf_evsel__read(evsel, 0, 0, &counts[i]);
+ __T("failed to read value for evsel", counts[i].val != 0);
+ i++;
+ }
+
+ perf_evlist__disable(evlist);
+
+ min = counts[0].val;
+ for (i = 0; i < EVENT_NUM; i++) {
+ __T_VERBOSE("Event %2d -- Raw count = %lu, run = %lu, enable = %lu\n",
+ i, counts[i].val, counts[i].run, counts[i].ena);
+
+ perf_counts_values__scale(&counts[i], true, &scaled);
+ if (scaled == 1) {
+ __T_VERBOSE("\t Scaled count = %lu (%.2lf%%, %lu/%lu)\n",
+ counts[i].val,
+ (double)counts[i].run / (double)counts[i].ena * 100.0,
+ counts[i].run, counts[i].ena);
+ } else if (scaled == -1) {
+ __T_VERBOSE("\t Not Running\n");
+ } else {
+ __T_VERBOSE("\t Not Scaling\n");
+ }
+
+ if (counts[i].val > max)
+ max = counts[i].val;
+
+ if (counts[i].val < min)
+ min = counts[i].val;
+
+ avg += counts[i].val;
+
+ if (counts[i].val != 0)
+ nonzero++;
+ }
+
+ if (nonzero != 0)
+ avg = avg / nonzero;
+ else
+ avg = 0;
+
+ error = display_error(avg, max, min, expected_counts.val);
+
+ __T("Error out of range!", ((error <= 1.0) && (error >= -1.0)));
+
+ perf_evlist__close(evlist);
+ perf_evlist__delete(evlist);
+
+ perf_thread_map__put(threads);
+
+ return 0;
+}
+
int test_evlist(int argc, char **argv)
{
__T_START;
@@ -424,6 +580,7 @@ int test_evlist(int argc, char **argv)
test_stat_thread_enable();
test_mmap_thread();
test_mmap_cpus();
+ test_stat_multiplexing();
__T_END;
return tests_failed == 0 ? 0 : -1;
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 4dc8d0af19df..57384a97c04f 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -81,7 +81,11 @@ On AMD systems it is implemented using IBS (up to precise-level 2).
The precise modifier works with event types 0x76 (cpu-cycles, CPU
clocks not halted) and 0xC1 (micro-ops retired). Both events map to
IBS execution sampling (IBS op) with the IBS Op Counter Control bit
-(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmer’s
+(IbsOpCntCtl) set respectively (see the
+Core Complex (CCX) -> Processor x86 Core -> Instruction Based Sampling (IBS)
+section of the [AMD Processor Programming Reference (PPR)] relevant to the
+family, model and stepping of the processor being used).
+
Manual Volume 2: System Programming, 13.3 Instruction-Based
Sampling). Examples to use IBS:
@@ -94,10 +98,12 @@ RAW HARDWARE EVENT DESCRIPTOR
Even when an event is not available in a symbolic form within perf right now,
it can be encoded in a per processor specific way.
-For instance For x86 CPUs NNN represents the raw register encoding with the
+For instance on x86 CPUs, N is a hexadecimal value that represents the raw register encoding with the
layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide] Figure 30-1 Layout
-of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmer’s Manual Volume 2: System Programming], Page 344,
-Figure 13-7 Performance Event-Select Register (PerfEvtSeln)).
+of IA32_PERFEVTSELx MSRs) or AMD's PERF_CTL MSRs (see the
+Core Complex (CCX) -> Processor x86 Core -> MSR Registers section of the
+[AMD Processor Programming Reference (PPR)] relevant to the family, model
+and stepping of the processor being used).
Note: Only the following bit fields can be set in x86 counter
registers: event, umask, edge, inv, cmask. Esp. guest/host only and
@@ -126,6 +132,38 @@ It's also possible to use pmu syntax:
perf record -e cpu/r1a8/ ...
perf record -e cpu/r0x1a8/ ...
+Some processors, like those from AMD, support event codes and unit masks
+larger than a byte. In such cases, the bits corresponding to the event
+configuration parameters can be seen with:
+
+ cat /sys/bus/event_source/devices/<pmu>/format/<config>
+
+Example:
+
+If the AMD docs for an EPYC 7713 processor describe an event as:
+
+ Event Umask Event Mask
+ Num. Value Mnemonic Description
+
+ 28FH 03H op_cache_hit_miss.op_cache_hit Counts Op Cache micro-tag
+ hit events.
+
+raw encoding of 0x0328F cannot be used since the upper nibble of the
+EventSelect bits have to be specified via bits 32-35 as can be seen with:
+
+ cat /sys/bus/event_source/devices/cpu/format/event
+
+raw encoding of 0x20000038F should be used instead:
+
+ perf stat -e r20000038f -a sleep 1
+ perf record -e r20000038f ...
+
+It's also possible to use pmu syntax:
+
+ perf record -e r20000038f -a sleep 1
+ perf record -e cpu/r20000038f/ ...
+ perf record -e cpu/r0x20000038f/ ...
+
You should refer to the processor specific documentation for getting these
details. Some of them are referenced in the SEE ALSO section below.
@@ -316,4 +354,4 @@ SEE ALSO
linkperf:perf-stat[1], linkperf:perf-top[1],
linkperf:perf-record[1],
http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
-http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming]
+https://bugzilla.kernel.org/show_bug.cgi?id=206537[AMD Processor Programming Reference (PPR)]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 3cf7bac67239..55df7b073a55 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -30,8 +30,10 @@ OPTIONS
- a symbolic event name (use 'perf list' to list all events)
- - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
- hexadecimal event descriptor.
+ - a raw PMU event in the form of rN where N is a hexadecimal value
+ that represents the raw register encoding with the layout of the
+ event control registers as described by entries in
+ /sys/bus/event_sources/devices/cpu/format/*.
- a symbolic or raw PMU event followed by an optional colon
and a list of event modifiers, e.g., cpu-cycles:p. See the
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 7e6fb7cbc0f4..c06c341e72b9 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -36,8 +36,10 @@ report::
- a symbolic event name (use 'perf list' to list all events)
- - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
- hexadecimal event descriptor.
+ - a raw PMU event in the form of rN where N is a hexadecimal value
+ that represents the raw register encoding with the layout of the
+ event control registers as described by entries in
+ /sys/bus/event_sources/devices/cpu/format/*.
- a symbolic or raw PMU event followed by an optional colon
and a list of event modifiers, e.g., cpu-cycles:p. See the
@@ -493,6 +495,10 @@ This option can be enabled in perf config by setting the variable
$ perf config stat.no-csv-summary=true
+--cputype::
+Only enable events on applying cpu with this type for hybrid platform
+(e.g. core or atom)"
+
EXAMPLES
--------
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 9898a32b8d9c..cac3dfbee7d8 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -38,9 +38,10 @@ Default is to monitor all CPUS.
-e <event>::
--event=<event>::
Select the PMU event. Selection can be a symbolic event name
- (use 'perf list' to list all events) or a raw PMU
- event (eventsel+umask) in the form of rNNN where NNN is a
- hexadecimal event descriptor.
+ (use 'perf list' to list all events) or a raw PMU event in the form
+ of rN where N is a hexadecimal value that represents the raw register
+ encoding with the layout of the event control registers as described
+ by entries in /sys/bus/event_sources/devices/cpu/format/*.
-E <entries>::
--entries=<entries>::
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 3df74cf5651a..96ad944ca6a8 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -17,6 +17,7 @@ detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected)
detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected)
CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
+HOSTCFLAGS := $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
include $(srctree)/tools/scripts/Makefile.arch
@@ -143,7 +144,10 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
ifdef CSINCLUDES
LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
endif
-OPENCSDLIBS := -lopencsd_c_api -lopencsd -lstdc++
+OPENCSDLIBS := -lopencsd_c_api
+ifeq ($(findstring -static,${LDFLAGS}),-static)
+ OPENCSDLIBS += -lopencsd -lstdc++
+endif
ifdef CSLIBS
LIBOPENCSD_LDFLAGS := -L$(CSLIBS)
endif
@@ -211,6 +215,7 @@ endif
ifneq ($(WERROR),0)
CORE_CFLAGS += -Werror
CXXFLAGS += -Werror
+ HOSTCFLAGS += -Werror
endif
ifndef DEBUG
@@ -290,6 +295,9 @@ CXXFLAGS += -ggdb3
CXXFLAGS += -funwind-tables
CXXFLAGS += -Wno-strict-aliasing
+HOSTCFLAGS += -Wall
+HOSTCFLAGS += -Wextra
+
# Enforce a non-executable stack, as we may regress (again) in the future by
# adding assembler files missing the .GNU-stack linker note.
LDFLAGS += -Wl,-z,noexecstack
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 80522bcfafe0..ac861e42c8f7 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -226,7 +226,7 @@ else
endif
export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK
-export HOSTCC HOSTLD HOSTAR
+export HOSTCC HOSTLD HOSTAR HOSTCFLAGS
include $(srctree)/tools/build/Makefile.include
@@ -1041,7 +1041,7 @@ SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel)
SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
-SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h
+SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT):
$(Q)$(MKDIR) -p $@
diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h
index 4085419283d0..99a06550e25d 100644
--- a/tools/perf/arch/arm/include/perf_regs.h
+++ b/tools/perf/arch/arm/include/perf_regs.h
@@ -15,46 +15,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_ARM_PC
#define PERF_REG_SP PERF_REG_ARM_SP
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_ARM_R0:
- return "r0";
- case PERF_REG_ARM_R1:
- return "r1";
- case PERF_REG_ARM_R2:
- return "r2";
- case PERF_REG_ARM_R3:
- return "r3";
- case PERF_REG_ARM_R4:
- return "r4";
- case PERF_REG_ARM_R5:
- return "r5";
- case PERF_REG_ARM_R6:
- return "r6";
- case PERF_REG_ARM_R7:
- return "r7";
- case PERF_REG_ARM_R8:
- return "r8";
- case PERF_REG_ARM_R9:
- return "r9";
- case PERF_REG_ARM_R10:
- return "r10";
- case PERF_REG_ARM_FP:
- return "fp";
- case PERF_REG_ARM_IP:
- return "ip";
- case PERF_REG_ARM_SP:
- return "sp";
- case PERF_REG_ARM_LR:
- return "lr";
- case PERF_REG_ARM_PC:
- return "pc";
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 293a23bf8be3..8a3d54a86c9c 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -407,25 +407,6 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
}
- /* Validate auxtrace_mmap_pages provided by user */
- if (opts->auxtrace_mmap_pages) {
- unsigned int max_page = (KiB(128) / page_size);
- size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
-
- if (!privileged &&
- opts->auxtrace_mmap_pages > max_page) {
- opts->auxtrace_mmap_pages = max_page;
- pr_err("auxtrace too big, truncating to %d\n",
- max_page);
- }
-
- if (!is_power_of_2(sz)) {
- pr_err("Invalid mmap size for %s: must be a power of 2\n",
- CORESIGHT_ETM_PMU_NAME);
- return -EINVAL;
- }
- }
-
if (opts->auxtrace_snapshot_mode)
pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME,
opts->auxtrace_snapshot_size);
diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
index fa3e07459f76..35a3cc775b39 100644
--- a/tools/perf/arch/arm64/include/perf_regs.h
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -4,7 +4,9 @@
#include <stdlib.h>
#include <linux/types.h>
+#define perf_event_arm_regs perf_event_arm64_regs
#include <asm/perf_regs.h>
+#undef perf_event_arm_regs
void perf_regs_load(u64 *regs);
@@ -15,80 +17,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_ARM64_PC
#define PERF_REG_SP PERF_REG_ARM64_SP
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_ARM64_X0:
- return "x0";
- case PERF_REG_ARM64_X1:
- return "x1";
- case PERF_REG_ARM64_X2:
- return "x2";
- case PERF_REG_ARM64_X3:
- return "x3";
- case PERF_REG_ARM64_X4:
- return "x4";
- case PERF_REG_ARM64_X5:
- return "x5";
- case PERF_REG_ARM64_X6:
- return "x6";
- case PERF_REG_ARM64_X7:
- return "x7";
- case PERF_REG_ARM64_X8:
- return "x8";
- case PERF_REG_ARM64_X9:
- return "x9";
- case PERF_REG_ARM64_X10:
- return "x10";
- case PERF_REG_ARM64_X11:
- return "x11";
- case PERF_REG_ARM64_X12:
- return "x12";
- case PERF_REG_ARM64_X13:
- return "x13";
- case PERF_REG_ARM64_X14:
- return "x14";
- case PERF_REG_ARM64_X15:
- return "x15";
- case PERF_REG_ARM64_X16:
- return "x16";
- case PERF_REG_ARM64_X17:
- return "x17";
- case PERF_REG_ARM64_X18:
- return "x18";
- case PERF_REG_ARM64_X19:
- return "x19";
- case PERF_REG_ARM64_X20:
- return "x20";
- case PERF_REG_ARM64_X21:
- return "x21";
- case PERF_REG_ARM64_X22:
- return "x22";
- case PERF_REG_ARM64_X23:
- return "x23";
- case PERF_REG_ARM64_X24:
- return "x24";
- case PERF_REG_ARM64_X25:
- return "x25";
- case PERF_REG_ARM64_X26:
- return "x26";
- case PERF_REG_ARM64_X27:
- return "x27";
- case PERF_REG_ARM64_X28:
- return "x28";
- case PERF_REG_ARM64_X29:
- return "x29";
- case PERF_REG_ARM64_SP:
- return "sp";
- case PERF_REG_ARM64_LR:
- return "lr";
- case PERF_REG_ARM64_PC:
- return "pc";
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
index 7e7714290a87..d2ce31e28cd7 100644
--- a/tools/perf/arch/arm64/util/machine.c
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -5,6 +5,8 @@
#include <string.h>
#include "debug.h"
#include "symbol.h"
+#include "callchain.h"
+#include "record.h"
/* On arm64, kernel text segment starts at high memory address,
* for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
@@ -26,3 +28,8 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
p->end = c->start;
pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
}
+
+void arch__add_leaf_frame_record_opts(struct record_opts *opts)
+{
+ opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask;
+}
diff --git a/tools/perf/arch/csky/include/perf_regs.h b/tools/perf/arch/csky/include/perf_regs.h
index 25ac3bdcb9d1..1afcc0e916c2 100644
--- a/tools/perf/arch/csky/include/perf_regs.h
+++ b/tools/perf/arch/csky/include/perf_regs.h
@@ -15,86 +15,4 @@
#define PERF_REG_IP PERF_REG_CSKY_PC
#define PERF_REG_SP PERF_REG_CSKY_SP
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_CSKY_A0:
- return "a0";
- case PERF_REG_CSKY_A1:
- return "a1";
- case PERF_REG_CSKY_A2:
- return "a2";
- case PERF_REG_CSKY_A3:
- return "a3";
- case PERF_REG_CSKY_REGS0:
- return "regs0";
- case PERF_REG_CSKY_REGS1:
- return "regs1";
- case PERF_REG_CSKY_REGS2:
- return "regs2";
- case PERF_REG_CSKY_REGS3:
- return "regs3";
- case PERF_REG_CSKY_REGS4:
- return "regs4";
- case PERF_REG_CSKY_REGS5:
- return "regs5";
- case PERF_REG_CSKY_REGS6:
- return "regs6";
- case PERF_REG_CSKY_REGS7:
- return "regs7";
- case PERF_REG_CSKY_REGS8:
- return "regs8";
- case PERF_REG_CSKY_REGS9:
- return "regs9";
- case PERF_REG_CSKY_SP:
- return "sp";
- case PERF_REG_CSKY_LR:
- return "lr";
- case PERF_REG_CSKY_PC:
- return "pc";
-#if defined(__CSKYABIV2__)
- case PERF_REG_CSKY_EXREGS0:
- return "exregs0";
- case PERF_REG_CSKY_EXREGS1:
- return "exregs1";
- case PERF_REG_CSKY_EXREGS2:
- return "exregs2";
- case PERF_REG_CSKY_EXREGS3:
- return "exregs3";
- case PERF_REG_CSKY_EXREGS4:
- return "exregs4";
- case PERF_REG_CSKY_EXREGS5:
- return "exregs5";
- case PERF_REG_CSKY_EXREGS6:
- return "exregs6";
- case PERF_REG_CSKY_EXREGS7:
- return "exregs7";
- case PERF_REG_CSKY_EXREGS8:
- return "exregs8";
- case PERF_REG_CSKY_EXREGS9:
- return "exregs9";
- case PERF_REG_CSKY_EXREGS10:
- return "exregs10";
- case PERF_REG_CSKY_EXREGS11:
- return "exregs11";
- case PERF_REG_CSKY_EXREGS12:
- return "exregs12";
- case PERF_REG_CSKY_EXREGS13:
- return "exregs13";
- case PERF_REG_CSKY_EXREGS14:
- return "exregs14";
- case PERF_REG_CSKY_TLS:
- return "tls";
- case PERF_REG_CSKY_HI:
- return "hi";
- case PERF_REG_CSKY_LO:
- return "lo";
-#endif
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/mips/include/perf_regs.h b/tools/perf/arch/mips/include/perf_regs.h
index ee73b36a14d1..b8cd8bbb37ba 100644
--- a/tools/perf/arch/mips/include/perf_regs.h
+++ b/tools/perf/arch/mips/include/perf_regs.h
@@ -12,73 +12,4 @@
#define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1)
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_MIPS_PC:
- return "PC";
- case PERF_REG_MIPS_R1:
- return "$1";
- case PERF_REG_MIPS_R2:
- return "$2";
- case PERF_REG_MIPS_R3:
- return "$3";
- case PERF_REG_MIPS_R4:
- return "$4";
- case PERF_REG_MIPS_R5:
- return "$5";
- case PERF_REG_MIPS_R6:
- return "$6";
- case PERF_REG_MIPS_R7:
- return "$7";
- case PERF_REG_MIPS_R8:
- return "$8";
- case PERF_REG_MIPS_R9:
- return "$9";
- case PERF_REG_MIPS_R10:
- return "$10";
- case PERF_REG_MIPS_R11:
- return "$11";
- case PERF_REG_MIPS_R12:
- return "$12";
- case PERF_REG_MIPS_R13:
- return "$13";
- case PERF_REG_MIPS_R14:
- return "$14";
- case PERF_REG_MIPS_R15:
- return "$15";
- case PERF_REG_MIPS_R16:
- return "$16";
- case PERF_REG_MIPS_R17:
- return "$17";
- case PERF_REG_MIPS_R18:
- return "$18";
- case PERF_REG_MIPS_R19:
- return "$19";
- case PERF_REG_MIPS_R20:
- return "$20";
- case PERF_REG_MIPS_R21:
- return "$21";
- case PERF_REG_MIPS_R22:
- return "$22";
- case PERF_REG_MIPS_R23:
- return "$23";
- case PERF_REG_MIPS_R24:
- return "$24";
- case PERF_REG_MIPS_R25:
- return "$25";
- case PERF_REG_MIPS_R28:
- return "$28";
- case PERF_REG_MIPS_R29:
- return "$29";
- case PERF_REG_MIPS_R30:
- return "$30";
- case PERF_REG_MIPS_R31:
- return "$31";
- default:
- break;
- }
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h
index 93339d17acc4..9bb17c3f370b 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -19,70 +19,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_POWERPC_NIP
#define PERF_REG_SP PERF_REG_POWERPC_R1
-static const char *reg_names[] = {
- [PERF_REG_POWERPC_R0] = "r0",
- [PERF_REG_POWERPC_R1] = "r1",
- [PERF_REG_POWERPC_R2] = "r2",
- [PERF_REG_POWERPC_R3] = "r3",
- [PERF_REG_POWERPC_R4] = "r4",
- [PERF_REG_POWERPC_R5] = "r5",
- [PERF_REG_POWERPC_R6] = "r6",
- [PERF_REG_POWERPC_R7] = "r7",
- [PERF_REG_POWERPC_R8] = "r8",
- [PERF_REG_POWERPC_R9] = "r9",
- [PERF_REG_POWERPC_R10] = "r10",
- [PERF_REG_POWERPC_R11] = "r11",
- [PERF_REG_POWERPC_R12] = "r12",
- [PERF_REG_POWERPC_R13] = "r13",
- [PERF_REG_POWERPC_R14] = "r14",
- [PERF_REG_POWERPC_R15] = "r15",
- [PERF_REG_POWERPC_R16] = "r16",
- [PERF_REG_POWERPC_R17] = "r17",
- [PERF_REG_POWERPC_R18] = "r18",
- [PERF_REG_POWERPC_R19] = "r19",
- [PERF_REG_POWERPC_R20] = "r20",
- [PERF_REG_POWERPC_R21] = "r21",
- [PERF_REG_POWERPC_R22] = "r22",
- [PERF_REG_POWERPC_R23] = "r23",
- [PERF_REG_POWERPC_R24] = "r24",
- [PERF_REG_POWERPC_R25] = "r25",
- [PERF_REG_POWERPC_R26] = "r26",
- [PERF_REG_POWERPC_R27] = "r27",
- [PERF_REG_POWERPC_R28] = "r28",
- [PERF_REG_POWERPC_R29] = "r29",
- [PERF_REG_POWERPC_R30] = "r30",
- [PERF_REG_POWERPC_R31] = "r31",
- [PERF_REG_POWERPC_NIP] = "nip",
- [PERF_REG_POWERPC_MSR] = "msr",
- [PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
- [PERF_REG_POWERPC_CTR] = "ctr",
- [PERF_REG_POWERPC_LINK] = "link",
- [PERF_REG_POWERPC_XER] = "xer",
- [PERF_REG_POWERPC_CCR] = "ccr",
- [PERF_REG_POWERPC_SOFTE] = "softe",
- [PERF_REG_POWERPC_TRAP] = "trap",
- [PERF_REG_POWERPC_DAR] = "dar",
- [PERF_REG_POWERPC_DSISR] = "dsisr",
- [PERF_REG_POWERPC_SIER] = "sier",
- [PERF_REG_POWERPC_MMCRA] = "mmcra",
- [PERF_REG_POWERPC_MMCR0] = "mmcr0",
- [PERF_REG_POWERPC_MMCR1] = "mmcr1",
- [PERF_REG_POWERPC_MMCR2] = "mmcr2",
- [PERF_REG_POWERPC_MMCR3] = "mmcr3",
- [PERF_REG_POWERPC_SIER2] = "sier2",
- [PERF_REG_POWERPC_SIER3] = "sier3",
- [PERF_REG_POWERPC_PMC1] = "pmc1",
- [PERF_REG_POWERPC_PMC2] = "pmc2",
- [PERF_REG_POWERPC_PMC3] = "pmc3",
- [PERF_REG_POWERPC_PMC4] = "pmc4",
- [PERF_REG_POWERPC_PMC5] = "pmc5",
- [PERF_REG_POWERPC_PMC6] = "pmc6",
- [PERF_REG_POWERPC_SDAR] = "sdar",
- [PERF_REG_POWERPC_SIAR] = "siar",
-};
-
-static inline const char *__perf_reg_name(int id)
-{
- return reg_names[id];
-}
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/riscv/include/perf_regs.h b/tools/perf/arch/riscv/include/perf_regs.h
index 6b02a767c918..6944bf0de53e 100644
--- a/tools/perf/arch/riscv/include/perf_regs.h
+++ b/tools/perf/arch/riscv/include/perf_regs.h
@@ -19,78 +19,4 @@
#define PERF_REG_IP PERF_REG_RISCV_PC
#define PERF_REG_SP PERF_REG_RISCV_SP
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_RISCV_PC:
- return "pc";
- case PERF_REG_RISCV_RA:
- return "ra";
- case PERF_REG_RISCV_SP:
- return "sp";
- case PERF_REG_RISCV_GP:
- return "gp";
- case PERF_REG_RISCV_TP:
- return "tp";
- case PERF_REG_RISCV_T0:
- return "t0";
- case PERF_REG_RISCV_T1:
- return "t1";
- case PERF_REG_RISCV_T2:
- return "t2";
- case PERF_REG_RISCV_S0:
- return "s0";
- case PERF_REG_RISCV_S1:
- return "s1";
- case PERF_REG_RISCV_A0:
- return "a0";
- case PERF_REG_RISCV_A1:
- return "a1";
- case PERF_REG_RISCV_A2:
- return "a2";
- case PERF_REG_RISCV_A3:
- return "a3";
- case PERF_REG_RISCV_A4:
- return "a4";
- case PERF_REG_RISCV_A5:
- return "a5";
- case PERF_REG_RISCV_A6:
- return "a6";
- case PERF_REG_RISCV_A7:
- return "a7";
- case PERF_REG_RISCV_S2:
- return "s2";
- case PERF_REG_RISCV_S3:
- return "s3";
- case PERF_REG_RISCV_S4:
- return "s4";
- case PERF_REG_RISCV_S5:
- return "s5";
- case PERF_REG_RISCV_S6:
- return "s6";
- case PERF_REG_RISCV_S7:
- return "s7";
- case PERF_REG_RISCV_S8:
- return "s8";
- case PERF_REG_RISCV_S9:
- return "s9";
- case PERF_REG_RISCV_S10:
- return "s10";
- case PERF_REG_RISCV_S11:
- return "s11";
- case PERF_REG_RISCV_T3:
- return "t3";
- case PERF_REG_RISCV_T4:
- return "t4";
- case PERF_REG_RISCV_T5:
- return "t5";
- case PERF_REG_RISCV_T6:
- return "t6";
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h
index ce3031526623..52fcc0891da6 100644
--- a/tools/perf/arch/s390/include/perf_regs.h
+++ b/tools/perf/arch/s390/include/perf_regs.h
@@ -14,82 +14,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_S390_PC
#define PERF_REG_SP PERF_REG_S390_R15
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_S390_R0:
- return "R0";
- case PERF_REG_S390_R1:
- return "R1";
- case PERF_REG_S390_R2:
- return "R2";
- case PERF_REG_S390_R3:
- return "R3";
- case PERF_REG_S390_R4:
- return "R4";
- case PERF_REG_S390_R5:
- return "R5";
- case PERF_REG_S390_R6:
- return "R6";
- case PERF_REG_S390_R7:
- return "R7";
- case PERF_REG_S390_R8:
- return "R8";
- case PERF_REG_S390_R9:
- return "R9";
- case PERF_REG_S390_R10:
- return "R10";
- case PERF_REG_S390_R11:
- return "R11";
- case PERF_REG_S390_R12:
- return "R12";
- case PERF_REG_S390_R13:
- return "R13";
- case PERF_REG_S390_R14:
- return "R14";
- case PERF_REG_S390_R15:
- return "R15";
- case PERF_REG_S390_FP0:
- return "FP0";
- case PERF_REG_S390_FP1:
- return "FP1";
- case PERF_REG_S390_FP2:
- return "FP2";
- case PERF_REG_S390_FP3:
- return "FP3";
- case PERF_REG_S390_FP4:
- return "FP4";
- case PERF_REG_S390_FP5:
- return "FP5";
- case PERF_REG_S390_FP6:
- return "FP6";
- case PERF_REG_S390_FP7:
- return "FP7";
- case PERF_REG_S390_FP8:
- return "FP8";
- case PERF_REG_S390_FP9:
- return "FP9";
- case PERF_REG_S390_FP10:
- return "FP10";
- case PERF_REG_S390_FP11:
- return "FP11";
- case PERF_REG_S390_FP12:
- return "FP12";
- case PERF_REG_S390_FP13:
- return "FP13";
- case PERF_REG_S390_FP14:
- return "FP14";
- case PERF_REG_S390_FP15:
- return "FP15";
- case PERF_REG_S390_MASK:
- return "MASK";
- case PERF_REG_S390_PC:
- return "PC";
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h
index cddc4cdc0d9b..16e23b722042 100644
--- a/tools/perf/arch/x86/include/perf_regs.h
+++ b/tools/perf/arch/x86/include/perf_regs.h
@@ -23,86 +23,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_X86_IP
#define PERF_REG_SP PERF_REG_X86_SP
-static inline const char *__perf_reg_name(int id)
-{
- switch (id) {
- case PERF_REG_X86_AX:
- return "AX";
- case PERF_REG_X86_BX:
- return "BX";
- case PERF_REG_X86_CX:
- return "CX";
- case PERF_REG_X86_DX:
- return "DX";
- case PERF_REG_X86_SI:
- return "SI";
- case PERF_REG_X86_DI:
- return "DI";
- case PERF_REG_X86_BP:
- return "BP";
- case PERF_REG_X86_SP:
- return "SP";
- case PERF_REG_X86_IP:
- return "IP";
- case PERF_REG_X86_FLAGS:
- return "FLAGS";
- case PERF_REG_X86_CS:
- return "CS";
- case PERF_REG_X86_SS:
- return "SS";
- case PERF_REG_X86_DS:
- return "DS";
- case PERF_REG_X86_ES:
- return "ES";
- case PERF_REG_X86_FS:
- return "FS";
- case PERF_REG_X86_GS:
- return "GS";
-#ifdef HAVE_ARCH_X86_64_SUPPORT
- case PERF_REG_X86_R8:
- return "R8";
- case PERF_REG_X86_R9:
- return "R9";
- case PERF_REG_X86_R10:
- return "R10";
- case PERF_REG_X86_R11:
- return "R11";
- case PERF_REG_X86_R12:
- return "R12";
- case PERF_REG_X86_R13:
- return "R13";
- case PERF_REG_X86_R14:
- return "R14";
- case PERF_REG_X86_R15:
- return "R15";
-#endif /* HAVE_ARCH_X86_64_SUPPORT */
-
-#define XMM(x) \
- case PERF_REG_X86_XMM ## x: \
- case PERF_REG_X86_XMM ## x + 1: \
- return "XMM" #x;
- XMM(0)
- XMM(1)
- XMM(2)
- XMM(3)
- XMM(4)
- XMM(5)
- XMM(6)
- XMM(7)
- XMM(8)
- XMM(9)
- XMM(10)
- XMM(11)
- XMM(12)
- XMM(13)
- XMM(14)
- XMM(15)
-#undef XMM
- default:
- return NULL;
- }
-
- return NULL;
-}
-
#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index 0b0951030a2f..f924246eff78 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -17,3 +17,20 @@ int arch_evlist__add_default_attrs(struct evlist *evlist)
else
return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL);
}
+
+struct evsel *arch_evlist__leader(struct list_head *list)
+{
+ struct evsel *evsel, *first;
+
+ first = list_first_entry(list, struct evsel, core.node);
+
+ if (!pmu_have_event("cpu", "slots"))
+ return first;
+
+ __evlist__for_each_entry(list, evsel) {
+ if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") &&
+ evsel->name && strstr(evsel->name, "slots"))
+ return evsel;
+ }
+ return first;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index d0895162c2ba..d291f3a8af5f 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -226,7 +226,6 @@ static void run_collection(struct collection *coll)
if (!bench->fn)
break;
printf("# Running %s/%s benchmark...\n", coll->name, bench->name);
- fflush(stdout);
argv[1] = bench->name;
run_bench(coll->name, bench->name, bench->fn, 1, argv);
@@ -247,6 +246,9 @@ int cmd_bench(int argc, const char **argv)
struct collection *coll;
int ret = 0;
+ /* Unbuffered output */
+ setvbuf(stdout, NULL, _IONBF, 0);
+
if (argc < 2) {
/* No collection specified. */
print_usage();
@@ -300,7 +302,6 @@ int cmd_bench(int argc, const char **argv)
if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name);
- fflush(stdout);
ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1);
goto end;
}
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 87cb11a7a3ee..2b54e2ddc80a 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -13,7 +13,9 @@
#include <signal.h>
#include <stdlib.h>
#include <fcntl.h>
+#include <math.h>
#include <poll.h>
+#include <ctype.h>
#include <linux/capability.h>
#include <linux/string.h>
@@ -28,36 +30,12 @@
#include "strfilter.h"
#include "util/cap.h"
#include "util/config.h"
+#include "util/ftrace.h"
#include "util/units.h"
#include "util/parse-sublevel-options.h"
#define DEFAULT_TRACER "function_graph"
-struct perf_ftrace {
- struct evlist *evlist;
- struct target target;
- const char *tracer;
- struct list_head filters;
- struct list_head notrace;
- struct list_head graph_funcs;
- struct list_head nograph_funcs;
- int graph_depth;
- unsigned long percpu_buffer_size;
- bool inherit;
- int func_stack_trace;
- int func_irq_info;
- int graph_nosleep_time;
- int graph_noirqs;
- int graph_verbose;
- int graph_thresh;
- unsigned int initial_delay;
-};
-
-struct filter_entry {
- struct list_head list;
- char name[];
-};
-
static volatile int workload_exec_errno;
static bool done;
@@ -565,7 +543,24 @@ static int set_tracing_options(struct perf_ftrace *ftrace)
return 0;
}
-static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
+static void select_tracer(struct perf_ftrace *ftrace)
+{
+ bool graph = !list_empty(&ftrace->graph_funcs) ||
+ !list_empty(&ftrace->nograph_funcs);
+ bool func = !list_empty(&ftrace->filters) ||
+ !list_empty(&ftrace->notrace);
+
+ /* The function_graph has priority over function tracer. */
+ if (graph)
+ ftrace->tracer = "function_graph";
+ else if (func)
+ ftrace->tracer = "function";
+ /* Otherwise, the default tracer is used. */
+
+ pr_debug("%s tracer is used\n", ftrace->tracer);
+}
+
+static int __cmd_ftrace(struct perf_ftrace *ftrace)
{
char *trace_file;
int trace_fd;
@@ -586,10 +581,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
return -1;
}
- signal(SIGINT, sig_handler);
- signal(SIGUSR1, sig_handler);
- signal(SIGCHLD, sig_handler);
- signal(SIGPIPE, sig_handler);
+ select_tracer(ftrace);
if (reset_tracing_files(ftrace) < 0) {
pr_err("failed to reset ftrace\n");
@@ -600,11 +592,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
if (write_tracing_file("trace", "0") < 0)
goto out;
- if (argc && evlist__prepare_workload(ftrace->evlist, &ftrace->target, argv, false,
- ftrace__workload_exec_failed_signal) < 0) {
- goto out;
- }
-
if (set_tracing_options(ftrace) < 0)
goto out_reset;
@@ -693,6 +680,270 @@ out:
return (done && !workload_exec_errno) ? 0 : -1;
}
+static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
+{
+ char *p, *q;
+ char *unit;
+ double num;
+ int i;
+
+ /* ensure NUL termination */
+ buf[len] = '\0';
+
+ /* handle data line by line */
+ for (p = buf; (q = strchr(p, '\n')) != NULL; p = q + 1) {
+ *q = '\0';
+ /* move it to the line buffer */
+ strcat(linebuf, p);
+
+ /*
+ * parse trace output to get function duration like in
+ *
+ * # tracer: function_graph
+ * #
+ * # CPU DURATION FUNCTION CALLS
+ * # | | | | | | |
+ * 1) + 10.291 us | do_filp_open();
+ * 1) 4.889 us | do_filp_open();
+ * 1) 6.086 us | do_filp_open();
+ *
+ */
+ if (linebuf[0] == '#')
+ goto next;
+
+ /* ignore CPU */
+ p = strchr(linebuf, ')');
+ if (p == NULL)
+ p = linebuf;
+
+ while (*p && !isdigit(*p) && (*p != '|'))
+ p++;
+
+ /* no duration */
+ if (*p == '\0' || *p == '|')
+ goto next;
+
+ num = strtod(p, &unit);
+ if (!unit || strncmp(unit, " us", 3))
+ goto next;
+
+ i = log2(num);
+ if (i < 0)
+ i = 0;
+ if (i >= NUM_BUCKET)
+ i = NUM_BUCKET - 1;
+
+ buckets[i]++;
+
+next:
+ /* empty the line buffer for the next output */
+ linebuf[0] = '\0';
+ }
+
+ /* preserve any remaining output (before newline) */
+ strcat(linebuf, p);
+}
+
+static void display_histogram(int buckets[])
+{
+ int i;
+ int total = 0;
+ int bar_total = 46; /* to fit in 80 column */
+ char bar[] = "###############################################";
+ int bar_len;
+
+ for (i = 0; i < NUM_BUCKET; i++)
+ total += buckets[i];
+
+ if (total == 0) {
+ printf("No data found\n");
+ return;
+ }
+
+ printf("# %14s | %10s | %-*s |\n",
+ " DURATION ", "COUNT", bar_total, "GRAPH");
+
+ bar_len = buckets[0] * bar_total / total;
+ printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
+ 0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
+
+ for (i = 1; i < NUM_BUCKET - 1; i++) {
+ int start = (1 << (i - 1));
+ int stop = 1 << i;
+ const char *unit = "us";
+
+ if (start >= 1024) {
+ start >>= 10;
+ stop >>= 10;
+ unit = "ms";
+ }
+ bar_len = buckets[i] * bar_total / total;
+ printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
+ start, stop, unit, buckets[i], bar_len, bar,
+ bar_total - bar_len, "");
+ }
+
+ bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
+ printf(" %4d - %-4s %s | %10d | %.*s%*s |\n",
+ 1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar,
+ bar_total - bar_len, "");
+
+}
+
+static int prepare_func_latency(struct perf_ftrace *ftrace)
+{
+ char *trace_file;
+ int fd;
+
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_prepare_bpf(ftrace);
+
+ if (reset_tracing_files(ftrace) < 0) {
+ pr_err("failed to reset ftrace\n");
+ return -1;
+ }
+
+ /* reset ftrace buffer */
+ if (write_tracing_file("trace", "0") < 0)
+ return -1;
+
+ if (set_tracing_options(ftrace) < 0)
+ return -1;
+
+ /* force to use the function_graph tracer to track duration */
+ if (write_tracing_file("current_tracer", "function_graph") < 0) {
+ pr_err("failed to set current_tracer to function_graph\n");
+ return -1;
+ }
+
+ trace_file = get_tracing_file("trace_pipe");
+ if (!trace_file) {
+ pr_err("failed to open trace_pipe\n");
+ return -1;
+ }
+
+ fd = open(trace_file, O_RDONLY);
+ if (fd < 0)
+ pr_err("failed to open trace_pipe\n");
+
+ put_tracing_file(trace_file);
+ return fd;
+}
+
+static int start_func_latency(struct perf_ftrace *ftrace)
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_start_bpf(ftrace);
+
+ if (write_tracing_file("tracing_on", "1") < 0) {
+ pr_err("can't enable tracing\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int stop_func_latency(struct perf_ftrace *ftrace)
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_stop_bpf(ftrace);
+
+ write_tracing_file("tracing_on", "0");
+ return 0;
+}
+
+static int read_func_latency(struct perf_ftrace *ftrace, int buckets[])
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_read_bpf(ftrace, buckets);
+
+ return 0;
+}
+
+static int cleanup_func_latency(struct perf_ftrace *ftrace)
+{
+ if (ftrace->target.use_bpf)
+ return perf_ftrace__latency_cleanup_bpf(ftrace);
+
+ reset_tracing_files(ftrace);
+ return 0;
+}
+
+static int __cmd_latency(struct perf_ftrace *ftrace)
+{
+ int trace_fd;
+ char buf[4096];
+ char line[256];
+ struct pollfd pollfd = {
+ .events = POLLIN,
+ };
+ int buckets[NUM_BUCKET] = { };
+
+ if (!(perf_cap__capable(CAP_PERFMON) ||
+ perf_cap__capable(CAP_SYS_ADMIN))) {
+ pr_err("ftrace only works for %s!\n",
+#ifdef HAVE_LIBCAP_SUPPORT
+ "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
+#else
+ "root"
+#endif
+ );
+ return -1;
+ }
+
+ trace_fd = prepare_func_latency(ftrace);
+ if (trace_fd < 0)
+ goto out;
+
+ fcntl(trace_fd, F_SETFL, O_NONBLOCK);
+ pollfd.fd = trace_fd;
+
+ if (start_func_latency(ftrace) < 0)
+ goto out;
+
+ evlist__start_workload(ftrace->evlist);
+
+ line[0] = '\0';
+ while (!done) {
+ if (poll(&pollfd, 1, -1) < 0)
+ break;
+
+ if (pollfd.revents & POLLIN) {
+ int n = read(trace_fd, buf, sizeof(buf) - 1);
+ if (n < 0)
+ break;
+
+ make_histogram(buckets, buf, n, line);
+ }
+ }
+
+ stop_func_latency(ftrace);
+
+ if (workload_exec_errno) {
+ const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
+ pr_err("workload failed: %s\n", emsg);
+ goto out;
+ }
+
+ /* read remaining buffer contents */
+ while (!ftrace->target.use_bpf) {
+ int n = read(trace_fd, buf, sizeof(buf) - 1);
+ if (n <= 0)
+ break;
+ make_histogram(buckets, buf, n, line);
+ }
+
+ read_func_latency(ftrace, buckets);
+
+ display_histogram(buckets);
+
+out:
+ close(trace_fd);
+ cleanup_func_latency(ftrace);
+
+ return (done && !workload_exec_errno) ? 0 : -1;
+}
+
static int perf_ftrace_config(const char *var, const char *value, void *cb)
{
struct perf_ftrace *ftrace = cb;
@@ -855,22 +1106,11 @@ static int parse_graph_tracer_opts(const struct option *opt,
return 0;
}
-static void select_tracer(struct perf_ftrace *ftrace)
-{
- bool graph = !list_empty(&ftrace->graph_funcs) ||
- !list_empty(&ftrace->nograph_funcs);
- bool func = !list_empty(&ftrace->filters) ||
- !list_empty(&ftrace->notrace);
-
- /* The function_graph has priority over function tracer. */
- if (graph)
- ftrace->tracer = "function_graph";
- else if (func)
- ftrace->tracer = "function";
- /* Otherwise, the default tracer is used. */
-
- pr_debug("%s tracer is used\n", ftrace->tracer);
-}
+enum perf_ftrace_subcommand {
+ PERF_FTRACE_NONE,
+ PERF_FTRACE_TRACE,
+ PERF_FTRACE_LATENCY,
+};
int cmd_ftrace(int argc, const char **argv)
{
@@ -879,17 +1119,7 @@ int cmd_ftrace(int argc, const char **argv)
.tracer = DEFAULT_TRACER,
.target = { .uid = UINT_MAX, },
};
- const char * const ftrace_usage[] = {
- "perf ftrace [<options>] [<command>]",
- "perf ftrace [<options>] -- <command> [<options>]",
- NULL
- };
- const struct option ftrace_options[] = {
- OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
- "Tracer to use: function_graph(default) or function"),
- OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
- "Show available functions to filter",
- opt_list_avail_functions, "*"),
+ const struct option common_options[] = {
OPT_STRING('p', "pid", &ftrace.target.pid, "pid",
"Trace on existing process id"),
/* TODO: Add short option -t after -t/--tracer can be removed. */
@@ -901,6 +1131,14 @@ int cmd_ftrace(int argc, const char **argv)
"System-wide collection from all CPUs"),
OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
"List of cpus to monitor"),
+ OPT_END()
+ };
+ const struct option ftrace_options[] = {
+ OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
+ "Tracer to use: function_graph(default) or function"),
+ OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
+ "Show available functions to filter",
+ opt_list_avail_functions, "*"),
OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
"Trace given functions using function tracer",
parse_filter_func),
@@ -923,24 +1161,65 @@ int cmd_ftrace(int argc, const char **argv)
"Trace children processes"),
OPT_UINTEGER('D', "delay", &ftrace.initial_delay,
"Number of milliseconds to wait before starting tracing after program start"),
- OPT_END()
+ OPT_PARENT(common_options),
+ };
+ const struct option latency_options[] = {
+ OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
+ "Show latency of given function", parse_filter_func),
+#ifdef HAVE_BPF_SKEL
+ OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
+ "Use BPF to measure function latency"),
+#endif
+ OPT_PARENT(common_options),
+ };
+ const struct option *options = ftrace_options;
+
+ const char * const ftrace_usage[] = {
+ "perf ftrace [<options>] [<command>]",
+ "perf ftrace [<options>] -- [<command>] [<options>]",
+ "perf ftrace {trace|latency} [<options>] [<command>]",
+ "perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]",
+ NULL
};
+ enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE;
INIT_LIST_HEAD(&ftrace.filters);
INIT_LIST_HEAD(&ftrace.notrace);
INIT_LIST_HEAD(&ftrace.graph_funcs);
INIT_LIST_HEAD(&ftrace.nograph_funcs);
+ signal(SIGINT, sig_handler);
+ signal(SIGUSR1, sig_handler);
+ signal(SIGCHLD, sig_handler);
+ signal(SIGPIPE, sig_handler);
+
ret = perf_config(perf_ftrace_config, &ftrace);
if (ret < 0)
return -1;
- argc = parse_options(argc, argv, ftrace_options, ftrace_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
- if (!argc && target__none(&ftrace.target))
- ftrace.target.system_wide = true;
+ if (argc > 1) {
+ if (!strcmp(argv[1], "trace")) {
+ subcmd = PERF_FTRACE_TRACE;
+ } else if (!strcmp(argv[1], "latency")) {
+ subcmd = PERF_FTRACE_LATENCY;
+ options = latency_options;
+ }
+
+ if (subcmd != PERF_FTRACE_NONE) {
+ argc--;
+ argv++;
+ }
+ }
+ /* for backward compatibility */
+ if (subcmd == PERF_FTRACE_NONE)
+ subcmd = PERF_FTRACE_TRACE;
- select_tracer(&ftrace);
+ argc = parse_options(argc, argv, options, ftrace_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ if (argc < 0) {
+ ret = -EINVAL;
+ goto out_delete_filters;
+ }
ret = target__validate(&ftrace.target);
if (ret) {
@@ -961,7 +1240,35 @@ int cmd_ftrace(int argc, const char **argv)
if (ret < 0)
goto out_delete_evlist;
- ret = __cmd_ftrace(&ftrace, argc, argv);
+ if (argc) {
+ ret = evlist__prepare_workload(ftrace.evlist, &ftrace.target,
+ argv, false,
+ ftrace__workload_exec_failed_signal);
+ if (ret < 0)
+ goto out_delete_evlist;
+ }
+
+ switch (subcmd) {
+ case PERF_FTRACE_TRACE:
+ if (!argc && target__none(&ftrace.target))
+ ftrace.target.system_wide = true;
+ ret = __cmd_ftrace(&ftrace);
+ break;
+ case PERF_FTRACE_LATENCY:
+ if (list_empty(&ftrace.filters)) {
+ pr_err("Should provide a function to measure\n");
+ parse_options_usage(ftrace_usage, options, "T", 1);
+ ret = -EINVAL;
+ goto out_delete_evlist;
+ }
+ ret = __cmd_latency(&ftrace);
+ break;
+ case PERF_FTRACE_NONE:
+ default:
+ pr_err("Invalid subcommand\n");
+ ret = -EINVAL;
+ break;
+ }
out_delete_evlist:
evlist__delete(ftrace.evlist);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0338b813585a..6ac2160913ea 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2267,6 +2267,10 @@ out_free:
return ret;
}
+void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
+{
+}
+
static int parse_control_option(const struct option *opt,
const char *str,
int unset __maybe_unused)
@@ -2898,6 +2902,10 @@ int cmd_record(int argc, const char **argv)
}
rec->opts.target.hybrid = perf_pmu__has_hybrid();
+
+ if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
+ arch__add_leaf_frame_record_opts(&rec->opts);
+
err = -ENOMEM;
if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
usage_with_options(record_usage, record_options);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8ae400429870..1dd92d8c9279 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -410,7 +410,7 @@ static int report__setup_sample_type(struct report *rep)
}
}
- callchain_param_setup(sample_type);
+ callchain_param_setup(sample_type, perf_env__arch(&rep->session->header.env));
if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
@@ -1127,7 +1127,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused,
* on events sample_type.
*/
sample_type = evlist__combined_sample_type(*pevlist);
- callchain_param_setup(sample_type);
+ callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
return 0;
}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c82b033e8942..c9b3002ec254 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -15,6 +15,7 @@
#include "util/symbol.h"
#include "util/thread.h"
#include "util/trace-event.h"
+#include "util/env.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/evsel_fprintf.h"
@@ -648,7 +649,7 @@ out:
return 0;
}
-static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
+static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, const char *arch,
FILE *fp)
{
unsigned i = 0, r;
@@ -661,7 +662,7 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
u64 val = regs->regs[i++];
- printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val);
+ printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r, arch), val);
}
return printed;
@@ -718,17 +719,17 @@ tod_scnprintf(struct perf_script *script, char *buf, int buflen,
}
static int perf_sample__fprintf_iregs(struct perf_sample *sample,
- struct perf_event_attr *attr, FILE *fp)
+ struct perf_event_attr *attr, const char *arch, FILE *fp)
{
return perf_sample__fprintf_regs(&sample->intr_regs,
- attr->sample_regs_intr, fp);
+ attr->sample_regs_intr, arch, fp);
}
static int perf_sample__fprintf_uregs(struct perf_sample *sample,
- struct perf_event_attr *attr, FILE *fp)
+ struct perf_event_attr *attr, const char *arch, FILE *fp)
{
return perf_sample__fprintf_regs(&sample->user_regs,
- attr->sample_regs_user, fp);
+ attr->sample_regs_user, arch, fp);
}
static int perf_sample__fprintf_start(struct perf_script *script,
@@ -2000,6 +2001,7 @@ static void process_event(struct perf_script *script,
struct evsel_script *es = evsel->priv;
FILE *fp = es->fp;
char str[PAGE_SIZE_NAME_LEN];
+ const char *arch = perf_env__arch(machine->env);
if (output[type].fields == 0)
return;
@@ -2066,10 +2068,10 @@ static void process_event(struct perf_script *script,
}
if (PRINT_FIELD(IREGS))
- perf_sample__fprintf_iregs(sample, attr, fp);
+ perf_sample__fprintf_iregs(sample, attr, arch, fp);
if (PRINT_FIELD(UREGS))
- perf_sample__fprintf_uregs(sample, attr, fp);
+ perf_sample__fprintf_uregs(sample, attr, arch, fp);
if (PRINT_FIELD(BRSTACK))
perf_sample__fprintf_brstack(sample, thread, attr, fp);
@@ -2316,7 +2318,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
* on events sample_type.
*/
sample_type = evlist__combined_sample_type(evlist);
- callchain_param_setup(sample_type);
+ callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
/* Enable fields for callchain entries */
if (symbol_conf.use_callchain &&
@@ -3466,16 +3468,7 @@ static void script__setup_sample_type(struct perf_script *script)
struct perf_session *session = script->session;
u64 sample_type = evlist__combined_sample_type(session->evlist);
- if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
- if ((sample_type & PERF_SAMPLE_REGS_USER) &&
- (sample_type & PERF_SAMPLE_STACK_USER)) {
- callchain_param.record_mode = CALLCHAIN_DWARF;
- dwarf_callchain_users = true;
- } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
- callchain_param.record_mode = CALLCHAIN_LBR;
- else
- callchain_param.record_mode = CALLCHAIN_FP;
- }
+ callchain_param_setup(sample_type, perf_env__arch(session->machines.host.env));
if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 7974933dbc77..f6ca2b054c5b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1168,6 +1168,26 @@ static int parse_stat_cgroups(const struct option *opt,
return parse_cgroups(opt, str, unset);
}
+static int parse_hybrid_type(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ struct evlist *evlist = *(struct evlist **)opt->value;
+
+ if (!list_empty(&evlist->core.entries)) {
+ fprintf(stderr, "Must define cputype before events/metrics\n");
+ return -1;
+ }
+
+ evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu(str);
+ if (!evlist->hybrid_pmu_name) {
+ fprintf(stderr, "--cputype %s is not supported!\n", str);
+ return -1;
+ }
+
+ return 0;
+}
+
static struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
"hardware transaction statistics"),
@@ -1282,6 +1302,10 @@ static struct option stat_options[] = {
"don't print 'summary' for CSV summary output"),
OPT_BOOLEAN(0, "quiet", &stat_config.quiet,
"don't print output (useful with record)"),
+ OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
+ "Only enable events on applying cpu with this type "
+ "for hybrid platform (e.g. core or atom)",
+ parse_hybrid_type),
#ifdef HAVE_LIBPFM
OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
"libpfm4 event selector. use 'perf list' to list available events",
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index ef94388e8323..4282ef9ec354 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3951,6 +3951,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
evlist__add(evlist, pgfault_min);
}
+ /* Enable ignoring missing threads when -u/-p option is defined. */
+ trace->opts.ignore_missing_thread = trace->opts.target.uid != UINT_MAX || trace->opts.target.pid;
+
if (trace->sched &&
evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
goto out_error_sched_stat_runtime;
diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c
index 7565a1852c74..b17eb52a0694 100644
--- a/tools/perf/dlfilters/dlfilter-test-api-v0.c
+++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c
@@ -308,8 +308,6 @@ int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, vo
int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
{
- struct filter_data *d = data;
-
pr_debug("%s API\n", __func__);
return do_checks(data, sample, ctx, false);
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json
new file mode 100644
index 000000000000..79f2016c53b0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json
@@ -0,0 +1,8 @@
+[
+ {
+ "ArchStdEvent": "BR_MIS_PRED"
+ },
+ {
+ "ArchStdEvent": "BR_PRED"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json
new file mode 100644
index 000000000000..579c1c993d17
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json
@@ -0,0 +1,20 @@
+[
+ {
+ "ArchStdEvent": "CPU_CYCLES"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS"
+ },
+ {
+ "ArchStdEvent": "BUS_CYCLES"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_RD"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_WR"
+ },
+ {
+ "ArchStdEvent": "CNT_CYCLES"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json
new file mode 100644
index 000000000000..0141f749bff3
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json
@@ -0,0 +1,155 @@
+[
+ {
+ "ArchStdEvent": "L1I_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1I_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1I_CACHE"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_ALLOCATE"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB"
+ },
+ {
+ "ArchStdEvent": "L1I_TLB"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE_ALLOCATE"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB"
+ },
+ {
+ "ArchStdEvent": "DTLB_WALK"
+ },
+ {
+ "ArchStdEvent": "ITLB_WALK"
+ },
+ {
+ "ArchStdEvent": "LL_CACHE_RD"
+ },
+ {
+ "ArchStdEvent": "LL_CACHE_MISS_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_LMISS_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WR"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_WR"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_INNER"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_OUTER"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB_VICTIM"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB_CLEAN"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_INVAL"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_WR"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_WR"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_RD"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WR"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL_RD"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL_WR"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB_VICTIM"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB_CLEAN"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_INVAL"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL_RD"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL_WR"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_RD"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_WR"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE_RD"
+ },
+ {
+ "ArchStdEvent": "L1I_CACHE_LMISS"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_LMISS_RD"
+ },
+ {
+ "ArchStdEvent": "L3D_CACHE_LMISS_RD"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json
new file mode 100644
index 000000000000..344a2d552ad5
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json
@@ -0,0 +1,47 @@
+[
+ {
+ "ArchStdEvent": "EXC_TAKEN"
+ },
+ {
+ "ArchStdEvent": "MEMORY_ERROR"
+ },
+ {
+ "ArchStdEvent": "EXC_UNDEF"
+ },
+ {
+ "ArchStdEvent": "EXC_SVC"
+ },
+ {
+ "ArchStdEvent": "EXC_PABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_DABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_IRQ"
+ },
+ {
+ "ArchStdEvent": "EXC_FIQ"
+ },
+ {
+ "ArchStdEvent": "EXC_SMC"
+ },
+ {
+ "ArchStdEvent": "EXC_HVC"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_PABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_DABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_OTHER"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_IRQ"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_FIQ"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json
new file mode 100644
index 000000000000..e57cd55937c6
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json
@@ -0,0 +1,143 @@
+[
+ {
+ "ArchStdEvent": "SW_INCR"
+ },
+ {
+ "ArchStdEvent": "INST_RETIRED"
+ },
+ {
+ "ArchStdEvent": "EXC_RETURN"
+ },
+ {
+ "ArchStdEvent": "CID_WRITE_RETIRED"
+ },
+ {
+ "ArchStdEvent": "INST_SPEC"
+ },
+ {
+ "ArchStdEvent": "TTBR_WRITE_RETIRED"
+ },
+ {
+ "ArchStdEvent": "BR_RETIRED"
+ },
+ {
+ "ArchStdEvent": "BR_MIS_PRED_RETIRED"
+ },
+ {
+ "ArchStdEvent": "OP_RETIRED"
+ },
+ {
+ "ArchStdEvent": "OP_SPEC"
+ },
+ {
+ "ArchStdEvent": "LDREX_SPEC"
+ },
+ {
+ "ArchStdEvent": "STREX_PASS_SPEC"
+ },
+ {
+ "ArchStdEvent": "STREX_FAIL_SPEC"
+ },
+ {
+ "ArchStdEvent": "STREX_SPEC"
+ },
+ {
+ "ArchStdEvent": "LD_SPEC"
+ },
+ {
+ "ArchStdEvent": "ST_SPEC"
+ },
+ {
+ "ArchStdEvent": "DP_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SPEC"
+ },
+ {
+ "ArchStdEvent": "VFP_SPEC"
+ },
+ {
+ "ArchStdEvent": "PC_WRITE_SPEC"
+ },
+ {
+ "ArchStdEvent": "CRYPTO_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_IMMED_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_RETURN_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_INDIRECT_SPEC"
+ },
+ {
+ "ArchStdEvent": "ISB_SPEC"
+ },
+ {
+ "ArchStdEvent": "DSB_SPEC"
+ },
+ {
+ "ArchStdEvent": "DMB_SPEC"
+ },
+ {
+ "ArchStdEvent": "RC_LD_SPEC"
+ },
+ {
+ "ArchStdEvent": "RC_ST_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_INST_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_INST_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_HP_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_SP_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_DP_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_EMPTY_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_FULL_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_PARTIAL_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_NOT_FULL_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_LDFF_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_LDFF_FAULT_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_SCALE_OPS_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_FIXED_OPS_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT8_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT16_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT32_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT64_SPEC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json
new file mode 100644
index 000000000000..e522113aeb96
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json
@@ -0,0 +1,38 @@
+[
+ {
+ "ArchStdEvent": "MEM_ACCESS"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_RD"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_WR"
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_LD_SPEC"
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_ST_SPEC"
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_LDST_SPEC"
+ },
+ {
+ "ArchStdEvent": "LDST_ALIGN_LAT"
+ },
+ {
+ "ArchStdEvent": "LD_ALIGN_LAT"
+ },
+ {
+ "ArchStdEvent": "ST_ALIGN_LAT"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_CHECKED"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_CHECKED_RD"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_CHECKED_WR"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json
new file mode 100644
index 000000000000..20d8365756c5
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json
@@ -0,0 +1,5 @@
+[
+ {
+ "ArchStdEvent": "REMOTE_ACCESS"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json
new file mode 100644
index 000000000000..f9fae15f7555
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json
@@ -0,0 +1,23 @@
+[
+ {
+ "ArchStdEvent": "STALL_FRONTEND"
+ },
+ {
+ "ArchStdEvent": "STALL_BACKEND"
+ },
+ {
+ "ArchStdEvent": "STALL"
+ },
+ {
+ "ArchStdEvent": "STALL_SLOT_BACKEND"
+ },
+ {
+ "ArchStdEvent": "STALL_SLOT_FRONTEND"
+ },
+ {
+ "ArchStdEvent": "STALL_SLOT"
+ },
+ {
+ "ArchStdEvent": "STALL_BACKEND_MEM"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json
new file mode 100644
index 000000000000..20f2165c85fe
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json
@@ -0,0 +1,14 @@
+[
+ {
+ "ArchStdEvent": "SAMPLE_POP"
+ },
+ {
+ "ArchStdEvent": "SAMPLE_FEED"
+ },
+ {
+ "ArchStdEvent": "SAMPLE_FILTRATE"
+ },
+ {
+ "ArchStdEvent": "SAMPLE_COLLISION"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json
new file mode 100644
index 000000000000..3116135c59e2
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json
@@ -0,0 +1,29 @@
+[
+ {
+ "ArchStdEvent": "TRB_WRAP"
+ },
+ {
+ "ArchStdEvent": "TRCEXTOUT0"
+ },
+ {
+ "ArchStdEvent": "TRCEXTOUT1"
+ },
+ {
+ "ArchStdEvent": "TRCEXTOUT2"
+ },
+ {
+ "ArchStdEvent": "TRCEXTOUT3"
+ },
+ {
+ "ArchStdEvent": "CTI_TRIGOUT4"
+ },
+ {
+ "ArchStdEvent": "CTI_TRIGOUT5"
+ },
+ {
+ "ArchStdEvent": "CTI_TRIGOUT6"
+ },
+ {
+ "ArchStdEvent": "CTI_TRIGOUT7"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/common-and-microarch.json
index 423767510aff..80d7a70829a0 100644
--- a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json
+++ b/tools/perf/pmu-events/arch/arm64/common-and-microarch.json
@@ -300,6 +300,30 @@
"BriefDescription": "No operation sent for execution on a slot"
},
{
+ "PublicDescription": "Sample Population",
+ "EventCode": "0x4000",
+ "EventName": "SAMPLE_POP",
+ "BriefDescription": "Sample Population"
+ },
+ {
+ "PublicDescription": "Sample Taken",
+ "EventCode": "0x4001",
+ "EventName": "SAMPLE_FEED",
+ "BriefDescription": "Sample Taken"
+ },
+ {
+ "PublicDescription": "Sample Taken and not removed by filtering",
+ "EventCode": "0x4002",
+ "EventName": "SAMPLE_FILTRATE",
+ "BriefDescription": "Sample Taken and not removed by filtering"
+ },
+ {
+ "PublicDescription": "Sample collided with previous sample",
+ "EventCode": "0x4003",
+ "EventName": "SAMPLE_COLLISION",
+ "BriefDescription": "Sample collided with previous sample"
+ },
+ {
"PublicDescription": "Constant frequency cycles. The counter increments at a constant frequency equal to the rate of increment of the system counter, CNTPCT_EL0.",
"EventCode": "0x4004",
"EventName": "CNT_CYCLES",
@@ -330,6 +354,96 @@
"BriefDescription": "Level 3 data cache long-latency read miss"
},
{
+ "PublicDescription": "Trace buffer current write pointer wrapped",
+ "EventCode": "0x400C",
+ "EventName": "TRB_WRAP",
+ "BriefDescription": "Trace buffer current write pointer wrapped"
+ },
+ {
+ "PublicDescription": "PE Trace Unit external output 0",
+ "EventCode": "0x4010",
+ "EventName": "TRCEXTOUT0",
+ "BriefDescription": "PE Trace Unit external output 0"
+ },
+ {
+ "PublicDescription": "PE Trace Unit external output 1",
+ "EventCode": "0x4011",
+ "EventName": "TRCEXTOUT1",
+ "BriefDescription": "PE Trace Unit external output 1"
+ },
+ {
+ "PublicDescription": "PE Trace Unit external output 2",
+ "EventCode": "0x4012",
+ "EventName": "TRCEXTOUT2",
+ "BriefDescription": "PE Trace Unit external output 2"
+ },
+ {
+ "PublicDescription": "PE Trace Unit external output 3",
+ "EventCode": "0x4013",
+ "EventName": "TRCEXTOUT3",
+ "BriefDescription": "PE Trace Unit external output 3"
+ },
+ {
+ "PublicDescription": "Cross-trigger Interface output trigger 4",
+ "EventCode": "0x4018",
+ "EventName": "CTI_TRIGOUT4",
+ "BriefDescription": "Cross-trigger Interface output trigger 4"
+ },
+ {
+ "PublicDescription": "Cross-trigger Interface output trigger 5 ",
+ "EventCode": "0x4019",
+ "EventName": "CTI_TRIGOUT5",
+ "BriefDescription": "Cross-trigger Interface output trigger 5 "
+ },
+ {
+ "PublicDescription": "Cross-trigger Interface output trigger 6",
+ "EventCode": "0x401A",
+ "EventName": "CTI_TRIGOUT6",
+ "BriefDescription": "Cross-trigger Interface output trigger 6"
+ },
+ {
+ "PublicDescription": "Cross-trigger Interface output trigger 7",
+ "EventCode": "0x401B",
+ "EventName": "CTI_TRIGOUT7",
+ "BriefDescription": "Cross-trigger Interface output trigger 7"
+ },
+ {
+ "PublicDescription": "Access with additional latency from alignment",
+ "EventCode": "0x4020",
+ "EventName": "LDST_ALIGN_LAT",
+ "BriefDescription": "Access with additional latency from alignment"
+ },
+ {
+ "PublicDescription": "Load with additional latency from alignment",
+ "EventCode": "0x4021",
+ "EventName": "LD_ALIGN_LAT",
+ "BriefDescription": "Load with additional latency from alignment"
+ },
+ {
+ "PublicDescription": "Store with additional latency from alignment",
+ "EventCode": "0x4022",
+ "EventName": "ST_ALIGN_LAT",
+ "BriefDescription": "Store with additional latency from alignment"
+ },
+ {
+ "PublicDescription": "Checked data memory access",
+ "EventCode": "0x4024",
+ "EventName": "MEM_ACCESS_CHECKED",
+ "BriefDescription": "Checked data memory access"
+ },
+ {
+ "PublicDescription": "Checked data memory access, read",
+ "EventCode": "0x4025",
+ "EventName": "MEM_ACCESS_CHECKED_RD",
+ "BriefDescription": "Checked data memory access, read"
+ },
+ {
+ "PublicDescription": "Checked data memory access, write",
+ "EventCode": "0x4026",
+ "EventName": "MEM_ACCESS_CHECKED_WR",
+ "BriefDescription": "Checked data memory access, write"
+ },
+ {
"PublicDescription": "SIMD Instruction architecturally executed.",
"EventCode": "0x8000",
"EventName": "SIMD_INST_RETIRED",
@@ -342,6 +456,18 @@
"BriefDescription": "Instruction architecturally executed, SVE."
},
{
+ "PublicDescription": "ASE operations speculatively executed",
+ "EventCode": "0x8005",
+ "EventName": "ASE_INST_SPEC",
+ "BriefDescription": "ASE operations speculatively executed"
+ },
+ {
+ "PublicDescription": "SVE operations speculatively executed",
+ "EventCode": "0x8006",
+ "EventName": "SVE_INST_SPEC",
+ "BriefDescription": "SVE operations speculatively executed"
+ },
+ {
"PublicDescription": "Microarchitectural operation, Operations speculatively executed.",
"EventCode": "0x8008",
"EventName": "UOP_SPEC",
@@ -360,6 +486,24 @@
"BriefDescription": "Floating-point Operations speculatively executed."
},
{
+ "PublicDescription": "Floating-point half-precision operations speculatively executed",
+ "EventCode": "0x8014",
+ "EventName": "FP_HP_SPEC",
+ "BriefDescription": "Floating-point half-precision operations speculatively executed"
+ },
+ {
+ "PublicDescription": "Floating-point single-precision operations speculatively executed",
+ "EventCode": "0x8018",
+ "EventName": "FP_SP_SPEC",
+ "BriefDescription": "Floating-point single-precision operations speculatively executed"
+ },
+ {
+ "PublicDescription": "Floating-point double-precision operations speculatively executed",
+ "EventCode": "0x801C",
+ "EventName": "FP_DP_SPEC",
+ "BriefDescription": "Floating-point double-precision operations speculatively executed"
+ },
+ {
"PublicDescription": "Floating-point FMA Operations speculatively executed.",
"EventCode": "0x8028",
"EventName": "FP_FMA_SPEC",
@@ -390,6 +534,30 @@
"BriefDescription": "SVE predicated Operations speculatively executed."
},
{
+ "PublicDescription": "SVE predicated operations with no active predicates speculatively executed",
+ "EventCode": "0x8075",
+ "EventName": "SVE_PRED_EMPTY_SPEC",
+ "BriefDescription": "SVE predicated operations with no active predicates speculatively executed"
+ },
+ {
+ "PublicDescription": "SVE predicated operations speculatively executed with all active predicates",
+ "EventCode": "0x8076",
+ "EventName": "SVE_PRED_FULL_SPEC",
+ "BriefDescription": "SVE predicated operations speculatively executed with all active predicates"
+ },
+ {
+ "PublicDescription": "SVE predicated operations speculatively executed with partially active predicates",
+ "EventCode": "0x8077",
+ "EventName": "SVE_PRED_PARTIAL_SPEC",
+ "BriefDescription": "SVE predicated operations speculatively executed with partially active predicates"
+ },
+ {
+ "PublicDescription": "SVE predicated operations with empty or partially active predicates",
+ "EventCode": "0x8079",
+ "EventName": "SVE_PRED_NOT_FULL_SPEC",
+ "BriefDescription": "SVE predicated operations with empty or partially active predicates"
+ },
+ {
"PublicDescription": "SVE MOVPRFX Operations speculatively executed.",
"EventCode": "0x807C",
"EventName": "SVE_MOVPRFX_SPEC",
@@ -498,6 +666,12 @@
"BriefDescription": "SVE First-fault load Operations speculatively executed."
},
{
+ "PublicDescription": "SVE first-fault load operations speculatively executed which set FFR bit to 0",
+ "EventCode": "0x80BD",
+ "EventName": "SVE_LDFF_FAULT_SPEC",
+ "BriefDescription": "SVE first-fault load operations speculatively executed which set FFR bit to 0"
+ },
+ {
"PublicDescription": "Scalable floating-point element Operations speculatively executed.",
"EventCode": "0x80C0",
"EventName": "FP_SCALE_OPS_SPEC",
@@ -544,5 +718,29 @@
"EventCode": "0x80C7",
"EventName": "FP_DP_FIXED_OPS_SPEC",
"BriefDescription": "Non-scalable double-precision floating-point element Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE 8-bit integer operations speculatively executed",
+ "EventCode": "0x80E3",
+ "EventName": "ASE_SVE_INT8_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE 8-bit integer operations speculatively executed"
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE 16-bit integer operations speculatively executed",
+ "EventCode": "0x80E7",
+ "EventName": "ASE_SVE_INT16_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE 16-bit integer operations speculatively executed"
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE 32-bit integer operations speculatively executed",
+ "EventCode": "0x80EB",
+ "EventName": "ASE_SVE_INT32_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE 32-bit integer operations speculatively executed"
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE 64-bit integer operations speculatively executed",
+ "EventCode": "0x80EF",
+ "EventName": "ASE_SVE_INT64_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE 64-bit integer operations speculatively executed"
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index 31d8b57ca9bb..b899db48c12a 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -19,6 +19,7 @@
0x00000000410fd0b0,v1,arm/cortex-a76-n1,core
0x00000000410fd0c0,v1,arm/cortex-a76-n1,core
0x00000000410fd400,v1,arm/neoverse-v1,core
+0x00000000410fd490,v1,arm/neoverse-n2,core
0x00000000420f5160,v1,cavium/thunderx2,core
0x00000000430f0af0,v1,cavium/thunderx2,core
0x00000000460f0010,v1,fujitsu/a64fx,core
diff --git a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json b/tools/perf/pmu-events/arch/arm64/recommended.json
index d0a19866563d..210afa856091 100644
--- a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json
+++ b/tools/perf/pmu-events/arch/arm64/recommended.json
@@ -148,305 +148,305 @@
"EventCode": "0x60",
"EventName": "BUS_ACCESS_RD",
"BriefDescription": "Bus access read"
- },
- {
+ },
+ {
"PublicDescription": "Bus access write",
"EventCode": "0x61",
"EventName": "BUS_ACCESS_WR",
"BriefDescription": "Bus access write"
- },
- {
+ },
+ {
"PublicDescription": "Bus access, Normal, Cacheable, Shareable",
"EventCode": "0x62",
"EventName": "BUS_ACCESS_SHARED",
"BriefDescription": "Bus access, Normal, Cacheable, Shareable"
- },
- {
+ },
+ {
"PublicDescription": "Bus access, not Normal, Cacheable, Shareable",
"EventCode": "0x63",
"EventName": "BUS_ACCESS_NOT_SHARED",
"BriefDescription": "Bus access, not Normal, Cacheable, Shareable"
- },
- {
+ },
+ {
"PublicDescription": "Bus access, Normal",
"EventCode": "0x64",
"EventName": "BUS_ACCESS_NORMAL",
"BriefDescription": "Bus access, Normal"
- },
- {
+ },
+ {
"PublicDescription": "Bus access, peripheral",
"EventCode": "0x65",
"EventName": "BUS_ACCESS_PERIPH",
"BriefDescription": "Bus access, peripheral"
- },
- {
+ },
+ {
"PublicDescription": "Data memory access, read",
"EventCode": "0x66",
"EventName": "MEM_ACCESS_RD",
"BriefDescription": "Data memory access, read"
- },
- {
+ },
+ {
"PublicDescription": "Data memory access, write",
"EventCode": "0x67",
"EventName": "MEM_ACCESS_WR",
"BriefDescription": "Data memory access, write"
- },
- {
+ },
+ {
"PublicDescription": "Unaligned access, read",
"EventCode": "0x68",
"EventName": "UNALIGNED_LD_SPEC",
"BriefDescription": "Unaligned access, read"
- },
- {
+ },
+ {
"PublicDescription": "Unaligned access, write",
"EventCode": "0x69",
"EventName": "UNALIGNED_ST_SPEC",
"BriefDescription": "Unaligned access, write"
- },
- {
+ },
+ {
"PublicDescription": "Unaligned access",
"EventCode": "0x6a",
"EventName": "UNALIGNED_LDST_SPEC",
"BriefDescription": "Unaligned access"
- },
- {
+ },
+ {
"PublicDescription": "Exclusive operation speculatively executed, LDREX or LDX",
"EventCode": "0x6c",
"EventName": "LDREX_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, LDREX or LDX"
- },
- {
+ },
+ {
"PublicDescription": "Exclusive operation speculatively executed, STREX or STX pass",
"EventCode": "0x6d",
"EventName": "STREX_PASS_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, STREX or STX pass"
- },
- {
+ },
+ {
"PublicDescription": "Exclusive operation speculatively executed, STREX or STX fail",
"EventCode": "0x6e",
"EventName": "STREX_FAIL_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, STREX or STX fail"
- },
- {
+ },
+ {
"PublicDescription": "Exclusive operation speculatively executed, STREX or STX",
"EventCode": "0x6f",
"EventName": "STREX_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, STREX or STX"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, load",
"EventCode": "0x70",
"EventName": "LD_SPEC",
"BriefDescription": "Operation speculatively executed, load"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, store",
"EventCode": "0x71",
"EventName": "ST_SPEC",
"BriefDescription": "Operation speculatively executed, store"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, load or store",
"EventCode": "0x72",
"EventName": "LDST_SPEC",
"BriefDescription": "Operation speculatively executed, load or store"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, integer data processing",
"EventCode": "0x73",
"EventName": "DP_SPEC",
"BriefDescription": "Operation speculatively executed, integer data processing"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, Advanced SIMD instruction",
"EventCode": "0x74",
"EventName": "ASE_SPEC",
"BriefDescription": "Operation speculatively executed, Advanced SIMD instruction"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, floating-point instruction",
"EventCode": "0x75",
"EventName": "VFP_SPEC",
"BriefDescription": "Operation speculatively executed, floating-point instruction"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, software change of the PC",
"EventCode": "0x76",
"EventName": "PC_WRITE_SPEC",
"BriefDescription": "Operation speculatively executed, software change of the PC"
- },
- {
+ },
+ {
"PublicDescription": "Operation speculatively executed, Cryptographic instruction",
"EventCode": "0x77",
"EventName": "CRYPTO_SPEC",
"BriefDescription": "Operation speculatively executed, Cryptographic instruction"
- },
- {
+ },
+ {
"PublicDescription": "Branch speculatively executed, immediate branch",
"EventCode": "0x78",
"EventName": "BR_IMMED_SPEC",
"BriefDescription": "Branch speculatively executed, immediate branch"
- },
- {
+ },
+ {
"PublicDescription": "Branch speculatively executed, procedure return",
"EventCode": "0x79",
"EventName": "BR_RETURN_SPEC",
"BriefDescription": "Branch speculatively executed, procedure return"
- },
- {
+ },
+ {
"PublicDescription": "Branch speculatively executed, indirect branch",
"EventCode": "0x7a",
"EventName": "BR_INDIRECT_SPEC",
"BriefDescription": "Branch speculatively executed, indirect branch"
- },
- {
+ },
+ {
"PublicDescription": "Barrier speculatively executed, ISB",
"EventCode": "0x7c",
"EventName": "ISB_SPEC",
"BriefDescription": "Barrier speculatively executed, ISB"
- },
- {
+ },
+ {
"PublicDescription": "Barrier speculatively executed, DSB",
"EventCode": "0x7d",
"EventName": "DSB_SPEC",
"BriefDescription": "Barrier speculatively executed, DSB"
- },
- {
+ },
+ {
"PublicDescription": "Barrier speculatively executed, DMB",
"EventCode": "0x7e",
"EventName": "DMB_SPEC",
"BriefDescription": "Barrier speculatively executed, DMB"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Other synchronous",
"EventCode": "0x81",
"EventName": "EXC_UNDEF",
"BriefDescription": "Exception taken, Other synchronous"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Supervisor Call",
"EventCode": "0x82",
"EventName": "EXC_SVC",
"BriefDescription": "Exception taken, Supervisor Call"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Instruction Abort",
"EventCode": "0x83",
"EventName": "EXC_PABORT",
"BriefDescription": "Exception taken, Instruction Abort"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Data Abort and SError",
"EventCode": "0x84",
"EventName": "EXC_DABORT",
"BriefDescription": "Exception taken, Data Abort and SError"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, IRQ",
"EventCode": "0x86",
"EventName": "EXC_IRQ",
"BriefDescription": "Exception taken, IRQ"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, FIQ",
"EventCode": "0x87",
"EventName": "EXC_FIQ",
"BriefDescription": "Exception taken, FIQ"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Secure Monitor Call",
"EventCode": "0x88",
"EventName": "EXC_SMC",
"BriefDescription": "Exception taken, Secure Monitor Call"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Hypervisor Call",
"EventCode": "0x8a",
"EventName": "EXC_HVC",
"BriefDescription": "Exception taken, Hypervisor Call"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Instruction Abort not taken locally",
"EventCode": "0x8b",
"EventName": "EXC_TRAP_PABORT",
"BriefDescription": "Exception taken, Instruction Abort not taken locally"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Data Abort or SError not taken locally",
"EventCode": "0x8c",
"EventName": "EXC_TRAP_DABORT",
"BriefDescription": "Exception taken, Data Abort or SError not taken locally"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, Other traps not taken locally",
"EventCode": "0x8d",
"EventName": "EXC_TRAP_OTHER",
"BriefDescription": "Exception taken, Other traps not taken locally"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, IRQ not taken locally",
"EventCode": "0x8e",
"EventName": "EXC_TRAP_IRQ",
"BriefDescription": "Exception taken, IRQ not taken locally"
- },
- {
+ },
+ {
"PublicDescription": "Exception taken, FIQ not taken locally",
"EventCode": "0x8f",
"EventName": "EXC_TRAP_FIQ",
"BriefDescription": "Exception taken, FIQ not taken locally"
- },
- {
+ },
+ {
"PublicDescription": "Release consistency operation speculatively executed, Load-Acquire",
"EventCode": "0x90",
"EventName": "RC_LD_SPEC",
"BriefDescription": "Release consistency operation speculatively executed, Load-Acquire"
- },
- {
+ },
+ {
"PublicDescription": "Release consistency operation speculatively executed, Store-Release",
"EventCode": "0x91",
"EventName": "RC_ST_SPEC",
"BriefDescription": "Release consistency operation speculatively executed, Store-Release"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache access, read",
"EventCode": "0xa0",
"EventName": "L3D_CACHE_RD",
"BriefDescription": "Attributable Level 3 data or unified cache access, read"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache access, write",
"EventCode": "0xa1",
"EventName": "L3D_CACHE_WR",
"BriefDescription": "Attributable Level 3 data or unified cache access, write"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache refill, read",
"EventCode": "0xa2",
"EventName": "L3D_CACHE_REFILL_RD",
"BriefDescription": "Attributable Level 3 data or unified cache refill, read"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache refill, write",
"EventCode": "0xa3",
"EventName": "L3D_CACHE_REFILL_WR",
"BriefDescription": "Attributable Level 3 data or unified cache refill, write"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache Write-Back, victim",
"EventCode": "0xa6",
"EventName": "L3D_CACHE_WB_VICTIM",
"BriefDescription": "Attributable Level 3 data or unified cache Write-Back, victim"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean",
"EventCode": "0xa7",
"EventName": "L3D_CACHE_WB_CLEAN",
"BriefDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean"
- },
- {
+ },
+ {
"PublicDescription": "Attributable Level 3 data or unified cache access, invalidate",
"EventCode": "0xa8",
"EventName": "L3D_CACHE_INVAL",
"BriefDescription": "Attributable Level 3 data or unified cache access, invalidate"
- }
+ }
]
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 803ca426f8e6..af2b37ef7c70 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -65,6 +65,7 @@ perf-y += pe-file-parsing.o
perf-y += expand-cgroup.o
perf-y += perf-time-to-tsc.o
perf-y += dlfilter-test.o
+perf-y += sigtrap.o
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 8cb5a1c3489e..fac3717d9ba1 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -107,6 +107,7 @@ static struct test_suite *generic_tests[] = {
&suite__expand_cgroup_events,
&suite__perf_time_to_tsc,
&suite__dlfilter,
+ &suite__sigtrap,
NULL,
};
@@ -420,7 +421,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width,
continue;
st.file = ent->d_name;
- pr_info("%2d: %-*s:", i, width, test_suite.desc);
+ pr_info("%3d: %-*s:", i, width, test_suite.desc);
if (intlist__find(skiplist, i)) {
color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
@@ -470,7 +471,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
continue;
}
- pr_info("%2d: %-*s:", i, width, test_description(t, -1));
+ pr_info("%3d: %-*s:", i, width, test_description(t, -1));
if (intlist__find(skiplist, i)) {
color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
@@ -510,7 +511,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
curr, argc, argv))
continue;
- pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
+ pr_info("%3d.%1d: %-*s:", i, subi + 1, subw,
test_description(t, subi));
test_and_print(t, subi);
}
@@ -545,7 +546,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i)
if (!perf_test__matches(t.desc, curr, argc, argv))
continue;
- pr_info("%2d: %s\n", i, t.desc);
+ pr_info("%3d: %s\n", i, t.desc);
}
@@ -567,14 +568,14 @@ static int perf_test__list(int argc, const char **argv)
if (!perf_test__matches(test_description(t, -1), curr, argc, argv))
continue;
- pr_info("%2d: %s\n", i, test_description(t, -1));
+ pr_info("%3d: %s\n", i, test_description(t, -1));
if (has_subtests(t)) {
int subn = num_subtests(t);
int subi;
for (subi = 0; subi < subn; subi++)
- pr_info("%2d:%1d: %s\n", i, subi + 1,
+ pr_info("%3d:%1d: %s\n", i, subi + 1,
test_description(t, subi));
}
}
@@ -606,6 +607,9 @@ int cmd_test(int argc, const char **argv)
if (ret < 0)
return ret;
+ /* Unbuffered output */
+ setvbuf(stdout, NULL, _IONBF, 0);
+
argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0);
if (argc >= 1 && !strcmp(argv[0], "list"))
return perf_test__list(argc - 1, argv + 1);
diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c
new file mode 100644
index 000000000000..1f147fe6595f
--- /dev/null
+++ b/tools/perf/tests/sigtrap.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Basic test for sigtrap support.
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/string.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "cloexec.h"
+#include "debug.h"
+#include "event.h"
+#include "tests.h"
+#include "../perf-sys.h"
+
+/*
+ * PowerPC and S390 do not support creation of instruction breakpoints using the
+ * perf_event interface.
+ *
+ * Just disable the test for these architectures until these issues are
+ * resolved.
+ */
+#if defined(__powerpc__) || defined(__s390x__)
+#define BP_ACCOUNT_IS_SUPPORTED 0
+#else
+#define BP_ACCOUNT_IS_SUPPORTED 1
+#endif
+
+#define NUM_THREADS 5
+
+static struct {
+ int tids_want_signal; /* Which threads still want a signal. */
+ int signal_count; /* Sanity check number of signals received. */
+ volatile int iterate_on; /* Variable to set breakpoint on. */
+ siginfo_t first_siginfo; /* First observed siginfo_t. */
+} ctx;
+
+#define TEST_SIG_DATA (~(unsigned long)(&ctx.iterate_on))
+
+static struct perf_event_attr make_event_attr(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_BREAKPOINT,
+ .size = sizeof(attr),
+ .sample_period = 1,
+ .disabled = 1,
+ .bp_addr = (unsigned long)&ctx.iterate_on,
+ .bp_type = HW_BREAKPOINT_RW,
+ .bp_len = HW_BREAKPOINT_LEN_1,
+ .inherit = 1, /* Children inherit events ... */
+ .inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */
+ .remove_on_exec = 1, /* Required by sigtrap. */
+ .sigtrap = 1, /* Request synchronous SIGTRAP on event. */
+ .sig_data = TEST_SIG_DATA,
+ .exclude_kernel = 1, /* To allow */
+ .exclude_hv = 1, /* running as !root */
+ };
+ return attr;
+}
+
+static void
+sigtrap_handler(int signum __maybe_unused, siginfo_t *info, void *ucontext __maybe_unused)
+{
+ if (!__atomic_fetch_add(&ctx.signal_count, 1, __ATOMIC_RELAXED))
+ ctx.first_siginfo = *info;
+ __atomic_fetch_sub(&ctx.tids_want_signal, syscall(SYS_gettid), __ATOMIC_RELAXED);
+}
+
+static void *test_thread(void *arg)
+{
+ pthread_barrier_t *barrier = (pthread_barrier_t *)arg;
+ pid_t tid = syscall(SYS_gettid);
+ int i;
+
+ pthread_barrier_wait(barrier);
+
+ __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED);
+ for (i = 0; i < ctx.iterate_on - 1; i++)
+ __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED);
+
+ return NULL;
+}
+
+static int run_test_threads(pthread_t *threads, pthread_barrier_t *barrier)
+{
+ int i;
+
+ pthread_barrier_wait(barrier);
+ for (i = 0; i < NUM_THREADS; i++)
+ TEST_ASSERT_EQUAL("pthread_join() failed", pthread_join(threads[i], NULL), 0);
+
+ return TEST_OK;
+}
+
+static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrier)
+{
+ int ret;
+
+ ctx.iterate_on = 3000;
+
+ TEST_ASSERT_EQUAL("misfired signal?", ctx.signal_count, 0);
+ TEST_ASSERT_EQUAL("enable failed", ioctl(fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+ ret = run_test_threads(threads, barrier);
+ TEST_ASSERT_EQUAL("disable failed", ioctl(fd, PERF_EVENT_IOC_DISABLE, 0), 0);
+
+ TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, NUM_THREADS * ctx.iterate_on);
+ TEST_ASSERT_EQUAL("missing signals or incorrectly delivered", ctx.tids_want_signal, 0);
+ TEST_ASSERT_VAL("unexpected si_addr", ctx.first_siginfo.si_addr == &ctx.iterate_on);
+#if 0 /* FIXME: enable when libc's signal.h has si_perf_{type,data} */
+ TEST_ASSERT_EQUAL("unexpected si_perf_type", ctx.first_siginfo.si_perf_type,
+ PERF_TYPE_BREAKPOINT);
+ TEST_ASSERT_EQUAL("unexpected si_perf_data", ctx.first_siginfo.si_perf_data,
+ TEST_SIG_DATA);
+#endif
+
+ return ret;
+}
+
+static int test__sigtrap(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct perf_event_attr attr = make_event_attr();
+ struct sigaction action = {};
+ struct sigaction oldact;
+ pthread_t threads[NUM_THREADS];
+ pthread_barrier_t barrier;
+ char sbuf[STRERR_BUFSIZE];
+ int i, fd, ret = TEST_FAIL;
+
+ if (!BP_ACCOUNT_IS_SUPPORTED) {
+ pr_debug("Test not supported on this architecture");
+ return TEST_SKIP;
+ }
+
+ pthread_barrier_init(&barrier, NULL, NUM_THREADS + 1);
+
+ action.sa_flags = SA_SIGINFO | SA_NODEFER;
+ action.sa_sigaction = sigtrap_handler;
+ sigemptyset(&action.sa_mask);
+ if (sigaction(SIGTRAP, &action, &oldact)) {
+ pr_debug("FAILED sigaction(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out;
+ }
+
+ fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag());
+ if (fd < 0) {
+ pr_debug("FAILED sys_perf_event_open(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_restore_sigaction;
+ }
+
+ for (i = 0; i < NUM_THREADS; i++) {
+ if (pthread_create(&threads[i], NULL, test_thread, &barrier)) {
+ pr_debug("FAILED pthread_create(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_close_perf_event;
+ }
+ }
+
+ ret = run_stress_test(fd, threads, &barrier);
+
+out_close_perf_event:
+ close(fd);
+out_restore_sigaction:
+ sigaction(SIGTRAP, &oldact, NULL);
+out:
+ pthread_barrier_destroy(&barrier);
+ return ret;
+}
+
+DEFINE_SUITE("Sigtrap", sigtrap);
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 8f65098110fc..5bbb8f6a48fc 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -146,6 +146,7 @@ DECLARE_SUITE(pe_file_parsing);
DECLARE_SUITE(expand_cgroup_events);
DECLARE_SUITE(perf_time_to_tsc);
DECLARE_SUITE(dlfilter);
+DECLARE_SUITE(sigtrap);
/*
* PowerPC and S390 do not support creation of instruction breakpoints using the
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 2e5bfbb69960..2a403cefcaf2 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,3 +1,4 @@
+perf-y += arm64-frame-pointer-unwind-support.o
perf-y += annotate.o
perf-y += block-info.o
perf-y += block-range.o
@@ -144,6 +145,7 @@ perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_LIBBPF) += bpf_map.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
perf-$(CONFIG_LIBELF) += symbol-elf.o
perf-$(CONFIG_LIBELF) += probe-file.o
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 3fc528c9270c..5e390a1a79ab 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -179,6 +179,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.phys_addr = ip;
break;
case ARM_SPE_COUNTER:
+ if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT)
+ decoder->record.latency = payload;
break;
case ARM_SPE_CONTEXT:
decoder->record.context_id = payload;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 46a8556a9e95..69b31084d6be 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -33,6 +33,7 @@ struct arm_spe_record {
enum arm_spe_sample_type type;
int err;
u32 op;
+ u32 latency;
u64 from_ip;
u64 to_ip;
u64 timestamp;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index fccac06b573a..d2b64e3f588b 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -58,6 +58,8 @@ struct arm_spe {
u8 sample_branch;
u8 sample_remote_access;
u8 sample_memory;
+ u8 sample_instructions;
+ u64 instructions_sample_period;
u64 l1d_miss_id;
u64 l1d_access_id;
@@ -68,6 +70,7 @@ struct arm_spe {
u64 branch_miss_id;
u64 remote_access_id;
u64 memory_id;
+ u64 instructions_id;
u64 kernel_start;
@@ -90,6 +93,7 @@ struct arm_spe_queue {
u64 time;
u64 timestamp;
struct thread *thread;
+ u64 period_instructions;
};
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
@@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
speq->pid = -1;
speq->tid = -1;
speq->cpu = -1;
+ speq->period_instructions = 0;
/* params set */
params.get_trace = arm_spe_get_trace;
@@ -330,6 +335,7 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
sample.addr = record->virt_addr;
sample.phys_addr = record->phys_addr;
sample.data_src = data_src;
+ sample.weight = record->latency;
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
@@ -347,6 +353,36 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
sample.id = spe_events_id;
sample.stream_id = spe_events_id;
sample.addr = record->to_ip;
+ sample.weight = record->latency;
+
+ return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
+static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
+ u64 spe_events_id, u64 data_src)
+{
+ struct arm_spe *spe = speq->spe;
+ struct arm_spe_record *record = &speq->decoder->record;
+ union perf_event *event = speq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ /*
+ * Handles perf instruction sampling period.
+ */
+ speq->period_instructions++;
+ if (speq->period_instructions < spe->instructions_sample_period)
+ return 0;
+ speq->period_instructions = 0;
+
+ arm_spe_prep_sample(spe, speq, event, &sample);
+
+ sample.id = spe_events_id;
+ sample.stream_id = spe_events_id;
+ sample.addr = record->virt_addr;
+ sample.phys_addr = record->phys_addr;
+ sample.data_src = data_src;
+ sample.period = spe->instructions_sample_period;
+ sample.weight = record->latency;
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
@@ -480,6 +516,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
return err;
}
+ if (spe->sample_instructions) {
+ err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -993,7 +1035,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
attr.type = PERF_TYPE_HARDWARE;
attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
- PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
+ PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
+ PERF_SAMPLE_WEIGHT;
if (spe->timeless_decoding)
attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
else
@@ -1107,7 +1150,29 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
return err;
spe->memory_id = id;
arm_spe_set_event_name(evlist, id, "memory");
+ id += 1;
+ }
+
+ if (spe->synth_opts.instructions) {
+ if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
+ pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
+ goto synth_instructions_out;
+ }
+ if (spe->synth_opts.period > 1)
+ pr_warning("Arm SPE has a hardware-based sample period.\n"
+ "Additional instruction events will be discarded by --itrace\n");
+
+ spe->sample_instructions = true;
+ attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+ attr.sample_period = spe->synth_opts.period;
+ spe->instructions_sample_period = attr.sample_period;
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->instructions_id = id;
+ arm_spe_set_event_name(evlist, id, "instructions");
}
+synth_instructions_out:
return 0;
}
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c
new file mode 100644
index 000000000000..4f5ecf51ed38
--- /dev/null
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "arm64-frame-pointer-unwind-support.h"
+#include "callchain.h"
+#include "event.h"
+#include "perf_regs.h" // SMPL_REG_MASK
+#include "unwind.h"
+
+#define perf_event_arm_regs perf_event_arm64_regs
+#include "../arch/arm64/include/uapi/asm/perf_regs.h"
+#undef perf_event_arm_regs
+
+struct entries {
+ u64 stack[2];
+ size_t length;
+};
+
+static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
+{
+ return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs
+ && sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR);
+}
+
+static int add_entry(struct unwind_entry *entry, void *arg)
+{
+ struct entries *entries = arg;
+
+ entries->stack[entries->length++] = entry->ip;
+ return 0;
+}
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int usr_idx)
+{
+ int ret;
+ struct entries entries = {};
+ struct regs_dump old_regs = sample->user_regs;
+
+ if (!get_leaf_frame_caller_enabled(sample))
+ return 0;
+
+ /*
+ * If PC and SP are not recorded, get the value of PC from the stack
+ * and set its mask. SP is not used when doing the unwinding but it
+ * still needs to be set to prevent failures.
+ */
+
+ if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) {
+ sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC);
+ sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1];
+ }
+
+ if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) {
+ sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP);
+ sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0;
+ }
+
+ ret = unwind__get_entries(add_entry, &entries, thread, sample, 2);
+ sample->user_regs = old_regs;
+
+ if (ret || entries.length != 2)
+ return ret;
+
+ return callchain_param.order == ORDER_CALLER ? entries.stack[0] : entries.stack[1];
+}
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h
new file mode 100644
index 000000000000..32af9ce94398
--- /dev/null
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+
+#include "event.h"
+#include "thread.h"
+
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx);
+
+#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index fbb3c4057c30..22662fc85cc9 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -421,7 +421,7 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
size_t prologue_cnt = 0;
int i, err;
- if (IS_ERR(priv) || !priv || priv->is_tp)
+ if (IS_ERR_OR_NULL(priv) || priv->is_tp)
goto errout;
pev = &priv->pev;
@@ -570,7 +570,7 @@ static int hook_load_preprocessor(struct bpf_program *prog)
bool need_prologue = false;
int err, i;
- if (IS_ERR(priv) || !priv) {
+ if (IS_ERR_OR_NULL(priv)) {
pr_debug("Internal error when hook preprocessor\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -642,8 +642,11 @@ int bpf__probe(struct bpf_object *obj)
goto out;
priv = bpf_program__priv(prog);
- if (IS_ERR(priv) || !priv) {
- err = PTR_ERR(priv);
+ if (IS_ERR_OR_NULL(priv)) {
+ if (!priv)
+ err = -BPF_LOADER_ERRNO__INTERNAL;
+ else
+ err = PTR_ERR(priv);
goto out;
}
@@ -693,7 +696,7 @@ int bpf__unprobe(struct bpf_object *obj)
struct bpf_prog_priv *priv = bpf_program__priv(prog);
int i;
- if (IS_ERR(priv) || !priv || priv->is_tp)
+ if (IS_ERR_OR_NULL(priv) || priv->is_tp)
continue;
for (i = 0; i < priv->pev.ntevs; i++) {
@@ -751,7 +754,7 @@ int bpf__foreach_event(struct bpf_object *obj,
struct perf_probe_event *pev;
int i, fd;
- if (IS_ERR(priv) || !priv) {
+ if (IS_ERR_OR_NULL(priv)) {
pr_debug("bpf: failed to get private field\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
new file mode 100644
index 000000000000..f00a2de6778c
--- /dev/null
+++ b/tools/perf/util/bpf_ftrace.c
@@ -0,0 +1,152 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <linux/err.h>
+
+#include "util/ftrace.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/bpf_counter.h"
+
+#include "util/bpf_skel/func_latency.skel.h"
+
+static struct func_latency_bpf *skel;
+
+int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
+{
+ int fd, err;
+ int i, ncpus = 1, ntasks = 1;
+ struct filter_entry *func;
+
+ if (!list_is_singular(&ftrace->filters)) {
+ pr_err("ERROR: %s target function(s).\n",
+ list_empty(&ftrace->filters) ? "No" : "Too many");
+ return -1;
+ }
+
+ func = list_first_entry(&ftrace->filters, struct filter_entry, list);
+
+ skel = func_latency_bpf__open();
+ if (!skel) {
+ pr_err("Failed to open func latency skeleton\n");
+ return -1;
+ }
+
+ /* don't need to set cpu filter for system-wide mode */
+ if (ftrace->target.cpu_list) {
+ ncpus = perf_cpu_map__nr(ftrace->evlist->core.cpus);
+ bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+ }
+
+ if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+ ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
+ bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ }
+
+ set_max_rlimit();
+
+ err = func_latency_bpf__load(skel);
+ if (err) {
+ pr_err("Failed to load func latency skeleton\n");
+ goto out;
+ }
+
+ if (ftrace->target.cpu_list) {
+ u32 cpu;
+ u8 val = 1;
+
+ skel->bss->has_cpu = 1;
+ fd = bpf_map__fd(skel->maps.cpu_filter);
+
+ for (i = 0; i < ncpus; i++) {
+ cpu = cpu_map__cpu(ftrace->evlist->core.cpus, i);
+ bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+ }
+ }
+
+ if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+ u32 pid;
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+
+ for (i = 0; i < ntasks; i++) {
+ pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
+ bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+ }
+ }
+
+ skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
+ false, func->name);
+ if (IS_ERR(skel->links.func_begin)) {
+ pr_err("Failed to attach fentry program\n");
+ err = PTR_ERR(skel->links.func_begin);
+ goto out;
+ }
+
+ skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
+ true, func->name);
+ if (IS_ERR(skel->links.func_end)) {
+ pr_err("Failed to attach fexit program\n");
+ err = PTR_ERR(skel->links.func_end);
+ goto out;
+ }
+
+ /* XXX: we don't actually use this fd - just for poll() */
+ return open("/dev/null", O_RDONLY);
+
+out:
+ return err;
+}
+
+int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ skel->bss->enabled = 1;
+ return 0;
+}
+
+int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ skel->bss->enabled = 0;
+ return 0;
+}
+
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+ int buckets[])
+{
+ int i, fd, err;
+ u32 idx;
+ u64 *hist;
+ int ncpus = cpu__max_cpu();
+
+ fd = bpf_map__fd(skel->maps.latency);
+
+ hist = calloc(ncpus, sizeof(*hist));
+ if (hist == NULL)
+ return -ENOMEM;
+
+ for (idx = 0; idx < NUM_BUCKET; idx++) {
+ err = bpf_map_lookup_elem(fd, &idx, hist);
+ if (err) {
+ buckets[idx] = 0;
+ continue;
+ }
+
+ for (i = 0; i < ncpus; i++)
+ buckets[idx] += hist[i];
+ }
+
+ free(hist);
+ return 0;
+}
+
+int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ func_latency_bpf__destroy(skel);
+ return 0;
+}
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
new file mode 100644
index 000000000000..ea94187fe443
--- /dev/null
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+// This should be in sync with "util/ftrace.h"
+#define NUM_BUCKET 22
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, 10000);
+} functime SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} cpu_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} task_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, NUM_BUCKET);
+} latency SEC(".maps");
+
+
+int enabled = 0;
+int has_cpu = 0;
+int has_task = 0;
+
+SEC("kprobe/func")
+int BPF_PROG(func_begin)
+{
+ __u64 key, now;
+
+ if (!enabled)
+ return 0;
+
+ key = bpf_get_current_pid_tgid();
+
+ if (has_cpu) {
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_task) {
+ __u32 pid = key & 0xffffffff;
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&task_filter, &pid);
+ if (!ok)
+ return 0;
+ }
+
+ now = bpf_ktime_get_ns();
+
+ // overwrite timestamp for nested functions
+ bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
+ return 0;
+}
+
+SEC("kretprobe/func")
+int BPF_PROG(func_end)
+{
+ __u64 tid;
+ __u64 *start;
+
+ if (!enabled)
+ return 0;
+
+ tid = bpf_get_current_pid_tgid();
+
+ start = bpf_map_lookup_elem(&functime, &tid);
+ if (start) {
+ __s64 delta = bpf_ktime_get_ns() - *start;
+ __u32 key;
+ __u64 *hist;
+
+ bpf_map_delete_elem(&functime, &tid);
+
+ if (delta < 0)
+ return 0;
+
+ // calculate index using delta in usec
+ for (key = 0; key < (NUM_BUCKET - 1); key++) {
+ if (delta < ((1000UL) << key))
+ break;
+ }
+
+ hist = bpf_map_lookup_elem(&latency, &key);
+ if (!hist)
+ return 0;
+
+ *hist += 1;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 8e2777133bd9..131207b91d15 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1600,7 +1600,7 @@ void callchain_cursor_reset(struct callchain_cursor *cursor)
map__zput(node->ms.map);
}
-void callchain_param_setup(u64 sample_type)
+void callchain_param_setup(u64 sample_type, const char *arch)
{
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
@@ -1612,6 +1612,18 @@ void callchain_param_setup(u64 sample_type)
else
callchain_param.record_mode = CALLCHAIN_FP;
}
+
+ /*
+ * It's necessary to use libunwind to reliably determine the caller of
+ * a leaf function on aarch64, as otherwise we cannot know whether to
+ * start from the LR or FP.
+ *
+ * Always starting from the LR can result in duplicate or entirely
+ * erroneous entries. Always skipping the LR and starting from the FP
+ * can result in missing entries.
+ */
+ if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64"))
+ dwarf_callchain_users = true;
}
static bool chain_match(struct callchain_list *base_chain,
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 5824134f983b..d95615daed73 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -280,6 +280,8 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
}
#endif
+void arch__add_leaf_frame_record_opts(struct record_opts *opts);
+
char *callchain_list__sym_name(struct callchain_list *cl,
char *bf, size_t bfsize, bool show_dso);
char *callchain_node__scnprintf_value(struct callchain_node *node,
@@ -298,7 +300,7 @@ int callchain_branch_counts(struct callchain_root *root,
u64 *branch_count, u64 *predicted_count,
u64 *abort_count, u64 *cycles_count);
-void callchain_param_setup(u64 sample_type);
+void callchain_param_setup(u64 sample_type, const char *arch);
bool callchain_cnode_matched(struct callchain_node *base_cnode,
struct callchain_node *pair_cnode);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 97bfb8d0be4f..27594900a052 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -64,6 +64,7 @@ struct evlist {
struct evsel *selected;
struct events_stats stats;
struct perf_env *env;
+ const char *hybrid_pmu_name;
void (*trace_event_sample_raw)(struct evlist *evlist,
union perf_event *event,
struct perf_sample *sample);
@@ -110,6 +111,7 @@ int __evlist__add_default_attrs(struct evlist *evlist,
__evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
int arch_evlist__add_default_attrs(struct evlist *evlist);
+struct evsel *arch_evlist__leader(struct list_head *list);
int evlist__add_dummy(struct evlist *evlist);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ac0127be0459..656c30b988ce 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1476,25 +1476,6 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
count->run = count->run - tmp.run;
}
-void perf_counts_values__scale(struct perf_counts_values *count,
- bool scale, s8 *pscaled)
-{
- s8 scaled = 0;
-
- if (scale) {
- if (count->run == 0) {
- scaled = -1;
- count->val = 0;
- } else if (count->run < count->ena) {
- scaled = 1;
- count->val = (u64)((double) count->val * count->ena / count->run);
- }
- }
-
- if (pscaled)
- *pscaled = scaled;
-}
-
static int evsel__read_one(struct evsel *evsel, int cpu, int thread)
{
struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 29d49a8c1e92..99aa3363def7 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -195,9 +195,6 @@ static inline int evsel__nr_cpus(struct evsel *evsel)
return evsel__cpus(evsel)->nr;
}
-void perf_counts_values__scale(struct perf_counts_values *count,
- bool scale, s8 *pscaled);
-
void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
new file mode 100644
index 000000000000..887f68a185f7
--- /dev/null
+++ b/tools/perf/util/ftrace.h
@@ -0,0 +1,81 @@
+#ifndef __PERF_FTRACE_H__
+#define __PERF_FTRACE_H__
+
+#include <linux/list.h>
+
+#include "target.h"
+
+struct evlist;
+
+struct perf_ftrace {
+ struct evlist *evlist;
+ struct target target;
+ const char *tracer;
+ struct list_head filters;
+ struct list_head notrace;
+ struct list_head graph_funcs;
+ struct list_head nograph_funcs;
+ unsigned long percpu_buffer_size;
+ bool inherit;
+ int graph_depth;
+ int func_stack_trace;
+ int func_irq_info;
+ int graph_nosleep_time;
+ int graph_noirqs;
+ int graph_verbose;
+ int graph_thresh;
+ unsigned int initial_delay;
+};
+
+struct filter_entry {
+ struct list_head list;
+ char name[];
+};
+
+#define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */
+
+#ifdef HAVE_BPF_SKEL
+
+int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace);
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
+ int buckets[]);
+int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace);
+
+#else /* !HAVE_BPF_SKEL */
+
+static inline int
+perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+ int buckets[] __maybe_unused)
+{
+ return -1;
+}
+
+static inline int
+perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
+{
+ return -1;
+}
+
+#endif /* HAVE_BPF_SKEL */
+
+#endif /* __PERF_FTRACE_H__ */
diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c
index c397be0c2e32..15f60fd09424 100644
--- a/tools/perf/util/libunwind/arm64.c
+++ b/tools/perf/util/libunwind/arm64.c
@@ -23,7 +23,9 @@
#include "unwind.h"
#include "libunwind-aarch64.h"
+#define perf_event_arm_regs perf_event_arm64_regs
#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h>
+#undef perf_event_arm_regs
#include "../../arch/arm64/util/unwind-libunwind.c"
/* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for local libunwind,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index fb8496df8432..3901440aeff9 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -34,6 +34,7 @@
#include "bpf-event.h"
#include <internal/lib.h> // page_size
#include "cgroup.h"
+#include "arm64-frame-pointer-unwind-support.h"
#include <linux/ctype.h>
#include <symbol/kallsyms.h>
@@ -2710,6 +2711,15 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
return err;
}
+static u64 get_leaf_frame_caller(struct perf_sample *sample,
+ struct thread *thread, int usr_idx)
+{
+ if (machine__normalized_is(thread->maps->machine, "arm64"))
+ return get_leaf_frame_caller_aarch64(sample, thread, usr_idx);
+ else
+ return 0;
+}
+
static int thread__resolve_callchain_sample(struct thread *thread,
struct callchain_cursor *cursor,
struct evsel *evsel,
@@ -2723,9 +2733,10 @@ static int thread__resolve_callchain_sample(struct thread *thread,
struct ip_callchain *chain = sample->callchain;
int chain_nr = 0;
u8 cpumode = PERF_RECORD_MISC_USER;
- int i, j, err, nr_entries;
+ int i, j, err, nr_entries, usr_idx;
int skip_idx = -1;
int first_call = 0;
+ u64 leaf_frame_caller;
if (chain)
chain_nr = chain->nr;
@@ -2850,6 +2861,34 @@ check_calls:
continue;
}
+ /*
+ * PERF_CONTEXT_USER allows us to locate where the user stack ends.
+ * Depending on callchain_param.order and the position of PERF_CONTEXT_USER,
+ * the index will be different in order to add the missing frame
+ * at the right place.
+ */
+
+ usr_idx = callchain_param.order == ORDER_CALLEE ? j-2 : j-1;
+
+ if (usr_idx >= 0 && chain->ips[usr_idx] == PERF_CONTEXT_USER) {
+
+ leaf_frame_caller = get_leaf_frame_caller(sample, thread, usr_idx);
+
+ /*
+ * check if leaf_frame_Caller != ip to not add the same
+ * value twice.
+ */
+
+ if (leaf_frame_caller && leaf_frame_caller != ip) {
+
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, leaf_frame_caller,
+ false, NULL, NULL, 0);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
+ }
+
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
false, NULL, NULL, 0);
@@ -3079,14 +3118,19 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
}
/*
- * Compares the raw arch string. N.B. see instead perf_env__arch() if a
- * normalized arch is needed.
+ * Compares the raw arch string. N.B. see instead perf_env__arch() or
+ * machine__normalized_is() if a normalized arch is needed.
*/
bool machine__is(struct machine *machine, const char *arch)
{
return machine && !strcmp(perf_env__raw_arch(machine->env), arch);
}
+bool machine__normalized_is(struct machine *machine, const char *arch)
+{
+ return machine && !strcmp(perf_env__arch(machine->env), arch);
+}
+
int machine__nr_cpus_avail(struct machine *machine)
{
return machine ? perf_env__nr_cpus_avail(machine->env) : 0;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index a143087eeb47..c5a45dc8df4c 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -208,6 +208,7 @@ static inline bool machine__is_host(struct machine *machine)
}
bool machine__is(struct machine *machine, const char *arch);
+bool machine__normalized_is(struct machine *machine, const char *arch);
int machine__nr_cpus_avail(struct machine *machine);
struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 3167b4628b6d..ed0ab838bcc5 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -309,6 +309,9 @@ static const char * const mem_hops[] = {
* to be set with mem_hops field.
*/
"core, same node",
+ "node, same socket",
+ "socket, same board",
+ "board",
};
int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
@@ -316,7 +319,7 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
size_t i, l = 0;
u64 m = PERF_MEM_LVL_NA;
u64 hit, miss;
- int printed;
+ int printed = 0;
if (mem_info)
m = mem_info->data_src.mem_lvl;
@@ -335,18 +338,22 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
l += 7;
}
- if (mem_info && mem_info->data_src.mem_hops)
+ /*
+ * Incase mem_hops field is set, we can skip printing data source via
+ * PERF_MEM_LVL namespace.
+ */
+ if (mem_info && mem_info->data_src.mem_hops) {
l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
-
- printed = 0;
- for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
- if (!(m & 0x1))
- continue;
- if (printed++) {
- strcat(out, " or ");
- l += 4;
+ } else {
+ for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
+ if (!(m & 0x1))
+ continue;
+ if (printed++) {
+ strcat(out, " or ");
+ l += 4;
+ }
+ l += scnprintf(out + l, sz - l, mem_lvl[i]);
}
- l += scnprintf(out + l, sz - l, mem_lvl[i]);
}
if (mem_info && mem_info->data_src.mem_lvl_num) {
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index fffe02aae3ed..51c99cb08abf 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1115,13 +1115,27 @@ out:
return ret;
}
+/**
+ * metric_list_cmp - list_sort comparator that sorts metrics with more events to
+ * the front. duration_time is excluded from the count.
+ */
static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l,
const struct list_head *r)
{
const struct metric *left = container_of(l, struct metric, nd);
const struct metric *right = container_of(r, struct metric, nd);
+ struct expr_id_data *data;
+ int left_count, right_count;
+
+ left_count = hashmap__size(left->pctx->ids);
+ if (!expr__get_id(left->pctx, "duration_time", &data))
+ left_count--;
+
+ right_count = hashmap__size(right->pctx->ids);
+ if (!expr__get_id(right->pctx, "duration_time", &data))
+ right_count--;
- return hashmap__size(right->pctx->ids) - hashmap__size(left->pctx->ids);
+ return right_count - left_count;
}
/**
@@ -1299,14 +1313,16 @@ err_out:
/**
* parse_ids - Build the event string for the ids and parse them creating an
* evlist. The encoded metric_ids are decoded.
+ * @metric_no_merge: is metric sharing explicitly disabled.
* @fake_pmu: used when testing metrics not supported by the current CPU.
* @ids: the event identifiers parsed from a metric.
* @modifier: any modifiers added to the events.
* @has_constraint: false if events should be placed in a weak group.
* @out_evlist: the created list of events.
*/
-static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids,
- const char *modifier, bool has_constraint, struct evlist **out_evlist)
+static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
+ struct expr_parse_ctx *ids, const char *modifier,
+ bool has_constraint, struct evlist **out_evlist)
{
struct parse_events_error parse_error;
struct evlist *parsed_evlist;
@@ -1314,12 +1330,19 @@ static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids,
int ret;
*out_evlist = NULL;
- if (hashmap__size(ids->ids) == 0) {
+ if (!metric_no_merge || hashmap__size(ids->ids) == 0) {
char *tmp;
/*
- * No ids/events in the expression parsing context. Events may
- * have been removed because of constant evaluation, e.g.:
- * event1 if #smt_on else 0
+ * We may fail to share events between metrics because
+ * duration_time isn't present in one metric. For example, a
+ * ratio of cache misses doesn't need duration_time but the same
+ * events may be used for a misses per second. Events without
+ * sharing implies multiplexing, that is best avoided, so place
+ * duration_time in every group.
+ *
+ * Also, there may be no ids/events in the expression parsing
+ * context because of constant evaluation, e.g.:
+ * event1 if #smt_on else 0
* Add a duration_time event to avoid a parse error on an empty
* string.
*/
@@ -1387,7 +1410,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
ret = build_combined_expr_ctx(&metric_list, &combined);
if (!ret && combined && hashmap__size(combined->ids)) {
- ret = parse_ids(fake_pmu, combined, /*modifier=*/NULL,
+ ret = parse_ids(metric_no_merge, fake_pmu, combined,
+ /*modifier=*/NULL,
/*has_constraint=*/true,
&combined_evlist);
}
@@ -1435,7 +1459,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
}
}
if (!metric_evlist) {
- ret = parse_ids(fake_pmu, m->pctx, m->modifier,
+ ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier,
m->has_constraint, &m->evlist);
if (ret)
goto out;
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
index 608b20c72a5c..48aa3217300b 100644
--- a/tools/perf/util/namespaces.c
+++ b/tools/perf/util/namespaces.c
@@ -60,17 +60,49 @@ void namespaces__free(struct namespaces *namespaces)
free(namespaces);
}
+static int nsinfo__get_nspid(struct nsinfo *nsi, const char *path)
+{
+ FILE *f = NULL;
+ char *statln = NULL;
+ size_t linesz = 0;
+ char *nspid;
+
+ f = fopen(path, "r");
+ if (f == NULL)
+ return -1;
+
+ while (getline(&statln, &linesz, f) != -1) {
+ /* Use tgid if CONFIG_PID_NS is not defined. */
+ if (strstr(statln, "Tgid:") != NULL) {
+ nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'),
+ NULL, 10);
+ nsi->nstgid = nsi->tgid;
+ }
+
+ if (strstr(statln, "NStgid:") != NULL) {
+ nspid = strrchr(statln, '\t');
+ nsi->nstgid = (pid_t)strtol(nspid, NULL, 10);
+ /*
+ * If innermost tgid is not the first, process is in a different
+ * PID namespace.
+ */
+ nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid;
+ break;
+ }
+ }
+
+ fclose(f);
+ free(statln);
+ return 0;
+}
+
int nsinfo__init(struct nsinfo *nsi)
{
char oldns[PATH_MAX];
char spath[PATH_MAX];
char *newns = NULL;
- char *statln = NULL;
- char *nspid;
struct stat old_stat;
struct stat new_stat;
- FILE *f = NULL;
- size_t linesz = 0;
int rv = -1;
if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
@@ -100,34 +132,9 @@ int nsinfo__init(struct nsinfo *nsi)
if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsi->pid) >= PATH_MAX)
goto out;
- f = fopen(spath, "r");
- if (f == NULL)
- goto out;
-
- while (getline(&statln, &linesz, f) != -1) {
- /* Use tgid if CONFIG_PID_NS is not defined. */
- if (strstr(statln, "Tgid:") != NULL) {
- nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'),
- NULL, 10);
- nsi->nstgid = nsi->tgid;
- }
-
- if (strstr(statln, "NStgid:") != NULL) {
- nspid = strrchr(statln, '\t');
- nsi->nstgid = (pid_t)strtol(nspid, NULL, 10);
- /* If innermost tgid is not the first, process is in a different
- * PID namespace.
- */
- nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid;
- break;
- }
- }
- rv = 0;
+ rv = nsinfo__get_nspid(nsi, spath);
out:
- if (f != NULL)
- (void) fclose(f);
- free(statln);
free(newns);
return rv;
}
@@ -299,3 +306,12 @@ int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi)
return ret;
}
+
+bool nsinfo__is_in_root_namespace(void)
+{
+ struct nsinfo nsi;
+
+ memset(&nsi, 0x0, sizeof(nsi));
+ nsinfo__get_nspid(&nsi, "/proc/self/status");
+ return !nsi.in_pidns;
+}
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
index ad9775db7b9c..9ceea9643507 100644
--- a/tools/perf/util/namespaces.h
+++ b/tools/perf/util/namespaces.h
@@ -59,6 +59,8 @@ void nsinfo__mountns_exit(struct nscookie *nc);
char *nsinfo__realpath(const char *path, struct nsinfo *nsi);
int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi);
+bool nsinfo__is_in_root_namespace(void);
+
static inline void __nsinfo__zput(struct nsinfo **nsip)
{
if (nsip) {
diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c
index 9fc86971027b..284f8eabd3b9 100644
--- a/tools/perf/util/parse-events-hybrid.c
+++ b/tools/perf/util/parse-events-hybrid.c
@@ -63,10 +63,13 @@ static int create_event_hybrid(__u32 config_type, int *idx,
static int pmu_cmp(struct parse_events_state *parse_state,
struct perf_pmu *pmu)
{
- if (!parse_state->hybrid_pmu_name)
- return 0;
+ if (parse_state->evlist && parse_state->evlist->hybrid_pmu_name)
+ return strcmp(parse_state->evlist->hybrid_pmu_name, pmu->name);
+
+ if (parse_state->hybrid_pmu_name)
+ return strcmp(parse_state->hybrid_pmu_name, pmu->name);
- return strcmp(parse_state->hybrid_pmu_name, pmu->name);
+ return 0;
}
static int add_hw_hybrid(struct parse_events_state *parse_state,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index ba74fdf74af9..acf20ce98ce9 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1824,6 +1824,11 @@ out:
return ret;
}
+__weak struct evsel *arch_evlist__leader(struct list_head *list)
+{
+ return list_first_entry(list, struct evsel, core.node);
+}
+
void parse_events__set_leader(char *name, struct list_head *list,
struct parse_events_state *parse_state)
{
@@ -1837,9 +1842,10 @@ void parse_events__set_leader(char *name, struct list_head *list,
if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state))
return;
- __perf_evlist__set_leader(list);
- leader = list_entry(list->next, struct evsel, core.node);
+ leader = arch_evlist__leader(list);
+ __perf_evlist__set_leader(list, &leader->core);
leader->group_name = name ? strdup(name) : NULL;
+ list_move(&leader->core.node, list);
}
/* list_event is assumed to point to malloc'ed memory */
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 06a7461ba864..a982e40ee5a9 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
+#include <string.h>
#include "perf_regs.h"
#include "event.h"
@@ -20,6 +21,671 @@ uint64_t __weak arch__user_reg_mask(void)
}
#ifdef HAVE_PERF_REGS_SUPPORT
+
+#define perf_event_arm_regs perf_event_arm64_regs
+#include "../../arch/arm64/include/uapi/asm/perf_regs.h"
+#undef perf_event_arm_regs
+
+#include "../../arch/arm/include/uapi/asm/perf_regs.h"
+#include "../../arch/csky/include/uapi/asm/perf_regs.h"
+#include "../../arch/mips/include/uapi/asm/perf_regs.h"
+#include "../../arch/powerpc/include/uapi/asm/perf_regs.h"
+#include "../../arch/riscv/include/uapi/asm/perf_regs.h"
+#include "../../arch/s390/include/uapi/asm/perf_regs.h"
+#include "../../arch/x86/include/uapi/asm/perf_regs.h"
+
+static const char *__perf_reg_name_arm64(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM64_X0:
+ return "x0";
+ case PERF_REG_ARM64_X1:
+ return "x1";
+ case PERF_REG_ARM64_X2:
+ return "x2";
+ case PERF_REG_ARM64_X3:
+ return "x3";
+ case PERF_REG_ARM64_X4:
+ return "x4";
+ case PERF_REG_ARM64_X5:
+ return "x5";
+ case PERF_REG_ARM64_X6:
+ return "x6";
+ case PERF_REG_ARM64_X7:
+ return "x7";
+ case PERF_REG_ARM64_X8:
+ return "x8";
+ case PERF_REG_ARM64_X9:
+ return "x9";
+ case PERF_REG_ARM64_X10:
+ return "x10";
+ case PERF_REG_ARM64_X11:
+ return "x11";
+ case PERF_REG_ARM64_X12:
+ return "x12";
+ case PERF_REG_ARM64_X13:
+ return "x13";
+ case PERF_REG_ARM64_X14:
+ return "x14";
+ case PERF_REG_ARM64_X15:
+ return "x15";
+ case PERF_REG_ARM64_X16:
+ return "x16";
+ case PERF_REG_ARM64_X17:
+ return "x17";
+ case PERF_REG_ARM64_X18:
+ return "x18";
+ case PERF_REG_ARM64_X19:
+ return "x19";
+ case PERF_REG_ARM64_X20:
+ return "x20";
+ case PERF_REG_ARM64_X21:
+ return "x21";
+ case PERF_REG_ARM64_X22:
+ return "x22";
+ case PERF_REG_ARM64_X23:
+ return "x23";
+ case PERF_REG_ARM64_X24:
+ return "x24";
+ case PERF_REG_ARM64_X25:
+ return "x25";
+ case PERF_REG_ARM64_X26:
+ return "x26";
+ case PERF_REG_ARM64_X27:
+ return "x27";
+ case PERF_REG_ARM64_X28:
+ return "x28";
+ case PERF_REG_ARM64_X29:
+ return "x29";
+ case PERF_REG_ARM64_SP:
+ return "sp";
+ case PERF_REG_ARM64_LR:
+ return "lr";
+ case PERF_REG_ARM64_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_arm(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM_R0:
+ return "r0";
+ case PERF_REG_ARM_R1:
+ return "r1";
+ case PERF_REG_ARM_R2:
+ return "r2";
+ case PERF_REG_ARM_R3:
+ return "r3";
+ case PERF_REG_ARM_R4:
+ return "r4";
+ case PERF_REG_ARM_R5:
+ return "r5";
+ case PERF_REG_ARM_R6:
+ return "r6";
+ case PERF_REG_ARM_R7:
+ return "r7";
+ case PERF_REG_ARM_R8:
+ return "r8";
+ case PERF_REG_ARM_R9:
+ return "r9";
+ case PERF_REG_ARM_R10:
+ return "r10";
+ case PERF_REG_ARM_FP:
+ return "fp";
+ case PERF_REG_ARM_IP:
+ return "ip";
+ case PERF_REG_ARM_SP:
+ return "sp";
+ case PERF_REG_ARM_LR:
+ return "lr";
+ case PERF_REG_ARM_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_csky(int id)
+{
+ switch (id) {
+ case PERF_REG_CSKY_A0:
+ return "a0";
+ case PERF_REG_CSKY_A1:
+ return "a1";
+ case PERF_REG_CSKY_A2:
+ return "a2";
+ case PERF_REG_CSKY_A3:
+ return "a3";
+ case PERF_REG_CSKY_REGS0:
+ return "regs0";
+ case PERF_REG_CSKY_REGS1:
+ return "regs1";
+ case PERF_REG_CSKY_REGS2:
+ return "regs2";
+ case PERF_REG_CSKY_REGS3:
+ return "regs3";
+ case PERF_REG_CSKY_REGS4:
+ return "regs4";
+ case PERF_REG_CSKY_REGS5:
+ return "regs5";
+ case PERF_REG_CSKY_REGS6:
+ return "regs6";
+ case PERF_REG_CSKY_REGS7:
+ return "regs7";
+ case PERF_REG_CSKY_REGS8:
+ return "regs8";
+ case PERF_REG_CSKY_REGS9:
+ return "regs9";
+ case PERF_REG_CSKY_SP:
+ return "sp";
+ case PERF_REG_CSKY_LR:
+ return "lr";
+ case PERF_REG_CSKY_PC:
+ return "pc";
+#if defined(__CSKYABIV2__)
+ case PERF_REG_CSKY_EXREGS0:
+ return "exregs0";
+ case PERF_REG_CSKY_EXREGS1:
+ return "exregs1";
+ case PERF_REG_CSKY_EXREGS2:
+ return "exregs2";
+ case PERF_REG_CSKY_EXREGS3:
+ return "exregs3";
+ case PERF_REG_CSKY_EXREGS4:
+ return "exregs4";
+ case PERF_REG_CSKY_EXREGS5:
+ return "exregs5";
+ case PERF_REG_CSKY_EXREGS6:
+ return "exregs6";
+ case PERF_REG_CSKY_EXREGS7:
+ return "exregs7";
+ case PERF_REG_CSKY_EXREGS8:
+ return "exregs8";
+ case PERF_REG_CSKY_EXREGS9:
+ return "exregs9";
+ case PERF_REG_CSKY_EXREGS10:
+ return "exregs10";
+ case PERF_REG_CSKY_EXREGS11:
+ return "exregs11";
+ case PERF_REG_CSKY_EXREGS12:
+ return "exregs12";
+ case PERF_REG_CSKY_EXREGS13:
+ return "exregs13";
+ case PERF_REG_CSKY_EXREGS14:
+ return "exregs14";
+ case PERF_REG_CSKY_TLS:
+ return "tls";
+ case PERF_REG_CSKY_HI:
+ return "hi";
+ case PERF_REG_CSKY_LO:
+ return "lo";
+#endif
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_mips(int id)
+{
+ switch (id) {
+ case PERF_REG_MIPS_PC:
+ return "PC";
+ case PERF_REG_MIPS_R1:
+ return "$1";
+ case PERF_REG_MIPS_R2:
+ return "$2";
+ case PERF_REG_MIPS_R3:
+ return "$3";
+ case PERF_REG_MIPS_R4:
+ return "$4";
+ case PERF_REG_MIPS_R5:
+ return "$5";
+ case PERF_REG_MIPS_R6:
+ return "$6";
+ case PERF_REG_MIPS_R7:
+ return "$7";
+ case PERF_REG_MIPS_R8:
+ return "$8";
+ case PERF_REG_MIPS_R9:
+ return "$9";
+ case PERF_REG_MIPS_R10:
+ return "$10";
+ case PERF_REG_MIPS_R11:
+ return "$11";
+ case PERF_REG_MIPS_R12:
+ return "$12";
+ case PERF_REG_MIPS_R13:
+ return "$13";
+ case PERF_REG_MIPS_R14:
+ return "$14";
+ case PERF_REG_MIPS_R15:
+ return "$15";
+ case PERF_REG_MIPS_R16:
+ return "$16";
+ case PERF_REG_MIPS_R17:
+ return "$17";
+ case PERF_REG_MIPS_R18:
+ return "$18";
+ case PERF_REG_MIPS_R19:
+ return "$19";
+ case PERF_REG_MIPS_R20:
+ return "$20";
+ case PERF_REG_MIPS_R21:
+ return "$21";
+ case PERF_REG_MIPS_R22:
+ return "$22";
+ case PERF_REG_MIPS_R23:
+ return "$23";
+ case PERF_REG_MIPS_R24:
+ return "$24";
+ case PERF_REG_MIPS_R25:
+ return "$25";
+ case PERF_REG_MIPS_R28:
+ return "$28";
+ case PERF_REG_MIPS_R29:
+ return "$29";
+ case PERF_REG_MIPS_R30:
+ return "$30";
+ case PERF_REG_MIPS_R31:
+ return "$31";
+ default:
+ break;
+ }
+ return NULL;
+}
+
+static const char *__perf_reg_name_powerpc(int id)
+{
+ switch (id) {
+ case PERF_REG_POWERPC_R0:
+ return "r0";
+ case PERF_REG_POWERPC_R1:
+ return "r1";
+ case PERF_REG_POWERPC_R2:
+ return "r2";
+ case PERF_REG_POWERPC_R3:
+ return "r3";
+ case PERF_REG_POWERPC_R4:
+ return "r4";
+ case PERF_REG_POWERPC_R5:
+ return "r5";
+ case PERF_REG_POWERPC_R6:
+ return "r6";
+ case PERF_REG_POWERPC_R7:
+ return "r7";
+ case PERF_REG_POWERPC_R8:
+ return "r8";
+ case PERF_REG_POWERPC_R9:
+ return "r9";
+ case PERF_REG_POWERPC_R10:
+ return "r10";
+ case PERF_REG_POWERPC_R11:
+ return "r11";
+ case PERF_REG_POWERPC_R12:
+ return "r12";
+ case PERF_REG_POWERPC_R13:
+ return "r13";
+ case PERF_REG_POWERPC_R14:
+ return "r14";
+ case PERF_REG_POWERPC_R15:
+ return "r15";
+ case PERF_REG_POWERPC_R16:
+ return "r16";
+ case PERF_REG_POWERPC_R17:
+ return "r17";
+ case PERF_REG_POWERPC_R18:
+ return "r18";
+ case PERF_REG_POWERPC_R19:
+ return "r19";
+ case PERF_REG_POWERPC_R20:
+ return "r20";
+ case PERF_REG_POWERPC_R21:
+ return "r21";
+ case PERF_REG_POWERPC_R22:
+ return "r22";
+ case PERF_REG_POWERPC_R23:
+ return "r23";
+ case PERF_REG_POWERPC_R24:
+ return "r24";
+ case PERF_REG_POWERPC_R25:
+ return "r25";
+ case PERF_REG_POWERPC_R26:
+ return "r26";
+ case PERF_REG_POWERPC_R27:
+ return "r27";
+ case PERF_REG_POWERPC_R28:
+ return "r28";
+ case PERF_REG_POWERPC_R29:
+ return "r29";
+ case PERF_REG_POWERPC_R30:
+ return "r30";
+ case PERF_REG_POWERPC_R31:
+ return "r31";
+ case PERF_REG_POWERPC_NIP:
+ return "nip";
+ case PERF_REG_POWERPC_MSR:
+ return "msr";
+ case PERF_REG_POWERPC_ORIG_R3:
+ return "orig_r3";
+ case PERF_REG_POWERPC_CTR:
+ return "ctr";
+ case PERF_REG_POWERPC_LINK:
+ return "link";
+ case PERF_REG_POWERPC_XER:
+ return "xer";
+ case PERF_REG_POWERPC_CCR:
+ return "ccr";
+ case PERF_REG_POWERPC_SOFTE:
+ return "softe";
+ case PERF_REG_POWERPC_TRAP:
+ return "trap";
+ case PERF_REG_POWERPC_DAR:
+ return "dar";
+ case PERF_REG_POWERPC_DSISR:
+ return "dsisr";
+ case PERF_REG_POWERPC_SIER:
+ return "sier";
+ case PERF_REG_POWERPC_MMCRA:
+ return "mmcra";
+ case PERF_REG_POWERPC_MMCR0:
+ return "mmcr0";
+ case PERF_REG_POWERPC_MMCR1:
+ return "mmcr1";
+ case PERF_REG_POWERPC_MMCR2:
+ return "mmcr2";
+ case PERF_REG_POWERPC_MMCR3:
+ return "mmcr3";
+ case PERF_REG_POWERPC_SIER2:
+ return "sier2";
+ case PERF_REG_POWERPC_SIER3:
+ return "sier3";
+ case PERF_REG_POWERPC_PMC1:
+ return "pmc1";
+ case PERF_REG_POWERPC_PMC2:
+ return "pmc2";
+ case PERF_REG_POWERPC_PMC3:
+ return "pmc3";
+ case PERF_REG_POWERPC_PMC4:
+ return "pmc4";
+ case PERF_REG_POWERPC_PMC5:
+ return "pmc5";
+ case PERF_REG_POWERPC_PMC6:
+ return "pmc6";
+ case PERF_REG_POWERPC_SDAR:
+ return "sdar";
+ case PERF_REG_POWERPC_SIAR:
+ return "siar";
+ default:
+ break;
+ }
+ return NULL;
+}
+
+static const char *__perf_reg_name_riscv(int id)
+{
+ switch (id) {
+ case PERF_REG_RISCV_PC:
+ return "pc";
+ case PERF_REG_RISCV_RA:
+ return "ra";
+ case PERF_REG_RISCV_SP:
+ return "sp";
+ case PERF_REG_RISCV_GP:
+ return "gp";
+ case PERF_REG_RISCV_TP:
+ return "tp";
+ case PERF_REG_RISCV_T0:
+ return "t0";
+ case PERF_REG_RISCV_T1:
+ return "t1";
+ case PERF_REG_RISCV_T2:
+ return "t2";
+ case PERF_REG_RISCV_S0:
+ return "s0";
+ case PERF_REG_RISCV_S1:
+ return "s1";
+ case PERF_REG_RISCV_A0:
+ return "a0";
+ case PERF_REG_RISCV_A1:
+ return "a1";
+ case PERF_REG_RISCV_A2:
+ return "a2";
+ case PERF_REG_RISCV_A3:
+ return "a3";
+ case PERF_REG_RISCV_A4:
+ return "a4";
+ case PERF_REG_RISCV_A5:
+ return "a5";
+ case PERF_REG_RISCV_A6:
+ return "a6";
+ case PERF_REG_RISCV_A7:
+ return "a7";
+ case PERF_REG_RISCV_S2:
+ return "s2";
+ case PERF_REG_RISCV_S3:
+ return "s3";
+ case PERF_REG_RISCV_S4:
+ return "s4";
+ case PERF_REG_RISCV_S5:
+ return "s5";
+ case PERF_REG_RISCV_S6:
+ return "s6";
+ case PERF_REG_RISCV_S7:
+ return "s7";
+ case PERF_REG_RISCV_S8:
+ return "s8";
+ case PERF_REG_RISCV_S9:
+ return "s9";
+ case PERF_REG_RISCV_S10:
+ return "s10";
+ case PERF_REG_RISCV_S11:
+ return "s11";
+ case PERF_REG_RISCV_T3:
+ return "t3";
+ case PERF_REG_RISCV_T4:
+ return "t4";
+ case PERF_REG_RISCV_T5:
+ return "t5";
+ case PERF_REG_RISCV_T6:
+ return "t6";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_s390(int id)
+{
+ switch (id) {
+ case PERF_REG_S390_R0:
+ return "R0";
+ case PERF_REG_S390_R1:
+ return "R1";
+ case PERF_REG_S390_R2:
+ return "R2";
+ case PERF_REG_S390_R3:
+ return "R3";
+ case PERF_REG_S390_R4:
+ return "R4";
+ case PERF_REG_S390_R5:
+ return "R5";
+ case PERF_REG_S390_R6:
+ return "R6";
+ case PERF_REG_S390_R7:
+ return "R7";
+ case PERF_REG_S390_R8:
+ return "R8";
+ case PERF_REG_S390_R9:
+ return "R9";
+ case PERF_REG_S390_R10:
+ return "R10";
+ case PERF_REG_S390_R11:
+ return "R11";
+ case PERF_REG_S390_R12:
+ return "R12";
+ case PERF_REG_S390_R13:
+ return "R13";
+ case PERF_REG_S390_R14:
+ return "R14";
+ case PERF_REG_S390_R15:
+ return "R15";
+ case PERF_REG_S390_FP0:
+ return "FP0";
+ case PERF_REG_S390_FP1:
+ return "FP1";
+ case PERF_REG_S390_FP2:
+ return "FP2";
+ case PERF_REG_S390_FP3:
+ return "FP3";
+ case PERF_REG_S390_FP4:
+ return "FP4";
+ case PERF_REG_S390_FP5:
+ return "FP5";
+ case PERF_REG_S390_FP6:
+ return "FP6";
+ case PERF_REG_S390_FP7:
+ return "FP7";
+ case PERF_REG_S390_FP8:
+ return "FP8";
+ case PERF_REG_S390_FP9:
+ return "FP9";
+ case PERF_REG_S390_FP10:
+ return "FP10";
+ case PERF_REG_S390_FP11:
+ return "FP11";
+ case PERF_REG_S390_FP12:
+ return "FP12";
+ case PERF_REG_S390_FP13:
+ return "FP13";
+ case PERF_REG_S390_FP14:
+ return "FP14";
+ case PERF_REG_S390_FP15:
+ return "FP15";
+ case PERF_REG_S390_MASK:
+ return "MASK";
+ case PERF_REG_S390_PC:
+ return "PC";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__perf_reg_name_x86(int id)
+{
+ switch (id) {
+ case PERF_REG_X86_AX:
+ return "AX";
+ case PERF_REG_X86_BX:
+ return "BX";
+ case PERF_REG_X86_CX:
+ return "CX";
+ case PERF_REG_X86_DX:
+ return "DX";
+ case PERF_REG_X86_SI:
+ return "SI";
+ case PERF_REG_X86_DI:
+ return "DI";
+ case PERF_REG_X86_BP:
+ return "BP";
+ case PERF_REG_X86_SP:
+ return "SP";
+ case PERF_REG_X86_IP:
+ return "IP";
+ case PERF_REG_X86_FLAGS:
+ return "FLAGS";
+ case PERF_REG_X86_CS:
+ return "CS";
+ case PERF_REG_X86_SS:
+ return "SS";
+ case PERF_REG_X86_DS:
+ return "DS";
+ case PERF_REG_X86_ES:
+ return "ES";
+ case PERF_REG_X86_FS:
+ return "FS";
+ case PERF_REG_X86_GS:
+ return "GS";
+ case PERF_REG_X86_R8:
+ return "R8";
+ case PERF_REG_X86_R9:
+ return "R9";
+ case PERF_REG_X86_R10:
+ return "R10";
+ case PERF_REG_X86_R11:
+ return "R11";
+ case PERF_REG_X86_R12:
+ return "R12";
+ case PERF_REG_X86_R13:
+ return "R13";
+ case PERF_REG_X86_R14:
+ return "R14";
+ case PERF_REG_X86_R15:
+ return "R15";
+
+#define XMM(x) \
+ case PERF_REG_X86_XMM ## x: \
+ case PERF_REG_X86_XMM ## x + 1: \
+ return "XMM" #x;
+ XMM(0)
+ XMM(1)
+ XMM(2)
+ XMM(3)
+ XMM(4)
+ XMM(5)
+ XMM(6)
+ XMM(7)
+ XMM(8)
+ XMM(9)
+ XMM(10)
+ XMM(11)
+ XMM(12)
+ XMM(13)
+ XMM(14)
+ XMM(15)
+#undef XMM
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+const char *perf_reg_name(int id, const char *arch)
+{
+ const char *reg_name = NULL;
+
+ if (!strcmp(arch, "csky"))
+ reg_name = __perf_reg_name_csky(id);
+ else if (!strcmp(arch, "mips"))
+ reg_name = __perf_reg_name_mips(id);
+ else if (!strcmp(arch, "powerpc"))
+ reg_name = __perf_reg_name_powerpc(id);
+ else if (!strcmp(arch, "riscv"))
+ reg_name = __perf_reg_name_riscv(id);
+ else if (!strcmp(arch, "s390"))
+ reg_name = __perf_reg_name_s390(id);
+ else if (!strcmp(arch, "x86"))
+ reg_name = __perf_reg_name_x86(id);
+ else if (!strcmp(arch, "arm"))
+ reg_name = __perf_reg_name_arm(id);
+ else if (!strcmp(arch, "arm64"))
+ reg_name = __perf_reg_name_arm64(id);
+
+ return reg_name ?: "unknown";
+}
+
int perf_reg_value(u64 *valp, struct regs_dump *regs, int id)
{
int i, idx = 0;
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index eeac181ebccf..ce1127af05e4 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -11,8 +11,11 @@ struct sample_reg {
const char *name;
uint64_t mask;
};
-#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) }
-#define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) }
+
+#define SMPL_REG_MASK(b) (1ULL << (b))
+#define SMPL_REG(n, b) { .name = #n, .mask = SMPL_REG_MASK(b) }
+#define SMPL_REG2_MASK(b) (3ULL << (b))
+#define SMPL_REG2(n, b) { .name = #n, .mask = SMPL_REG2_MASK(b) }
#define SMPL_REG_END { .name = NULL }
enum {
@@ -31,22 +34,16 @@ extern const struct sample_reg sample_reg_masks[];
#define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP))
+const char *perf_reg_name(int id, const char *arch);
int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
-static inline const char *perf_reg_name(int id)
-{
- const char *reg_name = __perf_reg_name(id);
-
- return reg_name ?: "unknown";
-}
-
#else
#define PERF_REGS_MASK 0
#define PERF_REGS_MAX 0
#define DWARF_MINIMAL_REGS PERF_REGS_MASK
-static inline const char *perf_reg_name(int id __maybe_unused)
+static inline const char *perf_reg_name(int id __maybe_unused, const char *arch __maybe_unused)
{
return "unknown";
}
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index c0c010350bc2..0445bee9290f 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -36,6 +36,7 @@
#include "../debug.h"
#include "../dso.h"
#include "../callchain.h"
+#include "../env.h"
#include "../evsel.h"
#include "../event.h"
#include "../thread.h"
@@ -687,7 +688,7 @@ static void set_sample_datasrc_in_dict(PyObject *dict,
_PyUnicode_FromString(decode));
}
-static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size)
+static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, char *bf, int size)
{
unsigned int i = 0, r;
int printed = 0;
@@ -702,7 +703,7 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size)
printed += scnprintf(bf + printed, size - printed,
"%5s:0x%" PRIx64 " ",
- perf_reg_name(r), val);
+ perf_reg_name(r, arch), val);
}
}
@@ -711,6 +712,7 @@ static void set_regs_in_dict(PyObject *dict,
struct evsel *evsel)
{
struct perf_event_attr *attr = &evsel->core.attr;
+ const char *arch = perf_env__arch(evsel__env(evsel));
/*
* Here value 28 is a constant size which can be used to print
@@ -722,12 +724,12 @@ static void set_regs_in_dict(PyObject *dict,
int size = __sw_hweight64(attr->sample_regs_intr) * 28;
char bf[size];
- regs_map(&sample->intr_regs, attr->sample_regs_intr, bf, sizeof(bf));
+ regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, sizeof(bf));
pydict_set_item_string_decref(dict, "iregs",
_PyUnicode_FromString(bf));
- regs_map(&sample->user_regs, attr->sample_regs_user, bf, sizeof(bf));
+ regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, sizeof(bf));
pydict_set_item_string_decref(dict, "uregs",
_PyUnicode_FromString(bf));
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d8857d1b6d7c..e1a273048681 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -15,6 +15,7 @@
#include "map_symbol.h"
#include "branch.h"
#include "debug.h"
+#include "env.h"
#include "evlist.h"
#include "evsel.h"
#include "memswap.h"
@@ -1168,7 +1169,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
}
}
-static void regs_dump__printf(u64 mask, u64 *regs)
+static void regs_dump__printf(u64 mask, u64 *regs, const char *arch)
{
unsigned rid, i = 0;
@@ -1176,7 +1177,7 @@ static void regs_dump__printf(u64 mask, u64 *regs)
u64 val = regs[i++];
printf(".... %-5s 0x%016" PRIx64 "\n",
- perf_reg_name(rid), val);
+ perf_reg_name(rid, arch), val);
}
}
@@ -1194,7 +1195,7 @@ static inline const char *regs_dump_abi(struct regs_dump *d)
return regs_abi[d->abi];
}
-static void regs__printf(const char *type, struct regs_dump *regs)
+static void regs__printf(const char *type, struct regs_dump *regs, const char *arch)
{
u64 mask = regs->mask;
@@ -1203,23 +1204,23 @@ static void regs__printf(const char *type, struct regs_dump *regs)
mask,
regs_dump_abi(regs));
- regs_dump__printf(mask, regs->regs);
+ regs_dump__printf(mask, regs->regs, arch);
}
-static void regs_user__printf(struct perf_sample *sample)
+static void regs_user__printf(struct perf_sample *sample, const char *arch)
{
struct regs_dump *user_regs = &sample->user_regs;
if (user_regs->regs)
- regs__printf("user", user_regs);
+ regs__printf("user", user_regs, arch);
}
-static void regs_intr__printf(struct perf_sample *sample)
+static void regs_intr__printf(struct perf_sample *sample, const char *arch)
{
struct regs_dump *intr_regs = &sample->intr_regs;
if (intr_regs->regs)
- regs__printf("intr", intr_regs);
+ regs__printf("intr", intr_regs, arch);
}
static void stack_user__printf(struct stack_dump *dump)
@@ -1304,7 +1305,7 @@ char *get_page_size_name(u64 size, char *str)
}
static void dump_sample(struct evsel *evsel, union perf_event *event,
- struct perf_sample *sample)
+ struct perf_sample *sample, const char *arch)
{
u64 sample_type;
char str[PAGE_SIZE_NAME_LEN];
@@ -1325,10 +1326,10 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
branch_stack__printf(sample, evsel__has_branch_callstack(evsel));
if (sample_type & PERF_SAMPLE_REGS_USER)
- regs_user__printf(sample);
+ regs_user__printf(sample, arch);
if (sample_type & PERF_SAMPLE_REGS_INTR)
- regs_intr__printf(sample);
+ regs_intr__printf(sample, arch);
if (sample_type & PERF_SAMPLE_STACK_USER)
stack_user__printf(&sample->user_stack);
@@ -1502,7 +1503,7 @@ static int machines__deliver_event(struct machines *machines,
++evlist->stats.nr_unknown_id;
return 0;
}
- dump_sample(evsel, event, sample);
+ dump_sample(evsel, event, sample, perf_env__arch(machine->env));
if (machine == NULL) {
++evlist->stats.nr_unprocessable_samples;
return 0;